12 #include <cuda_runtime.h>
13 #include <cusparse_v2.h>
16 #define CUDA_ENABLE_ERROR_CHECK
18 #ifdef CUDA_ENABLE_ERROR_CHECK
19 #define CUDA_CHECK_ERROR(CUDA_CALL) \
21 cudaError_t code = (cudaError_t)CUDA_CALL; \
22 if (code != cudaSuccess) { \
23 printf("CUDA Error: %s\n", cudaGetErrorString(code)); \
27 #define CUDA_CHECK_ERROR(CUDA_CALL) CUDA_CALL
43 void copybackAndPrint(std::ostream &stream,
const char *msg, T *ptr,
int n) {
44 std::vector<T> buffer(n);
46 cudaMemcpy(buffer.data(), ptr, n *
sizeof(T), cudaMemcpyDeviceToHost));
49 std::cout << d <<
' ';
56 Vector(
size_t preAlloc) : buf(
nullptr), n(preAlloc) {
58 CUDA_CHECK_ERROR(cudaMalloc(&buf, preAlloc *
sizeof(T)));
64 std::swap(buf, other.buf);
65 std::swap(n, other.n);
70 CUDA_CHECK_ERROR(cudaFree(buf));
76 std::swap(buf, other.buf);
77 std::swap(n, other.n);
81 T operator[](
size_t pos) {
84 cudaMemcpy(&element, &buf[pos],
sizeof(T), cudaMemcpyDeviceToHost));
/// Accessor for the underlying storage.
///
/// @return the pointer currently stored in `buf`, returned as-is.
T *data() const {
  return buf;
}
90 class iterator :
public std::iterator<std::random_access_iterator_tag, T> {
/// Builds an iterator over the storage `buf`, positioned at index `num`.
///
/// @param buf base pointer of the sequence being traversed
/// @param num index the iterator starts at
iterator(T *buf, long num)
    : buf(buf),
      pos(num) {}
111 bool operator==(
iterator other)
const {
112 return buf == other.buf && pos == other.pos;
/// Inequality comparison, defined as the exact negation of operator==.
///
/// @param other iterator to compare against
/// @return true when operator== reports the two iterators as not equal
bool operator!=(iterator other) const {
  const bool same = (*this == other);
  return !same;
}
115 T operator*()
const {
118 cudaMemcpy(&element, &buf[pos],
sizeof(T), cudaMemcpyDeviceToHost));
/// Returns the past-the-end iterator for this buffer.
///
/// The end position is `n`, one past the last valid index `n - 1`, so that a
/// half-open traversal from the start of the buffer up to `end()` visits every
/// element. The previous value `n - 1` made such a traversal stop before the
/// last element, and for an empty buffer it produced a position before the
/// start (or a wrapped value if `n` is an unsigned type; it is initialized
/// from a `size_t`).
///
/// @return iterator over `buf` positioned at index `n`; it must not be
///         dereferenced
iterator end() {
  // The iterator constructor takes a `long` position; convert explicitly.
  return iterator(buf, static_cast<long>(n));
}
136 template <
typename ValueType,
typename IndexType>
struct CudaMatrix {
138 CudaMatrix(
const Eigen::SparseMatrix<ValueType, Eigen::RowMajor> &mat,
140 : dim(dim), non_zero(mat.nonZeros()), row(dim + 1), col(mat.nonZeros()),
141 val(mat.nonZeros()) {
144 CUDA_CHECK_ERROR(cudaMemcpy(row.data(), mat.outerIndexPtr(),
145 (dim + 1) *
sizeof(
int),
146 cudaMemcpyHostToDevice));
147 CUDA_CHECK_ERROR(cudaMemcpy(col.data(), mat.innerIndexPtr(),
148 non_zero *
sizeof(
int),
149 cudaMemcpyHostToDevice));
150 CUDA_CHECK_ERROR(cudaMemcpy(val.data(), mat.valuePtr(),
151 non_zero *
sizeof(
double),
152 cudaMemcpyHostToDevice));
162 friend std::ostream &
163 operator<<(std::ostream &os,
166 std::vector<double> bufferVal(mat.non_zero);
167 std::vector<int> bufferCol(mat.non_zero);
168 std::vector<int> bufferRow(mat.dim);
169 CUDA_CHECK_ERROR(cudaMemcpy(bufferVal.data(), mat.val.data(),
170 mat.non_zero *
sizeof(
double),
171 cudaMemcpyDeviceToHost));
172 CUDA_CHECK_ERROR(cudaMemcpy(bufferCol.data(), mat.col.data(),
173 mat.non_zero *
sizeof(
int),
174 cudaMemcpyDeviceToHost));
175 CUDA_CHECK_ERROR(cudaMemcpy(bufferRow.data(), mat.row.data(),
176 mat.dim *
sizeof(
int), cudaMemcpyDeviceToHost));
179 os <<
"Nonzero entries:\n";
180 for (
int i = 0; i < mat.non_zero; i++) {
181 os <<
'(' << bufferVal[i] <<
',' << bufferCol[i] <<
") ";
184 os <<
"Outer pointers:\n";
185 for (
auto i : bufferRow) {