DPsim
CudaUtility.h
1 /* Copyright 2017-2021 Institute for Automation of Complex Power Systems,
2  * EONERC, RWTH Aachen University
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at https://mozilla.org/MPL/2.0/.
7  *********************************************************************************/
8 
9 #include <iostream>
10 #include <utility>
11 
12 #include <cuda_runtime.h>
13 #include <cusparse_v2.h>
14 
15 //Error Handling
16 #define CUDA_ENABLE_ERROR_CHECK
17 
18 #ifdef CUDA_ENABLE_ERROR_CHECK
19 #define CUDA_CHECK_ERROR(CUDA_CALL) \
20  { \
21  cudaError_t code = (cudaError_t)CUDA_CALL; \
22  if (code != cudaSuccess) { \
23  printf("CUDA Error: %s\n", cudaGetErrorString(code)); \
24  } \
25  }
26 #else
27 #define CUDA_CHECK_ERROR(CUDA_CALL) CUDA_CALL
28 #endif
29 
39 namespace DPsim {
40 namespace cuda {
41 
42 template <typename T>
43 void copybackAndPrint(std::ostream &stream, const char *msg, T *ptr, int n) {
44  std::vector<T> buffer(n);
45  CUDA_CHECK_ERROR(
46  cudaMemcpy(buffer.data(), ptr, n * sizeof(T), cudaMemcpyDeviceToHost));
47  stream << msg;
48  for (T d : buffer) {
49  std::cout << d << ' ';
50  }
51  stream << std::endl;
52 }
53 
54 template <typename T> class Vector {
55 public:
56  Vector(size_t preAlloc) : buf(nullptr), n(preAlloc) {
57  if (n > 0) {
58  CUDA_CHECK_ERROR(cudaMalloc(&buf, preAlloc * sizeof(T)));
59  }
60  }
61 
62  Vector(Vector &) = delete;
63  Vector(Vector &&other) {
64  std::swap(buf, other.buf);
65  std::swap(n, other.n);
66  }
67 
68  ~Vector() {
69  if (n > 0) {
70  CUDA_CHECK_ERROR(cudaFree(buf));
71  }
72  }
73 
74  Vector &operator=(const Vector &) = delete;
75  Vector &operator=(Vector &&other) {
76  std::swap(buf, other.buf);
77  std::swap(n, other.n);
78  return *this;
79  }
80 
81  T operator[](size_t pos) {
82  T element;
83  CUDA_CHECK_ERROR(
84  cudaMemcpy(&element, &buf[pos], sizeof(T), cudaMemcpyDeviceToHost));
85  return element;
86  }
87 
88  T *data() const { return buf; }
89 
90  class iterator : public std::iterator<std::random_access_iterator_tag, T> {
91  public:
92  iterator(T *buf, long num) : buf(buf), pos(num) {}
93  iterator &operator++() {
94  pos++;
95  return *this;
96  }
97  iterator operator++(int) {
98  iterator retval = *this;
99  ++(*this);
100  return retval;
101  }
102  iterator &operator--() {
103  pos--;
104  return *this;
105  }
106  iterator operator--(int) {
107  iterator retval = *this;
108  --(*this);
109  return retval;
110  }
111  bool operator==(iterator other) const {
112  return buf == other.buf && pos == other.pos;
113  }
114  bool operator!=(iterator other) const { return !(*this == other); }
115  T operator*() const {
116  T element;
117  CUDA_CHECK_ERROR(
118  cudaMemcpy(&element, &buf[pos], sizeof(T), cudaMemcpyDeviceToHost));
119  return element;
120  }
121 
122  private:
123  size_t pos;
124  T *buf;
125  };
126 
127  iterator begin() { return iterator(buf, 0); }
128  iterator end() { return iterator(buf, n - 1); }
129 
130 private:
131  T *buf;
132  size_t n;
133 };
134 
135 // Matrix (CSR)
136 template <typename ValueType, typename IndexType> struct CudaMatrix {
137 
138  CudaMatrix(const Eigen::SparseMatrix<ValueType, Eigen::RowMajor> &mat,
139  int dim)
140  : dim(dim), non_zero(mat.nonZeros()), row(dim + 1), col(mat.nonZeros()),
141  val(mat.nonZeros()) {
142 
143  // Copy Matrix (Host -> Device)
144  CUDA_CHECK_ERROR(cudaMemcpy(row.data(), mat.outerIndexPtr(),
145  (dim + 1) * sizeof(int),
146  cudaMemcpyHostToDevice));
147  CUDA_CHECK_ERROR(cudaMemcpy(col.data(), mat.innerIndexPtr(),
148  non_zero * sizeof(int),
149  cudaMemcpyHostToDevice));
150  CUDA_CHECK_ERROR(cudaMemcpy(val.data(), mat.valuePtr(),
151  non_zero * sizeof(double),
152  cudaMemcpyHostToDevice));
153  }
154 
155  //Matrix Data
156  const int dim;
157  const int non_zero;
161 
162  friend std::ostream &
163  operator<<(std::ostream &os,
165  //Copy back
166  std::vector<double> bufferVal(mat.non_zero);
167  std::vector<int> bufferCol(mat.non_zero);
168  std::vector<int> bufferRow(mat.dim);
169  CUDA_CHECK_ERROR(cudaMemcpy(bufferVal.data(), mat.val.data(),
170  mat.non_zero * sizeof(double),
171  cudaMemcpyDeviceToHost));
172  CUDA_CHECK_ERROR(cudaMemcpy(bufferCol.data(), mat.col.data(),
173  mat.non_zero * sizeof(int),
174  cudaMemcpyDeviceToHost));
175  CUDA_CHECK_ERROR(cudaMemcpy(bufferRow.data(), mat.row.data(),
176  mat.dim * sizeof(int), cudaMemcpyDeviceToHost));
177 
178  //Print Sparse Structures (Eigen-Format)
179  os << "Nonzero entries:\n";
180  for (int i = 0; i < mat.non_zero; i++) {
181  os << '(' << bufferVal[i] << ',' << bufferCol[i] << ") ";
182  }
183  os << "\n\n";
184  os << "Outer pointers:\n";
185  for (auto i : bufferRow) {
186  os << i << ' ';
187  }
188  os << " $\n";
189  // TODO Print whole Matrix
190  // for(int i = 0; i < mat.dim; i++) {
191  // for(int j = 0; j < mat.dim; j++) {
192 
193  // }
194  // }
195  return os;
196  }
197 };
198 } // namespace cuda
199 } // namespace DPsim