DPsim
GpuSparseAdapter.cpp
1 /* Copyright 2017-2021 Institute for Automation of Complex Power Systems,
2  * EONERC, RWTH Aachen University
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at https://mozilla.org/MPL/2.0/.
7  *********************************************************************************/
8 
9 #include <dpsim/GpuSparseAdapter.h>
10 
11 using namespace DPsim;
12 
13 #pragma GCC diagnostic push
14 #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
15 
16 namespace DPsim {
18 
20  if (mCusparsehandle != nullptr) {
21  cusparseDestroyMatDescr(descr_L);
22  cusparseDestroyMatDescr(descr_U);
23  cusparseDestroyCsrsv2Info(info_L);
24  cusparseDestroyCsrsv2Info(info_U);
25  cusparseDestroy(mCusparsehandle);
26  }
27  if (mCusolverhandle != nullptr) {
28  cusolverSpDestroy(mCusolverhandle);
29  }
30 }
31 
32 inline void GpuSparseAdapter::checkCusparseStatus(cusparseStatus_t status,
33  std::string additionalInfo) {
34  if (status != CUSPARSE_STATUS_SUCCESS) {
35  //SPDLOG_LOGGER_ERROR(mSLog, "{} {}", additionalInfo, cusparseGetErrorString(status));
36  std::cout << "Status not success: " << status << std::endl;
37  throw SolverException();
38  }
39 }
40 
41 void GpuSparseAdapter::performFactorization(SparseMatrix &systemMatrix) {
42  cusparseStatus_t csp_status;
43  cusolverStatus_t cso_status;
44  csp_status = cusparseCreate(&mCusparsehandle);
45  checkCusparseStatus(csp_status, "cuSparse initialization failed:");
46 
47  if ((cso_status = cusolverSpCreate(&mCusolverhandle)) !=
48  CUSOLVER_STATUS_SUCCESS) {
49  //SPDLOG_LOGGER_ERROR(mSLog, "cuSolver initialization failed: Error code {}", cso_status);
50  std::cout << "cso_status not success" << std::endl;
51  throw SolverException();
52  }
53 
54  size_t N = systemMatrix.rows();
55  auto hMat = systemMatrix;
56 
60 
61  cusparseMatDescr_t descr_M = 0;
62  csrilu02Info_t info_M = 0;
63  int structural_zero;
64  int numerical_zero;
65 
66  size_t nnz = hMat.nonZeros();
67 
68  // step 1: create a descriptor which contains
69  // - matrix M is base-1
70  // - matrix L is base-1
71  // - matrix L is lower triangular
72  // - matrix L has unit diagonal
73  // - matrix U is base-1
74  // - matrix U is upper triangular
75  // - matrix U has non-unit diagonal
76  checkCusparseStatus(cusparseCreateMatDescr(&descr_M));
77  checkCusparseStatus(
78  cusparseSetMatIndexBase(descr_M, CUSPARSE_INDEX_BASE_ZERO));
79  checkCusparseStatus(
80  cusparseSetMatType(descr_M, CUSPARSE_MATRIX_TYPE_GENERAL));
81 
82  checkCusparseStatus(cusparseCreateMatDescr(&descr_L));
83  checkCusparseStatus(
84  cusparseSetMatIndexBase(descr_L, CUSPARSE_INDEX_BASE_ZERO));
85  checkCusparseStatus(
86  cusparseSetMatType(descr_L, CUSPARSE_MATRIX_TYPE_GENERAL));
87  checkCusparseStatus(
88  cusparseSetMatFillMode(descr_L, CUSPARSE_FILL_MODE_LOWER));
89  checkCusparseStatus(cusparseSetMatDiagType(descr_L, CUSPARSE_DIAG_TYPE_UNIT));
90 
91  checkCusparseStatus(cusparseCreateMatDescr(&descr_U));
92  checkCusparseStatus(
93  cusparseSetMatIndexBase(descr_U, CUSPARSE_INDEX_BASE_ZERO));
94  checkCusparseStatus(
95  cusparseSetMatType(descr_U, CUSPARSE_MATRIX_TYPE_GENERAL));
96  checkCusparseStatus(
97  cusparseSetMatFillMode(descr_U, CUSPARSE_FILL_MODE_UPPER));
98  checkCusparseStatus(
99  cusparseSetMatDiagType(descr_U, CUSPARSE_DIAG_TYPE_NON_UNIT));
100 
101  // step 2: create a empty info structure
102  // we need one info for csrilu02 and two info's for csrsv2
103  checkCusparseStatus(cusparseCreateCsrilu02Info(&info_M));
104  checkCusparseStatus(cusparseCreateCsrsv2Info(&info_L));
105  checkCusparseStatus(cusparseCreateCsrsv2Info(&info_U));
106 
107  // step 2a: permutate Matrix M' = P*M
108  int p_nnz = 0;
109  int p[N];
110 
111  cso_status = cusolverSpDcsrzfdHost(mCusolverhandle, N, nnz, descr_M,
112  hMat.valuePtr(), hMat.outerIndexPtr(),
113  hMat.innerIndexPtr(), p, &p_nnz);
114 
115  if (cso_status != CUSOLVER_STATUS_SUCCESS) {
116  //SPDLOG_LOGGER_ERROR(mSLog, "cusolverSpDcsrzfdHost returend an error");
117  }
118  // create Eigen::PermutationMatrix from the p
119  mTransp = std::unique_ptr<Eigen::PermutationMatrix<Eigen::Dynamic>>(
120  new Eigen::PermutationMatrix<Eigen::Dynamic>(
121  Eigen::Map<Eigen::Matrix<int, Eigen::Dynamic, 1>>(p, N, 1)));
122 
123  // apply permutation
124  hMat = *mTransp * hMat;
125 
126  // copy P' to GPU
127  mSysMat = std::unique_ptr<cuda::CudaMatrix<double, int>>(
128  new cuda::CudaMatrix<double, int>(hMat, N));
129 
130  double *d_csrVal = mSysMat->val.data();
131  int *d_csrRowPtr = mSysMat->row.data();
132  int *d_csrColInd = mSysMat->col.data();
133 
134  // step 3: query how much memory used in csrilu02 and csrsv2, and allocate the buffer
135  int pBufferSize_M, pBufferSize_L, pBufferSize_U;
136  int pBufferSize;
137  csp_status = cusparseDcsrilu02_bufferSize(mCusparsehandle, N, nnz, descr_M,
138  d_csrVal, d_csrRowPtr, d_csrColInd,
139  info_M, &pBufferSize_M);
140  checkCusparseStatus(csp_status, "failed to get cusparse bufferSize:");
141 
142  csp_status = cusparseDcsrsv2_bufferSize(
143  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L,
144  d_csrVal, d_csrRowPtr, d_csrColInd, info_L, &pBufferSize_L);
145  checkCusparseStatus(csp_status, "failed to get cusparse bufferSize:");
146 
147  csp_status = cusparseDcsrsv2_bufferSize(
148  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U,
149  d_csrVal, d_csrRowPtr, d_csrColInd, info_U, &pBufferSize_U);
150  checkCusparseStatus(csp_status, "failed to get cusparse bufferSize:");
151 
152  // Buffer
153  pBufferSize = std::max({pBufferSize_M, pBufferSize_L, pBufferSize_U});
154  pBuffer = cuda::Vector<char>(pBufferSize);
155  // step 4: perform analysis of incomplete Cholesky on M
156  // perform analysis of triangular solve on L
157  // perform analysis of triangular solve on U
158  // The lower(upper) triangular part of M has the same sparsity pattern as L(U),
159  // we can do analysis of csrilu0 and csrsv2 simultaneously.
160  csp_status = cusparseDcsrilu02_analysis(
161  mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd,
162  info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
163  checkCusparseStatus(csp_status, "failed to analyse cusparse problem:");
164 
165  csp_status =
166  cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &structural_zero);
167  if (csp_status == CUSPARSE_STATUS_ZERO_PIVOT) {
168  //SPDLOG_LOGGER_ERROR(mSLog, "A({},{}) is missing", structural_zero, structural_zero);
169  checkCusparseStatus(csp_status);
170  std::cout << "csp_status zero pivot" << std::endl;
171  throw SolverException();
172  }
173 
174  csp_status = cusparseDcsrsv2_analysis(
175  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_L,
176  d_csrVal, d_csrRowPtr, d_csrColInd, info_L,
177  CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
178  checkCusparseStatus(csp_status, "failed to analyse cusparse problem:");
179 
180  csp_status = cusparseDcsrsv2_analysis(
181  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, N, nnz, descr_U,
182  d_csrVal, d_csrRowPtr, d_csrColInd, info_U,
183  CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer.data());
184  checkCusparseStatus(csp_status, "failed to analyse cusparse problem:");
185 
186  // step 5: M = L * U
187 
188  csp_status = cusparseDcsrilu02(
189  mCusparsehandle, N, nnz, descr_M, d_csrVal, d_csrRowPtr, d_csrColInd,
190  info_M, CUSPARSE_SOLVE_POLICY_NO_LEVEL, pBuffer.data());
191  checkCusparseStatus(csp_status, "failed to perform cusparse ILU:");
192 
193  csp_status =
194  cusparseXcsrilu02_zeroPivot(mCusparsehandle, info_M, &numerical_zero);
195  if (csp_status == CUSPARSE_STATUS_ZERO_PIVOT) {
196  //SPDLOG_LOGGER_ERROR(mSLog, "U({},{}) is zero\n", numerical_zero, numerical_zero);
197  checkCusparseStatus(csp_status);
198  std::cout << "csp_status zero pivot" << std::endl;
199  throw SolverException();
200  }
201 
202  //std::cout << *mSysMat.get() << std::endl;
203 
204  csp_status = cusparseDestroyCsrilu02Info(info_M);
205  checkCusparseStatus(csp_status, "failed to destroy info:");
206  csp_status = cusparseDestroyMatDescr(descr_M);
207  checkCusparseStatus(csp_status, "failed to destroy MatDescr:");
208 }
209 
211  SparseMatrix &systemMatrix,
212  std::vector<std::pair<UInt, UInt>> &listVariableSystemMatrixEntries) {
213  /* No preprocessing phase available yet */
214 }
215 
216 void GpuSparseAdapter::factorize(SparseMatrix &systemMatrix) {
217  performFactorization(systemMatrix);
218 }
219 
220 void GpuSparseAdapter::refactorize(SparseMatrix &systemMatrix) {
221  performFactorization(systemMatrix);
222 }
223 
225  SparseMatrix &systemMatrix,
226  std::vector<std::pair<UInt, UInt>> &listVariableSystemMatrixEntries) {
227  performFactorization(systemMatrix);
228 }
229 
230 Matrix GpuSparseAdapter::solve(Matrix &mRightHandSideVector) {
231  Matrix leftSideVector = mRightHandSideVector;
232  cudaError_t status;
233  cusparseStatus_t csp_status;
234  int size = mRightHandSideVector.rows();
235 
236  //Copy right vector to device
237  //Permutate right side: R' = P * R
238  mRightHandSideVector = *mTransp * mRightHandSideVector;
239  status = cudaMemcpy(mGpuRhsVec.data(), &mRightHandSideVector(0),
240  size * sizeof(Real), cudaMemcpyHostToDevice);
241  if (status != cudaSuccess) {
242  //SPDLOG_LOGGER_ERROR(mSLog, "Cuda Error: {}", cudaGetErrorString(status));
243  std::cout << "status not cudasuccess" << std::endl;
244  throw SolverException();
245  }
246 
247  const double alpha = 1.;
248  // Solve
249  // step 6: solve L*z = x
250  csp_status = cusparseDcsrsv2_solve(
251  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, size,
252  mSysMat->non_zero, &alpha, descr_L, mSysMat->val.data(),
253  mSysMat->row.data(), mSysMat->col.data(), info_L, mGpuRhsVec.data(),
254  mGpuIntermediateVec.data(), CUSPARSE_SOLVE_POLICY_NO_LEVEL,
255  pBuffer.data());
256  checkCusparseStatus(csp_status, "failed to solve L*z=x:");
257 
258  // step 7: solve U*y = z
259  csp_status = cusparseDcsrsv2_solve(
260  mCusparsehandle, CUSPARSE_OPERATION_NON_TRANSPOSE, size,
261  mSysMat->non_zero, &alpha, descr_U, mSysMat->val.data(),
262  mSysMat->row.data(), mSysMat->col.data(), info_U,
263  mGpuIntermediateVec.data(), mGpuLhsVec.data(),
264  CUSPARSE_SOLVE_POLICY_USE_LEVEL, pBuffer.data());
265  checkCusparseStatus(csp_status, "failed to solve U*y=z:");
266 
267  //Copy Solution back
268  status = cudaMemcpy(&(leftSideVector)(0), mGpuLhsVec.data(),
269  size * sizeof(Real), cudaMemcpyDeviceToHost);
270  if (status != cudaSuccess) {
271  //SPDLOG_LOGGER_ERROR(mSLog, "Cuda Error: {}", cudaGetErrorString(status));
272  std::cout << "status not cudasuccess" << std::endl;
273  throw SolverException();
274  }
275  return leftSideVector;
276 }
277 } // namespace DPsim
virtual ~GpuSparseAdapter()
Destructor.
virtual void preprocessing(SparseMatrix &systemMatrix, std::vector< std::pair< UInt, UInt >> &listVariableSystemMatrixEntries) override
preprocessing function pre-ordering and scaling the matrix
virtual void refactorize(SparseMatrix &systemMatrix) override
refactorization without partial pivoting
cuda::Vector< double > mGpuLhsVec
LHS-Vector.
cuda::Vector< double > mGpuRhsVec
RHS-Vector.
virtual Matrix solve(Matrix &rightSideVector) override
solution function for a right hand side
virtual void partialRefactorize(SparseMatrix &systemMatrix, std::vector< std::pair< UInt, UInt >> &listVariableSystemMatrixEntries) override
partial refactorization withouth partial pivoting
std::unique_ptr< cuda::CudaMatrix< double, int > > mSysMat
Systemmatrix on Device.
virtual void factorize(SparseMatrix &systemMatrix) override
factorization function with partial pivoting
cuda::Vector< double > mGpuIntermediateVec
Intermediate Vector.
cusparseHandle_t mCusparsehandle
Solver-Handle.