DPsim
GpuDenseAdapter.cpp
1 /* Copyright 2017-2021 Institute for Automation of Complex Power Systems,
2  * EONERC, RWTH Aachen University
3  *
4  * This Source Code Form is subject to the terms of the Mozilla Public
5  * License, v. 2.0. If a copy of the MPL was not distributed with this
6  * file, You can obtain one at https://mozilla.org/MPL/2.0/.
7  *********************************************************************************/
8 
9 #include <dpsim/GpuDenseAdapter.h>
10 
11 using namespace DPsim;
12 
13 namespace DPsim {
15  //Handle & Stream
16  if (mCusolverHandle)
17  cusolverDnDestroy(mCusolverHandle);
18  if (mStream)
19  cudaStreamDestroy(mStream);
20 
21  //Memory allocated on device
22  cudaFree(mDeviceCopy.matrix);
23  cudaFree(mDeviceCopy.vector);
24  cudaFree(mDeviceCopy.workSpace);
25  cudaFree(mDeviceCopy.pivSeq);
26  cudaFree(mDeviceCopy.errInfo);
27 
28  cudaDeviceReset();
29 }
30 
32  mDeviceCopy = {};
33 
34  cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
35  cudaError_t error = cudaSuccess;
36  if ((status = cusolverDnCreate(&mCusolverHandle)) != CUSOLVER_STATUS_SUCCESS)
37  std::cerr << "cusolverDnCreate() failed (initializing cusolver-library)"
38  << std::endl;
39  if ((error = cudaStreamCreateWithFlags(&mStream, cudaStreamNonBlocking)) !=
40  cudaSuccess)
41  std::cerr << cudaGetErrorString(error) << std::endl;
42  if ((status = cusolverDnSetStream(mCusolverHandle, mStream)) !=
43  CUSOLVER_STATUS_SUCCESS)
44  std::cerr << "cusolverDnSetStream() failed" << std::endl;
45 }
46 
47 void GpuDenseAdapter::allocateDeviceMemory() {
48  //Allocate memory for...
49  //Vector
50  CUDA_ERROR_HANDLER(
51  cudaMalloc((void **)&mDeviceCopy.vector, mDeviceCopy.size * sizeof(Real)))
52  //Matrix
53  CUDA_ERROR_HANDLER(
54  cudaMalloc((void **)&mDeviceCopy.matrix,
55  mDeviceCopy.size * mDeviceCopy.size * sizeof(Real)))
56  //Pivoting-Sequence
57  CUDA_ERROR_HANDLER(
58  cudaMalloc((void **)&mDeviceCopy.pivSeq, mDeviceCopy.size * sizeof(Real)))
59  //Errorcode
60  CUDA_ERROR_HANDLER(cudaMalloc((void **)&mDeviceCopy.errInfo, sizeof(int)))
61 
62  //Workspace
63  int workSpaceSize = 0;
64  cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
65  if ((status = cusolverDnDgetrf_bufferSize(
66  mCusolverHandle, mDeviceCopy.size, mDeviceCopy.size,
67  mDeviceCopy.matrix, mDeviceCopy.size, &workSpaceSize)) !=
68  CUSOLVER_STATUS_SUCCESS)
69  std::cerr << "cusolverDnDgetrf_bufferSize() failed (calculating required "
70  "space for LU-factorization)"
71  << std::endl;
72  CUDA_ERROR_HANDLER(cudaMalloc((void **)&mDeviceCopy.workSpace, workSpaceSize))
73 }
74 
75 void GpuDenseAdapter::copySystemMatrixToDevice(Matrix systemMatrix) {
76  auto *data = systemMatrix.data();
77  CUDA_ERROR_HANDLER(
78  cudaMemcpy(mDeviceCopy.matrix, data,
79  mDeviceCopy.size * mDeviceCopy.size * sizeof(Real),
80  cudaMemcpyHostToDevice))
81 }
82 
83 void GpuDenseAdapter::LUfactorization() {
84  //Variables for error-handling
85  cusolverStatus_t status = CUSOLVER_STATUS_SUCCESS;
86  int info;
87 
88  //LU-factorization
89  status = cusolverDnDgetrf(mCusolverHandle, mDeviceCopy.size, mDeviceCopy.size,
90  mDeviceCopy.matrix, mDeviceCopy.size,
91  mDeviceCopy.workSpace, mDeviceCopy.pivSeq,
92  mDeviceCopy.errInfo);
93 
94  CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
95 
96  if (status != CUSOLVER_STATUS_SUCCESS) {
97  std::cerr << "cusolverDnDgetrf() failed (calculating LU-factorization)"
98  << std::endl;
99  }
100  CUDA_ERROR_HANDLER(cudaMemcpy(&info, mDeviceCopy.errInfo, sizeof(int),
101  cudaMemcpyDeviceToHost))
102  if (0 > info) {
103  std::cerr << -info << "-th parameter is wrong" << std::endl;
104  }
105 }
106 
108  SparseMatrix &systemMatrix,
109  std::vector<std::pair<UInt, UInt>> &listVariableSystemMatrixEntries) {
110  mDeviceCopy.size = systemMatrix.rows();
111 
112  //Allocate Memory on Device
113  allocateDeviceMemory();
114  //Copy Systemmatrix to device
115  copySystemMatrixToDevice(Matrix(systemMatrix));
116 
117  // Debug logging, whether LU-factorization and copying was successfull
118  /*DPsim::Matrix mat;
119  mat.resize(mDeviceCopy.size, mDeviceCopy.size);
120  double *buffer = &mat(0);
121  CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
122  this->SPDLOG_LOGGER_INFO(mSLog, "Systemmatrix Gpu: \n{}", mat);*/
123 }
124 
125 void GpuDenseAdapter::factorize(SparseMatrix &systemMatrix) {
126  //Copy Systemmatrix to device
127  copySystemMatrixToDevice(Matrix(systemMatrix));
128  //LU factorization
129  LUfactorization();
130  /*CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
131  this->SPDLOG_LOGGER_INFO(mSLog, "LU decomposition Gpu: \n{}", mat);*/
132 }
133 
134 void GpuDenseAdapter::refactorize(SparseMatrix &systemMatrix) {
135  //Copy Systemmatrix to device
136  copySystemMatrixToDevice(Matrix(systemMatrix));
137  //LU factorization
138  LUfactorization();
139  /*CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
140  this->SPDLOG_LOGGER_INFO(mSLog, "LU decomposition Gpu: \n{}", mat);*/
141 }
142 
144  SparseMatrix &systemMatrix,
145  std::vector<std::pair<UInt, UInt>> &listVariableSystemMatrixEntries) {
146  //Copy Systemmatrix to device
147  copySystemMatrixToDevice(Matrix(systemMatrix));
148  //LU factorization
149  LUfactorization();
150  /*CUDA_ERROR_HANDLER(cudaMemcpy(buffer, mDeviceCopy.matrix, mDeviceCopy.size * mDeviceCopy.size * sizeof(Real), cudaMemcpyDeviceToHost))
151  this->SPDLOG_LOGGER_INFO(mSLog, "LU decomposition Gpu: \n{}", mat);*/
152 }
153 
154 Matrix GpuDenseAdapter::solve(Matrix &mRightHandSideVector) {
155  Matrix leftSideVector = mRightHandSideVector;
156 
157  CUDA_ERROR_HANDLER(cudaMemcpy(mDeviceCopy.vector, &mRightHandSideVector(0),
158  mDeviceCopy.size * sizeof(Real),
159  cudaMemcpyHostToDevice))
160 
161  cusolverStatus_t status = cusolverDnDgetrs(
162  mCusolverHandle, CUBLAS_OP_N, mDeviceCopy.size, 1, /* nrhs */
163  mDeviceCopy.matrix, mDeviceCopy.size, mDeviceCopy.pivSeq,
164  mDeviceCopy.vector, mDeviceCopy.size, mDeviceCopy.errInfo);
165 
166  CUDA_ERROR_HANDLER(cudaDeviceSynchronize())
167 
168  if (status != CUSOLVER_STATUS_SUCCESS)
169  std::cerr << "cusolverDnDgetrs() failed (Solving A*x = b)" << std::endl;
170  int info;
171  CUDA_ERROR_HANDLER(cudaMemcpy(&info, mDeviceCopy.errInfo, sizeof(int),
172  cudaMemcpyDeviceToHost))
173  if (0 > info) {
174  std::cerr << -info << "-th parameter is wrong" << std::endl;
175  }
176 
177  CUDA_ERROR_HANDLER(cudaMemcpy(&(leftSideVector)(0), mDeviceCopy.vector,
178  mDeviceCopy.size * sizeof(Real),
179  cudaMemcpyDeviceToHost))
180  //return LUFactorizedSparse.solve(mRightHandSideVector);
181 
182  return leftSideVector;
183 }
184 } // namespace DPsim
GpuDenseAdapter()
Constructor.
cusolverDnHandle_t mCusolverHandle
Solver-Handle.
virtual Matrix solve(Matrix &rightSideVector) override
solution function for a right hand side
cudaStream_t mStream
Stream.
virtual void factorize(SparseMatrix &systemMatrix) override
factorization function with partial pivoting
virtual void partialRefactorize(SparseMatrix &systemMatrix, std::vector< std::pair< UInt, UInt >> &listVariableSystemMatrixEntries) override
partial refactorization withouth partial pivoting
virtual void preprocessing(SparseMatrix &systemMatrix, std::vector< std::pair< UInt, UInt >> &listVariableSystemMatrixEntries) override
preprocessing function pre-ordering and scaling the matrix
virtual ~GpuDenseAdapter()
Destructor.
virtual void refactorize(SparseMatrix &systemMatrix) override
refactorization without partial pivoting
double * matrix
Device copy of System-Matrix.
double * vector
Device copy of Vector.
int * pivSeq
Pivoting-Sequence.
UInt size
Size of one dimension.
double * workSpace
Device-Workspace for getrf.