diff --git a/include/layers/dense.h b/include/layers/dense.h index 7040f42..774a120 100644 --- a/include/layers/dense.h +++ b/include/layers/dense.h @@ -15,7 +15,8 @@ namespace Layers { ~Dense(); void forward(const float* input, float* output); - void to_cuda(); + virtual void setWeights(const std::vector>& weights) = 0; + virtual void setBiases(const std::vector& biases) = 0; private: int inputSize; @@ -31,6 +32,7 @@ namespace Layers { void initializeWeights(); void initializeBiases(); + void toCuda(); }; } // namespace Layers diff --git a/include/layers/ilayer.h b/include/layers/ilayer.h index 95e3964..b75fe0d 100644 --- a/include/layers/ilayer.h +++ b/include/layers/ilayer.h @@ -11,7 +11,6 @@ namespace Layers { virtual ~ILayer() {} virtual void forward(const float* input, float* output) = 0; - virtual void to_cuda() = 0; }; } // namespace Layers diff --git a/include/utils/cuda_helper.h b/include/utils/cuda_helper.h index e24853d..4f8656d 100644 --- a/include/utils/cuda_helper.h +++ b/include/utils/cuda_helper.h @@ -2,6 +2,7 @@ #define CUDA_HELPER_H #include +#include #define IDX2C(i,j,ld) (((j)*(ld))+(i)) @@ -17,7 +18,15 @@ do { \ } \ } while (0) -// Initialize CUDA and return the device properties -cudaDeviceProp initializeCUDA(); +// cuBLAS error checking macro +#define CUBLAS_CHECK(call) \ +do { \ + cublasStatus_t result = call; \ + if (result != CUBLAS_STATUS_SUCCESS) { \ + fprintf(stderr, "cuBLAS error at %s:%d code=%d\n", \ + __FILE__, __LINE__, static_cast(result)); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) #endif // CUDA_HELPER_H diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp index 9780052..f0ab341 100644 --- a/src/layers/dense.cpp +++ b/src/layers/dense.cpp @@ -21,7 +21,7 @@ Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle) CUDA_CHECK(cudaMalloc((void**)&d_weights, sizeof(float) * inputSize * outputSize)); CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * biases.size())); - to_cuda(); + toCuda(); } Layers::Dense::~Dense() { @@ -54,7 +54,17 @@ void Layers::Dense::forward(const float* input, float* output) { cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, output, 1); } -void Layers::Dense::to_cuda() { - CUDA_CHECK(cudaMemcpy(d_weights, weights.data(), sizeof(float) * inputSize * outputSize, cudaMemcpyHostToDevice)); - CUDA_CHECK(cudaMemcpy(d_biases, biases.data(), sizeof(float) * biases.size(), cudaMemcpyHostToDevice)); +void Layers::Dense::toCuda() { + CUBLAS_CHECK(cublasSetMatrix(outputSize, inputSize, sizeof(float), weights.data(), inputSize, d_weights, outputSize)); + CUBLAS_CHECK(cublasSetVector(biases.size(), sizeof(float), biases.data(), 1, d_biases, 1)); +} + +void Layers::Dense::setWeights(const std::vector>& weights) { + this->weights = weights; + toCuda(); +} + +void Layers::Dense::setBiases(const std::vector& biases) { + this->biases = biases; + toCuda(); } \ No newline at end of file diff --git a/src/utils/cuda_helper.cpp b/src/utils/cuda_helper.cpp index 307036d..396fe50 100644 --- a/src/utils/cuda_helper.cpp +++ b/src/utils/cuda_helper.cpp @@ -1,9 +1,9 @@ #include #include #include "cuda_helper.h" +#include -// Initialize CUDA and return the device properties -cudaDeviceProp initializeCUDA() { +cudaDeviceProp initializeCUDA(cublasHandle_t& cublasHandle) { int deviceCount; CUDA_CHECK(cudaGetDeviceCount(&deviceCount)); @@ -12,7 +12,7 @@ cudaDeviceProp initializeCUDA() { std::exit(EXIT_FAILURE); } - int device = 0; // You can modify this to choose a different GPU + int device = 0; CUDA_CHECK(cudaSetDevice(device)); cudaDeviceProp deviceProp; @@ -20,5 +20,8 @@ cudaDeviceProp initializeCUDA() { std::printf("Using CUDA device %d: %s\n", device, deviceProp.name); + // Initialize cuBLAS + CUBLAS_CHECK(cublasCreate(&cublasHandle)); + return deviceProp; -} +} \ No newline at end of file