diff --git a/include/layers/dense.h b/include/layers/dense.h index 5c25457..7040f42 100644 --- a/include/layers/dense.h +++ b/include/layers/dense.h @@ -5,15 +5,17 @@ #include #include +#include namespace Layers { - class Dense { + class Dense : public ILayer { public: Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle); ~Dense(); void forward(const float* input, float* output); + void to_cuda(); private: int inputSize; diff --git a/include/layers/ilayer.h b/include/layers/ilayer.h new file mode 100644 index 0000000..95e3964 --- /dev/null +++ b/include/layers/ilayer.h @@ -0,0 +1,19 @@ + +#ifndef I_LAYER_H +#define I_LAYER_H + +#include + +namespace Layers { + + class ILayer { + public: + virtual ~ILayer() {} + + virtual void forward(const float* input, float* output) = 0; + virtual void to_cuda() = 0; + }; + +} // namespace Layers + +#endif // I_LAYER_H \ No newline at end of file diff --git a/include/utils/cuda_helper.h b/include/utils/cuda_helper.h index 79718a2..e24853d 100644 --- a/include/utils/cuda_helper.h +++ b/include/utils/cuda_helper.h @@ -3,6 +3,8 @@ #include +#define IDX2C(i,j,ld) (((j)*(ld))+(i)) + // CUDA error checking macro #define CUDA_CHECK(call) \ do { \ diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp index c19349e..9780052 100644 --- a/src/layers/dense.cpp +++ b/src/layers/dense.cpp @@ -1,5 +1,10 @@ #include "dense.h" +#include "cuda_helper.h" +#include #include +#include +#include + Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle) @@ -13,12 +18,10 @@ Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle) initializeBiases(); // Allocate GPU memory for weights and biases - cudaMalloc((void**)&d_weights, sizeof(float) * inputSize * outputSize); - cudaMalloc((void**)&d_biases, sizeof(float) * biases.size()); + CUDA_CHECK(cudaMalloc((void**)&d_weights, sizeof(float) * inputSize * outputSize)); + CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * biases.size())); - 
// Copy weights and biases to GPU - cudaMemcpy(d_weights, weights.data(), sizeof(float) * inputSize * outputSize, cudaMemcpyHostToDevice); - cudaMemcpy(d_biases, biases.data(), sizeof(float) * biases.size(), cudaMemcpyHostToDevice); + to_cuda(); } Layers::Dense::~Dense() { @@ -49,4 +52,9 @@ void Layers::Dense::forward(const float* input, float* output) { // Add biases cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, output, 1); +} + +void Layers::Dense::to_cuda() { + CUDA_CHECK(cudaMemcpy(d_weights, weights.data(), sizeof(float) * inputSize * outputSize, cudaMemcpyHostToDevice)); + CUDA_CHECK(cudaMemcpy(d_biases, biases.data(), sizeof(float) * biases.size(), cudaMemcpyHostToDevice)); } \ No newline at end of file