diff --git a/include/cudanet.cuh b/include/cudanet.cuh
index 528aa45..5705f51 100644
--- a/include/cudanet.cuh
+++ b/include/cudanet.cuh
@@ -8,15 +8,15 @@
 #include "pooling.cuh"
 
 // Layers
-#include "activation.cuh"
+#include "activation.hpp"
 #include "add.cuh"
 #include "avg_pooling.cuh"
 #include "batch_norm.cuh"
 #include "concat.cuh"
 #include "conv2d.cuh"
-#include "dense.cuh"
+#include "dense.hpp"
 #include "input.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "max_pooling.cuh"
 #include "output.cuh"
diff --git a/include/kernels/convolution.cuh b/include/kernels/convolution.cuh
index 87ec79e..e368864 100644
--- a/include/kernels/convolution.cuh
+++ b/include/kernels/convolution.cuh
@@ -2,7 +2,7 @@
 #define CUDANET_CONVOLUTION_H
 
 #include
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet::Kernels {
diff --git a/include/kernels/pooling.cuh b/include/kernels/pooling.cuh
index 79ab7f6..3ca8578 100644
--- a/include/kernels/pooling.cuh
+++ b/include/kernels/pooling.cuh
@@ -2,7 +2,7 @@
 #define CUDANET_POOLING_H
 
 #include
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet::Kernels {
diff --git a/include/layers/activation.hpp b/include/layers/activation.hpp
index 5c8f28b..c157225 100644
--- a/include/layers/activation.hpp
+++ b/include/layers/activation.hpp
@@ -48,6 +48,8 @@ class Activation {
     ActivationType activationType;
     int length;
 
+    void activateCPU(float* input);
+
 #ifdef USE_CUDA
     int gridSize;
 
@@ -58,10 +60,7 @@ class Activation {
     void initCUDA();
     void delCUDA();
 
-#else
-    void activateCPU(float* input);
 #endif
-
 };
diff --git a/include/layers/avg_pooling.cuh b/include/layers/avg_pooling.cuh
index 7f52b58..28bf7de 100644
--- a/include/layers/avg_pooling.cuh
+++ b/include/layers/avg_pooling.cuh
@@ -1,8 +1,8 @@
 #ifndef CUDANET_AVG_POOLING_H
 #define CUDANET_AVG_POOLING_H
 
-#include "activation.cuh"
-#include "layer.cuh"
+#include "activation.hpp"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/batch_norm.cuh b/include/layers/batch_norm.cuh
index c940d93..45402ec 100644
--- a/include/layers/batch_norm.cuh
+++ b/include/layers/batch_norm.cuh
@@ -3,8 +3,8 @@
 
 #include
 
-#include "activation.cuh"
-#include "layer.cuh"
+#include "activation.hpp"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/conv2d.cuh b/include/layers/conv2d.cuh
index f1420b8..a5e1cf8 100644
--- a/include/layers/conv2d.cuh
+++ b/include/layers/conv2d.cuh
@@ -3,9 +3,9 @@
 
 #include
 
-#include "activation.cuh"
+#include "activation.hpp"
 #include "convolution.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/dense.cuh b/include/layers/dense.hpp
similarity index 91%
rename from include/layers/dense.cuh
rename to include/layers/dense.hpp
index 1624fcc..24fc2d4 100644
--- a/include/layers/dense.cuh
+++ b/include/layers/dense.hpp
@@ -3,8 +3,8 @@
 
 #include
 
-#include "activation.cuh"
-#include "layer.cuh"
+#include "activation.hpp"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
@@ -84,20 +84,11 @@ class Dense : public WeightedLayer {
     int inputSize;
     int outputSize;
 
-    float* d_output;
-
-    float* d_weights;
-    float* d_biases;
-
     std::vector<float> weights;
     std::vector<float> biases;
 
     Layers::Activation* activation;
 
-    // Precompute kernel launch parameters
-    int forwardGridSize;
-    int biasGridSize;
-
     /**
      * @brief Initialize the weights to zeros
      *
      */
     void initializeWeights();
 
     /**
      * @brief Initialize the biases to zeros
      *
@@ -110,11 +101,30 @@ class Dense : public WeightedLayer {
      */
     void initializeBiases();
 
+    float* forwardCPU(const float* input);
+
+#ifdef USE_CUDA
+    float* d_output;
+
+    float* d_weights;
+    float* d_biases;
+
+    // Precompute kernel launch parameters
+    int forwardGridSize;
+    int biasGridSize;
+
     /**
      * @brief Copy the weights and biases to the device
      *
      */
     void toCuda();
+
+    void initCUDA();
+    void delCUDA();
+
+    float* forwardCUDA(const float* d_input);
+#endif
+
 };
 
 } // namespace CUDANet::Layers
diff --git a/include/layers/input.cuh b/include/layers/input.cuh
index 52e43bd..21fb5c4 100644
--- a/include/layers/input.cuh
+++ b/include/layers/input.cuh
@@ -1,7 +1,7 @@
 #ifndef CUDANET_INPUT_LAYER_H
 #define CUDANET_INPUT_LAYER_H
 
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/layer.cuh b/include/layers/layer.hpp
similarity index 99%
rename from include/layers/layer.cuh
rename to include/layers/layer.hpp
index 08b6aa2..a175acf 100644
--- a/include/layers/layer.cuh
+++ b/include/layers/layer.hpp
@@ -111,10 +111,12 @@ class WeightedLayer : public SequentialLayer {
      */
     virtual void initializeBiases() = 0;
 
+#ifdef USE_CUDA
     /**
      * @brief Copy the weights and biases to the device
      */
     virtual void toCuda() = 0;
+#endif
 };
 
 } // namespace CUDANet::Layers
diff --git a/include/layers/max_pooling.cuh b/include/layers/max_pooling.cuh
index af52aae..74727e8 100644
--- a/include/layers/max_pooling.cuh
+++ b/include/layers/max_pooling.cuh
@@ -1,8 +1,8 @@
 #ifndef CUDANET_MAX_POOLING_H
 #define CUDANET_MAX_POOLING_H
 
-#include "activation.cuh"
-#include "layer.cuh"
+#include "activation.hpp"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/output.cuh b/include/layers/output.cuh
index a8531de..40f2b29 100644
--- a/include/layers/output.cuh
+++ b/include/layers/output.cuh
@@ -1,7 +1,7 @@
 #ifndef CUDANET_OUTPUT_LAYER_H
 #define CUDANET_OUTPUT_LAYER_H
 
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/model/model.hpp b/include/model/model.hpp
index 5a2d777..a6ea7fd 100644
--- a/include/model/model.hpp
+++ b/include/model/model.hpp
@@ -6,7 +6,7 @@
 #include
 
 #include "input.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "module.hpp"
 #include "output.cuh"
diff --git a/include/model/module.hpp b/include/model/module.hpp
index 57c641e..e6ae17b 100644
--- a/include/model/module.hpp
+++ b/include/model/module.hpp
@@ -5,7 +5,7 @@
 #include
 #include
 
-#include "layer.cuh"
+#include "layer.hpp"
 
 namespace CUDANet {
diff --git a/src/backends/cuda/kernels/pooling.cu b/src/backends/cuda/kernels/pooling.cu
index 993cc0c..dbb5e09 100644
--- a/src/backends/cuda/kernels/pooling.cu
+++ b/src/backends/cuda/kernels/pooling.cu
@@ -1,5 +1,5 @@
 #include "cuda_helper.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "pooling.cuh"
 
 using namespace CUDANet;
diff --git a/src/layers/dense.cu b/src/backends/cuda/layers/dense.cu
similarity index 56%
rename from src/layers/dense.cu
rename to src/backends/cuda/layers/dense.cu
index 6a13d94..f334376 100644
--- a/src/layers/dense.cu
+++ b/src/backends/cuda/layers/dense.cu
@@ -6,26 +6,14 @@
 #include
 
 #include "vector.cuh"
-#include "activation.cuh"
+#include "activation.hpp"
 #include "cuda_helper.cuh"
-#include "dense.cuh"
+#include "dense.hpp"
 #include "matmul.cuh"
 
 using namespace CUDANet::Layers;
 
-Dense::Dense(
-    int inputSize,
-    int outputSize,
-    ActivationType activationType
-)
-    : inputSize(inputSize), outputSize(outputSize) {
-    // Allocate memory for weights and biases
-    weights.resize(outputSize * inputSize);
-    biases.resize(outputSize);
-
-    initializeWeights();
-    initializeBiases();
-
+void Dense::initCUDA() {
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * outputSize));
@@ -44,27 +32,26 @@ Dense::Dense(
     forwardGridSize =
         (std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
     biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
-
-    activation = new Activation(activationType, outputSize);
 }
 
-Dense::~Dense() {
+void Dense::delCUDA() {
     cudaFree(d_output);
     cudaFree(d_weights);
     cudaFree(d_biases);
 
-    delete activation;
 }
 
-void Dense::initializeWeights() {
-    std::fill(weights.begin(), weights.end(), 0.0f);
+void Dense::toCuda() {
+    CUDA_CHECK(cudaMemcpy(
+        d_weights, weights.data(), sizeof(float) * inputSize * outputSize,
+        cudaMemcpyHostToDevice
+    ));
+    CUDA_CHECK(cudaMemcpy(
+        d_biases, biases.data(), sizeof(float) * outputSize,
+        cudaMemcpyHostToDevice
+    ));
 }
 
-void Dense::initializeBiases() {
-    std::fill(biases.begin(), biases.end(), 0.0f);
-}
-
-float* Dense::forward(const float* d_input) {
-
+float* Dense::forwardCUDA(const float* d_input) {
     Kernels::mat_vec_mul<<<forwardGridSize, BLOCK_SIZE>>>(
         d_weights, d_input, d_output, inputSize, outputSize
     );
@@ -80,40 +67,3 @@ float* Dense::forward(const float* d_input) {
 
     return d_output;
 }
-
-void Dense::toCuda() {
-    CUDA_CHECK(cudaMemcpy(
-        d_weights, weights.data(), sizeof(float) * inputSize * outputSize,
-        cudaMemcpyHostToDevice
-    ));
-    CUDA_CHECK(cudaMemcpy(
-        d_biases, biases.data(), sizeof(float) * outputSize,
-        cudaMemcpyHostToDevice
-    ));
-}
-
-void Dense::setWeights(const float* weights_input) {
-    std::copy(weights_input, weights_input + weights.size(), weights.begin());
-    toCuda();
-}
-
-std::vector<float> Dense::getWeights() {
-    return weights;
-}
-
-void Dense::setBiases(const float* biases_input) {
-    std::copy(biases_input, biases_input + biases.size(), biases.begin());
-    toCuda();
-}
-
-std::vector<float> Dense::getBiases() {
-    return biases;
-}
-
-int Dense::getOutputSize() {
-    return outputSize;
-}
-
-int Dense::getInputSize() {
-    return inputSize;
-}
\ No newline at end of file
diff --git a/src/layers/batch_norm.cu b/src/layers/batch_norm.cu
index b086525..02c33cb 100644
--- a/src/layers/batch_norm.cu
+++ b/src/layers/batch_norm.cu
@@ -1,9 +1,9 @@
 #include
 
-#include "activation.cuh"
+#include "activation.hpp"
 #include "batch_norm.cuh"
 #include "cuda_helper.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "matmul.cuh"
 #include "vector.cuh"
diff --git a/src/layers/conv2d.cu b/src/layers/conv2d.cu
index bd61555..c4374ec 100644
--- a/src/layers/conv2d.cu
+++ b/src/layers/conv2d.cu
@@ -1,11 +1,11 @@
 #include
 #include
 
-#include "activation.cuh"
+#include "activation.hpp"
 #include "conv2d.cuh"
 #include "convolution.cuh"
 #include "cuda_helper.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "matmul.cuh"
 #include "vector.cuh"
diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp
new file mode 100644
index 0000000..61f9ab1
--- /dev/null
+++ b/src/layers/dense.cpp
@@ -0,0 +1,80 @@
+#include "dense.hpp"
+
+#include
+
+#include "activation.hpp"
+
+using namespace CUDANet::Layers;
+
+Dense::Dense(int inputSize, int outputSize, ActivationType activationType)
+    : inputSize(inputSize), outputSize(outputSize) {
+    // Allocate memory for weights and biases
+    weights.resize(outputSize * inputSize);
+    biases.resize(outputSize);
+
+    initializeWeights();
+    initializeBiases();
+
+    activation = new Activation(activationType, outputSize);
+
+#ifdef USE_CUDA
+    initCUDA();
+#endif
+}
+
+Dense::~Dense() {
+    delete activation;
+#ifdef USE_CUDA
+    delCUDA();
+#endif
+}
+
+void Dense::initializeWeights() {
+    std::fill(weights.begin(), weights.end(), 0.0f);
+}
+
+void Dense::initializeBiases() {
+    std::fill(biases.begin(), biases.end(), 0.0f);
+}
+
+float* Dense::forwardCPU(const float* input) {
+    throw std::logic_error("Not implemented");
+}
+
+float* Dense::forward(const float* input) {
+#ifdef USE_CUDA
+    return forwardCUDA(input);
+#else
+    return forwardCPU(input);
+#endif
+}
+
+void Dense::setWeights(const float* weights_input) {
+    std::copy(weights_input, weights_input + weights.size(), weights.begin());
+#ifdef USE_CUDA
+    toCuda();
+#endif
+}
+
+std::vector<float> Dense::getWeights() {
+    return weights;
+}
+
+void Dense::setBiases(const float* biases_input) {
+    std::copy(biases_input, biases_input + biases.size(), biases.begin());
+#ifdef USE_CUDA
+    toCuda();
+#endif
+}
+
+std::vector<float> Dense::getBiases() {
+    return biases;
+}
+
+int Dense::getOutputSize() {
+    return outputSize;
+}
+
+int Dense::getInputSize() {
+    return inputSize;
+}
\ No newline at end of file
diff --git a/src/model/model.cpp b/src/model/model.cpp
index ba5b88c..729ac81 100644
--- a/src/model/model.cpp
+++ b/src/model/model.cpp
@@ -8,7 +8,7 @@
 #include
 
 #include "input.cuh"
-#include "layer.cuh"
+#include "layer.hpp"
 #include "batch_norm.cuh"
diff --git a/test/cuda/layers/test_activation.cu b/test/cuda/layers/test_activation.cu
index 7452fb4..5c7c95e 100644
--- a/test/cuda/layers/test_activation.cu
+++ b/test/cuda/layers/test_activation.cu
@@ -1,4 +1,4 @@
-#include "activation.cuh"
+#include "activation.hpp"
 #include
 #include
 #include
diff --git a/test/cuda/layers/test_batch_norm.cu b/test/cuda/layers/test_batch_norm.cu
index 904eb44..e6ea673 100644
--- a/test/cuda/layers/test_batch_norm.cu
+++ b/test/cuda/layers/test_batch_norm.cu
@@ -3,7 +3,7 @@
 
 #include
 
-#include "activation.cuh"
+#include "activation.hpp"
 #include "batch_norm.cuh"
 
 class BatchNormLayerTest : public ::testing::Test {
diff --git a/test/cuda/layers/test_dense.cu b/test/cuda/layers/test_dense.cu
index 96fc8f3..1a60d68 100644
--- a/test/cuda/layers/test_dense.cu
+++ b/test/cuda/layers/test_dense.cu
@@ -3,8 +3,8 @@
 
 #include
 
-#include "activation.cuh"
-#include "dense.cuh"
+#include "activation.hpp"
+#include "dense.hpp"
 
 class DenseLayerTest : public ::testing::Test {
   protected:
diff --git a/test/model/test_model.cpp b/test/model/test_model.cpp
index d5dca0d..876284c 100644
--- a/test/model/test_model.cpp
+++ b/test/model/test_model.cpp
@@ -1,7 +1,7 @@
 #include
 
 #include "conv2d.cuh"
-#include "dense.cuh"
+#include "dense.hpp"
 #include "max_pooling.cuh"
 #include "model.hpp"