diff --git a/include/cudanet.cuh b/include/cudanet.cuh
index 5705f51..7257e6c 100644
--- a/include/cudanet.cuh
+++ b/include/cudanet.cuh
@@ -1,16 +1,17 @@
 #ifndef CUDANET_H
 #define CUDANET_H
 
-// Kernels
+#ifdef USE_CUDA
 #include "activation_functions.cuh"
 #include "convolution.cuh"
 #include "matmul.cuh"
 #include "pooling.cuh"
+#endif
 
 // Layers
 #include "activation.hpp"
-#include "add.cuh"
-#include "avg_pooling.cuh"
+#include "add.hpp"
+#include "avg_pooling.hpp"
 #include "batch_norm.cuh"
 #include "concat.cuh"
 #include "conv2d.cuh"
@@ -25,8 +26,10 @@
 #include "module.hpp"
 
 // Utils
-#include "cuda_helper.cuh"
 #include "imagenet.hpp"
+#ifdef USE_CUDA
+#include "cuda_helper.cuh"
 #include "vector.cuh"
+#endif
 
 #endif // CUDANET_H
\ No newline at end of file
diff --git a/include/layers/avg_pooling.cuh b/include/layers/avg_pooling.hpp
similarity index 71%
rename from include/layers/avg_pooling.cuh
rename to include/layers/avg_pooling.hpp
index 28bf7de..495fa98 100644
--- a/include/layers/avg_pooling.cuh
+++ b/include/layers/avg_pooling.hpp
@@ -18,7 +18,7 @@ class AvgPooling2d : public SequentialLayer, public TwoDLayer {
     );
     ~AvgPooling2d();
 
-    float* forward(const float* d_input);
+    float* forward(const float* input);
 
     /**
      * @brief Get output size
@@ -45,14 +45,32 @@ class AvgPooling2d : public SequentialLayer, public TwoDLayer {
 
     shape2d outputSize;
 
-    float* d_output;
-
     Activation* activation;
+
+    float* forwardCPU(const float* input);
+
+#ifdef USE_CUDA
+    float* d_output;
+    float* forwardCUDA(const float* d_input);
+
+    void initCUDA();
+    void delCUDA();
+#endif
 };
 
 class AdaptiveAvgPooling2d : public AvgPooling2d {
   public:
-    AdaptiveAvgPooling2d(shape2d inputShape, int nChannels, shape2d outputShape, ActivationType activationType);
+    AdaptiveAvgPooling2d(
+        shape2d inputShape,
+        int nChannels,
+        shape2d outputShape,
+        ActivationType activationType
+    );
+
+  private:
+#ifdef USE_CUDA
+    void initCUDA();
+#endif
 };
 
 } // namespace CUDANet::Layers
diff --git a/src/backends/cuda/layers/avg_pooling.cu b/src/backends/cuda/layers/avg_pooling.cu
new file mode 100644
index 0000000..94b28b5
--- /dev/null
+++ b/src/backends/cuda/layers/avg_pooling.cu
@@ -0,0 +1,45 @@
+#include "avg_pooling.hpp"
+#include "cuda_helper.cuh"
+#include "pooling.cuh"
+
+using namespace CUDANet::Layers;
+
+void AvgPooling2d::initCUDA() {
+    d_output = nullptr;
+    CUDA_CHECK(cudaMalloc(
+        (void**)&d_output,
+        sizeof(float) * outputSize.first * outputSize.second * nChannels
+    ));
+}
+
+void AvgPooling2d::delCUDA() {
+    cudaFree(d_output);
+}
+
+float* AvgPooling2d::forwardCUDA(const float* d_input) {
+    dim3 block(8, 8, 8);
+    dim3 grid(
+        (outputSize.first + block.x - 1) / block.x,
+        (outputSize.second + block.y - 1) / block.y,
+        (nChannels + block.z - 1) / block.z
+    );
+
+    Kernels::avg_pooling<<<grid, block>>>(
+        d_input, d_output, inputSize, outputSize, nChannels, poolingSize,
+        stride, padding
+    );
+    CUDA_CHECK(cudaGetLastError());
+
+    activation->activate(d_output);
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    return d_output;
+}
+
+void AdaptiveAvgPooling2d::initCUDA() {
+    cudaFree(d_output);
+    cudaMalloc(
+        (void**)&d_output,
+        sizeof(float) * outputSize.first * outputSize.second * nChannels
+    );
+}
diff --git a/src/layers/avg_pooling.cu b/src/layers/avg_pooling.cu
index c0e6c4b..22501c4 100644
--- a/src/layers/avg_pooling.cu
+++ b/src/layers/avg_pooling.cu
@@ -1,15 +1,15 @@
-#include "avg_pooling.cuh"
-#include "cuda_helper.cuh"
-#include "pooling.cuh"
+#include <stdexcept>
+
+#include "avg_pooling.hpp"
 
 using namespace CUDANet::Layers;
 
 AvgPooling2d::AvgPooling2d(
-    shape2d inputSize, 
+    shape2d inputSize,
     int nChannels,
-    shape2d poolingSize, 
-    shape2d stride, 
-    shape2d padding, 
+    shape2d poolingSize,
+    shape2d stride,
+    shape2d padding,
     ActivationType activationType
 )
     : inputSize(inputSize),
@@ -18,44 +18,40 @@ AvgPooling2d::AvgPooling2d(
     stride(stride),
     padding(padding) {
     outputSize = {
-        (inputSize.first + 2 * padding.first - poolingSize.first) / stride.first + 1,
-        (inputSize.second + 2 * padding.second - poolingSize.second) / stride.second + 1
+        (inputSize.first + 2 * padding.first - poolingSize.first) /
+                stride.first +
+            1,
+        (inputSize.second + 2 * padding.second - poolingSize.second) /
+                stride.second +
+            1
     };
 
     activation = new Activation(
         activationType, outputSize.first * outputSize.second * nChannels
    );
 
-    d_output = nullptr;
-    CUDA_CHECK(cudaMalloc(
-        (void**)&d_output,
-        sizeof(float) * outputSize.first * outputSize.second * nChannels
-    ));
+#ifdef USE_CUDA
+    initCUDA();
+#endif
 }
 
 AvgPooling2d::~AvgPooling2d() {
-    cudaFree(d_output);
+#ifdef USE_CUDA
+    delCUDA();
+#endif
     delete activation;
 }
 
-float* AvgPooling2d::forward(const float* d_input) {
-    dim3 block(8, 8, 8);
-    dim3 grid(
-        (outputSize.first + block.x - 1) / block.x,
-        (outputSize.second + block.y - 1) / block.y,
-        (nChannels + block.z - 1) / block.z
-    );
+float* AvgPooling2d::forwardCPU(const float* input) {
+    throw std::logic_error("Not implemented");
+}
 
-    Kernels::avg_pooling<<<grid, block>>>(
-        d_input, d_output, inputSize, outputSize, nChannels, poolingSize,
-        stride, padding
-    );
-    CUDA_CHECK(cudaGetLastError());
-
-    activation->activate(d_output);
-    CUDA_CHECK(cudaDeviceSynchronize());
-
-    return d_output;
+float* AvgPooling2d::forward(const float* input) {
+#ifdef USE_CUDA
+    return forwardCUDA(input);
+#else
+    return forwardCPU(input);
+#endif
 }
 
 int AvgPooling2d::getOutputSize() {
@@ -70,22 +66,36 @@ shape2d AvgPooling2d::getOutputDims() {
     return outputSize;
 }
 
-AdaptiveAvgPooling2d::AdaptiveAvgPooling2d(shape2d inputShape, int nChannels, shape2d outputShape, ActivationType activationType)
-    : AvgPooling2d(inputShape, nChannels, {1, 1}, {1, 1}, {0, 0}, activationType) {
-
-    stride = {inputShape.first / outputShape.first, inputShape.second / outputShape.second};
+AdaptiveAvgPooling2d::AdaptiveAvgPooling2d(
+    shape2d inputShape,
+    int nChannels,
+    shape2d outputShape,
+    ActivationType activationType
+)
+    : AvgPooling2d(
+          inputShape,
+          nChannels,
+          {1, 1},
+          {1, 1},
+          {0, 0},
+          activationType
+      ) {
+    stride = {
+        inputShape.first / outputShape.first,
+        inputShape.second / outputShape.second
+    };
     poolingSize = {
         inputShape.first - (outputShape.first - 1) * stride.first,
         inputShape.second - (outputShape.second - 1) * stride.second
     };
-    padding = {
-        (poolingSize.first - 1) / 2,
-        (poolingSize.second - 1) / 2
-    };
-    outputSize = outputShape; 
+    padding = {(poolingSize.first - 1) / 2, (poolingSize.second - 1) / 2};
+    outputSize = outputShape;
 
-    activation = new Activation(activationType, outputSize.first * outputSize.second * nChannels);
+    activation = new Activation(
+        activationType, outputSize.first * outputSize.second * nChannels
+    );
 
-    cudaFree(d_output);
-    cudaMalloc((void**)&d_output, sizeof(float) * outputSize.first * outputSize.second * nChannels);
+#ifdef USE_CUDA
+    initCUDA();
+#endif
 }
\ No newline at end of file
diff --git a/test/cuda/layers/test_avg_pooling.cu b/test/cuda/layers/test_avg_pooling.cu
index d13d7f6..8d2a7c5 100644
--- a/test/cuda/layers/test_avg_pooling.cu
+++ b/test/cuda/layers/test_avg_pooling.cu
@@ -3,7 +3,7 @@
 
 #include <vector>
 
-#include "avg_pooling.cuh"
+#include "avg_pooling.hpp"
 
 class AvgPoolingLayerTest : public ::testing::Test {
   protected: