diff --git a/include/backend.hpp b/include/backend.hpp index 1edb47e..02da005 100644 --- a/include/backend.hpp +++ b/include/backend.hpp @@ -62,6 +62,16 @@ class Backend { CUDANet::Shape padding_shape, CUDANet::Shape output_shape ) = 0; + + virtual CUDANet::Tensor& avgPool2d( + const CUDANet::Tensor& input, + CUDANet::Tensor& output, + CUDANet::Shape input_shape, + CUDANet::Shape pool_shape, + CUDANet::Shape stride_shape, + CUDANet::Shape padding_shape, + CUDANet::Shape output_shape + ) = 0; }; } // namespace CUDANet \ No newline at end of file diff --git a/include/backend/cuda.cuh b/include/backend/cuda.cuh index 1dbd649..2012353 100644 --- a/include/backend/cuda.cuh +++ b/include/backend/cuda.cuh @@ -49,7 +49,7 @@ class CUDA : public Backend { const CUDANet::Shape out_shape ) override; - CUDANet::Tensor& CUDA::maxPool2d( + CUDANet::Tensor& maxPool2d( const CUDANet::Tensor& input, CUDANet::Tensor& output, CUDANet::Shape input_shape, @@ -58,6 +58,16 @@ class CUDA : public Backend { CUDANet::Shape padding_shape, CUDANet::Shape output_shape ) override; + + CUDANet::Tensor& avgPool2d( + const CUDANet::Tensor& input, + CUDANet::Tensor& output, + CUDANet::Shape input_shape, + CUDANet::Shape pool_shape, + CUDANet::Shape stride_shape, + CUDANet::Shape padding_shape, + CUDANet::Shape output_shape + ) = 0; }; } // namespace CUDANet::Backend \ No newline at end of file diff --git a/include/kernels/pooling.cuh b/include/kernels/pool.cuh similarity index 100% rename from include/kernels/pooling.cuh rename to include/kernels/pool.cuh diff --git a/src/backends/cuda/kernels/pooling.cu b/src/backends/cuda/kernels/pool.cu similarity index 99% rename from src/backends/cuda/kernels/pooling.cu rename to src/backends/cuda/kernels/pool.cu index 29904fa..5c7f3c2 100644 --- a/src/backends/cuda/kernels/pooling.cu +++ b/src/backends/cuda/kernels/pool.cu @@ -1,6 +1,6 @@ #include "cuda_helper.cuh" #include "layer.hpp" -#include "pooling.cuh" +#include "pool.cuh" using namespace CUDANet; diff --git a/src/backends/cuda/layer_ops.cu b/src/backends/cuda/layer_ops.cu index 129948b..a7cfc6a 100644 --- a/src/backends/cuda/layer_ops.cu +++ b/src/backends/cuda/layer_ops.cu @@ -135,5 +135,31 @@ CUDANet::Tensor& CUDA::maxPool2d( CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaDeviceSynchronize()); + return output; +} + +CUDANet::Tensor& CUDA::avgPool2d( + const CUDANet::Tensor& input, + CUDANet::Tensor& output, + CUDANet::Shape input_shape, + CUDANet::Shape pool_shape, + CUDANet::Shape stride_shape, + CUDANet::Shape padding_shape, + CUDANet::Shape output_shape +) { + dim3 block(8, 8, 8); + dim3 grid( + (output_shape[0] + block.x - 1) / block.x, + (output_shape[1] + block.y - 1) / block.y, + (output_shape[2] + block.z - 1) / block.z + ); + + Kernels::avg_pool<<>>( + input.data(), output.data(), input_shape, output_shape, pool_shape, + stride_shape, padding_shape + ); + CUDA_CHECK(cudaGetLastError()); + CUDA_CHECK(cudaDeviceSynchronize()); + return output; } \ No newline at end of file diff --git a/src/backends/cuda/layers/avg_pooling.cu b/src/backends/cuda/layers/avg_pooling.cu deleted file mode 100644 index 94b28b5..0000000 --- a/src/backends/cuda/layers/avg_pooling.cu +++ /dev/null @@ -1,45 +0,0 @@ -#include "avg_pooling.hpp" -#include "cuda_helper.cuh" -#include "pooling.cuh" - -using namespace CUDANet::Layers; - -void AvgPooling2d::initCUDA() { - d_output = nullptr; - CUDA_CHECK(cudaMalloc( - (void**)&d_output, - sizeof(float) * outputSize.first * outputSize.second * nChannels - )); -} - -void AvgPooling2d::delCUDA() { - cudaFree(d_output); -} - -float* AvgPooling2d::forwardCUDA(const float* d_input) { - dim3 block(8, 8, 8); - dim3 grid( - (outputSize.first + block.x - 1) / block.x, - (outputSize.second + block.y - 1) / block.y, - (nChannels + block.z - 1) / block.z - ); - - Kernels::avg_pooling<<>>( - d_input, d_output, inputSize, outputSize, nChannels, poolingSize, - stride, padding - ); - CUDA_CHECK(cudaGetLastError()); - - activation->activate(d_output); - CUDA_CHECK(cudaDeviceSynchronize()); - - return d_output; -} - -void AdaptiveAvgPooling2d::initCUDA() { - cudaFree(d_output); - cudaMalloc( - (void**)&d_output, - sizeof(float) * outputSize.first * outputSize.second * nChannels - ); -} diff --git a/src/layers/avg_pooling.cpp b/src/layers/avg_pooling.cpp index 7ebd14b..d4b144f 100644 --- a/src/layers/avg_pooling.cpp +++ b/src/layers/avg_pooling.cpp @@ -49,23 +49,43 @@ AvgPool2d::AvgPool2d( AvgPool2d::~AvgPool2d() {} -CUDANet::Tensor& AvgPool2d::forward(CUDANet::Tensor& input); +CUDANet::Tensor& AvgPool2d::forward(CUDANet::Tensor& input) { + output.zero(); + backend->avgPool2d( + input, + output, + in_shape, + pool_shape, + stride_shape, + padding_shape, + out_shape + ); + return output; +} -CUDANet::Shape AvgPool2d::input_shape(); +CUDANet::Shape AvgPool2d::input_shape() { + return in_shape; +} -CUDANet::Shape AvgPool2d::output_shape(); +CUDANet::Shape AvgPool2d::output_shape() { + return out_shape; +} -size_t AvgPool2d::input_size(); +size_t AvgPool2d::input_size() { + return sizeof(float) * in_shape[0] * in_shape[1] * in_shape[2]; +} -size_t AvgPool2d::output_size(); +size_t AvgPool2d::output_size() { + return sizeof(float) * out_shape[0] * out_shape[1] * out_shape[3]; +} -void AvgPool2d::set_weights(void* input); +void AvgPool2d::set_weights(void* input) {} -CUDANet::Tensor& AvgPool2d::get_weights(); +CUDANet::Tensor& AvgPool2d::get_weights() {} -void AvgPool2d::set_biases(void* input); +void AvgPool2d::set_biases(void* input) {} -CUDANet::Tensor& AvgPool2d::get_biases(); +CUDANet::Tensor& AvgPool2d::get_biases() {} AdaptiveAvgPool2d::AdaptiveAvgPool2d( @@ -73,15 +93,29 @@ AdaptiveAvgPool2d::AdaptiveAvgPool2d( CUDANet::Shape output_shape, CUDANet::Backend *backend ) - : AvgPool2d(input_shape, {1, 1}, {1, 1}, {0, 0}, backend) { - stride_shape = { - input_shape[0] / output_shape[0], - input_shape[1] / output_shape[1] - }; - pool_shape = { - input_shape[0] - (output_shape[0] - 1) * stride_shape[0], - input_shape[1] - (output_shape[1] - 1) * stride_shape[1] - }; - padding_shape = {(pool_shape[0] - 1) / 2, (pool_shape[1] - 1) / 2}; + : AvgPool2d( + input_shape, + // pool_shape + { + input_shape[0] - (output_shape[0] - 1) * (input_shape[0] / output_shape[0]), + input_shape[1] - (output_shape[1] - 1) * (input_shape[1] / output_shape[1]) + }, + // stride_shape + { + input_shape[0] / output_shape[0], + input_shape[1] / output_shape[1] + }, + // padding_shape + { + (input_shape[0] - (output_shape[0] - 1) * (input_shape[0] / output_shape[0]) - 1) / 2, + (input_shape[1] - (output_shape[1] - 1) * (input_shape[1] / output_shape[1]) - 1) / 2 + }, + backend + ) { out_shape = output_shape; -} \ No newline at end of file + + output = CUDANet::Tensor( + Shape{out_shape[0] * out_shape[1] * out_shape[2]}, + CUDANet::DType::FLOAT32, backend + ); +}