diff --git a/include/kernels/pooling.cuh b/include/kernels/pooling.cuh
index 35d6dda..7e5a0aa 100644
--- a/include/kernels/pooling.cuh
+++ b/include/kernels/pooling.cuh
@@ -13,7 +13,8 @@ __global__ void max_pooling(
     const dim2d outputSize,
     const int nChannels,
     const dim2d poolingSize,
-    const dim2d stride
+    const dim2d stride,
+    const dim2d padding
 );
 
 __global__ void avg_pooling(
diff --git a/include/layers/max_pooling.cuh b/include/layers/max_pooling.cuh
index 020e643..7cef5f5 100644
--- a/include/layers/max_pooling.cuh
+++ b/include/layers/max_pooling.cuh
@@ -13,6 +13,7 @@ class MaxPooling2d : public SequentialLayer, public TwoDLayer {
         int nChannels,
         dim2d poolingSize,
         dim2d stride,
+        dim2d padding,
         ActivationType activationType
     );
     ~MaxPooling2d();
@@ -40,6 +41,7 @@ class MaxPooling2d : public SequentialLayer, public TwoDLayer {
     int nChannels;
     dim2d poolingSize;
     dim2d stride;
+    dim2d padding;
 
     dim2d outputSize;
diff --git a/src/kernels/pooling.cu b/src/kernels/pooling.cu
index ffc520f..5a27555 100644
--- a/src/kernels/pooling.cu
+++ b/src/kernels/pooling.cu
@@ -11,7 +11,8 @@ __global__ void Kernels::max_pooling(
     const dim2d outputSize,
     const int nChannels,
     const dim2d poolingSize,
-    const dim2d stride
+    const dim2d stride,
+    const dim2d padding
 ) {
     int j = blockDim.x * blockIdx.x + threadIdx.x;
     int i = blockDim.y * blockIdx.y + threadIdx.y;
@@ -25,12 +26,16 @@ __global__ void Kernels::max_pooling(
 
     for (int k = 0; k < poolingSize.first; k++) {
         for (int l = 0; l < poolingSize.second; l++) {
-            int inputIndex = c * inputSize.first * inputSize.second +
-                             (i * stride.first + k) * inputSize.second +
-                             (j * stride.second + l);
+            int inputRow = i * stride.first + k - padding.first;
+            int inputCol = j * stride.second + l - padding.second;
 
-            if (d_input[inputIndex] > max) {
-                max = d_input[inputIndex];
+            if (inputRow >= 0 && inputRow < inputSize.first && inputCol >= 0 &&
+                inputCol < inputSize.second) {
+                int inputIndex = c * inputSize.first * inputSize.second +
+                                 inputRow * inputSize.second + inputCol;
+                if (d_input[inputIndex] > max) {
+                    max = d_input[inputIndex];
+                }
             }
         }
     }
@@ -62,12 +67,11 @@ __global__ void Kernels::avg_pooling(
 
     for (int k = 0; k < poolingSize.first; k++) {
         for (int l = 0; l < poolingSize.second; l++) {
-
             int inputRow = i * stride.first + k - padding.first;
             int inputCol = j * stride.second + l - padding.second;
 
-            if (inputRow >= 0 && inputRow < inputSize.first &&
-                inputCol >= 0 && inputCol < inputSize.second) {
+            if (inputRow >= 0 && inputRow < inputSize.first && inputCol >= 0 &&
+                inputCol < inputSize.second) {
                 int inputIndex = c * inputSize.first * inputSize.second +
                                  inputRow * inputSize.second + inputCol;
                 sum += d_input[inputIndex];
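
Note on the kernel change: skipping window positions that fall outside the input is equivalent to padding with negative infinity for max pooling (a padded cell can never win the max), and it is the same bounds check `avg_pooling` already performs. The host-side reference below is an editor's sketch for sanity-checking the index arithmetic, not part of the patch; `max_pool_ref` and the `dim2d` alias are assumed stand-ins for the project's own definitions.

```cpp
#include <cfloat>
#include <cstdio>
#include <utility>
#include <vector>

using dim2d = std::pair<int, int>;  // assumed stand-in for the project's dim2d

// CPU mirror of the padded max_pooling kernel: every window position that
// falls into the padding border is skipped, so padding never contributes.
std::vector<float> max_pool_ref(
    const std::vector<float>& in, dim2d inputSize, int nChannels,
    dim2d poolingSize, dim2d stride, dim2d padding
) {
    dim2d out = {
        (inputSize.first + 2 * padding.first - poolingSize.first) / stride.first + 1,
        (inputSize.second + 2 * padding.second - poolingSize.second) / stride.second + 1
    };
    std::vector<float> result(nChannels * out.first * out.second);
    for (int c = 0; c < nChannels; c++)
        for (int i = 0; i < out.first; i++)
            for (int j = 0; j < out.second; j++) {
                float max = -FLT_MAX;
                for (int k = 0; k < poolingSize.first; k++)
                    for (int l = 0; l < poolingSize.second; l++) {
                        int row = i * stride.first + k - padding.first;
                        int col = j * stride.second + l - padding.second;
                        if (row >= 0 && row < inputSize.first &&
                            col >= 0 && col < inputSize.second) {
                            float v = in[(c * inputSize.first + row) * inputSize.second + col];
                            if (v > max) max = v;
                        }
                    }
                result[(c * out.first + i) * out.second + j] = max;
            }
    return result;
}

int main() {
    // Channel 0 of the new padding test below: 4x4 input, 2x2 window,
    // stride {2, 2}, padding {0, 1}; prints 0.573 0.732 0.619 0.712 0.742 0.316.
    std::vector<float> in = {
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f
    };
    for (float v : max_pool_ref(in, {4, 4}, 1, {2, 2}, {2, 2}, {0, 1}))
        std::printf("%.3f ", v);
    std::printf("\n");
    return 0;
}
```

Run against channel 0 of the new test case further down, it reproduces the first six values of that test's `expected` vector.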
diff --git a/src/layers/max_pooling.cu b/src/layers/max_pooling.cu
index f1819cd..cd50b5f 100644
--- a/src/layers/max_pooling.cu
+++ b/src/layers/max_pooling.cu
@@ -9,23 +9,31 @@ MaxPooling2d::MaxPooling2d(
     dim2d inputSize,
     int nChannels,
     dim2d poolingSize,
     dim2d stride,
+    dim2d padding,
     ActivationType activationType
 )
     : inputSize(inputSize),
       nChannels(nChannels),
       poolingSize(poolingSize),
-      stride(stride) {
+      stride(stride),
+      padding(padding) {
     outputSize = {
-        (inputSize.first - poolingSize.first) / stride.first + 1,
-        (inputSize.second - poolingSize.second) / stride.second + 1
+        (inputSize.first + 2 * padding.first - poolingSize.first) /
+                stride.first +
+            1,
+        (inputSize.second + 2 * padding.second - poolingSize.second) /
+                stride.second +
+            1
     };
 
-    activation =
-        new Activation(activationType, outputSize.first * outputSize.second * nChannels);
+    activation = new Activation(
+        activationType, outputSize.first * outputSize.second * nChannels
+    );
 
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc(
-        (void**)&d_output, sizeof(float) * outputSize.first * outputSize.second * nChannels
+        (void**)&d_output,
+        sizeof(float) * outputSize.first * outputSize.second * nChannels
     ));
 }
@@ -43,7 +51,8 @@ float* MaxPooling2d::forward(const float* d_input) {
     );
 
     Kernels::max_pooling<<<grid, block>>>(
-        d_input, d_output, inputSize, outputSize, nChannels, poolingSize, stride
+        d_input, d_output, inputSize, outputSize, nChannels, poolingSize,
+        stride, padding
     );
 
     CUDA_CHECK(cudaGetLastError());
diff --git a/test/layers/test_max_pooling.cu b/test/layers/test_max_pooling.cu
index bc883f5..a25e88c 100644
--- a/test/layers/test_max_pooling.cu
+++ b/test/layers/test_max_pooling.cu
@@ -11,6 +11,7 @@ class MaxPoolingLayerTest : public ::testing::Test {
     int nChannels;
     dim2d poolingSize;
     dim2d stride;
+    dim2d padding;
 
     std::vector<float> input;
     std::vector<float> expected;
@@ -35,7 +36,7 @@ class MaxPoolingLayerTest : public ::testing::Test {
         cudaError_t cudaStatus;
 
         maxPoolingLayer = new CUDANet::Layers::MaxPooling2d(
-            inputSize, nChannels, poolingSize, stride,
+            inputSize, nChannels, poolingSize, stride, padding,
             CUDANet::Layers::ActivationType::NONE
         );
 
@@ -71,6 +72,7 @@ TEST_F(MaxPoolingLayerTest, MaxPoolForwardTest) {
     nChannels   = 2;
     poolingSize = {2, 2};
     stride      = {2, 2};
+    padding     = {0, 0};
 
     input = {
         // clang-format off
@@ -97,6 +99,7 @@ TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquareInputTest) {
     nChannels   = 2;
     poolingSize = {2, 2};
     stride      = {2, 2};
+    padding     = {0, 0};
 
     input = {// Channel 0
              0.573f, 0.619f, 0.732f, 0.055f, 0.123f, 0.234f, 0.243f, 0.316f,
@@ -118,6 +121,7 @@ TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquarePoolSizeTest) {
     nChannels   = 2;
     poolingSize = {2, 3};  // Non-square pooling size
     stride      = {2, 2};
+    padding     = {0, 0};
 
     input = {
         // clang-format off
@@ -145,6 +149,7 @@ TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquareStrideTest) {
     nChannels   = 2;
    poolingSize = {2, 2};
     stride      = {1, 2};  // Non-square stride
+    padding     = {0, 0};
 
     input = {
         // clang-format off
@@ -165,4 +170,32 @@ TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquareStrideTest) {
 
     runTest();
-}
\ No newline at end of file
+}
+
+TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquarePaddingTest) {
+    inputSize   = {4, 4};
+    nChannels   = 2;
+    poolingSize = {2, 2};
+    stride      = {2, 2};
+    padding     = {0, 1};  // Non-square padding
+
+    input = {
+        // clang-format off
+        // Channel 0
+        0.573f, 0.619f, 0.732f, 0.055f,
+        0.243f, 0.316f, 0.573f, 0.619f,
+        0.712f, 0.055f, 0.243f, 0.316f,
+        0.573f, 0.619f, 0.742f, 0.055f,
+        // Channel 1
+        0.473f, 0.919f, 0.107f, 0.073f,
+        0.073f, 0.362f, 0.973f, 0.059f,
+        0.473f, 0.455f, 0.283f, 0.416f,
+        0.532f, 0.819f, 0.732f, 0.850f
+        // clang-format on
+    };
+
+    expected = {0.573f, 0.732f, 0.619f, 0.712f, 0.742f, 0.316f,
+                0.473f, 0.973f, 0.073f, 0.532f, 0.819f, 0.85f};
+
+    runTest();
+}
\ No newline at end of file
diff --git a/test/model/test_model.cu b/test/model/test_model.cu
index 571cfe4..c9b397c 100644
--- a/test/model/test_model.cu
+++ b/test/model/test_model.cu
@@ -45,7 +45,7 @@ class ModelTest : public ::testing::Test {
         CUDANet::Layers::MaxPooling2d *maxpool2d =
             new CUDANet::Layers::MaxPooling2d(
                 poolingInput, numFilters, poolingSize,
-                poolingStride, CUDANet::Layers::ActivationType::RELU
+                poolingStride, {0, 0}, CUDANet::Layers::ActivationType::RELU
             );
         model->addLayer("maxpool1", maxpool2d);
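
The layer constructor above switches to the standard padded output-size formula, out = (in + 2 * pad - pool) / stride + 1. A quick compile-time check of that formula follows; `pooledDim` is a hypothetical helper written only for this note, not part of the patch. It confirms the 2 x 3 per-channel output that the new test's 12-element `expected` vector relies on.

```cpp
// Sanity check for the padded output-size formula used in the constructor.
constexpr int pooledDim(int in, int pad, int pool, int stride) {
    return (in + 2 * pad - pool) / stride + 1;
}

// MaxPoolForwardNonSquarePaddingTest: 4x4 input, 2x2 window, stride {2, 2},
// padding {0, 1} -> 2x3 output per channel, 12 values across 2 channels.
static_assert(pooledDim(4, 0, 2, 2) == 2, "output rows");
static_assert(pooledDim(4, 1, 2, 2) == 3, "output cols");

int main() { return 0; }
```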
diff --git a/tools/pooling_test.py b/tools/pooling_test.py
index 3e48381..97f38ba 100644
--- a/tools/pooling_test.py
+++ b/tools/pooling_test.py
@@ -62,6 +62,14 @@ def gen_max_pool_non_square_stride_test_result():
     print_cpp_vector(output)
 
 
+def gen_max_pool_non_square_padding_test_result():
+    input = _get_pool_input()
+
+    output = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=(0, 1))(input)
+    output = torch.flatten(output)
+
+    print_cpp_vector(output)
+
 
 def gen_avg_pool_test_result():
@@ -123,6 +131,8 @@ if __name__ == "__main__":
     gen_max_non_square_pool_test_result()
     print("Max pool non square stride test:")
     gen_max_pool_non_square_stride_test_result()
+    print("Max pool non square padding test:")
+    gen_max_pool_non_square_padding_test_result()
     print("--------------")
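
Finally, a hypothetical call site with the new signature; the function name is illustrative and the include path is inferred from this patch's layout, but the argument order matches the updated tests.

```cpp
#include "layers/max_pooling.cuh"  // include path as laid out in this patch

// Mirrors MaxPoolForwardNonSquarePaddingTest: 4x4 input, 2 channels, 2x2
// window, stride {2, 2}, asymmetric padding {0, 1}. The layer allocates and
// owns its output buffer, so it is kept on the heap as in the tests.
CUDANet::Layers::MaxPooling2d* makePaddedPool() {
    return new CUDANet::Layers::MaxPooling2d(
        {4, 4}, 2, {2, 2}, {2, 2}, {0, 1},
        CUDANet::Layers::ActivationType::NONE
    );
}

// Usage:
//   auto* pool = makePaddedPool();
//   float* d_out = pool->forward(d_in);  // d_in: 2 * 4 * 4 device floats
//   ...                                  // d_out: 2 * 2 * 3 pooled values
//   delete pool;
```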