From 019ccc33d922204b24ce732078c7dde6bbd60bee Mon Sep 17 00:00:00 2001 From: LordMathis Date: Thu, 29 Feb 2024 22:21:48 +0100 Subject: [PATCH] Start implementing padding kernel --- CMakeLists.txt | 1 + include/kernels/padding.cuh | 13 ++++++ src/kernels/padding.cu | 57 +++++++++++++++++++++++++ test/CMakeLists.txt | 6 ++- test/kernels/test_padding.cu | 82 ++++++++++++++++++++++++++++++++++++ 5 files changed, 158 insertions(+), 1 deletion(-) create mode 100644 include/kernels/padding.cuh create mode 100644 src/kernels/padding.cu create mode 100644 test/kernels/test_padding.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index d2daefd..ed802bb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ include_directories(${CUDAToolkit_INCLUDE_DIRS}) set(LIBRARY_SOURCES src/utils/cuda_helper.cu src/kernels/activations.cu + src/kernels/padding.cu src/layers/dense.cu ) diff --git a/include/kernels/padding.cuh b/include/kernels/padding.cuh new file mode 100644 index 0000000..cba01ae --- /dev/null +++ b/include/kernels/padding.cuh @@ -0,0 +1,13 @@ +#ifndef PADDING_H +#define PADDING_H + +__global__ void pad_matrix_kernel( + const float* d_input, + float* d_padded, + int w, + int h, + int n, + int p +); + +#endif // PADDING_H \ No newline at end of file diff --git a/src/kernels/padding.cu b/src/kernels/padding.cu new file mode 100644 index 0000000..ca78008 --- /dev/null +++ b/src/kernels/padding.cu @@ -0,0 +1,57 @@ +/* +Pads matrix width x height x n_channels to width + 2 * padding x height + 2 * +padding x n_channels Matrix is represented as a pointer to column major vector + +For example: + +w = 2 +h = 3 +n = 2 +p = 1 + +Channel 0: + 0 1 + 2 3 + 4 5 +Channel 1: + 6 7 + 8 9 + 10 11 + +Is represented as: + +0 2 4 1 3 5 6 8 10 7 9 11 + +Padded result: + +0 0 0 0 0 0 0 2 4 0 0 1 3 5 0 0 0 0 0 0 0 0 0 0 0 0 6 8 10 0 0 7 9 11 0 0 0 0 0 0 + + +Args: + d_input: Pointer to input vector representing matrix + d_padded: Pointer to output vector representing padded matrix (needs to be +pre-allocated) w: Width of input matrix h: Height of input matrix n: Number of +channels in input matrix p: Padding +*/ +__global__ void pad_matrix_kernel( + const float* d_input, + float* d_padded, + int w, + int h, + int n, + int p +) { + int stride = gridDim.x * blockDim.x; + int tid = blockDim.x * blockIdx.x + threadIdx.x; + + for (int i = tid; i < (w + 2 * p) * (h + 2 * p) * n; i += stride) { + + // if i is in the padding region + if (i < p * (h + 2 * p) * n || i >= (w + p) * (h + 2 * p) * n) { + d_padded[i] = 0.0f; + } else { + // if i is in the original region + d_padded[i] = d_input[(i - p * (h + 2 * p) * n) / (h + 2 * p) * w + (i - p * (h + 2 * p) * n) % (h + 2 * p)]; + } + } +} \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5813516..dff831d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,7 +1,11 @@ find_package(GTest REQUIRED) include_directories(${GTEST_INCLUDE_DIRS}) -add_executable(test_main layers/test_dense.cu kernels/test_activations.cu) +add_executable(test_main + layers/test_dense.cu + kernels/test_activations.cu + kernels/test_padding.cu +) add_library(test_utils test_utils/test_cublas_fixture.cu diff --git a/test/kernels/test_padding.cu b/test/kernels/test_padding.cu new file mode 100644 index 0000000..c8b89af --- /dev/null +++ b/test/kernels/test_padding.cu @@ -0,0 +1,82 @@ +#include +#include + +#include + +#include "padding.cuh" +#include "test_cublas_fixture.cuh" + +class PaddingTest : public CublasTestFixture { + protected: + cudaError_t cudaStatus; + cublasStatus_t cublasStatus; +}; + +TEST_F(PaddingTest, SimplePaddingTest) { + int w = 2; + int h = 3; + int n = 2; + int p = 1; + + float* d_input; + float* d_padded; + + int inputSize = w * h * n; + int paddedSize = (w + 2 * p) * (h + 2 * p) * n; + + cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * inputSize); + EXPECT_EQ(cudaStatus, cudaSuccess); + + cudaStatus = cudaMalloc( + (void**)&d_padded, sizeof(float) * paddedSize + ); + EXPECT_EQ(cudaStatus, cudaSuccess); + + /* + Matrix channel 0: + 0 1 + 2 3 + 4 5 + Matrix channel 1: + 6 7 + 8 9 + 10 11 + + Represented as column major vector: + + 0 2 4 1 3 5 6 8 10 7 9 11 + */ + + std::vector input = {0.0f, 2.0f, 4.0f, 1.0f, 3.0f, 5.0f, + 6.0f, 8.0f, 10.0f, 7.0f, 9.0f, 11.0f}; + + cublasStatus = + cublasSetVector(inputSize, sizeof(float), input.data(), 1, d_input, 1); + EXPECT_EQ(cublasStatus, CUBLAS_STATUS_SUCCESS); + + pad_matrix_kernel<<<1, 1>>>(d_input, d_padded, w, h, n, p); + cudaStatus = cudaDeviceSynchronize(); + EXPECT_EQ(cudaStatus, cudaSuccess); + + std::vector expectedOutput = { + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.0f, 4.0f, 0.0f, + 0.0f, 1.0f, 3.0f, 5.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 6.0f, 8.0f, 10.0f, 0.0f, + 0.0f, 7.0f, 9.0f, 11.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f + }; + + std::vector output(paddedSize); + cublasStatus = cublasGetVector( + paddedSize, sizeof(float), d_padded, 1, output.data(), 1 + ); + + std::cout << "Actual output: " << std::endl; + for (int i = 0; i < paddedSize; i++) { + std::cout << output[i] << " "; + } + std::cout << std::endl; + + for (int i = 0; i < paddedSize; i++) { + EXPECT_NEAR(expectedOutput[i], output[i], 1e-5); + } +} \ No newline at end of file