From 5e663b902985ec185dd1b95263e93f64b9e08ab6 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sat, 20 Apr 2024 19:09:00 +0200
Subject: [PATCH] Fix bias in conv layer

---
 include/kernels/convolution.cuh |  2 ++
 src/kernels/convolution.cu      |  3 ++-
 src/layers/conv2d.cu            | 13 ++++---------
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/kernels/convolution.cuh b/include/kernels/convolution.cuh
index c827690..f4f94a7 100644
--- a/include/kernels/convolution.cuh
+++ b/include/kernels/convolution.cuh
@@ -10,6 +10,7 @@ namespace CUDANet::Kernels {
  *
  * @param d_input Device pointer to the input matrix
  * @param d_kernel Device pointer to the convolution kernel
+ * @param d_bias Device pointer to the bias
  * @param d_output Device pointer to the output matrix
  * @param inputSize Width and height of the input matrix
  * @param nChannels Number of channels in the input matrix
@@ -21,6 +22,7 @@ namespace CUDANet::Kernels {
 __global__ void convolution(
     const float* __restrict__ d_input,
     const float* __restrict__ d_kernel,
+    const float* __restrict__ d_bias,
     float* __restrict__ d_output,
     const int inputSize,
     const int nChannels,
diff --git a/src/kernels/convolution.cu b/src/kernels/convolution.cu
index 9c7a6b3..64a227a 100644
--- a/src/kernels/convolution.cu
+++ b/src/kernels/convolution.cu
@@ -7,6 +7,7 @@ using namespace CUDANet;
 __global__ void Kernels::convolution(
     const float* __restrict__ d_input,
     const float* __restrict__ d_kernel,
+    const float* __restrict__ d_bias,
     float* __restrict__ d_output,
     const int inputSize,
     const int nChannels,
@@ -51,5 +52,5 @@ __global__ void Kernels::convolution(
         }
     }
 
-    d_output[f * outputSize * outputSize + i * outputSize + j] = sum;
+    d_output[f * outputSize * outputSize + i * outputSize + j] = sum + d_bias[f];
 }
\ No newline at end of file
diff --git a/src/layers/conv2d.cu b/src/layers/conv2d.cu
index 3713d07..4d7bbcd 100644
--- a/src/layers/conv2d.cu
+++ b/src/layers/conv2d.cu
@@ -42,12 +42,12 @@ Conv2d::Conv2d(
         sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters
     ));
 
-    biases.resize(outputSize * outputSize * numFilters);
+    biases.resize(numFilters);
     initializeBiases();
 
     d_biases = nullptr;
     CUDA_CHECK(cudaMalloc(
-        (void**)&d_biases, sizeof(float) * outputSize * outputSize * numFilters
+        (void**)&d_biases, sizeof(float) * numFilters
    ));
 
     toCuda();
@@ -94,7 +94,7 @@ void Conv2d::toCuda() {
     CUDA_CHECK(cudaMemcpy(
         d_biases,
         biases.data(),
-        sizeof(float) * outputSize * outputSize * numFilters,
+        sizeof(float) * numFilters,
         cudaMemcpyHostToDevice
     ));
 }
@@ -109,15 +109,10 @@ float* Conv2d::forward(const float* d_input) {
     );
 
     Kernels::convolution<<<grid, block>>>(
-        d_input, d_weights, d_output, inputSize, inputChannels, paddingSize,
+        d_input, d_weights, d_biases, d_output, inputSize, inputChannels, paddingSize,
         kernelSize, stride, numFilters, outputSize
     );
 
-    // Add bias
-    Kernels::vec_vec_add<<<1, biases.size()>>>(
-        d_biases, d_output, d_output, biases.size()
-    );
-
     // Apply activation
     activation.activate(d_output);
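
Note on the fix: a conv layer has one bias scalar per output filter, so the host-side vector only needs numFilters entries, and the kernel can broadcast d_bias[f] across the whole outputSize x outputSize plane of filter f. Folding the add into the convolution kernel also removes the separate vec_vec_add launch, which previously treated the bias as a full outputSize * outputSize * numFilters element vector. The standalone sketch below is not part of the patch; the kernel name addFilterBias and the constants OUT and FILTERS are made up for illustration, but it shows the same per-filter broadcast over a [filter][row][col] layout.

// Standalone sketch (assumed names, not the project's actual kernel): adds a
// per-filter bias to an output tensor laid out as [filter][row][col].
#include <cstdio>
#include <cuda_runtime.h>

__global__ void addFilterBias(
    float* __restrict__ d_output,
    const float* __restrict__ d_bias,
    const int outputSize,
    const int nFilters
) {
    int idx   = blockIdx.x * blockDim.x + threadIdx.x;
    int total = nFilters * outputSize * outputSize;
    if (idx >= total) return;

    // Recover the filter index from the flat [filter][row][col] offset:
    // every element of filter f's plane receives the same d_bias[f].
    int f = idx / (outputSize * outputSize);
    d_output[idx] += d_bias[f];
}

int main() {
    const int OUT     = 4;  // output width and height (illustrative)
    const int FILTERS = 3;  // number of filters (illustrative)
    const int total   = FILTERS * OUT * OUT;

    float h_output[total]  = {0.0f};  // stand-in for the convolution sums
    float h_bias[FILTERS]  = {0.1f, 0.2f, 0.3f};

    float *d_output, *d_bias;
    cudaMalloc((void**)&d_output, sizeof(float) * total);
    cudaMalloc((void**)&d_bias, sizeof(float) * FILTERS);
    cudaMemcpy(d_output, h_output, sizeof(float) * total, cudaMemcpyHostToDevice);
    cudaMemcpy(d_bias, h_bias, sizeof(float) * FILTERS, cudaMemcpyHostToDevice);

    addFilterBias<<<(total + 255) / 256, 256>>>(d_output, d_bias, OUT, FILTERS);
    cudaMemcpy(h_output, d_output, sizeof(float) * total, cudaMemcpyDeviceToHost);

    // Every element within a filter's plane ends up offset by that filter's bias.
    printf("filter 0 sample: %.2f, filter 2 sample: %.2f\n",
           h_output[0], h_output[2 * OUT * OUT]);

    cudaFree(d_output);
    cudaFree(d_bias);
    return 0;
}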