Fix bias in conv layer: store one bias per filter and add it inside the convolution kernel

This commit is contained in:
2024-04-20 19:09:00 +02:00
parent d08567a563
commit 5e663b9029
3 changed files with 8 additions and 10 deletions

View File

@@ -10,6 +10,7 @@ namespace CUDANet::Kernels {
* *
* @param d_input Device pointer to the input matrix * @param d_input Device pointer to the input matrix
* @param d_kernel Device pointer to the convolution kernel * @param d_kernel Device pointer to the convolution kernel
* @param d_bias Device pointer to the bias vector (one value per filter, indexed by filter)
* @param d_output Device pointer to the output matrix * @param d_output Device pointer to the output matrix
* @param inputSize Width and height of the input matrix * @param inputSize Width and height of the input matrix
* @param nChannels Number of channels in the input matrix * @param nChannels Number of channels in the input matrix
@@ -21,6 +22,7 @@ namespace CUDANet::Kernels {
__global__ void convolution( __global__ void convolution(
const float* __restrict__ d_input, const float* __restrict__ d_input,
const float* __restrict__ d_kernel, const float* __restrict__ d_kernel,
const float* __restrict__ d_bias,
float* __restrict__ d_output, float* __restrict__ d_output,
const int inputSize, const int inputSize,
const int nChannels, const int nChannels,

View File

@@ -7,6 +7,7 @@ using namespace CUDANet;
__global__ void Kernels::convolution( __global__ void Kernels::convolution(
const float* __restrict__ d_input, const float* __restrict__ d_input,
const float* __restrict__ d_kernel, const float* __restrict__ d_kernel,
const float* __restrict__ d_bias,
float* __restrict__ d_output, float* __restrict__ d_output,
const int inputSize, const int inputSize,
const int nChannels, const int nChannels,
@@ -51,5 +52,5 @@ __global__ void Kernels::convolution(
} }
} }
d_output[f * outputSize * outputSize + i * outputSize + j] = sum; d_output[f * outputSize * outputSize + i * outputSize + j] = sum + d_bias[f];
} }

View File

@@ -42,12 +42,12 @@ Conv2d::Conv2d(
sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters
)); ));
biases.resize(outputSize * outputSize * numFilters); biases.resize(numFilters);
initializeBiases(); initializeBiases();
d_biases = nullptr; d_biases = nullptr;
CUDA_CHECK(cudaMalloc( CUDA_CHECK(cudaMalloc(
(void**)&d_biases, sizeof(float) * outputSize * outputSize * numFilters (void**)&d_biases, sizeof(float) * numFilters
)); ));
toCuda(); toCuda();
@@ -94,7 +94,7 @@ void Conv2d::toCuda() {
CUDA_CHECK(cudaMemcpy( CUDA_CHECK(cudaMemcpy(
d_biases, biases.data(), d_biases, biases.data(),
sizeof(float) * outputSize * outputSize * numFilters, sizeof(float) * numFilters,
cudaMemcpyHostToDevice cudaMemcpyHostToDevice
)); ));
} }
@@ -109,15 +109,10 @@ float* Conv2d::forward(const float* d_input) {
); );
Kernels::convolution<<<grid, block>>>( Kernels::convolution<<<grid, block>>>(
d_input, d_weights, d_output, inputSize, inputChannels, paddingSize, d_input, d_weights, d_biases, d_output, inputSize, inputChannels, paddingSize,
kernelSize, stride, numFilters, outputSize kernelSize, stride, numFilters, outputSize
); );
// Add bias
Kernels::vec_vec_add<<<1, biases.size()>>>(
d_biases, d_output, d_output, biases.size()
);
// Apply activation // Apply activation
activation.activate(d_output); activation.activate(d_output);