From d177a67cd6110a521c0dcdfe3713855dae7319c4 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Sat, 9 Mar 2024 23:03:23 +0100 Subject: [PATCH] Add bias to conv2d --- include/layers/conv2d.cuh | 3 +++ src/layers/conv2d.cu | 29 +++++++++++++++++++++++++---- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/include/layers/conv2d.cuh b/include/layers/conv2d.cuh index 3ec73ca..6c64240 100644 --- a/include/layers/conv2d.cuh +++ b/include/layers/conv2d.cuh @@ -42,15 +42,18 @@ class Conv2d { // Kernels std::vector kernels; + std::vector biases; // Cuda float* d_kernels; + float* d_biases; float* d_padded; // Kernels Activation activation; void initializeKernels(); + void initializeBiases(); void toCuda(); }; diff --git a/src/layers/conv2d.cu b/src/layers/conv2d.cu index e941104..05d7f60 100644 --- a/src/layers/conv2d.cu +++ b/src/layers/conv2d.cu @@ -5,6 +5,7 @@ #include "conv2d.cuh" #include "convolution.cuh" #include "cuda_helper.cuh" +#include "matrix_math.cuh" #include "padding.cuh" Layers::Conv2d::Conv2d( @@ -33,25 +34,33 @@ Layers::Conv2d::Conv2d( } kernels.resize(kernelSize * kernelSize * inputChannels * numFilters); - initializeKernels(); + initializeKernels(); d_kernels = nullptr; - CUDA_CHECK( cudaMalloc((void**)&d_kernels, sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters) ); - toCuda(); + + biases.resize(outputSize * outputSize * numFilters); + initializeBiases(); + + d_biases = nullptr; + CUDA_CHECK( + cudaMalloc((void**)&d_biases, sizeof(float) * outputSize * outputSize * numFilters) + ); d_padded = nullptr; - CUDA_CHECK(cudaMalloc( (void**)&d_padded, sizeof(float) * (inputSize + 2 * paddingSize) * (inputSize + 2 * paddingSize) * inputChannels )); + + toCuda(); } Layers::Conv2d::~Conv2d() { cudaFree(d_kernels); + cudaFree(d_biases); cudaFree(d_padded); } @@ -59,6 +68,10 @@ void Layers::Conv2d::initializeKernels() { std::fill(kernels.begin(), kernels.end(), 0.0f); } +void Layers::Conv2d::initializeBiases() { + std::fill(biases.begin(), biases.end(), 0.0f); +} + void Layers::Conv2d::setKernels(const std::vector& kernels_input) { std::copy(kernels_input.begin(), kernels_input.end(), kernels.begin()); toCuda(); @@ -69,6 +82,11 @@ void Layers::Conv2d::toCuda() { d_kernels, kernels.data(), sizeof(float) * kernelSize * kernelSize * numFilters, cudaMemcpyHostToDevice )); + + CUDA_CHECK(cudaMemcpy( + d_biases, biases.data(), sizeof(float) * outputSize * outputSize * numFilters, + cudaMemcpyHostToDevice + )); } void Layers::Conv2d::forward(const float* d_input, float* d_output) { @@ -85,6 +103,9 @@ void Layers::Conv2d::forward(const float* d_input, float* d_output) { d_padded, d_kernels, d_output, inputSize + (2 * paddingSize), inputChannels, kernelSize, stride, numFilters, outputSize ); + // Add bias + vec_vec_add_kernel<<<1, biases.size()>>>(d_biases, d_output, d_output, biases.size()); + CUDA_CHECK(cudaDeviceSynchronize()); }