Fix bias in conv layer
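The bias was previously sized as a full outputSize * outputSize * numFilters buffer and added to the output in a separate vec_vec_add launch. It is now one value per filter: biases is resized to numFilters, the device allocation and copy use that size, and the convolution kernel takes a d_bias pointer and adds d_bias[f] when writing each output element.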
@@ -10,6 +10,7 @@ namespace CUDANet::Kernels {
  *
  * @param d_input Device pointer to the input matrix
  * @param d_kernel Device pointer to the convolution kernel
+ * @param d_bias Device pointer to the bias
  * @param d_output Device pointer to the output matrix
  * @param inputSize Width and height of the input matrix
  * @param nChannels Number of channels in the input matrix
@@ -21,6 +22,7 @@ namespace CUDANet::Kernels {
 __global__ void convolution(
     const float* __restrict__ d_input,
     const float* __restrict__ d_kernel,
+    const float* __restrict__ d_bias,
     float* __restrict__ d_output,
     const int inputSize,
     const int nChannels,
@@ -7,6 +7,7 @@ using namespace CUDANet;
 __global__ void Kernels::convolution(
     const float* __restrict__ d_input,
     const float* __restrict__ d_kernel,
+    const float* __restrict__ d_bias,
     float* __restrict__ d_output,
     const int inputSize,
     const int nChannels,
@@ -51,5 +52,5 @@ __global__ void Kernels::convolution(
         }
     }
 
-    d_output[f * outputSize * outputSize + i * outputSize + j] = sum;
+    d_output[f * outputSize * outputSize + i * outputSize + j] = sum + d_bias[f];
 }
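For reference, a minimal, self-contained sketch of the per-filter bias pattern the kernel now uses: every element written for filter f receives the same d_bias[f], with the flat f * outputSize * outputSize + i * outputSize + j indexing shown in the diff. The kernel name add_filter_bias and the main() harness below are illustrative only, not part of CUDANet.

#include <cstdio>
#include <cuda_runtime.h>

// Illustrative only: adds a per-filter bias to an already-computed feature map,
// using the same flat indexing as the CUDANet convolution kernel.
__global__ void add_filter_bias(
    const float* __restrict__ d_sums,   // pre-bias values, one per output element
    const float* __restrict__ d_bias,   // one bias value per filter
    float* __restrict__ d_output,
    const int outputSize,
    const int nFilters
) {
    int j = blockIdx.x * blockDim.x + threadIdx.x;  // output column
    int i = blockIdx.y * blockDim.y + threadIdx.y;  // output row
    int f = blockIdx.z;                             // filter index

    if (i >= outputSize || j >= outputSize || f >= nFilters) return;

    int idx = f * outputSize * outputSize + i * outputSize + j;
    d_output[idx] = d_sums[idx] + d_bias[f];  // same bias for every element of filter f
}

int main() {
    const int outputSize = 4;
    const int nFilters   = 2;
    const int n          = outputSize * outputSize * nFilters;

    float h_sums[n], h_out[n];
    float h_bias[nFilters] = {0.5f, -1.0f};
    for (int k = 0; k < n; ++k) h_sums[k] = static_cast<float>(k);

    float *d_sums, *d_bias, *d_out;
    cudaMalloc((void**)&d_sums, sizeof(float) * n);
    cudaMalloc((void**)&d_bias, sizeof(float) * nFilters);
    cudaMalloc((void**)&d_out,  sizeof(float) * n);
    cudaMemcpy(d_sums, h_sums, sizeof(float) * n, cudaMemcpyHostToDevice);
    cudaMemcpy(d_bias, h_bias, sizeof(float) * nFilters, cudaMemcpyHostToDevice);

    dim3 block(8, 8);
    dim3 grid((outputSize + 7) / 8, (outputSize + 7) / 8, nFilters);
    add_filter_bias<<<grid, block>>>(d_sums, d_bias, d_out, outputSize, nFilters);

    cudaMemcpy(h_out, d_out, sizeof(float) * n, cudaMemcpyDeviceToHost);
    printf("filter 1, element (0,0): %f\n", h_out[outputSize * outputSize]);  // 16.0 + (-1.0) = 15.0

    cudaFree(d_sums); cudaFree(d_bias); cudaFree(d_out);
    return 0;
}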
@@ -42,12 +42,12 @@ Conv2d::Conv2d(
         sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters
     ));
 
-    biases.resize(outputSize * outputSize * numFilters);
+    biases.resize(numFilters);
     initializeBiases();
 
     d_biases = nullptr;
     CUDA_CHECK(cudaMalloc(
-        (void**)&d_biases, sizeof(float) * outputSize * outputSize * numFilters
+        (void**)&d_biases, sizeof(float) * numFilters
     ));
 
     toCuda();
@@ -94,7 +94,7 @@ void Conv2d::toCuda() {
 
     CUDA_CHECK(cudaMemcpy(
         d_biases, biases.data(),
-        sizeof(float) * outputSize * outputSize * numFilters,
+        sizeof(float) * numFilters,
         cudaMemcpyHostToDevice
     ));
 }
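A minimal sketch of the matching host-side pattern after the change, assuming a standalone program rather than the Conv2d class: the bias vector is resized to numFilters, and both the cudaMalloc and the cudaMemcpy use the same sizeof(float) * numFilters size (numFilters here is a hypothetical value, not taken from the repo).

#include <algorithm>
#include <vector>
#include <cuda_runtime.h>

int main() {
    const int numFilters = 64;  // hypothetical filter count

    // Host-side bias storage: one value per filter, not per output element.
    std::vector<float> biases;
    biases.resize(numFilters);
    std::fill(biases.begin(), biases.end(), 0.0f);

    // Device-side allocation and copy use the same sizeof(float) * numFilters size.
    float* d_biases = nullptr;
    cudaMalloc((void**)&d_biases, sizeof(float) * numFilters);
    cudaMemcpy(
        d_biases, biases.data(),
        sizeof(float) * numFilters,
        cudaMemcpyHostToDevice
    );

    cudaFree(d_biases);
    return 0;
}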
@@ -109,15 +109,10 @@ float* Conv2d::forward(const float* d_input) {
     );
 
     Kernels::convolution<<<grid, block>>>(
-        d_input, d_weights, d_output, inputSize, inputChannels, paddingSize,
+        d_input, d_weights, d_biases, d_output, inputSize, inputChannels, paddingSize,
         kernelSize, stride, numFilters, outputSize
     );
 
-    // Add bias
-    Kernels::vec_vec_add<<<1, biases.size()>>>(
-        d_biases, d_output, d_output, biases.size()
-    );
-
     // Apply activation
     activation.activate(d_output);
 
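With the bias folded into the convolution kernel, the forward pass needs only the single convolution launch before the activation. Note also that the removed vec_vec_add path launched one block of biases.size() threads; under the old outputSize * outputSize * numFilters sizing that thread count could exceed CUDA's per-block limit, which the per-filter sizing avoids.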