mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-06 01:34:22 +00:00
Add bias to conv2d
This commit is contained in:
@@ -42,15 +42,18 @@ class Conv2d {
|
|||||||
|
|
||||||
// Kernels
|
// Kernels
|
||||||
std::vector<float> kernels;
|
std::vector<float> kernels;
|
||||||
|
std::vector<float> biases;
|
||||||
|
|
||||||
// Cuda
|
// Cuda
|
||||||
float* d_kernels;
|
float* d_kernels;
|
||||||
|
float* d_biases;
|
||||||
float* d_padded;
|
float* d_padded;
|
||||||
|
|
||||||
// Activation
|
// Activation
|
||||||
Activation activation;
|
Activation activation;
|
||||||
|
|
||||||
void initializeKernels();
|
void initializeKernels();
|
||||||
|
void initializeBiases();
|
||||||
void toCuda();
|
void toCuda();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
#include "conv2d.cuh"
|
#include "conv2d.cuh"
|
||||||
#include "convolution.cuh"
|
#include "convolution.cuh"
|
||||||
#include "cuda_helper.cuh"
|
#include "cuda_helper.cuh"
|
||||||
|
#include "matrix_math.cuh"
|
||||||
#include "padding.cuh"
|
#include "padding.cuh"
|
||||||
|
|
||||||
Layers::Conv2d::Conv2d(
|
Layers::Conv2d::Conv2d(
|
||||||
@@ -36,22 +37,30 @@ Layers::Conv2d::Conv2d(
|
|||||||
initializeKernels();
|
initializeKernels();
|
||||||
|
|
||||||
d_kernels = nullptr;
|
d_kernels = nullptr;
|
||||||
|
|
||||||
CUDA_CHECK(
|
CUDA_CHECK(
|
||||||
cudaMalloc((void**)&d_kernels, sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters)
|
cudaMalloc((void**)&d_kernels, sizeof(float) * kernelSize * kernelSize * inputChannels * numFilters)
|
||||||
);
|
);
|
||||||
toCuda();
|
|
||||||
|
biases.resize(outputSize * outputSize * numFilters);
|
||||||
|
initializeBiases();
|
||||||
|
|
||||||
|
d_biases = nullptr;
|
||||||
|
CUDA_CHECK(
|
||||||
|
cudaMalloc((void**)&d_biases, sizeof(float) * outputSize * outputSize * numFilters)
|
||||||
|
);
|
||||||
|
|
||||||
d_padded = nullptr;
|
d_padded = nullptr;
|
||||||
|
|
||||||
CUDA_CHECK(cudaMalloc(
|
CUDA_CHECK(cudaMalloc(
|
||||||
(void**)&d_padded, sizeof(float) * (inputSize + 2 * paddingSize) *
|
(void**)&d_padded, sizeof(float) * (inputSize + 2 * paddingSize) *
|
||||||
(inputSize + 2 * paddingSize) * inputChannels
|
(inputSize + 2 * paddingSize) * inputChannels
|
||||||
));
|
));
|
||||||
|
|
||||||
|
toCuda();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Destructor: releases the device buffers d_kernels, d_biases and d_padded
// with cudaFree. The cudaFree return codes are not checked here.
// The repeated lines below are the before/after columns of the commit view;
// cudaFree(d_biases) appears only on the after side (it is added by this commit).
Layers::Conv2d::~Conv2d() {
|
Layers::Conv2d::~Conv2d() {
|
||||||
cudaFree(d_kernels);
|
cudaFree(d_kernels);
|
||||||
|
cudaFree(d_biases);
|
||||||
cudaFree(d_padded);
|
cudaFree(d_padded);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -59,6 +68,10 @@ void Layers::Conv2d::initializeKernels() {
|
|||||||
std::fill(kernels.begin(), kernels.end(), 0.0f);
|
std::fill(kernels.begin(), kernels.end(), 0.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sets every element of the host-side `biases` vector to 0.0f.
// This function only touches host memory; the copy of `biases` into d_biases
// is done by toCuda(), which the constructor calls after this function.
void Layers::Conv2d::initializeBiases() {
|
||||||
|
std::fill(biases.begin(), biases.end(), 0.0f);
|
||||||
|
}
|
||||||
|
|
||||||
void Layers::Conv2d::setKernels(const std::vector<float>& kernels_input) {
|
void Layers::Conv2d::setKernels(const std::vector<float>& kernels_input) {
|
||||||
std::copy(kernels_input.begin(), kernels_input.end(), kernels.begin());
|
std::copy(kernels_input.begin(), kernels_input.end(), kernels.begin());
|
||||||
toCuda();
|
toCuda();
|
||||||
@@ -69,6 +82,11 @@ void Layers::Conv2d::toCuda() {
|
|||||||
d_kernels, kernels.data(), sizeof(float) * kernelSize * kernelSize * numFilters,
|
d_kernels, kernels.data(), sizeof(float) * kernelSize * kernelSize * numFilters,
|
||||||
cudaMemcpyHostToDevice
|
cudaMemcpyHostToDevice
|
||||||
));
|
));
|
||||||
|
|
||||||
|
CUDA_CHECK(cudaMemcpy(
|
||||||
|
d_biases, biases.data(), sizeof(float) * outputSize * outputSize * numFilters,
|
||||||
|
cudaMemcpyHostToDevice
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Layers::Conv2d::forward(const float* d_input, float* d_output) {
|
void Layers::Conv2d::forward(const float* d_input, float* d_output) {
|
||||||
@@ -85,6 +103,9 @@ void Layers::Conv2d::forward(const float* d_input, float* d_output) {
|
|||||||
d_padded, d_kernels, d_output, inputSize + (2 * paddingSize), inputChannels, kernelSize, stride, numFilters, outputSize
|
d_padded, d_kernels, d_output, inputSize + (2 * paddingSize), inputChannels, kernelSize, stride, numFilters, outputSize
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// Add bias
|
||||||
|
vec_vec_add_kernel<<<1, biases.size()>>>(d_biases, d_output, d_output, biases.size());
|
||||||
|
|
||||||
CUDA_CHECK(cudaDeviceSynchronize());
|
CUDA_CHECK(cudaDeviceSynchronize());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user