Add Kernels namespace

This commit is contained in:
2024-03-11 21:04:23 +01:00
parent e0178e2d5c
commit d2ab78fbc7
18 changed files with 188 additions and 186 deletions

View File

@@ -5,17 +5,16 @@
#include "conv2d.cuh"
#include "convolution.cuh"
#include "cuda_helper.cuh"
#include "matrix_math.cuh"
#include "padding.cuh"
#include "matmul.cuh"
Layers::Conv2d::Conv2d(
int inputSize,
int inputChannels,
int kernelSize,
int stride,
Padding padding,
int numFilters,
Activation activation
int inputSize,
int inputChannels,
int kernelSize,
int stride,
Layers::Padding padding,
int numFilters,
Layers::Activation activation
)
: inputSize(inputSize),
inputChannels(inputChannels),
@@ -23,21 +22,19 @@ Layers::Conv2d::Conv2d(
stride(stride),
numFilters(numFilters),
activation(activation) {
switch (padding) {
case SAME:
outputSize = inputSize;
paddingSize = ((stride - 1) * inputSize - stride + kernelSize) / 2;
break;
switch (padding)
{
case SAME:
outputSize = inputSize;
paddingSize = ((stride - 1) * inputSize - stride + kernelSize) / 2;
break;
case VALID:
paddingSize = 0;
outputSize = (inputSize - kernelSize) / stride + 1;
break;
case VALID:
paddingSize = 0;
outputSize = (inputSize - kernelSize) / stride + 1;
break;
default:
break;
default:
break;
}
weights.resize(kernelSize * kernelSize * inputChannels * numFilters);
@@ -109,19 +106,19 @@ void Layers::Conv2d::forward(const float* d_input, float* d_output) {
int THREADS_PER_BLOCK = (inputSize + 2 * paddingSize) *
(inputSize + 2 * paddingSize) * inputChannels;
pad_matrix_kernel<<<1, THREADS_PER_BLOCK>>>(
Kernels::padding<<<1, THREADS_PER_BLOCK>>>(
d_input, d_padded, inputSize, inputSize, inputChannels, paddingSize
);
// Convolve
THREADS_PER_BLOCK = outputSize * outputSize * numFilters;
convolution_kernel<<<1, THREADS_PER_BLOCK>>>(
Kernels::convolution<<<1, THREADS_PER_BLOCK>>>(
d_padded, d_weights, d_output, inputSize + (2 * paddingSize),
inputChannels, kernelSize, stride, numFilters, outputSize
);
// Add bias
vec_vec_add_kernel<<<1, biases.size()>>>(
Kernels::vec_vec_add<<<1, biases.size()>>>(
d_biases, d_output, d_output, biases.size()
);
@@ -138,8 +135,7 @@ outputSize x numFilters
*/
void Layers::Conv2d::host_conv(const float* input, float* output) {
// Iterate over output matrix
for (int tid = 0; tid < outputSize * outputSize * numFilters; tid++)
{
for (int tid = 0; tid < outputSize * outputSize * numFilters; tid++) {
// Get output index
int f = tid / (outputSize * outputSize);
int i = tid % (outputSize * outputSize) / outputSize;
@@ -153,19 +149,17 @@ void Layers::Conv2d::host_conv(const float* input, float* output) {
for (int c = 0; c < inputChannels; c++) {
int kernelIndex =
f * kernelSize * kernelSize * inputChannels +
c * kernelSize * kernelSize + k * kernelSize +
l;
c * kernelSize * kernelSize + k * kernelSize + l;
int inputIndex = c * inputSize * inputSize +
(i * stride + k) * inputSize +
(j * stride + l);
(i * stride + k) * inputSize +
(j * stride + l);
sum += weights[kernelIndex] * input[inputIndex];
}
}
}
int outputIndex =
f * outputSize * outputSize + i * outputSize + j;
int outputIndex = f * outputSize * outputSize + i * outputSize + j;
output[outputIndex] = sum;
}