Initial cuda conv kernel implementation

This commit is contained in:
2024-03-08 23:35:54 +01:00
parent 4b6fcbc191
commit e51aabc2f2
2 changed files with 69 additions and 0 deletions

View File

@@ -0,0 +1,53 @@
#include "convolution.cuh"
__global__ void convolution_kernel(
const float* d_input,
const float* d_kernel,
float* d_output,
int inputSize,
int nChannels,
int kernelSize,
int stride,
int nFilters,
int outputSize
) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid >= outputSize * outputSize * nFilters) {
return;
}
// Get output index
int f = tid / (outputSize * outputSize);
int i = (tid % (outputSize * outputSize)) / outputSize;
int j = (tid % (outputSize * outputSize)) % outputSize;
float sum = 0.0f;
// std::cout << "f: " << f << ", i: " << i << ", j: " << j << std::endl;
// Iterate over kernel and input matrix
for (int k = 0; k < kernelSize; k++) {
for (int l = 0; l < kernelSize; l++) {
for (int c = 0; c < nChannels; c++) {
int kernelIndex =
k * (kernelSize * nChannels * nFilters) +
l * (nChannels * nFilters) + c * (nFilters) + f;
int inputIndex =
(i * stride + k) * (inputSize * nChannels) +
(j * stride + l) * (nChannels) + c;
// std::cout << "kernelIndex: " << kernelIndex << ", kernel
// value: " << kernels[kernelIndex] << ", inputIndex: " <<
// inputIndex << ", input value: " << input[inputIndex] <<
// std::endl;
sum += d_kernel[kernelIndex] * d_input[inputIndex];
}
}
}
// std::cout << "sum: " << sum << std::endl;
d_output[i * (outputSize * nFilters) + j * (nFilters) + f] = sum;
}