Add toplevel CUDANet namespace

This commit is contained in:
2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -1,6 +1,7 @@
#include "convolution.cuh"
#include <iostream>
#include "convolution.cuh"
/*
Pads a width x height x n_channels matrix to (width + 2 * padding) x
(height + 2 * padding) x n_channels. Matrix is represented as a pointer to a vector
@@ -47,13 +48,13 @@ pre-allocated)
n: Number of channels in input matrix
p: Padding
*/
__global__ void Kernels::padding(
const float* d_input,
float* d_padded,
int w,
int h,
int n,
int p
__global__ void CUDANet::Kernels::padding(
const float* __restrict__ d_input,
float* __restrict__ d_padded,
const unsigned int w,
const unsigned int h,
const unsigned int n,
const unsigned int p
) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;
@@ -78,16 +79,16 @@ __global__ void Kernels::padding(
}
}
__global__ void Kernels::convolution(
const float* d_input,
const float* d_kernel,
float* d_output,
int inputSize,
int nChannels,
int kernelSize,
int stride,
int nFilters,
int outputSize
__global__ void CUDANet::Kernels::convolution(
const float* __restrict__ d_input,
const float* __restrict__ d_kernel,
float* __restrict__ d_output,
const unsigned int inputSize,
const unsigned int nChannels,
const unsigned int kernelSize,
const unsigned int stride,
const unsigned int nFilters,
const unsigned int outputSize
) {
int tid = blockDim.x * blockIdx.x + threadIdx.x;