Add toplevel CUDANet namespace

2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -1,7 +1,7 @@
#ifndef CUDANET_ACTIVATIONS_H
#define CUDANET_ACTIVATIONS_H
namespace Kernels {
namespace CUDANet::Kernels {
/**
* @brief Sigmoid activation function kernel
@@ -23,6 +23,6 @@ sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
__global__ void
relu(const float* __restrict__ src, float* __restrict__ dst, int len);
} // namespace Kernels
} // namespace CUDANet::Kernels
#endif // CUDANET_ACTIVATIONS_H
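At call sites, this change just adds a namespace qualifier in front of each kernel launch. A minimal, hypothetical sketch (the wrapper name, block size, and buffer setup are illustrative and not taken from this repository):

// Hypothetical call site; assumes this header is included and that d_input and
// d_output are device buffers of length len, allocated and filled elsewhere.
void reluForwardSketch(const float* d_input, float* d_output, int len) {
    dim3 block(256);
    dim3 grid((len + block.x - 1) / block.x);

    // Before this commit:  Kernels::relu<<<grid, block>>>(d_input, d_output, len);
    // After this commit:
    CUDANet::Kernels::relu<<<grid, block>>>(d_input, d_output, len);
}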

View File

@@ -1,11 +1,11 @@
#ifndef CUDANET_CONVOLUTION_H
#define CUDANET_CONVOLUTION_H
namespace Kernels {
namespace CUDANet::Kernels {
/**
* @brief Kernel that pads the input matrix with zeros
*
*
* @param d_input Device pointer to the input matrix (as vector)
* @param d_padded Device pointer to the padded matrix (as vector)
* @param w Width of the input matrix
@@ -14,17 +14,17 @@ namespace Kernels {
* @param p Padding size
*/
__global__ void padding(
const float* d_input,
float* d_padded,
int w,
int h,
int n,
int p
const float* __restrict__ d_input,
float* __restrict__ d_padded,
const unsigned int w,
const unsigned int h,
const unsigned int n,
const unsigned int p
);
/**
* @brief Convolution kernel
*
*
* @param d_input Device pointer to the input matrix
* @param d_kernel Device pointer to the convolution kernel
* @param d_output Device pointer to the output matrix
@@ -36,17 +36,17 @@ __global__ void padding(
* @param outputSize Width and height of the output matrix
*/
__global__ void convolution(
const float* d_input,
const float* d_kernel,
float* d_output,
int inputSize,
int nChannels,
int kernelSize,
int stride,
int nFilters,
int outputSize
const float* __restrict__ d_input,
const float* __restrict__ d_kernel,
float* __restrict__ d_output,
const unsigned int inputSize,
const unsigned int nChannels,
const unsigned int kernelSize,
const unsigned int stride,
const unsigned int nFilters,
const unsigned int outputSize
);
} // namespace Kernels
} // namespace CUDANet::Kernels
#endif // CUDANET_CONVOLUTION_H
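The two kernels above pair naturally: pad first, then convolve the padded buffer. A rough host-side sketch under that assumption (the wrapper, the grid/block choices, the size arithmetic, and reading n as the channel count are illustrative guesses, not code from this commit):

// Hypothetical host-side wrapper; assumes this header is included, a square
// w == h input, and separately allocated d_padded / d_output device buffers.
void convForwardSketch(
    const float* d_input, const float* d_kernel, float* d_padded, float* d_output,
    unsigned int inputSize, unsigned int nChannels, unsigned int kernelSize,
    unsigned int stride, unsigned int nFilters, unsigned int p
) {
    // Usual convolution size arithmetic (an assumption, not taken from the kernels).
    unsigned int paddedSize = inputSize + 2 * p;
    unsigned int outputSize = (paddedSize - kernelSize) / stride + 1;

    // One thread per padded element / per output element; block size is a guess.
    dim3 block(256);
    dim3 padGrid((paddedSize * paddedSize * nChannels + block.x - 1) / block.x);
    dim3 convGrid((outputSize * outputSize * nFilters + block.x - 1) / block.x);

    CUDANet::Kernels::padding<<<padGrid, block>>>(
        d_input, d_padded, inputSize, inputSize, nChannels, p
    );
    CUDANet::Kernels::convolution<<<convGrid, block>>>(
        d_padded, d_kernel, d_output, paddedSize, nChannels,
        kernelSize, stride, nFilters, outputSize
    );
}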

View File

@@ -1,11 +1,11 @@
#ifndef CUDANET_MATMUL_H
#define CUDANET_MATMUL_H
namespace Kernels {
namespace CUDANet::Kernels {
/**
* @brief Matrix vector multiplication kernel
*
*
* @param d_matrix Device pointer to matrix
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
@@ -13,28 +13,41 @@ namespace Kernels {
* @param h Height of the matrix
*/
__global__ void mat_vec_mul(
const float* d_matrix,
const float* d_vector,
float* d_output,
int w,
int h
const float* __restrict__ d_matrix,
const float* __restrict__ d_vector,
float* __restrict__ d_output,
const unsigned int w,
const unsigned int h
);
/**
* @brief Vector vector addition kernel
*
*
* @param d_vector1 Device pointer to first vector
* @param d_vector2 Device pointer to second vector
* @param d_output Device pointer to output vector
* @param w Length of the vectors
*/
__global__ void vec_vec_add(
const float* d_vector1,
const float* d_vector2,
float* d_output,
int w
const float* __restrict__ d_vector1,
const float* __restrict__ d_vector2,
float* __restrict__ d_output,
const unsigned int w
);
} // namespace Kernels
/**
 * @brief Sum reduction kernel
*
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
* @param w Length of the vector
*/
__global__ void reduce_sum(
const float* __restrict__ d_vector,
float* __restrict__ d_output,
const unsigned int w
);
} // namespace CUDANet::Kernels
#endif // CUDANET_MATMUL_H
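The reduce_sum declaration is new in this commit and its definition is not part of this header diff. For orientation, a shared-memory block reduction is one common shape for such a kernel; the sketch below illustrates that general pattern only and is not the implementation added here (BLOCK_SIZE and the one-partial-sum-per-block output layout are assumptions):

#define BLOCK_SIZE 256

// Illustrative block-wise sum reduction: each block writes one partial sum to
// d_output, so a second pass (or a host-side sum) is needed for the final total.
// Assumes blockDim.x == BLOCK_SIZE and that BLOCK_SIZE is a power of two.
__global__ void reduce_sum_sketch(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int w
) {
    __shared__ float partial[BLOCK_SIZE];

    unsigned int tid = threadIdx.x;
    unsigned int i   = blockIdx.x * blockDim.x + threadIdx.x;

    // Load one element per thread, padding out-of-range threads with zero.
    partial[tid] = (i < w) ? d_vector[i] : 0.0f;
    __syncthreads();

    // Tree reduction within the block.
    for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (tid < s) {
            partial[tid] += partial[tid + s];
        }
        __syncthreads();
    }

    if (tid == 0) {
        d_output[blockIdx.x] = partial[0];
    }
}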

View File

@@ -8,7 +8,7 @@
#include "convolution.cuh"
#include "ilayer.cuh"
namespace Layers {
namespace CUDANet::Layers {
/**
* @brief 2D convolutional layer
@@ -125,6 +125,6 @@ class Conv2d : public ILayer {
void toCuda();
};
} // namespace Layers
} // namespace CUDANet::Layers
#endif // CUDANET_CONV_LAYER_H

View File

@@ -7,7 +7,7 @@
#include "ilayer.cuh"
namespace Layers {
namespace CUDANet::Layers {
/**
* @brief Dense (fully connected) layer
@@ -53,8 +53,8 @@ class Dense : public ILayer {
void setBiases(const float* biases);
private:
int inputSize;
int outputSize;
unsigned int inputSize;
unsigned int outputSize;
float* d_output;
@@ -67,8 +67,8 @@ class Dense : public ILayer {
Layers::Activation activation;
// Precompute kernel launch parameters
int forwardGridSize;
int biasGridSize;
unsigned int forwardGridSize;
unsigned int biasGridSize;
/**
* @brief Initialize the weights to zeros
@@ -89,6 +89,6 @@ class Dense : public ILayer {
void toCuda();
};
} // namespace Layers
} // namespace CUDANet::Layers
#endif // CUDANET_DENSE_LAYER_H
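The launch parameters that Dense precomputes (forwardGridSize, biasGridSize) typically come from a plain ceiling division over the relevant vector length. The snippet below is a generic illustration of that pattern, not the actual constructor code (the block size and the use of outputSize for both grids are assumptions):

// Hypothetical grid-size precomputation, e.g. in the Dense constructor.
constexpr unsigned int BLOCK_SIZE = 256;
forwardGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;  // mat_vec_mul: one thread per output element
biasGridSize    = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;  // vec_vec_add over the bias vector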

View File

@@ -4,7 +4,7 @@
#include <vector>
namespace Layers {
namespace CUDANet::Layers {
/**
* @brief Activation functions
@@ -88,6 +88,6 @@ class ILayer {
Layers::Activation activation;
};
} // namespace Layers
} // namespace CUDANet::Layers
#endif // CUDANET_I_LAYERH

View File

@@ -3,7 +3,7 @@
#include <ilayer.cuh>
namespace Layers {
namespace CUDANet::Layers {
/**
* @brief Input layer, just copies the input to the device
@@ -45,6 +45,6 @@ class Input : public ILayer {
float* d_output;
};
} // namespace Layers
} // namespace CUDANet::Layers
#endif // CUDANET_INPUT_LAYER_H