Add documentation comments

2024-03-12 21:50:06 +01:00
parent 708164e4d0
commit 7157a27e56
9 changed files with 255 additions and 19 deletions

View File

@@ -3,9 +3,23 @@
namespace Kernels {
/**
* @brief Sigmoid activation function kernel
*
* @param src Pointer to the source array
* @param dst Pointer to the destination array
* @param len Length of the arrays
*/
__global__ void
sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
/**
* @brief ReLU activation function kernel
*
* @param src Pointer to the source array
* @param dst Pointer to the destination array
* @param len Length of the arrays
*/
__global__ void
relu(const float* __restrict__ src, float* __restrict__ dst, int len);
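
This header only declares the kernels; as a point of reference, a minimal sketch of what the bodies plausibly look like (one thread per element; the actual .cu implementations may use a different launch scheme):

__global__ void
sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len) {
    // One thread per element; guard against overshoot in the last block.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < len) dst[i] = 1.0f / (1.0f + expf(-src[i]));
}

__global__ void
relu(const float* __restrict__ src, float* __restrict__ dst, int len) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < len) dst[i] = fmaxf(src[i], 0.0f);
}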

View File

@@ -3,6 +3,16 @@
namespace Kernels {
/**
* @brief Kernel that pads the input matrix with zeros
*
* @param d_input Device pointer to the input matrix (as vector)
* @param d_padded Device pointer to the padded matrix (as vector)
* @param w Width of the input matrix
* @param h Height of the input matrix
* @param n Number of input channels
* @param p Padding size
*/
__global__ void padding(
const float* d_input,
float* d_padded,
@@ -12,6 +22,19 @@ __global__ void padding(
int p
);
/**
* @brief Convolution kernel
*
* @param d_input Device pointer to the input matrix
* @param d_kernel Device pointer to the convolution kernel
* @param d_output Device pointer to the output matrix
* @param inputSize Width and height of the input matrix
* @param nChannels Number of channels in the input matrix
* @param kernelSize Width and height of the convolution kernel
* @param stride Convolution stride
* @param nFilters Number of output filters
* @param outputSize Width and height of the output matrix
*/
__global__ void convolution(
const float* d_input,
const float* d_kernel,

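The parameters documented above obey the standard convolution arithmetic. A hypothetical helper, not part of this commit, makes the relationship explicit:

// Hypothetical helpers, not from the repository.
// Padded width/height for padding size p:
inline int paddedSize(int w, int p) { return w + 2 * p; }

// Output width/height of the convolution:
//   e.g. inputSize = 4, kernelSize = 3, stride = 1, p = 1
//        -> (4 + 2 - 3) / 1 + 1 = 4, i.e. SAME padding preserves the size.
inline int convOutputSize(int inputSize, int kernelSize, int stride, int p) {
    return (inputSize + 2 * p - kernelSize) / stride + 1;
}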
View File

@@ -3,6 +3,15 @@
namespace Kernels {
/**
* @brief Matrix vector multiplication kernel
*
* @param d_matrix Device pointer to matrix
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
* @param w Width of the matrix
* @param h Height of the matrix
*/
__global__ void mat_vec_mul(
const float* d_matrix,
const float* d_vector,
@@ -11,6 +20,14 @@ __global__ void mat_vec_mul(
int h
);
/**
* @brief Vector vector addition kernel
*
* @param d_vector1 Device pointer to first vector
* @param d_vector2 Device pointer to second vector
* @param d_output Device pointer to output vector
* @param w Length of the vectors
*/
__global__ void vec_vec_add(
const float* d_vector1,
const float* d_vector2,

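These are again declarations only; plausible one-thread-per-output sketches of the two kernels follow (row-major matrix storage is assumed, which may not match the actual implementation):

__global__ void mat_vec_mul(
    const float* d_matrix, const float* d_vector, float* d_output,
    int w, int h
) {
    // One thread per output row; assumes row-major storage.
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row < h) {
        float sum = 0.0f;
        for (int j = 0; j < w; j++)
            sum += d_matrix[row * w + j] * d_vector[j];
        d_output[row] = sum;
    }
}

__global__ void vec_vec_add(
    const float* d_vector1, const float* d_vector2, float* d_output, int w
) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < w) d_output[i] = d_vector1[i] + d_vector2[i];
}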
View File

@@ -10,8 +10,23 @@
namespace Layers {
/**
* @brief 2D convolutional layer
*
*/
class Conv2d : public ILayer {
public:
/**
* @brief Construct a new Conv2d layer
*
* @param inputSize Width and height of the input matrix
* @param inputChannels Number of channels in the input matrix
* @param kernelSize Width and height of the convolution kernel
* @param stride Convolution stride
* @param padding Padding type ('SAME' or 'VALID')
* @param numFilters Number of output filters
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
*/
Conv2d(
int inputSize,
int inputChannels,
@@ -21,21 +36,57 @@ class Conv2d : public ILayer {
int numFilters,
Layers::Activation activation
);
/**
* @brief Destroy the Conv2d layer
*
*/
~Conv2d();
// Outputs
int outputSize;
/**
* @brief Forward pass of the convolutional layer
*
* @param d_input Device pointer to the input matrix
* @return Device pointer to the output matrix
*/
float* forward(const float* d_input);
void setWeights(const float* weights_input);
void setBiases(const float* biases_input);
void host_conv(const float* input, float* output);
/**
* @brief Set the weights of the convolutional layer
*
* @param weights_input Pointer to the weights
*/
void setWeights(const float* weights_input);
/**
* @brief Set the biases of the convolutional layer
*
* @param biases_input Pointer to the biases
*/
void setBiases(const float* biases_input);
/**
* @brief Get the output size (width and height) of the layer
*
* @return int Output width/height
*/
int getOutputSize() { return outputSize; }
/**
* @brief Get the padding size of the layer
*
* @return int Padding size
*/
int getPaddingSize() { return paddingSize; }
private:
// Inputs
int inputSize;
int inputChannels;
// Outputs
int outputSize;
// Kernel
int kernelSize;
int stride;
@@ -55,8 +106,22 @@ class Conv2d : public ILayer {
// Kernels
Layers::Activation activation;
/**
* @brief Initialize weights of the convolutional layer with zeros
*
*/
void initializeWeights();
/**
* @brief Initialize biases of the convolutional layer with zeros
*
*/
void initializeBiases();
/**
* @brief Copy weights and biases to the device
*
*/
void toCuda();
};
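
Putting the documented interface together, usage would look roughly like the sketch below. Everything here is illustrative: the sizes are placeholders, the padding argument is assumed to be the Layers::Padding enum, and the weight-buffer layout (kernelSize x kernelSize x inputChannels x numFilters) is an assumption the header does not actually specify.

#include <vector>

void conv2dExample(const float* d_input) {  // d_input: device pointer, 28x28x1
    Layers::Conv2d conv2d(
        /*inputSize=*/28, /*inputChannels=*/1, /*kernelSize=*/3,
        /*stride=*/1, /*padding=*/Layers::SAME, /*numFilters=*/8,
        /*activation=*/Layers::RELU
    );
    std::vector<float> weights(3 * 3 * 1 * 8, 0.01f);  // layout assumed
    std::vector<float> biases(8, 0.0f);
    conv2d.setWeights(weights.data());
    conv2d.setBiases(biases.data());
    float* d_output = conv2d.forward(d_input);  // device in, device out
    int outSize = conv2d.getOutputSize();       // 28: SAME preserves the size
    (void)d_output; (void)outSize;
}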

View File

@@ -9,17 +9,47 @@
namespace Layers {
/**
* @brief Dense (fully connected) layer
*
*/
class Dense : public ILayer {
public:
Dense(
int inputSize,
int outputSize,
Layers::Activation activation
);
/**
* @brief Construct a new Dense layer
*
* @param inputSize Size of the input vector
* @param outputSize Size of the output vector
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
*/
Dense(int inputSize, int outputSize, Layers::Activation activation);
/**
* @brief Destroy the Dense layer
*
*/
~Dense();
/**
* @brief Forward pass of the dense layer
*
* @param d_input Device pointer to the input vector
* @return Device pointer to the output vector
*/
float* forward(const float* d_input);
/**
* @brief Set the weights of the layer
*
* @param weights Pointer to vector of weights
*/
void setWeights(const float* weights);
/**
* @brief Set the biases of the layer
*
* @param biases Pointer to vector of biases
*/
void setBiases(const float* biases);
private:
@@ -36,8 +66,22 @@ class Dense : public ILayer {
Layers::Activation activation;
/**
* @brief Initialize the weights to zeros
*
*/
void initializeWeights();
/**
* @brief Initialize the biases to zeros
*
*/
void initializeBiases();
/**
* @brief Copy the weights and biases to the device
*
*/
void toCuda();
};
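
A corresponding sketch for Dense, with placeholder sizes; the inputSize x outputSize weight layout feeding mat_vec_mul is an assumption:

#include <vector>

void denseExample(const float* d_input) {  // d_input: device pointer, 4 floats
    Layers::Dense dense(/*inputSize=*/4, /*outputSize=*/2, Layers::SIGMOID);
    std::vector<float> weights(4 * 2, 0.5f);  // layout assumed
    std::vector<float> biases(2, 0.0f);
    dense.setWeights(weights.data());
    dense.setBiases(biases.data());
    float* d_output = dense.forward(d_input);  // device pointer to 2 activations
    (void)d_output;
}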

View File

@@ -6,22 +6,72 @@
namespace Layers {
/**
* @brief Activation functions
*
* SIGMOID: Logistic sigmoid
* RELU: Rectified Linear Unit
* NONE: No activation (identity)
*
*/
enum Activation { SIGMOID, RELU, NONE };
/**
* @brief Padding types
*
* SAME: Zero padding such that the output size is the same as the input
* VALID: No padding
*
*/
enum Padding { SAME, VALID };
/**
* @brief Base class for all layers
*/
class ILayer {
public:
/**
* @brief Destroy the ILayer object
*
*/
virtual ~ILayer() {}
/**
* @brief Virtual function for forward pass
*
* @param input Pointer to the input (a device pointer; a host pointer for the Input layer)
* @return float* Device pointer to the output
*/
virtual float* forward(const float* input) = 0;
/**
* @brief Virtual function for setting weights
*
* @param weights Pointer to the weights
*/
virtual void setWeights(const float* weights) = 0;
/**
* @brief Virtual function for setting biases
*
* @param biases Pointer to the biases
*/
virtual void setBiases(const float* biases) = 0;
private:
/**
* @brief Initialize the weights
*/
virtual void initializeWeights() = 0;
/**
* @brief Initialize the biases
*/
virtual void initializeBiases() = 0;
/**
* @brief Copy the weights and biases to the device
*/
virtual void toCuda() = 0;
int inputSize;

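The point of the shared base class is that layers can be chained generically. An illustrative driver over the documented interface (not from the repository):

#include <vector>

// Illustrative only: feed each layer's output into the next. After the first
// (Input) layer, every pointer in the chain lives on the device.
float* runNetwork(const std::vector<Layers::ILayer*>& layers,
                  const float* input) {
    float* out = nullptr;
    const float* cur = input;
    for (Layers::ILayer* layer : layers) {
        out = layer->forward(cur);
        cur = out;
    }
    return out;
}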
View File

@@ -5,11 +5,31 @@
namespace Layers {
/**
* @brief Input layer; copies the input to the device
*
*/
class Input : public ILayer {
public:
/**
* @brief Create a new Input layer
*
* @param inputSize Size of the input vector
*/
Input(int inputSize);
/**
* @brief Destroy the Input layer
*
*/
~Input();
/**
* @brief Forward pass of the input layer: copies the input to the device
*
* @param input Host pointer to the input vector
* @return Device pointer to the output vector
*/
float* forward(const float* input);
void setWeights(const float* weights);
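
Since this forward takes a host pointer while the rest of the stack works on device pointers, its job reduces to an upload. A hypothetical free-function equivalent is below; the real layer presumably reuses a buffer allocated in its constructor rather than allocating on every call:

// Hypothetical sketch: copy `len` floats from host to a fresh device buffer.
float* copyToDevice(const float* host_input, int len) {
    float* d_out = nullptr;
    CUDA_CHECK(cudaMalloc(&d_out, sizeof(float) * len));
    CUDA_CHECK(cudaMemcpy(d_out, host_input, sizeof(float) * len,
                          cudaMemcpyHostToDevice));
    return d_out;
}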

View File

@@ -4,7 +4,10 @@
#include <cuda_runtime.h>
#include <cstdio>
// CUDA error checking macro
/**
* @brief CUDA error checking macro
*
*/
#define CUDA_CHECK(call) \
do { \
cudaError_t result = call; \

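The hunk cuts off mid-macro; the canonical shape of this pattern is sketched below. The error-handling details (message format, exit) are a guess at what the file does, not a quote of it:

#include <cstdio>
#include <cstdlib>
#include <cuda_runtime.h>

#define CUDA_CHECK(call)                                               \
    do {                                                               \
        cudaError_t result = call;                                     \
        if (result != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error: %s at %s:%d\n",               \
                    cudaGetErrorString(result), __FILE__, __LINE__);   \
            exit(EXIT_FAILURE);                                        \
        }                                                              \
    } while (0)

// Usage: wrap any runtime call that returns cudaError_t, e.g.
// CUDA_CHECK(cudaMemcpy(dst, src, bytes, cudaMemcpyHostToDevice));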
View File

@@ -80,7 +80,7 @@ TEST_F(Conv2dTest, SimpleTest) {
);
int outputSize = (inputSize - kernelSize) / stride + 1;
EXPECT_EQ(outputSize, conv2d.outputSize);
EXPECT_EQ(outputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
@@ -168,16 +168,16 @@ TEST_F(Conv2dTest, PaddedTest) {
activation, input, kernels.data(), d_input
);
EXPECT_EQ(inputSize, conv2d.outputSize);
EXPECT_EQ(inputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
std::vector<float> output(
conv2d.outputSize * conv2d.outputSize * numFilters
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
);
cudaMemcpy(
output.data(), d_output,
sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
cudaMemcpyDeviceToHost
);
@@ -253,16 +253,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
activation, input, kernels.data(), d_input
);
EXPECT_EQ(inputSize, conv2d.outputSize);
EXPECT_EQ(inputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
std::vector<float> output(
conv2d.outputSize * conv2d.outputSize * numFilters
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
);
cudaMemcpy(
output.data(), d_output,
sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
cudaMemcpyDeviceToHost
);