Add documentation comments

2024-03-12 21:50:06 +01:00
parent 708164e4d0
commit 7157a27e56
9 changed files with 255 additions and 19 deletions

View File

@@ -3,9 +3,23 @@
namespace Kernels {
/**
* @brief Sigmoid activation function kernel
*
* @param src Pointer to the source array
* @param dst Pointer to the destination array
* @param len Length of the arrays
*/
__global__ void
sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
/**
* @brief Relu activation function kernel
*
* @param src Pointer to the source array
* @param dst Pointer to the destination array
* @param len Length of the arrays
*/
__global__ void
relu(const float* __restrict__ src, float* __restrict__ dst, int len);
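
The declarations above leave the kernel bodies to the corresponding .cu file, which this commit does not show. A minimal sketch of how such element-wise activation kernels are commonly written (one thread per element; the indexing scheme is an assumption, not code from this repository):

__global__ void sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;  // one thread per element
    if (i < len)
        dst[i] = 1.0f / (1.0f + expf(-src[i]));     // sigmoid(x) = 1 / (1 + e^-x)
}

__global__ void relu(const float* __restrict__ src, float* __restrict__ dst, int len) {
    int i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i < len)
        dst[i] = fmaxf(src[i], 0.0f);               // relu(x) = max(x, 0)
}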

View File

@@ -3,6 +3,16 @@
namespace Kernels {
/**
* @brief Kernel that pads the input matrix with zeros
*
* @param d_input Device pointer to the input matrix (as vector)
* @param d_padded Device pointer to the padded matrix (as vector)
* @param w Width of the input matrix
* @param h Height of the input matrix
* @param n Number of input channels
* @param p Padding size
*/
__global__ void padding(
const float* d_input,
float* d_padded,
@@ -12,6 +22,19 @@ __global__ void padding(
int p
);
/**
* @brief Convolution kernel
*
* @param d_input Device pointer to the input matrix
* @param d_kernel Device pointer to the convolution kernel
* @param d_output Device pointer to the output matrix
* @param inputSize Width and height of the input matrix
* @param nChannels Number of channels in the input matrix
* @param kernelSize Width and height of the convolution kernel
* @param stride Convolution stride
* @param nFilters Number of output filters
* @param outputSize Width and height of the output matrix
*/
__global__ void convolution(
const float* d_input,
const float* d_kernel,
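
The size parameters documented above follow the standard convolution arithmetic, which the tests further down also rely on. A small helper illustrating the relation (illustrative only, not part of this commit):

// Illustrative helper, not part of this commit: output-size arithmetic
// for a square convolution with padding p. p = 0 corresponds to VALID;
// for SAME, p is chosen so the output size equals the input size.
inline int convOutputSize(int inputSize, int kernelSize, int stride, int p) {
    return (inputSize + 2 * p - kernelSize) / stride + 1;
}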

View File

@@ -3,6 +3,15 @@
namespace Kernels {
/**
* @brief Matrix vector multiplication kernel
*
* @param d_matrix Device pointer to matrix
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
* @param w Width of the matrix
* @param h Height of the matrix
*/
__global__ void mat_vec_mul(
const float* d_matrix,
const float* d_vector,
@@ -11,6 +20,14 @@ __global__ void mat_vec_mul(
int h
);
/**
* @brief Vector vector addition kernel
*
* @param d_vector1 Device pointer to first vector
* @param d_vector2 Device pointer to second vector
* @param d_output Device pointer to output vector
* @param w Length of the vectors
*/
__global__ void vec_vec_add(
const float* d_vector1,
const float* d_vector2,
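
As with the activation kernels, the bodies live in the .cu file. A plausible one-thread-per-row sketch of mat_vec_mul, assuming the matrix is stored row-major (an assumption this diff does not confirm):

__global__ void mat_vec_mul(
    const float* d_matrix, const float* d_vector, float* d_output, int w, int h
) {
    int row = blockDim.x * blockIdx.x + threadIdx.x;     // one output element per thread
    if (row < h) {
        float sum = 0.0f;
        for (int j = 0; j < w; ++j)
            sum += d_matrix[row * w + j] * d_vector[j];  // dot(matrix row, vector)
        d_output[row] = sum;
    }
}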

View File

@@ -10,8 +10,23 @@
namespace Layers {
/**
* @brief 2D convolutional layer
*
*/
class Conv2d : public ILayer {
public:
/**
* @brief Construct a new Conv 2d layer
*
* @param inputSize Width and height of the input matrix
* @param inputChannels Number of channels in the input matrix
* @param kernelSize Width and height of the convolution kernel
* @param stride Convolution stride
* @param padding Padding type ('SAME' or 'VALID')
* @param numFilters Number of output filters
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
*/
Conv2d(
int inputSize,
int inputChannels,
@@ -21,21 +36,57 @@ class Conv2d : public ILayer {
int numFilters,
Layers::Activation activation
);
/**
* @brief Destroy the Conv 2d object
*
*/
~Conv2d();
-// Outputs
-int outputSize;
/**
* @brief Forward pass of the convolutional layer
*
* @param d_input Device pointer to the input matrix
* @return Device pointer to the output matrix
*/
float* forward(const float* d_input);
-void setWeights(const float* weights_input);
-void setBiases(const float* biases_input);
-void host_conv(const float* input, float* output);
/**
* @brief Set the weights of the convolutional layer
*
* @param weights_input Pointer to the weights
*/
void setWeights(const float* weights_input);
/**
* @brief Set the biases of the convolutional layer
*
* @param biases_input Pointer to the biases
*/
void setBiases(const float* biases_input);
/**
* @brief Get the output width (and height) of the layer
*
* @return int
*/
int getOutputSize() { return outputSize; }
/**
* @brief Get the padding size of the layer
*
* @return int
*/
int getPaddingSize() { return paddingSize; }
private:
// Inputs
int inputSize;
int inputChannels;
// Outputs
int outputSize;
// Kernel
int kernelSize;
int stride;
@@ -55,8 +106,22 @@ class Conv2d : public ILayer {
// Kernels
Layers::Activation activation;
/**
* @brief Initialize weights of the convolutional layer with zeros
*
*/
void initializeWeights();
/**
* @brief Initialize biases of the convolutional layer with zeros
*
*/
void initializeBiases();
/**
* @brief Copy weights and biases to the device
*
*/
void toCuda();
};
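
Taken together, the documented interface can be used roughly as follows; the argument values are made up for illustration, and the parameter order follows the constructor's doc comment:

// Hypothetical usage; values are illustrative only.
Layers::Conv2d conv(
    /*inputSize=*/28, /*inputChannels=*/1, /*kernelSize=*/3, /*stride=*/1,
    Layers::SAME, /*numFilters=*/16, Layers::RELU
);
float* d_out = conv.forward(d_input);  // d_input: device pointer, per the docs
int outSize  = conv.getOutputSize();   // 28: SAME padding preserves the size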

View File

@@ -9,17 +9,47 @@
namespace Layers {
/**
* @brief Dense (fully connected) layer
*
*/
class Dense : public ILayer {
public:
-Dense(
-int inputSize,
-int outputSize,
-Layers::Activation activation
-);
/**
* @brief Construct a new Dense layer
*
* @param inputSize Size of the input vector
* @param outputSize Size of the output vector
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
*/
Dense(int inputSize, int outputSize, Layers::Activation activation);
/**
* @brief Destroy the Dense layer
*
*/
~Dense();
/**
* @brief Forward pass of the dense layer
*
* @param d_input Device pointer to the input vector
* @return Device pointer to the output vector
*/
float* forward(const float* d_input);
/**
* @brief Set the weights of the layer
*
* @param weights Pointer to vector of weights
*/
void setWeights(const float* weights);
/**
* @brief Set the biases of the layer
*
* @param biases Pointer to vector of biases
*/
void setBiases(const float* biases);
private:
@@ -36,8 +66,22 @@ class Dense : public ILayer {
Layers::Activation activation;
/**
* @brief Initialize the weights to zeros
*
*/
void initializeWeights();
/**
* @brief Initialize the biases to zeros
*
*/
void initializeBiases();
/**
* @brief Copy the weights and biases to the device
*
*/
void toCuda();
};
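
A corresponding usage sketch for Dense (illustrative values; the row-major weight layout matches the mat_vec_mul sketch earlier and is an assumption):

// Hypothetical usage; the layer copies these host arrays to the device.
Layers::Dense fc(/*inputSize=*/128, /*outputSize=*/10, Layers::SIGMOID);
std::vector<float> w(128 * 10, 0.01f);  // weights, assumed row-major
std::vector<float> b(10, 0.0f);         // biases
fc.setWeights(w.data());
fc.setBiases(b.data());
float* d_out = fc.forward(d_in);        // d_in: device pointer from a previous layer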

View File

@@ -6,22 +6,72 @@
namespace Layers {
/**
* @brief Activation functions
*
* SIGMOID: Sigmoid
* RELU: Rectified Linear Unit
* NONE: No activation
*
*/
enum Activation { SIGMOID, RELU, NONE };
/**
* @brief Padding types
*
* SAME: Zero padding such that the output size is the same as the input
* VALID: No padding
*
*/
enum Padding { SAME, VALID };
/**
* @brief Base class for all layers
*/
class ILayer {
public:
/**
* @brief Destroy the ILayer object
*
*/
virtual ~ILayer() {}
/**
* @brief Virtual function for forward pass
*
* @param input (Device) Pointer to the input
* @return float* Device pointer to the output
*/
virtual float* forward(const float* input) = 0;
/**
* @brief Virtual function for setting weights
*
* @param weights Pointer to the weights
*/
virtual void setWeights(const float* weights) = 0;
/**
* @brief Virtual function for setting biases
*
* @param biases Pointer to the biases
*/
virtual void setBiases(const float* biases) = 0;
private:
/**
* @brief Initialize the weights
*/
virtual void initializeWeights() = 0;
/**
* @brief Initialize the biases
*/
virtual void initializeBiases() = 0;
/**
* @brief Copy the weights and biases to the device
*/
virtual void toCuda() = 0;
int inputSize;
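
Because every layer implements the same forward contract, a network reduces to chaining pointers from layer to layer. A sketch under the assumption that an Input layer heads the chain (none of this code is in the commit):

// Hypothetical chaining; Input::forward takes a host pointer and returns
// a device pointer, so subsequent layers receive device memory.
std::vector<Layers::ILayer*> net = { &input, &conv, &dense };
const float* ptr = hostInput.data();
for (Layers::ILayer* layer : net)
    ptr = layer->forward(ptr);  // output of one layer feeds the next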

View File

@@ -5,11 +5,31 @@
namespace Layers {
/**
* @brief Input layer, just copies the input to the device
*
*/
class Input : public ILayer {
public:
/**
* @brief Create a new Input layer
*
* @param inputSize Size of the input vector
*/
Input(int inputSize);
/**
* @brief Destroy the Input layer
*
*/
~Input();
/**
* @brief Forward pass of the input layer. Just copies the input to the device
*
* @param input Host pointer to the input vector
* @return Device pointer to the output vector
*/
float* forward(const float* input);
void setWeights(const float* weights);
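
Given the doc comment above, Input::forward is presumably little more than a host-to-device copy. A sketch, assuming the layer owns a device buffer d_output sized in the constructor (d_output and inputSize are assumed members, not confirmed by this diff):

// Plausible shape of Input::forward; not the file's actual code.
float* Layers::Input::forward(const float* input) {
    CUDA_CHECK(cudaMemcpy(
        d_output, input, sizeof(float) * inputSize, cudaMemcpyHostToDevice
    ));
    return d_output;
}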

View File

@@ -4,7 +4,10 @@
#include <cuda_runtime.h>
#include <cstdio>
-// CUDA error checking macro
/**
* @brief CUDA error checking macro
*
*/
#define CUDA_CHECK(call) \
do { \
cudaError_t result = call; \
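
The hunk cuts the macro off after its first line. A typical continuation of this pattern looks like the following sketch (the conventional shape, not necessarily this file's exact code):

// Assumed continuation, renamed to avoid claiming it is the file's macro.
// <cstdio> covers fprintf; exit() would additionally need <cstdlib>.
#define CUDA_CHECK_SKETCH(call)                                        \
    do {                                                               \
        cudaError_t result = call;                                     \
        if (result != cudaSuccess) {                                   \
            fprintf(stderr, "CUDA error at %s:%d: %s\n",               \
                    __FILE__, __LINE__, cudaGetErrorString(result));   \
            exit(EXIT_FAILURE);                                        \
        }                                                              \
    } while (0)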

View File

@@ -80,7 +80,7 @@ TEST_F(Conv2dTest, SimpleTest) {
);
int outputSize = (inputSize - kernelSize) / stride + 1;
-EXPECT_EQ(outputSize, conv2d.outputSize);
EXPECT_EQ(outputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
@@ -168,16 +168,16 @@ TEST_F(Conv2dTest, PaddedTest) {
activation, input, kernels.data(), d_input
);
-EXPECT_EQ(inputSize, conv2d.outputSize);
EXPECT_EQ(inputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
std::vector<float> output(
-conv2d.outputSize * conv2d.outputSize * numFilters
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
);
cudaMemcpy(
output.data(), d_output,
-sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
cudaMemcpyDeviceToHost
);
@@ -253,16 +253,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
activation, input, kernels.data(), d_input
);
-EXPECT_EQ(inputSize, conv2d.outputSize);
EXPECT_EQ(inputSize, conv2d.getOutputSize());
d_output = conv2d.forward(d_input);
std::vector<float> output(
-conv2d.outputSize * conv2d.outputSize * numFilters
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
);
cudaMemcpy(
output.data(), d_output,
-sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
cudaMemcpyDeviceToHost
);