diff --git a/include/kernels/activations.cuh b/include/kernels/activations.cuh
index 34ff65d..a23402d 100644
--- a/include/kernels/activations.cuh
+++ b/include/kernels/activations.cuh
@@ -3,9 +3,23 @@
 
 namespace Kernels {
 
+/**
+ * @brief Sigmoid activation function kernel
+ *
+ * @param src Pointer to the source array
+ * @param dst Pointer to the destination array
+ * @param len Length of the arrays
+ */
 __global__ void sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
 
+/**
+ * @brief ReLU activation function kernel
+ *
+ * @param src Pointer to the source array
+ * @param dst Pointer to the destination array
+ * @param len Length of the arrays
+ */
 __global__ void relu(const float* __restrict__ src, float* __restrict__ dst, int len);
 
diff --git a/include/kernels/convolution.cuh b/include/kernels/convolution.cuh
index 0332320..ea3fc32 100644
--- a/include/kernels/convolution.cuh
+++ b/include/kernels/convolution.cuh
@@ -3,6 +3,16 @@
 
 namespace Kernels {
 
+/**
+ * @brief Kernel that pads the input matrix with zeros
+ *
+ * @param d_input Device pointer to the input matrix (as vector)
+ * @param d_padded Device pointer to the padded matrix (as vector)
+ * @param w Width of the input matrix
+ * @param h Height of the input matrix
+ * @param n Number of input channels
+ * @param p Padding size
+ */
 __global__ void padding(
     const float* d_input,
     float* d_padded,
@@ -12,6 +22,19 @@ __global__ void padding(
     int p
 );
 
+/**
+ * @brief Convolution kernel
+ *
+ * @param d_input Device pointer to the input matrix
+ * @param d_kernel Device pointer to the convolution kernel
+ * @param d_output Device pointer to the output matrix
+ * @param inputSize Width and height of the input matrix
+ * @param nChannels Number of channels in the input matrix
+ * @param kernelSize Width and height of the convolution kernel
+ * @param stride Convolution stride
+ * @param nFilters Number of output filters
+ * @param outputSize Width and height of the output matrix
+ */
 __global__ void convolution(
     const float* d_input,
     const float* d_kernel,
diff --git a/include/kernels/matmul.cuh b/include/kernels/matmul.cuh
index e8dd782..897f8df 100644
--- a/include/kernels/matmul.cuh
+++ b/include/kernels/matmul.cuh
@@ -3,6 +3,15 @@
 
 namespace Kernels {
 
+/**
+ * @brief Matrix-vector multiplication kernel
+ *
+ * @param d_matrix Device pointer to the matrix
+ * @param d_vector Device pointer to the vector
+ * @param d_output Device pointer to the output vector
+ * @param w Width of the matrix
+ * @param h Height of the matrix
+ */
 __global__ void mat_vec_mul(
     const float* d_matrix,
     const float* d_vector,
@@ -11,6 +20,14 @@ __global__ void mat_vec_mul(
     int h
 );
 
+/**
+ * @brief Vector-vector addition kernel
+ *
+ * @param d_vector1 Device pointer to the first vector
+ * @param d_vector2 Device pointer to the second vector
+ * @param d_output Device pointer to the output vector
+ * @param w Length of the vectors
+ */
 __global__ void vec_vec_add(
     const float* d_vector1,
     const float* d_vector2,
diff --git a/include/layers/conv2d.cuh b/include/layers/conv2d.cuh
index 8da8377..8e0335c 100644
--- a/include/layers/conv2d.cuh
+++ b/include/layers/conv2d.cuh
@@ -10,8 +10,23 @@
 
 namespace Layers {
 
+/**
+ * @brief 2D convolutional layer
+ *
+ */
 class Conv2d : public ILayer {
   public:
+    /**
+     * @brief Construct a new Conv2d layer
+     *
+     * @param inputSize Width and height of the input matrix
+     * @param inputChannels Number of channels in the input matrix
+     * @param kernelSize Width and height of the convolution kernel
+     * @param stride Convolution stride
+     * @param padding Padding type ('SAME' or 'VALID')
+     * @param numFilters Number of output filters
+     * @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
+     */
     Conv2d(
         int inputSize,
         int inputChannels,
@@ -21,21 +36,57 @@ class Conv2d : public ILayer {
         int numFilters,
         Layers::Activation activation
     );
+
+    /**
+     * @brief Destroy the Conv2d layer
+     *
+     */
     ~Conv2d();
 
-    // Outputs
-    int outputSize;
-
+    /**
+     * @brief Forward pass of the convolutional layer
+     *
+     * @param d_input Device pointer to the input matrix
+     * @return Device pointer to the output matrix
+     */
     float* forward(const float* d_input);
-    void setWeights(const float* weights_input);
-    void setBiases(const float* biases_input);
-    void host_conv(const float* input, float* output);
+
+    /**
+     * @brief Set the weights of the convolutional layer
+     *
+     * @param weights_input Pointer to the weights
+     */
+    void setWeights(const float* weights_input);
+
+    /**
+     * @brief Set the biases of the convolutional layer
+     *
+     * @param biases_input Pointer to the biases
+     */
+    void setBiases(const float* biases_input);
+
+    /**
+     * @brief Get the output width (and height) of the layer
+     *
+     * @return int
+     */
+    int getOutputSize() { return outputSize; }
+
+    /**
+     * @brief Get the padding size of the layer
+     *
+     * @return int
+     */
+    int getPaddingSize() { return paddingSize; }
 
   private:
     // Inputs
     int inputSize;
     int inputChannels;
 
+    // Outputs
+    int outputSize;
+
     // Kernel
     int kernelSize;
     int stride;
@@ -55,8 +106,22 @@
     // Kernels
     Layers::Activation activation;
 
+    /**
+     * @brief Initialize weights of the convolutional layer with zeros
+     *
+     */
     void initializeWeights();
+
+    /**
+     * @brief Initialize biases of the convolutional layer with zeros
+     *
+     */
     void initializeBiases();
+
+    /**
+     * @brief Copy weights and biases to the device
+     *
+     */
     void toCuda();
 };
 
diff --git a/include/layers/dense.cuh b/include/layers/dense.cuh
index 6d17e2c..7191dd7 100644
--- a/include/layers/dense.cuh
+++ b/include/layers/dense.cuh
@@ -9,17 +9,47 @@
 
 namespace Layers {
 
+/**
+ * @brief Dense (fully connected) layer
+ *
+ */
 class Dense : public ILayer {
   public:
-    Dense(
-        int inputSize,
-        int outputSize,
-        Layers::Activation activation
-    );
+    /**
+     * @brief Construct a new Dense layer
+     *
+     * @param inputSize Size of the input vector
+     * @param outputSize Size of the output vector
+     * @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
+     */
+    Dense(int inputSize, int outputSize, Layers::Activation activation);
+
+    /**
+     * @brief Destroy the Dense layer
+     *
+     */
     ~Dense();
 
+    /**
+     * @brief Forward pass of the dense layer
+     *
+     * @param d_input Device pointer to the input vector
+     * @return Device pointer to the output vector
+     */
     float* forward(const float* d_input);
+
+    /**
+     * @brief Set the weights of the layer
+     *
+     * @param weights Pointer to vector of weights
+     */
    void setWeights(const float* weights);
+
+    /**
+     * @brief Set the biases of the layer
+     *
+     * @param biases Pointer to vector of biases
+     */
     void setBiases(const float* biases);
 
   private:
@@ -36,8 +66,22 @@
     Layers::Activation activation;
 
+    /**
+     * @brief Initialize the weights to zeros
+     *
+     */
     void initializeWeights();
+
+    /**
+     * @brief Initialize the biases to zeros
+     *
+     */
     void initializeBiases();
+
+    /**
+     * @brief Copy the weights and biases to the device
+     *
+     */
     void toCuda();
 };
 
diff --git a/include/layers/ilayer.cuh b/include/layers/ilayer.cuh
index a63fbe1..8aa42db 100644
--- a/include/layers/ilayer.cuh
+++ b/include/layers/ilayer.cuh
@@ -6,22 +6,72 @@
 
 namespace Layers {
 
+/**
+ * @brief Activation functions
+ *
+ * SIGMOID: Sigmoid
+ * RELU: Rectified Linear Unit
+ *
+ */
 enum Activation { SIGMOID, RELU, NONE };
 
+/**
+ * @brief Padding types
+ *
+ * SAME: Zero padding such that the output size is the same as the input
+ * VALID: No padding
+ *
+ */
 enum Padding { SAME, VALID };
 
+/**
+ * @brief Base class for all layers
+ */
 class ILayer {
   public:
+    /**
+     * @brief Destroy the ILayer object
+     *
+     */
    virtual ~ILayer() {}
 
+    /**
+     * @brief Virtual function for forward pass
+     *
+     * @param input Pointer to the input (host pointer for the Input layer, device pointer otherwise)
+     * @return float* Device pointer to the output
+     */
     virtual float* forward(const float* input) = 0;
+
+    /**
+     * @brief Virtual function for setting weights
+     *
+     * @param weights Pointer to the weights
+     */
     virtual void setWeights(const float* weights) = 0;
+
+    /**
+     * @brief Virtual function for setting biases
+     *
+     * @param biases Pointer to the biases
+     */
     virtual void setBiases(const float* biases) = 0;
 
   private:
+
+    /**
+     * @brief Initialize the weights
+     */
     virtual void initializeWeights() = 0;
+
+    /**
+     * @brief Initialize the biases
+     */
     virtual void initializeBiases() = 0;
 
+    /**
+     * @brief Copy the weights and biases to the device
+     */
     virtual void toCuda() = 0;
 
     int inputSize;
diff --git a/include/layers/input.cuh b/include/layers/input.cuh
index 5c66124..d4a8283 100644
--- a/include/layers/input.cuh
+++ b/include/layers/input.cuh
@@ -5,11 +5,31 @@
 
 namespace Layers {
 
+/**
+ * @brief Input layer; simply copies the input to the device
+ *
+ */
 class Input : public ILayer {
   public:
+    /**
+     * @brief Construct a new Input layer
+     *
+     * @param inputSize Size of the input vector
+     */
     Input(int inputSize);
+
+    /**
+     * @brief Destroy the Input layer
+     *
+     */
     ~Input();
 
+    /**
+     * @brief Forward pass of the input layer; copies the input to the device
+     *
+     * @param input Host pointer to the input vector
+     * @return Device pointer to the output vector
+     */
     float* forward(const float* input);
 
     void setWeights(const float* weights);
diff --git a/include/utils/cuda_helper.cuh b/include/utils/cuda_helper.cuh
index 0a80a05..799044d 100644
--- a/include/utils/cuda_helper.cuh
+++ b/include/utils/cuda_helper.cuh
@@ -4,7 +4,10 @@
 #include
 #include
 
-// CUDA error checking macro
+/**
+ * @brief CUDA error checking macro
+ *
+ */
 #define CUDA_CHECK(call) \
     do { \
         cudaError_t result = call; \
diff --git a/test/layers/test_conv2d.cu b/test/layers/test_conv2d.cu
index ac73652..993a91b 100644
--- a/test/layers/test_conv2d.cu
+++ b/test/layers/test_conv2d.cu
@@ -80,7 +80,7 @@ TEST_F(Conv2dTest, SimpleTest) {
     );
 
     int outputSize = (inputSize - kernelSize) / stride + 1;
-    EXPECT_EQ(outputSize, conv2d.outputSize);
+    EXPECT_EQ(outputSize, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
@@ -168,16 +168,16 @@ TEST_F(Conv2dTest, PaddedTest) {
         activation, input, kernels.data(), d_input
     );
 
-    EXPECT_EQ(inputSize, conv2d.outputSize);
+    EXPECT_EQ(inputSize, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
     std::vector<float> output(
-        conv2d.outputSize * conv2d.outputSize * numFilters
+        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
     );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
         cudaMemcpyDeviceToHost
     );
 
@@ -253,16 +253,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
        activation, input, kernels.data(), d_input
     );
 
-    EXPECT_EQ(inputSize, conv2d.outputSize);
+    EXPECT_EQ(inputSize, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
     std::vector<float> output(
-        conv2d.outputSize * conv2d.outputSize * numFilters
+        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
    );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
         cudaMemcpyDeviceToHost
     );