Mirror of https://github.com/lordmathis/CUDANet.git

Commit: Add documentation comments
@@ -3,9 +3,23 @@
 
 namespace Kernels {
 
+/**
+ * @brief Sigmoid activation function kernel
+ *
+ * @param src Pointer to the source array
+ * @param dst Pointer to the destination array
+ * @param len Length of the arrays
+ */
 __global__ void
 sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
 
+/**
+ * @brief ReLU activation function kernel
+ *
+ * @param src Pointer to the source array
+ * @param dst Pointer to the destination array
+ * @param len Length of the arrays
+ */
 __global__ void
 relu(const float* __restrict__ src, float* __restrict__ dst, int len);
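These declarations only fix the kernel signatures; the bodies live in the corresponding .cu file, which this hunk does not show. For orientation, here is a minimal sketch of what such element-wise activation kernels usually look like: one thread per element with a bounds check. The _sketch names mark this as an illustration, not the repository's actual code.

    #include <cuda_runtime.h>
    #include <math.h>

    __global__ void sigmoid_sketch(const float* __restrict__ src,
                                   float* __restrict__ dst, int len) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < len) {
            dst[i] = 1.0f / (1.0f + expf(-src[i]));  // logistic function
        }
    }

    __global__ void relu_sketch(const float* __restrict__ src,
                                float* __restrict__ dst, int len) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < len) {
            dst[i] = fmaxf(src[i], 0.0f);  // max(x, 0)
        }
    }

A caller would launch either kernel with one thread per element, e.g. sigmoid_sketch<<<(len + 255) / 256, 256>>>(d_src, d_dst, len);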
@@ -3,6 +3,16 @@
 
 namespace Kernels {
 
+/**
+ * @brief Kernel that pads the input matrix with zeros
+ *
+ * @param d_input Device pointer to the input matrix (as vector)
+ * @param d_padded Device pointer to the padded matrix (as vector)
+ * @param w Width of the input matrix
+ * @param h Height of the input matrix
+ * @param n Number of input channels
+ * @param p Padding size
+ */
 __global__ void padding(
     const float* d_input,
     float* d_padded,
@@ -12,6 +22,19 @@ __global__ void padding(
     int p
 );
 
+/**
+ * @brief Convolution kernel
+ *
+ * @param d_input Device pointer to the input matrix
+ * @param d_kernel Device pointer to the convolution kernel
+ * @param d_output Device pointer to the output matrix
+ * @param inputSize Width and height of the input matrix
+ * @param nChannels Number of channels in the input matrix
+ * @param kernelSize Width and height of the convolution kernel
+ * @param stride Convolution stride
+ * @param nFilters Number of output filters
+ * @param outputSize Width and height of the output matrix
+ */
 __global__ void convolution(
     const float* d_input,
     const float* d_kernel,
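The padding kernel maps a w x h x n input into a (w + 2p) x (h + 2p) x n output. Below is a hedged sketch of a matching body, one thread per padded element; the channel-interleaved flat layout it assumes is a guess, since the diff shows only the signature.

    __global__ void padding_sketch(const float* d_input, float* d_padded,
                                   int w, int h, int n, int p) {
        int idx     = blockIdx.x * blockDim.x + threadIdx.x;
        int paddedW = w + 2 * p;
        int paddedH = h + 2 * p;
        if (idx < paddedW * paddedH * n) {
            // Assumed layout: idx = (row * paddedW + col) * n + channel
            int c   = idx % n;
            int col = (idx / n) % paddedW;
            int row = idx / (n * paddedW);
            bool inside = row >= p && row < h + p && col >= p && col < w + p;
            d_padded[idx] = inside
                ? d_input[((row - p) * w + (col - p)) * n + c]
                : 0.0f;  // zero fill outside the original image
        }
    }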
@@ -3,6 +3,15 @@
 
 namespace Kernels {
 
+/**
+ * @brief Matrix-vector multiplication kernel
+ *
+ * @param d_matrix Device pointer to the matrix
+ * @param d_vector Device pointer to the vector
+ * @param d_output Device pointer to the output vector
+ * @param w Width of the matrix
+ * @param h Height of the matrix
+ */
 __global__ void mat_vec_mul(
     const float* d_matrix,
     const float* d_vector,
@@ -11,6 +20,14 @@ __global__ void mat_vec_mul(
     int h
 );
 
+/**
+ * @brief Vector-vector addition kernel
+ *
+ * @param d_vector1 Device pointer to the first vector
+ * @param d_vector2 Device pointer to the second vector
+ * @param d_output Device pointer to the output vector
+ * @param w Length of the vectors
+ */
 __global__ void vec_vec_add(
     const float* d_vector1,
     const float* d_vector2,
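Together these two kernels are enough for a dense forward pass: W·x via mat_vec_mul (w columns, h rows), then the bias via vec_vec_add. A sketch of plausible bodies follows, assuming a row-major matrix and one thread per output element; illustrative only, not the repository's implementation.

    __global__ void mat_vec_mul_sketch(const float* d_matrix,
                                       const float* d_vector,
                                       float* d_output, int w, int h) {
        int row = blockIdx.x * blockDim.x + threadIdx.x;
        if (row < h) {
            float sum = 0.0f;
            for (int col = 0; col < w; ++col) {
                sum += d_matrix[row * w + col] * d_vector[col];  // row-major assumed
            }
            d_output[row] = sum;
        }
    }

    __global__ void vec_vec_add_sketch(const float* d_vector1,
                                       const float* d_vector2,
                                       float* d_output, int w) {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < w) {
            d_output[i] = d_vector1[i] + d_vector2[i];
        }
    }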
@@ -10,8 +10,23 @@
 
 namespace Layers {
 
+/**
+ * @brief 2D convolutional layer
+ *
+ */
 class Conv2d : public ILayer {
   public:
+    /**
+     * @brief Construct a new Conv2d layer
+     *
+     * @param inputSize Width and height of the input matrix
+     * @param inputChannels Number of channels in the input matrix
+     * @param kernelSize Width and height of the convolution kernel
+     * @param stride Convolution stride
+     * @param padding Padding type ('SAME' or 'VALID')
+     * @param numFilters Number of output filters
+     * @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
+     */
    Conv2d(
        int inputSize,
        int inputChannels,
@@ -21,21 +36,57 @@ class Conv2d : public ILayer {
        int numFilters,
        Layers::Activation activation
    );
 
+    /**
+     * @brief Destroy the Conv2d object
+     *
+     */
    ~Conv2d();
 
-    // Outputs
-    int outputSize;
-
+    /**
+     * @brief Forward pass of the convolutional layer
+     *
+     * @param d_input Device pointer to the input matrix
+     * @return Device pointer to the output matrix
+     */
    float* forward(const float* d_input);
 
+    /**
+     * @brief Set the weights of the convolutional layer
+     *
+     * @param weights_input Pointer to the weights
+     */
    void setWeights(const float* weights_input);
 
+    /**
+     * @brief Set the biases of the convolutional layer
+     *
+     * @param biases_input Pointer to the biases
+     */
    void setBiases(const float* biases_input);
    void host_conv(const float* input, float* output);
+
+    /**
+     * @brief Get the output width (and height) of the layer
+     *
+     * @return int
+     */
+    int getOutputSize() { return outputSize; }
+
+    /**
+     * @brief Get the padding size of the layer
+     *
+     * @return int
+     */
+    int getPaddingSize() { return paddingSize; }
 
  private:
    // Inputs
    int inputSize;
    int inputChannels;
 
+    // Outputs
+    int outputSize;
+
    // Kernel
    int kernelSize;
    int stride;
@@ -55,8 +106,22 @@ class Conv2d : public ILayer {
    // Kernels
    Layers::Activation activation;
 
+    /**
+     * @brief Initialize weights of the convolutional layer with zeros
+     *
+     */
    void initializeWeights();
 
+    /**
+     * @brief Initialize biases of the convolutional layer with zeros
+     *
+     */
    void initializeBiases();
 
+    /**
+     * @brief Copy weights and biases to the device
+     *
+     */
    void toCuda();
};
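Reading the documented constructor and setters together, host code would drive the layer roughly as below. The weight count kernelSize * kernelSize * inputChannels * numFilters and the requirement that d_input already be a device buffer of inputSize * inputSize * inputChannels floats are assumptions for illustration, not guaranteed by the header.

    #include <vector>

    void runConv2dExample(float* d_input) {
        int inputSize = 4, inputChannels = 1, kernelSize = 3, stride = 1;
        int numFilters = 2;

        Layers::Conv2d conv(inputSize, inputChannels, kernelSize, stride,
                            Layers::SAME, numFilters, Layers::RELU);

        // Host-side parameters; this layout is an assumption.
        std::vector<float> weights(
            kernelSize * kernelSize * inputChannels * numFilters, 0.1f);
        std::vector<float> biases(numFilters, 0.0f);
        conv.setWeights(weights.data());
        conv.setBiases(biases.data());

        // With SAME padding the output keeps the input's width/height.
        float* d_output = conv.forward(d_input);  // device pointer owned by the layer
        (void)d_output;
    }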
@@ -9,17 +9,47 @@
 
 namespace Layers {
 
+/**
+ * @brief Dense (fully connected) layer
+ *
+ */
 class Dense : public ILayer {
   public:
-    Dense(
-        int inputSize,
-        int outputSize,
-        Layers::Activation activation
-    );
+    /**
+     * @brief Construct a new Dense layer
+     *
+     * @param inputSize Size of the input vector
+     * @param outputSize Size of the output vector
+     * @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
+     */
+    Dense(int inputSize, int outputSize, Layers::Activation activation);
 
+    /**
+     * @brief Destroy the Dense layer
+     *
+     */
    ~Dense();
 
+    /**
+     * @brief Forward pass of the dense layer
+     *
+     * @param d_input Device pointer to the input vector
+     * @return Device pointer to the output vector
+     */
    float* forward(const float* d_input);
 
+    /**
+     * @brief Set the weights of the layer
+     *
+     * @param weights Pointer to the vector of weights
+     */
    void setWeights(const float* weights);
 
+    /**
+     * @brief Set the biases of the layer
+     *
+     * @param biases Pointer to the vector of biases
+     */
    void setBiases(const float* biases);
 
  private:
@@ -36,8 +66,22 @@ class Dense : public ILayer {
 
    Layers::Activation activation;
 
+    /**
+     * @brief Initialize the weights to zeros
+     *
+     */
    void initializeWeights();
 
+    /**
+     * @brief Initialize the biases to zeros
+     *
+     */
    void initializeBiases();
 
+    /**
+     * @brief Copy the weights and biases to the device
+     *
+     */
    void toCuda();
};
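Given the kernels documented earlier, Dense::forward plausibly computes activation(W·x + b): one mat_vec_mul launch, one vec_vec_add for the bias, then the chosen activation kernel. A hedged sketch of that composition, written as a free function because the buffer arguments stand in for private members this header does not reveal:

    // Hypothetical forward pass; the launch configuration is illustrative.
    float* dense_forward_sketch(const float* d_input, const float* d_weights,
                                const float* d_biases, float* d_output,
                                int inputSize, int outputSize) {
        int block = 256;
        int grid  = (outputSize + block - 1) / block;
        Kernels::mat_vec_mul<<<grid, block>>>(d_weights, d_input, d_output,
                                              inputSize, outputSize);
        Kernels::vec_vec_add<<<grid, block>>>(d_output, d_biases, d_output,
                                              outputSize);
        Kernels::relu<<<grid, block>>>(d_output, d_output, outputSize);  // if RELU
        return d_output;
    }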
@@ -6,22 +6,72 @@
 
 namespace Layers {
 
+/**
+ * @brief Activation functions
+ *
+ * SIGMOID: Sigmoid
+ * RELU: Rectified Linear Unit
+ *
+ */
 enum Activation { SIGMOID, RELU, NONE };
 
+/**
+ * @brief Padding types
+ *
+ * SAME: Zero padding such that the output size is the same as the input
+ * VALID: No padding
+ *
+ */
 enum Padding { SAME, VALID };
 
+/**
+ * @brief Base class for all layers
+ */
 class ILayer {
   public:
+    /**
+     * @brief Destroy the ILayer object
+     *
+     */
    virtual ~ILayer() {}
 
+    /**
+     * @brief Virtual function for the forward pass
+     *
+     * @param input Device pointer to the input
+     * @return float* Device pointer to the output
+     */
    virtual float* forward(const float* input) = 0;
 
+    /**
+     * @brief Virtual function for setting the weights
+     *
+     * @param weights Pointer to the weights
+     */
    virtual void setWeights(const float* weights) = 0;
 
+    /**
+     * @brief Virtual function for setting the biases
+     *
+     * @param biases Pointer to the biases
+     */
    virtual void setBiases(const float* biases) = 0;
 
  private:
 
+    /**
+     * @brief Initialize the weights
+     */
    virtual void initializeWeights() = 0;
 
+    /**
+     * @brief Initialize the biases
+     */
    virtual void initializeBiases() = 0;
 
+    /**
+     * @brief Copy the weights and biases to the device
+     */
    virtual void toCuda() = 0;
 
    int inputSize;
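The value of the ILayer interface is that a whole network reduces to a sequence of forward calls, each returning the device pointer the next layer consumes. A minimal sketch of that chaining pattern; the helper below is illustrative, not part of the repository:

    #include <vector>

    float* forward_through_network(const std::vector<Layers::ILayer*>& net,
                                   const float* input) {
        // The first layer is typically Layers::Input, which accepts a host
        // pointer; every subsequent forward consumes a device pointer.
        float* d = nullptr;
        const float* next = input;
        for (Layers::ILayer* layer : net) {
            d = layer->forward(next);
            next = d;
        }
        return d;  // device pointer to the final activations
    }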
@@ -5,11 +5,31 @@
 
 namespace Layers {
 
+/**
+ * @brief Input layer; simply copies the input to the device
+ *
+ */
 class Input : public ILayer {
   public:
+    /**
+     * @brief Create a new Input layer
+     *
+     * @param inputSize Size of the input vector
+     */
    Input(int inputSize);
 
+    /**
+     * @brief Destroy the Input layer
+     *
+     */
    ~Input();
 
+    /**
+     * @brief Forward pass of the input layer; copies the input to the device
+     *
+     * @param input Host pointer to the input vector
+     * @return Device pointer to the output vector
+     */
    float* forward(const float* input);
 
    void setWeights(const float* weights);
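Per the doc comment, Input::forward should amount to a single host-to-device copy. A sketch of such an implementation, written as a free function because the layer's private members (a preallocated d_output buffer and the stored inputSize) are assumptions here:

    float* input_forward_sketch(const float* input, float* d_output,
                                int inputSize) {
        // Copy the host input into the layer's preallocated device buffer.
        CUDA_CHECK(cudaMemcpy(d_output, input, sizeof(float) * inputSize,
                              cudaMemcpyHostToDevice));
        return d_output;  // device pointer handed to the next layer
    }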
@@ -4,7 +4,10 @@
 #include <cuda_runtime.h>
 #include <cstdio>
 
-// CUDA error checking macro
+/**
+ * @brief CUDA error checking macro
+ *
+ */
 #define CUDA_CHECK(call) \
     do { \
         cudaError_t result = call; \
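The hunk cuts off after the first statement of the macro body. A typical completion checks the result and reports the failing file and line before aborting; something along these lines, though the repository's actual handling may differ:

    // Requires <cstdio> (present above) and <cstdlib> for exit.
    #define CUDA_CHECK_SKETCH(call)                                       \
        do {                                                              \
            cudaError_t result = call;                                    \
            if (result != cudaSuccess) {                                  \
                fprintf(stderr, "CUDA error at %s:%d: %s\n", __FILE__,    \
                        __LINE__, cudaGetErrorString(result));            \
                exit(EXIT_FAILURE);                                       \
            }                                                             \
        } while (0)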
@@ -80,7 +80,7 @@ TEST_F(Conv2dTest, SimpleTest) {
    );
 
    int outputSize = (inputSize - kernelSize) / stride + 1;
-    EXPECT_EQ(outputSize, conv2d.outputSize);
+    EXPECT_EQ(outputSize, conv2d.getOutputSize());
 
    d_output = conv2d.forward(d_input);
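The asserted formula is the standard output size of a valid (unpadded) convolution; for example, inputSize = 6, kernelSize = 3, stride = 1 gives (6 - 3) / 1 + 1 = 4. The padded tests below instead expect the output to keep the input size, which is exactly what SAME padding is documented to guarantee. A one-line helper capturing the formula:

    // Output width/height of a valid (unpadded) convolution, as asserted above.
    inline int convOutputSize(int inputSize, int kernelSize, int stride) {
        return (inputSize - kernelSize) / stride + 1;  // e.g. (6 - 3) / 1 + 1 = 4
    }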
@@ -168,16 +168,16 @@ TEST_F(Conv2dTest, PaddedTest) {
        activation, input, kernels.data(), d_input
    );
 
-    EXPECT_EQ(inputSize, conv2d.outputSize);
+    EXPECT_EQ(inputSize, conv2d.getOutputSize());
 
    d_output = conv2d.forward(d_input);
 
    std::vector<float> output(
-        conv2d.outputSize * conv2d.outputSize * numFilters
+        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
    );
    cudaMemcpy(
        output.data(), d_output,
-        sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
        cudaMemcpyDeviceToHost
    );
@@ -253,16 +253,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
        activation, input, kernels.data(), d_input
    );
 
-    EXPECT_EQ(inputSize, conv2d.outputSize);
+    EXPECT_EQ(inputSize, conv2d.getOutputSize());
 
    d_output = conv2d.forward(d_input);
 
    std::vector<float> output(
-        conv2d.outputSize * conv2d.outputSize * numFilters
+        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
    );
    cudaMemcpy(
        output.data(), d_output,
-        sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
        cudaMemcpyDeviceToHost
    );