mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Add documentation comments
This commit is contained in:
@@ -3,9 +3,23 @@
|
|||||||
|
|
||||||
namespace Kernels {
|
namespace Kernels {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Sigmoid activation function kernel
|
||||||
|
*
|
||||||
|
* @param src Pointer to the source array
|
||||||
|
* @param dst Pointer to the destination array
|
||||||
|
* @param len Length of the arrays
|
||||||
|
*/
|
||||||
__global__ void
|
__global__ void
|
||||||
sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
|
sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Relu activation function kernel
|
||||||
|
*
|
||||||
|
* @param src Pointer to the source array
|
||||||
|
* @param dst Pointer to the destination array
|
||||||
|
* @param len Length of the arrays
|
||||||
|
*/
|
||||||
__global__ void
|
__global__ void
|
||||||
relu(const float* __restrict__ src, float* __restrict__ dst, int len);
|
relu(const float* __restrict__ src, float* __restrict__ dst, int len);
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,16 @@
|
|||||||
|
|
||||||
namespace Kernels {
|
namespace Kernels {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Kernel that pads the input matrix with zeros
|
||||||
|
*
|
||||||
|
* @param d_input Device pointer to the input matrix (as vector)
|
||||||
|
* @param d_padded Device pointer to the padded matrix (as vector)
|
||||||
|
* @param w Width of the input matrix
|
||||||
|
* @param h Height of the input matrix
|
||||||
|
* @param n Number of input channels
|
||||||
|
* @param p Padding size
|
||||||
|
*/
|
||||||
__global__ void padding(
|
__global__ void padding(
|
||||||
const float* d_input,
|
const float* d_input,
|
||||||
float* d_padded,
|
float* d_padded,
|
||||||
@@ -12,6 +22,19 @@ __global__ void padding(
|
|||||||
int p
|
int p
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Convolution kernel
|
||||||
|
*
|
||||||
|
* @param d_input Device pointer to the input matrix
|
||||||
|
* @param d_kernel Device pointer to the convolution kernel
|
||||||
|
* @param d_output Device pointer to the output matrix
|
||||||
|
* @param inputSize Width and height of the input matrix
|
||||||
|
* @param nChannels Number of channels in the input matrix
|
||||||
|
* @param kernelSize Width and height of the convolution kernel
|
||||||
|
* @param stride Convolution stride
|
||||||
|
* @param nFilters Number of output filters
|
||||||
|
* @param outputSize Width and height of the output matrix
|
||||||
|
*/
|
||||||
__global__ void convolution(
|
__global__ void convolution(
|
||||||
const float* d_input,
|
const float* d_input,
|
||||||
const float* d_kernel,
|
const float* d_kernel,
|
||||||
|
|||||||
@@ -3,6 +3,15 @@
|
|||||||
|
|
||||||
namespace Kernels {
|
namespace Kernels {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Matrix vector multiplication kernel
|
||||||
|
*
|
||||||
|
* @param d_matrix Device pointer to matrix
|
||||||
|
* @param d_vector Device pointer to vector
|
||||||
|
* @param d_output Device pointer to output vector
|
||||||
|
* @param w Width of the matrix
|
||||||
|
* @param h Height of the matrix
|
||||||
|
*/
|
||||||
__global__ void mat_vec_mul(
|
__global__ void mat_vec_mul(
|
||||||
const float* d_matrix,
|
const float* d_matrix,
|
||||||
const float* d_vector,
|
const float* d_vector,
|
||||||
@@ -11,6 +20,14 @@ __global__ void mat_vec_mul(
|
|||||||
int h
|
int h
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Vector vector addition kernel
|
||||||
|
*
|
||||||
|
* @param d_vector1 Device pointer to first vector
|
||||||
|
* @param d_vector2 Device pointer to second vector
|
||||||
|
* @param d_output Device pointer to output vector
|
||||||
|
* @param w Length of the vectors
|
||||||
|
*/
|
||||||
__global__ void vec_vec_add(
|
__global__ void vec_vec_add(
|
||||||
const float* d_vector1,
|
const float* d_vector1,
|
||||||
const float* d_vector2,
|
const float* d_vector2,
|
||||||
|
|||||||
@@ -10,8 +10,23 @@
|
|||||||
|
|
||||||
namespace Layers {
|
namespace Layers {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief 2D convolutional layer
|
||||||
|
*
|
||||||
|
*/
|
||||||
class Conv2d : public ILayer {
|
class Conv2d : public ILayer {
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Construct a new Conv 2d layer
|
||||||
|
*
|
||||||
|
* @param inputSize Width and height of the input matrix
|
||||||
|
* @param inputChannels Number of channels in the input matrix
|
||||||
|
* @param kernelSize Width and height of the convolution kernel
|
||||||
|
* @param stride Convolution stride
|
||||||
|
* @param padding Padding type ('SAME' or 'VALID')
|
||||||
|
* @param numFilters Number of output filters
|
||||||
|
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
|
||||||
|
*/
|
||||||
Conv2d(
|
Conv2d(
|
||||||
int inputSize,
|
int inputSize,
|
||||||
int inputChannels,
|
int inputChannels,
|
||||||
@@ -21,21 +36,57 @@ class Conv2d : public ILayer {
|
|||||||
int numFilters,
|
int numFilters,
|
||||||
Layers::Activation activation
|
Layers::Activation activation
|
||||||
);
|
);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy the Conv 2d object
|
||||||
|
*
|
||||||
|
*/
|
||||||
~Conv2d();
|
~Conv2d();
|
||||||
|
|
||||||
// Outputs
|
/**
|
||||||
int outputSize;
|
* @brief Forward pass of the convolutional layer
|
||||||
|
*
|
||||||
|
* @param d_input Device pointer to the input matrix
|
||||||
|
* @return Device pointer to the output matrix
|
||||||
|
*/
|
||||||
float* forward(const float* d_input);
|
float* forward(const float* d_input);
|
||||||
void setWeights(const float* weights_input);
|
|
||||||
void setBiases(const float* biases_input);
|
/**
|
||||||
void host_conv(const float* input, float* output);
|
* @brief Set the weights of the convolutional layer
|
||||||
|
*
|
||||||
|
* @param weights_input Pointer to the weights
|
||||||
|
*/
|
||||||
|
void setWeights(const float* weights_input);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the biases of the convolutional layer
|
||||||
|
*
|
||||||
|
* @param biases_input Pointer to the biases
|
||||||
|
*/
|
||||||
|
void setBiases(const float* biases_input);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the output width (/ height) of the layer
|
||||||
|
*
|
||||||
|
* @return int
|
||||||
|
*/
|
||||||
|
int getOutputSize() { return outputSize; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Get the padding size of the layer
|
||||||
|
*
|
||||||
|
* @return int
|
||||||
|
*/
|
||||||
|
int getPaddingSize() { return paddingSize; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Inputs
|
// Inputs
|
||||||
int inputSize;
|
int inputSize;
|
||||||
int inputChannels;
|
int inputChannels;
|
||||||
|
|
||||||
|
// Outputs
|
||||||
|
int outputSize;
|
||||||
|
|
||||||
// Kernel
|
// Kernel
|
||||||
int kernelSize;
|
int kernelSize;
|
||||||
int stride;
|
int stride;
|
||||||
@@ -55,8 +106,22 @@ class Conv2d : public ILayer {
|
|||||||
// Kernels
|
// Kernels
|
||||||
Layers::Activation activation;
|
Layers::Activation activation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize weights of the convolutional layer with zeros
|
||||||
|
*
|
||||||
|
*/
|
||||||
void initializeWeights();
|
void initializeWeights();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize biases of the convolutional layer with zeros
|
||||||
|
*
|
||||||
|
*/
|
||||||
void initializeBiases();
|
void initializeBiases();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copy weights and biases to the device
|
||||||
|
*
|
||||||
|
*/
|
||||||
void toCuda();
|
void toCuda();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -9,17 +9,47 @@
|
|||||||
|
|
||||||
namespace Layers {
|
namespace Layers {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Dense (fully connected) layer
|
||||||
|
*
|
||||||
|
*/
|
||||||
class Dense : public ILayer {
|
class Dense : public ILayer {
|
||||||
public:
|
public:
|
||||||
Dense(
|
/**
|
||||||
int inputSize,
|
* @brief Construct a new Dense layer
|
||||||
int outputSize,
|
*
|
||||||
Layers::Activation activation
|
* @param inputSize Size of the input vector
|
||||||
);
|
* @param outputSize Size of the output vector
|
||||||
|
* @param activation Activation function ('RELU', 'SIGMOID' or 'NONE')
|
||||||
|
*/
|
||||||
|
Dense(int inputSize, int outputSize, Layers::Activation activation);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy the Dense layer
|
||||||
|
*
|
||||||
|
*/
|
||||||
~Dense();
|
~Dense();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Forward pass of the dense layer
|
||||||
|
*
|
||||||
|
* @param d_input Device pointer to the input vector
|
||||||
|
* @return Device pointer to the output vector
|
||||||
|
*/
|
||||||
float* forward(const float* d_input);
|
float* forward(const float* d_input);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the weights of the layer
|
||||||
|
*
|
||||||
|
* @param weights Pointer to vector of weights
|
||||||
|
*/
|
||||||
void setWeights(const float* weights);
|
void setWeights(const float* weights);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Set the biases of the layer
|
||||||
|
*
|
||||||
|
* @param biases Pointer to vector of biases
|
||||||
|
*/
|
||||||
void setBiases(const float* biases);
|
void setBiases(const float* biases);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@@ -36,8 +66,22 @@ class Dense : public ILayer {
|
|||||||
|
|
||||||
Layers::Activation activation;
|
Layers::Activation activation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the weights to zeros
|
||||||
|
*
|
||||||
|
*/
|
||||||
void initializeWeights();
|
void initializeWeights();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the biases to zeros
|
||||||
|
*
|
||||||
|
*/
|
||||||
void initializeBiases();
|
void initializeBiases();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copy the weights and biases to the device
|
||||||
|
*
|
||||||
|
*/
|
||||||
void toCuda();
|
void toCuda();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@@ -6,22 +6,72 @@
|
|||||||
|
|
||||||
namespace Layers {
|
namespace Layers {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Activation functions
|
||||||
|
*
|
||||||
|
* SIGMOID: Sigmoid
|
||||||
|
* RELU: Rectified Linear Unit
|
||||||
|
*
|
||||||
|
*/
|
||||||
enum Activation { SIGMOID, RELU, NONE };
|
enum Activation { SIGMOID, RELU, NONE };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Padding types
|
||||||
|
*
|
||||||
|
* SAME: Zero padding such that the output size is the same as the input
|
||||||
|
* VALID: No padding
|
||||||
|
*
|
||||||
|
*/
|
||||||
enum Padding { SAME, VALID };
|
enum Padding { SAME, VALID };
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Base class for all layers
|
||||||
|
*/
|
||||||
class ILayer {
|
class ILayer {
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Destroy the ILayer object
|
||||||
|
*
|
||||||
|
*/
|
||||||
virtual ~ILayer() {}
|
virtual ~ILayer() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function for forward pass
|
||||||
|
*
|
||||||
|
* @param input (Device) Pointer to the input
|
||||||
|
* @return float* Device pointer to the output
|
||||||
|
*/
|
||||||
virtual float* forward(const float* input) = 0;
|
virtual float* forward(const float* input) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function for setting weights
|
||||||
|
*
|
||||||
|
* @param weights Pointer to the weights
|
||||||
|
*/
|
||||||
virtual void setWeights(const float* weights) = 0;
|
virtual void setWeights(const float* weights) = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Virtual function for setting biases
|
||||||
|
*
|
||||||
|
* @param biases Pointer to the biases
|
||||||
|
*/
|
||||||
virtual void setBiases(const float* biases) = 0;
|
virtual void setBiases(const float* biases) = 0;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the weights
|
||||||
|
*/
|
||||||
virtual void initializeWeights() = 0;
|
virtual void initializeWeights() = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Initialize the biases
|
||||||
|
*/
|
||||||
virtual void initializeBiases() = 0;
|
virtual void initializeBiases() = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Copy the weights and biases to the device
|
||||||
|
*/
|
||||||
virtual void toCuda() = 0;
|
virtual void toCuda() = 0;
|
||||||
|
|
||||||
int inputSize;
|
int inputSize;
|
||||||
|
|||||||
@@ -5,11 +5,31 @@
|
|||||||
|
|
||||||
namespace Layers {
|
namespace Layers {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Input layer, just copies the input to the device
|
||||||
|
*
|
||||||
|
*/
|
||||||
class Input : public ILayer {
|
class Input : public ILayer {
|
||||||
public:
|
public:
|
||||||
|
/**
|
||||||
|
* @brief Create a new Input layer
|
||||||
|
*
|
||||||
|
* @param inputSize Size of the input vector
|
||||||
|
*/
|
||||||
Input(int inputSize);
|
Input(int inputSize);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Destroy the Input layer
|
||||||
|
*
|
||||||
|
*/
|
||||||
~Input();
|
~Input();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief Forward pass of the input layer. Just copies the input to the device
|
||||||
|
*
|
||||||
|
* @param input Host pointer to the input vector
|
||||||
|
* @return Device pointer to the output vector
|
||||||
|
*/
|
||||||
float* forward(const float* input);
|
float* forward(const float* input);
|
||||||
|
|
||||||
void setWeights(const float* weights);
|
void setWeights(const float* weights);
|
||||||
|
|||||||
@@ -4,7 +4,10 @@
|
|||||||
#include <cuda_runtime.h>
|
#include <cuda_runtime.h>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
|
|
||||||
// CUDA error checking macro
|
/**
|
||||||
|
* @brief CUDA error checking macro
|
||||||
|
*
|
||||||
|
*/
|
||||||
#define CUDA_CHECK(call) \
|
#define CUDA_CHECK(call) \
|
||||||
do { \
|
do { \
|
||||||
cudaError_t result = call; \
|
cudaError_t result = call; \
|
||||||
|
|||||||
@@ -80,7 +80,7 @@ TEST_F(Conv2dTest, SimpleTest) {
|
|||||||
);
|
);
|
||||||
|
|
||||||
int outputSize = (inputSize - kernelSize) / stride + 1;
|
int outputSize = (inputSize - kernelSize) / stride + 1;
|
||||||
EXPECT_EQ(outputSize, conv2d.outputSize);
|
EXPECT_EQ(outputSize, conv2d.getOutputSize());
|
||||||
|
|
||||||
d_output = conv2d.forward(d_input);
|
d_output = conv2d.forward(d_input);
|
||||||
|
|
||||||
@@ -168,16 +168,16 @@ TEST_F(Conv2dTest, PaddedTest) {
|
|||||||
activation, input, kernels.data(), d_input
|
activation, input, kernels.data(), d_input
|
||||||
);
|
);
|
||||||
|
|
||||||
EXPECT_EQ(inputSize, conv2d.outputSize);
|
EXPECT_EQ(inputSize, conv2d.getOutputSize());
|
||||||
|
|
||||||
d_output = conv2d.forward(d_input);
|
d_output = conv2d.forward(d_input);
|
||||||
|
|
||||||
std::vector<float> output(
|
std::vector<float> output(
|
||||||
conv2d.outputSize * conv2d.outputSize * numFilters
|
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
|
||||||
);
|
);
|
||||||
cudaMemcpy(
|
cudaMemcpy(
|
||||||
output.data(), d_output,
|
output.data(), d_output,
|
||||||
sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
|
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
|
||||||
cudaMemcpyDeviceToHost
|
cudaMemcpyDeviceToHost
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -253,16 +253,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
|
|||||||
activation, input, kernels.data(), d_input
|
activation, input, kernels.data(), d_input
|
||||||
);
|
);
|
||||||
|
|
||||||
EXPECT_EQ(inputSize, conv2d.outputSize);
|
EXPECT_EQ(inputSize, conv2d.getOutputSize());
|
||||||
|
|
||||||
d_output = conv2d.forward(d_input);
|
d_output = conv2d.forward(d_input);
|
||||||
|
|
||||||
std::vector<float> output(
|
std::vector<float> output(
|
||||||
conv2d.outputSize * conv2d.outputSize * numFilters
|
conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
|
||||||
);
|
);
|
||||||
cudaMemcpy(
|
cudaMemcpy(
|
||||||
output.data(), d_output,
|
output.data(), d_output,
|
||||||
sizeof(float) * conv2d.outputSize * conv2d.outputSize * numFilters,
|
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
|
||||||
cudaMemcpyDeviceToHost
|
cudaMemcpyDeviceToHost
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user