diff --git a/CMakeLists.txt b/CMakeLists.txt
index 258d06c..da4e226 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -10,6 +10,7 @@ include_directories(${CUDAToolkit_INCLUDE_DIRS})
 # Add project source files for the library
 set(LIBRARY_SOURCES
     src/utils/cuda_helper.cu
+    src/functions/activations.cu
     src/layers/dense.cu
 )
 
@@ -27,11 +28,12 @@ target_link_libraries(${PROJECT_NAME} CUDA::cublas CUDA::cudart)
 target_include_directories(${PROJECT_NAME} PUBLIC
     ${CMAKE_CURRENT_SOURCE_DIR}/include
     ${CMAKE_CURRENT_SOURCE_DIR}/include/utils
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/functions
     ${CMAKE_CURRENT_SOURCE_DIR}/include/layers
     ${CMAKE_CURRENT_SOURCE_DIR}/src
 )
 
-set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 14)
+set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 20)
 
 # Add testing subdirectory
 add_subdirectory(test)
\ No newline at end of file
diff --git a/include/functions/activations.cuh b/include/functions/activations.cuh
new file mode 100644
index 0000000..638756c
--- /dev/null
+++ b/include/functions/activations.cuh
@@ -0,0 +1,14 @@
+#include <cuda_runtime.h>
+
+#ifndef ACTIVATIONS_H
+#define ACTIVATIONS_H
+
+__device__ float sigmoid(float a);
+__device__ float relu(float a);
+__device__ float linear(float a);
+
+__global__ void sigmoid_kernel(const float* __restrict__ src, float* __restrict__ dst, int len);
+__global__ void relu_kernel(const float* __restrict__ src, float* __restrict__ dst, int len);
+__global__ void linear_kernel(const float* __restrict__ src, float* __restrict__ dst, int len);
+
+#endif // ACTIVATIONS_H
\ No newline at end of file
diff --git a/include/layers/conv.cuh b/include/layers/conv.cuh
index 63c5ccc..eed7d76 100644
--- a/include/layers/conv.cuh
+++ b/include/layers/conv.cuh
@@ -1,5 +1,3 @@
-// fully_connected_layer.h
-
 #ifndef CONV_LAYER_H
 #define CONV_LAYER_H
 
diff --git a/include/layers/dense.cuh b/include/layers/dense.cuh
index cd002fd..9393154 100644
--- a/include/layers/dense.cuh
+++ b/include/layers/dense.cuh
@@ -1,15 +1,17 @@
 #ifndef DENSE_LAYER_H
 #define DENSE_LAYER_H
 
+#include <string>
 #include <cublas_v2.h>
 #include <vector>
-#include <ilayer.cuh>
+#include <cuda_runtime.h>
+#include "ilayer.cuh"
 
 namespace Layers {
 
     class Dense : public ILayer {
       public:
-        Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle);
+        Dense(int inputSize, int outputSize, std::string activation, cublasHandle_t cublasHandle);
         ~Dense();
 
         void forward(const float* input, float* output);
@@ -28,6 +30,8 @@ namespace Layers {
         std::vector<float> weights;
         std::vector<float> biases;
 
+        std::string activation;
+
         void initializeWeights();
         void initializeBiases();
         void toCuda();
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
deleted file mode 100644
index bbf5f52..0000000
--- a/src/CMakeLists.txt
+++ /dev/null
@@ -1,7 +0,0 @@
-set(LAYER_SOURCES layers/dense.cu)
-
-add_library(CUDANet
-    utils/cuda_helper.cu
-    utils/functions.cu
-    ${LAYER_SOURCES}
-)
diff --git a/src/functions/activations.cu b/src/functions/activations.cu
new file mode 100644
index 0000000..57399b4
--- /dev/null
+++ b/src/functions/activations.cu
@@ -0,0 +1,44 @@
+#include "activations.cuh"
+#include <math.h>
+
+__device__ float sigmoid(float a)
+{
+    return 1.0f / (1.0f + expf(-a));
+}
+
+__device__ float relu(float a)
+{
+    return a < 0.0f ? 0.0f : a;
+}
+
+__device__ float linear(float a)
+{
+    return a;
+}
+
+__global__ void sigmoid_kernel(const float* __restrict__ src, float* __restrict__ dst, int len) {
+    int stride = gridDim.x * blockDim.x;
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+    for (int i = tid; i < len; i += stride) {
+        dst[i] = sigmoid(src[i]);
+    }
+}
+
+__global__ void relu_kernel(const float* __restrict__ src, float* __restrict__ dst, int len) {
+    int stride = gridDim.x * blockDim.x;
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+    for (int i = tid; i < len; i += stride) {
+        dst[i] = relu(src[i]);
+    }
+}
+
+__global__ void linear_kernel(const float* __restrict__ src, float* __restrict__ dst, int len) {
+    int stride = gridDim.x * blockDim.x;
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+    for (int i = tid; i < len; i += stride) {
+        dst[i] = linear(src[i]);
+    }
+}
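
Note: a minimal usage sketch for the new kernels, not part of the change set above; the function name, block size, and the call to cudaDeviceSynchronize are illustrative assumptions. Any launch configuration is valid because the kernels iterate with a grid stride.

    #include <cuda_runtime.h>
    #include "activations.cuh"

    // Apply sigmoid element-wise to a device buffer of `len` floats.
    void apply_sigmoid(const float* d_src, float* d_dst, int len) {
        int threadsPerBlock = 256;
        int blocksPerGrid = (len + threadsPerBlock - 1) / threadsPerBlock;
        sigmoid_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_src, d_dst, len);
        cudaDeviceSynchronize();  // illustrative; callers may prefer stream ordering
    }
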
diff --git a/src/layers/dense.cu b/src/layers/dense.cu
index 86f3328..37b6dc3 100644
--- a/src/layers/dense.cu
+++ b/src/layers/dense.cu
@@ -1,13 +1,15 @@
 #include "dense.cuh"
 #include "cuda_helper.cuh"
+#include "activations.cuh"
 
 #include <cuda_runtime.h>
 #include <cublas_v2.h>
 #include <vector>
 #include <iostream>
 #include <cstdlib>
+#include <algorithm>
 
-Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle)
-    : inputSize(inputSize), outputSize(outputSize), cublasHandle(cublasHandle) {
+Layers::Dense::Dense(int inputSize, int outputSize, std::string activation, cublasHandle_t cublasHandle)
+    : inputSize(inputSize), outputSize(outputSize), cublasHandle(cublasHandle), activation(activation) {
 
     // Allocate memory for weights and biases
     weights.resize(outputSize * inputSize);
@@ -33,13 +35,7 @@ Layers::Dense::~Dense() {
 }
 
 void Layers::Dense::initializeWeights() {
-
-    for (int j = 0; j < inputSize; ++j) {
-        for (int i = 0; i < outputSize; ++i) {
-            int idx = IDX2C(i, j, outputSize);
-            weights[idx] = 0.0f;
-        }
-    }
+    std::fill(weights.begin(), weights.end(), 0.0f);
 }
 
 void Layers::Dense::initializeBiases() {
@@ -52,6 +48,18 @@ void Layers::Dense::forward(const float* d_input, float* d_output) {
 
     CUBLAS_CHECK(cublasSgemv(cublasHandle, CUBLAS_OP_N, inputSize, outputSize, &alpha, d_weights, inputSize, d_input, 1, &beta, d_output, 1));
     CUBLAS_CHECK(cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, d_output, 1));
+
+    int threadsPerBlock = 256;
+    int blocksPerGrid = (outputSize + threadsPerBlock - 1) / threadsPerBlock;
+
+    if (activation == "sigmoid") {
+        sigmoid_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_output, d_output, outputSize);
+    } else if (activation == "relu") {
+        relu_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_output, d_output, outputSize);
+    } else {
+        linear_kernel<<<blocksPerGrid, threadsPerBlock>>>(d_output, d_output, outputSize);
+    }
+
 }
 
 void Layers::Dense::toCuda() {
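
Note: a sketch of how the updated Dense constructor might be called, not part of the change set; the handle setup and layer sizes are illustrative. Unrecognized activation names fall through to the linear kernel.

    #include <cublas_v2.h>
    #include "dense.cuh"

    cublasHandle_t handle;
    cublasCreate(&handle);

    // The activation is now selected by name: "sigmoid", "relu", or
    // anything else (treated as linear).
    Layers::Dense dense(4, 5, "relu", handle);

    // d_input and d_output are device buffers of inputSize and outputSize
    // floats; forward computes W*x + b, then applies the activation in place:
    // dense.forward(d_input, d_output);
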
diff --git a/test/layers/test_dense.cu b/test/layers/test_dense.cu
index 226d03b..cecd1ad 100644
--- a/test/layers/test_dense.cu
+++ b/test/layers/test_dense.cu
@@ -2,6 +2,7 @@
 #include <cuda_runtime.h>
 #include <cublas_v2.h>
 #include <vector>
+#include "activations.cuh"
 #include "dense.cuh"
 #include "test_cublas_fixture.cuh"
 
@@ -9,7 +10,7 @@ class DenseLayerTest : public CublasTestFixture {
   protected:
     Layers::Dense commonTestSetup(int inputSize, int outputSize, std::vector<float>& input, std::vector<std::vector<float>>& weights, std::vector<float>& biases, float*& d_input, float*& d_output) {
         // Create Dense layer
-        Layers::Dense denseLayer(inputSize, outputSize, cublasHandle);
+        Layers::Dense denseLayer(inputSize, outputSize, "linear", cublasHandle);
 
         // Set weights and biases
         denseLayer.setWeights(weights);
@@ -48,7 +49,7 @@ TEST_F(DenseLayerTest, Init) {
         int outputSize = j;
 
         // std::cout << "Dense layer: input size = " << inputSize << ", output size = " << outputSize << std::endl;
-        Layers::Dense denseLayer(inputSize, outputSize, cublasHandle);
+        Layers::Dense denseLayer(inputSize, outputSize, "linear", cublasHandle);
     }
   }
 }
@@ -67,7 +68,7 @@ TEST_F(DenseLayerTest, setWeights) {
         {1.3f, 0.5f, 0.0f, 1.7f}
     };
 
-    Layers::Dense denseLayer(inputSize, outputSize, cublasHandle);
+    Layers::Dense denseLayer(inputSize, outputSize, "linear", cublasHandle);
 
     denseLayer.setWeights(weights);
diff --git a/test/utils/test_functions.cu b/test/utils/test_functions.cu
new file mode 100644
index 0000000..8431916
--- /dev/null
+++ b/test/utils/test_functions.cu
@@ -0,0 +1,16 @@
+#include "gtest/gtest.h"
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <vector>
+#include "activations.cuh"
+#include "test_cublas_fixture.cuh"
+
+class FunctionsTest : public CublasTestFixture {
+  protected:
+    cudaError_t cudaStatus;
+    cublasStatus_t cublasStatus;
+};
+
+TEST_F(FunctionsTest, sigmoid) {
+
+}
\ No newline at end of file
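
Note: the new sigmoid test is still a stub. A sketch of how it might be filled in, assuming direct cudaMalloc/cudaMemcpy buffer management in the style of test_dense.cu and a host-side <cmath> reference; the input values and tolerance are illustrative:

    TEST_F(FunctionsTest, sigmoid) {
        std::vector<float> input = {-2.0f, -1.0f, 0.0f, 1.0f, 2.0f};
        std::vector<float> output(input.size());

        float* d_input;
        float* d_output;
        cudaStatus = cudaMalloc((void**)&d_input, input.size() * sizeof(float));
        EXPECT_EQ(cudaStatus, cudaSuccess);
        cudaStatus = cudaMalloc((void**)&d_output, output.size() * sizeof(float));
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus = cudaMemcpy(d_input, input.data(), input.size() * sizeof(float), cudaMemcpyHostToDevice);
        EXPECT_EQ(cudaStatus, cudaSuccess);

        // One block suffices here; the kernel's grid-stride loop covers len.
        sigmoid_kernel<<<1, 256>>>(d_input, d_output, (int)input.size());

        cudaStatus = cudaMemcpy(output.data(), d_output, output.size() * sizeof(float), cudaMemcpyDeviceToHost);
        EXPECT_EQ(cudaStatus, cudaSuccess);

        // Compare against a host-side reference: 1 / (1 + e^-x).
        for (size_t i = 0; i < input.size(); ++i) {
            EXPECT_NEAR(output[i], 1.0f / (1.0f + std::exp(-input[i])), 1e-5);
        }

        cudaFree(d_input);
        cudaFree(d_output);
    }
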