From f8220f0ec12b430be03a0b1a0ab5869714f5804c Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Thu, 5 Sep 2024 22:23:47 +0200
Subject: [PATCH] Restructure cuda backend

---
 CMakeLists.txt                               | 10 +++--
 include/layers/{add.cuh => add.hpp}          | 14 +++++-
 .../cuda/kernels/activation_functions.cu     |  0
 .../cuda/kernels/convolution.cu              |  0
 src/{ => backends}/cuda/kernels/matmul.cu    |  0
 src/{ => backends}/cuda/kernels/pooling.cu   |  0
 src/{ => backends}/cuda/layers/add.cu        | 17 +++----
 src/{ => backends}/cuda/utils/cuda_helper.cu |  0
 src/{ => backends}/cuda/utils/vector.cu      |  0
 src/{cuda => }/layers/activation.cu          |  0
 src/layers/add.cpp                           | 44 +++++++++++++++++++
 src/{cuda => }/layers/avg_pooling.cu         |  0
 src/{cuda => }/layers/batch_norm.cu          |  0
 src/{cuda => }/layers/concat.cu              |  0
 src/{cuda => }/layers/conv2d.cu              |  0
 src/{cuda => }/layers/dense.cu               |  0
 src/{cuda => }/layers/input.cu               |  0
 src/{cuda => }/layers/max_pooling.cu         |  0
 src/{cuda => }/layers/output.cu              |  0
 19 files changed, 69 insertions(+), 16 deletions(-)
 rename include/layers/{add.cuh => add.hpp} (69%)
 rename src/{ => backends}/cuda/kernels/activation_functions.cu (100%)
 rename src/{ => backends}/cuda/kernels/convolution.cu (100%)
 rename src/{ => backends}/cuda/kernels/matmul.cu (100%)
 rename src/{ => backends}/cuda/kernels/pooling.cu (100%)
 rename src/{ => backends}/cuda/layers/add.cu (74%)
 rename src/{ => backends}/cuda/utils/cuda_helper.cu (100%)
 rename src/{ => backends}/cuda/utils/vector.cu (100%)
 rename src/{cuda => }/layers/activation.cu (100%)
 create mode 100644 src/layers/add.cpp
 rename src/{cuda => }/layers/avg_pooling.cu (100%)
 rename src/{cuda => }/layers/batch_norm.cu (100%)
 rename src/{cuda => }/layers/concat.cu (100%)
 rename src/{cuda => }/layers/conv2d.cu (100%)
 rename src/{cuda => }/layers/dense.cu (100%)
 rename src/{cuda => }/layers/input.cu (100%)
 rename src/{cuda => }/layers/max_pooling.cu (100%)
 rename src/{cuda => }/layers/output.cu (100%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c2f1351..e73c1e4 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -20,6 +20,7 @@ if(USE_CUDA)
 endif()
 
 file(GLOB_RECURSE CPU_SOURCES
+    src/layers/*.cpp
     src/model/*.cpp
 )
 
@@ -27,10 +28,11 @@ set(LIBRARY_SOURCES ${CPU_SOURCES})
 
 if(USE_CUDA)
     file(GLOB_RECURSE CUDA_SOURCES
-        src/*.cu
-        src/cuda/utils/*.cu
-        src/cuda/kernels/*.cu
-        src/cuda/layers/*.cu
+        src/backends/cuda/*.cu
+        src/backends/cuda/utils/*.cu
+        src/backends/cuda/kernels/*.cu
+        src/backends/cuda/layers/*.cu
+        src/layers/*.cu # To be removed
     )
     set(LIBRARY_SOURCES ${LIBRARY_SOURCES} ${CUDA_SOURCES})
 endif()
diff --git a/include/layers/add.cuh b/include/layers/add.hpp
similarity index 69%
rename from include/layers/add.cuh
rename to include/layers/add.hpp
index 5efb545..95d1e12 100644
--- a/include/layers/add.cuh
+++ b/include/layers/add.hpp
@@ -25,13 +25,23 @@ class Add {
      * @param d_inputB Device pointer to the second input
      *
      */
-    void forward(const float* d_inputA, const float* d_inputB);
+    float* forward(const float* inputA, const float* inputB);
 
   private:
     int inputSize;
+
+    float* output;
+
+    float* forwardCPU(const float* inputA, const float* inputB);
+
+#ifdef USE_CUDA
+    float* d_output;
     int gridSize;
 
-    float* d_output;
+    float* forwardCUDA(const float* d_inputA, const float* d_inputB);
+    void initCUDA();
+    void delCUDA();
+#endif
 };
 
 } // namespace CUDANet::Layers
diff --git a/src/cuda/kernels/activation_functions.cu b/src/backends/cuda/kernels/activation_functions.cu
similarity index 100%
rename from src/cuda/kernels/activation_functions.cu
rename to src/backends/cuda/kernels/activation_functions.cu
diff --git a/src/cuda/kernels/convolution.cu b/src/backends/cuda/kernels/convolution.cu
similarity index 100%
rename from src/cuda/kernels/convolution.cu
rename to src/backends/cuda/kernels/convolution.cu
diff --git a/src/cuda/kernels/matmul.cu b/src/backends/cuda/kernels/matmul.cu
similarity index 100%
rename from src/cuda/kernels/matmul.cu
rename to src/backends/cuda/kernels/matmul.cu
diff --git a/src/cuda/kernels/pooling.cu b/src/backends/cuda/kernels/pooling.cu
similarity index 100%
rename from src/cuda/kernels/pooling.cu
rename to src/backends/cuda/kernels/pooling.cu
diff --git a/src/cuda/layers/add.cu b/src/backends/cuda/layers/add.cu
similarity index 74%
rename from src/cuda/layers/add.cu
rename to src/backends/cuda/layers/add.cu
index 2c672d9..c6a735d 100644
--- a/src/cuda/layers/add.cu
+++ b/src/backends/cuda/layers/add.cu
@@ -1,26 +1,21 @@
-#include "add.cuh"
+#include "add.hpp"
 #include "matmul.cuh"
 #include "cuda_helper.cuh"
 
 using namespace CUDANet::Layers;
 
-
-Add::Add(int inputSize)
-    : inputSize(inputSize) {
-
+void Add::initCUDA() {
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * inputSize));
 
     gridSize = (inputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
 }
 
-
-Add::~Add() {
+void Add::delCUDA() {
     cudaFree(d_output);
 }
 
-
-void Add::forward(const float* d_inputA, const float* d_inputB) {
+float* Add::forwardCUDA(const float* d_inputA, const float* d_inputB) {
     Kernels::vec_vec_add<<<gridSize, BLOCK_SIZE>>>(
         d_inputA, d_inputB, d_output, inputSize
     );
@@ -28,4 +23,6 @@ void Add::forward(const float* d_inputA, const float* d_inputB) {
 
     CUDA_CHECK(cudaGetLastError());
     CUDA_CHECK(cudaDeviceSynchronize());
-}
\ No newline at end of file
+    return d_output;
+
+}
diff --git a/src/cuda/utils/cuda_helper.cu b/src/backends/cuda/utils/cuda_helper.cu
similarity index 100%
rename from src/cuda/utils/cuda_helper.cu
rename to src/backends/cuda/utils/cuda_helper.cu
diff --git a/src/cuda/utils/vector.cu b/src/backends/cuda/utils/vector.cu
similarity index 100%
rename from src/cuda/utils/vector.cu
rename to src/backends/cuda/utils/vector.cu
diff --git a/src/cuda/layers/activation.cu b/src/layers/activation.cu
similarity index 100%
rename from src/cuda/layers/activation.cu
rename to src/layers/activation.cu
diff --git a/src/layers/add.cpp b/src/layers/add.cpp
new file mode 100644
index 0000000..8d101f5
--- /dev/null
+++ b/src/layers/add.cpp
@@ -0,0 +1,44 @@
+#include "add.hpp"
+
+#include <cstddef>
+
+using namespace CUDANet::Layers;
+
+
+Add::Add(int inputSize)
+    : inputSize(inputSize) {
+
+    output = new float[inputSize];
+
+#ifdef USE_CUDA
+    initCUDA();
+#endif
+
+}
+
+
+Add::~Add() {
+#ifdef USE_CUDA
+    delCUDA();
+#endif
+}
+
+
+float* Add::forward(const float* inputA, const float* inputB) {
+
+#ifdef USE_CUDA
+    return forwardCUDA(inputA, inputB);
+#else
+    return forwardCPU(inputA, inputB);
+#endif
+
+}
+
+float* Add::forwardCPU(const float* inputA, const float* inputB) {
+    for (size_t i = 0; i < inputSize; i++)
+    {
+        output[i] = inputA[i] + inputB[i];
+    }
+
+    return output;
+}
\ No newline at end of file
diff --git a/src/cuda/layers/avg_pooling.cu b/src/layers/avg_pooling.cu
similarity index 100%
rename from src/cuda/layers/avg_pooling.cu
rename to src/layers/avg_pooling.cu
diff --git a/src/cuda/layers/batch_norm.cu b/src/layers/batch_norm.cu
similarity index 100%
rename from src/cuda/layers/batch_norm.cu
rename to src/layers/batch_norm.cu
diff --git a/src/cuda/layers/concat.cu b/src/layers/concat.cu
similarity index 100%
rename from src/cuda/layers/concat.cu
rename to src/layers/concat.cu
diff --git a/src/cuda/layers/conv2d.cu b/src/layers/conv2d.cu
similarity index 100%
rename from src/cuda/layers/conv2d.cu
rename to src/layers/conv2d.cu
diff --git a/src/cuda/layers/dense.cu b/src/layers/dense.cu
similarity index 100%
rename from src/cuda/layers/dense.cu
rename to src/layers/dense.cu
diff --git a/src/cuda/layers/input.cu b/src/layers/input.cu
similarity index 100%
rename from src/cuda/layers/input.cu
rename to src/layers/input.cu
diff --git a/src/cuda/layers/max_pooling.cu b/src/layers/max_pooling.cu
similarity index 100%
rename from src/cuda/layers/max_pooling.cu
rename to src/layers/max_pooling.cu
diff --git a/src/cuda/layers/output.cu b/src/layers/output.cu
similarity index 100%
rename from src/cuda/layers/output.cu
rename to src/layers/output.cu
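
Note (not part of the patch): a minimal CPU-only usage sketch of the restructured Add layer, assuming a build without USE_CUDA so that forward() dispatches to forwardCPU() and returns the layer-owned host buffer. The main() driver, vector contents, and size below are illustrative only; the Add class and CUDANet::Layers namespace come from the patch above.

// Illustrative sketch only -- not part of the patch.
// Assumes a build without USE_CUDA, so Add::forward() runs forwardCPU().
#include <cstdio>
#include <vector>

#include "add.hpp"

int main() {
    const int n = 4;
    std::vector<float> a = {1.0f, 2.0f, 3.0f, 4.0f};
    std::vector<float> b = {10.0f, 20.0f, 30.0f, 40.0f};

    // The layer allocates its own output buffer of size n in the constructor.
    CUDANet::Layers::Add add(n);

    // forward() returns a pointer to the layer-owned host output buffer.
    const float* sum = add.forward(a.data(), b.data());

    for (int i = 0; i < n; ++i) {
        std::printf("%.1f\n", sum[i]);  // prints 11.0, 22.0, 33.0, 44.0
    }
    return 0;
}

In a CUDA build the same forward() call would instead expect device pointers and return the device buffer d_output, mirroring the forwardCUDA() path in src/backends/cuda/layers/add.cu.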