diff --git a/include/cudanet.cuh b/include/cudanet.cuh
index aaf5bdb..c5dcb78 100644
--- a/include/cudanet.cuh
+++ b/include/cudanet.cuh
@@ -13,7 +13,7 @@
 #include "add.hpp"
 #include "avg_pooling.hpp"
 #include "batch_norm.cuh"
-#include "concat.cuh"
+#include "concat.hpp"
 #include "conv2d.cuh"
 #include "dense.hpp"
 #include "input.cuh"
diff --git a/include/layers/concat.cuh b/include/layers/concat.hpp
similarity index 78%
rename from include/layers/concat.cuh
rename to include/layers/concat.hpp
index 753be48..543f7e2 100644
--- a/include/layers/concat.cuh
+++ b/include/layers/concat.hpp
@@ -1,6 +1,8 @@
 #ifndef CUDANET_CONCAT_LAYER_H
 #define CUDANET_CONCAT_LAYER_H
 
+#include "layer.hpp"
+
 namespace CUDANet::Layers {
 
 /**
@@ -11,7 +13,7 @@ class Concat {
   public:
     /**
      * @brief Create a new Concat layer
-     * 
+     *
      * @param inputASize Size of the first input
      * @param inputBSize Size of the second input
      */
@@ -19,7 +21,7 @@ class Concat {
 
     /**
      * @brief Destroy the Concat layer
-     * 
+     *
      */
     ~Concat();
 
@@ -28,7 +30,7 @@
      *
      * @param d_input_A Device pointer to the first input
      * @param d_input_B Device pointer to the second input
-     * 
+     *
      * @return Device pointer to the output
      */
     float* forward(const float* d_input_A, const float* d_input_B);
@@ -39,7 +41,15 @@
     int inputASize;
     int inputBSize;
 
+    float* forwardCPU(const float* input_A, const float* input_B);
+
+#ifdef USE_CUDA
     float* d_output;
+    float* forwardCUDA(const float* d_input_A, const float* d_input_B);
+
+    void initCUDA();
+    void delCUDA();
+#endif
 };
 
 } // namespace CUDANet::Layers
diff --git a/src/backends/cuda/layers/concat.cu b/src/backends/cuda/layers/concat.cu
new file mode 100644
index 0000000..d93e469
--- /dev/null
+++ b/src/backends/cuda/layers/concat.cu
@@ -0,0 +1,31 @@
+#include "concat.hpp"
+#include "cuda_helper.cuh"
+
+using namespace CUDANet::Layers;
+
+void Concat::initCUDA() {
+    d_output = nullptr;
+    CUDA_CHECK(
+        cudaMalloc((void**)&d_output, sizeof(float) * (inputASize + inputBSize))
+    );
+}
+
+void Concat::delCUDA() {
+    cudaFree(d_output);
+}
+
+float* Concat::forwardCUDA(const float* d_input_A, const float* d_input_B) {
+    CUDA_CHECK(cudaMemcpy(
+        d_output, d_input_A, sizeof(float) * inputASize,
+        cudaMemcpyDeviceToDevice
+    ));
+
+    CUDA_CHECK(cudaMemcpy(
+        d_output + inputASize, d_input_B, sizeof(float) * inputBSize,
+        cudaMemcpyDeviceToDevice
+    ));
+
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    return d_output;
+}
\ No newline at end of file
diff --git a/src/layers/concat.cpp b/src/layers/concat.cpp
new file mode 100644
index 0000000..ae1152e
--- /dev/null
+++ b/src/layers/concat.cpp
@@ -0,0 +1,34 @@
+#include <stdexcept>
+
+#include "concat.hpp"
+
+using namespace CUDANet::Layers;
+
+Concat::Concat(const int inputASize, const int inputBSize)
+    : inputASize(inputASize), inputBSize(inputBSize) {
+#ifdef USE_CUDA
+    initCUDA();
+#endif
+}
+
+Concat::~Concat() {
+#ifdef USE_CUDA
+    delCUDA();
+#endif
+}
+
+float* Concat::forwardCPU(const float* input_A, const float* input_B) {
+    throw std::logic_error("Not implemented");
+}
+
+float* Concat::forward(const float* input_A, const float* input_B) {
+#ifdef USE_CUDA
+    return forwardCUDA(input_A, input_B);
+#else
+    return forwardCPU(input_A, input_B);
+#endif
+}
+
+int Concat::getOutputSize() {
+    return inputASize + inputBSize;
+};
diff --git a/src/layers/concat.cu b/src/layers/concat.cu
deleted file mode 100644
index f7ef037..0000000
--- a/src/layers/concat.cu
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "concat.cuh"
-#include "cuda_helper.cuh"
-
-using namespace CUDANet::Layers;
-
-
-Concat::Concat(const int inputASize, const int inputBSize)
-    : inputASize(inputASize), inputBSize(inputBSize) {
-
-    d_output = nullptr;
-    CUDA_CHECK(cudaMalloc(
-        (void**)&d_output, sizeof(float) * (inputASize + inputBSize)
-    ));
-}
-
-Concat::~Concat() {
-    cudaFree(d_output);
-}
-
-float* Concat::forward(const float* d_input_A, const float* d_input_B) {
-    CUDA_CHECK(cudaMemcpy(
-        d_output, d_input_A, sizeof(float) * inputASize, cudaMemcpyDeviceToDevice
-    ));
-
-    CUDA_CHECK(cudaMemcpy(
-        d_output + inputASize, d_input_B,
-        sizeof(float) * inputBSize, cudaMemcpyDeviceToDevice
-    ));
-
-    CUDA_CHECK(cudaDeviceSynchronize());
-
-    return d_output;
-}
-
-int Concat::getOutputSize() {
-    return inputASize + inputBSize;
-};
diff --git a/test/cuda/layers/test_concat.cu b/test/cuda/layers/test_concat.cu
index 80b0f7d..80c3907 100644
--- a/test/cuda/layers/test_concat.cu
+++ b/test/cuda/layers/test_concat.cu
@@ -1,4 +1,4 @@
-#include "concat.cuh"
+#include "concat.hpp"
 #include 
 #include 
 #include 
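Usage sketch (not part of the diff above): call sites are unaffected by the backend split, because forward() resolves to forwardCUDA() or forwardCPU() at compile time via USE_CUDA. The snippet below is a minimal sketch assuming a CUDA build; the buffer names, sizes, and values are illustrative, not taken from the repository.

    #include <vector>
    #include <cuda_runtime.h>
    #include "concat.hpp"

    int main() {
        // Illustrative host inputs: 64 ones followed by 32 twos.
        std::vector<float> a(64, 1.0f), b(32, 2.0f);

        // Stage both inputs on the device (hypothetical buffers d_a, d_b).
        float *d_a = nullptr, *d_b = nullptr;
        cudaMalloc((void**)&d_a, a.size() * sizeof(float));
        cudaMalloc((void**)&d_b, b.size() * sizeof(float));
        cudaMemcpy(d_a, a.data(), a.size() * sizeof(float), cudaMemcpyHostToDevice);
        cudaMemcpy(d_b, b.data(), b.size() * sizeof(float), cudaMemcpyHostToDevice);

        // Under USE_CUDA, forward() dispatches to forwardCUDA(),
        // which copies both inputs back-to-back into d_output.
        CUDANet::Layers::Concat concat(64, 32);
        float* d_out = concat.forward(d_a, d_b);

        // d_out holds a followed by b: getOutputSize() == 96 floats.
        std::vector<float> out(concat.getOutputSize());
        cudaMemcpy(out.data(), d_out, out.size() * sizeof(float), cudaMemcpyDeviceToHost);

        cudaFree(d_a);
        cudaFree(d_b);
        return 0;  // ~Concat() frees d_output via delCUDA()
    }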