From aeb1739c4696e295e0c8821428ef2b7cf1e8a883 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Fri, 21 Nov 2025 23:52:58 +0100
Subject: [PATCH] Migrate concat layer

---
 include/backend.hpp                |  6 ++++
 include/backend/cuda.cuh           |  6 ++++
 include/layers/concat.hpp          | 44 ++++++--------------------
 include/layers/conv2d.hpp          |  3 ---
 include/layers/dense.hpp           |  2 --
 include/shape.hpp                  | 24 ++++++++++++++
 src/backends/cuda/layer_ops.cu     | 20 ++++++++++++
 src/backends/cuda/layers/concat.cu | 31 ------------------
 src/layers/concat.cpp              | 50 ++++++++++++++----------------
 9 files changed, 90 insertions(+), 96 deletions(-)
 delete mode 100644 src/backends/cuda/layers/concat.cu

diff --git a/include/backend.hpp b/include/backend.hpp
index b17882b..dc71ded 100644
--- a/include/backend.hpp
+++ b/include/backend.hpp
@@ -84,6 +84,12 @@ class Backend {
         CUDANet::Tensor& running_var,
         CUDANet::Tensor& epsilon
     ) = 0;
+
+    virtual CUDANet::Tensor& concat(
+        CUDANet::Tensor& input_a,
+        CUDANet::Tensor& input_b,
+        CUDANet::Tensor& output
+    ) = 0;
 };
 
 }  // namespace CUDANet
\ No newline at end of file
diff --git a/include/backend/cuda.cuh b/include/backend/cuda.cuh
index ba7b255..638650b 100644
--- a/include/backend/cuda.cuh
+++ b/include/backend/cuda.cuh
@@ -80,6 +80,12 @@ class CUDA : public Backend {
         CUDANet::Tensor& running_var,
         CUDANet::Tensor& epsilon
     ) override;
+
+    CUDANet::Tensor& concat(
+        CUDANet::Tensor& input_a,
+        CUDANet::Tensor& input_b,
+        CUDANet::Tensor& output
+    ) override;
 };
 
 }  // namespace CUDANet::Backend
\ No newline at end of file
diff --git a/include/layers/concat.hpp b/include/layers/concat.hpp
index 543f7e2..400006c 100644
--- a/include/layers/concat.hpp
+++ b/include/layers/concat.hpp
@@ -1,5 +1,4 @@
-#ifndef CUDANET_CONCAT_LAYER_H
-#define CUDANET_CONCAT_LAYER_H
+#pragma once
 
 #include "layer.hpp"
@@ -11,47 +10,24 @@ namespace CUDANet::Layers {
  */
 class Concat {
   public:
 
-    /**
-     * @brief Create a new Concat layer
-     *
-     * @param inputASize Size of the first input
-     * @param inputBSize Size of the second input
-     */
-    Concat(const int inputASize, const int inputBSize);
-    /**
-     * @brief Destroy the Concat layer
-     *
-     */
+    Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::Backend *backend);
+
     ~Concat();
 
-    /**
-     * @brief Concatenates the two inputs
-     *
-     * @param d_input_A Device pointer to the first input
-     * @param d_input_B Device pointer to the second input
-     *
-     * @return Device pointer to the output
-     */
-    float* forward(const float* d_input_A, const float* d_input_B);
+    CUDANet::Tensor& forward(CUDANet::Tensor& input_a, CUDANet::Tensor& input_b);
 
-    int getOutputSize();
+    CUDANet::Shape output_shape();
 
   private:
-    int inputASize;
-    int inputBSize;
+    CUDANet::Shape a_shape;
+    CUDANet::Shape b_shape;
 
-    float* forwardCPU(const float* input_A, const float* input_B);
+    CUDANet::Shape out_shape;
+    CUDANet::Tensor output;
 
-#ifdef USE_CUDA
-    float* d_output;
-    float* forwardCUDA(const float* d_input_A, const float* d_input_B);
-
-    void initCUDA();
-    void delCUDA();
-#endif
+    CUDANet::Backend *backend;
 };
 
 }  // namespace CUDANet::Layers
 
-#endif  // CUDANET_CONCAT_LAYER_H
diff --git a/include/layers/conv2d.hpp b/include/layers/conv2d.hpp
index e3c4954..d520cf2 100644
--- a/include/layers/conv2d.hpp
+++ b/include/layers/conv2d.hpp
@@ -1,8 +1,5 @@
 #pragma once
 
-#include <vector>
-
-#include "activation.hpp"
 #include "layer.hpp"
 
 namespace CUDANet::Layers {
diff --git a/include/layers/dense.hpp b/include/layers/dense.hpp
index 41d53f2..83cee2e 100644
--- a/include/layers/dense.hpp
+++ b/include/layers/dense.hpp
@@ -1,7 +1,5 @@
 #pragma once
 
-#include <vector>
-
 #include "backend.hpp"
 #include "layer.hpp"
 
diff --git a/include/shape.hpp b/include/shape.hpp
index cc39128..88634bf 100644
--- a/include/shape.hpp
+++ b/include/shape.hpp
@@ -21,6 +21,30 @@ class InvalidShapeException : public std::runtime_error {
             actual
         )
     ) {}
+
+    InvalidShapeException(
+        const std::string& message,
+        const Shape& shape_a,
+        const Shape& shape_b
+    )
+        : std::runtime_error(
+            std::format(
+                "{}. Shape A: [{}], Shape B: [{}]",
+                message,
+                format_shape(shape_a),
+                format_shape(shape_b)
+            )
+        ) {}
+
+  private:
+    static std::string format_shape(const Shape& shape) {
+        std::string result;
+        for (size_t i = 0; i < shape.size(); ++i) {
+            if (i > 0) result += ", ";
+            result += std::to_string(shape[i]);
+        }
+        return result;
+    }
 };
 
 }  // namespace CUDANet
diff --git a/src/backends/cuda/layer_ops.cu b/src/backends/cuda/layer_ops.cu
index 5b05fb9..11079ad 100644
--- a/src/backends/cuda/layer_ops.cu
+++ b/src/backends/cuda/layer_ops.cu
@@ -211,4 +211,24 @@ CUDANet::Tensor& CUDA::batch_norm(
         );
         CUDA_CHECK(cudaGetLastError());
     }
 }
+
+CUDANet::Tensor& CUDA::concat(
+    CUDANet::Tensor& input_a,
+    CUDANet::Tensor& input_b,
+    CUDANet::Tensor& output
+) {
+    CUDA_CHECK(cudaMemcpy(
+        output.data(), input_a.data(), input_a.size(),
+        cudaMemcpyDeviceToDevice
+    ));
+
+    CUDA_CHECK(cudaMemcpy(
+        output.data() + input_a.numel(), input_b.data(), input_b.size(),
+        cudaMemcpyDeviceToDevice
+    ));
+
+    CUDA_CHECK(cudaDeviceSynchronize());
+
+    return output;
+}
\ No newline at end of file
diff --git a/src/backends/cuda/layers/concat.cu b/src/backends/cuda/layers/concat.cu
deleted file mode 100644
index d93e469..0000000
--- a/src/backends/cuda/layers/concat.cu
+++ /dev/null
@@ -1,31 +0,0 @@
-#include "concat.hpp"
-#include "cuda_helper.cuh"
-
-using namespace CUDANet::Layers;
-
-void Concat::initCUDA() {
-    d_output = nullptr;
-    CUDA_CHECK(
-        cudaMalloc((void**)&d_output, sizeof(float) * (inputASize + inputBSize))
-    );
-}
-
-void Concat::delCUDA() {
-    cudaFree(d_output);
-}
-
-float* Concat::forwardCUDA(const float* d_input_A, const float* d_input_B) {
-    CUDA_CHECK(cudaMemcpy(
-        d_output, d_input_A, sizeof(float) * inputASize,
-        cudaMemcpyDeviceToDevice
-    ));
-
-    CUDA_CHECK(cudaMemcpy(
-        d_output + inputASize, d_input_B, sizeof(float) * inputBSize,
-        cudaMemcpyDeviceToDevice
-    ));
-
-    CUDA_CHECK(cudaDeviceSynchronize());
-
-    return d_output;
-}
\ No newline at end of file
diff --git a/src/layers/concat.cpp b/src/layers/concat.cpp
index ae1152e..cad4ca0 100644
--- a/src/layers/concat.cpp
+++ b/src/layers/concat.cpp
@@ -1,34 +1,32 @@
-#include <stdexcept>
-
 #include "concat.hpp"
 
 using namespace CUDANet::Layers;
 
-Concat::Concat(const int inputASize, const int inputBSize)
-    : inputASize(inputASize), inputBSize(inputBSize) {
-#ifdef USE_CUDA
-    initCUDA();
-#endif
+Concat::Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::Backend *backend)
+    : a_shape(a_shape), b_shape(b_shape), backend(backend) {
+    if (a_shape[0] != b_shape[0] || a_shape[1] != b_shape[1]) {
+        throw InvalidShapeException(
+            "Concat requires matching batch and height dimensions", a_shape,
+            b_shape
+        );
+    }
+
+    out_shape = {a_shape[0], a_shape[1], a_shape[2] + b_shape[2]};
+    output = CUDANet::Tensor(out_shape, CUDANet::DType::FLOAT32, backend);
 }
 
-Concat::~Concat() {
-#ifdef USE_CUDA
-    delCUDA();
-#endif
+Concat::~Concat() {}
+
+CUDANet::Tensor& Concat::forward(CUDANet::Tensor& input_a, CUDANet::Tensor& input_b) {
+    output.zero();
+    backend->concat(
+        input_a,
+        input_b,
+        output
+    );
+    return output;
 }
 
-float* Concat::forwardCPU(const float* input_A, const float* input_B) {
-    throw std::logic_error("Not implemented");
-}
-
-float* Concat::forward(const float* input_A, const float* input_B) {
-#ifdef USE_CUDA
-    return forwardCUDA(input_A, input_B);
-#else
-    return forwardCPU(input_A, input_B);
-#endif
-}
-
-int Concat::getOutputSize() {
-    return inputASize + inputBSize;
-};
+CUDANet::Shape Concat::output_shape() {
+    return out_shape;
+}
\ No newline at end of file
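
Usage sketch (not part of the patch). A minimal illustration of the migrated
API under stated assumptions: "backend" is a valid CUDANet::Backend* obtained
elsewhere, the Tensor constructor matches the call made inside Concat's
constructor, and the shapes follow the three-dimensional convention the
constructor's check implies (dims 0 and 1 must match; dim 2 is concatenated):

    CUDANet::Shape a_shape = {1, 8, 16};
    CUDANet::Shape b_shape = {1, 8, 4};   // dims 0 and 1 match a_shape

    CUDANet::Tensor a(a_shape, CUDANet::DType::FLOAT32, backend);
    CUDANet::Tensor b(b_shape, CUDANet::DType::FLOAT32, backend);

    CUDANet::Layers::Concat concat(a_shape, b_shape, backend);
    CUDANet::Tensor& out = concat.forward(a, b);
    // concat.output_shape() == {1, 8, 20}: dim 2 is a_shape[2] + b_shape[2];
    // the backend writes input_a's buffer into output, then input_b's
    // directly after it.

Shapes that disagree on dim 0 or dim 1 now throw InvalidShapeException, whose
new two-shape overload formats both offending shapes into the message.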