Implement concat layer

2024-03-17 21:38:29 +01:00
parent 9aab7cdb10
commit aac0c3a826
9 changed files with 121 additions and 33 deletions


@@ -14,6 +14,7 @@ set(LIBRARY_SOURCES
     src/kernels/matmul.cu
     src/layers/dense.cu
     src/layers/conv2d.cu
+    src/layers/concat.cu
     src/layers/input.cu
     src/layers/activation.cu
 )


@@ -11,7 +11,7 @@ Convolutional Neural Network inference library running on CUDA.
 - [x] Conv2d layer
 - [ ] Max pooling
 - [ ] Average pooling
-- [ ] Concat layer
+- [x] Concat layer
 - [x] Sigmoid activation
 - [x] ReLU activation
 - [x] Softmax activation

include/layers/concat.cuh (new file, 45 lines)

@@ -0,0 +1,45 @@
#ifndef CUDANET_CONCAT_LAYER_H
#define CUDANET_CONCAT_LAYER_H

#include <ilayer.cuh>

namespace CUDANet::Layers {

/**
 * @brief Concatenates the outputs of two layers into a single contiguous output
 *
 */
class Concat {
  public:
    /**
     * @brief Create a new Concat layer
     *
     * @param inputASize Size of the first input
     * @param inputBSize Size of the second input
     */
    Concat(const unsigned int inputASize, const unsigned int inputBSize);

    /**
     * @brief Destroy the Concat layer
     *
     */
    ~Concat();

    /**
     * @brief Forward pass of the concat layer
     *
     * @param d_input_A Device pointer to the first input
     * @param d_input_B Device pointer to the second input
     * @return Device pointer to the output
     */
    float* forward(const float* d_input_A, const float* d_input_B);

  private:
    unsigned int inputASize;
    unsigned int inputBSize;

    float* d_output;
};

}  // namespace CUDANet::Layers

#endif  // CUDANET_CONCAT_LAYER_H


@@ -64,18 +64,6 @@ class ILayer {
      * @brief Copy the weights and biases to the device
      */
     virtual void toCuda() = 0;
-    int inputSize;
-    int outputSize;
-    float* d_output;
-    float* d_weights;
-    float* d_biases;
-    std::vector<float> weights;
-    std::vector<float> biases;
 };
 } // namespace CUDANet::Layers


@@ -9,7 +9,7 @@ namespace CUDANet::Layers {
  * @brief Input layer, just copies the input to the device
  *
  */
-class Input : public ILayer {
+class Input {
   public:
    /**
     * @brief Create a new Input layer
@@ -32,15 +32,7 @@ class Input : public ILayer {
     */
    float* forward(const float* input);
-    void setWeights(const float* weights);
-    void setBiases(const float* biases);
   private:
-    void initializeWeights();
-    void initializeBiases();
-    void toCuda();
    int inputSize;
    float* d_output;
};

src/layers/concat.cu (new file, 32 lines)

@@ -0,0 +1,32 @@
#include "concat.cuh"
#include "cuda_helper.cuh"

using namespace CUDANet;

Layers::Concat::Concat(const unsigned int inputASize, const unsigned int inputBSize)
    : inputASize(inputASize), inputBSize(inputBSize) {
    // Allocate a single device buffer large enough to hold both inputs
    d_output = nullptr;
    CUDA_CHECK(cudaMalloc(
        (void**)&d_output, sizeof(float) * (inputASize + inputBSize)
    ));
}

Layers::Concat::~Concat() {
    cudaFree(d_output);
}

float* Layers::Concat::forward(const float* d_input_A, const float* d_input_B) {
    // Copy input A into the first inputASize elements of the output buffer
    CUDA_CHECK(cudaMemcpy(
        d_output, d_input_A, sizeof(float) * inputASize, cudaMemcpyDeviceToDevice
    ));
    // Copy input B directly after A
    CUDA_CHECK(cudaMemcpy(
        d_output + inputASize, d_input_B,
        sizeof(float) * inputBSize, cudaMemcpyDeviceToDevice
    ));
    return d_output;
}
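For reference, a minimal usage sketch of the new layer (hypothetical names: d_a and d_b are assumed to be valid device buffers holding 5 and 6 floats, filled elsewhere):

// Hypothetical usage sketch, not part of this commit.
// Concatenate a 5-element and a 6-element device buffer.
CUDANet::Layers::Concat concat(5, 6);
float* d_cat = concat.forward(d_a, d_b);
// d_cat now holds the 5 values of d_a followed by the 6 values of d_b.
// The buffer is owned by the layer and freed by its destructor.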


@@ -26,11 +26,3 @@ float* Layers::Input::forward(const float* input) {
     return d_output;
 }
-void Layers::Input::setWeights(const float* weights) {}
-void Layers::Input::setBiases(const float* biases) {}
-void Layers::Input::initializeWeights() {}
-void Layers::Input::initializeBiases() {}
-void Layers::Input::toCuda() {}


@@ -3,10 +3,11 @@ include_directories(${GTEST_INCLUDE_DIRS})
 add_executable(test_main
     EXCLUDE_FROM_ALL
-    layers/test_dense.cu
-    layers/test_conv2d.cu
-    layers/test_input.cu
     layers/test_activation.cu
+    layers/test_concat.cu
+    layers/test_conv2d.cu
+    layers/test_dense.cu
+    layers/test_input.cu
     kernels/test_activation_functions.cu
     kernels/test_padding.cu
     kernels/test_matmul.cu


@@ -0,0 +1,37 @@
#include "concat.cuh"

#include <gtest/gtest.h>
#include <cuda_runtime.h>

#include <vector>

TEST(ConcatLayerTest, Init) {
    std::vector<float> inputA = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};
    std::vector<float> inputB = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f};

    CUDANet::Layers::Concat concat(5, 6);

    // Copy both inputs to the device
    float* d_inputA;
    float* d_inputB;
    cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
    cudaMalloc((void**)&d_inputB, sizeof(float) * 6);
    cudaMemcpy(
        d_inputA, inputA.data(), sizeof(float) * 5, cudaMemcpyHostToDevice
    );
    cudaMemcpy(
        d_inputB, inputB.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
    );

    float* d_output = concat.forward(d_inputA, d_inputB);

    // Copy the concatenated result back to the host
    std::vector<float> output(11);
    cudaMemcpy(
        output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost
    );

    // Output should be input A followed by input B
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ(output[i], inputA[i]);
    }
    for (int i = 0; i < 6; ++i) {
        EXPECT_EQ(output[i + 5], inputB[i]);
    }

    // Free only the input buffers; the output buffer is owned by the
    // Concat layer and released by its destructor.
    cudaFree(d_inputA);
    cudaFree(d_inputB);
}