Implement Add layer

2025-12-22 14:24:22 +00:00 · 2024-03-18 20:37:13 +01:00
parent d9c6c663c8
commit 8d14b74f66
3 changed files with 70 additions and 0 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,7 @@ set(LIBRARY_SOURCES
    src/kernels/activation_functions.cu
    src/kernels/convolution.cu
    src/kernels/matmul.cu
    src/layers/add.cu
    src/layers/dense.cu
    src/layers/conv2d.cu
    src/layers/concat.cu
--- a/include/layers/add.cuh
+++ b/include/layers/add.cuh
@@ -0,0 +1,40 @@
 #ifndef CUDANET_ADD_LAYER_H
 #define CUDANET_ADD_LAYER_H
 namespace CUDANet::Layers {
 /**
  * @brief Layer that adds two device-resident float arrays of equal length.
  *
  * The layer owns a single device output buffer: it is allocated once in the
  * constructor, reused by every forward() call, and released in the
  * destructor.
  *
  * NOTE(review): the class holds a raw owning device pointer but declares no
  * copy constructor / copy assignment, so a copied instance would free the
  * same buffer twice. Consider deleting the copy operations - confirm no
  * caller copies Add objects first.
  */
 class Add {
  public:
    /**
     * @brief Create a new Add layer
     *
     * Allocates the device output buffer and precomputes the launch grid
     * size for inputSize elements.
     *
     * @param inputSize Number of float elements in each input array
     */
    Add(int inputSize);
    /**
     * @brief Destroy the Add layer and free its device output buffer
     *
     */
    ~Add();
    /**
     * @brief Adds the two inputs
     *
     * Both inputs are expected to hold inputSize floats (the size passed to
     * the constructor). The returned pointer refers to the layer's internal
     * buffer, so it is overwritten by the next forward() call and must not
     * be freed by the caller.
     *
     * @param d_inputA Device pointer to the first input
     * @param d_inputB Device pointer to the second input
     *
     * @return Device pointer to the output
     */
    float* forward(const float* d_inputA, const float* d_inputB);
  private:
    // Number of float elements per input (and in the output buffer).
    int inputSize;
    // Number of thread blocks used when launching the addition kernel.
    int gridSize;
    // Device buffer owned by the layer; holds the result of forward().
    float* d_output;
 };
 }  // namespace CUDANet::Layers
 #endif  // CUDANET_ADD_LAYER_H
--- a/src/layers/add.cu
+++ b/src/layers/add.cu
@@ -0,0 +1,29 @@
 #include "add.cuh"
 #include "matmul.cuh"
 #include "cuda_helper.cuh"
 using namespace CUDANet;
 /**
  * Builds an Add layer for inputs of `inputSize` floats.
  *
  * The grid size is the ceiling of inputSize / BLOCK_SIZE, so that one thread
  * per element is available when the kernel is launched with BLOCK_SIZE
  * threads per block. The device output buffer is allocated here once and
  * reused by every forward pass.
  */
 Layers::Add::Add(int inputSize)
    : inputSize(inputSize),
      gridSize((inputSize + BLOCK_SIZE - 1) / BLOCK_SIZE),
      d_output(nullptr) {
    // Room for one float result per input element.
    const auto outputBytes = sizeof(float) * inputSize;
    CUDA_CHECK(cudaMalloc((void**)&d_output, outputBytes));
 }
 /**
  * Releases the device output buffer allocated by the constructor.
  * The result of cudaFree is not inspected.
  */
 Layers::Add::~Add() {
    cudaFree(this->d_output);
 }
 /**
  * Launches Kernels::vec_vec_add over the two inputs and returns the layer's
  * output buffer.
  *
  * The kernel is launched with gridSize blocks of BLOCK_SIZE threads and is
  * given inputSize as the element count. No synchronization is performed
  * here, so the caller must synchronize (or use a blocking copy) before
  * reading the result on the host.
  *
  * @param d_inputA Device pointer to the first input (inputSize floats)
  * @param d_inputB Device pointer to the second input (inputSize floats)
  *
  * @return Device pointer to the layer-owned output buffer; it is reused by
  *         the next call and must not be freed by the caller.
  */
 float* Layers::Add::forward(const float* d_inputA, const float* d_inputB) {
    Kernels::vec_vec_add<<<gridSize, BLOCK_SIZE>>>(
        d_inputA, d_inputB, d_output, inputSize
    );
    // NOTE(review): launch errors are not checked here; consider
    // CUDA_CHECK(cudaGetLastError()) as done for cudaMalloc in the constructor.

    // Previously the function fell off the end without returning a value,
    // which is undefined behavior for a non-void function.
    return d_output;
 }