diff --git a/CMakeLists.txt b/CMakeLists.txt
index 5d95a76..4a12f29 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,6 +12,7 @@ set(LIBRARY_SOURCES
     src/kernels/activation_functions.cu
     src/kernels/convolution.cu
     src/kernels/matmul.cu
+    src/layers/add.cu
     src/layers/dense.cu
     src/layers/conv2d.cu
     src/layers/concat.cu
diff --git a/include/layers/add.cuh b/include/layers/add.cuh
new file mode 100644
index 0000000..653b699
--- /dev/null
+++ b/include/layers/add.cuh
@@ -0,0 +1,40 @@
+#ifndef CUDANET_ADD_LAYER_H
+#define CUDANET_ADD_LAYER_H
+
+namespace CUDANet::Layers {
+
+class Add {
+  public:
+    /**
+     * @brief Create a new Add layer
+     *
+     * @param inputSize Size of the input arrays
+     */
+    Add(int inputSize);
+
+    /**
+     * @brief Destroy the Add layer
+     *
+     */
+    ~Add();
+
+    /**
+     * @brief Adds the two inputs
+     *
+     * @param d_inputA Device pointer to the first input
+     * @param d_inputB Device pointer to the second input
+     *
+     * @return Device pointer to the output
+     */
+    float* forward(const float* d_inputA, const float* d_inputB);
+
+  private:
+    int inputSize;
+    int gridSize;
+
+    float* d_output;
+};
+
+} // namespace CUDANet::Layers
+
+#endif // CUDANET_ADD_LAYER_H
\ No newline at end of file
diff --git a/src/layers/add.cu b/src/layers/add.cu
new file mode 100644
index 0000000..4b528a9
--- /dev/null
+++ b/src/layers/add.cu
@@ -0,0 +1,31 @@
+#include "add.cuh"
+#include "matmul.cuh"
+#include "cuda_helper.cuh"
+
+using namespace CUDANet;
+
+
+// Allocates the device output buffer and precomputes the launch grid size.
+Layers::Add::Add(int inputSize)
+    : inputSize(inputSize) {
+
+    d_output = nullptr;
+    CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * inputSize));
+
+    gridSize = (inputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
+}
+
+
+Layers::Add::~Add() {
+    cudaFree(d_output);
+}
+
+
+// Element-wise sum of the two device input arrays into d_output.
+float* Layers::Add::forward(const float* d_inputA, const float* d_inputB) {
+
+    Kernels::vec_vec_add<<<gridSize, BLOCK_SIZE>>>(
+        d_inputA, d_inputB, d_output, inputSize
+    );
+    CUDA_CHECK(cudaGetLastError());
+
+    return d_output;
+}
\ No newline at end of file