From 64eac7050b6af9e1711f3aa30e2fe3e317392087 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Tue, 18 Nov 2025 19:33:51 +0100
Subject: [PATCH] WIP Migrate Dense layer

---
 include/backend.hpp             |   2 +-
 include/backend/cpu.hpp         |   2 +-
 include/backend/cuda.cuh        |   2 +-
 include/layers/dense.hpp        | 133 ++++++++++----------------------
 include/tensor.hpp              |   5 ++
 src/backends/cuda/tensor_ops.cu |  10 +--
 src/backends/tensor.cpp         |   4 +
 src/layers/dense.cpp            |  90 ++++++++-------------
 8 files changed, 90 insertions(+), 158 deletions(-)

diff --git a/include/backend.hpp b/include/backend.hpp
index 8da3f2d..e8d397a 100644
--- a/include/backend.hpp
+++ b/include/backend.hpp
@@ -17,7 +17,7 @@ public:
 
     // Tensor ops
     virtual void print(const CUDANet::Tensor &input) = 0;
-    virtual void clear(CUDANet::Tensor &input) = 0;
+    virtual void zero(CUDANet::Tensor &input) = 0;
 
    virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0;
    virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0;
diff --git a/include/backend/cpu.hpp b/include/backend/cpu.hpp
index beb65b1..ad261bb 100644
--- a/include/backend/cpu.hpp
+++ b/include/backend/cpu.hpp
@@ -13,7 +13,7 @@ public:
 
     // Tensor ops
     void print(const CUDANet::Tensor &input) override;
-    void clear(CUDANet::Tensor &input) override;
+    void zero(CUDANet::Tensor &input) override;
 
    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
    void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;
diff --git a/include/backend/cuda.cuh b/include/backend/cuda.cuh
index e08ce34..5045e28 100644
--- a/include/backend/cuda.cuh
+++ b/include/backend/cuda.cuh
@@ -13,7 +13,7 @@ public:
 
     // Tensor ops
     void print(const CUDANet::Tensor &input) override;
-    void clear(CUDANet::Tensor &input) override;
+    void zero(CUDANet::Tensor &input) override;
 
    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
    void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;
diff --git a/include/layers/dense.hpp b/include/layers/dense.hpp
index 24fc2d4..d6bee21 100644
--- a/include/layers/dense.hpp
+++ b/include/layers/dense.hpp
@@ -1,9 +1,8 @@
-#ifndef CUDANET_DENSE_LAYER_H
-#define CUDANET_DENSE_LAYER_H
+#pragma once
 
 #include <vector>
 
-#include "activation.hpp"
+#include "backend.hpp"
 #include "layer.hpp"
 
 namespace CUDANet::Layers {
@@ -12,121 +11,67 @@
  * @brief Dense (fully connected) layer
  *
  */
-class Dense : public WeightedLayer {
+class Dense : public Layer {
 public:
-    /**
-     * @brief Construct a new Dense layer
-     *
-     * @param inputSize Size of the input vector
-     * @param outputSize Size of the output vector
-     * @param activationType Activation function type ('RELU', 'SIGMOID',
-     * 'SOFTMAX' or 'NONE')
-     */
-    Dense(int inputSize, int outputSize, Layers::ActivationType activationType);
-    /**
-     * @brief Destroy the Dense layer
-     *
-     */
+    Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape);
+
     ~Dense();
 
-    /**
-     * @brief Forward pass of the dense layer
-     *
-     * @param d_input Device pointer to the input vector
-     * @return Device pointer to the output vector
-     */
-    float* forward(const float* d_input);
+    CUDANet::Tensor& forward(CUDANet::Tensor &input);
 
-    /**
-     * @brief Set the weights of the layer
-     *
-     * @param weights Pointer to vector of weights
-     */
-    void setWeights(const float* weights);
+    CUDANet::Shape input_shape();
 
-    /**
-     * @brief Get the weights of the layer
-     *
-     * @return Vector of weights
-     */
-    std::vector<float> getWeights();
+    CUDANet::Shape output_shape();
 
-    /**
-     * @brief Set the biases of the layer
-     *
-     * @param biases Pointer to vector of biases
-     */
-    void setBiases(const float* biases);
+    size_t input_size();
 
-    /**
-     * @brief Get the biases of the layer
-     *
-     * @return Vector of biases
-     */
-    std::vector<float> getBiases();
+    size_t output_size();
 
-    /**
-     * @brief Get output size
-     *
-     * @return int output size
-     */
-    int getOutputSize();
+    void set_weights(CUDANet::Tensor &input);
 
-    /**
-     * @brief Get input size
-     *
-     * @return int input size
-     */
-    int getInputSize();
+    CUDANet::Tensor& get_weights();
+
+    void set_biases(CUDANet::Tensor &input);
+
+    CUDANet::Tensor& get_biases();
 
 private:
-    int inputSize;
-    int outputSize;
+    CUDANet::Backend *backend;
 
-    std::vector<float> weights;
-    std::vector<float> biases;
+    CUDANet::Shape in_shape;
+    CUDANet::Shape out_shape;
 
-    Layers::Activation* activation;
+    CUDANet::Tensor weights;
+    CUDANet::Tensor biases;
 
-    /**
-     * @brief Initialize the weights to zeros
-     *
-     */
-    void initializeWeights();
-    /**
-     * @brief Initialize the biases to zeros
-     *
-     */
-    void initializeBiases();
+    void init_weights();
+    void init_biases();
 
-    float* forwardCPU(const float* input);
+// #ifdef USE_CUDA
+//     float* d_output;
 
-#ifdef USE_CUDA
-    float* d_output;
+//     float* d_weights;
+//     float* d_biases;
 
-    float* d_weights;
-    float* d_biases;
+//     // Precompute kernel launch parameters
+//     int forwardGridSize;
+//     int biasGridSize;
 
-    // Precompute kernel launch parameters
-    int forwardGridSize;
-    int biasGridSize;
+//     /**
+//      * @brief Copy the weights and biases to the device
+//      *
+//      */
+//     void toCuda();
 
-    /**
-     * @brief Copy the weights and biases to the device
-     *
-     */
-    void toCuda();
+//     void initCUDA();
+//     void delCUDA();
 
-    void initCUDA();
-    void delCUDA();
-
-    float* forwardCUDA(const float* d_input);
-#endif
+//     float* forwardCUDA(const float* d_input);
+// #endif
 };
 
 }  // namespace CUDANet::Layers
-
-#endif  // CUDANET_DENSE_LAYER_H
diff --git a/include/tensor.hpp b/include/tensor.hpp
index 56b6848..5e074b9 100644
--- a/include/tensor.hpp
+++ b/include/tensor.hpp
@@ -33,6 +33,11 @@ public:
     template <typename T>
     T* data();
 
+    void zero();
+
+    template <typename T>
+    void set_data(T *data);
+
 private:
     Shape shape;
     DType dtype;
diff --git a/src/backends/cuda/tensor_ops.cu b/src/backends/cuda/tensor_ops.cu
index 508d6cf..ef9e256 100644
--- a/src/backends/cuda/tensor_ops.cu
+++ b/src/backends/cuda/tensor_ops.cu
@@ -1,13 +1,13 @@
 #include <iostream>
 
-#include "backend/backend.hpp"
+#include "backend.hpp"
 #include "backend/cuda.cuh"
 #include "utils/cuda_helper.cuh"
 #include "kernels/matmul.cuh"
 
 using namespace CUDANet::Backend;
 
-void CUDA::print(const CUDANet::Backend::Tensor &input) {
+void CUDA::print(const CUDANet::Tensor &input) {
     auto length = input.numel();
 
     std::vector<float> h_vec(input.numel());
@@ -22,11 +22,11 @@ void CUDA::print(const CUDANet::Backend::Tensor &input) {
     std::cout << std::endl;
 }
 
-void CUDA::clear(CUDANet::Backend::Tensor &input) {
+void CUDA::zero(CUDANet::Tensor &input) {
     CUDA_CHECK(cudaMemset(input.data<float>(), 0, sizeof(float) * input.numel()));
 }
 
-void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) {
+void CUDA::sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) {
     auto length = input.numel();
 
     const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -45,7 +45,7 @@ void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &
     }
 }
 
-void CUDA::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) {
+void CUDA::max(const CUDANet::Tensor &input, CUDANet::Tensor &max) {
     auto length = input.numel();
     const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
diff --git a/src/backends/tensor.cpp b/src/backends/tensor.cpp
index 1026e3c..f15a7e9 100644
--- a/src/backends/tensor.cpp
+++ b/src/backends/tensor.cpp
@@ -50,3 +50,7 @@ template <typename T>
 T* Tensor::data() {
     return static_cast<T*>(d_ptr);
 }
+
+void Tensor::zero() {
+    backend->zero(*this);
+}
diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp
index 61f9ab1..245281c 100644
--- a/src/layers/dense.cpp
+++ b/src/layers/dense.cpp
@@ -1,80 +1,58 @@
-#include "dense.hpp"
-
+#include <format>
 #include <stdexcept>
 
-#include "activation.hpp"
+#include "dense.hpp"
 
 using namespace CUDANet::Layers;
 
-Dense::Dense(int inputSize, int outputSize, ActivationType activationType)
-    : inputSize(inputSize), outputSize(outputSize) {
+Dense::Dense(CUDANet::Backend *backend, CUDANet::Shape input_shape, CUDANet::Shape output_shape)
+    : backend(backend), in_shape(input_shape), out_shape(output_shape) {
     // Allocate memory for weights and biases
-    weights.resize(outputSize * inputSize);
-    biases.resize(outputSize);
-
-    initializeWeights();
-    initializeBiases();
+    if (input_shape.size() != 1) {
+        throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", input_shape));
+    }
+
+    if (output_shape.size() != 1) {
+        throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", output_shape));
+    }
 
-    activation = new Activation(activationType, outputSize);
+    auto input_len = input_shape[0];
+    auto output_len = output_shape[0];
 
-#ifdef USE_CUDA
-    initCUDA();
-#endif
+    weights = CUDANet::Tensor{Shape(input_len * output_len), CUDANet::DType::FLOAT32, backend};
+    biases = CUDANet::Tensor(Shape(output_len), CUDANet::DType::FLOAT32, backend);
+
+    weights.zero();
+    biases.zero();
 }
 
-Dense::~Dense() {
-    delete activation;
-#ifdef USE_CUDA
-    delCUDA();
-#endif
+CUDANet::Tensor& Dense::forward(CUDANet::Tensor &input) { throw std::logic_error("Not implemented"); }
+
+CUDANet::Shape Dense::input_shape() {
+    return in_shape;
 }
 
-void Dense::initializeWeights() {
-    std::fill(weights.begin(), weights.end(), 0.0f);
+CUDANet::Shape Dense::output_shape() {
+    return out_shape;
 }
 
-void Dense::initializeBiases() {
-    std::fill(biases.begin(), biases.end(), 0.0f);
-}
+size_t Dense::input_size() {
+    return in_shape[0];
+}
 
-float* Dense::forwardCPU(const float* input) {
-    throw std::logic_error("Not implemented");
-}
+size_t Dense::output_size() {
+    return out_shape[0];
+}
 
-float* Dense::forward(const float* input) {
-#ifdef USE_CUDA
-    return forwardCUDA(input);
-#else
-    return forwardCPU(input);
-#endif
-}
+void Dense::set_weights(CUDANet::Tensor &input) { throw std::logic_error("Not implemented"); }
 
-void Dense::setWeights(const float* weights_input) {
-    std::copy(weights_input, weights_input + weights.size(), weights.begin());
-#ifdef USE_CUDA
-    toCuda();
-#endif
-}
-
-std::vector<float> Dense::getWeights() {
+CUDANet::Tensor& Dense::get_weights() {
     return weights;
 }
 
-void Dense::setBiases(const float* biases_input) {
-    std::copy(biases_input, biases_input + biases.size(), biases.begin());
-#ifdef USE_CUDA
-    toCuda();
-#endif
-}
+void Dense::set_biases(CUDANet::Tensor &input) { throw std::logic_error("Not implemented"); }
 
-std::vector<float> Dense::getBiases() {
+CUDANet::Tensor& Dense::get_biases() {
     return biases;
-}
-
-int Dense::getOutputSize() {
-    return outputSize;
-}
-
-int Dense::getInputSize() {
-    return inputSize;
 }
\ No newline at end of file
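Note (not part of the patch): below is a minimal usage sketch of the migrated Dense API, pieced together only from the constructors and methods this diff introduces. The concrete CPU backend class name (CUDANet::CPU), its default constructor, and the include paths are assumptions, not something defined by this patch, and forward(), set_weights() and set_biases() are still unimplemented stubs in this WIP.

// Usage sketch only -- CUDANet::CPU and the include paths are assumed, not
// defined by this patch; adjust them to the real backend header and class name.
#include "backend/cpu.hpp"
#include "layers/dense.hpp"
#include "tensor.hpp"

int main() {
    CUDANet::CPU backend;  // hypothetical concrete CPU backend

    // A rank-1 784 -> 10 fully connected layer, matching the new constructor's
    // requirement that both shapes have a single dimension
    CUDANet::Layers::Dense dense(&backend, CUDANet::Shape(784), CUDANet::Shape(10));

    // Parameters are owned by the layer as backend tensors and start zeroed
    CUDANet::Tensor &weights = dense.get_weights();
    CUDANet::Tensor &biases = dense.get_biases();
    backend.print(weights);
    backend.print(biases);

    // Inputs are created the same way the layer creates its parameters
    CUDANet::Tensor input(CUDANet::Shape(784), CUDANet::DType::FLOAT32, &backend);
    input.zero();

    // forward() is still a WIP stub; once implemented it should return a
    // reference to the layer's output tensor:
    // CUDANet::Tensor &output = dense.forward(input);

    return 0;
}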