WIP: Migrate Dense layer to the Backend interface

2025-12-22 22:34:22 +00:00 · 2025-11-18 21:12:47 +01:00
parent 64eac7050b
commit 7f203b8947
14 changed files with 116 additions and 221 deletions
--- a/include/backend.hpp
+++ b/include/backend.hpp
@@ -4,27 +4,41 @@

 #include "tensor.hpp"

-namespace CUDANet
-{   
-
-class Backend
-{
-public:
+namespace CUDANet {

+class Backend {
+  public:
    // Memory management
    virtual void* allocate(size_t bytes) = 0;
-    virtual void deallocate(void* ptr) = 0;
+    virtual void  deallocate(void* ptr)  = 0;

    // Tensor ops
-    virtual void print(const CUDANet::Tensor &input) = 0;
-    virtual void zero(CUDANet::Tensor &input) = 0;
-    virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0;
-    virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0;
+    virtual void print(const CUDANet::Tensor& input) = 0;
+    virtual void zero(CUDANet::Tensor& input)        = 0;
+
+    virtual void
+    copy_to_device(CUDANet::Tensor& tensor, void* data, size_t size) = 0;
+
+    virtual void sum(const CUDANet::Tensor& input, CUDANet::Tensor& sum) = 0;
+    virtual void max(const CUDANet::Tensor& input, CUDANet::Tensor& max) = 0;

    // Layer ops
-    virtual void relu(CUDANet::Tensor &tensor) = 0;
-    virtual void sigmoid(CUDANet::Tensor &tensor) = 0;
-    virtual void softmax(CUDANet::Tensor &tensor, CUDANet::Tensor &temp_max, CUDANet::Tensor &temp_sum) = 0;
+    virtual void relu(CUDANet::Tensor& tensor)    = 0;
+    virtual void sigmoid(CUDANet::Tensor& tensor) = 0;
+    virtual void softmax(
+        CUDANet::Tensor& tensor,
+        CUDANet::Tensor& temp_max,
+        CUDANet::Tensor& temp_sum
+    ) = 0;
+
+    virtual CUDANet::Tensor& dense(
+        CUDANet::Tensor& weights,
+        CUDANet::Tensor& biases,
+        CUDANet::Tensor& input,
+        CUDANet::Tensor& output,
+        size_t           input_size,
+        size_t           output_size
+    ) = 0;
 };

-} // namespace CUDANet::Backend
+}  // namespace CUDANet
--- a/include/backend/cuda.cuh
+++ b/include/backend/cuda.cuh
@@ -6,21 +6,36 @@
 namespace CUDANet::Backend {

 class CUDA : public Backend {
-public:
+  public:
    // Memory management
    void* allocate(size_t bytes) override;
-    void deallocate(void* ptr) override;
+    void  deallocate(void* ptr) override;

    // Tensor ops
-    void print(const CUDANet::Tensor &input) override;
-    void zero(CUDANet::Tensor &input) override;
-    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
-    void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;
+    void print(const CUDANet::Tensor& input) override;
+    void zero(CUDANet::Tensor& input) override;
+    void
+    copy_to_device(CUDANet::Tensor& tensor, void* data, size_t size) override;
+    void sum(const CUDANet::Tensor& input, CUDANet::Tensor& sum) override;
+    void max(const CUDANet::Tensor& input, CUDANet::Tensor& max) override;

    // Layer ops
-    void relu(CUDANet::Tensor &tensor) override;
-    void sigmoid(CUDANet::Tensor &tensor) override;
-    void softmax(CUDANet::Tensor &tensor, CUDANet::Tensor &temp_max, CUDANet::Tensor &temp_sum) override;
+    void relu(CUDANet::Tensor& tensor) override;
+    void sigmoid(CUDANet::Tensor& tensor) override;
+    void softmax(
+        CUDANet::Tensor& tensor,
+        CUDANet::Tensor& temp_max,
+        CUDANet::Tensor& temp_sum
+    ) override;
+
+    CUDANet::Tensor& dense(
+        CUDANet::Tensor& weights,
+        CUDANet::Tensor& biases,
+        CUDANet::Tensor& input,
+        CUDANet::Tensor& output,
+        size_t           input_size,
+        size_t           output_size
+    ) override;
 };

 }  // namespace CUDANet::Backend
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -30,11 +30,11 @@ class Layer {

    virtual size_t output_size() = 0;

-    virtual void set_weights(CUDANet::Tensor &input) = 0;
+    virtual void set_weights(void *input) = 0;

    virtual CUDANet::Tensor& get_weights() = 0;

-    virtual void set_biases(CUDANet::Tensor &input) = 0;
+    virtual void set_biases(void *input) = 0;

    virtual CUDANet::Tensor& get_biases() = 0;
 };
--- a/include/layers/activation.hpp
+++ b/include/layers/activation.hpp
@@ -29,23 +29,23 @@ class Activation : public Layer {

    ~Activation() = default;

-    CUDANet::Tensor& forward(CUDANet::Tensor &input);
+    CUDANet::Tensor& forward(CUDANet::Tensor &input) override;
    
-    CUDANet::Shape input_shape();
+    CUDANet::Shape input_shape() override;

-    CUDANet::Shape output_shape();
+    CUDANet::Shape output_shape() override;

-    size_t input_size();
+    size_t input_size() override;

-    size_t output_size();
+    size_t output_size() override;

-    void set_weights(CUDANet::Tensor &input);
+    void set_weights(void *input) override;

-    CUDANet::Tensor& get_weights();
+    CUDANet::Tensor& get_weights() override;

-    void set_biases(CUDANet::Tensor &input);
+    void set_biases(void *input) override;

-    CUDANet::Tensor& get_biases();
+    CUDANet::Tensor& get_biases() override;


  private:
--- a/include/layers/dense.hpp
+++ b/include/layers/dense.hpp
@@ -18,23 +18,23 @@ class Dense : public Layer {

    ~Dense();

-    CUDANet::Tensor& forward(CUDANet::Tensor &input);
+    CUDANet::Tensor& forward(CUDANet::Tensor &input) override;

-    CUDANet::Shape input_shape();
+    CUDANet::Shape input_shape() override;

-    CUDANet::Shape output_shape();
+    CUDANet::Shape output_shape() override;

-    size_t input_size();
+    size_t input_size() override;

-    size_t output_size();
+    size_t output_size() override;

-    void set_weights(CUDANet::Tensor &input);
+    void set_weights(void *input) override;

-    CUDANet::Tensor& get_weights();
+    CUDANet::Tensor& get_weights() override;

-    void set_biases(CUDANet::Tensor &input);
+    void set_biases(void *input) override;

-    CUDANet::Tensor& get_biases();
+    CUDANet::Tensor& get_biases() override;

  private:
    CUDANet::Backend *backend;
@@ -45,32 +45,7 @@ class Dense : public Layer {
    CUDANet::Tensor weights;
    CUDANet::Tensor biases;

-
-    void init_weights();
-    void init_biases();
-
-// #ifdef USE_CUDA
-//     float* d_output;
-
-//     float* d_weights;
-//     float* d_biases;
-
-//     // Precompute kernel launch parameters
-//     int forwardGridSize;
-//     int biasGridSize;
-
-//     /**
-//      * @brief Copy the weights and biases to the device
-//      *
-//      */
-//     void toCuda();
-
-//     void initCUDA();
-//     void delCUDA();
-
-//     float* forwardCUDA(const float* d_input);
-// #endif
-
+    CUDANet::Tensor output;
 };

 }  // namespace CUDANet::Layers