WIP Refactor Layer and Activation classes

2025-12-22 14:24:22 +00:00 · 2025-11-18 19:10:18 +01:00
parent 6340b27055
commit 24606491a3
5 changed files with 74 additions and 41 deletions
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -26,9 +26,9 @@ class Layer {
    virtual CUDANet::Shape output_shape() = 0;
-    virtual int input_size() = 0;
+    virtual size_t input_size() = 0;
-    virtual int output_size() = 0;
+    virtual size_t output_size() = 0;
    virtual void set_weights(CUDANet::Tensor &input) = 0;
--- a/include/layers/activation.hpp
+++ b/include/layers/activation.hpp
@@ -1,8 +1,8 @@
 #pragma once
-#include "backend/tensor.hpp"
+#include "tensor.hpp"
-#include "backend/backend.hpp"
+#include "backend.hpp"
-#include "layers/layer.hpp"
+#include "layer.hpp"
 namespace CUDANet::Layers {
@@ -20,40 +20,41 @@ enum ActivationType { SIGMOID, RELU, SOFTMAX, NONE };
 * @brief Utility class that performs activation
 * 
 */
-class Activation : Layer {
+class Activation : public Layer {
  public:
    Activation() = default;
-    /**
+    Activation(CUDANet::Backend* backend, ActivationType activation, const CUDANet::Shape &shape);
     * @brief Construct a new Activation object
     * 
     * @param activation Type of activation
     * @param length     Length of the input
     */
    Activation(CUDANet::Backend::IBackend* backend, ActivationType activation, const int length);
-    /**
+    ~Activation() = default;
     * @brief Destroy the Activation object
     * 
     */
    ~Activation();
-    /**
+    CUDANet::Tensor& forward(CUDANet::Tensor &input);
-     * @brief Run the activation function on the input
+    
-     * 
+    CUDANet::Shape input_shape();
-     * @param d_input Pointer to the input vector on the device
+
-     */
+    CUDANet::Shape output_shape();
-    void activate(CUDANet::Backend::Tensor input);
+
    size_t input_size();
    size_t output_size();
    void set_weights(CUDANet::Tensor &input);
    CUDANet::Tensor& get_weights();
    void set_biases(CUDANet::Tensor &input);
    CUDANet::Tensor& get_biases();
  private:
-    CUDANet::Backend::IBackend* backend;
+    CUDANet::Backend* backend;
    ActivationType activationType;
-    int length;
+    CUDANet::Shape shape;
-    CUDANet::Backend::Tensor softmax_sum;
+    CUDANet::Tensor softmax_sum;
-    CUDANet::Backend::Tensor tensor_max;
+    CUDANet::Tensor tensor_max;
 };
 }  // namespace CUDANet::Layers
--- a/include/tensor.hpp
+++ b/include/tensor.hpp
@@ -21,7 +21,7 @@ class Tensor
 public:
    Tensor() = default;
-    Tensor(Shape shape, DType dtype, CUDANet::Backend::IBackend* backend);
+    Tensor(Shape shape, DType dtype, CUDANet::Backend* backend);
    ~Tensor();
    size_t size() const;
@@ -40,7 +40,7 @@ private:
    size_t total_elms;
    size_t total_size;
-    CUDANet::Backend::IBackend*   backend;
+    CUDANet::Backend*   backend;
    void*       d_ptr;
 };
--- a/src/backends/tensor.cpp
+++ b/src/backends/tensor.cpp
@@ -1,10 +1,10 @@
 #include "backend/tensor.hpp"
 #include <stdexcept>
-using namespace CUDANet::Backend;
+#include "tensor.hpp"
-Tensor::Tensor(Shape shape, DType dtype, IBackend* backend)
+using namespace CUDANet;
 Tensor::Tensor(Shape shape, DType dtype, Backend* backend)
    : shape(shape), dtype(dtype), backend(backend), d_ptr(nullptr) {
    // Count total elements
    size_t count = 1;
--- a/src/layers/activation.cpp
+++ b/src/layers/activation.cpp
@@ -1,22 +1,28 @@
 #include <format>
 #include <stdexcept>
 #include <vector>
 #include "activation.hpp"
-#include "backend/tensor.hpp"
+#include "tensor.hpp"
 using namespace CUDANet::Layers;
-Activation::Activation(CUDANet::Backend::IBackend* backend, ActivationType activation, const int length)
+Activation::Activation(CUDANet::Backend* backend, ActivationType activation, const CUDANet::Shape &shape)
-    : backend(backend), activationType(activation), length(length) {
+    : backend(backend), activationType(activation), shape(shape) {
    // Only shapes with exactly one dimension are accepted; anything else is
    // rejected with std::runtime_error.
    // NOTE(review): formatting `shape` with "{}" needs a std::formatter for
    // CUDANet::Shape — confirm one is available.
    if (shape.size() != 1) {
        throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", shape));
    }
    // The single dimension is the element count used for the scratch tensors below.
    auto length = shape[0];
    // SOFTMAX gets two FLOAT32 tensors of `length` elements (softmax_sum and
    // tensor_max); the other activation types allocate nothing here.
    if (activationType == SOFTMAX) {
-      softmax_sum = CUDANet::Backend::Tensor({static_cast<size_t>(length)}, CUDANet::Backend::DType::FLOAT32, backend);
+      softmax_sum = CUDANet::Tensor({static_cast<size_t>(length)}, CUDANet::DType::FLOAT32, backend);
-      tensor_max = CUDANet::Backend::Tensor({static_cast<size_t>(length)}, CUDANet::Backend::DType::FLOAT32, backend);
+      tensor_max = CUDANet::Tensor({static_cast<size_t>(length)}, CUDANet::DType::FLOAT32, backend);
    }
 }
-void Activation::activate(CUDANet::Backend::Tensor input) {
+CUDANet::Tensor& Activation::forward(CUDANet::Tensor &input) {
    switch (activationType)
    {
    case ActivationType::SIGMOID:
@@ -31,4 +37,30 @@ void Activation::activate(CUDANet::Backend::Tensor input) {
    default:
        break;
    }
-}
+
    return input;
 }
 /**
  * @brief Shape of the tensor this activation accepts.
  *
  * @return CUDANet::Shape Copy of the stored shape member
  */
 CUDANet::Shape Activation::input_shape() {
    return this->shape;
 }
 /**
  * @brief Shape of the tensor this activation produces.
  *
  * Returns the same stored shape member that input_shape() returns.
  *
  * @return CUDANet::Shape Copy of the stored shape member
  */
 CUDANet::Shape Activation::output_shape() {
    return this->shape;
 }
 /**
  * @brief Number of input elements.
  *
  * @return size_t First dimension of the stored shape
  */
 size_t Activation::input_size() {
    const size_t element_count = shape[0];
    return element_count;
 }
 /**
  * @brief Number of output elements.
  *
  * @return size_t First dimension of the stored shape
  */
 size_t Activation::output_size() {
    const size_t element_count = shape[0];
    return element_count;
 }
 /**
  * @brief No-op: nothing from the given tensor is stored.
  *
  * @param input Ignored
  */
 void Activation::set_weights(CUDANet::Tensor &input) {
    (void)input;  // intentionally unused
 }
 /**
  * @brief Weights accessor; there is no weight tensor to hand out here.
  *
  * An empty body would flow off the end of a function that returns a
  * reference, which is undefined behaviour, so the call fails loudly instead.
  *
  * @return CUDANet::Tensor& Never returns normally
  * @throws std::runtime_error Always
  */
 CUDANet::Tensor& Activation::get_weights() {
    throw std::runtime_error("Activation layer has no weights");
 }
 /**
  * @brief No-op: nothing from the given tensor is stored.
  *
  * @param input Ignored
  */
 void Activation::set_biases(CUDANet::Tensor &input) {
    (void)input;  // intentionally unused
 }
 /**
  * @brief Biases accessor; there is no bias tensor to hand out here.
  *
  * An empty body would flow off the end of a function that returns a
  * reference, which is undefined behaviour, so the call fails loudly instead.
  *
  * @return CUDANet::Tensor& Never returns normally
  * @throws std::runtime_error Always
  */
 CUDANet::Tensor& Activation::get_biases() {
    throw std::runtime_error("Activation layer has no biases");
 }