Add dtype parameter to layer constructors

2025-11-26 00:19:33 +01:00
parent 84153ac49c
commit 13d3d38b68
17 changed files with 169 additions and 49 deletions
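The same two-constructor pattern repeats in every file below: the existing constructor is kept as a thin overload that delegates to a new one taking an explicit CUDANet::DType, filling in backend->get_default_dtype(). A minimal self-contained sketch of the pattern; Backend, DType and Foo here are stand-ins, and only get_default_dtype() and the delegation shape are taken from the diffs:

// Stand-ins for the CUDANet types; only the delegation pattern matters.
enum class DType { FLOAT32 };

struct Backend {
    DType get_default_dtype() const { return DType::FLOAT32; }
};

class Foo {  // hypothetical layer
 public:
    // Legacy overload: existing call sites keep compiling and get the
    // backend's default element type.
    Foo(int size, Backend* backend)
        : Foo(size, backend->get_default_dtype(), backend) {}

    // New overload: the caller pins the element type explicitly.
    Foo(int size, DType dtype, Backend* backend)
        : size(size), dtype(dtype), backend(backend) {}

 private:
    int size;
    DType dtype;
    Backend* backend;
};

Only code that wants a non-default element type needs the longer overload; everything else stays source-compatible.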

View File

@@ -16,6 +16,8 @@ namespace CUDANet {
 *
 */
 class Layer {
+protected:
+CUDANet::DType dtype;
 public:
 virtual ~Layer(){};
@@ -39,4 +41,4 @@ class Layer {
 virtual size_t get_biases_size() = 0;
 };
-} // namespace CUDANet::Layers
+} // namespace CUDANet

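The subclass constructors in the .cpp diffs below assign this->dtype = dtype; in the constructor body rather than in the member initializer list. That is forced by C++: an initializer list can only initialize the class's own members and direct bases, and dtype now lives in the CUDANet::Layer base. A reduced, self-contained sketch (DType is a stand-in enum and Derived is illustrative; only the protected member mirrors the diff):

namespace CUDANet {
enum class DType { FLOAT32 };  // stand-in for the library's DType

class Layer {
 protected:
    DType dtype;  // the member added by this commit
 public:
    virtual ~Layer() {}
};
}  // namespace CUDANet

class Derived : public CUDANet::Layer {
 public:
    explicit Derived(CUDANet::DType dtype) {
        // dtype(dtype) in the initializer list would not compile here,
        // since dtype is an inherited member, so it is assigned instead:
        this->dtype = dtype;
    }
};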
View File

@@ -20,12 +20,13 @@ enum ActivationType { SIGMOID, RELU, SOFTMAX, NONE };
 * @brief Utility class that performs activation
 *
 */
-class Activation : public Layer {
+class Activation : public CUDANet::Layer {
 public:
 Activation() = default;
 Activation(ActivationType activation, const CUDANet::Shape &shape, CUDANet::Backend* backend);
+Activation(ActivationType activation, const CUDANet::Shape &shape, CUDANet::DType dtype, CUDANet::Backend* backend);
 ~Activation() = default;
@@ -50,7 +51,7 @@ class Activation : public Layer {
 private:
 CUDANet::Backend* backend;
-ActivationType activationType;
+ActivationType activation_type;
 CUDANet::Shape shape;
 CUDANet::Tensor softmax_sum;

View File

@@ -8,6 +8,7 @@ namespace CUDANet::Layers {
 class Add {
 public:
 Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::Backend* backend);
+Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::DType dtype, CUDANet::Backend* backend);
 ~Add();
@@ -19,6 +20,8 @@ class Add {
 CUDANet::Tensor output;
 CUDANet::Backend *backend;
+CUDANet::DType dtype;
 };
 } // namespace CUDANet::Layers

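Add (like Concat further down) is a free-standing op rather than a CUDANet::Layer subclass, so it declares its own dtype member above; because that member belongs to the class itself, the implementation below can initialize it directly in the member initializer list (dtype(dtype)) instead of assigning it in the body. A usage sketch, assuming the header name and a {height, width, channels} shape layout:

#include "add.hpp"  // assumed header name for the class shown above

void add_example(CUDANet::Backend* backend) {
    CUDANet::Shape shape = {64, 64, 3};

    // Legacy overload: element type comes from backend->get_default_dtype().
    CUDANet::Layers::Add add_default(shape, shape, backend);

    // New overload: the output tensor is allocated with the explicit dtype.
    CUDANet::Layers::Add add_f32(shape, shape, CUDANet::DType::FLOAT32, backend);

    // Mismatched shapes would throw InvalidShapeException
    // ("Add requires matching dimensions") from the constructor.
}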
View File

@@ -4,7 +4,7 @@
 namespace CUDANet::Layers {
-class AvgPool2d : public Layer {
+class AvgPool2d : public CUDANet::Layer {
 public:
 AvgPool2d(
 CUDANet::Shape input_shape,
@@ -13,6 +13,14 @@ class AvgPool2d : public Layer {
 CUDANet::Shape padding_shape,
 CUDANet::Backend *backend
 );
+AvgPool2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape pool_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend *backend
+);
 ~AvgPool2d();
@@ -50,6 +58,7 @@ class AvgPool2d : public Layer {
 class AdaptiveAvgPool2d : public AvgPool2d {
 public:
 AdaptiveAvgPool2d(CUDANet::Shape input_shape, CUDANet::Shape output_shape, CUDANet::Backend *backend);
+AdaptiveAvgPool2d(CUDANet::Shape input_shape, CUDANet::Shape output_shape, CUDANet::DType dtype, CUDANet::Backend *backend);
 };
 } // namespace CUDANet::Layers

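A call-site sketch for the new pooling overloads. The header name and the concrete Shape contents are assumptions; the signatures are the ones declared above:

#include "avg_pool.hpp"  // assumed header name

void build_pools(CUDANet::Backend* backend) {
    // 32x32 input with 3 channels; 2x2 window, stride 2, no padding.
    CUDANet::Layers::AvgPool2d pool(
        {32, 32, 3},              // input_shape
        {2, 2},                   // pool_shape
        {2, 2},                   // stride_shape
        {0, 0},                   // padding_shape
        CUDANet::DType::FLOAT32,  // explicit dtype (new overload)
        backend
    );

    // Adaptive variant via the legacy overload: the dtype falls back to
    // backend->get_default_dtype().
    CUDANet::Layers::AdaptiveAvgPool2d gap({32, 32, 3}, {1, 1, 3}, backend);
}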
View File

@@ -4,9 +4,10 @@
 namespace CUDANet::Layers {
-class BatchNorm2d : public Layer {
+class BatchNorm2d : public CUDANet::Layer {
 public:
 BatchNorm2d(CUDANet::Shape input_shape, float epsilon, CUDANet::Backend *backend);
+BatchNorm2d(CUDANet::Shape input_shape, float epsilon, CUDANet::DType dtype, CUDANet::Backend *backend);
 ~BatchNorm2d();

View File

@@ -12,6 +12,7 @@ class Concat {
 public:
 Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::Backend *backend);
+Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::DType dtype, CUDANet::Backend *backend);
 ~Concat();
@@ -27,6 +28,8 @@ class Concat {
 CUDANet::Tensor output;
 CUDANet::Backend *backend;
+CUDANet::DType dtype;
 };
 } // namespace CUDANet::Layers

View File

@@ -8,7 +8,7 @@ namespace CUDANet::Layers {
 * @brief 2D convolutional layer
 *
 */
-class Conv2d : public Layer {
+class Conv2d : public CUDANet::Layer {
 public:
 Conv2d(
 CUDANet::Shape input_shape,
@@ -17,6 +17,14 @@ class Conv2d : public Layer {
 CUDANet::Shape padding_shape,
 CUDANet::Backend* backend
 );
+Conv2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape kernel_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+);
 ~Conv2d();

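The output spatial size computed in conv2d.cpp below follows out = (in - kernel + 2 * padding) / stride + 1 per dimension. A quick self-contained check with illustrative numbers:

#include <cassert>

// out = (in - kernel + 2 * padding) / stride + 1, as in conv2d.cpp below.
constexpr int conv_out(int in, int kernel, int padding, int stride) {
    return (in - kernel + 2 * padding) / stride + 1;
}

int main() {
    static_assert(conv_out(28, 3, 1, 1) == 28);  // "same" padding: 28 -> 28
    static_assert(conv_out(28, 3, 0, 1) == 26);  // no padding: 28 -> 26
    static_assert(conv_out(32, 3, 1, 2) == 16);  // stride 2: 32 -> 16
    return 0;
}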
View File

@@ -9,10 +9,11 @@ namespace CUDANet::Layers {
 * @brief Dense (fully connected) layer
 *
 */
-class Dense : public Layer {
+class Dense : public CUDANet::Layer {
 public:
 Dense(CUDANet::Shape input_shape, CUDANet::Shape output_shape, CUDANet::Backend *backend);
+Dense(CUDANet::Shape input_shape, CUDANet::Shape output_shape, CUDANet::DType dtype, CUDANet::Backend *backend);
 ~Dense();

View File

@@ -4,7 +4,7 @@
 namespace CUDANet::Layers {
-class MaxPool2d : public Layer {
+class MaxPool2d : public CUDANet::Layer {
 public:
 MaxPool2d(
 CUDANet::Shape input_shape,
@@ -13,6 +13,14 @@ class MaxPool2d : public Layer {
 CUDANet::Shape padding_shape,
 CUDANet::Backend* backend
 );
+MaxPool2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape pool_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+);
 ~MaxPool2d();
 CUDANet::Tensor& forward(CUDANet::Tensor &input) override;

View File

@@ -1,14 +1,30 @@
-#include "activation.hpp"
+#include <format>
 #include <stdexcept>
 #include <vector>
+#include "activation.hpp"
+#include "tensor.hpp"
 using namespace CUDANet::Layers;
-Activation::Activation(ActivationType activation, const CUDANet::Shape &shape, CUDANet::Backend* backend)
-: backend(backend), activationType(activation), shape(shape) {
+Activation::Activation(
+ActivationType activation,
+const CUDANet::Shape& shape,
+CUDANet::Backend* backend
+)
+: Activation(activation, shape, backend->get_default_dtype(), backend) {}
+Activation::Activation(
+ActivationType activation,
+const CUDANet::Shape& shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+)
+: activation_type(activation),
+shape(shape),
+backend(backend) {
+this->dtype = dtype;
 if (shape.size() != 1) {
 throw InvalidShapeException("input", 1, shape.size());
@@ -16,15 +32,16 @@ Activation::Activation(ActivationType activation, const CUDANet::Shape &shape, C
 auto length = shape[0];
-if (activationType == SOFTMAX) {
-softmax_sum = CUDANet::Tensor({static_cast<size_t>(length)}, CUDANet::DType::FLOAT32, backend);
-tensor_max = CUDANet::Tensor({static_cast<size_t>(length)}, CUDANet::DType::FLOAT32, backend);
+if (activation_type == SOFTMAX) {
+softmax_sum =
+CUDANet::Tensor({static_cast<size_t>(length)}, dtype, backend);
+tensor_max =
+CUDANet::Tensor({static_cast<size_t>(length)}, dtype, backend);
 }
 }
 CUDANet::Tensor& Activation::forward(CUDANet::Tensor& input) {
-switch (activationType)
-{
+switch (activation_type) {
 case ActivationType::SIGMOID:
 backend->sigmoid(input);
 break;

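Two behaviors worth noting from the constructor above: the shape must be one-dimensional, and for SOFTMAX the scratch tensors (softmax_sum, tensor_max) are now allocated with the layer's dtype instead of hardcoded FLOAT32. A usage sketch, assuming Shape converts from a braced list:

#include "activation.hpp"

void softmax_example(CUDANet::Backend* backend) {
    // 1-D shape, explicit dtype: the scratch tensors follow this dtype.
    CUDANet::Layers::Activation softmax(
        CUDANet::Layers::SOFTMAX, {1000}, CUDANet::DType::FLOAT32, backend
    );

    // A 2-D shape would throw InvalidShapeException("input", 1, 2):
    // CUDANet::Layers::Activation bad(CUDANet::Layers::RELU, {32, 32}, backend);
}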
View File

@@ -3,7 +3,11 @@
 using namespace CUDANet::Layers;
-Add::Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::Backend* backend) : backend(backend) {
+Add::Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::Backend* backend)
+: Add(a_shape, b_shape, backend->get_default_dtype(), backend) {}
+Add::Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::DType dtype, CUDANet::Backend* backend)
+: backend(backend), dtype(dtype) {
 if (a_shape != b_shape) {
 throw InvalidShapeException(
 "Add requires matching dimensions", a_shape, b_shape
@@ -11,7 +15,7 @@ Add::Add(CUDANet::Shape a_shape, CUDANet::Shape b_shape, CUDANet::Backend* backe
 }
 out_shape = a_shape;
-output = CUDANet::Tensor(out_shape, CUDANet::DType::FLOAT32, backend);
+output = CUDANet::Tensor(out_shape, dtype, backend);
 }
 Add::~Add() {}

View File

@@ -11,6 +11,16 @@ AvgPool2d::AvgPool2d(
 CUDANet::Shape stride_shape,
 CUDANet::Shape padding_shape,
 CUDANet::Backend* backend
 )
+: AvgPool2d(input_shape, pool_shape, stride_shape, padding_shape, backend->get_default_dtype(), backend) {}
+AvgPool2d::AvgPool2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape pool_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+)
 : in_shape(input_shape),
 pool_shape(pool_shape),
@@ -33,6 +43,8 @@ AvgPool2d::AvgPool2d(
 throw InvalidShapeException("padding", 2, padding_shape.size());
 }
+this->dtype = dtype;
 out_shape = {
 (in_shape[0] + 2 * padding_shape[0] - pool_shape[0]) / stride_shape[0] +
 1,
@@ -43,7 +55,7 @@ AvgPool2d::AvgPool2d(
 output = CUDANet::Tensor(
 Shape{out_shape[0] * out_shape[1] * out_shape[2]},
-CUDANet::DType::FLOAT32, backend
+dtype, backend
 );
 }
@@ -96,6 +108,14 @@ AdaptiveAvgPool2d::AdaptiveAvgPool2d(
 CUDANet::Shape input_shape,
 CUDANet::Shape output_shape,
 CUDANet::Backend *backend
 )
+: AdaptiveAvgPool2d(input_shape, output_shape, backend->get_default_dtype(), backend) {}
+AdaptiveAvgPool2d::AdaptiveAvgPool2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape output_shape,
+CUDANet::DType dtype,
+CUDANet::Backend *backend
+)
 : AvgPool2d(
 input_shape,
@@ -114,12 +134,13 @@ AdaptiveAvgPool2d::AdaptiveAvgPool2d(
 (input_shape[0] - (output_shape[0] - 1) * (input_shape[0] / output_shape[0]) - 1) / 2,
 (input_shape[1] - (output_shape[1] - 1) * (input_shape[1] / output_shape[1]) - 1) / 2
 },
+dtype,
 backend
 ) {
 out_shape = output_shape;
 output = CUDANet::Tensor(
 Shape{out_shape[0] * out_shape[1] * out_shape[2]},
-CUDANet::DType::FLOAT32, backend
+dtype, backend
 );
 }

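The padding expression AdaptiveAvgPool2d passes to its AvgPool2d base, pad = (in - (out - 1) * (in / out) - 1) / 2 with integer division throughout, can be sanity-checked in isolation:

#include <cassert>

// pad = (in - (out - 1) * (in / out) - 1) / 2, as in the constructor above.
constexpr int adaptive_pad(int in, int out) {
    return (in - (out - 1) * (in / out) - 1) / 2;
}

int main() {
    static_assert(adaptive_pad(224, 7) == 15);  // implied stride 224 / 7 = 32
    static_assert(adaptive_pad(8, 2) == 1);     // (8 - 4 - 1) / 2 = 1
    static_assert(adaptive_pad(6, 6) == 0);     // identity case: no padding
    return 0;
}

Note that the constructor then overwrites out_shape with the requested output_shape, so the requested output size wins regardless of the derived padding.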
View File

@@ -12,6 +12,14 @@ BatchNorm2d::BatchNorm2d(
 CUDANet::Shape input_shape,
 float eps,
 CUDANet::Backend *backend
 )
+: BatchNorm2d(input_shape, eps, backend->get_default_dtype(), backend) {}
+BatchNorm2d::BatchNorm2d(
+CUDANet::Shape input_shape,
+float eps,
+CUDANet::DType dtype,
+CUDANet::Backend *backend
+)
 : in_shape(input_shape), backend(backend) {
@@ -19,22 +27,24 @@ BatchNorm2d::BatchNorm2d(
 throw InvalidShapeException("input", 3, in_shape.size());
 }
-epsilon = CUDANet::Tensor({1}, CUDANet::DType::FLOAT32, backend);
+this->dtype = dtype;
+epsilon = CUDANet::Tensor({1}, dtype, backend);
 epsilon.set_data<float>(&eps);
-running_mean = CUDANet::Tensor({in_shape[2]}, CUDANet::DType::FLOAT32, backend);
+running_mean = CUDANet::Tensor({in_shape[2]}, dtype, backend);
 running_mean.zero();
-running_var = CUDANet::Tensor({in_shape[2]}, CUDANet::DType::FLOAT32, backend);
+running_var = CUDANet::Tensor({in_shape[2]}, dtype, backend);
 running_var.fill(1);
-weights = CUDANet::Tensor({in_shape[2]}, CUDANet::DType::FLOAT32, backend);
+weights = CUDANet::Tensor({in_shape[2]}, dtype, backend);
 weights.fill(1);
-biases = CUDANet::Tensor({in_shape[2]}, CUDANet::DType::FLOAT32, backend);
+biases = CUDANet::Tensor({in_shape[2]}, dtype, backend);
 biases.zero();
-output = CUDANet::Tensor(in_shape, CUDANet::DType::FLOAT32, backend);
+output = CUDANet::Tensor(in_shape, dtype, backend);
 }
 BatchNorm2d::~BatchNorm2d() {}

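With running_mean zeroed, running_var and weights filled with 1, and biases zeroed, a freshly constructed BatchNorm2d is an approximate identity map. The inference-time formula itself is not part of this diff; the standard one is y = (x - mean) / sqrt(var + eps) * weight + bias, which the sketch below evaluates with the initial values:

#include <cassert>
#include <cmath>

// Standard inference-time batch norm (assumed, not shown in the diff).
float batch_norm(float x, float mean, float var, float eps,
                 float gamma, float beta) {
    return (x - mean) / std::sqrt(var + eps) * gamma + beta;
}

int main() {
    float eps = 1e-5f;
    // Initial parameters from the constructor above: mean 0, var 1,
    // weight 1, bias 0, so y is approximately x.
    assert(std::fabs(batch_norm(2.5f, 0.f, 1.f, eps, 1.f, 0.f) - 2.5f) < 1e-4f);
    return 0;
}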
View File

@@ -3,7 +3,10 @@
 using namespace CUDANet::Layers;
 Concat::Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::Backend *backend)
-: a_shape(a_shape), b_shape(b_shape), backend(backend) {
+: Concat(a_shape, b_shape, backend->get_default_dtype(), backend) {}
+Concat::Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDANet::DType dtype, CUDANet::Backend *backend)
+: a_shape(a_shape), b_shape(b_shape), backend(backend), dtype(dtype) {
 if (a_shape[0] != b_shape[0] || a_shape[1] != b_shape[1]) {
 throw InvalidShapeException(
 "Concat requires matching height and width dimensions", a_shape,
@@ -12,7 +15,7 @@ Concat::Concat(const CUDANet::Shape a_shape, const CUDANet::Shape b_shape, CUDAN
 }
 out_shape = {a_shape[0], a_shape[1], a_shape[2] + b_shape[2]};
-output = CUDANet::Tensor(out_shape, CUDANet::DType::FLOAT32, backend);
+output = CUDANet::Tensor(out_shape, dtype, backend);
 }
 Concat::~Concat() {}

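Concat joins along the channel axis: heights and widths must match and the channel counts add, per out_shape = {a[0], a[1], a[2] + b[2]} above. A usage sketch, assuming the header name and the {height, width, channels} layout implied by that line:

#include "concat.hpp"  // assumed header name

void concat_example(CUDANet::Backend* backend) {
    // out_shape becomes {16, 16, 3 + 5} = {16, 16, 8}.
    CUDANet::Layers::Concat cat({16, 16, 3}, {16, 16, 5}, backend);

    // Mismatched height or width throws InvalidShapeException:
    // CUDANet::Layers::Concat bad({16, 16, 3}, {8, 16, 5}, backend);
}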
View File

@@ -14,6 +14,16 @@ Conv2d::Conv2d(
 CUDANet::Shape stride_shape,
 CUDANet::Shape padding_shape,
 CUDANet::Backend* backend
 )
+: Conv2d(input_shape, kernel_shape, stride_shape, padding_shape, backend->get_default_dtype(), backend) {}
+Conv2d::Conv2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape kernel_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+)
 : in_shape(input_shape),
 kernel_shape(kernel_shape),
@@ -36,6 +46,8 @@ Conv2d::Conv2d(
 throw InvalidShapeException("padding", 3, padding_shape.size());
 }
+this->dtype = dtype;
 out_shape = {
 (in_shape[0] - kernel_shape[0] + 2 * padding_shape[0]) /
 stride_shape[0] +
@@ -48,17 +60,17 @@ Conv2d::Conv2d(
 output = CUDANet::Tensor(
 Shape{out_shape[0], out_shape[1], out_shape[2]},
-CUDANet::DType::FLOAT32, backend
+dtype, backend
 );
 weights = CUDANet::Tensor(
 Shape{
 kernel_shape[0], kernel_shape[1], kernel_shape[2], in_shape[2]
 },
-CUDANet::DType::FLOAT32, backend
+dtype, backend
 );
 biases = CUDANet::Tensor(
-Shape{kernel_shape[2]}, CUDANet::DType::FLOAT32, backend
+Shape{kernel_shape[2]}, dtype, backend
 );
 weights.zero();

View File

@@ -6,6 +6,9 @@
 using namespace CUDANet::Layers;
 Dense::Dense(CUDANet::Shape in_shape, CUDANet::Shape out_shape, CUDANet::Backend* backend)
+: Dense(in_shape, out_shape, backend->get_default_dtype(), backend) {}
+Dense::Dense(CUDANet::Shape in_shape, CUDANet::Shape out_shape, CUDANet::DType dtype, CUDANet::Backend* backend)
 : backend(backend),
 in_shape(in_shape),
 out_shape(out_shape) {
@@ -18,9 +21,11 @@ Dense::Dense(CUDANet::Shape in_shape, CUDANet::Shape out_shape, CUDANet::Backend
 throw InvalidShapeException("output", 1, out_shape.size());
 }
-weights = CUDANet::Tensor(Shape{out_shape[0], in_shape[0]}, CUDANet::DType::FLOAT32, backend);
-biases = CUDANet::Tensor(Shape{out_shape[0]}, CUDANet::DType::FLOAT32, backend);
-output = CUDANet::Tensor(Shape{out_shape[0]}, CUDANet::DType::FLOAT32, backend);
+this->dtype = dtype;
+weights = CUDANet::Tensor(Shape{out_shape[0], in_shape[0]}, dtype, backend);
+biases = CUDANet::Tensor(Shape{out_shape[0]}, dtype, backend);
+output = CUDANet::Tensor(Shape{out_shape[0]}, dtype, backend);
 weights.zero();
 biases.zero();

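Putting two layers together: a classifier head where the Dense layer and its softmax share one explicit dtype, so weights, biases, outputs and the softmax scratch tensors all use the same element type. Header names are assumptions, and forward on Dense is taken to exist via the CUDANet::Layer interface:

#include "activation.hpp"  // assumed header names
#include "dense.hpp"

void classify(CUDANet::Tensor& features, CUDANet::Backend* backend) {
    CUDANet::DType dtype = CUDANet::DType::FLOAT32;

    // 512 -> 10 fully connected layer plus softmax, one dtype end to end.
    CUDANet::Layers::Dense fc({512}, {10}, dtype, backend);
    CUDANet::Layers::Activation softmax(
        CUDANet::Layers::SOFTMAX, {10}, dtype, backend
    );

    CUDANet::Tensor& probs = softmax.forward(fc.forward(features));
    (void)probs;  // consume the result here
}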
View File

@@ -10,6 +10,16 @@ MaxPool2d::MaxPool2d(
 CUDANet::Shape stride_shape,
 CUDANet::Shape padding_shape,
 CUDANet::Backend* backend
 )
+: MaxPool2d(input_shape, pool_shape, stride_shape, padding_shape, backend->get_default_dtype(), backend) {}
+MaxPool2d::MaxPool2d(
+CUDANet::Shape input_shape,
+CUDANet::Shape pool_shape,
+CUDANet::Shape stride_shape,
+CUDANet::Shape padding_shape,
+CUDANet::DType dtype,
+CUDANet::Backend* backend
+)
 : in_shape(input_shape),
 pool_shape(pool_shape),
@@ -32,6 +42,8 @@ MaxPool2d::MaxPool2d(
 throw InvalidShapeException("padding", 2, padding_shape.size());
 }
+this->dtype = dtype;
 out_shape = {
 (in_shape[0] + 2 * padding_shape[0] - pool_shape[0]) / stride_shape[0] +
 1,
@@ -42,7 +54,7 @@ MaxPool2d::MaxPool2d(
 output = CUDANet::Tensor(
 Shape{out_shape[0] * out_shape[1] * out_shape[2]},
-CUDANet::DType::FLOAT32, backend
+dtype, backend
 );
 }
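Because MaxPool2d, like the other layers in this commit, overrides forward from CUDANet::Layer, call sites can stay dtype-agnostic and drive everything through the base interface. A closing sketch; the container choice is illustrative and Layer::forward is inferred from the override declared in the pooling header above:

#include <vector>
#include "layer.hpp"  // assumed header name

// Run a tensor through heterogeneous layers via the base interface;
// each layer may have been built with either constructor overload.
CUDANet::Tensor& run(std::vector<CUDANet::Layer*>& layers,
                     CUDANet::Tensor& input) {
    CUDANet::Tensor* current = &input;
    for (CUDANet::Layer* layer : layers) {
        current = &layer->forward(*current);
    }
    return *current;
}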