Update main include file

Restructure include paths
Update BatchNorm2d to return sizes for running mean and var
2025-12-22 14:24:22 +00:00 · 2025-11-23 21:07:34 +01:00 · 2025-11-23 20:57:08 +01:00 · 2025-11-23 20:48:41 +01:00 · 2025-11-23 20:44:25 +01:00 · 2025-11-23 19:21:22 +01:00
32 changed files with 206 additions and 112 deletions
--- a/include/backend.hpp
+++ b/include/backend.hpp
@@ -2,6 +2,8 @@

 #include <cstddef>

+#include "shape.hpp"
+
 namespace CUDANet {

 // Forward declaration
--- a/include/backend/cpu/cpu.hpp
+++ b/include/backend/cpu/cpu.hpp
@@ -3,7 +3,7 @@
 #include "backend.hpp"
 #include "tensor.hpp"

-namespace CUDANet::Backend {
+namespace CUDANet::Backends {

 class CPU : public Backend {
 public:
--- a/include/backend/cuda/cuda.cuh
+++ b/include/backend/cuda/cuda.cuh
@@ -24,7 +24,7 @@ do { \
    } \
 } while (0)

-namespace CUDANet::Backend {
+namespace CUDANet::Backends {

 class CUDA : public Backend {
  public:
--- a/include/backend/cuda/cuda_backend.cuh
+++ b/include/backend/cuda/cuda_backend.cuh
@@ -0,0 +1,11 @@
+#pragma once
+
+// CUDA Backend Implementation
+#include "backend/cuda/cuda.cuh"
+
+// CUDA Kernels
+#include "backend/cuda/kernels/activation_functions.cuh"
+#include "backend/cuda/kernels/convolution.cuh"
+#include "backend/cuda/kernels/matmul.cuh"
+#include "backend/cuda/kernels/pool.cuh"
+
--- a/include/backend/cuda/kernels/activation_functions.cuh
+++ b/include/backend/cuda/kernels/activation_functions.cuh
@@ -1,5 +1,4 @@
-#ifndef CUDANET_ACTIVATION_FUNCTIONS_H
-#define CUDANET_ACTIVATION_FUNCTIONS_H
+#pragma once

 #include <cuda_runtime.h>

@@ -32,5 +31,3 @@ __global__ void relu(
 );

 }  // namespace CUDANet::Kernels
-
-#endif  // CUDANET_ACTIVATION_FUNCTIONS_H
--- a/include/backend/cuda/kernels/convolution.cuh
+++ b/include/backend/cuda/kernels/convolution.cuh
--- a/include/backend/cuda/kernels/matmul.cuh
+++ b/include/backend/cuda/kernels/matmul.cuh
@@ -1,5 +1,4 @@
-#ifndef CUDANET_MATMUL_H
-#define CUDANET_MATMUL_H
+#pragma once

 #include <cuda_runtime.h>

@@ -191,5 +190,3 @@ __global__ void sum_reduce(
 );

 }  // namespace CUDANet::Kernels
-
-#endif  // CUDANET_MATMUL_H
--- a/include/backend/cuda/kernels/pool.cuh
+++ b/include/backend/cuda/kernels/pool.cuh
--- a/include/cudanet.cuh
+++ b/include/cudanet.cuh
@@ -1,35 +1,55 @@
-#ifndef CUDANET_H
-#define CUDANET_H
+#pragma once

-#ifdef USE_CUDA
-#include "activation_functions.cuh"
-#include "convolution.cuh"
-#include "matmul.cuh"
-#include "pooling.cuh"
-#endif
+// ============================================================================
+// Core Data Structures & Abstractions (BACKEND-INDEPENDENT)
+// ============================================================================

-// Layers
-#include "activation.hpp"
-#include "add.hpp"
-#include "avg_pooling.hpp"
-#include "batch_norm.hpp"
-#include "concat.hpp"
-#include "conv2d.hpp"
-#include "dense.hpp"
-#include "input.hpp"
+#include "shape.hpp"
+#include "backend.hpp"
+#include "tensor.hpp"
 #include "layer.hpp"
-#include "max_pooling.hpp"
-#include "output.hpp"

-// Models
-#include "model.hpp"
+// ============================================================================
+// Container Classes
+// ============================================================================
+
 #include "module.hpp"
+#include "model.hpp"
+
+// ============================================================================
+// Layer Implementations
+// ============================================================================
+
+// Activation
+#include "layers/activation.hpp"
+
+// Normalization
+#include "layers/batch_norm.hpp"
+
+// Linear
+#include "layers/dense.hpp"
+
+// Convolutional
+#include "layers/conv2d.hpp"
+
+// Pooling
+#include "layers/max_pool.hpp"
+#include "layers/avg_pool.hpp"
+
+// Composition (element-wise operations)
+#include "layers/add.hpp"
+#include "layers/concat.hpp"
+
+// ============================================================================
+// Utilities
+// ============================================================================
+
+#include "utils/imagenet.hpp"
+
+// ============================================================================
+// Backend-Specific Includes (conditionally compiled)
+// ============================================================================

-// Utils
-#include "imagenet.hpp"
 #ifdef USE_CUDA
-#include "cuda_helper.cuh"
-#include "vector.cuh"
+#include "backend/cuda/cuda_backend.cuh"
 #endif
-
-#endif  // CUDANET_H
--- a/include/layer.hpp
+++ b/include/layer.hpp
@@ -32,11 +32,11 @@ class Layer {

    virtual void set_weights(void *input) = 0;

-    virtual CUDANet::Tensor& get_weights() = 0;
+    virtual size_t get_weights_size() = 0;

    virtual void set_biases(void *input) = 0;

-    virtual CUDANet::Tensor& get_biases() = 0;
+    virtual size_t get_biases_size() = 0;
 };

 }  // namespace CUDANet::Layers
--- a/include/layers/activation.hpp
+++ b/include/layers/activation.hpp
@@ -25,7 +25,7 @@ class Activation : public Layer {

    Activation() = default;

-    Activation(CUDANet::Backend* backend, ActivationType activation, const CUDANet::Shape &shape);
+    Activation(ActivationType activation, const CUDANet::Shape &shape, CUDANet::Backend* backend);

    ~Activation() = default;

@@ -41,11 +41,11 @@ class Activation : public Layer {

    void set_weights(void *input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void *input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;


  private:
--- a/include/layers/avg_pool.hpp
+++ b/include/layers/avg_pool.hpp
@@ -28,11 +28,11 @@ class AvgPool2d : public Layer {

    void set_weights(void* input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void* input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;

  protected:
    CUDANet::Shape in_shape;
--- a/include/layers/batch_norm.hpp
+++ b/include/layers/batch_norm.hpp
@@ -22,19 +22,19 @@ class BatchNorm2d : public Layer {

    void set_weights(void* input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void* input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;

    void set_running_mean(void* input);

-    CUDANet::Tensor& get_running_mean();
+    size_t get_running_mean_size();

    void set_running_var(void* input);

-    CUDANet::Tensor& get_running_var();
+    size_t get_running_var_size();

  private:
    CUDANet::Shape  in_shape;
--- a/include/layers/conv2d.hpp
+++ b/include/layers/conv2d.hpp
@@ -18,7 +18,7 @@ class Conv2d : public Layer {
        CUDANet::Backend* backend
    );

-    ~Conv2d() {};
+    ~Conv2d();

    CUDANet::Tensor& forward(CUDANet::Tensor& input) override;

@@ -32,11 +32,11 @@ class Conv2d : public Layer {

    void set_weights(void* input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void* input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;

    CUDANet::Shape get_padding_shape();

--- a/include/layers/dense.hpp
+++ b/include/layers/dense.hpp
@@ -28,11 +28,11 @@ class Dense : public Layer {

    void set_weights(void *input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void *input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;

  private:
    CUDANet::Backend *backend;
--- a/include/layers/max_pool.hpp
+++ b/include/layers/max_pool.hpp
@@ -27,11 +27,11 @@ class MaxPool2d : public Layer {

    void set_weights(void *input) override;

-    CUDANet::Tensor& get_weights() override;
+    size_t get_weights_size() override;

    void set_biases(void *input) override;

-    CUDANet::Tensor& get_biases() override;
+    size_t get_biases_size() override;



--- a/include/shape.hpp
+++ b/include/shape.hpp
@@ -1,11 +1,71 @@
 #pragma once

+#ifndef __host__
+#define __host__
+#endif
+
+#ifndef __device__
+#define __device__
+#endif
+
 #include <format>
 #include <vector>

 namespace CUDANet {

-typedef std::vector<size_t> Shape;
+struct Shape {
+    static constexpr size_t MAX_DIMS = 8;
+    
+    size_t dims[MAX_DIMS];
+    size_t ndim;
+    
+    __host__ __device__ Shape() : ndim(0) {
+        for (int i = 0; i < MAX_DIMS; i++) dims[i] = 0;
+    }
+    
+    __host__ Shape(std::initializer_list<size_t> list) : ndim(list.size()) {
+        if (ndim > MAX_DIMS) {
+            throw std::runtime_error("Too many dimensions");
+        }
+        size_t i = 0;
+        for (auto val : list) {
+            dims[i++] = val;
+        }
+        for (; i < MAX_DIMS; i++) dims[i] = 0;
+    }
+    
+    __host__ Shape(const std::vector<size_t>& vec) : ndim(vec.size()) {
+        if (ndim > MAX_DIMS) {
+            throw std::runtime_error("Too many dimensions");
+        }
+        for (size_t i = 0; i < ndim; i++) {
+            dims[i] = vec[i];
+        }
+        for (size_t i = ndim; i < MAX_DIMS; i++) dims[i] = 0;
+    }
+    
+    __host__ __device__ size_t operator[](size_t idx) const {
+        return dims[idx];
+    }
+    
+    __host__ __device__ size_t& operator[](size_t idx) {
+        return dims[idx];
+    }
+    
+    __host__ __device__ size_t size() const { return ndim; }
+    
+    __host__ bool operator==(const Shape& other) const {
+        if (ndim != other.ndim) return false;
+        for (size_t i = 0; i < ndim; i++) {
+            if (dims[i] != other.dims[i]) return false;
+        }
+        return true;
+    }
+    
+    __host__ bool operator!=(const Shape& other) const {
+        return !(*this == other);
+    }
+};

 std::string format_shape(const Shape& shape) {
    std::string result;
--- a/include/utils/imagenet.hpp
+++ b/include/utils/imagenet.hpp
@@ -1,5 +1,4 @@
-#ifndef CUDANET_IMAGENET_H
-#define CUDANET_IMAGENET_H
+#pragma once

 #include <map>
 #include <string>
@@ -1012,5 +1011,3 @@ const std::map <int, std::string> IMAGENET_CLASS_MAP = {
 // clang-format on

 }
-
-#endif // CUDANET_IMAGENET_H
--- a/src/backends/cuda/cuda_backend.cu
+++ b/src/backends/cuda/cuda_backend.cu
@@ -3,7 +3,7 @@
 #include <cstdio>
 #include <cstdlib>

-#include "backend/cuda.cuh"
+#include "backend/cuda/cuda.cuh"

 cudaDeviceProp initializeCUDA() {
    int deviceCount;
@@ -25,7 +25,7 @@ cudaDeviceProp initializeCUDA() {
    return deviceProp;
 }

-using namespace CUDANet::Backend;
+using namespace CUDANet::Backends;

 void* CUDA::allocate(size_t bytes) {
    void* d_ptr = nullptr;
--- a/src/backends/cuda/kernels/activation_functions.cu
+++ b/src/backends/cuda/kernels/activation_functions.cu
@@ -1,5 +1,4 @@
-#include "activation_functions.cuh"
-#include "cuda_helper.cuh"
+#include "backend/cuda/kernels/activation_functions.cuh"

 using namespace CUDANet;

--- a/src/backends/cuda/kernels/convolution.cu
+++ b/src/backends/cuda/kernels/convolution.cu
@@ -1,6 +1,6 @@
 #include <iostream>

-#include "convolution.cuh"
+#include "backend/cuda/kernels/convolution.cuh"

 using namespace CUDANet;

@@ -39,7 +39,7 @@ __global__ void Kernels::convolution(
                    continue;
                }

-                int kernelIndex =
+                int kernel_idx =
                    f * kernel_shape[0] * kernel_shape[1] * input_shape[2] +
                    c * kernel_shape[0] * kernel_shape[1] +
                    k * kernel_shape[1] + l;
@@ -48,7 +48,7 @@ __global__ void Kernels::convolution(
                                     input_shape[1] +
                                 (j * stride_shape[1] + l - padding_shape[1]);

-                sum += d_kernel[kernelIndex] * d_input[inputIndex];
+                sum += d_kernel[kernel_idx] * d_input[inputIndex];
            }
        }
    }
--- a/src/backends/cuda/kernels/matmul.cu
+++ b/src/backends/cuda/kernels/matmul.cu
@@ -1,5 +1,5 @@
-#include "cuda_helper.cuh"
-#include "matmul.cuh"
+#include "backend/cuda/cuda.cuh"
+#include "backend/cuda/kernels/matmul.cuh"

 using namespace CUDANet;

--- a/src/backends/cuda/kernels/pool.cu
+++ b/src/backends/cuda/kernels/pool.cu
@@ -1,6 +1,5 @@
-#include "cuda_helper.cuh"
 #include "layer.hpp"
-#include "pool.cuh"
+#include "backend/cuda/kernels/pool.cuh"

 using namespace CUDANet;

--- a/src/backends/cuda/layer_ops.cu
+++ b/src/backends/cuda/layer_ops.cu
@@ -1,10 +1,10 @@
-#include "backend/cuda.cuh"
-#include "kernels/activation_functions.cuh"
-#include "kernels/convolution.cuh"
-#include "kernels/matmul.cuh"
-#include "kernels/pool.cuh"
+#include "backend/cuda/cuda.cuh"
+#include "backend/cuda/kernels/activation_functions.cuh"
+#include "backend/cuda/kernels/convolution.cuh"
+#include "backend/cuda/kernels/matmul.cuh"
+#include "backend/cuda/kernels/pool.cuh"

-using namespace CUDANet::Backend;
+using namespace CUDANet::Backends;

 void CUDA::relu(Tensor& tensor) {
    int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
--- a/src/backends/cuda/tensor_ops.cu
+++ b/src/backends/cuda/tensor_ops.cu
@@ -1,10 +1,10 @@
 #include <iostream>

 #include "backend.hpp"
-#include "backend/cuda.cuh"
-#include "kernels/matmul.cuh"
+#include "backend/cuda/cuda.cuh"
+#include "backend/cuda/kernels/matmul.cuh"

-using namespace CUDANet::Backend;
+using namespace CUDANet::Backends;

 void CUDA::print(const CUDANet::Tensor &input) {
    auto length = input.numel();
--- a/src/layers/activation.cpp
+++ b/src/layers/activation.cpp
@@ -7,11 +7,11 @@

 using namespace CUDANet::Layers;

-Activation::Activation(CUDANet::Backend* backend, ActivationType activation, const CUDANet::Shape &shape)
+Activation::Activation(ActivationType activation, const CUDANet::Shape &shape, CUDANet::Backend* backend)
    : backend(backend), activationType(activation), shape(shape) {

    if (shape.size() != 1) {
-        throw std::runtime_error(std::format("Invalid shape. Expected [1], got {}", shape));
+        throw InvalidShapeException("input", 1, shape.size());
    }

    auto length = shape[0];
@@ -59,8 +59,12 @@ size_t Activation::output_size() {

 void Activation::set_weights(void *input) {}

-CUDANet::Tensor& Activation::get_weights() {}
+size_t Activation::get_weights_size() {
+    return 0;
+}

 void Activation::set_biases(void *input) {}

-CUDANet::Tensor& Activation::get_biases() {}
+size_t Activation::get_biases_size() {
+    return 0;
+}
--- a/src/layers/avg_pooling.cpp
+++ b/src/layers/avg_pooling.cpp
@@ -81,11 +81,15 @@ size_t AvgPool2d::output_size() {

 void AvgPool2d::set_weights(void* input) {}

-CUDANet::Tensor& AvgPool2d::get_weights() {}
+size_t AvgPool2d::get_weights_size() {
+    return 0;
+}

 void AvgPool2d::set_biases(void* input) {}

-CUDANet::Tensor& AvgPool2d::get_biases() {}
+size_t AvgPool2d::get_biases_size() {
+    return 0;
+}


 AdaptiveAvgPool2d::AdaptiveAvgPool2d(
--- a/src/layers/batch_norm.cpp
+++ b/src/layers/batch_norm.cpp
@@ -74,30 +74,30 @@ void BatchNorm2d::set_weights(void* input) {
    weights.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& BatchNorm2d::get_weights() {
-    return weights;
+size_t BatchNorm2d::get_weights_size() {
+    return weights.size();
 }

 void BatchNorm2d::set_biases(void* input) {
    biases.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& BatchNorm2d::get_biases() {
-    return biases;
+size_t BatchNorm2d::get_biases_size() {
+    return biases.size();
 }

 void BatchNorm2d::set_running_mean(void* input) {
    running_mean.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& BatchNorm2d::get_running_mean() {
-    return running_mean;
+size_t BatchNorm2d::get_running_mean_size() {
+    return running_mean.size();
 }

 void BatchNorm2d::set_running_var(void* input) {
    running_var.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& BatchNorm2d::get_running_var() {
-    return running_var;
+size_t BatchNorm2d::get_running_var_size() {
+    return running_var.size();
 }
--- a/src/layers/conv2d.cpp
+++ b/src/layers/conv2d.cpp
@@ -96,16 +96,16 @@ void Conv2d::set_weights(void* input) {
    weights.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& Conv2d::get_weights() {
-    return weights;
+size_t Conv2d::get_weights_size() {
+    return weights.size();
 }

 void Conv2d::set_biases(void* input) {
    biases.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& Conv2d::get_biases() {
-    return biases;
+size_t Conv2d::get_biases_size() {
+    return biases.size();
 }

 CUDANet::Shape Conv2d::get_padding_shape() {
--- a/src/layers/dense.cpp
+++ b/src/layers/dense.cpp
@@ -55,14 +55,14 @@ void Dense::set_weights(void* input) {
    weights.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& Dense::get_weights() {
-    return weights;
+size_t Dense::get_weights_size() {
+    return weights.size();
 }

 void Dense::set_biases(void* input) {
    biases.set_data<float>(static_cast<float*>(input));
 }

-CUDANet::Tensor& Dense::get_biases() {
-    return biases;
+size_t Dense::get_biases_size() {
+    return biases.size();
 }
--- a/src/layers/max_pool.cpp
+++ b/src/layers/max_pool.cpp
@@ -75,8 +75,12 @@ size_t MaxPool2d::output_size() {

 void MaxPool2d::set_weights(void* input) {}

-CUDANet::Tensor& MaxPool2d::get_weights() {}
+size_t MaxPool2d::get_weights_size() {
+    return 0;
+}

 void MaxPool2d::set_biases(void* input) {}

-CUDANet::Tensor& MaxPool2d::get_biases() {}
+size_t MaxPool2d::get_biases_size() {
+    return 0;
+}
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -128,20 +128,20 @@ void Model::load_weights(const std::string& path) {
            Layer* layer = layer_map[tensor_info.name];

            if (tensor_info.type == TensorType::WEIGHT) {
-                if (layer->get_weights().size() != values.size()) {
+                if (layer->get_weights_size() != values.size()) {
                    std::cerr << "Layer: " << tensor_info.name
                              << " has incorrect number of weights, expected "
-                              << layer->get_weights().size() << " but got "
+                              << layer->get_weights_size() << " but got "
                              << values.size() << ", skipping" << std::endl;
                    continue;
                }

                layer->set_weights(values.data());
            } else if (tensor_info.type == TensorType::BIAS) {
-                if (layer->get_biases().size() != values.size()) {
+                if (layer->get_biases_size() != values.size()) {
                    std::cerr << "Layer: " << tensor_info.name
                              << " has incorrect number of biases, expected "
-                              << layer->get_biases().size() << " but got "
+                              << layer->get_biases_size() << " but got "
                              << values.size() << ", skipping" << std::endl;
                    continue;
                }
@@ -155,16 +155,16 @@ void Model::load_weights(const std::string& path) {
            }

            if (tensor_info.type == TensorType::RUNNING_MEAN) {
-                if (bn_layer->get_running_mean().size() != values.size()) {
+                if (bn_layer->get_running_mean_size() != values.size()) {
                    std::cerr << "Layer: " << tensor_info.name << " has incorrect number of running mean values, expected "
-                                << bn_layer->get_running_mean().size() << " but got " << values.size() << ", skipping" << std::endl;
+                                << bn_layer->get_running_mean_size() << " but got " << values.size() << ", skipping" << std::endl;
                    continue;
                }
                bn_layer->set_running_mean(values.data());
            } else if (tensor_info.type == TensorType::RUNNING_VAR) {
-                if (bn_layer->get_running_var().size() != values.size()) {
+                if (bn_layer->get_running_var_size() != values.size()) {
                    std::cerr << "Layer: " << tensor_info.name << " has incorrect number of running var values, expected "
-                                << bn_layer->get_running_var().size() << " but got " << values.size() << ", skipping" << std::endl;
+                                << bn_layer->get_running_var_size() << " but got " << values.size() << ", skipping" << std::endl;
                    continue;
                }
                bn_layer->set_running_var(values.data());
Author	SHA1	Message	Date
LordMathis	a97ff8e1f6	Update main include file	2025-11-23 21:07:34 +01:00
LordMathis	38cb0c9ac0	Restructure include paths	2025-11-23 20:57:08 +01:00
LordMathis	4161caf3e1	Update BatchNorm2d to return sizes for running mean and var	2025-11-23 20:48:41 +01:00
LordMathis	9f1a56c699	Refactor Layer interface to return size of weights and biases instead of Tensor references	2025-11-23 20:44:25 +01:00
LordMathis	547cd0c224	Remove unnecessary inclusion of cuda_helper.cuh in pool.cu	2025-11-23 19:21:22 +01:00
LordMathis	1102aef293	Implement custom Shape struct with __device__ support	2025-11-23 19:21:06 +01:00
LordMathis	82a0e7c19d	Fix some compilation errors	2025-11-23 18:50:57 +01:00