Start implementing weights import

2025-12-22 14:24:22 +00:00 · 2024-04-15 22:17:48 +02:00
parent d8c50116e8
commit f4ae45f867
3 changed files with 159 additions and 31 deletions
--- a/include/model/model.hpp
+++ b/include/model/model.hpp
@@ -2,15 +2,24 @@
 #define CUDANET_MODEL_H
 #include <string>
 #include <vector>
 #include <unordered_map>
 #include <vector>
 #include "layer.cuh"
 #include "input.cuh"
 #include "layer.cuh"
 #include "output.cuh"
 namespace CUDANet {
 /// Kind of parameter tensor described by a weights-file header entry.
 enum TensorType {
    WEIGHT,  ///< Handed to WeightedLayer::setWeights by Model::loadWeights
    BIAS,    ///< Handed to WeightedLayer::setBiases by Model::loadWeights
 };
 /// One tensor entry parsed from the text header of a weights file.
 struct TensorInfo {
    /// Layer name: the part of the entry name before the first '.'
    std::string name;
    /// Whether the data is applied as weights or as biases
    TensorType type;
    /// Number of float values making up the tensor
    int size;
    /// Stream position of the tensor data, as passed to seekg()
    int offset;
 };
 class Model {
  public:
    Model(const int inputSize, const int inputChannels, const int outputSize);
@@ -20,20 +29,19 @@ class Model {
    float* predict(const float* input);
    void addLayer(const std::string& name, Layers::SequentialLayer* layer);
    void loadWeights(const std::string& path);
  private:
-
+    Layers::Input*  inputLayer;
-    Layers::Input *inputLayer;
+    Layers::Output* outputLayer;
    Layers::Output *outputLayer;
    int inputSize;
    int inputChannels;
    int outputSize;
-    std::vector<Layers::SequentialLayer*> layers;
+    std::vector<Layers::SequentialLayer*>                   layers;
    std::unordered_map<std::string, Layers::WeightedLayer*> layerMap;
 };
 }  // namespace CUDANet
--- a/src/model/model.cpp
+++ b/src/model/model.cpp
@@ -1,5 +1,11 @@
 #include "model.hpp"
 #include <cstdint>
 #include <fstream>
 #include <iostream>
 #include <stdexcept>
 #include <string>
 #include <unordered_map>
 #include <vector>
 #include "input.cuh"
 #include "layer.cuh"
@@ -25,7 +31,7 @@ Model::Model(const Model& other)
    outputLayer = new Layers::Output(*other.outputLayer);
 }
-Model::~Model(){
+Model::~Model() {
    delete inputLayer;
    delete outputLayer;
    for (auto layer : layers) {
@@ -52,3 +58,70 @@ void Model::addLayer(const std::string& name, Layers::SequentialLayer* layer) {
        layerMap[name] = wLayer;
    }
 }
 void Model::loadWeights(const std::string& path) {
    std::ifstream file(path, std::ios::binary);
    if (!file.is_open()) {
        std::cerr << "Failed to open file: " << path << std::endl;
        return;
    }
    int64_t headerSize;
    file.read(reinterpret_cast<char*>(&headerSize), sizeof(headerSize));
    std::string header(headerSize, '\0');
    file.read(&header[0], headerSize);
    std::vector<TensorInfo> tensorInfos;
    size_t pos = 0;
    while (pos < header.size()) {
        size_t nextPos = header.find('\n', pos);
        if (nextPos == std::string::npos)
            break;
        std::string line = header.substr(pos, nextPos - pos);
        pos = nextPos + 1;
        size_t commaPos = line.find(',');
        if (commaPos == std::string::npos)
            continue;
        // Parse tensor name into name and type
        std::string nameStr = line.substr(0, commaPos);
        size_t dotPos = nameStr.find('.');
        if (dotPos == std::string::npos)
            continue;
        std::string name = nameStr.substr(0, dotPos);
        TensorType type = nameStr.substr(dotPos + 1) == "w" ? TensorType::WEIGHT : TensorType::BIAS;
        line = line.substr(commaPos + 1);
        commaPos = line.find(',');
        if (commaPos == std::string::npos)
            continue;
        int size = std::stoi(line.substr(0, commaPos));
        int offset = std::stoi(line.substr(commaPos + 1));
        tensorInfos.push_back({name, type, size, offset});
    }
    for (const auto& tensorInfo : tensorInfos) {
        std::vector<float> values(tensorInfo.size);
        file.seekg(tensorInfo.offset);
        file.read(reinterpret_cast<char*>(values.data()), tensorInfo.size * sizeof(float));
        if (layerMap.find(tensorInfo.name) != layerMap.end()) {
            if (tensorInfo.type == TensorType::WEIGHT) {
                layerMap[tensorInfo.name]->setWeights(values.data());
            } else if (tensorInfo.type == TensorType::BIAS) {
                layerMap[tensorInfo.name]->setBiases(values.data());
            }
        }
    }
    file.close();
 }
--- a/test/model/test_model.cu
+++ b/test/model/test_model.cu
@@ -8,6 +8,8 @@
 class ModelTest : public ::testing::Test {
  protected:
    CUDANet::Model *commonTestSetup(
        bool setWeights = true,
        int inputSize     = 6,
        int inputChannels = 2,
        int outputSize    = 3,
@@ -28,17 +30,19 @@ class ModelTest : public ::testing::Test {
            CUDANet::Layers::Padding::VALID,
            CUDANet::Layers::ActivationType::NONE
        );
-        // weights 6*6*2*2
+
-        std::vector<float> conv2dWeights = {
+        if (setWeights) {
-            0.18313f, 0.53363f, 0.39527f, 0.27575f, 0.3433f,  0.41746f,
+            std::vector<float> conv2dWeights = {
-            0.16831f, 0.61693f, 0.54599f, 0.99692f, 0.77127f, 0.25146f,
+                0.18313f, 0.53363f, 0.39527f, 0.27575f, 0.3433f,  0.41746f,
-            0.4206f,  0.16291f, 0.93484f, 0.79765f, 0.74982f, 0.78336f,
+                0.16831f, 0.61693f, 0.54599f, 0.99692f, 0.77127f, 0.25146f,
-            0.6386f,  0.87744f, 0.33587f, 0.9691f,  0.68437f, 0.65098f,
+                0.4206f,  0.16291f, 0.93484f, 0.79765f, 0.74982f, 0.78336f,
-            0.48153f, 0.97546f, 0.8026f,  0.36689f, 0.98152f, 0.37351f,
+                0.6386f,  0.87744f, 0.33587f, 0.9691f,  0.68437f, 0.65098f,
-            0.68407f, 0.2684f,  0.2855f,  0.76195f, 0.67828f, 0.603f
+                0.48153f, 0.97546f, 0.8026f,  0.36689f, 0.98152f, 0.37351f,
-        };
+                0.68407f, 0.2684f,  0.2855f,  0.76195f, 0.67828f, 0.603f
-        conv2d->setWeights(conv2dWeights.data());
+            };
-        model->addLayer("conv2d", conv2d);
+            conv2d->setWeights(conv2dWeights.data());
        }
        model->addLayer("conv1", conv2d);
        // maxpool2d
        CUDANet::Layers::MaxPooling2D *maxpool2d =
@@ -46,21 +50,23 @@ class ModelTest : public ::testing::Test {
                inputSize - kernelSize + 1, numFilters, poolingSize,
                poolingStride, CUDANet::Layers::ActivationType::RELU
            );
-        model->addLayer("maxpool2d", maxpool2d);
+        model->addLayer("maxpool1", maxpool2d);
        // dense
        CUDANet::Layers::Dense *dense = new CUDANet::Layers::Dense(
            8, 3, CUDANet::Layers::ActivationType::SOFTMAX
        );
-        // dense weights 18*6
+
-        std::vector<float> denseWeights = {
+        if (setWeights) {
-            0.36032f, 0.33115f, 0.02948f, 0.09802f, 0.45072f, 0.56266f,
+            std::vector<float> denseWeights = {
-            0.43514f, 0.80946f, 0.43439f, 0.90916f, 0.08605f, 0.07473f,
+                0.36032f, 0.33115f, 0.02948f, 0.09802f, 0.45072f, 0.56266f,
-            0.94788f, 0.66168f, 0.34927f, 0.09464f, 0.61963f, 0.73775f,
+                0.43514f, 0.80946f, 0.43439f, 0.90916f, 0.08605f, 0.07473f,
-            0.51559f, 0.81916f, 0.64915f, 0.03934f, 0.87608f, 0.68364f,
+                0.94788f, 0.66168f, 0.34927f, 0.09464f, 0.61963f, 0.73775f,
-        };
+                0.51559f, 0.81916f, 0.64915f, 0.03934f, 0.87608f, 0.68364f,
-        dense->setWeights(denseWeights.data());
+            };
-        model->addLayer("dense", dense);
+            dense->setWeights(denseWeights.data());
        }
        model->addLayer("linear", dense);
        return model;
    }
@@ -103,6 +109,8 @@ TEST_F(ModelTest, TestModelPredict) {
    }
    EXPECT_NEAR(sum, 1.0f, 1e-5f);
    commonTestTeardown(model);
 }
 TEST_F(ModelTest, TestModelPredictMultiple) {
@@ -162,4 +170,43 @@ TEST_F(ModelTest, TestModelPredictMultiple) {
    }
    EXPECT_NEAR(sum_2, 1.0f, 1e-5f);
    commonTestTeardown(model);
 }
 // Loads weights from the binary resource file and checks that the model's
 // prediction for a fixed input matches the reference probabilities.
 TEST_F(ModelTest, TestLoadWeights) {
    const int       numOutputs = 3;
    CUDANet::Model *net        = commonTestSetup();
    net->loadWeights("../test/resources/model.bin");
    // 72 input values
    std::vector<float> sample = {
        0.12762f, 0.99056f, 0.77565f, 0.29058f, 0.29787f, 0.58415f, 0.20484f,
        0.05415f, 0.60593f, 0.3162f,  0.08198f, 0.92749f, 0.72392f, 0.91786f,
        0.65266f, 0.80908f, 0.53389f, 0.36069f, 0.18614f, 0.52381f, 0.08525f,
        0.43054f, 0.3355f,  0.96587f, 0.98194f, 0.71336f, 0.78392f, 0.50648f,
        0.40355f, 0.31863f, 0.54686f, 0.1836f,  0.77171f, 0.01262f, 0.41108f,
        0.53467f, 0.3553f,  0.42808f, 0.45798f, 0.29958f, 0.3923f,  0.98277f,
        0.02033f, 0.99868f, 0.90584f, 0.57554f, 0.15957f, 0.91273f, 0.38901f,
        0.27097f, 0.64788f, 0.84272f, 0.42984f, 0.07466f, 0.53658f, 0.83388f,
        0.28232f, 0.48046f, 0.85626f, 0.04721f, 0.36139f, 0.6123f,  0.56991f,
        0.84854f, 0.61415f, 0.2466f,  0.20017f, 0.78952f, 0.93797f, 0.27884f,
        0.30514f, 0.23521f
    };
    std::vector<float> reference = {2e-05f, 0.00021f, 0.99977f};
    // Run inference and compare every output against its reference value
    const float *prediction = net->predict(sample.data());
    float total = 0.0f;
    int   idx   = 0;
    while (idx < numOutputs) {
        EXPECT_NEAR(reference[idx], prediction[idx], 1e-5f);
        total += prediction[idx];
        ++idx;
    }
    // The outputs are expected to sum to one
    EXPECT_NEAR(total, 1.0f, 1e-5f);
    commonTestTeardown(net);
 }