Implement getting layer, weights and biases

This commit is contained in:
2024-04-16 19:09:41 +02:00
parent f4ae45f867
commit 9fb9d7e8e1
7 changed files with 77 additions and 5 deletions

View File

@@ -58,6 +58,13 @@ class Conv2d : public WeightedLayer {
*/ */
void setWeights(const float* weights_input); void setWeights(const float* weights_input);
/**
* @brief Get the weights of the convolutional layer
*
* @return std::vector<float>
*/
std::vector<float> getWeights();
/** /**
* @brief Set the biases of the convolutional layer * @brief Set the biases of the convolutional layer
* *
@@ -65,6 +72,13 @@ class Conv2d : public WeightedLayer {
*/ */
void setBiases(const float* biases_input); void setBiases(const float* biases_input);
/**
* @brief Get the biases of the convolutional layer
*
* @return std::vector<float>
*/
std::vector<float> getBiases();
/** /**
* @brief Get the output width (/ height) of the layer * @brief Get the output width (/ height) of the layer
* *

View File

@@ -43,6 +43,13 @@ class Dense : public WeightedLayer {
*/ */
void setWeights(const float* weights); void setWeights(const float* weights);
/**
* @brief Get the weights of the layer
*
* @return Vector of weights
*/
std::vector<float> getWeights();
/** /**
* @brief Set the biases of the layer * @brief Set the biases of the layer
* *
@@ -50,6 +57,13 @@ class Dense : public WeightedLayer {
*/ */
void setBiases(const float* biases); void setBiases(const float* biases);
/**
* @brief Get the biases of the layer
*
* @return Vector of biases
*/
std::vector<float> getBiases();
private: private:
unsigned int inputSize; unsigned int inputSize;
unsigned int outputSize; unsigned int outputSize;

View File

@@ -2,6 +2,8 @@
#ifndef CUDANET_I_LAYER_H #ifndef CUDANET_I_LAYER_H
#define CUDANET_I_LAYER_H #define CUDANET_I_LAYER_H
#include <vector>
namespace CUDANet::Layers { namespace CUDANet::Layers {
/** /**
@@ -60,6 +62,12 @@ class WeightedLayer : public SequentialLayer {
*/ */
virtual void setWeights(const float* weights) = 0; virtual void setWeights(const float* weights) = 0;
/**
* @brief Virtual function for getting weights
*
*/
virtual std::vector<float> getWeights() = 0;
/** /**
* @brief Virtual function for setting biases * @brief Virtual function for setting biases
* *
@@ -67,6 +75,12 @@ class WeightedLayer : public SequentialLayer {
*/ */
virtual void setBiases(const float* biases) = 0; virtual void setBiases(const float* biases) = 0;
/**
* @brief Virtual function for getting biases
*
*/
virtual std::vector<float> getBiases() = 0;
private: private:
/** /**
* @brief Initialize the weights * @brief Initialize the weights

View File

@@ -29,6 +29,8 @@ class Model {
float* predict(const float* input); float* predict(const float* input);
void addLayer(const std::string& name, Layers::SequentialLayer* layer); void addLayer(const std::string& name, Layers::SequentialLayer* layer);
Layers::SequentialLayer* getLayer(const std::string& name);
void loadWeights(const std::string& path); void loadWeights(const std::string& path);
private: private:
@@ -41,7 +43,7 @@ class Model {
int outputSize; int outputSize;
std::vector<Layers::SequentialLayer*> layers; std::vector<Layers::SequentialLayer*> layers;
std::unordered_map<std::string, Layers::WeightedLayer*> layerMap; std::unordered_map<std::string, Layers::SequentialLayer*> layerMap;
}; };
} // namespace CUDANet } // namespace CUDANet

View File

@@ -84,11 +84,19 @@ void Conv2d::setWeights(const float* weights_input) {
toCuda(); toCuda();
} }
/**
 * @brief Get the weights of the convolutional layer
 *
 * @return std::vector<float> Copy of the host-side weight buffer
 */
std::vector<float> Conv2d::getWeights() {
    // Return a copy so callers cannot mutate the layer's internal buffer.
    return std::vector<float>(weights.begin(), weights.end());
}
void Conv2d::setBiases(const float* biases_input) { void Conv2d::setBiases(const float* biases_input) {
std::copy(biases_input, biases_input + biases.size(), biases.begin()); std::copy(biases_input, biases_input + biases.size(), biases.begin());
toCuda(); toCuda();
} }
/**
 * @brief Get the biases of the convolutional layer
 *
 * @return std::vector<float> Copy of the host-side bias buffer
 */
std::vector<float> Conv2d::getBiases() {
    // Return a copy so callers cannot mutate the layer's internal buffer.
    return std::vector<float>(biases.begin(), biases.end());
}
void Conv2d::toCuda() { void Conv2d::toCuda() {
CUDA_CHECK(cudaMemcpy( CUDA_CHECK(cudaMemcpy(
d_weights, weights.data(), d_weights, weights.data(),

View File

@@ -98,7 +98,15 @@ void Dense::setWeights(const float* weights_input) {
toCuda(); toCuda();
} }
/**
 * @brief Get the weights of the layer
 *
 * @return Vector of weights (copy of the host-side buffer)
 */
std::vector<float> Dense::getWeights() {
    // Return a copy so callers cannot mutate the layer's internal buffer.
    return std::vector<float>(weights.begin(), weights.end());
}
void Dense::setBiases(const float* biases_input) { void Dense::setBiases(const float* biases_input) {
std::copy(biases_input, biases_input + biases.size(), biases.begin()); std::copy(biases_input, biases_input + biases.size(), biases.begin());
toCuda(); toCuda();
} }
/**
 * @brief Get the biases of the layer
 *
 * @return Vector of biases (copy of the host-side buffer)
 */
std::vector<float> Dense::getBiases() {
    // Return a copy so callers cannot mutate the layer's internal buffer.
    return std::vector<float>(biases.begin(), biases.end());
}

View File

@@ -16,7 +16,7 @@ Model::Model(const int inputSize, const int inputChannels, const int outputSize)
inputChannels(inputChannels), inputChannels(inputChannels),
outputSize(outputSize), outputSize(outputSize),
layers(std::vector<Layers::SequentialLayer*>()), layers(std::vector<Layers::SequentialLayer*>()),
layerMap(std::unordered_map<std::string, Layers::WeightedLayer*>()) { layerMap(std::unordered_map<std::string, Layers::SequentialLayer*>()) {
inputLayer = new Layers::Input(inputSize * inputSize * inputChannels); inputLayer = new Layers::Input(inputSize * inputSize * inputChannels);
outputLayer = new Layers::Output(outputSize); outputLayer = new Layers::Output(outputSize);
}; };
@@ -26,7 +26,7 @@ Model::Model(const Model& other)
inputChannels(other.inputChannels), inputChannels(other.inputChannels),
outputSize(other.outputSize), outputSize(other.outputSize),
layers(std::vector<Layers::SequentialLayer*>()), layers(std::vector<Layers::SequentialLayer*>()),
layerMap(std::unordered_map<std::string, Layers::WeightedLayer*>()) { layerMap(std::unordered_map<std::string, Layers::SequentialLayer*>()) {
inputLayer = new Layers::Input(*other.inputLayer); inputLayer = new Layers::Input(*other.inputLayer);
outputLayer = new Layers::Output(*other.outputLayer); outputLayer = new Layers::Output(*other.outputLayer);
} }
@@ -59,6 +59,10 @@ void Model::addLayer(const std::string& name, Layers::SequentialLayer* layer) {
} }
} }
/**
 * @brief Look up a layer by name.
 *
 * @param name Name the layer was registered under via addLayer
 * @return Pointer to the layer, or nullptr if no layer with that name exists
 */
Layers::SequentialLayer* Model::getLayer(const std::string& name) {
    // Use find() rather than operator[]: operator[] default-inserts a
    // nullptr entry into layerMap for every unknown name, silently
    // polluting the map on a read-only lookup.
    auto it = layerMap.find(name);
    return it != layerMap.end() ? it->second : nullptr;
}
void Model::loadWeights(const std::string& path) { void Model::loadWeights(const std::string& path) {
std::ifstream file(path, std::ios::binary); std::ifstream file(path, std::ios::binary);
@@ -115,10 +119,18 @@ void Model::loadWeights(const std::string& path) {
file.read(reinterpret_cast<char*>(values.data()), tensorInfo.size * sizeof(float)); file.read(reinterpret_cast<char*>(values.data()), tensorInfo.size * sizeof(float));
if (layerMap.find(tensorInfo.name) != layerMap.end()) { if (layerMap.find(tensorInfo.name) != layerMap.end()) {
Layers::WeightedLayer* wLayer = dynamic_cast<Layers::WeightedLayer*>(layerMap[tensorInfo.name]);
if (wLayer == nullptr) {
std::cerr << "Layer: " << tensorInfo.name << "does not have weights, skipping" << std::endl;
continue;
}
if (tensorInfo.type == TensorType::WEIGHT) { if (tensorInfo.type == TensorType::WEIGHT) {
layerMap[tensorInfo.name]->setWeights(values.data()); wLayer->setWeights(values.data());
} else if (tensorInfo.type == TensorType::BIAS) { } else if (tensorInfo.type == TensorType::BIAS) {
layerMap[tensorInfo.name]->setBiases(values.data()); wLayer->setBiases(values.data());
} }
} }
} }