From d08567a56363edf63c58d16d6e23fa1820988146 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Sat, 20 Apr 2024 18:36:53 +0200
Subject: [PATCH] Fix weight/bias parsing and better error logging

---
 examples/alexnet/main.cpp                | 75 +++++++++++++++++++++---
 include/kernels/activation_functions.cuh |  2 +
 include/kernels/convolution.cuh          |  2 +
 include/kernels/matmul.cuh               |  2 +
 include/layers/dense.cuh                 |  1 +
 src/model/model.cpp                      | 22 ++++++-
 6 files changed, 93 insertions(+), 11 deletions(-)

diff --git a/examples/alexnet/main.cpp b/examples/alexnet/main.cpp
index 0a44fde..1e88b9d 100644
--- a/examples/alexnet/main.cpp
+++ b/examples/alexnet/main.cpp
@@ -5,6 +5,8 @@
 #include
 #include
+#include
+#include
 
 std::vector<float> readAndNormalizeImage(const std::string& imagePath, int width, int height) {
     // Read the image using OpenCV
@@ -30,15 +32,63 @@ CUDANet::Model* createModel(const int inputSize, const int inputChannels, const
     CUDANet::Model *model =
         new CUDANet::Model(inputSize, inputChannels, outputSize);
 
-    // AlexNet
+    // Block 1
     CUDANet::Layers::Conv2d *conv1 = new CUDANet::Layers::Conv2d(
-        inputSize, inputChannels, 11, 4, 96, CUDANet::Layers::Padding::SAME, CUDANet::Layers::ActivationType::RELU
+        inputSize, inputChannels, 11, 4, 64, 2, CUDANet::Layers::ActivationType::RELU
     );
-    model->addLayer("conv1", conv1);
-    CUDANet::Layers::MaxPooling *pool1 = new CUDANet::Layers::MaxPooling(
-        3, 3, 2
-    )
+    model->addLayer("features.0", conv1); // Match pytorch naming
+    CUDANet::Layers::MaxPooling2D *pool1 = new CUDANet::Layers::MaxPooling2D(
+        56, 64, 3, 2, CUDANet::Layers::ActivationType::NONE
+    );
+    model->addLayer("pool1", pool1);
+    // Block 2
+    CUDANet::Layers::Conv2d *conv2 = new CUDANet::Layers::Conv2d(
+        27, 64, 5, 1, 192, 2, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("features.3", conv2);
+    CUDANet::Layers::MaxPooling2D *pool2 = new CUDANet::Layers::MaxPooling2D(
+        27, 192, 3, 2, CUDANet::Layers::ActivationType::NONE
+    );
+    model->addLayer("pool2", pool2);
+
+    // Block 3
+    CUDANet::Layers::Conv2d *conv3 = new CUDANet::Layers::Conv2d(
+        13, 192, 3, 1, 384, 1, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("features.6", conv3);
+
+    // Block 4
+    CUDANet::Layers::Conv2d *conv4 = new CUDANet::Layers::Conv2d(
+        13, 384, 3, 1, 256, 1, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("features.8", conv4);
+
+    // Block 5
+    CUDANet::Layers::Conv2d *conv5 = new CUDANet::Layers::Conv2d(
+        13, 256, 3, 1, 256, 1, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("features.10", conv5);
+    CUDANet::Layers::MaxPooling2D *pool5 = new CUDANet::Layers::MaxPooling2D(
+        13, 256, 3, 2, CUDANet::Layers::ActivationType::NONE
+    );
+    model->addLayer("pool5", pool5);
+
+    // Classifier
+    CUDANet::Layers::Dense *dense1 = new CUDANet::Layers::Dense(
+        6 * 6 * 256, 4096, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("classifier.1", dense1);
+
+    CUDANet::Layers::Dense *dense2 = new CUDANet::Layers::Dense(
+        4096, 4096, CUDANet::Layers::ActivationType::RELU
+    );
+    model->addLayer("classifier.4", dense2);
+
+    CUDANet::Layers::Dense *dense3 = new CUDANet::Layers::Dense(
+        4096, 1000, CUDANet::Layers::ActivationType::NONE
+    );
+    model->addLayer("classifier.6", dense3);
 
     return model;
 }
@@ -59,13 +109,22 @@ int main(int argc, const char* const argv[]) {
     const int outputSize = 1000;
 
     CUDANet::Model *model = createModel(inputSize, inputChannels, outputSize);
-
+    model->loadWeights(modelWeightsPath);
 
     // Read and normalize the image
     std::vector<float> imageData =
         readAndNormalizeImage(imagePath, inputSize, inputSize);
 
     // Print the size of the image data
-    std::cout << "Size of image data: " << imageData.size() << std::endl;
+    float* output = model->predict(imageData.data());
+    // Get max index
+    int maxIndex = 0;
+    for (int i = 0; i < outputSize; i++) {
+        if (output[i] > output[maxIndex]) {
+            maxIndex = i;
+        }
+    }
+
+    std::cout << "Prediction: " << maxIndex << std::endl;
 
     return 0;
 }
\ No newline at end of file
diff --git a/include/kernels/activation_functions.cuh b/include/kernels/activation_functions.cuh
index a74c2cc..db2809d 100644
--- a/include/kernels/activation_functions.cuh
+++ b/include/kernels/activation_functions.cuh
@@ -1,6 +1,8 @@
 #ifndef CUDANET_ACTIVATION_FUNCTIONS_H
 #define CUDANET_ACTIVATION_FUNCTIONS_H
 
+#include
+
 namespace CUDANet::Kernels {
 
 /**
diff --git a/include/kernels/convolution.cuh b/include/kernels/convolution.cuh
index 89861f1..c827690 100644
--- a/include/kernels/convolution.cuh
+++ b/include/kernels/convolution.cuh
@@ -1,6 +1,8 @@
 #ifndef CUDANET_CONVOLUTION_H
 #define CUDANET_CONVOLUTION_H
 
+#include
+
 namespace CUDANet::Kernels {
 
 /**
diff --git a/include/kernels/matmul.cuh b/include/kernels/matmul.cuh
index 3784251..da7cd5f 100644
--- a/include/kernels/matmul.cuh
+++ b/include/kernels/matmul.cuh
@@ -1,6 +1,8 @@
 #ifndef CUDANET_MATMUL_H
 #define CUDANET_MATMUL_H
 
+#include
+
 namespace CUDANet::Kernels {
 
 /**
diff --git a/include/layers/dense.cuh b/include/layers/dense.cuh
index bc1861f..e0e1ab8 100644
--- a/include/layers/dense.cuh
+++ b/include/layers/dense.cuh
@@ -4,6 +4,7 @@
 #include
 
 #include "layer.cuh"
+#include "activation.cuh"
 
 namespace CUDANet::Layers {
 
diff --git a/src/model/model.cpp b/src/model/model.cpp
index d80994c..387ea1f 100644
--- a/src/model/model.cpp
+++ b/src/model/model.cpp
@@ -89,11 +89,11 @@ void Model::loadWeights(const std::string& path) {
 
         // Parse tensor name into name and type
         std::string nameStr = line.substr(0, commaPos);
-        size_t dotPos = nameStr.find('.');
+        size_t dotPos = nameStr.find_last_of('.');
         if (dotPos == std::string::npos) continue;
 
         std::string name = nameStr.substr(0, dotPos);
-        TensorType type = nameStr.substr(dotPos + 1) == "w" ? TensorType::WEIGHT : TensorType::BIAS;
+        TensorType type = nameStr.substr(dotPos + 1) == "weight" ? TensorType::WEIGHT : TensorType::BIAS;
 
         line = line.substr(commaPos + 1);
 
@@ -118,15 +118,31 @@ void Model::loadWeights(const std::string& path) {
             Layers::WeightedLayer* wLayer =
                 dynamic_cast<Layers::WeightedLayer*>(layerMap[tensorInfo.name]);
 
             if (wLayer == nullptr) {
-                std::cerr << "Layer: " << tensorInfo.name << "does not have weights, skipping" << std::endl;
+                std::cerr << "Layer: " << tensorInfo.name << " does not have weights" << std::endl;
                 continue;
             }
 
             if (tensorInfo.type == TensorType::WEIGHT) {
+
+                if (wLayer->getWeights().size() != values.size()) {
+                    std::cerr << "Layer: " << tensorInfo.name << " has incorrect number of weights, expected "
+                              << wLayer->getWeights().size() << " but got " << values.size() << ", skipping" << std::endl;
+                    continue;
+                }
+
                 wLayer->setWeights(values.data());
             } else if (tensorInfo.type == TensorType::BIAS) {
+
+                if (wLayer->getBiases().size() != values.size()) {
+                    std::cerr << "Layer: " << tensorInfo.name << " has incorrect number of biases, expected "
+                              << wLayer->getBiases().size() << " but got " << values.size() << ", skipping" << std::endl;
+                    continue;
+                }
+
                 wLayer->setBiases(values.data());
             }
+        } else {
+            std::cerr << "Layer: " << tensorInfo.name << " does not exist, skipping" << std::endl;
         }
     }
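
Note on the parsing fix: PyTorch state_dict keys have the form
"features.0.weight" or "classifier.1.bias", so the layer name itself
contains dots. Splitting on the first '.' truncated the name to
"features", and the old suffix check compared against "w" while PyTorch
exports "weight", so weight tensors fell through to BIAS. A minimal
standalone sketch of the corrected split (the key below is a
hypothetical example, not taken from a real weights file):

    #include <iostream>
    #include <string>

    int main() {
        std::string nameStr = "features.0.weight";  // hypothetical exported key

        // Split on the *last* dot: layer name before it, tensor type after it.
        size_t dotPos = nameStr.find_last_of('.');
        std::string name = nameStr.substr(0, dotPos);   // "features.0"
        std::string type = nameStr.substr(dotPos + 1);  // "weight"

        std::cout << name << " -> " << type << std::endl;
        return 0;
    }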
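
Related design note: the example now registers layers under PyTorch-style
names ("features.0", "classifier.1", ...) so that the names parsed out of
an exported AlexNet state_dict resolve directly through layerMap. Tensors
that name a missing layer, or whose element count disagrees with
getWeights()/getBiases(), are logged and skipped instead of being copied
into a device buffer of the wrong size.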