Implement Inception v3 main func

2025-12-22 14:24:22 +00:00 · 2024-05-30 13:51:21 +02:00
parent da95fa203e
commit ac53b1672e
1 changed files with 64 additions and 9 deletions
--- a/examples/inception_v3/inception_v3.cpp
+++ b/examples/inception_v3/inception_v3.cpp
@@ -1,4 +1,7 @@
 #include <cudanet.cuh>
+
+#include <opencv2/opencv.hpp>
+
 #include <iostream>

 class BasicConv2d : public CUDANet::Module {
@@ -623,8 +626,6 @@ class InceptionE : public CUDANet::Module {
        branch_3x3_2_concat = new CUDANet::Layers::Concat(
            branch3x3_2a->getOutputSize(), branch3x3_2b->getOutputSize()
        );
-        std::cout << "branch_3x3_2_concat: "
-                  << branch_3x3_2_concat->getOutputSize() << std::endl;

        // Branch 3x3dbl
        branch3x3dbl_1 = new BasicConv2d(
@@ -650,8 +651,6 @@ class InceptionE : public CUDANet::Module {
        branch_3x3dbl_3_concat = new CUDANet::Layers::Concat(
            branch3x3dbl_3a->getOutputSize(), branch3x3dbl_3b->getOutputSize()
        );
-        std::cout << "branch_3x3dbl_3_concat: "
-                  << branch_3x3dbl_3_concat->getOutputSize() << std::endl;

        // Branch Pool
        branchPool_1 = new CUDANet::Layers::AvgPooling2d(
@@ -932,16 +931,72 @@ class InceptionV3 : public CUDANet::Model {
    CUDANet::Layers::Dense *fc;
 };

+std::vector<float>
+readAndNormalizeImage(const std::string &imagePath, int width, int height) {
+    // Read the image using OpenCV
+    cv::Mat image = cv::imread(imagePath, cv::IMREAD_COLOR);
+
+    // Resize and normalize the image
+    cv::resize(image, image, cv::Size(width, height));
+    image.convertTo(image, CV_32FC3, 1.0 / 255.0);
+
+    // Normalize the image https://pytorch.org/hub/pytorch_vision_alexnet/
+    cv::Mat mean(image.size(), CV_32FC3, cv::Scalar(0.485, 0.456, 0.406));
+    cv::Mat std(image.size(), CV_32FC3, cv::Scalar(0.229, 0.224, 0.225));
+    cv::subtract(image, mean, image);
+    cv::divide(image, std, image);
+
+    // Convert the 3D image matrix to a 1D array of floats
+    std::vector<float> imageData;
+    for (int c = 0; c < image.channels(); ++c) {
+        for (int i = 0; i < image.rows; ++i) {
+            for (int j = 0; j < image.cols; ++j) {
+                imageData.push_back(image.at<cv::Vec3f>(i, j)[c]);
+            }
+        }
+    }
+
+    return imageData;
+}
+
 int main(int argc, const char *const argv[]) {
-    InceptionV3 *inception_v3 = new InceptionV3({299, 299}, 3, 1000);
-
-    inception_v3->printSummary();
-
    if (argc != 3) {
        std::cerr << "Usage: " << argv[0] << "<model_weights_path> <image_path>"
                  << std::endl;
        return 1;  // Return error code indicating incorrect usage
    }

-    std::cout << "Loading model..." << std::endl;
+    std::string modelWeightsPath = argv[1];
+    std::string imagePath        = argv[2];
+
+    const shape2d inputSize     = {299, 299};
+    const int inputChannels = 3;
+    const int outputSize    = 1000;
+
+    InceptionV3 *inception_v3 = new InceptionV3(inputSize, inputChannels, outputSize);
+    inception_v3->printSummary();
+
+    std::cout << std::endl;
+
+    inception_v3->loadWeights(modelWeightsPath);
+
+    std::vector<float> imageData =
+        readAndNormalizeImage(imagePath, inputSize.first, inputSize.second);
+
+    // Print the size of the image data
+    const float *output = inception_v3->predict(imageData.data());
+
+    // Get max index
+    int maxIndex = 0;
+    for (int i = 0; i < outputSize; i++) {
+        if (output[i] > output[maxIndex]) {
+            maxIndex = i;
+        }
+    }
+
+    std::string classLabel = CUDANet::Utils::IMAGENET_CLASS_MAP.at(maxIndex);
+
+    std::cout << "Prediction: " << maxIndex << " " << classLabel << std::endl;
+    return 0;
+
 }