From ac53b1672e1361e30d06bcd1b1d0815137a06d94 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Thu, 30 May 2024 13:51:21 +0200 Subject: [PATCH] Implement Inception v3 main func --- examples/inception_v3/inception_v3.cpp | 73 ++++++++++++++++++++++---- 1 file changed, 64 insertions(+), 9 deletions(-) diff --git a/examples/inception_v3/inception_v3.cpp b/examples/inception_v3/inception_v3.cpp index 4ccfb83..a19db62 100644 --- a/examples/inception_v3/inception_v3.cpp +++ b/examples/inception_v3/inception_v3.cpp @@ -1,4 +1,7 @@ #include + +#include + #include class BasicConv2d : public CUDANet::Module { @@ -623,8 +626,6 @@ class InceptionE : public CUDANet::Module { branch_3x3_2_concat = new CUDANet::Layers::Concat( branch3x3_2a->getOutputSize(), branch3x3_2b->getOutputSize() ); - std::cout << "branch_3x3_2_concat: " - << branch_3x3_2_concat->getOutputSize() << std::endl; // Branch 3x3dbl branch3x3dbl_1 = new BasicConv2d( @@ -650,8 +651,6 @@ class InceptionE : public CUDANet::Module { branch_3x3dbl_3_concat = new CUDANet::Layers::Concat( branch3x3dbl_3a->getOutputSize(), branch3x3dbl_3b->getOutputSize() ); - std::cout << "branch_3x3dbl_3_concat: " - << branch_3x3dbl_3_concat->getOutputSize() << std::endl; // Branch Pool branchPool_1 = new CUDANet::Layers::AvgPooling2d( @@ -932,16 +931,72 @@ class InceptionV3 : public CUDANet::Model { CUDANet::Layers::Dense *fc; }; +std::vector +readAndNormalizeImage(const std::string &imagePath, int width, int height) { + // Read the image using OpenCV + cv::Mat image = cv::imread(imagePath, cv::IMREAD_COLOR); + + // Resize and normalize the image + cv::resize(image, image, cv::Size(width, height)); + image.convertTo(image, CV_32FC3, 1.0 / 255.0); + + // Normalize the image https://pytorch.org/hub/pytorch_vision_alexnet/ + cv::Mat mean(image.size(), CV_32FC3, cv::Scalar(0.485, 0.456, 0.406)); + cv::Mat std(image.size(), CV_32FC3, cv::Scalar(0.229, 0.224, 0.225)); + cv::subtract(image, mean, image); + cv::divide(image, std, image); + + // Convert the 3D image matrix to a 1D array of floats + std::vector imageData; + for (int c = 0; c < image.channels(); ++c) { + for (int i = 0; i < image.rows; ++i) { + for (int j = 0; j < image.cols; ++j) { + imageData.push_back(image.at(i, j)[c]); + } + } + } + + return imageData; +} + int main(int argc, const char *const argv[]) { - InceptionV3 *inception_v3 = new InceptionV3({299, 299}, 3, 1000); - - inception_v3->printSummary(); - if (argc != 3) { std::cerr << "Usage: " << argv[0] << " " << std::endl; return 1; // Return error code indicating incorrect usage } - std::cout << "Loading model..." << std::endl; + std::string modelWeightsPath = argv[1]; + std::string imagePath = argv[2]; + + const shape2d inputSize = {299, 299}; + const int inputChannels = 3; + const int outputSize = 1000; + + InceptionV3 *inception_v3 = new InceptionV3(inputSize, inputChannels, outputSize); + inception_v3->printSummary(); + + std::cout << std::endl; + + inception_v3->loadWeights(modelWeightsPath); + + std::vector imageData = + readAndNormalizeImage(imagePath, inputSize.first, inputSize.second); + + // Print the size of the image data + const float *output = inception_v3->predict(imageData.data()); + + // Get max index + int maxIndex = 0; + for (int i = 0; i < outputSize; i++) { + if (output[i] > output[maxIndex]) { + maxIndex = i; + } + } + + std::string classLabel = CUDANet::Utils::IMAGENET_CLASS_MAP.at(maxIndex); + + std::cout << "Prediction: " << maxIndex << " " << classLabel << std::endl; + return 0; + } \ No newline at end of file