Update AlexNet preprocessing: convert BGR to RGB, resize preserving aspect ratio, then center-crop

This commit is contained in:
2024-09-04 21:41:26 +02:00
parent 7b8c4bd811
commit e7ec6c91f8
2 changed files with 31 additions and 10 deletions

View File

@@ -4,15 +4,30 @@
#include <vector> #include <vector>
std::vector<float> std::vector<float>
readAndNormalizeImage(const std::string &imagePath, int width, int height) { readAndNormalizeImage(const std::string &imagePath, int resizeSize, int cropSize) {
// Read the image using OpenCV // Read the image using OpenCV
cv::Mat image = cv::imread(imagePath, cv::IMREAD_COLOR); cv::Mat image = cv::imread(imagePath, cv::IMREAD_COLOR);
// Convert the image from BGR to RGB
cv::cvtColor(image, image, cv::COLOR_BGR2RGB);
// Resize and normalize the image // Calculate the scaling factor
cv::resize(image, image, cv::Size(width, height)); double scale = std::max(static_cast<double>(resizeSize) / image.cols, static_cast<double>(resizeSize) / image.rows);
// Resize the image
cv::Mat resized;
cv::resize(image, resized, cv::Size(), scale, scale, cv::INTER_AREA);
// Calculate the cropping coordinates
int x = (resized.cols - cropSize) / 2;
int y = (resized.rows - cropSize) / 2;
// Perform center cropping
cv::Rect roi(x, y, cropSize, cropSize);
image = resized(roi);
// Normalize the image
image.convertTo(image, CV_32FC3, 1.0 / 255.0); image.convertTo(image, CV_32FC3, 1.0 / 255.0);
// Normalize the image https://pytorch.org/hub/pytorch_vision_alexnet/
cv::Mat mean(image.size(), CV_32FC3, cv::Scalar(0.485, 0.456, 0.406)); cv::Mat mean(image.size(), CV_32FC3, cv::Scalar(0.485, 0.456, 0.406));
cv::Mat std(image.size(), CV_32FC3, cv::Scalar(0.229, 0.224, 0.225)); cv::Mat std(image.size(), CV_32FC3, cv::Scalar(0.229, 0.224, 0.225));
cv::subtract(image, mean, image); cv::subtract(image, mean, image);
@@ -124,7 +139,7 @@ int main(int argc, const char *const argv[]) {
// Read and normalize the image // Read and normalize the image
std::vector<float> imageData = std::vector<float> imageData =
readAndNormalizeImage(imagePath, inputSize.first, inputSize.second); readAndNormalizeImage(imagePath, inputSize.first, inputSize.first);
// Print the size of the image data // Print the size of the image data
const float *output = model->predict(imageData.data()); const float *output = model->predict(imageData.data());

View File

@@ -3,11 +3,17 @@ import sys
import torchvision import torchvision
sys.path.append('../../tools') # Ugly hack sys.path.append('../../tools') # Ugly hack
from utils import export_model_weights, print_model_parameters from utils import export_model_weights, print_model_parameters, predict
if __name__ == "__main__": if __name__ == "__main__":
alexnet = torchvision.models.alexnet(weights=torchvision.models.AlexNet_Weights.DEFAULT)
print_model_parameters(alexnet) # print layer names and number of parameters weights = torchvision.models.AlexNet_Weights.DEFAULT
export_model_weights(alexnet, 'alexnet_weights.bin') alexnet = torchvision.models.alexnet(weights=weights)
# predict(alexnet, 'cat.jpg')
# print_model_parameters(alexnet) # print layer names and number of parameters
export_model_weights(alexnet, 'alexnet_weights.bin')
# class_labels = weights.meta["categories"]
# prediction = predict(alexnet, "margot.jpg")
# print(prediction, class_labels[prediction])