diff --git a/include/backend/cuda.cuh b/include/backend/cuda.cuh index 68d9adb..783dca0 100644 --- a/include/backend/cuda.cuh +++ b/include/backend/cuda.cuh @@ -1,8 +1,29 @@ #pragma once +#include + #include "backend.hpp" #include "tensor.hpp" +#ifndef BLOCK_SIZE +#define BLOCK_SIZE 128 +#endif // BLOCK_SIZE + +/** + * @brief CUDA error checking macro + * + */ +#define CUDA_CHECK(call) \ +do { \ + cudaError_t result = call; \ + if (result != cudaSuccess) { \ + fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", \ + __FILE__, __LINE__, static_cast(result), \ + cudaGetErrorString(result), #call); \ + exit(EXIT_FAILURE); \ + } \ +} while (0) + namespace CUDANet::Backend { class CUDA : public Backend { diff --git a/include/model.hpp b/include/model.hpp new file mode 100644 index 0000000..a3187ed --- /dev/null +++ b/include/model.hpp @@ -0,0 +1,55 @@ +#pragma once + +#include +#include +#include + +#include "layer.hpp" +#include "module.hpp" + +namespace CUDANet { + +enum TensorType { + WEIGHT, + BIAS, + RUNNING_MEAN, + RUNNING_VAR +}; + +struct TensorInfo { + std::string name; + TensorType type; + int size; + int offset; +}; + +class Model { + public: + Model(const CUDANet::Shape input_shape, const CUDANet::Shape output_shape); + ~Model(); + + virtual CUDANet::Tensor& predict(CUDANet::Tensor& input); + + CUDANet::Layer* get_layer(const std::string& name); + + void register_layer(const std::string& name, Layer* layer); + + void register_module(Module& module); + + void load_weights(const std::string& path); + + bool validate(); + + void print_summary(); + + protected: + CUDANet::Shape in_shape; + CUDANet::Shape out_shape; + + CUDANet::Tensor output; + + std::vector> layers; + std::unordered_map layer_map; +}; + +} // namespace CUDANet diff --git a/include/model/model.hpp b/include/model/model.hpp deleted file mode 100644 index 3ac878a..0000000 --- a/include/model/model.hpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifndef CUDANET_MODEL_H -#define CUDANET_MODEL_H - -#include -#include -#include - -#include "input.hpp" -#include "layer.hpp" -#include "module.hpp" -#include "output.hpp" - -namespace CUDANet { - -enum TensorType { - WEIGHT, - BIAS, - RUNNING_MEAN, - RUNNING_VAR -}; - -struct TensorInfo { - std::string name; - TensorType type; - int size; - int offset; -}; - -class Model { - public: - Model(const shape2d inputSize, const int inputChannels, const int outputSize); - Model(const Model& other); - ~Model(); - - virtual float* predict(const float* input); - - void addLayer(const std::string& name, Layers::SequentialLayer* layer); - Layers::SequentialLayer* getLayer(const std::string& name); - - void loadWeights(const std::string& path); - - bool validate(); - - void printSummary(); - - protected: - Layers::Input* inputLayer; - Layers::Output* outputLayer; - - shape2d inputSize; - int inputChannels; - - int outputSize; - - std::vector> layers; - std::unordered_map layerMap; -}; - -} // namespace CUDANet - -#endif // CUDANET_MODEL_H \ No newline at end of file diff --git a/include/module.hpp b/include/module.hpp index 4521741..ea84007 100644 --- a/include/module.hpp +++ b/include/module.hpp @@ -19,14 +19,14 @@ class Module { size_t output_size(); - void register_layer(const std::string& name, Layer& layer); + void register_layer(const std::string& name, Layer* layer); void register_module(Module& module); - const std::vector>& get_layers() const; + const std::vector>& get_layers() const; protected: - std::vector> layers; + std::vector> layers; CUDANet::Shape in_shape; CUDANet::Shape out_shape; diff --git a/include/shape.hpp b/include/shape.hpp index 88634bf..5f0b1ae 100644 --- a/include/shape.hpp +++ b/include/shape.hpp @@ -1,11 +1,21 @@ #pragma once +#include #include namespace CUDANet { typedef std::vector Shape; +std::string format_shape(const Shape& shape) { + std::string result; + for (size_t i = 0; i < shape.size(); ++i) { + if (i > 0) result += ", "; + result += std::to_string(shape[i]); + } + return result; +} + class InvalidShapeException : public std::runtime_error { public: InvalidShapeException( @@ -35,16 +45,6 @@ class InvalidShapeException : public std::runtime_error { format_shape(shape_b) ) ) {} - - private: - static std::string format_shape(const Shape& shape) { - std::string result; - for (size_t i = 0; i < shape.size(); ++i) { - if (i > 0) result += ", "; - result += std::to_string(shape[i]); - } - return result; - } }; } // namespace CUDANet diff --git a/include/utils/cuda_helper.cuh b/include/utils/cuda_helper.cuh deleted file mode 100644 index aa5b7a3..0000000 --- a/include/utils/cuda_helper.cuh +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef CUDANET_HELPER_H -#define CUDANET_HELPER_H - -#include -#include - -#ifndef BLOCK_SIZE -#define BLOCK_SIZE 128 -#endif // BLOCK_SIZE - -/** - * @brief CUDA error checking macro - * - */ -#define CUDA_CHECK(call) \ -do { \ - cudaError_t result = call; \ - if (result != cudaSuccess) { \ - fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", \ - __FILE__, __LINE__, static_cast(result), \ - cudaGetErrorString(result), #call); \ - exit(EXIT_FAILURE); \ - } \ -} while (0) - -#endif // CUDANET_HELPER_H diff --git a/src/backends/cuda/cuda_backend.cu b/src/backends/cuda/cuda_backend.cu index c05983e..f178579 100644 --- a/src/backends/cuda/cuda_backend.cu +++ b/src/backends/cuda/cuda_backend.cu @@ -2,7 +2,6 @@ #include #include -#include #include "backend/cuda.cuh" diff --git a/src/backends/cuda/layer_ops.cu b/src/backends/cuda/layer_ops.cu index f8c760a..0f6b557 100644 --- a/src/backends/cuda/layer_ops.cu +++ b/src/backends/cuda/layer_ops.cu @@ -3,7 +3,6 @@ #include "kernels/convolution.cuh" #include "kernels/matmul.cuh" #include "kernels/pool.cuh" -#include "utils/cuda_helper.cuh" using namespace CUDANet::Backend; diff --git a/src/backends/cuda/tensor_ops.cu b/src/backends/cuda/tensor_ops.cu index ee95cf8..7653ee9 100644 --- a/src/backends/cuda/tensor_ops.cu +++ b/src/backends/cuda/tensor_ops.cu @@ -2,7 +2,6 @@ #include "backend.hpp" #include "backend/cuda.cuh" -#include "utils/cuda_helper.cuh" #include "kernels/matmul.cuh" using namespace CUDANet::Backend; diff --git a/src/model.cpp b/src/model.cpp index 025eb47..91641f2 100644 --- a/src/model.cpp +++ b/src/model.cpp @@ -7,76 +7,49 @@ #include #include -#include "input.hpp" #include "layer.hpp" #include "batch_norm.hpp" using namespace CUDANet; Model::Model( - const shape2d inputSize, - const int inputChannels, - const int outputSize + const CUDANet::Shape input_shape, + const CUDANet::Shape output_shape ) - : inputSize(inputSize), - inputChannels(inputChannels), - outputSize(outputSize), - layers(std::vector>()), - layerMap(std::unordered_map()) { - inputLayer = - new Layers::Input(inputSize.first * inputSize.second * inputChannels); - outputLayer = new Layers::Output(outputSize); -}; + : in_shape(input_shape), + out_shape(out_shape), + layers(std::vector>()), + layer_map(std::unordered_map()) {}; -Model::Model(const Model& other) - : inputSize(other.inputSize), - inputChannels(other.inputChannels), - outputSize(other.outputSize), - layers(std::vector>()), - layerMap(std::unordered_map()) { - inputLayer = new Layers::Input(*other.inputLayer); - outputLayer = new Layers::Output(*other.outputLayer); +Model::~Model() {}; + +CUDANet::Tensor& Model::predict(CUDANet::Tensor& input) { + CUDANet::Tensor* current = &input; + for (const auto& [name, layer_ptr] : layers) { + current = &(layer_ptr->forward(*current)); + } + return *current; } -Model::~Model() { - delete inputLayer; - delete outputLayer; - for (const auto& layer : layers) { - delete layer.second; - } -}; - -float* Model::predict(const float* input) { - float* d_input = inputLayer->forward(input); - - for (auto& layer : layers) { - d_input = layer.second->forward(d_input); - } - - return outputLayer->forward(d_input); -} - -void Model::addLayer(const std::string& name, Layers::SequentialLayer* layer) { - const Module* module = dynamic_cast(layer); - - if (module != nullptr) { - for (const auto& moduleLayer : module->getLayers()) { - layerMap[moduleLayer.first] = moduleLayer.second; - layers.push_back({moduleLayer.first, moduleLayer.second}); - } - - return; - } - +void Model::register_layer(const std::string& name, Layer* layer) { layers.push_back({name, layer}); - layerMap[name] = layer; + layer_map[name] = layer; } -Layers::SequentialLayer* Model::getLayer(const std::string& name) { - return layerMap[name]; +void Model::register_module(Module& module) { + for (const auto& [name, layer_ptr] : module.get_layers()) { + layer_map[name] = layer_ptr; + layers.push_back({name, layer_ptr}); + } + + return; } -void Model::loadWeights(const std::string& path) { +Layer* Model::get_layer(const std::string& name) { + return layer_map[name]; +} + +void Model::load_weights(const std::string& path) { std::ifstream file(path, std::ios::binary); if (!file.is_open()) { @@ -92,120 +65,114 @@ void Model::loadWeights(const std::string& path) { return; } - auto getTensorType = [](const std::string& typeStr) { - if (typeStr == "weight") return TensorType::WEIGHT; - if (typeStr == "bias") return TensorType::BIAS; - if (typeStr == "running_mean") return TensorType::RUNNING_MEAN; - if (typeStr == "running_var") return TensorType::RUNNING_VAR; - throw std::runtime_error("Unknown tensor type: " + typeStr); + auto get_tensor_type = [](const std::string& type_str) { + if (type_str == "weight") return TensorType::WEIGHT; + if (type_str == "bias") return TensorType::BIAS; + if (type_str == "running_mean") return TensorType::RUNNING_MEAN; + if (type_str == "running_var") return TensorType::RUNNING_VAR; + throw std::runtime_error("Unknown tensor type: " + type_str); }; - u_int64_t headerSize; - file.read(reinterpret_cast(&headerSize), sizeof(headerSize)); + u_int64_t header_size; + file.read(reinterpret_cast(&header_size), sizeof(header_size)); - std::string header(headerSize, '\0'); - file.read(&header[0], headerSize); + std::string header(header_size, '\0'); + file.read(&header[0], header_size); - std::vector tensorInfos; + std::vector tensor_infos; size_t pos = 0; while (pos < header.size()) { - size_t nextPos = header.find('\n', pos); - if (nextPos == std::string::npos) break; + size_t next_pos = header.find('\n', pos); + if (next_pos == std::string::npos) break; - std::string line = header.substr(pos, nextPos - pos); - pos = nextPos + 1; + std::string line = header.substr(pos, next_pos - pos); + pos = next_pos + 1; - size_t commaPos = line.find(','); - if (commaPos == std::string::npos) continue; + size_t comma_pos = line.find(','); + if (comma_pos == std::string::npos) continue; // Parse tensor name into name and type - std::string nameStr = line.substr(0, commaPos); - size_t dotPos = nameStr.find_last_of('.'); - if (dotPos == std::string::npos) continue; - std::string name = nameStr.substr(0, dotPos); + std::string name_str = line.substr(0, comma_pos); + size_t dot_pos = name_str.find_last_of('.'); + if (dot_pos == std::string::npos) continue; + std::string name = name_str.substr(0, dot_pos); - TensorType type = getTensorType(nameStr.substr(dotPos + 1)); + TensorType type = get_tensor_type(name_str.substr(dot_pos + 1)); - line = line.substr(commaPos + 1); + line = line.substr(comma_pos + 1); - commaPos = line.find(','); - if (commaPos == std::string::npos) continue; + comma_pos = line.find(','); + if (comma_pos == std::string::npos) continue; - int size = std::stoi(line.substr(0, commaPos)); - int offset = std::stoi(line.substr(commaPos + 1)); + int size = std::stoi(line.substr(0, comma_pos)); + int offset = std::stoi(line.substr(comma_pos + 1)); - tensorInfos.push_back({name, type, size, offset}); + tensor_infos.push_back({name, type, size, offset}); } - for (const auto& tensorInfo : tensorInfos) { - std::vector values(tensorInfo.size); + for (const auto& tensor_info : tensor_infos) { + std::vector values(tensor_info.size); file.seekg( - sizeof(version) + sizeof(headerSize) + header.size() + - tensorInfo.offset + sizeof(version) + sizeof(header_size) + header.size() + + tensor_info.offset ); file.read( reinterpret_cast(values.data()), - tensorInfo.size * sizeof(float) + tensor_info.size * sizeof(float) ); - if (layerMap.find(tensorInfo.name) != layerMap.end()) { - Layers::WeightedLayer* wLayer = - dynamic_cast(layerMap[tensorInfo.name]); + if (layer_map.find(tensor_info.name) != layer_map.end()) { - if (wLayer == nullptr) { - std::cerr << "Layer: " << tensorInfo.name - << " does not have weights" << std::endl; - continue; - } + Layer* layer = layer_map[tensor_info.name]; - if (tensorInfo.type == TensorType::WEIGHT) { - if (wLayer->getWeights().size() != values.size()) { - std::cerr << "Layer: " << tensorInfo.name + if (tensor_info.type == TensorType::WEIGHT) { + if (layer->get_weights().size() != values.size()) { + std::cerr << "Layer: " << tensor_info.name << " has incorrect number of weights, expected " - << wLayer->getWeights().size() << " but got " + << layer->get_weights().size() << " but got " << values.size() << ", skipping" << std::endl; continue; } - wLayer->setWeights(values.data()); - } else if (tensorInfo.type == TensorType::BIAS) { - if (wLayer->getBiases().size() != values.size()) { - std::cerr << "Layer: " << tensorInfo.name + layer->set_weights(values.data()); + } else if (tensor_info.type == TensorType::BIAS) { + if (layer->get_biases().size() != values.size()) { + std::cerr << "Layer: " << tensor_info.name << " has incorrect number of biases, expected " - << wLayer->getBiases().size() << " but got " + << layer->get_biases().size() << " but got " << values.size() << ", skipping" << std::endl; continue; } - wLayer->setBiases(values.data()); + layer->set_biases(values.data()); } - Layers::BatchNorm2d* bnLayer = dynamic_cast(wLayer); - if (bnLayer == nullptr) { + Layers::BatchNorm2d* bn_layer = dynamic_cast(layer); + if (bn_layer == nullptr) { continue; } - if (tensorInfo.type == TensorType::RUNNING_MEAN) { - if (bnLayer->getRunningMean().size() != values.size()) { - std::cerr << "Layer: " << tensorInfo.name << " has incorrect number of running mean values, expected " - << bnLayer->getRunningMean().size() << " but got " << values.size() << ", skipping" << std::endl; + if (tensor_info.type == TensorType::RUNNING_MEAN) { + if (bn_layer->get_running_mean().size() != values.size()) { + std::cerr << "Layer: " << tensor_info.name << " has incorrect number of running mean values, expected " + << bn_layer->get_running_mean().size() << " but got " << values.size() << ", skipping" << std::endl; continue; } - bnLayer->setRunningMean(values.data()); - } else if (tensorInfo.type == TensorType::RUNNING_VAR) { - if (bnLayer->getRunningVar().size() != values.size()) { - std::cerr << "Layer: " << tensorInfo.name << " has incorrect number of running var values, expected " - << bnLayer->getRunningVar().size() << " but got " << values.size() << ", skipping" << std::endl; + bn_layer->set_running_mean(values.data()); + } else if (tensor_info.type == TensorType::RUNNING_VAR) { + if (bn_layer->get_running_var().size() != values.size()) { + std::cerr << "Layer: " << tensor_info.name << " has incorrect number of running var values, expected " + << bn_layer->get_running_var().size() << " but got " << values.size() << ", skipping" << std::endl; continue; } - bnLayer->setRunningVar(values.data()); + bn_layer->set_running_var(values.data()); } } else { - std::cerr << "Layer: " << tensorInfo.name + std::cerr << "Layer: " << tensor_info.name << " does not exist, skipping" << std::endl; } } @@ -215,63 +182,63 @@ void Model::loadWeights(const std::string& path) { bool Model::validate() { bool valid = true; - int size = inputLayer->getInputSize(); + CUDANet::Shape shape = in_shape; - for (const auto& layer : layers) { - if (layer.second->getInputSize() != size) { + for (const auto& [name, layer_ptr] : layers) { + if (layer_ptr->input_shape() != shape) { valid = false; - std::cerr << "Layer: " << layer.first - << " has incorrect input size, expected " << size - << " but got " << layer.second->getInputSize() + std::cerr << "Layer: " << name + << " has incorrect input shape, expected " << format_shape(shape) + << " but got " << format_shape(layer_ptr->input_shape()) << std::endl; break; } - size = layer.second->getOutputSize(); + shape = layer_ptr->output_shape(); } return valid; } -void Model::printSummary() { +void Model::print_summary() { struct layer_info { std::string name; - std::string inputSize; - std::string outputSize; + std::string input_shape; + std::string output_shape; }; - std::vector layerInfos; + std::vector layer_infos; - int maxNameLength = 0; - int maxInputLength = 0; - int maxOutputLength = 0; + int max_name_length = 0; + int max_input_length = 0; + int max_output_length = 0; - for (const auto& layer : layers) { - layer_info layerInfo = { - layer.first, std::to_string(layer.second->getInputSize()), - std::to_string(layer.second->getOutputSize()) + for (const auto& [name, layer_ptr] : layers) { + layer_info li = { + name, format_shape(layer_ptr->input_shape()), + format_shape(layer_ptr->output_shape()) }; - layerInfos.push_back(layerInfo); + layer_infos.push_back(li); - maxNameLength = std::max(maxNameLength, (int)layerInfo.name.size()); - maxInputLength = - std::max(maxInputLength, (int)layerInfo.inputSize.size()); - maxOutputLength = - std::max(maxOutputLength, (int)layerInfo.outputSize.size()); + max_name_length = std::max(max_name_length, (int)li.name.size()); + max_input_length = + std::max(max_input_length, (int)li.input_shape.size()); + max_output_length = + std::max(max_output_length, (int)li.output_shape.size()); } - int rowLength = maxNameLength + maxInputLength + maxOutputLength + 6; + int row_length = max_name_length + max_input_length + max_output_length + 6; std::cout << "Model Summary:" << std::endl - << std::string(rowLength, '-') << std::endl; + << std::string(row_length, '-') << std::endl; - for (const auto& layerInfo : layerInfos) { + for (const auto& li : layer_infos) { std::cout << std::left - << std::setw(maxNameLength) << layerInfo.name + << std::setw(max_name_length) << li.name << " | " << std::right - << std::setw(maxInputLength) << layerInfo.inputSize + << std::setw(max_input_length) << li.input_shape << " | " - << std::setw(maxOutputLength) << layerInfo.outputSize + << std::setw(max_output_length) << li.output_shape << std::endl; } } \ No newline at end of file diff --git a/src/module.cpp b/src/module.cpp index 3ea9e00..9d3acde 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -28,7 +28,7 @@ size_t Module::output_size() { return sizeof(float) * count; } -void Module::register_layer(const std::string& name, Layer& layer) { +void Module::register_layer(const std::string& name, Layer* layer) { layers.push_back({name, layer}); } @@ -38,7 +38,7 @@ void Module::register_module(Module& module) { } } -const std::vector>& +const std::vector>& Module::get_layers() const { return layers; }