diff --git a/examples/inception_v3/inception_modules.cpp b/examples/inception_v3/inception_modules.cpp
index 8bdf203..3d39690 100644
--- a/examples/inception_v3/inception_modules.cpp
+++ b/examples/inception_v3/inception_modules.cpp
@@ -213,7 +213,7 @@ InceptionB::InceptionB(
         branch3x3->getOutputSize(), branch3x3dbl_3->getOutputSize()
     );
     concat_2 = new CUDANet::Layers::Concat(
-        concat_1->getOutputSize(), branchPool->getOutputSize()
+        concat_1->getOutputSize(), branchPool->get_output_size()
     );
 
     outputSize = concat_2->getOutputSize();
@@ -441,7 +441,7 @@ InceptionD::InceptionD(
         branch3x3_2->getOutputSize(), branch7x7x3_4->getOutputSize()
     );
     concat_2 = new CUDANet::Layers::Concat(
-        concat_1->getOutputSize(), branchPool->getOutputSize()
+        concat_1->getOutputSize(), branchPool->get_output_size()
     );
 
     outputSize = concat_2->getOutputSize();
@@ -707,7 +707,7 @@ InceptionV3::InceptionV3(
     addLayer("AveragePool", avgpool);
 
     fc = new CUDANet::Layers::Dense(
-        avgpool->getOutputSize(), 1000, CUDANet::Layers::ActivationType::NONE
+        avgpool->get_output_size(), 1000, CUDANet::Layers::ActivationType::NONE
     );
     addLayer("fc", fc);
 }
diff --git a/include/backend.hpp b/include/backend.hpp
new file mode 100644
index 0000000..8da3f2d
--- /dev/null
+++ b/include/backend.hpp
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <cstddef>
+
+#include "tensor.hpp"
+
+namespace CUDANet
+{
+
+class Backend
+{
+public:
+
+    // Memory management
+    virtual void* allocate(size_t bytes) = 0;
+    virtual void deallocate(void* ptr) = 0;
+
+    // Tensor ops
+    virtual void print(const CUDANet::Tensor &input) = 0;
+    virtual void clear(CUDANet::Tensor &input) = 0;
+    virtual void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) = 0;
+    virtual void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) = 0;
+
+    // Layer ops
+    virtual void relu(CUDANet::Tensor &tensor) = 0;
+    virtual void sigmoid(CUDANet::Tensor &tensor) = 0;
+    virtual void softmax(CUDANet::Tensor &tensor, CUDANet::Tensor &temp_max, CUDANet::Tensor &temp_sum) = 0;
+};
+
+} // namespace CUDANet
\ No newline at end of file
diff --git a/include/backend/backend.hpp b/include/backend/backend.hpp
deleted file mode 100644
index 41a1d27..0000000
--- a/include/backend/backend.hpp
+++ /dev/null
@@ -1,29 +0,0 @@
-#pragma once
-
-#include <cstddef>
-#include "backend/tensor.hpp"
-
-namespace CUDANet::Backend
-{
-
-class IBackend
-{
-public:
-
-    // Memory management
-    virtual void* allocate(size_t bytes) = 0;
-    virtual void deallocate(void* ptr) = 0;
-
-    // Tensor ops
-    virtual void print(const CUDANet::Backend::Tensor &input) = 0;
-    virtual void clear(CUDANet::Backend::Tensor &input) = 0;
-    virtual void sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) = 0;
-    virtual void max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) = 0;
-
-    // Layer ops
-    virtual void relu(CUDANet::Backend::Tensor &tensor) = 0;
-    virtual void sigmoid(CUDANet::Backend::Tensor &tensor) = 0;
-    virtual void softmax(CUDANet::Backend::Tensor &tensor, CUDANet::Backend::Tensor &temp_max, CUDANet::Backend::Tensor &temp_sum) = 0;
-};
-
-} // namespace CUDANet::Backend
\ No newline at end of file
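Note: the change above collapses IBackend into Backend and hoists it from CUDANet::Backend into the top-level CUDANet namespace. For orientation, a minimal sketch of how one of the layer ops could be implemented by the CPU backend declared just below. The loop body is an assumption for illustration (the CPU definitions are not part of this patch), and the cast assumes Tensor::data() exposes a raw FLOAT32 buffer:

    #include <algorithm>
    #include <cstddef>

    #include "backend/cpu.hpp"

    // In-place ReLU over the tensor's flat element buffer.
    void CUDANet::Backend::CPU::relu(CUDANet::Tensor &tensor) {
        float *data = static_cast<float *>(tensor.data());
        for (size_t i = 0; i < tensor.numel(); ++i) {
            data[i] = std::max(0.0f, data[i]);
        }
    }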
diff --git a/include/backend/cpu.hpp b/include/backend/cpu.hpp
new file mode 100644
index 0000000..beb65b1
--- /dev/null
+++ b/include/backend/cpu.hpp
@@ -0,0 +1,26 @@
+#pragma once
+
+#include "backend.hpp"
+#include "tensor.hpp"
+
+namespace CUDANet::Backend {
+
+class CPU : public Backend {
+public:
+    // Memory management
+    void* allocate(size_t bytes) override;
+    void deallocate(void* ptr) override;
+
+    // Tensor ops
+    void print(const CUDANet::Tensor &input) override;
+    void clear(CUDANet::Tensor &input) override;
+    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
+    void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;
+
+    // Layer ops
+    void relu(CUDANet::Tensor &tensor) override;
+    void sigmoid(CUDANet::Tensor &tensor) override;
+    void softmax(CUDANet::Tensor &tensor, CUDANet::Tensor &temp_max, CUDANet::Tensor &temp_sum) override;
+};
+
+} // namespace CUDANet::Backend
\ No newline at end of file
diff --git a/include/backend/cuda.cuh b/include/backend/cuda.cuh
index f2b788d..e08ce34 100644
--- a/include/backend/cuda.cuh
+++ b/include/backend/cuda.cuh
@@ -1,29 +1,26 @@
 #pragma once
 
-#include "backend/backend.hpp"
-#include "backend/tensor.hpp"
+#include "backend.hpp"
+#include "tensor.hpp"
 
 namespace CUDANet::Backend {
 
-class CUDABackend : public IBackend {
+class CUDA : public Backend {
 public:
     // Memory management
     void* allocate(size_t bytes) override;
     void deallocate(void* ptr) override;
 
     // Tensor ops
-    void print(const CUDANet::Backend::Tensor &input) override;
-    void clear(CUDANet::Backend::Tensor &input) override;
-    void sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) override;
-    void max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) override;
+    void print(const CUDANet::Tensor &input) override;
+    void clear(CUDANet::Tensor &input) override;
+    void sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) override;
+    void max(const CUDANet::Tensor &input, CUDANet::Tensor &max) override;
 
     // Layer ops
-    void relu(CUDANet::Backend::Tensor &tensor) override;
-    void sigmoid(CUDANet::Backend::Tensor &tensor) override;
-    void softmax(CUDANet::Backend::Tensor &tensor, CUDANet::Backend::Tensor &temp_max, CUDANet::Backend::Tensor &temp_sum) override;
-
-private:
-    static constexpr int BLOCK_SIZE = 256;
+    void relu(CUDANet::Tensor &tensor) override;
+    void sigmoid(CUDANet::Tensor &tensor) override;
+    void softmax(CUDANet::Tensor &tensor, CUDANet::Tensor &temp_max, CUDANet::Tensor &temp_sum) override;
 };
 
 } // namespace CUDANet::Backend
\ No newline at end of file
diff --git a/include/layer.hpp b/include/layer.hpp
new file mode 100644
index 0000000..783af9c
--- /dev/null
+++ b/include/layer.hpp
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <vector>
+
+#include "shape.hpp"
+#include "tensor.hpp"
+
+#define CUDANET_SAME_PADDING(inputSize, kernelSize, stride) \
+    ((stride - 1) * inputSize - stride + kernelSize) / 2;
+
+
+namespace CUDANet {
+
+/**
+ * @brief Basic Sequential Layer
+ *
+ */
+class Layer {
+  public:
+
+    virtual ~Layer(){};
+
+    virtual CUDANet::Tensor& forward(CUDANet::Tensor &input) = 0;
+
+    virtual CUDANet::Shape input_shape() = 0;
+
+    virtual CUDANet::Shape output_shape() = 0;
+
+    virtual int input_size() = 0;
+
+    virtual int output_size() = 0;
+
+    virtual void set_weights(CUDANet::Tensor &input) = 0;
+
+    virtual CUDANet::Tensor& get_weights() = 0;
+
+    virtual void set_biases(CUDANet::Tensor &input) = 0;
+
+    virtual CUDANet::Tensor& get_biases() = 0;
+};
+
+} // namespace CUDANet
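Note: the new CUDANet::Layer interface replaces the SequentialLayer / WeightedLayer pair (deleted further down) and trades raw float* in/out pointers for Tensor references plus explicit Shape accessors. A hypothetical pass-through layer, only to illustrate the surface area of the contract (Identity is invented for this sketch and is not part of the patch):

    #include "layer.hpp"

    class Identity : public CUDANet::Layer {
      public:
        explicit Identity(CUDANet::Shape shape) : shape_(shape) {}

        CUDANet::Tensor& forward(CUDANet::Tensor &input) override { return input; }

        CUDANet::Shape input_shape() override { return shape_; }
        CUDANet::Shape output_shape() override { return shape_; }

        int input_size() override {
            int n = 1;
            for (int d : shape_) n *= d;  // flat element count
            return n;
        }
        int output_size() override { return input_size(); }

        // Identity has no parameters; satisfy the pure virtuals with no-ops.
        void set_weights(CUDANet::Tensor &) override {}
        CUDANet::Tensor& get_weights() override { return empty_; }
        void set_biases(CUDANet::Tensor &) override {}
        CUDANet::Tensor& get_biases() override { return empty_; }

      private:
        CUDANet::Shape shape_;
        CUDANet::Tensor empty_;
    };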
diff --git a/include/layers/activation.hpp b/include/layers/activation.hpp
index b1aa3d7..0e16aef 100644
--- a/include/layers/activation.hpp
+++ b/include/layers/activation.hpp
@@ -2,6 +2,7 @@
 
 #include "backend/tensor.hpp"
 #include "backend/backend.hpp"
+#include "layer.hpp"
 
 namespace CUDANet::Layers {
 
@@ -19,7 +20,7 @@ enum ActivationType { SIGMOID, RELU, SOFTMAX, NONE };
  * @brief Utility class that performs activation
  *
  */
-class Activation {
+class Activation : public Layer {
   public:
     Activation() = default;
diff --git a/include/layers/avg_pooling.hpp b/include/layers/avg_pooling.hpp
index 495fa98..fd7fc02 100644
--- a/include/layers/avg_pooling.hpp
+++ b/include/layers/avg_pooling.hpp
@@ -6,7 +6,7 @@
 
 namespace CUDANet::Layers {
 
-class AvgPooling2d : public SequentialLayer, public TwoDLayer {
+class AvgPooling2d : public Layer, public TwoDLayer {
   public:
     AvgPooling2d(
         shape2d inputSize,
@@ -25,7 +25,7 @@ class AvgPooling2d : public SequentialLayer, public TwoDLayer {
      *
      * @return int output size
      */
-    int getOutputSize();
+    int get_output_size();
 
     /**
      * @brief Get input size
diff --git a/include/layers/input.hpp b/include/layers/input.hpp
index 638b9e8..ab84c75 100644
--- a/include/layers/input.hpp
+++ b/include/layers/input.hpp
@@ -9,7 +9,7 @@ namespace CUDANet::Layers {
  * @brief Input layer, just copies the input to the device
  *
  */
-class Input : public SequentialLayer {
+class Input : public Layer {
   public:
     /**
      * @brief Create a new Input layer
@@ -38,7 +38,7 @@ class Input : public SequentialLayer {
      *
      * @return int output size
      */
-    int getOutputSize();
+    int get_output_size();
 
     /**
      * @brief Get input size
diff --git a/include/layers/layer.hpp b/include/layers/layer.hpp
deleted file mode 100644
index a175acf..0000000
--- a/include/layers/layer.hpp
+++ /dev/null
@@ -1,124 +0,0 @@
-
-#ifndef CUDANET_I_LAYER_H
-#define CUDANET_I_LAYER_H
-
-#include <vector>
-
-#define CUDANET_SAME_PADDING(inputSize, kernelSize, stride) \
-    ((stride - 1) * inputSize - stride + kernelSize) / 2;
-
-typedef std::pair<int, int> shape2d;
-
-namespace CUDANet::Layers {
-
-
-class TwoDLayer {
-
-  public:
-    virtual shape2d getOutputDims() = 0;
-
-};
-
-
-/**
- * @brief Basic Sequential Layer
- *
- */
-class SequentialLayer {
-  public:
-    /**
-     * @brief Destroy the Sequential Layer
-     *
-     */
-    virtual ~SequentialLayer(){};
-
-    /**
-     * @brief Forward propagation virtual function
-     *
-     * @param input Device pointer to the input
-     * @return float* Device pointer to the output
-     */
-    virtual float* forward(const float* input) = 0;
-
-    /**
-     * @brief Get output size
-     *
-     * @return int output size
-     */
-    virtual int getOutputSize() = 0;
-
-    /**
-     * @brief Get input size
-     *
-     * @return int input size
-     */
-    virtual int getInputSize() = 0;
-};
-
-/**
- * @brief Base class for layers with weights and biases
- */
-class WeightedLayer : public SequentialLayer {
-  public:
-    /**
-     * @brief Destroy the ILayer object
-     *
-     */
-    virtual ~WeightedLayer(){};
-
-    /**
-     * @brief Virtual function for forward pass
-     *
-     * @param input (Device) Pointer to the input
-     * @return float* Device pointer to the output
-     */
-    virtual float* forward(const float* input) = 0;
-
-    /**
-     * @brief Virtual function for setting weights
-     *
-     * @param weights Pointer to the weights
-     */
-    virtual void setWeights(const float* weights) = 0;
-
-    /**
-     * @brief Virtual function for getting weights
-     *
-     */
-    virtual std::vector<float> getWeights() = 0;
-
-    /**
-     * @brief Virtual function for setting biases
-     *
-     * @param biases Pointer to the biases
-     */
-    virtual void setBiases(const float* biases) = 0;
-
-    /**
-     * @brief Virtual function for getting biases
-     *
-     */
-    virtual std::vector<float> getBiases() = 0;
-
-  private:
-    /**
-     * @brief Initialize the weights
-     */
-    virtual void initializeWeights() = 0;
-
-    /**
-     * @brief Initialize the biases
-     */
-    virtual void initializeBiases() = 0;
-
-#ifdef USE_CUDA
-    /**
-     * @brief Copy the weights and biases to the device
-     */
-    virtual void toCuda() = 0;
-#endif
-};
-
-} // namespace CUDANet::Layers
-
-#endif // CUDANET_I_LAYER_H
\ No newline at end of file
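Note: the CUDANET_SAME_PADDING macro survives the move into include/layer.hpp unchanged. A quick worked check of the formula ((stride - 1) * inputSize - stride + kernelSize) / 2, reproduced here under a local name and without the macro's trailing semicolon:

    #define SAME_PADDING(inputSize, kernelSize, stride) \
        (((stride) - 1) * (inputSize) - (stride) + (kernelSize)) / 2

    // For stride 1 the input-size term drops out, leaving (kernelSize - 1) / 2.
    static_assert(SAME_PADDING(32, 3, 1) == 1, "3x3 kernel, stride 1 -> pad 1");
    static_assert(SAME_PADDING(32, 5, 1) == 2, "5x5 kernel, stride 1 -> pad 2");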
diff --git a/include/layers/max_pooling.hpp b/include/layers/max_pooling.hpp
index 6891eac..bcc66cf 100644
--- a/include/layers/max_pooling.hpp
+++ b/include/layers/max_pooling.hpp
@@ -6,7 +6,7 @@
 
 namespace CUDANet::Layers {
 
-class MaxPooling2d : public SequentialLayer, public TwoDLayer {
+class MaxPooling2d : public Layer, public TwoDLayer {
   public:
     MaxPooling2d(
         shape2d inputSize,
@@ -25,7 +25,7 @@ class MaxPooling2d : public SequentialLayer, public TwoDLayer {
      *
      * @return int output size
      */
-    int getOutputSize();
+    int get_output_size();
 
     /**
      * @brief Get input size
diff --git a/include/layers/output.hpp b/include/layers/output.hpp
index e165e62..28e5634 100644
--- a/include/layers/output.hpp
+++ b/include/layers/output.hpp
@@ -5,7 +5,7 @@
 
 namespace CUDANet::Layers {
 
-class Output : public SequentialLayer {
+class Output : public Layer {
   public:
     /**
     * @brief Create a new Output layer
@@ -34,7 +34,7 @@ class Output : public SequentialLayer {
      *
      * @return int output size
      */
-    int getOutputSize();
+    int get_output_size();
 
     /**
      * @brief Get input size
diff --git a/include/shape.hpp b/include/shape.hpp
new file mode 100644
index 0000000..901350c
--- /dev/null
+++ b/include/shape.hpp
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <vector>
+
+namespace CUDANet {
+
+typedef std::vector<int> Shape;
+
+} // namespace CUDANet
diff --git a/include/backend/tensor.hpp b/include/tensor.hpp
similarity index 71%
rename from include/backend/tensor.hpp
rename to include/tensor.hpp
index 0586f25..cce618b 100644
--- a/include/backend/tensor.hpp
+++ b/include/tensor.hpp
@@ -1,9 +1,12 @@
 #pragma once
+
 #include <cstddef>
-#include "backend/backend.hpp"
 #include <vector>
 
+#include "backend.hpp"
+#include "shape.hpp"
+
-namespace CUDANet::Backend
+namespace CUDANet
 {
@@ -13,14 +16,12 @@
 enum class DType
 {
     FLOAT32,
     // INT32, // Not implemented yet
 };
-typedef std::vector<int> Shape;
-
 class Tensor
 {
 public:
     Tensor() = default;
 
-    Tensor(Shape shape, DType dtype, IBackend* backend);
+    Tensor(Shape shape, DType dtype, CUDANet::Backend* backend);
     ~Tensor();
 
     size_t size() const;
@@ -39,8 +40,8 @@
     size_t total_elms;
     size_t total_size;
 
-    IBackend* backend;
+    CUDANet::Backend* backend;
     void* d_ptr;
 };
 
-} // namespace CUDANet::Backend
\ No newline at end of file
+} // namespace CUDANet
\ No newline at end of file
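Note: with Shape now a plain std::vector<int> and the backend injected through the constructor, creating a device tensor reads roughly as follows. This is a sketch under the assumptions that the constructor allocates through the supplied backend and that the CUDA backend is default-constructible; neither is shown in the patch:

    #include "backend/cuda.cuh"
    #include "tensor.hpp"

    int main() {
        CUDANet::Backend::CUDA cuda;

        // NCHW shape; FLOAT32 is the only DType implemented so far.
        CUDANet::Tensor t({1, 3, 224, 224}, CUDANet::DType::FLOAT32, &cuda);

        cuda.clear(t);  // zero-fill on the device
        cuda.print(t);  // copy back to the host and print
        return 0;
    }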
diff --git a/src/backends/cuda/cuda_backend.cu b/src/backends/cuda/cuda_backend.cu
index 6e47316..c05983e 100644
--- a/src/backends/cuda/cuda_backend.cu
+++ b/src/backends/cuda/cuda_backend.cu
@@ -28,12 +28,12 @@ cudaDeviceProp initializeCUDA() {
 
 using namespace CUDANet::Backend;
 
-void* CUDABackend::allocate(size_t bytes) {
+void* CUDA::allocate(size_t bytes) {
     void* d_ptr = nullptr;
     CUDA_CHECK(cudaMalloc(&d_ptr, bytes));
     return d_ptr;
 }
 
-void CUDABackend::deallocate(void* ptr) {
+void CUDA::deallocate(void* ptr) {
     CUDA_CHECK(cudaFree(ptr));
 }
diff --git a/src/backends/cuda/layer_ops.cu b/src/backends/cuda/layer_ops.cu
index 9b70f4e..252d403 100644
--- a/src/backends/cuda/layer_ops.cu
+++ b/src/backends/cuda/layer_ops.cu
@@ -5,21 +5,21 @@
 
 using namespace CUDANet::Backend;
 
-void CUDABackend::relu(Tensor &tensor) {
+void CUDA::relu(Tensor &tensor) {
     int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
     Kernels::relu<<<gridSize, BLOCK_SIZE>>>(tensor.data(), tensor.data(), tensor.numel());
     CUDA_CHECK(cudaGetLastError());
     CUDA_CHECK(cudaDeviceSynchronize());
 }
 
-void CUDABackend::sigmoid(Tensor &tensor) {
+void CUDA::sigmoid(Tensor &tensor) {
     int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
     Kernels::sigmoid<<<gridSize, BLOCK_SIZE>>>(tensor.data(), tensor.data(), tensor.numel());
     CUDA_CHECK(cudaGetLastError());
     CUDA_CHECK(cudaDeviceSynchronize());
 }
 
-void CUDABackend::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
+void CUDA::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
     int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
     // Find max value
diff --git a/src/backends/cuda/tensor_ops.cu b/src/backends/cuda/tensor_ops.cu
index b5334e4..508d6cf 100644
--- a/src/backends/cuda/tensor_ops.cu
+++ b/src/backends/cuda/tensor_ops.cu
@@ -7,7 +7,7 @@
 
 using namespace CUDANet::Backend;
 
-void CUDABackend::print(const CUDANet::Backend::Tensor &input) {
+void CUDA::print(const CUDANet::Tensor &input) {
     auto length = input.numel();
 
     std::vector<float> h_vec(input.numel());
@@ -22,11 +22,11 @@ void CUDABackend::print(const CUDANet::Backend::Tensor &input) {
     std::cout << std::endl;
 }
 
-void CUDABackend::clear(CUDANet::Backend::Tensor &input) {
+void CUDA::clear(CUDANet::Tensor &input) {
     CUDA_CHECK(cudaMemset(input.data(), 0, sizeof(float) * input.numel()));
 }
 
-void CUDABackend::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) {
+void CUDA::sum(const CUDANet::Tensor &input, CUDANet::Tensor &sum) {
     auto length = input.numel();
 
     const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -45,7 +45,7 @@ void CUDABackend::sum(const CUDANet::Backend::T
     }
 }
 
-void CUDABackend::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) {
+void CUDA::max(const CUDANet::Tensor &input, CUDANet::Tensor &max) {
     auto length = input.numel();
 
     const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
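Note: every launch in the CUDA backend sizes its grid with the same ceiling division, i.e. enough BLOCK_SIZE-wide blocks to cover all n elements. The pattern, pulled out for clarity (BLOCK_SIZE = 256 matches the constant this patch removes from cuda.cuh, which presumably now lives in the backend's translation units):

    constexpr int BLOCK_SIZE = 256;

    constexpr int grid_size(int n) {
        return (n + BLOCK_SIZE - 1) / BLOCK_SIZE;  // ceil(n / BLOCK_SIZE)
    }

    static_assert(grid_size(1) == 1, "a partial block still launches");
    static_assert(grid_size(256) == 1, "exact multiple needs one block");
    static_assert(grid_size(257) == 2, "one extra element spills into a second block");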
diff --git a/src/layers/avg_pooling.cpp b/src/layers/avg_pooling.cpp
index 22501c4..13a93ff 100644
--- a/src/layers/avg_pooling.cpp
+++ b/src/layers/avg_pooling.cpp
@@ -54,7 +54,7 @@ float* AvgPooling2d::forward(const float* input) {
 #endif
 }
 
-int AvgPooling2d::getOutputSize() {
+int AvgPooling2d::get_output_size() {
     return outputSize.first * outputSize.second * nChannels;
 }
diff --git a/src/layers/input.cpp b/src/layers/input.cpp
index 537302a..8c9affc 100644
--- a/src/layers/input.cpp
+++ b/src/layers/input.cpp
@@ -28,7 +28,7 @@ float* Input::forward(const float* input) {
 #endif
 }
 
-int Input::getOutputSize() {
+int Input::get_output_size() {
     return inputSize;
 }
diff --git a/src/layers/max_pooling.cpp b/src/layers/max_pooling.cpp
index ac67235..dbf5778 100644
--- a/src/layers/max_pooling.cpp
+++ b/src/layers/max_pooling.cpp
@@ -54,7 +54,7 @@ float* MaxPooling2d::forward(const float* input) {
 }
 
 
-int MaxPooling2d::getOutputSize() {
+int MaxPooling2d::get_output_size() {
     return outputSize.first * outputSize.second * nChannels;
 }
diff --git a/src/layers/output.cu b/src/layers/output.cu
index 6e851a7..f62d0f2 100644
--- a/src/layers/output.cu
+++ b/src/layers/output.cu
@@ -24,7 +24,7 @@ float* Output::forward(const float* input) {
 #endif
 }
 
-int Output::getOutputSize() {
+int Output::get_output_size() {
     return inputSize;
 }
diff --git a/test/cuda/layers/test_avg_pooling.cu b/test/cuda/layers/test_avg_pooling.cu
index 8d2a7c5..e9af297 100644
--- a/test/cuda/layers/test_avg_pooling.cu
+++ b/test/cuda/layers/test_avg_pooling.cu
@@ -54,7 +54,7 @@ class AvgPoolingLayerTest : public ::testing::Test {
 
         d_output = avgPoolingLayer->forward(d_input);
 
-        int outputSize = avgPoolingLayer->getOutputSize();
+        int outputSize = avgPoolingLayer->get_output_size();
         std::vector<float> output(outputSize);
         cudaStatus = cudaMemcpy(
@@ -229,7 +229,7 @@ class AdaptiveAvgPoolingLayerTest : public ::testing::Test {
 
         d_output = adaptiveAvgPoolingLayer->forward(d_input);
 
-        int outputSize = adaptiveAvgPoolingLayer->getOutputSize();
+        int outputSize = adaptiveAvgPoolingLayer->get_output_size();
         std::vector<float> output(outputSize);
         cudaStatus = cudaMemcpy(
diff --git a/test/cuda/layers/test_max_pooling.cu b/test/cuda/layers/test_max_pooling.cu
index 3bbe9a9..b553376 100644
--- a/test/cuda/layers/test_max_pooling.cu
+++ b/test/cuda/layers/test_max_pooling.cu
@@ -52,7 +52,7 @@ class MaxPoolingLayerTest : public ::testing::Test {
 
         d_output = maxPoolingLayer->forward(d_input);
 
-        int outputSize = maxPoolingLayer->getOutputSize();
+        int outputSize = maxPoolingLayer->get_output_size();
         std::vector<float> output(outputSize);
         cudaStatus = cudaMemcpy(
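Note: for the 2-d pooling layers, get_output_size() returns the flattened H_out * W_out * C element count (see avg_pooling.cpp and max_pooling.cpp above), which is why the tests can size their host buffers directly from it. A caller outside the tests would do the same; for example (illustrative helper, not part of the patch):

    #include <vector>

    #include "layers/max_pooling.hpp"

    std::vector<float> make_output_buffer(CUDANet::Layers::MaxPooling2d &pool) {
        // One float per output element, ready for a cudaMemcpy from the device.
        return std::vector<float>(pool.get_output_size());
    }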