diff --git a/include/cudanet.cuh b/include/cudanet.cuh index c4636b3..3caa89d 100644 --- a/include/cudanet.cuh +++ b/include/cudanet.cuh @@ -14,7 +14,7 @@ #include "avg_pooling.hpp" #include "batch_norm.cuh" #include "concat.hpp" -#include "conv2d.cuh" +#include "conv2d.hpp" #include "dense.hpp" #include "input.hpp" #include "layer.hpp" diff --git a/include/layers/conv2d.cuh b/include/layers/conv2d.hpp similarity index 89% rename from include/layers/conv2d.cuh rename to include/layers/conv2d.hpp index a5e1cf8..7bfe2b7 100644 --- a/include/layers/conv2d.cuh +++ b/include/layers/conv2d.hpp @@ -4,7 +4,6 @@ #include #include "activation.hpp" -#include "convolution.cuh" #include "layer.hpp" namespace CUDANet::Layers { @@ -28,12 +27,12 @@ class Conv2d : public WeightedLayer, public TwoDLayer { * 'SOFTMAX' or 'NONE') */ Conv2d( - shape2d inputSize, + shape2d inputSize, int inputChannels, - shape2d kernelSize, - shape2d stride, + shape2d kernelSize, + shape2d stride, int numFilters, - shape2d paddingSize, + shape2d paddingSize, ActivationType activationType ); @@ -107,7 +106,7 @@ class Conv2d : public WeightedLayer, public TwoDLayer { private: // Inputs shape2d inputSize; - int inputChannels; + int inputChannels; // Outputs shape2d outputSize; @@ -116,17 +115,31 @@ class Conv2d : public WeightedLayer, public TwoDLayer { shape2d kernelSize; shape2d stride; shape2d paddingSize; - int numFilters; + int numFilters; // Kernels std::vector weights; std::vector biases; - // Cuda + float* forwardCPU(const float* input); + +// Cuda +#ifdef USE_CUDA float* d_output; float* d_weights; float* d_biases; + float* forwardCUDA(const float* d_input); + void initCUDA(); + void delCUDA(); + + /** + * @brief Copy weights and biases to the device + * + */ + void toCuda(); +#endif + Activation* activation; /** @@ -140,12 +153,6 @@ class Conv2d : public WeightedLayer, public TwoDLayer { * */ void initializeBiases(); - - /** - * @brief Copy weights and biases to the device - * - */ - void toCuda(); }; } // namespace CUDANet::Layers diff --git a/src/backends/cuda/layers/conv2d.cu b/src/backends/cuda/layers/conv2d.cu new file mode 100644 index 0000000..2a6bc41 --- /dev/null +++ b/src/backends/cuda/layers/conv2d.cu @@ -0,0 +1,73 @@ +#include + +#include "activation.hpp" +#include "conv2d.hpp" +#include "convolution.cuh" +#include "cuda_helper.cuh" +#include "layer.hpp" +#include "matmul.cuh" +#include "vector.cuh" + +using namespace CUDANet::Layers; + +void Conv2d::initCUDA() { + d_output = nullptr; + CUDA_CHECK(cudaMalloc( + (void**)&d_output, + sizeof(float) * outputSize.first * outputSize.second * numFilters + )); + + d_weights = nullptr; + CUDA_CHECK(cudaMalloc( + (void**)&d_weights, sizeof(float) * kernelSize.first * + kernelSize.second * inputChannels * numFilters + )); + + d_biases = nullptr; + CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * numFilters)); +} + +void Conv2d::delCUDA() { + cudaFree(d_output); + cudaFree(d_weights); + cudaFree(d_biases); +} + +void Conv2d::toCuda() { + CUDA_CHECK(cudaMemcpy( + d_weights, weights.data(), + sizeof(float) * kernelSize.first * kernelSize.second * inputChannels * + numFilters, + cudaMemcpyHostToDevice + )); + + CUDA_CHECK(cudaMemcpy( + d_biases, biases.data(), sizeof(float) * numFilters, + cudaMemcpyHostToDevice + )); +} + +float* Conv2d::forwardCUDA(const float* d_input) { + // Convolve + dim3 block(8, 8, 8); + dim3 grid( + (outputSize.first + block.x - 1) / block.x, + (outputSize.second + block.y - 1) / block.y, + (numFilters + block.z - 1) / block.z + ); + + CUDANet::Utils::clear(d_output, outputSize.first * outputSize.second * numFilters); + + Kernels::convolution<<>>( + d_input, d_weights, d_biases, d_output, inputSize, inputChannels, + paddingSize, kernelSize, stride, numFilters, outputSize + ); + CUDA_CHECK(cudaGetLastError()); + + // Apply activation + activation->activate(d_output); + + CUDA_CHECK(cudaDeviceSynchronize()); + + return d_output; +} diff --git a/src/layers/conv2d.cpp b/src/layers/conv2d.cpp new file mode 100644 index 0000000..d1bdcc9 --- /dev/null +++ b/src/layers/conv2d.cpp @@ -0,0 +1,111 @@ +#include +#include + +#include "activation.hpp" +#include "conv2d.hpp" +#include "layer.hpp" + +using namespace CUDANet::Layers; + +Conv2d::Conv2d( + shape2d inputSize, + int inputChannels, + shape2d kernelSize, + shape2d stride, + int numFilters, + shape2d paddingSize, + ActivationType activationType +) + : inputSize(inputSize), + inputChannels(inputChannels), + kernelSize(kernelSize), + stride(stride), + numFilters(numFilters), + paddingSize(paddingSize) { + outputSize = { + (inputSize.first - kernelSize.first + 2 * paddingSize.first) / + stride.first + + 1, + (inputSize.second - kernelSize.second + 2 * paddingSize.second) / + stride.second + + 1 + }; + + activation = new Activation( + activationType, outputSize.first * outputSize.second * numFilters + ); + + weights.resize( + kernelSize.first * kernelSize.second * inputChannels * numFilters + ); + initializeWeights(); + + biases.resize(numFilters); + initializeBiases(); + +#ifdef USE_CUDA + initCUDA(); + toCuda(); +#endif +} + +Conv2d::~Conv2d() { +#ifdef USE_CUDA + delCUDA(); +#endif + delete activation; +} + +void Conv2d::initializeWeights() { + std::fill(weights.begin(), weights.end(), 0.0f); +} + +void Conv2d::initializeBiases() { + std::fill(biases.begin(), biases.end(), 0.0f); +} + +void Conv2d::setWeights(const float* weights_input) { + std::copy(weights_input, weights_input + weights.size(), weights.begin()); +#ifdef USE_CUDA + toCuda(); +#endif +} + +std::vector Conv2d::getWeights() { + return weights; +} + +void Conv2d::setBiases(const float* biases_input) { + std::copy(biases_input, biases_input + biases.size(), biases.begin()); +#ifdef USE_CUDA + toCuda(); +#endif +} + +std::vector Conv2d::getBiases() { + return biases; +} + +float* Conv2d::forwardCPU(const float* input) { + throw std::logic_error("Not implemented"); +} + +float* Conv2d::forward(const float* input) { +#ifdef USE_CUDA + return forwardCUDA(input); +#else + return forwardCPU(input); +#endif +} + +int Conv2d::getOutputSize() { + return outputSize.first * outputSize.second * numFilters; +} + +int Conv2d::getInputSize() { + return inputSize.first * inputSize.second * inputChannels; +} + +shape2d Conv2d::getOutputDims() { + return outputSize; +} \ No newline at end of file diff --git a/src/layers/conv2d.cu b/src/layers/conv2d.cu deleted file mode 100644 index c4374ec..0000000 --- a/src/layers/conv2d.cu +++ /dev/null @@ -1,144 +0,0 @@ -#include -#include - -#include "activation.hpp" -#include "conv2d.cuh" -#include "convolution.cuh" -#include "cuda_helper.cuh" -#include "layer.hpp" -#include "matmul.cuh" -#include "vector.cuh" - -using namespace CUDANet::Layers; - -Conv2d::Conv2d( - shape2d inputSize, - int inputChannels, - shape2d kernelSize, - shape2d stride, - int numFilters, - shape2d paddingSize, - ActivationType activationType -) - : inputSize(inputSize), - inputChannels(inputChannels), - kernelSize(kernelSize), - stride(stride), - numFilters(numFilters), - paddingSize(paddingSize) { - - outputSize = { - (inputSize.first - kernelSize.first + 2 * paddingSize.first) / - stride.first + 1, - (inputSize.second - kernelSize.second + 2 * paddingSize.second) / - stride.second + 1 - }; - - activation = - new Activation(activationType, outputSize.first * outputSize.second * numFilters); - - d_output = nullptr; - CUDA_CHECK(cudaMalloc( - (void**)&d_output, sizeof(float) * outputSize.first * outputSize.second * numFilters - )); - - weights.resize(kernelSize.first * kernelSize.second * inputChannels * numFilters); - initializeWeights(); - - d_weights = nullptr; - CUDA_CHECK(cudaMalloc( - (void**)&d_weights, - sizeof(float) * kernelSize.first * kernelSize.second * inputChannels * numFilters - )); - - biases.resize(numFilters); - initializeBiases(); - - d_biases = nullptr; - CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * numFilters)); - - toCuda(); -} - -Conv2d::~Conv2d() { - cudaFree(d_output); - cudaFree(d_weights); - cudaFree(d_biases); - delete activation; -} - -void Conv2d::initializeWeights() { - std::fill(weights.begin(), weights.end(), 0.0f); -} - -void Conv2d::initializeBiases() { - std::fill(biases.begin(), biases.end(), 0.0f); -} - -void Conv2d::setWeights(const float* weights_input) { - std::copy(weights_input, weights_input + weights.size(), weights.begin()); - toCuda(); -} - -std::vector Conv2d::getWeights() { - return weights; -} - -void Conv2d::setBiases(const float* biases_input) { - std::copy(biases_input, biases_input + biases.size(), biases.begin()); - toCuda(); -} - -std::vector Conv2d::getBiases() { - return biases; -} - -void Conv2d::toCuda() { - CUDA_CHECK(cudaMemcpy( - d_weights, weights.data(), - sizeof(float) * kernelSize.first * kernelSize.second * inputChannels * numFilters, - cudaMemcpyHostToDevice - )); - - CUDA_CHECK(cudaMemcpy( - d_biases, biases.data(), sizeof(float) * numFilters, - cudaMemcpyHostToDevice - )); -} - -float* Conv2d::forward(const float* d_input) { - // Convolve - dim3 block(8, 8, 8); - dim3 grid( - (outputSize.first + block.x - 1) / block.x, - (outputSize.second + block.y - 1) / block.y, - (numFilters + block.z - 1) / block.z - ); - - CUDANet::Utils::clear(d_output, outputSize.first * outputSize.second * numFilters); - - Kernels::convolution<<>>( - d_input, d_weights, d_biases, d_output, inputSize, inputChannels, - paddingSize, kernelSize, stride, numFilters, outputSize - ); - CUDA_CHECK(cudaGetLastError()); - - // Apply activation - activation->activate(d_output); - - CUDA_CHECK(cudaDeviceSynchronize()); - - return d_output; -} - -int Conv2d::getOutputSize() { - return outputSize.first * outputSize.second * numFilters; -} - -int Conv2d::getInputSize() { - return inputSize.first * inputSize.second * inputChannels; -} - -shape2d Conv2d::getOutputDims() { - return outputSize; -} \ No newline at end of file diff --git a/test/cuda/layers/test_conv2d.cu b/test/cuda/layers/test_conv2d.cu index 0a9dad2..628c1b7 100644 --- a/test/cuda/layers/test_conv2d.cu +++ b/test/cuda/layers/test_conv2d.cu @@ -3,7 +3,7 @@ #include -#include "conv2d.cuh" +#include "conv2d.hpp" class Conv2dTest : public ::testing::Test { protected: diff --git a/test/model/test_model.cpp b/test/model/test_model.cpp index 6eae469..8c60035 100644 --- a/test/model/test_model.cpp +++ b/test/model/test_model.cpp @@ -1,6 +1,6 @@ #include -#include "conv2d.cuh" +#include "conv2d.hpp" #include "dense.hpp" #include "max_pooling.hpp" #include "model.hpp" @@ -85,8 +85,6 @@ class ModelTest : public ::testing::Test { void commonTestTeardown(CUDANet::Model *model) { delete model; } - - cudaError_t cudaStatus; }; TEST_F(ModelTest, TestModelPredict) {