Implement getOutputSize and getInputSize for seq layers

2024-04-22 20:31:58 +02:00
parent fbf6c44bdd
commit f17debc244
18 changed files with 186 additions and 66 deletions
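The substantive change, visible across the headers and tests below: getOutputSize() on the 2D layers used to return only the spatial width (or height), leaving callers to multiply by width and channel count themselves; it now returns the flattened element count, and a matching getInputSize() is added everywhere. A before/after sketch (layer name and values hypothetical):

    // Before: getOutputSize() returned the spatial width only.
    int w = pool.getOutputSize();                    // e.g. 4
    std::vector<float> before(w * w * nChannels);    // caller did the math

    // After: getOutputSize() returns the flattened element count.
    std::vector<float> after(pool.getOutputSize());  // 4 * 4 * nChannels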

View File

@@ -20,13 +20,18 @@ class AvgPooling2D : public SequentialLayer {
     float* forward(const float* d_input);
 
     /**
-     * @brief Get the output width (/ height) of the layer
+     * @brief Get output size
      *
-     * @return int
+     * @return int output size
      */
-    int getOutputSize() {
-        return outputSize;
-    }
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
 
   private:
     int inputSize;

View File

@@ -80,13 +80,18 @@ class Conv2d : public WeightedLayer {
     std::vector<float> getBiases();
 
     /**
-     * @brief Get the output width (/ height) of the layer
+     * @brief Get output size
      *
-     * @return int
+     * @return int output size
      */
-    int getOutputSize() {
-        return outputSize;
-    }
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
 
     /**
      * @brief Get the padding size of the layer

View File

@@ -3,8 +3,8 @@
 #include <vector>
 
-#include "layer.cuh"
 #include "activation.cuh"
+#include "layer.cuh"
 
 namespace CUDANet::Layers {
@@ -19,7 +19,8 @@ class Dense : public WeightedLayer {
      *
      * @param inputSize Size of the input vector
      * @param outputSize Size of the output vector
-     * @param activationType Activation function type ('RELU', 'SIGMOID', 'SOFTMAX' or 'NONE')
+     * @param activationType Activation function type ('RELU', 'SIGMOID',
+     * 'SOFTMAX' or 'NONE')
      */
     Dense(int inputSize, int outputSize, Layers::ActivationType activationType);
@@ -65,6 +66,20 @@ class Dense : public WeightedLayer {
      */
     std::vector<float> getBiases();
 
+    /**
+     * @brief Get output size
+     *
+     * @return int output size
+     */
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
+
   private:
     unsigned int inputSize;
     unsigned int outputSize;

View File

@@ -13,25 +13,40 @@ class Input : public SequentialLayer {
   public:
     /**
      * @brief Create a new Input layer
      *
      * @param inputSize Size of the input vector
      */
     explicit Input(int inputSize);
 
     /**
      * @brief Destroy the Input layer
      *
      */
     ~Input();
 
     /**
-     * @brief Forward pass of the input layer. Just copies the input to the device
+     * @brief Forward pass of the input layer. Just copies the input to the
+     * device
      *
      * @param input Host pointer to the input vector
      * @return Device pointer to the output vector
      */
     float* forward(const float* input);
 
+    /**
+     * @brief Get output size
+     *
+     * @return int output size
+     */
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
+
   private:
     int inputSize;
     float* d_output;

View File

@@ -4,8 +4,8 @@
 #include <vector>
 
-#define CUDANET_SAME_PADDING(inputSize, kernelSize, stride) ((stride - 1) * inputSize - stride + kernelSize) / 2;
+#define CUDANET_SAME_PADDING(inputSize, kernelSize, stride) \
+    ((stride - 1) * inputSize - stride + kernelSize) / 2;
 
 namespace CUDANet::Layers {
@@ -28,6 +28,20 @@ class SequentialLayer {
      * @return float* Device pointer to the output
      */
     virtual float* forward(const float* input) = 0;
+
+    /**
+     * @brief Get output size
+     *
+     * @return int output size
+     */
+    virtual int getOutputSize() = 0;
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    virtual int getInputSize() = 0;
 };
 
 /**
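The reflowed CUDANET_SAME_PADDING macro deserves a quick sanity check; note that the trailing semicolon is part of the macro body, so it only composes safely where an extra empty statement is harmless. A worked example (values hypothetical):

    // CUDANET_SAME_PADDING(inputSize, kernelSize, stride)
    //   = ((stride - 1) * inputSize - stride + kernelSize) / 2
    // For stride = 1 the inputSize term vanishes:
    //   (0 - 1 + kernelSize) / 2 = (kernelSize - 1) / 2
    // e.g. kernelSize = 3 -> padding 1; kernelSize = 5 -> padding 2
    int padding = CUDANET_SAME_PADDING(32, 3, 1);  // ((1 - 1) * 32 - 1 + 3) / 2 -> 1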

View File

@@ -20,13 +20,18 @@ class MaxPooling2D : public SequentialLayer {
     float* forward(const float* d_input);
 
     /**
-     * @brief Get the output width (/ height) of the layer
+     * @brief Get output size
      *
-     * @return int
+     * @return int output size
      */
-    int getOutputSize() {
-        return outputSize;
-    }
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
 
   private:
     int inputSize;

View File

@@ -6,34 +6,48 @@
 namespace CUDANet::Layers {
 
 class Output : public SequentialLayer {
   public:
     /**
      * @brief Create a new Output layer
      *
      * @param inputSize Size of the input vector
      */
     explicit Output(int inputSize);
 
     /**
      * @brief Destroy the Output layer
      *
      */
     ~Output();
 
     /**
-     * @brief Forward pass of the output layer. Just copies the input from device to host
+     * @brief Forward pass of the output layer. Just copies the input from
+     * device to host
      *
      * @param input Device pointer to the input vector
      * @return Host pointer to the output vector
      */
     float* forward(const float* input);
 
+    /**
+     * @brief Get output size
+     *
+     * @return int output size
+     */
+    int getOutputSize();
+
+    /**
+     * @brief Get input size
+     *
+     * @return int input size
+     */
+    int getInputSize();
+
   private:
     int inputSize;
     float* h_output;
 };
 
 }  // namespace CUDANet::Layers
 
 #endif  // CUDANET_OUTPUT_LAYER_H

View File

@@ -49,4 +49,12 @@ float* AvgPooling2D::forward(const float* d_input) {
     CUDA_CHECK(cudaDeviceSynchronize());
 
     return d_output;
 }
+
+int AvgPooling2D::getOutputSize() {
+    return outputSize * outputSize * nChannels;
+}
+
+int AvgPooling2D::getInputSize() {
+    return inputSize * inputSize * nChannels;
+}

View File

@@ -130,3 +130,11 @@ float* Conv2d::forward(const float* d_input) {
 
     return d_output;
 }
+
+int Conv2d::getOutputSize() {
+    return outputSize * outputSize * numFilters;
+}
+
+int Conv2d::getInputSize() {
+    return inputSize * inputSize * inputChannels;
+}
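Conv2d's getters fold channel and filter counts into the returned sizes. Combined with the spatial-width formula the tests use for an unpadded convolution, the bookkeeping works out as follows (numbers hypothetical):

    // Spatial width of an unpadded convolution, as computed in the tests:
    //   outputWidth = (inputSize - kernelSize) / stride + 1
    // Flattened sizes returned by the new getters:
    //   getOutputSize() = outputSize * outputSize * numFilters
    //   getInputSize()  = inputSize  * inputSize  * inputChannels
    // e.g. inputSize = 5, kernelSize = 3, stride = 1, inputChannels = 1, numFilters = 2:
    //   outputWidth = (5 - 3) / 1 + 1 = 3
    //   getOutputSize() = 3 * 3 * 2 = 18, getInputSize() = 5 * 5 * 1 = 25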

View File

@@ -108,4 +108,12 @@ void Dense::setBiases(const float* biases_input) {
 
 std::vector<float> Dense::getBiases() {
     return biases;
 }
+
+int Dense::getOutputSize() {
+    return outputSize;
+}
+
+int Dense::getInputSize() {
+    return inputSize;
+}

View File

@@ -20,3 +20,12 @@ float* Input::forward(const float* input) {
 
     return d_output;
 }
+
+int Input::getOutputSize() {
+    return inputSize;
+}
+
+int Input::getInputSize() {
+    return inputSize;
+}

View File

@@ -52,4 +52,12 @@ float* MaxPooling2D::forward(const float* d_input) {
     CUDA_CHECK(cudaDeviceSynchronize());
 
     return d_output;
 }
+
+int MaxPooling2D::getOutputSize() {
+    return outputSize * outputSize * nChannels;
+}
+
+int MaxPooling2D::getInputSize() {
+    return inputSize * inputSize * nChannels;
+}

View File

@@ -20,4 +20,13 @@ float* Output::forward(const float* input) {
     CUDA_CHECK(cudaDeviceSynchronize());
 
     return h_output;
 }
+
+int Output::getOutputSize() {
+    return inputSize;
+}
+
+int Output::getInputSize() {
+    return inputSize;
+}

View File

@@ -43,7 +43,6 @@ float* Model::predict(const float* input) {
     float* d_input = inputLayer->forward(input);
 
     for (auto& layer : layers) {
-        std::cout << layer.first << std::endl;
         d_input = layer.second->forward(d_input);
     }
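Since every SequentialLayer now exposes its flattened input and output sizes, a model can verify that consecutive layers chain correctly before running a forward pass. A minimal sketch of that idea (helper name hypothetical; not part of this commit):

    #include <stdexcept>
    #include <vector>

    // Hypothetical helper: check a forward-ordered layer sequence for size mismatches.
    void validateSizes(const std::vector<CUDANet::Layers::SequentialLayer*>& seq) {
        for (size_t i = 1; i < seq.size(); ++i) {
            // Each layer's flattened output must match the next layer's expected input.
            if (seq[i - 1]->getOutputSize() != seq[i]->getInputSize()) {
                throw std::runtime_error("layer size mismatch");
            }
        }
    }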

View File

@@ -187,6 +187,8 @@ TEST(MatMulTest, SumReduceTest) {
     cudaMemcpy(d_input, input.data(), sizeof(float) * n, cudaMemcpyHostToDevice);
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
+    CUDANet::Utils::clear(d_sum, n);
+
     CUDANet::Kernels::sum_reduce<<<gridSize, BLOCK_SIZE>>>(
         d_input, d_sum, n
     );
@@ -208,7 +210,5 @@ TEST(MatMulTest, SumReduceTest) {
     EXPECT_FLOAT_EQ(expected, sum[0]);
 
     cudaFree(d_input);
     cudaFree(d_sum);
-
-
 }
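The added clear call zeroes d_sum before the reduction launches; without it, stale values left in the device allocation could leak into the accumulated sum. The helper's exact behavior is an assumption, but the intent is roughly:

    // Assumption: Utils::clear zeroes n floats on the device, akin to
    //   cudaMemset(d_sum, 0, sizeof(float) * n);
    CUDANet::Utils::clear(d_sum, n);  // start the reduction from a clean buffer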

View File

@@ -51,10 +51,10 @@ TEST(AvgPoolingLayerTest, AvgPoolForwardTest) {
     int outputSize = avgPoolingLayer.getOutputSize();
 
-    std::vector<float> output(outputSize * outputSize * nChannels);
+    std::vector<float> output(outputSize);
     cudaStatus = cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * outputSize * outputSize * nChannels,
+        sizeof(float) * outputSize,
         cudaMemcpyDeviceToHost
     );
     EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -82,14 +82,15 @@ TEST_F(Conv2dTest, SimpleTest) {
         activationType, input, kernels.data(), d_input
     );
 
-    int outputSize = (inputSize - kernelSize) / stride + 1;
+    int outputWidth = (inputSize - kernelSize) / stride + 1;
+    int outputSize  = outputWidth * outputWidth * numFilters;
     EXPECT_EQ(outputSize, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
     std::vector<float> expected = {44.0f, 54.0f, 64.0f, 84.0f, 94.0f,
                                    104.0f, 124.0f, 134.0f, 144.0f};
-    std::vector<float> output(outputSize * outputSize * numFilters);
+    std::vector<float> output(outputSize);
 
     cudaStatus = cudaMemcpy(
         output.data(), d_output, sizeof(float) * output.size(),
@@ -172,18 +173,16 @@ TEST_F(Conv2dTest, PaddedTest) {
         activationType, input, kernels.data(), d_input
     );
 
-    EXPECT_EQ(inputSize, conv2d.getOutputSize());
+    EXPECT_EQ(inputSize * inputSize * numFilters, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
     std::vector<float> output(
-        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
+        conv2d.getOutputSize()
     );
 
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
-            numFilters,
-        cudaMemcpyDeviceToHost
+        sizeof(float) * conv2d.getOutputSize(), cudaMemcpyDeviceToHost
     );
 
     // Generated by tools/generate_conv2d_test.py
@@ -259,17 +258,16 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
         activationType, input, kernels.data(), d_input
    );
 
-    EXPECT_EQ(inputSize, conv2d.getOutputSize());
+    EXPECT_EQ(inputSize * inputSize * numFilters, conv2d.getOutputSize());
 
     d_output = conv2d.forward(d_input);
 
     std::vector<float> output(
-        conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters
+        conv2d.getOutputSize()
     );
 
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
-            numFilters,
+        sizeof(float) * conv2d.getOutputSize(),
         cudaMemcpyDeviceToHost
     );

View File

@@ -51,10 +51,10 @@ TEST(MaxPoolingLayerTest, MaxPoolForwardTest) {
     int outputSize = maxPoolingLayer.getOutputSize();
 
-    std::vector<float> output(outputSize * outputSize * nChannels);
+    std::vector<float> output(outputSize);
     cudaStatus = cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * outputSize * outputSize * nChannels,
+        sizeof(float) * outputSize,
         cudaMemcpyDeviceToHost
     );
     EXPECT_EQ(cudaStatus, cudaSuccess);