Add toplevel CUDANet namespace

This commit is contained in:
2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -25,7 +25,7 @@ TEST(ActivationsTest, SigmoidSanityCheck) {
cudaStatus = cudaMemcpy(d_input, input, sizeof(float) * 3, cudaMemcpyHostToDevice);
EXPECT_EQ(cudaStatus, cudaSuccess);
Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
CUDANet::Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
cudaStatus = cudaDeviceSynchronize();
EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -44,7 +44,7 @@ TEST(MatMulTest, MatVecMulTest) {
int THREADS_PER_BLOCK = std::max(w, h);
int BLOCKS = 1;
Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
CUDANet::Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
cudaStatus = cudaDeviceSynchronize();
EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -51,7 +51,7 @@ TEST(PaddingTest, SimplePaddingTest) {
int THREADS_PER_BLOCK = 64;
int BLOCKS = paddedSize / THREADS_PER_BLOCK + 1;
Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
CUDANet::Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
d_input, d_padded, w, h, n, p
);
cudaStatus = cudaDeviceSynchronize();

View File

@@ -7,20 +7,20 @@
class Conv2dTest : public ::testing::Test {
protected:
Layers::Conv2d commonTestSetup(
int inputSize,
int inputChannels,
int kernelSize,
int stride,
Layers::Padding padding,
int numFilters,
Layers::Activation activation,
std::vector<float>& input,
float* kernels,
float*& d_input
CUDANet::Layers::Conv2d commonTestSetup(
int inputSize,
int inputChannels,
int kernelSize,
int stride,
CUDANet::Layers::Padding padding,
int numFilters,
CUDANet::Layers::Activation activation,
std::vector<float>& input,
float* kernels,
float*& d_input
) {
// Create Conv2d layer
Layers::Conv2d conv2d(
CUDANet::Layers::Conv2d conv2d(
inputSize, inputChannels, kernelSize, stride, padding, numFilters,
activation
);
@@ -53,13 +53,13 @@ class Conv2dTest : public ::testing::Test {
};
TEST_F(Conv2dTest, SimpleTest) {
int inputSize = 4;
int inputChannels = 1;
int kernelSize = 2;
int stride = 1;
Layers::Padding padding = Layers::Padding::VALID;
int numFilters = 1;
Layers::Activation activation = Layers::Activation::NONE;
int inputSize = 4;
int inputChannels = 1;
int kernelSize = 2;
int stride = 1;
CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::VALID;
int numFilters = 1;
CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;
std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
@@ -74,7 +74,7 @@ TEST_F(Conv2dTest, SimpleTest) {
float* d_input;
float* d_output;
Layers::Conv2d conv2d = commonTestSetup(
CUDANet::Layers::Conv2d conv2d = commonTestSetup(
inputSize, inputChannels, kernelSize, stride, padding, numFilters,
activation, input, kernels.data(), d_input
);
@@ -102,13 +102,13 @@ TEST_F(Conv2dTest, SimpleTest) {
}
TEST_F(Conv2dTest, PaddedTest) {
int inputSize = 5;
int inputChannels = 3;
int kernelSize = 3;
int stride = 1;
Layers::Padding padding = Layers::Padding::SAME;
int numFilters = 2;
Layers::Activation activation = Layers::Activation::NONE;
int inputSize = 5;
int inputChannels = 3;
int kernelSize = 3;
int stride = 1;
CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
int numFilters = 2;
CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;
// clang-format off
std::vector<float> input = {
@@ -163,7 +163,7 @@ TEST_F(Conv2dTest, PaddedTest) {
float* d_input;
float* d_output;
Layers::Conv2d conv2d = commonTestSetup(
CUDANet::Layers::Conv2d conv2d = commonTestSetup(
inputSize, inputChannels, kernelSize, stride, padding, numFilters,
activation, input, kernels.data(), d_input
);
@@ -177,7 +177,8 @@ TEST_F(Conv2dTest, PaddedTest) {
);
cudaMemcpy(
output.data(), d_output,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
numFilters,
cudaMemcpyDeviceToHost
);
@@ -202,13 +203,13 @@ TEST_F(Conv2dTest, PaddedTest) {
}
TEST_F(Conv2dTest, StridedPaddedConvolution) {
int inputSize = 5;
int inputChannels = 2;
int kernelSize = 3;
int stride = 2;
int numFilters = 2;
Layers::Padding padding = Layers::Padding::SAME;
Layers::Activation activation = Layers::Activation::RELU;
int inputSize = 5;
int inputChannels = 2;
int kernelSize = 3;
int stride = 2;
int numFilters = 2;
CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::RELU;
// clang-format off
std::vector<float> input = {
@@ -248,7 +249,7 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
float* d_input;
float* d_output;
Layers::Conv2d conv2d = commonTestSetup(
CUDANet::Layers::Conv2d conv2d = commonTestSetup(
inputSize, inputChannels, kernelSize, stride, padding, numFilters,
activation, input, kernels.data(), d_input
);
@@ -262,7 +263,8 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
);
cudaMemcpy(
output.data(), d_output,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
numFilters,
cudaMemcpyDeviceToHost
);

View File

@@ -8,17 +8,17 @@
class DenseLayerTest : public ::testing::Test {
protected:
Layers::Dense commonTestSetup(
int inputSize,
int outputSize,
std::vector<float>& input,
float* weights,
float* biases,
float*& d_input,
Layers::Activation activation
CUDANet::Layers::Dense commonTestSetup(
int inputSize,
int outputSize,
std::vector<float>& input,
float* weights,
float* biases,
float*& d_input,
CUDANet::Layers::Activation activation
) {
// Create Dense layer
Layers::Dense denseLayer(inputSize, outputSize, activation);
CUDANet::Layers::Dense denseLayer(inputSize, outputSize, activation);
// Set weights and biases
denseLayer.setWeights(weights);
@@ -52,8 +52,8 @@ TEST_F(DenseLayerTest, Init) {
int inputSize = i;
int outputSize = j;
Layers::Dense denseLayer(
inputSize, outputSize, Layers::Activation::SIGMOID
CUDANet::Layers::Dense denseLayer(
inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
);
}
}
@@ -73,8 +73,8 @@ TEST_F(DenseLayerTest, setWeights) {
};
// clang-format on
Layers::Dense denseLayer(
inputSize, outputSize, Layers::Activation::SIGMOID
CUDANet::Layers::Dense denseLayer(
inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
);
denseLayer.setWeights(weights.data());
@@ -99,9 +99,9 @@ TEST_F(DenseLayerTest, ForwardUnitWeightMatrixLinear) {
float* d_input;
float* d_output;
Layers::Dense denseLayer = commonTestSetup(
CUDANet::Layers::Dense denseLayer = commonTestSetup(
inputSize, outputSize, input, weights.data(), biases.data(), d_input,
Layers::Activation::NONE
CUDANet::Layers::Activation::NONE
);
d_output = denseLayer.forward(d_input);
@@ -140,9 +140,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixRelu) {
float* d_input;
float* d_output;
Layers::Dense denseLayer = commonTestSetup(
CUDANet::Layers::Dense denseLayer = commonTestSetup(
inputSize, outputSize, input, weights.data(), biases.data(), d_input,
Layers::Activation::RELU
CUDANet::Layers::Activation::RELU
);
d_output = denseLayer.forward(d_input);
@@ -185,9 +185,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) {
float* d_input;
float* d_output;
Layers::Dense denseLayer = commonTestSetup(
CUDANet::Layers::Dense denseLayer = commonTestSetup(
inputSize, outputSize, input, weights.data(), biases.data(), d_input,
Layers::Activation::SIGMOID
CUDANet::Layers::Activation::SIGMOID
);
d_output = denseLayer.forward(d_input);

View File

@@ -1,16 +1,16 @@
#include <gtest/gtest.h>
#include "input.cuh"
#include "cuda_helper.cuh"
#include "input.cuh"
// Checks that the Input layer hands back exactly the data it was given:
// forward() receives the host vector, the pointer it returns is copied back
// to the host, and the result is compared with the original input.
// NOTE(review): this listing looks like a diff hunk rendered without +/-
// markers, so several statements below appear twice (old form, then new
// form) -- confirm against the actual file before compiling.
TEST(InputLayerTest, Init) {
// Six sample values; the count matches the size passed to the layer below.
std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};
// Presumably the pre-change declaration (unqualified Layers namespace) -- confirm.
Layers::Input inputLayer(6);
float* d_output = inputLayer.forward(input.data());
// Presumably the post-change declaration, qualified with CUDANet:: -- confirm.
CUDANet::Layers::Input inputLayer(6);
float* d_output = inputLayer.forward(input.data());
// Host-side buffer that receives the layer output.
std::vector<float> output(6);
// Copy six floats from the pointer returned by forward() back to the host.
// The same call appears in single-line form and then wrapped across lines.
CUDA_CHECK(cudaMemcpy(output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpy(
output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
));
// The round trip must leave the values unchanged.
EXPECT_EQ(input, output);
}