mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-06 01:34:22 +00:00
Split tests to cpu and cuda
This commit is contained in:
49
test/cuda/kernels/test_activation_functions.cu
Normal file
49
test/cuda/kernels/test_activation_functions.cu
Normal file
@@ -0,0 +1,49 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "activation_functions.cuh"
|
||||
#include "matmul.cuh"
|
||||
#include "cuda_helper.cuh"
|
||||
|
||||
// Sanity-checks the sigmoid kernel at the saturation points and midpoint:
// sigmoid(-100) ~ 0, sigmoid(0) = 0.5, sigmoid(100) ~ 1.
TEST(ActivationFunctionsTest, SigmoidSanityCheck) {
    const float h_input[3]              = {-100.0f, 0.0f, 100.0f};
    const std::vector<float> h_expected = {0.0f, 0.5f, 1.0f};

    float* d_input  = nullptr;
    float* d_output = nullptr;

    cudaError_t status = cudaMalloc((void**)&d_input, sizeof(float) * 3);
    EXPECT_EQ(status, cudaSuccess);

    status = cudaMalloc((void**)&d_output, sizeof(float) * 3);
    EXPECT_EQ(status, cudaSuccess);

    status = cudaMemcpy(d_input, h_input, sizeof(float) * 3, cudaMemcpyHostToDevice);
    EXPECT_EQ(status, cudaSuccess);

    // One block, one thread per element.
    CUDANet::Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
    status = cudaDeviceSynchronize();
    EXPECT_EQ(status, cudaSuccess);

    std::vector<float> h_output(3);
    status = cudaMemcpy(h_output.data(), d_output, sizeof(float) * 3, cudaMemcpyDeviceToHost);
    EXPECT_EQ(status, cudaSuccess);

    for (int i = 0; i < 3; i++) {
        EXPECT_NEAR(h_expected[i], h_output[i], 1e-5);
    }

    cudaFree(d_input);
    cudaFree(d_output);
}
|
||||
268
test/cuda/kernels/test_matmul.cu
Normal file
268
test/cuda/kernels/test_matmul.cu
Normal file
@@ -0,0 +1,268 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cuda_helper.cuh"
|
||||
#include "vector.cuh"
|
||||
#include "matmul.cuh"
|
||||
|
||||
// Verifies the mat_vec_mul kernel for a 5x10 row-major matrix against a
// straightforward CPU row-by-row dot product (the indexing matrix[i*w+j]
// below fixes the layout: w = columns, h = rows).
TEST(MatMulTest, MatVecMulTest) {
    cudaError_t cudaStatus;

    int w = 10;
    int h = 5;

    float* d_matrix;
    float* d_vector;
    float* d_output;

    cudaStatus = cudaMalloc((void**)&d_matrix, sizeof(float) * w * h);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_vector, sizeof(float) * w);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_output, sizeof(float) * h);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    std::vector<float> matrix = {
        0.643f, 0.912f, 0.723f, 0.587f, 0.155f, 0.932f, 0.391f, 0.279f, 0.846f, 0.788f,
        0.641f, 0.445f, 0.528f, 0.316f, 0.247f, 0.181f, 0.549f, 0.328f, 0.919f, 0.405f,
        0.733f, 0.287f, 0.901f, 0.602f, 0.816f, 0.495f, 0.797f, 0.210f, 0.305f, 0.613f,
        0.178f, 0.856f, 0.724f, 0.263f, 0.559f, 0.677f, 0.193f, 0.389f, 0.488f, 0.848f,
        0.121f, 0.734f, 0.587f, 0.904f, 0.312f, 0.672f, 0.807f, 0.478f, 0.581f, 0.964f
    };
    std::vector<float> vector = {
        0.643f, 0.912f, 0.723f, 0.587f, 0.155f, 0.932f, 0.391f, 0.279f, 0.846f, 0.788f
    };

    cudaStatus = cudaMemcpy(d_matrix, matrix.data(), sizeof(float) * w * h, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMemcpy(d_vector, vector.data(), sizeof(float) * w, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Enough blocks to cover the larger of the two dimensions.
    int grid_size = (std::max(w, h) + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Zero the accumulator buffer before the kernel writes into it.
    CUDANet::Utils::clear(d_output, h);

    CUDANet::Kernels::mat_vec_mul<<<grid_size, BLOCK_SIZE>>>(d_matrix, d_vector, d_output, w, h);
    cudaStatus = cudaDeviceSynchronize();
    EXPECT_EQ(cudaStatus, cudaSuccess);

    std::vector<float> output_gpu(h);
    cudaStatus = cudaMemcpy(output_gpu.data(), d_output, sizeof(float) * h, cudaMemcpyDeviceToHost);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // CPU reference: output[i] = dot(matrix row i, vector).
    for (int i = 0; i < h; i++) {
        float sum = 0.0f;
        for (int j = 0; j < w; j++) {
            sum += matrix[i * w + j] * vector[j];
        }
        EXPECT_NEAR(sum, output_gpu[i], 1e-5f);
    }

    cudaFree(d_matrix);
    cudaFree(d_vector);
    cudaFree(d_output);
}
|
||||
|
||||
// Reduces 2^16 values 0..n-1 to their maximum with max_reduce, folding the
// per-block partial maxima in place until a single value remains.
TEST(MatMulTest, MaxReduceTest) {
    cudaError_t cudaStatus;

    const int n = 1 << 16;

    // Input is 0, 1, ..., n-1, so the expected maximum is n - 1.
    std::vector<float> input(n);
    for (int i = 0; i < n; i++) {
        input[i] = i;
    }

    float* d_input;
    float* d_output;

    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_output, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMemcpy(d_input, input.data(), sizeof(float) * n, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    const int grid_size = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // First pass: one partial maximum per block.
    CUDANet::Kernels::max_reduce<<<grid_size, BLOCK_SIZE>>>(d_input, d_output, n);
    EXPECT_EQ(cudaGetLastError(), cudaSuccess);

    // Keep reducing the partial results in place until one value remains.
    int remaining = grid_size;
    while (remaining > 1) {
        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_output, d_output, remaining);
        EXPECT_EQ(cudaGetLastError(), cudaSuccess);
        remaining = blocks_needed;
    }

    // Only the final reduced value is needed on the host (the original code
    // copied a single float into an n-element vector). cudaMemcpy blocks, so
    // it also synchronizes with the kernels above.
    float result = 0.0f;
    cudaStatus = cudaMemcpy(&result, d_output, sizeof(float), cudaMemcpyDeviceToHost);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // n - 1 == 65535, exactly representable as a float.
    EXPECT_EQ(result, static_cast<float>(n - 1));

    cudaFree(d_input);
    cudaFree(d_output);
}
|
||||
|
||||
// Checks the element-wise vec_exp kernel against precomputed values of
// exp(x) for six large inputs. Results are in the 1e9..1e16 range, hence
// the wide absolute tolerance of 1e7 below.
TEST(MatMulTest, VecExpTest) {
    cudaError_t cudaStatus;

    float input[6] = {22.496f, 36.9006f, 30.9904f,
                      28.4213f, 26.4541f, 31.7887f};

    // Reference values: expected[i] == exp(input[i]) rounded to float.
    std::vector<float> expected = {5886928896.0f, 1.06102872080384e+16f,
                                   28771323215872.0f, 2204012904448.0f,
                                   308226162688.0f, 63922983927808.0f};

    float* d_input;
    float* d_output;

    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * 6);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_output, sizeof(float) * 6);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus =
        cudaMemcpy(d_input, input, sizeof(float) * 6, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // One block, one thread per element.
    CUDANet::Kernels::vec_exp<<<1, 6>>>(d_input, d_output, 6);
    cudaStatus = cudaDeviceSynchronize();
    EXPECT_EQ(cudaStatus, cudaSuccess);

    std::vector<float> output(6);

    cudaStatus = cudaMemcpy(
        output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    for (int i = 0; i < 6; i++) {
        EXPECT_NEAR(expected[i], output[i], 1e7f);
    }

    cudaFree(d_input);
    cudaFree(d_output);
}
|
||||
|
||||
// Sums 2^16 ones with the sum_reduce kernel, repeatedly folding the
// per-block partial sums until a single value remains, then checks the
// total equals n.
TEST(MatMulTest, SumReduceTest) {
    cudaError_t cudaStatus;

    const int n = 1 << 16;

    std::vector<float> input(n);
    for (int i = 0; i < n; i++) {
        input[i] = 1.0f;
    }

    const float expected = n;

    float* d_input = nullptr;
    float* d_sum = nullptr;

    const int gridSize = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;

    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_sum, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus =
        cudaMemcpy(d_input, input.data(), sizeof(float) * n, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Zero the accumulator buffer before the kernels write into it.
    CUDANet::Utils::clear(d_sum, n);

    // First pass: one partial sum per block.
    CUDANet::Kernels::sum_reduce<<<gridSize, BLOCK_SIZE>>>(
        d_input, d_sum, n
    );
    EXPECT_EQ(cudaGetLastError(), cudaSuccess);

    // Fold the partial sums in place until a single value remains.
    int remaining = gridSize;
    while (remaining > 1) {
        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::sum_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_sum, d_sum, remaining);
        EXPECT_EQ(cudaGetLastError(), cudaSuccess);
        remaining = blocks_needed;
    }

    // Only the final value is needed on the host; the original copied the
    // whole n-element buffer and read element 0. cudaMemcpy blocks, so it
    // also synchronizes with the kernels above.
    float sum = 0.0f;
    cudaStatus = cudaMemcpy(&sum, d_sum, sizeof(float), cudaMemcpyDeviceToHost);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    EXPECT_FLOAT_EQ(expected, sum);

    cudaFree(d_input);
    cudaFree(d_sum);
}
|
||||
|
||||
// Exercises vec_scale on 1000 random floats. The CPU reference below
// implies the kernel contract dst[i] = src[i] / sqrt(scale + epsilon)
// (scale and epsilon are passed as single-element device buffers) —
// NOTE(review): inferred from this test's reference math; confirm against
// the kernel in matmul.cuh.
TEST(MatMulTest, VecScaleTest) {
    cudaError_t cudaStatus;
    int len = 1000;
    float* d_src;
    float* d_dst;
    float* d_scale;
    float* d_epsilon;

    cudaStatus = cudaMalloc((void**)&d_src, sizeof(float) * len);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_dst, sizeof(float) * len);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_scale, sizeof(float));
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMalloc((void**)&d_epsilon, sizeof(float));
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Unseeded rand(): values in [0, 1], deterministic across runs.
    std::vector<float> src(len);
    for (int i = 0; i < len; ++i) {
        src[i] = static_cast<float>(rand()) / RAND_MAX;
    }

    float scale = 1.5f;
    float epsilon = 1e-5f;

    cudaStatus = cudaMemcpy(d_src, src.data(), sizeof(float) * len, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);
    cudaStatus = cudaMemcpy(d_scale, &scale, sizeof(float), cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);
    cudaStatus = cudaMemcpy(d_epsilon, &epsilon, sizeof(float), cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    int grid_size = (len + BLOCK_SIZE - 1) / BLOCK_SIZE;
    CUDANet::Kernels::vec_scale<<<grid_size, BLOCK_SIZE>>>(d_src, d_dst, d_scale, d_epsilon, len);

    cudaStatus = cudaDeviceSynchronize();
    EXPECT_EQ(cudaStatus, cudaSuccess);

    std::vector<float> dst_gpu(len);
    cudaStatus = cudaMemcpy(dst_gpu.data(), d_dst, sizeof(float) * len, cudaMemcpyDeviceToHost);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // CPU reference: multiply by the inverse standard deviation.
    float inv_std = 1.0f / std::sqrt(scale + epsilon);
    for (int i = 0; i < len; ++i) {
        EXPECT_NEAR(src[i] * inv_std, dst_gpu[i], 1e-5f);
    }

    cudaFree(d_src);
    cudaFree(d_dst);
    cudaFree(d_scale);
    cudaFree(d_epsilon);
}
|
||||
84
test/cuda/layers/test_activation.cu
Normal file
84
test/cuda/layers/test_activation.cu
Normal file
@@ -0,0 +1,84 @@
|
||||
#include "activation.cuh"
|
||||
#include <gtest/gtest.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <vector>
|
||||
|
||||
|
||||
// Softmax over a 5-element vector applied in place on the device buffer:
// the output must match the reference probabilities and sum to 1.
TEST(ActivationTest, SoftmaxTest1) {
    const int inputSize = 5;
    cudaError_t cudaStatus;

    CUDANet::Layers::Activation activation(
        CUDANet::Layers::ActivationType::SOFTMAX, inputSize
    );

    std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};

    float* d_input;
    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * inputSize);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMemcpy(d_input, input.data(), sizeof(float) * inputSize, cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Softmax is applied in place on d_input.
    activation.activate(d_input);

    // Sized with inputSize rather than the magic literal 5, keeping it in
    // sync with the allocation and loop bounds (matches SoftmaxTest2).
    std::vector<float> output(inputSize);
    cudaStatus = cudaMemcpy(
        output.data(), d_input, sizeof(float) * inputSize, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    float sum = 0.0f;

    std::vector<float> expected = {0.22055f, 0.23094f, 0.25856f, 0.13139f, 0.15856f};
    for (int i = 0; i < inputSize; ++i) {
        sum += output[i];
        EXPECT_NEAR(output[i], expected[i], 1e-5f);
    }

    // The probabilities must form a distribution.
    EXPECT_NEAR(sum, 1.0f, 1e-5f);

    cudaStatus = cudaFree(d_input);
    EXPECT_EQ(cudaStatus, cudaSuccess);
}
|
||||
|
||||
// Softmax over six large logits: the dominant logit (36.9006) should take
// nearly all of the probability mass, and the output must sum to 1.
TEST(ActivationTest, SoftmaxTest2) {
    const int inputSize = 6;
    cudaError_t status;

    CUDANet::Layers::Activation activation(
        CUDANet::Layers::ActivationType::SOFTMAX, inputSize
    );

    // Surface any error left over from layer construction.
    status = cudaGetLastError();
    EXPECT_EQ(status, cudaSuccess);

    std::vector<float> logits = {22.496f, 36.9006f, 30.9904f, 28.4213f, 26.4541f, 31.7887f};

    float* d_input = nullptr;
    status = cudaMalloc((void**)&d_input, sizeof(float) * inputSize);
    EXPECT_EQ(status, cudaSuccess);

    status = cudaMemcpy(d_input, logits.data(), sizeof(float) * inputSize, cudaMemcpyHostToDevice);
    EXPECT_EQ(status, cudaSuccess);

    // Softmax is applied in place on the device buffer.
    activation.activate(d_input);

    std::vector<float> probs(inputSize);
    status = cudaMemcpy(
        probs.data(), d_input, sizeof(float) * inputSize, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(status, cudaSuccess);

    const std::vector<float> expected = {0.0f, 0.99111f, 0.00269f, 0.00021f, 3e-05f, 0.00597f};

    float total = 0.0f;
    for (int i = 0; i < inputSize; ++i) {
        total += probs[i];
        EXPECT_NEAR(probs[i], expected[i], 1e-5f);
    }
    EXPECT_NEAR(total, 1.0f, 1e-5f);

    // Cleanup
    status = cudaFree(d_input);
    EXPECT_EQ(status, cudaSuccess);
}
|
||||
281
test/cuda/layers/test_avg_pooling.cu
Normal file
281
test/cuda/layers/test_avg_pooling.cu
Normal file
@@ -0,0 +1,281 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "avg_pooling.cuh"
|
||||
|
||||
// Fixture for AvgPooling2d forward tests. Each TEST_F fills in the layer
// geometry (inputSize / poolingSize / stride / padding), the input tensor
// and the expected output, then calls runTest().
class AvgPoolingLayerTest : public ::testing::Test {
  protected:
    shape2d inputSize;    // input height/width
    int nChannels;        // number of channels (channels stored consecutively)
    shape2d poolingSize;  // pooling window height/width
    shape2d stride;
    shape2d padding;
    std::vector<float> input;     // host input, one channel after another
    std::vector<float> expected;  // reference forward output

    float *d_input;
    float *d_output;
    CUDANet::Layers::AvgPooling2d *avgPoolingLayer;

    virtual void SetUp() override {
        d_input = nullptr;
        d_output = nullptr;
        avgPoolingLayer = nullptr;
    }

    virtual void TearDown() override {
        // Only d_input is owned by the fixture; d_output is returned by the
        // layer's forward() — presumably layer-owned, so it is not freed here.
        if (d_input) {
            cudaFree(d_input);
        }
    }

    // Builds the layer, uploads `input`, runs forward() and compares the
    // result element-wise against `expected` with tolerance 1e-4.
    void runTest() {
        cudaError_t cudaStatus;

        avgPoolingLayer = new CUDANet::Layers::AvgPooling2d(
            inputSize, nChannels, poolingSize, stride, padding,
            CUDANet::Layers::ActivationType::NONE
        );

        cudaStatus = cudaMalloc(
            (void **)&d_input,
            sizeof(float) * inputSize.first * inputSize.second * nChannels
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus = cudaMemcpy(
            d_input, input.data(),
            sizeof(float) * inputSize.first * inputSize.second * nChannels,
            cudaMemcpyHostToDevice
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        d_output = avgPoolingLayer->forward(d_input);

        int outputSize = avgPoolingLayer->getOutputSize();

        std::vector<float> output(outputSize);
        cudaStatus = cudaMemcpy(
            output.data(), d_output, sizeof(float) * outputSize,
            cudaMemcpyDeviceToHost
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        for (int i = 0; i < output.size(); ++i) {
            EXPECT_NEAR(expected[i], output[i], 1e-4);
        }

        delete avgPoolingLayer;
    }
};
|
||||
|
||||
// Baseline case: 2x2 pooling, stride 2, no padding on a 4x4 two-channel
// input — each output value is the mean of a disjoint 2x2 window.
TEST_F(AvgPoolingLayerTest, AvgPoolForwardTest) {
    inputSize = {4, 4};
    nChannels = 2;
    poolingSize = {2, 2};
    stride = {2, 2};
    padding = {0, 0};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    // 2x2 outputs per channel, channel 0 first.
    expected = {0.43775f, 0.49475f, 0.48975f, 0.339f,
                0.45675f, 0.303f, 0.56975f, 0.57025f};

    runTest();
}
|
||||
|
||||
// Same 2x2/stride-2 pooling but on a non-square (4x6) input, checking the
// width/height dimensions are not accidentally swapped.
TEST_F(AvgPoolingLayerTest, AvgPoolForwardNonSquareInputTest) {
    inputSize = {4, 6};  // Non-square input
    nChannels = 2;
    poolingSize = {2, 2};
    stride = {2, 2};
    padding = {0, 0};

    input = {// Channel 0
             0.573f, 0.619f, 0.732f, 0.055f, 0.123f, 0.234f, 0.243f, 0.316f,
             0.573f, 0.619f, 0.456f, 0.789f, 0.712f, 0.055f, 0.243f, 0.316f,
             0.654f, 0.987f, 0.573f, 0.619f, 0.742f, 0.055f, 0.321f, 0.654f,
             // Channel 1
             0.473f, 0.919f, 0.107f, 0.073f, 0.321f, 0.654f, 0.073f, 0.362f,
             0.973f, 0.059f, 0.654f, 0.987f, 0.473f, 0.455f, 0.283f, 0.416f,
             0.789f, 0.123f, 0.532f, 0.819f, 0.732f, 0.850f, 0.987f, 0.321f
    };

    expected = {0.43775f, 0.49475f, 0.4005f, 0.48975f, 0.339f, 0.654f,
                0.45675f, 0.303f, 0.654f, 0.56975f, 0.57025f, 0.555f};

    runTest();
}
|
||||
|
||||
// Non-square (2x3) pooling window on a square 4x4 input.
TEST_F(AvgPoolingLayerTest, AvgPoolForwardNonSquarePoolingTest) {
    inputSize = {4, 4};
    nChannels = 2;
    poolingSize = {2, 3};  // Non-square pooling
    stride = {2, 2};
    padding = {0, 0};

    input = {// Channel 0
             0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f,
             0.712f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f, 0.742f, 0.055f,
             // Channel 1
             0.473f, 0.919f, 0.107f, 0.073f, 0.073f, 0.362f, 0.973f, 0.059f,
             0.473f, 0.455f, 0.283f, 0.416f, 0.532f, 0.819f, 0.732f, 0.850f
    };

    expected = {0.50933f, 0.49067f, 0.4845f, 0.549f};

    runTest();
}
|
||||
|
||||
// Non-square (1x2) stride: windows overlap vertically, producing more
// output rows than the stride-2 baseline.
TEST_F(AvgPoolingLayerTest, AvgPoolForwardNonSquareStrideTest) {
    inputSize = {4, 4};
    nChannels = 2;
    poolingSize = {2, 2};
    stride = {1, 2};  // Non-square stride
    padding = {0, 0};

    input = {// Channel 0
             0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f,
             0.712f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f, 0.742f, 0.055f,
             // Channel 1
             0.473f, 0.919f, 0.107f, 0.073f, 0.073f, 0.362f, 0.973f, 0.059f,
             0.473f, 0.455f, 0.283f, 0.416f, 0.532f, 0.819f, 0.732f, 0.850f
    };

    expected = {0.43775f, 0.49475f, 0.3315f, 0.43775f, 0.48975f, 0.339f,
                0.45675f, 0.303f, 0.34075f, 0.43275f, 0.56975f, 0.57025f};

    runTest();
}
|
||||
|
||||
// Non-square (1x0) padding: one padded row above/below, none on the sides.
TEST_F(AvgPoolingLayerTest, AvgPoolForwardNonSquarePaddingTest) {
    inputSize = {4, 4};
    nChannels = 2;
    poolingSize = {2, 2};
    stride = {2, 2};
    padding = {1, 0};  // Non-square padding

    input = {// Channel 0
             0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f,
             0.712f, 0.055f, 0.243f, 0.316f, 0.573f, 0.619f, 0.742f, 0.055f,
             // Channel 1
             0.473f, 0.919f, 0.107f, 0.073f, 0.073f, 0.362f, 0.973f, 0.059f,
             0.473f, 0.455f, 0.283f, 0.416f, 0.532f, 0.819f, 0.732f, 0.850f
    };

    expected = {0.298f, 0.19675f, 0.3315f, 0.43775f, 0.298f, 0.19925f,
                0.348f, 0.045f, 0.34075f, 0.43275f, 0.33775f, 0.3955f};

    runTest();
}
|
||||
|
||||
|
||||
// Fixture for AdaptiveAvgPooling2d forward tests: each TEST_F sets the
// input/output geometry plus input and expected data, then calls runTest().
class AdaptiveAvgPoolingLayerTest : public ::testing::Test {
  protected:
    shape2d inputSize;   // input height/width
    shape2d outputSize;  // requested output height/width
    int nChannels;
    std::vector<float> input;     // host input, one channel after another
    std::vector<float> expected;  // reference forward output

    float* d_input;
    float* d_output;
    CUDANet::Layers::AdaptiveAvgPooling2d* adaptiveAvgPoolingLayer;

    virtual void SetUp() override {
        d_input = nullptr;
        d_output = nullptr;
        adaptiveAvgPoolingLayer = nullptr;
    }

    virtual void TearDown() override {
        // Guard the free so a test that never allocated does not call into
        // CUDA (consistent with AvgPoolingLayerTest::TearDown).
        if (d_input) {
            cudaFree(d_input);
        }
    }

    // Builds the layer, uploads `input`, runs forward() and compares the
    // result element-wise against `expected` with tolerance 1e-5.
    void runTest() {
        cudaError_t cudaStatus;

        adaptiveAvgPoolingLayer = new CUDANet::Layers::AdaptiveAvgPooling2d(
            inputSize, nChannels, outputSize, CUDANet::Layers::ActivationType::NONE
        );

        cudaStatus = cudaMalloc(
            (void**)&d_input,
            sizeof(float) * inputSize.first * inputSize.second * nChannels
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus = cudaMemcpy(
            d_input, input.data(),
            sizeof(float) * inputSize.first * inputSize.second * nChannels,
            cudaMemcpyHostToDevice
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        d_output = adaptiveAvgPoolingLayer->forward(d_input);

        // Renamed from `outputSize` (the original shadowed the shape2d
        // member of the same name above).
        int outputLen = adaptiveAvgPoolingLayer->getOutputSize();

        std::vector<float> output(outputLen);
        cudaStatus = cudaMemcpy(
            output.data(), d_output, sizeof(float) * outputLen,
            cudaMemcpyDeviceToHost
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        for (int i = 0; i < output.size(); ++i) {
            EXPECT_NEAR(expected[i], output[i], 1e-5);
        }

        delete adaptiveAvgPoolingLayer;
    }
};
|
||||
|
||||
// Adaptive pooling of a 4x4 two-channel input to a 2x2 output: with these
// shapes each output value is the mean of a 2x2 window, so the expected
// values match the fixed 2x2/stride-2 AvgPooling test.
TEST_F(AdaptiveAvgPoolingLayerTest, AdaptiveAvgPoolForwardTest) {
    inputSize = {4, 4};
    outputSize = {2, 2};
    nChannels = 2;

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    expected = {
        // clang-format off
        // Channel 0
        0.43775f, 0.49475f,
        0.48975f, 0.339f,
        // Channel 1
        0.45675f, 0.303f,
        0.56975f, 0.57025f
        // clang-format on
    };

    runTest();
}
|
||||
147
test/cuda/layers/test_batch_norm.cu
Normal file
147
test/cuda/layers/test_batch_norm.cu
Normal file
@@ -0,0 +1,147 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "activation.cuh"
|
||||
#include "batch_norm.cuh"
|
||||
|
||||
// Fixture for BatchNorm2d forward tests. Each TEST_F supplies the layer
// geometry, per-channel weights/biases and running statistics, plus input
// and expected data, then calls runTest(). The layer is constructed with
// epsilon 1e-5 — presumably inference-mode batch norm using the running
// mean/variance; confirm against batch_norm.cuh.
class BatchNormLayerTest : public ::testing::Test {
  protected:
    shape2d inputSize;            // input height/width
    int nChannels;
    std::vector<float> weights;   // per-channel scale (gamma)
    std::vector<float> biases;    // per-channel shift (beta)

    std::vector<float> runningMean;  // per-channel running mean
    std::vector<float> runningVar;   // per-channel running variance

    std::vector<float> input;     // host input, one channel after another
    std::vector<float> expected;  // reference forward output

    float *d_input;
    float *d_output;
    CUDANet::Layers::BatchNorm2d *batchNorm;

    virtual void SetUp() override {
        d_input = nullptr;
        d_output = nullptr;
        batchNorm = nullptr;
    }

    virtual void TearDown() override {
        // d_output comes from the layer's forward() and is not freed here.
        if (d_input) {
            cudaFree(d_input);
        }
    }

    // Builds the layer, uploads parameters and input, runs forward() and
    // compares the result element-wise against `expected` (tolerance 1e-5).
    void runTest() {
        cudaError_t cudaStatus;

        batchNorm = new CUDANet::Layers::BatchNorm2d(
            inputSize, nChannels, 1e-5f, CUDANet::Layers::ActivationType::NONE
        );

        batchNorm->setWeights(weights.data());
        batchNorm->setBiases(biases.data());

        batchNorm->setRunningMean(runningMean.data());
        batchNorm->setRunningVar(runningVar.data());

        // Surface any error raised while setting parameters.
        cudaStatus = cudaGetLastError();
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus =
            cudaMalloc((void **)&d_input, sizeof(float) * input.size());
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus = cudaMemcpy(
            d_input, input.data(), sizeof(float) * input.size(),
            cudaMemcpyHostToDevice
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        d_output = batchNorm->forward(d_input);

        // Batch norm preserves shape, so the output size equals input.size().
        std::vector<float> output(input.size());
        cudaStatus = cudaMemcpy(
            output.data(), d_output, sizeof(float) * output.size(),
            cudaMemcpyDeviceToHost
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        for (int i = 0; i < output.size(); ++i) {
            EXPECT_NEAR(output[i], expected[i], 1e-5);
        }

        delete batchNorm;
    }
};
|
||||
|
||||
// 4x4 two-channel batch norm with fixed running statistics (mean 0.5,
// variance 1.0) and per-channel scale/shift.
TEST_F(BatchNormLayerTest, BatchNormSmallForwardTest) {
    inputSize = {4, 4};
    nChannels = 2;

    weights = {0.63508f, 0.64903f};
    biases = {0.25079f, 0.66841f};

    runningMean = {0.5f, 0.5f};
    runningVar = {1.0f, 1.0f};

    // clang-format off
    input = {
        // Channel 0
        0.38899f, 0.80478f, 0.48836f, 0.97381f,
        0.57508f, 0.60835f, 0.65467f, 0.00168f,
        0.65869f, 0.74235f, 0.17928f, 0.70349f,
        0.15524f, 0.38664f, 0.23411f, 0.7137f,
        // Channel 1
        0.32473f, 0.15698f, 0.314f, 0.60888f,
        0.80268f, 0.99766f, 0.93694f, 0.89237f,
        0.13449f, 0.27367f, 0.53036f, 0.18962f,
        0.57672f, 0.48364f, 0.10863f, 0.0571f
    };
    // clang-format on

    expected = {0.18029f, 0.44435f, 0.2434f, 0.5517f, 0.29847f, 0.3196f,
                0.34902f, -0.06568f, 0.35157f, 0.4047f, 0.04711f, 0.38002f,
                0.03184f, 0.1788f, 0.08193f, 0.38651f, 0.55466f, 0.44578f,
                0.54769f, 0.73908f, 0.86486f, 0.9914f, 0.952f, 0.92307f,
                0.43118f, 0.52152f, 0.68811f, 0.46697f, 0.7182f, 0.65779f,
                0.4144f, 0.38096f};

    runTest();
}
|
||||
|
||||
// Same parameters as the small forward test but on a non-square (4x6)
// input, guarding against swapped height/width handling.
TEST_F(BatchNormLayerTest, BatchNormNonSquareInputTest) {
    inputSize = {4, 6};  // Non-square input
    nChannels = 2;
    weights = {0.63508f, 0.64903f};
    biases = {0.25079f, 0.66841f};

    runningMean = {0.5f, 0.5f};
    runningVar = {1.0f, 1.0f};

    input = {// Channel 0
             0.38899f, 0.80478f, 0.48836f, 0.97381f, 0.21567f, 0.92312f,
             0.57508f, 0.60835f, 0.65467f, 0.00168f, 0.31567f, 0.71345f,
             0.65869f, 0.74235f, 0.17928f, 0.70349f, 0.12856f, 0.95645f,
             0.15524f, 0.38664f, 0.23411f, 0.7137f, 0.26789f, 0.83412f,
             // Channel 1
             0.32473f, 0.15698f, 0.314f, 0.60888f, 0.23145f, 0.78945f, 0.80268f,
             0.99766f, 0.93694f, 0.89237f, 0.61234f, 0.92314f, 0.13449f,
             0.27367f, 0.53036f, 0.18962f, 0.45623f, 0.14523f, 0.57672f,
             0.48364f, 0.10863f, 0.0571f, 0.78934f, 0.67545f
    };

    expected = {0.18029f, 0.44435f, 0.2434f, 0.5517f, 0.07022f, 0.5195f,
                0.29847f, 0.3196f, 0.34902f, -0.06568f, 0.13373f, 0.38635f,
                0.35157f, 0.4047f, 0.04711f, 0.38002f, 0.0149f, 0.54067f,
                0.03184f, 0.1788f, 0.08193f, 0.38651f, 0.10338f, 0.46298f,
                0.55466f, 0.44578f, 0.54769f, 0.73908f, 0.49411f, 0.85627f,
                0.86486f, 0.9914f, 0.952f, 0.92307f, 0.74132f, 0.94304f,
                0.43118f, 0.52152f, 0.68811f, 0.46697f, 0.64f, 0.43815f,
                0.7182f, 0.65779f, 0.4144f, 0.38096f, 0.8562f, 0.78228f};

    runTest();
}
|
||||
54
test/cuda/layers/test_concat.cu
Normal file
54
test/cuda/layers/test_concat.cu
Normal file
@@ -0,0 +1,54 @@
|
||||
#include "concat.cuh"
|
||||
#include <gtest/gtest.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <vector>
|
||||
|
||||
// Concatenates a 5-element and a 6-element device buffer and checks that
// the forward output is inputA immediately followed by inputB.
TEST(ConcatLayerTest, Init) {

    cudaError_t status;

    const std::vector<float> firstPart  = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};
    const std::vector<float> secondPart = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f};

    CUDANet::Layers::Concat concat(5, 6);

    float* d_inputA = nullptr;
    float* d_inputB = nullptr;

    status = cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
    EXPECT_EQ(status, cudaSuccess);
    status = cudaMalloc((void**)&d_inputB, sizeof(float) * 6);
    EXPECT_EQ(status, cudaSuccess);

    status = cudaMemcpy(
        d_inputA, firstPart.data(), sizeof(float) * 5, cudaMemcpyHostToDevice
    );
    EXPECT_EQ(status, cudaSuccess);
    status = cudaMemcpy(
        d_inputB, secondPart.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
    );
    EXPECT_EQ(status, cudaSuccess);

    float* d_output = concat.forward(d_inputA, d_inputB);

    // 5 + 6 = 11 concatenated elements.
    std::vector<float> output(11);
    status = cudaMemcpy(
        output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(status, cudaSuccess);

    // First 5 elements come from inputA, the remaining 6 from inputB.
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ(output[i], firstPart[i]);
    }
    for (int i = 0; i < 6; ++i) {
        EXPECT_EQ(output[i + 5], secondPart[i]);
    }

    status = cudaFree(d_inputA);
    EXPECT_EQ(status, cudaSuccess);
    status = cudaFree(d_inputB);
    EXPECT_EQ(status, cudaSuccess);
}
|
||||
355
test/cuda/layers/test_conv2d.cu
Normal file
355
test/cuda/layers/test_conv2d.cu
Normal file
@@ -0,0 +1,355 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "conv2d.cuh"
|
||||
|
||||
// Fixture for Conv2d forward tests. Each TEST_F fills in the convolution
// geometry, input, kernel weights and expected output, then calls runTest().
class Conv2dTest : public ::testing::Test {
  protected:
    shape2d inputSize;      // input height/width
    int inputChannels;
    shape2d kernelSize;
    shape2d stride;
    int numFilters;         // number of output channels
    shape2d paddingSize;
    CUDANet::Layers::ActivationType activationType;
    std::vector<float> input;    // host input, one channel after another
    std::vector<float> kernels;  // filter weights, filter-major then channel
    std::vector<float> expected; // reference forward output

    float *d_input;
    float *d_output;
    CUDANet::Layers::Conv2d *conv2dLayer;

    virtual void SetUp() override {
        d_input = nullptr;
        d_output = nullptr;
        conv2dLayer = nullptr;
    }

    virtual void TearDown() override {
        // d_output comes from the layer's forward() and is not freed here.
        if (d_input) {
            cudaFree(d_input);
        }
        delete conv2dLayer;
    }

    // Builds the layer, uploads weights and input, runs forward(), checks
    // the layer's reported output size against the standard convolution
    // output formula, and compares values element-wise (tolerance 1e-5).
    void runTest() {
        cudaError_t cudaStatus;

        conv2dLayer = new CUDANet::Layers::Conv2d(
            inputSize, inputChannels, kernelSize, stride, numFilters,
            paddingSize, activationType
        );

        conv2dLayer->setWeights(kernels.data());

        cudaStatus =
            cudaMalloc((void **)&d_input, sizeof(float) * input.size());
        EXPECT_EQ(cudaStatus, cudaSuccess);

        cudaStatus = cudaMemcpy(
            d_input, input.data(), sizeof(float) * input.size(),
            cudaMemcpyHostToDevice
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        d_output = conv2dLayer->forward(d_input);

        // Standard conv output dims: (in - k + 2*pad) / stride + 1.
        int outputHeight =
            (inputSize.first - kernelSize.first + 2 * paddingSize.first) /
                stride.first +
            1;
        int outputWidth =
            (inputSize.second - kernelSize.second + 2 * paddingSize.second) /
                stride.second +
            1;
        int outputSize = outputHeight * outputWidth * numFilters;
        EXPECT_EQ(outputSize, conv2dLayer->getOutputSize());

        std::vector<float> output(outputSize);
        cudaStatus = cudaMemcpy(
            output.data(), d_output, sizeof(float) * output.size(),
            cudaMemcpyDeviceToHost
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        for (int i = 0; i < output.size(); ++i) {
            EXPECT_NEAR(expected[i], output[i], 1e-5f);
        }
    }
};
|
||||
|
||||
// Minimal case: one channel, one 2x2 filter, stride 1, no padding on a
// 4x4 input — a 3x3 output of sliding-window dot products.
TEST_F(Conv2dTest, SimpleTest) {
    inputSize = {4, 4};
    inputChannels = 1;
    kernelSize = {2, 2};
    stride = {1, 1};
    numFilters = 1;
    paddingSize = {0, 0};
    activationType = CUDANet::Layers::ActivationType::NONE;

    input = {
        // clang-format off
        1.0f, 2.0f, 3.0f, 4.0f,
        5.0f, 6.0f, 7.0f, 8.0f,
        9.0f, 10.0f, 11.0f, 12.0f,
        13.0f, 14.0f, 15.0f, 16.0f
        // clang-format on
    };
    kernels = {
        // clang-format off
        1.0f,2.0f,
        3.0f, 4.0f
        // clang-format on
    };
    // e.g. top-left window: 1*1 + 2*2 + 5*3 + 6*4 = 44.
    expected = {44.0f, 54.0f, 64.0f, 84.0f, 94.0f,
                104.0f, 124.0f, 134.0f, 144.0f};

    runTest();
}
|
||||
|
||||
// Same-padded 3-channel convolution with two filters, unit stride and no
// activation; padding is derived via CUDANET_SAME_PADDING so the spatial
// output size equals the input size (5x5 per filter).
TEST_F(Conv2dTest, PaddedTest) {
    inputSize     = {5, 5};
    inputChannels = 3;
    kernelSize    = {3, 3};
    stride        = {1, 1};
    numFilters    = 2;

    // SAME padding, computed independently per dimension.
    int paddingFirst =
        CUDANET_SAME_PADDING(inputSize.first, kernelSize.first, stride.first);
    int paddingSecond = CUDANET_SAME_PADDING(
        inputSize.second, kernelSize.second, stride.second
    );
    paddingSize = {paddingFirst, paddingSecond};

    activationType = CUDANet::Layers::ActivationType::NONE;

    // Input layout: channel-major, 25 values per channel.
    // clang-format off
    input = {
        // Channel 1
        0.823f, 0.217f, 0.435f, 0.981f, 0.742f,
        0.109f, 0.518f, 0.374f, 0.681f, 0.147f,
        0.956f, 0.729f, 0.654f, 0.087f, 0.392f,
        0.784f, 0.921f, 0.543f, 0.231f, 0.816f,
        0.472f, 0.614f, 0.102f, 0.987f, 0.398f,
        // Channel 2
        0.051f, 0.756f, 0.841f, 0.293f, 0.128f,
        0.417f, 0.632f, 0.095f, 0.184f, 0.529f,
        0.871f, 0.958f, 0.213f, 0.347f, 0.725f,
        0.461f, 0.012f, 0.278f, 0.195f, 0.649f,
        0.853f, 0.707f, 0.988f, 0.988f, 0.322f,
        // Channel 3
        0.345f, 0.123f, 0.789f, 0.123f, 0.456f,
        0.456f, 0.789f, 0.123f, 0.345f, 0.123f,
        0.789f, 0.123f, 0.345f, 0.123f, 0.456f,
        0.123f, 0.345f, 0.123f, 0.789f, 0.123f,
        0.345f, 0.123f, 0.789f, 0.123f, 0.456f
    };

    // Kernel layout: filter-major, then channel, then 3x3 taps.
    kernels = {
        // Filter 1, Channel 1
        0.128f, 0.754f, 0.987f,
        0.321f, 0.412f, 0.635f,
        0.298f, 0.017f, 0.845f,
        // Filter 1, Channel 2
        0.514f, 0.729f, 0.952f,
        0.684f, 0.378f, 0.159f,
        0.823f, 0.547f, 0.216f,
        // Filter 1, Channel 3
        0.983f, 0.231f, 0.456f,
        0.178f, 0.654f, 0.821f,
        0.345f, 0.987f, 0.123f,
        // Filter 2, Channel 1
        0.789f, 0.543f, 0.210f,
        0.012f, 0.371f, 0.638f,
        0.456f, 0.198f, 0.907f,
        // Filter 2, Channel 2
        0.101f, 0.432f, 0.759f,
        0.234f, 0.567f, 0.890f,
        0.543f, 0.876f, 0.219f,
        // Filter 2, Channel 3
        0.345f, 0.678f, 0.011f,
        0.678f, 0.011f, 0.345f,
        0.011f, 0.345f, 0.678f
    };
    // clang-format on

    // Generated by tools/generate_conv2d_test.py
    expected = {
        // Channel 1
        2.29426f, 3.89173f, 4.17634f, 3.25501f, 2.07618f, 5.41483f, 7.09971f,
        6.39811f, 5.71432f, 3.10928f, 5.12973f, 6.29638f, 5.26962f, 5.21997f,
        3.05852f, 6.17517f, 7.19311f, 6.69771f, 6.2142f, 4.03242f, 3.3792f,
        4.36444f, 4.396f, 4.69905f, 3.62061f,
        // Channel 2
        2.87914f, 3.71743f, 3.51854f, 2.98413f, 1.46579f, 4.94951f, 6.18983f,
        4.98187f, 4.38372f, 3.35386f, 5.0364f, 5.3756f, 4.05993f, 4.89299f,
        2.78625f, 5.33763f, 5.80899f, 5.89785f, 5.51095f, 3.74287f, 2.64053f,
        4.05895f, 3.96482f, 4.30177f, 1.94269f
    };

    runTest();
}
|
||||
|
||||
// Two-channel, two-filter convolution with stride 2, SAME padding and ReLU;
// negative pre-activation values in `expected` are clamped to zero.
TEST_F(Conv2dTest, StridedPaddedTest) {
    inputSize     = {5, 5};
    inputChannels = 2;
    kernelSize    = {3, 3};
    stride        = {2, 2};
    numFilters    = 2;

    // Fix: padding along the first dimension must be derived from
    // kernelSize.first (the original passed kernelSize.second, which is only
    // coincidentally correct because this kernel is square).
    int paddingFirst =
        CUDANET_SAME_PADDING(inputSize.first, kernelSize.first, stride.first);
    int paddingSecond = CUDANET_SAME_PADDING(
        inputSize.second, kernelSize.second, stride.second
    );
    paddingSize = {paddingFirst, paddingSecond};

    activationType = CUDANet::Layers::ActivationType::RELU;

    // clang-format off
    input = {
        // Channel 1
        0.946f, 0.879f, 0.382f, 0.542f, 0.453f,
        0.128f, 0.860f, 0.778f, 0.049f, 0.974f,
        0.400f, 0.874f, 0.161f, 0.271f, 0.580f,
        0.373f, 0.078f, 0.366f, 0.396f, 0.181f,
        0.246f, 0.112f, 0.179f, 0.979f, 0.026f,
        // Channel 2
        0.598f, 0.458f, 0.776f, 0.213f, 0.199f,
        0.853f, 0.170f, 0.609f, 0.269f, 0.777f,
        0.776f, 0.694f, 0.430f, 0.238f, 0.968f,
        0.473f, 0.303f, 0.084f, 0.785f, 0.444f,
        0.464f, 0.413f, 0.779f, 0.298f, 0.783f
    };
    kernels = {
        // Filter 1, Channel 1
        0.744f, 0.745f, 0.641f,
        0.164f, 0.157f, 0.127f,
        0.732f, 0.761f, 0.601f,
        // Filter 1, Channel 2
        0.475f, 0.335f, 0.499f,
        0.833f, 0.793f, 0.176f,
        0.822f, 0.163f, 0.175f,
        // Filter 2, Channel 1
        0.918f, 0.340f, 0.497f,
        0.233f, 0.218f, 0.847f,
        0.931f, 0.926f, 0.199f,
        // Filter 2, Channel 2
        0.510f, 0.432f, 0.567f,
        0.236f, 0.397f, 0.739f,
        0.939f, 0.891f, 0.006f
    };
    // clang-format on

    expected = {// Channel 1
                0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.59803f, 2.84444f, 1.6201f,
                0.0f, 0.0f, 2.38937f, 3.80762f, 3.39679f, 0.0f, 0.0f, 1.13102f,
                2.33335f, 1.98488f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f,
                // Channel 2
                0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 2.57732f, 3.55543f,
                2.24675f, 0.0f, 0.0f, 3.36842f, 3.41373f, 3.14804f, 0.0f, 0.0f,
                1.17963f, 2.55005f, 1.63218f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f
    };

    runTest();
}
|
||||
|
||||
TEST_F(Conv2dTest, NonSquareInputTest) {
    // 4x6 rectangular input; valid convolution yields a 3x5 output.
    inputSize      = {4, 6};
    inputChannels  = 1;
    kernelSize     = {2, 2};
    stride         = {1, 1};
    numFilters     = 1;
    paddingSize    = {0, 0};
    activationType = CUDANet::Layers::ActivationType::NONE;

    input = {
        // clang-format off
        0.946f, 0.879f, 0.382f, 0.542f, 0.453f, 0.128f,
        0.128f, 0.860f, 0.778f, 0.049f, 0.974f, 0.400f,
        0.400f, 0.874f, 0.161f, 0.271f, 0.580f, 0.373f,
        0.078f, 0.366f, 0.396f, 0.181f, 0.246f, 0.112f
        // clang-format on
    };
    kernels = {0.744f, 0.745f, 0.164f, 0.157f};

    expected = {1.51469f, 1.20175f, 0.82328f, 0.90169f, 0.65493f,
                0.93875f, 1.38806f, 0.68429f, 0.89759f, 1.17634f,
                1.01898f, 0.8924f,  0.41504f, 0.70203f, 0.76733f};

    runTest();
}
|
||||
|
||||
TEST_F(Conv2dTest, NonSquareKernelTest) {
    // 1x3 rectangular kernel on a 4x4 input; valid output is 4x2.
    inputSize      = {4, 4};
    inputChannels  = 1;
    kernelSize     = {1, 3};
    stride         = {1, 1};
    numFilters     = 1;
    paddingSize    = {0, 0};
    activationType = CUDANet::Layers::ActivationType::NONE;

    input = {
        // clang-format off
        0.946f, 0.879f, 0.382f, 0.542f,
        0.128f, 0.860f, 0.778f, 0.049f,
        0.400f, 0.874f, 0.161f, 0.271f,
        0.078f, 0.366f, 0.396f, 0.181f
        // clang-format on
    };
    kernels = {0.744f, 0.745f, 0.164f};

    expected = {1.42133f, 1.02745f, 0.86352f, 1.22749f,
                0.97513f, 0.81465f, 0.39565f, 0.59701f};

    runTest();
}
|
||||
|
||||
TEST_F(Conv2dTest, NonSquareStrideTest) {
    // Stride 1 vertically, 2 horizontally: valid output is 3x2.
    inputSize      = {4, 4};
    inputChannels  = 1;
    kernelSize     = {2, 2};
    stride         = {1, 2};
    numFilters     = 1;
    paddingSize    = {0, 0};
    activationType = CUDANet::Layers::ActivationType::NONE;

    input = {
        // clang-format off
        0.946f, 0.879f, 0.382f, 0.542f,
        0.128f, 0.860f, 0.778f, 0.049f,
        0.400f, 0.874f, 0.161f, 0.271f,
        0.078f, 0.366f, 0.396f, 0.181f
        // clang-format on
    };
    kernels = {0.144f, 0.745f, 0.964f, 0.164f};

    expected = {1.05551f, 1.21683f, 1.18807f, 0.34818f, 0.84395f, 0.63651f};

    runTest();
}
|
||||
|
||||
// Asymmetric padding (1 vertical, 2 horizontal): padded valid output is 5x7.
TEST_F(Conv2dTest, NonSquarePaddingTest) {
    inputSize      = {4, 4};
    inputChannels  = 1;
    kernelSize     = {2, 2};
    stride         = {1, 1};
    numFilters     = 1;
    paddingSize    = {1, 2};  // non-square padding
    activationType = CUDANet::Layers::ActivationType::NONE;

    input = {
        // clang-format off
        0.946f, 0.879f, 0.382f, 0.542f,
        0.128f, 0.860f, 0.778f, 0.049f,
        0.400f, 0.874f, 0.161f, 0.271f,
        0.078f, 0.366f, 0.396f, 0.181f
        // clang-format on
    };
    kernels = {0.144f, 0.745f, 0.964f, 0.164f};

    expected = {0.0f, 0.15514f, 1.0561f, 0.91f, 0.45714f, 0.52249f, 0.0f,
                0.0f, 0.72576f, 1.05551f, 1.3678f, 1.21683f, 0.12528f, 0.0f,
                0.0f, 0.16096f, 1.18807f, 1.57239f, 0.34818f, 0.2683f, 0.0f,
                0.0f, 0.31079f, 0.84395f, 0.66357f, 0.63651f, 0.21351f, 0.0f,
                0.0f, 0.05811f, 0.2839f, 0.34772f, 0.19187f, 0.02606f, 0.0f};

    // Fix: the original test configured everything but never executed the
    // forward pass, so it asserted nothing.
    runTest();
}
|
||||
244
test/cuda/layers/test_dense.cu
Normal file
244
test/cuda/layers/test_dense.cu
Normal file
@@ -0,0 +1,244 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include "activation.cuh"
|
||||
#include "dense.cuh"
|
||||
|
||||
// Fixture for CUDANet::Layers::Dense tests: builds a configured layer and
// stages the host input on the device.
class DenseLayerTest : public ::testing::Test {
   protected:
    // Constructs a Dense layer with the given activation, uploads weights,
    // biases and `input`, and returns the layer by value. The freshly
    // cudaMalloc'ed device buffer is handed back through `d_input`; the
    // caller must release it via commonTestTeardown().
    // NOTE(review): relies on Dense being safely copyable/movable by value —
    // confirm against the Dense class's ownership semantics.
    CUDANet::Layers::Dense commonTestSetup(
        int inputSize,
        int outputSize,
        std::vector<float>& input,
        float* weights,
        float* biases,
        float*& d_input,
        CUDANet::Layers::ActivationType activationType
    ) {
        // Create Dense layer
        CUDANet::Layers::Dense denseLayer(inputSize, outputSize, activationType);

        // Set weights and biases
        denseLayer.setWeights(weights);
        denseLayer.setBiases(biases);

        // Allocate device memory
        cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * input.size());
        EXPECT_EQ(cudaStatus, cudaSuccess);

        // Copy input to device
        cudaStatus = cudaMemcpy(
            d_input, input.data(), sizeof(float) * input.size(),
            cudaMemcpyHostToDevice
        );
        EXPECT_EQ(cudaStatus, cudaSuccess);

        return denseLayer;
    }

    // Releases the device input buffer allocated in commonTestSetup().
    void commonTestTeardown(float* d_input) {
        // Free device memory
        cudaFree(d_input);
    }

    // Shared status of the most recent CUDA API call; written by the helper
    // above and by the test bodies themselves.
    cudaError_t cudaStatus;
};
|
||||
|
||||
TEST_F(DenseLayerTest, setWeights) {
    // Only exercises setWeights(): uploading a full weight matrix to a
    // freshly constructed layer must not trip any assertion.
    const int inputSize  = 4;
    const int outputSize = 5;

    // clang-format off
    std::vector<float> weights = {
        0.5f, 1.0f, 0.2f, 0.8f,
        1.2f, 0.3f, 1.5f, 0.4f,
        0.7f, 1.8f, 0.9f, 0.1f,
        0.4f, 2.0f, 0.6f, 1.1f,
        1.3f, 0.5f, 0.0f, 1.7f
    };
    // clang-format on

    CUDANet::Layers::Dense denseLayer(
        inputSize, outputSize, CUDANet::Layers::ActivationType::SIGMOID
    );
    denseLayer.setWeights(weights.data());
}
|
||||
|
||||
TEST_F(DenseLayerTest, ForwardUnitWeightMatrixLinear) {
    // Identity weights, unit biases, no activation: the layer must return
    // input[i] + 1 for each element.
    const int inputSize  = 3;
    const int outputSize = 3;

    std::vector<float> input = {1.0f, 2.0f, 3.0f};

    // Build a 3x3 identity matrix; the matrix is square, so walking the
    // diagonal is equivalent to the nested i==j scan.
    std::vector<float> weights(outputSize * inputSize, 0.0f);
    for (int d = 0; d < outputSize; ++d) {
        weights[d * outputSize + d] = 1.0f;
    }
    std::vector<float> biases(outputSize, 1.0f);

    float* d_input;
    float* d_output;

    CUDANet::Layers::Dense denseLayer = commonTestSetup(
        inputSize, outputSize, input, weights.data(), biases.data(), d_input,
        CUDANet::Layers::ActivationType::NONE
    );
    d_output = denseLayer.forward(d_input);

    std::vector<float> output(outputSize);
    cudaStatus = cudaMemcpy(
        output.data(), d_output, sizeof(float) * outputSize,
        cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Each output element is the corresponding input plus the unit bias.
    EXPECT_FLOAT_EQ(output[0], 2.0f);
    EXPECT_FLOAT_EQ(output[1], 3.0f);
    EXPECT_FLOAT_EQ(output[2], 4.0f);

    commonTestTeardown(d_input);
}
|
||||
|
||||
TEST_F(DenseLayerTest, ForwardRandomWeightMatrixRelu) {
    // Dense forward with ReLU: the negative pre-activation entry must be
    // clamped to zero.
    const int inputSize  = 5;
    const int outputSize = 4;

    std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, -5.0f};

    // clang-format off
    std::vector<float> weights = {
        0.5f, 1.2f, 0.7f, 0.4f,
        1.3f, 1.0f, 0.3f, 1.8f,
        2.0f, 0.5f, 0.2f, 1.5f,
        0.9f, 0.6f, 0.0f, 0.8f,
        0.4f, 0.1f, 1.1f, 1.7f
    };
    std::vector<float> biases = {0.2f, 0.5f, 0.7f, -1.1f};
    // clang-format on

    float* d_input;
    float* d_output;

    CUDANet::Layers::Dense denseLayer = commonTestSetup(
        inputSize, outputSize, input, weights.data(), biases.data(), d_input,
        CUDANet::Layers::ActivationType::RELU
    );
    d_output = denseLayer.forward(d_input);

    std::vector<float> output(outputSize);
    cudaStatus = cudaMemcpy(
        output.data(), d_output, sizeof(float) * outputSize,
        cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // weights * input = {0.1, 12.5, 8.3, -2.2}; plus biases = {0.3, 13, 9,
    // -3.3}; ReLU zeroes the last entry.
    const std::vector<float> expectedOutput = {0.3f, 13.0f, 9.0f, 0.0f};
    for (int i = 0; i < outputSize; ++i) {
        // small tolerance for floating-point comparison
        EXPECT_NEAR(output[i], expectedOutput[i], 1e-4);
    }

    commonTestTeardown(d_input);
}
|
||||
|
||||
TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) {
    // Dense forward followed by element-wise sigmoid.
    const int inputSize  = 5;
    const int outputSize = 4;

    // clang-format off
    std::vector<float> input = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
    std::vector<float> weights = {
        0.8f, 0.7f, 0.7f, 0.3f, 0.8f,
        0.1f, 0.4f, 0.8f, 0.0f, 0.2f,
        0.2f, 0.5f, 0.7f, 0.3f, 0.0f,
        0.1f, 0.7f, 0.6f, 1.0f, 0.4f
    };
    std::vector<float> biases = {0.1f, 0.2f, 0.3f, 0.4f};
    // clang-format on

    float* d_input;
    float* d_output;

    CUDANet::Layers::Dense denseLayer = commonTestSetup(
        inputSize, outputSize, input, weights.data(), biases.data(), d_input,
        CUDANet::Layers::ActivationType::SIGMOID
    );
    d_output = denseLayer.forward(d_input);

    std::vector<float> output(outputSize);
    cudaStatus = cudaMemcpy(
        output.data(), d_output, sizeof(float) * outputSize,
        cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // sigmoid(weights * input + biases), precomputed on the host.
    const std::vector<float> expectedOutput = {
        0.740775f, 0.652489f, 0.679179f, 0.790841f
    };
    for (int i = 0; i < outputSize; ++i) {
        EXPECT_NEAR(output[i], expectedOutput[i], 1e-5);
    }

    commonTestTeardown(d_input);
}
|
||||
|
||||
TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSoftmax) {
    // Dense forward followed by softmax; the outputs must match the
    // precomputed distribution and sum to one.
    const int inputSize  = 5;
    const int outputSize = 4;

    std::vector<float> input = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f};
    std::vector<float> weights = {
        0.5f, 0.1f, 0.1f, 0.4f, 0.2f,
        0.4f, 0.3f, 0.9f, 0.0f, 0.8f,
        0.8f, 0.4f, 0.6f, 0.2f, 0.0f,
        0.1f, 0.7f, 0.3f, 1.0f, 0.1f
    };
    std::vector<float> biases = {0.1f, 0.2f, 0.3f, 0.4f};

    float* d_input;
    float* d_output;

    CUDANet::Layers::Dense denseLayer = commonTestSetup(
        inputSize, outputSize, input, weights.data(), biases.data(), d_input,
        CUDANet::Layers::ActivationType::SOFTMAX
    );
    d_output = denseLayer.forward(d_input);

    std::vector<float> output(outputSize);
    cudaStatus = cudaMemcpy(
        output.data(), d_output, sizeof(float) * outputSize,
        cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    const std::vector<float> expected = {0.17124f, 0.28516f, 0.22208f, 0.32152f};

    float sum = 0.0f;
    for (int i = 0; i < outputSize; ++i) {
        sum += output[i];
        EXPECT_NEAR(output[i], expected[i], 1e-5f);
    }

    // Softmax output must be a probability distribution.
    EXPECT_NEAR(sum, 1.0f, 1e-5f);

    commonTestTeardown(d_input);
}
|
||||
19
test/cuda/layers/test_input.cu
Normal file
19
test/cuda/layers/test_input.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "input.cuh"
|
||||
|
||||
TEST(InputLayerTest, InputForward) {
    // forward() must upload the host buffer to the device unchanged; copy it
    // back and compare for an exact round trip.
    std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};

    CUDANet::Layers::Input inputLayer(6);
    float* d_output = inputLayer.forward(input.data());

    std::vector<float> roundTrip(6);
    cudaError_t status = cudaMemcpy(
        roundTrip.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(status, cudaSuccess);
    EXPECT_EQ(input, roundTrip);
}
|
||||
201
test/cuda/layers/test_max_pooling.cu
Normal file
201
test/cuda/layers/test_max_pooling.cu
Normal file
@@ -0,0 +1,201 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "max_pooling.cuh"
|
||||
|
||||
// Fixture for MaxPooling2d: tests fill the configuration members and call
// runTest() to execute a forward pass and verify the pooled output.
class MaxPoolingLayerTest : public ::testing::Test {
  protected:
    shape2d inputSize;
    int     nChannels;
    shape2d poolingSize;
    shape2d stride;
    shape2d padding;
    std::vector<float> input;     // host input, channel-major
    std::vector<float> expected;  // expected pooled output

    float *d_input;
    float *d_output;
    CUDANet::Layers::MaxPooling2d *maxPoolingLayer;

    virtual void SetUp() override {
        d_input         = nullptr;
        d_output        = nullptr;
        maxPoolingLayer = nullptr;
    }

    virtual void TearDown() override {
        // Only the input buffer is freed here; d_output is presumably owned
        // by the layer — TODO confirm against MaxPooling2d.
        if (d_input) {
            cudaFree(d_input);
        }
        delete maxPoolingLayer;
    }

    // Builds the layer from the fixture state, runs forward on the device,
    // and compares the copied-back result element-wise with `expected`.
    void runTest() {
        maxPoolingLayer = new CUDANet::Layers::MaxPooling2d(
            inputSize, nChannels, poolingSize, stride, padding,
            CUDANet::Layers::ActivationType::NONE
        );

        const size_t inputBytes = sizeof(float) * input.size();

        cudaError_t status = cudaMalloc((void **)&d_input, inputBytes);
        EXPECT_EQ(status, cudaSuccess);

        status = cudaMemcpy(
            d_input, input.data(), inputBytes, cudaMemcpyHostToDevice
        );
        EXPECT_EQ(status, cudaSuccess);

        d_output = maxPoolingLayer->forward(d_input);

        std::vector<float> output(maxPoolingLayer->getOutputSize());
        status = cudaMemcpy(
            output.data(), d_output, sizeof(float) * output.size(),
            cudaMemcpyDeviceToHost
        );
        EXPECT_EQ(status, cudaSuccess);

        // Max pooling picks values verbatim, so exact float equality holds.
        for (size_t i = 0; i < output.size(); ++i) {
            EXPECT_FLOAT_EQ(expected[i], output[i]);
        }
    }
};
|
||||
|
||||
TEST_F(MaxPoolingLayerTest, MaxPoolForwardTest) {
    // 2x2 pooling with stride 2 over a 4x4 two-channel input: each channel
    // reduces to 2x2.
    inputSize   = {4, 4};
    nChannels   = 2;
    poolingSize = {2, 2};
    stride      = {2, 2};
    padding     = {0, 0};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    expected = {0.619f, 0.732f, 0.712f, 0.742f,
                0.919f, 0.973f, 0.819f, 0.85f};

    runTest();
}
|
||||
|
||||
TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquareInputTest) {
    // 4x6 rectangular input; 2x2/stride-2 pooling yields 2x3 per channel.
    inputSize   = {4, 6};
    nChannels   = 2;
    poolingSize = {2, 2};
    stride      = {2, 2};
    padding     = {0, 0};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f, 0.123f, 0.234f,
        0.243f, 0.316f, 0.573f, 0.619f, 0.456f, 0.789f,
        0.712f, 0.055f, 0.243f, 0.316f, 0.654f, 0.987f,
        0.573f, 0.619f, 0.742f, 0.055f, 0.321f, 0.654f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f, 0.321f, 0.654f,
        0.073f, 0.362f, 0.973f, 0.059f, 0.654f, 0.987f,
        0.473f, 0.455f, 0.283f, 0.416f, 0.789f, 0.123f,
        0.532f, 0.819f, 0.732f, 0.850f, 0.987f, 0.321f
        // clang-format on
    };

    expected = {0.619f, 0.732f, 0.789f, 0.712f, 0.742f, 0.987f,
                0.919f, 0.973f, 0.987f, 0.819f, 0.85f,  0.987f};

    runTest();
}
|
||||
|
||||
TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquarePoolSizeTest) {
    // 2x3 rectangular pooling window: each channel reduces to 2x1.
    inputSize   = {4, 4};
    nChannels   = 2;
    poolingSize = {2, 3};
    stride      = {2, 2};
    padding     = {0, 0};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    expected = {0.732f, 0.742f, 0.973f, 0.819f};

    runTest();
}
|
||||
|
||||
TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquareStrideTest) {
    // Stride 1 vertically, 2 horizontally: each channel reduces to 3x2.
    inputSize   = {4, 4};
    nChannels   = 2;
    poolingSize = {2, 2};
    stride      = {1, 2};
    padding     = {0, 0};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    expected = {0.619f, 0.732f, 0.712f, 0.619f, 0.712f, 0.742f,
                0.919f, 0.973f, 0.473f, 0.973f, 0.819f, 0.85f};

    runTest();
}
|
||||
|
||||
TEST_F(MaxPoolingLayerTest, MaxPoolForwardNonSquarePaddingTest) {
    // Asymmetric padding (0 vertical, 1 horizontal): each channel pools to
    // 2x3. (The original comment said "non-square stride" — the stride here
    // is square; only the padding differs per dimension.)
    inputSize   = {4, 4};
    nChannels   = 2;
    poolingSize = {2, 2};
    stride      = {2, 2};
    padding     = {0, 1};

    input = {
        // clang-format off
        // Channel 0
        0.573f, 0.619f, 0.732f, 0.055f,
        0.243f, 0.316f, 0.573f, 0.619f,
        0.712f, 0.055f, 0.243f, 0.316f,
        0.573f, 0.619f, 0.742f, 0.055f,
        // Channel 1
        0.473f, 0.919f, 0.107f, 0.073f,
        0.073f, 0.362f, 0.973f, 0.059f,
        0.473f, 0.455f, 0.283f, 0.416f,
        0.532f, 0.819f, 0.732f, 0.850f
        // clang-format on
    };

    expected = {0.573f, 0.732f, 0.619f, 0.712f, 0.742f, 0.316f,
                0.473f, 0.973f, 0.073f, 0.532f, 0.819f, 0.85f};

    runTest();
}
|
||||
27
test/cuda/layers/test_output.cu
Normal file
27
test/cuda/layers/test_output.cu
Normal file
@@ -0,0 +1,27 @@
|
||||
#include <cuda_runtime_api.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "output.cuh"
|
||||
|
||||
TEST(OutputLayerTest, OutputForward) {
    // The output layer copies a device buffer back to the host; stage data
    // on the device, forward it, and verify a lossless round trip.
    std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};

    float* d_input = nullptr;
    cudaError_t status = cudaMalloc((void**)&d_input, sizeof(float) * 6);
    EXPECT_EQ(status, cudaSuccess);

    status = cudaMemcpy(
        d_input, input.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
    );
    EXPECT_EQ(status, cudaSuccess);

    CUDANet::Layers::Output outputLayer(6);
    float* h_output = outputLayer.forward(d_input);

    for (int i = 0; i < 6; ++i) {
        EXPECT_EQ(input[i], h_output[i]);
    }

    cudaFree(d_input);
}
|
||||
39
test/cuda/utils/test_vector.cu
Normal file
39
test/cuda/utils/test_vector.cu
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "vector.cuh"
|
||||
|
||||
// Verifies CUDANet::Utils::mean: element 0 of the result buffer must hold
// the arithmetic mean of the 10-element input vector.
TEST(VectorTest, TestVectorMean) {
    cudaError_t cudaStatus;

    // Fix: use an integral element count for sizing allocations (the
    // original used a float); the float copy is what the kernel reads.
    const int n  = 10;
    float length = 10;

    std::vector<float> input = {0.44371f, 0.20253f, 0.73232f, 0.40378f,
                                0.93348f, 0.72756f, 0.63388f, 0.5251f,
                                0.23973f, 0.52233f};

    float* d_vec = nullptr;
    cudaStatus = cudaMalloc((void **)&d_vec, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // Sized like the input — presumably holds per-block partials; only
    // element 0 is asserted below. TODO confirm against Utils::mean.
    float* d_mean = nullptr;
    cudaStatus = cudaMalloc((void **)&d_mean, sizeof(float) * n);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    float* d_length = nullptr;
    cudaStatus = cudaMalloc((void **)&d_length, sizeof(float));
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMemcpy(
        d_vec, input.data(), sizeof(float) * n, cudaMemcpyHostToDevice
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus =
        cudaMemcpy(d_length, &length, sizeof(float), cudaMemcpyHostToDevice);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    CUDANet::Utils::mean(d_vec, d_mean, d_length, n);

    std::vector<float> mean(n);
    cudaStatus = cudaMemcpy(
        mean.data(), d_mean, sizeof(float) * n, cudaMemcpyDeviceToHost
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    // mean(input) = 5.36442 / 10
    float expected_mean = 0.5364f;
    EXPECT_NEAR(mean[0], expected_mean, 1e-4);

    // Fix: the original test leaked all three device allocations.
    cudaFree(d_vec);
    cudaFree(d_mean);
    cudaFree(d_length);
}
|
||||
Reference in New Issue
Block a user