From bf7c961b9e0df1588fafffa998932b7e02b71900 Mon Sep 17 00:00:00 2001 From: LordMathis Date: Thu, 11 Apr 2024 19:55:02 +0200 Subject: [PATCH] Add cudaDeviceReset at the end of each test --- test/kernels/test_matmul.cu | 13 ++++++++ test/layers/test_activation.cu | 6 ++-- test/layers/test_avg_pooling.cu | 2 ++ test/layers/test_concat.cu | 2 ++ test/layers/test_conv2d.cu | 1 + test/layers/test_dense.cu | 57 ++++++++++++++++++++++++++++++--- test/layers/test_input.cu | 2 ++ test/layers/test_max_pooling.cu | 2 ++ test/layers/test_output.cu | 3 ++ test/model/test_model.cu | 15 +++++++-- 10 files changed, 93 insertions(+), 10 deletions(-) diff --git a/test/kernels/test_matmul.cu b/test/kernels/test_matmul.cu index cd8a1a6..9763f4a 100644 --- a/test/kernels/test_matmul.cu +++ b/test/kernels/test_matmul.cu @@ -45,6 +45,8 @@ TEST(MatMulTest, MatVecMulTest) { int THREADS_PER_BLOCK = std::max(w, h); int BLOCKS = 1; + CUDANet::Kernels::clear<<<BLOCKS, THREADS_PER_BLOCK>>>(d_output, h); + CUDANet::Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK>>>(d_matrix, d_vector, d_output, w, h); cudaStatus = cudaDeviceSynchronize(); EXPECT_EQ(cudaStatus, cudaSuccess); @@ -60,6 +62,12 @@ TEST(MatMulTest, MatVecMulTest) { } EXPECT_NEAR(sum, output_gpu[i], 1e-5); } + + cudaFree(d_matrix); + cudaFree(d_vector); + cudaFree(d_output); + + cudaDeviceReset(); } TEST(MatMulTest, MaxReduceTest) { @@ -89,4 +97,9 @@ TEST(MatMulTest, MaxReduceTest) { EXPECT_EQ(cudaStatus, cudaSuccess); EXPECT_EQ(output[0], 0.932f); + + cudaFree(d_input); + cudaFree(d_output); + + cudaDeviceReset(); } \ No newline at end of file diff --git a/test/layers/test_activation.cu b/test/layers/test_activation.cu index db860ef..e629df2 100644 --- a/test/layers/test_activation.cu +++ b/test/layers/test_activation.cu @@ -31,6 +31,7 @@ TEST(ActivationTest, SoftmaxTest1) { EXPECT_NEAR(sum, 1.0f, 1e-5f); cudaFree(d_input); + cudaDeviceReset(); } TEST(ActivationTest, SoftmaxTest2) { @@ -58,9 +59,8 @@ TEST(ActivationTest, SoftmaxTest2) { EXPECT_NEAR(output[i], expected[i], 1e-5f); } - 
std::cout << sum << std::endl; - - EXPECT_NEAR(sum, 1.0f, 1e-5f); + EXPECT_NEAR(sum, 1.0f, 1e-2f); cudaFree(d_input); + cudaDeviceReset(); } \ No newline at end of file diff --git a/test/layers/test_avg_pooling.cu b/test/layers/test_avg_pooling.cu index 2fcacec..5e1401d 100644 --- a/test/layers/test_avg_pooling.cu +++ b/test/layers/test_avg_pooling.cu @@ -67,4 +67,6 @@ TEST(AvgPoolingLayerTest, AvgPoolForwardTest) { cudaFree(d_input); cudaFree(d_output); + + cudaDeviceReset(); } diff --git a/test/layers/test_concat.cu b/test/layers/test_concat.cu index a5adc97..bb14134 100644 --- a/test/layers/test_concat.cu +++ b/test/layers/test_concat.cu @@ -34,4 +34,6 @@ TEST(ConcatLayerTest, Init) { EXPECT_EQ(output[i + 5], inputB[i]); } cudaFree(d_output); + + cudaDeviceReset(); } \ No newline at end of file diff --git a/test/layers/test_conv2d.cu b/test/layers/test_conv2d.cu index 366d920..9e398f5 100644 --- a/test/layers/test_conv2d.cu +++ b/test/layers/test_conv2d.cu @@ -47,6 +47,7 @@ class Conv2dTest : public ::testing::Test { void commonTestTeardown(float* d_input) { // Free device memory cudaFree(d_input); + cudaDeviceReset(); } cudaError_t cudaStatus; diff --git a/test/layers/test_dense.cu b/test/layers/test_dense.cu index c53af44..1365e0b 100644 --- a/test/layers/test_dense.cu +++ b/test/layers/test_dense.cu @@ -41,6 +41,7 @@ class DenseLayerTest : public ::testing::Test { void commonTestTeardown(float* d_input) { // Free device memory cudaFree(d_input); + cudaDeviceReset(); } cudaError_t cudaStatus; @@ -199,10 +200,6 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) { ); EXPECT_EQ(cudaStatus, cudaSuccess); - // weights * input = 0.95, 0.43, 0.45, 0.93 - // + biases = 1.05, 0.63, 0.75, 1.33 - // sigmoid = 0.740775, 0.652489, 0.679179, 0.790841 - std::vector<float> expectedOutput = { 0.740775f, 0.652489f, 0.679179f, 0.790841f }; @@ -213,3 +210,55 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) { commonTestTeardown(d_input); } + +TEST_F(DenseLayerTest, 
ForwardRandomWeightMatrixSoftmax) { + int inputSize = 5; + int outputSize = 4; + + std::vector<float> input = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f}; + std::vector<float> weights = { + 0.5f, 0.1f, 0.1f, 0.4f, 0.2f, + 0.4f, 0.3f, 0.9f, 0.0f, 0.8f, + 0.8f, 0.4f, 0.6f, 0.2f, 0.0f, + 0.1f, 0.7f, 0.3f, 1.0f, 0.1f + }; + std::vector<float> biases = {0.1f, 0.2f, 0.3f, 0.4f}; + + float* d_input; + float* d_output; + + CUDANet::Layers::Dense denseLayer = commonTestSetup( + inputSize, outputSize, input, weights.data(), biases.data(), d_input, + CUDANet::Layers::ActivationType::SOFTMAX + ); + + d_output = denseLayer.forward(d_input); + + std::vector<float> output(outputSize); + cudaStatus = cudaMemcpy( + output.data(), d_output, sizeof(float) * outputSize, + cudaMemcpyDeviceToHost + ); + EXPECT_EQ(cudaStatus, cudaSuccess); + + std::vector<float> expected = {0.17124f, 0.28516f, 0.22208f, 0.32152f}; + // std::vector<float> expected = {0.46f, 0.97f, 0.72f, 1.09f}; + + float sum = 0.0f; + + for (int i = 0; i < outputSize; ++i) { + std::cout << output[i] << ", "; + } + std::cout << std::endl; + + for (int i = 0; i < outputSize; ++i) { + sum += output[i]; + EXPECT_NEAR(output[i], expected[i], 1e-5); + } + std::cout << std::endl; + + EXPECT_NEAR(sum, 1.0f, 1e-5f); + + commonTestTeardown(d_input); + +} \ No newline at end of file diff --git a/test/layers/test_input.cu b/test/layers/test_input.cu index c4d4561..6b09559 100644 --- a/test/layers/test_input.cu +++ b/test/layers/test_input.cu @@ -14,4 +14,6 @@ TEST(InputLayerTest, InputForward) { ); EXPECT_EQ(cudaStatus, cudaSuccess); EXPECT_EQ(input, output); + + cudaDeviceReset(); } \ No newline at end of file diff --git a/test/layers/test_max_pooling.cu b/test/layers/test_max_pooling.cu index b704fa7..e9b59a9 100644 --- a/test/layers/test_max_pooling.cu +++ b/test/layers/test_max_pooling.cu @@ -67,4 +67,6 @@ TEST(MaxPoolingLayerTest, MaxPoolForwardTest) { cudaFree(d_input); cudaFree(d_output); + + cudaDeviceReset(); } diff --git a/test/layers/test_output.cu 
b/test/layers/test_output.cu index 3fa529d..bae4ca4 100644 --- a/test/layers/test_output.cu +++ b/test/layers/test_output.cu @@ -21,4 +21,7 @@ TEST(OutputLayerTest, OutputForward) { for (int i = 0; i < 6; ++i) { EXPECT_EQ(input[i], h_output[i]); } + + cudaFree(d_input); + cudaDeviceReset(); } \ No newline at end of file diff --git a/test/model/test_model.cu b/test/model/test_model.cu index 0702cf8..bdd52c6 100644 --- a/test/model/test_model.cu +++ b/test/model/test_model.cu @@ -10,11 +10,18 @@ TEST(Model, TestModelPredict) { int inputChannels = 2; int outputSize = 6; + int kernelSize = 3; + int stride = 1; + int numFilters = 2; + + int poolingSize = 2; + int poolingStride = 2; + CUDANet::Model model(inputSize, inputChannels, outputSize); // Conv2d CUDANet::Layers::Conv2d conv2d( - inputSize, inputChannels, 3, 1, 2, CUDANet::Layers::Padding::VALID, + inputSize, inputChannels, kernelSize, stride, numFilters, CUDANet::Layers::Padding::VALID, CUDANet::Layers::ActivationType::NONE ); // weights 6*6*2*2 @@ -46,7 +53,7 @@ TEST(Model, TestModelPredict) { // maxpool2d CUDANet::Layers::MaxPooling2D maxpool2d( - 6, 2, 2, 2, CUDANet::Layers::ActivationType::RELU + inputSize - kernelSize + 1, numFilters, poolingSize, poolingStride, CUDANet::Layers::ActivationType::RELU ); model.addLayer("maxpool2d", &maxpool2d); @@ -102,5 +109,7 @@ TEST(Model, TestModelPredict) { } std::cout << std::endl; - EXPECT_NEAR(sum, 1.0f, 1e-5f); + EXPECT_NEAR(sum, 1.0f, 1e-2f); + + cudaDeviceReset(); } \ No newline at end of file