From 18522c2deaa7e17d48294d437cbe0e1ab8852fcf Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Thu, 11 Apr 2024 22:52:41 +0200
Subject: [PATCH] Cleanup and refactor

---
 include/layers/activation.cuh             |  6 +--
 src/layers/activation.cu                  | 62 ++++++++++------------
 src/layers/dense.cu                       |  4 +-
 src/utils/vector.cu                       |  1 -
 test/kernels/test_activation_functions.cu | 18 -------
 test/kernels/test_matmul.cu               |  5 +-
 test/layers/test_dense.cu                 |  9 +---
 test/model/test_model.cu                  |  4 +-
 tools/dense_test.py                       | 44 ++++++++++++++++
 9 files changed, 81 insertions(+), 72 deletions(-)
 create mode 100644 tools/dense_test.py

diff --git a/include/layers/activation.cuh b/include/layers/activation.cuh
index dab7aa9..0496f7b 100644
--- a/include/layers/activation.cuh
+++ b/include/layers/activation.cuh
@@ -28,7 +28,7 @@ class Activation {
      * @param activation Type of activation
      * @param length Length of the input
      */
-    Activation(ActivationType activation, const unsigned int length);
+    Activation(ActivationType activation, const int length);
 
     /**
      * @brief Destroy the Activation object
@@ -46,8 +46,8 @@ class Activation {
   private:
     ActivationType activationType;
 
-    unsigned int length;
-    unsigned int gridSize;
+    int length;
+    int gridSize;
 
     float* d_softmax_sum;
     float* d_max;
diff --git a/src/layers/activation.cu b/src/layers/activation.cu
index 15175b4..fc71bd2 100644
--- a/src/layers/activation.cu
+++ b/src/layers/activation.cu
@@ -1,23 +1,22 @@
-#include "activation.cuh"
-
-#include "cuda_helper.cuh"
-#include "activation_functions.cuh"
-#include "matmul.cuh"
-
 #include 
 #include 
 
+#include "activation.cuh"
+#include "activation_functions.cuh"
+#include "cuda_helper.cuh"
+#include "matmul.cuh"
+#include "vector.cuh"
+
 using namespace CUDANet::Layers;
 
-Activation::Activation(ActivationType activation, const unsigned int length)
+Activation::Activation(ActivationType activation, const int length)
     : activationType(activation), length(length) {
-
     if (activationType == SOFTMAX) {
-        d_softmax_sum = nullptr;
-        CUDA_CHECK(cudaMalloc((void**)&d_softmax_sum, sizeof(float) * length));
-
         d_max = nullptr;
         CUDA_CHECK(cudaMalloc((void**)&d_max, sizeof(float) * length));
+
+        d_softmax_sum = nullptr;
+        CUDA_CHECK(cudaMalloc((void**)&d_softmax_sum, sizeof(float) * length));
     }
 
     gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -26,10 +25,13 @@ Activation::Activation(ActivationType activation, const unsigned int length)
 Activation::~Activation() {
     if (activationType == SOFTMAX) {
         cudaFree(d_softmax_sum);
+        cudaFree(d_max);
     }
 }
 
-void Activation::activate(float* __restrict__ d_input) {
+void Activation::activate(float* d_input) {
+
+    // float sum = 0.0f;
 
     switch (activationType) {
         case SIGMOID:
 
             break;
 
         case RELU:
-            Kernels::relu<<<gridSize, BLOCK_SIZE>>>(
-                d_input, d_input, length
-            );
+            Kernels::relu<<<gridSize, BLOCK_SIZE>>>(d_input, d_input, length);
             break;
 
         case SOFTMAX:
 
             // Find max value
-            Kernels::max_reduce<<<gridSize, BLOCK_SIZE>>>(
-                d_input, d_max
-            );
-            Kernels::max_reduce<<<1, BLOCK_SIZE>>>(
-                d_max, d_max
-            );
+            Utils::max(d_input, d_max, length);
 
             // Subtract max value to improve numerical stability
             Kernels::vec_scalar_sub<<<gridSize, BLOCK_SIZE>>>(
-                d_input, d_max, d_input, length
+                d_input, d_input, d_max, length
             );
 
-            // Compute softmax
-            Kernels::softmax_exp<<<gridSize, BLOCK_SIZE>>>(
+            // Compute exponentials
+            Kernels::vec_exp<<<gridSize, BLOCK_SIZE>>>(
                 d_input, d_input, length
             );
 
-            Kernels::softmax_sum<<<gridSize, BLOCK_SIZE>>>(
-                d_input, d_softmax_sum
-            );
+            // Find sum
+            Utils::sum(d_input, d_softmax_sum, length);
 
-            Kernels::softmax_sum<<<1, BLOCK_SIZE>>>(
-                d_softmax_sum, d_softmax_sum
-            );
-
-            Kernels::softmax_div<<<gridSize, BLOCK_SIZE>>>(
+            Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
                 d_input, d_input, d_softmax_sum, length
             );
+
             break;
 
         default:
-            break;
+            break;
     }
-}
\ No newline at end of file
+
+    cudaDeviceSynchronize();
+}
+
diff --git a/src/layers/dense.cu b/src/layers/dense.cu
index 0ab538d..15a26c9 100644
--- a/src/layers/dense.cu
+++ b/src/layers/dense.cu
@@ -23,8 +23,6 @@ Dense::Dense(
     weights.resize(outputSize * inputSize);
     biases.resize(outputSize);
 
-    activation = Activation(activationType, outputSize);
-
     initializeWeights();
     initializeBiases();
 
@@ -46,6 +44,8 @@ Dense::Dense(
     forwardGridSize =
         (std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
     biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
+
+    activation = Activation(activationType, outputSize);
 }
 
 Dense::~Dense() {
diff --git a/src/utils/vector.cu b/src/utils/vector.cu
index 6fb7e23..ea97be8 100644
--- a/src/utils/vector.cu
+++ b/src/utils/vector.cu
@@ -50,7 +50,6 @@ void Utils::sum(float* d_vec, float* d_sum, const unsigned int length) {
     int remaining = gridSize;
     while (remaining > 1) {
-        std::cout << remaining << std::endl;
         int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
         CUDANet::Kernels::sum_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_sum, d_sum, remaining);
         remaining = blocks_needed;
     }
diff --git a/test/kernels/test_activation_functions.cu b/test/kernels/test_activation_functions.cu
index 714db19..d52f7e7 100644
--- a/test/kernels/test_activation_functions.cu
+++ b/test/kernels/test_activation_functions.cu
@@ -47,21 +47,3 @@ TEST(ActivationFunctionsTest, SigmoidSanityCheck) {
 
     cudaDeviceReset();
 }
-
-// void print_vec(float* d_vec, int length) {
-
-//     std::vector<float> h_vec(length);
-//     CUDA_CHECK(cudaMemcpy(
-//         h_vec.data(), d_vec, sizeof(float) * length, cudaMemcpyDeviceToHost
-//     ));
-
-//     float sum = 0.0f;
-
-//     for (int i = 0; i < length; ++i) {
-//         std::cout << h_vec[i] << ", ";
-//         sum += h_vec[i];
-//     }
-
-//     std::cout << std::endl;
-
-// }
\ No newline at end of file
diff --git a/test/kernels/test_matmul.cu b/test/kernels/test_matmul.cu
index d91fb9a..2364cc3 100644
--- a/test/kernels/test_matmul.cu
+++ b/test/kernels/test_matmul.cu
@@ -61,7 +61,7 @@ TEST(MatMulTest, MatVecMulTest) {
         for (int j = 0; j < w; j++) {
             sum += matrix[i * w + j] * vector[j];
         }
-        EXPECT_NEAR(sum, output_gpu[i], 1e-5);
+        EXPECT_NEAR(sum, output_gpu[i], 1e-5f);
     }
 
     cudaFree(d_matrix);
@@ -151,7 +151,7 @@ TEST(MatMulTest, VecExpTest) {
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
     for (int i = 0; i < 6; i++) {
-        EXPECT_NEAR(expected[i], output[i], 1e7);
+        EXPECT_NEAR(expected[i], output[i], 1e7f);
     }
 
     cudaFree(d_input);
@@ -193,7 +193,6 @@ TEST(MatMulTest, SumReduceTest) {
     int remaining = gridSize;
     while (remaining > 1) {
-        std::cout << remaining << std::endl;
         int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
         CUDANet::Kernels::sum_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_sum, d_sum, remaining);
         remaining = blocks_needed;
     }
diff --git a/test/layers/test_dense.cu b/test/layers/test_dense.cu
index 1365e0b..1491915 100644
--- a/test/layers/test_dense.cu
+++ b/test/layers/test_dense.cu
@@ -242,20 +242,13 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSoftmax) {
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
     std::vector<float> expected = {0.17124f, 0.28516f, 0.22208f, 0.32152f};
-    // std::vector<float> expected = {0.46f, 0.97f, 0.72f, 1.09f};
 
     float sum = 0.0f;
 
-    for (int i = 0; i < outputSize; ++i) {
-        std::cout << output[i] << ", ";
-    }
-    std::cout << std::endl;
-
     for (int i = 0; i < outputSize; ++i) {
         sum += output[i];
-        EXPECT_NEAR(output[i], expected[i], 1e-5);
+        EXPECT_NEAR(output[i], expected[i], 1e-5f);
     }
 
-    std::cout << std::endl;
 
     EXPECT_NEAR(sum, 1.0f, 1e-5f);
diff --git a/test/model/test_model.cu b/test/model/test_model.cu
index bdd52c6..687e8ba 100644
--- a/test/model/test_model.cu
+++ b/test/model/test_model.cu
@@ -105,11 +105,9 @@ TEST(Model, TestModelPredict) {
     // float sum = 0.0f;
     for (int i = 0; i < outputSize; ++i) {
         sum += output[i];
-        std::cout << output[i] << " ";
     }
-    std::cout << std::endl;
 
-    EXPECT_NEAR(sum, 1.0f, 1e-2f);
+    EXPECT_NEAR(sum, 1.0f, 1e-5f);
 
     cudaDeviceReset();
 }
\ No newline at end of file
diff --git a/tools/dense_test.py b/tools/dense_test.py
new file mode 100644
index 0000000..886e95a
--- /dev/null
+++ b/tools/dense_test.py
@@ -0,0 +1,44 @@
+import torch
+from utils import print_cpp_vector
+
+def gen_dense_softmax_test():
+
+    input = torch.tensor([
+        0.1, 0.2, 0.3, 0.4, 0.5
+    ])
+
+    weights = torch.tensor([
+        0.5, 0.1, 0.1, 0.4, 0.2,
+        0.4, 0.3, 0.9, 0.0, 0.8,
+        0.8, 0.4, 0.6, 0.2, 0.0,
+        0.1, 0.7, 0.3, 1.0, 0.1
+    ]).reshape(4, 5)
+
+    biases = torch.tensor([
+        0.1, 0.2, 0.3, 0.4
+    ])
+
+    dense = torch.nn.Linear(5, 4)
+    dense.weight = torch.nn.Parameter(weights)
+    dense.bias = torch.nn.Parameter(biases)
+
+    output = dense(input)
+    print_cpp_vector(output)
+
+    # Manual softmax
+    softmax_exp = torch.exp(output)
+    print(softmax_exp)
+
+    softmax_sum = torch.sum(softmax_exp, dim=0)
+    print(softmax_sum)
+
+    souftmax_out = softmax_exp / softmax_sum
+    print(souftmax_out)
+
+
+    softmax = torch.nn.Softmax(dim=0)(output)
+    print_cpp_vector(softmax)
+
+
+if __name__ == "__main__":
+    gen_dense_softmax_test()
\ No newline at end of file