From e419a93408fab40049da0750f4e2cc7b2ed030e7 Mon Sep 17 00:00:00 2001
From: LordMathis
Date: Mon, 8 Apr 2024 22:09:18 +0200
Subject: [PATCH] Fix softmax sum kernel

---
 include/kernels/activation_functions.cuh  |  3 +-
 src/kernels/activation_functions.cu       |  9 +++--
 src/layers/activation.cu                  |  6 ++--
 test/kernels/test_activation_functions.cu | 43 ++++++++++++++++++++---
 test/layers/test_activation.cu            |  2 ++
 5 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/include/kernels/activation_functions.cuh b/include/kernels/activation_functions.cuh
index ce85e0c..fee787e 100644
--- a/include/kernels/activation_functions.cuh
+++ b/include/kernels/activation_functions.cuh
@@ -51,8 +51,7 @@ __global__ void softmax_exp(
  */
 __global__ void softmax_sum(
     const float* __restrict__ d_vector,
-    float* __restrict__ d_output,
-    const unsigned int w
+    float* __restrict__ d_output
 );
 
 /**
diff --git a/src/kernels/activation_functions.cu b/src/kernels/activation_functions.cu
index 1642e80..5864043 100644
--- a/src/kernels/activation_functions.cu
+++ b/src/kernels/activation_functions.cu
@@ -44,15 +44,14 @@ __global__ void Kernels::softmax_exp(
 
 __global__ void Kernels::softmax_sum(
     const float* __restrict__ d_vector,
-    float* __restrict__ d_output,
-    const unsigned int w
+    float* __restrict__ d_output
 ) {
     __shared__ float partial_sum[BLOCK_SIZE];
-    int i = blockIdx.x * blockDim.x * 2 + threadIdx.x;
-    partial_sum[threadIdx.x] = d_vector[i] + d_vector[i + blockDim.x];
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+    partial_sum[threadIdx.x] = d_vector[i];
     __syncthreads();
 
-    for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
+    for (int s = blockDim.x / 2; s > 0; s >>= 1) {
         if (threadIdx.x < s) {
             partial_sum[threadIdx.x] += partial_sum[threadIdx.x + s];
         }
diff --git a/src/layers/activation.cu b/src/layers/activation.cu
index d874ec5..8c002cd 100644
--- a/src/layers/activation.cu
+++ b/src/layers/activation.cu
@@ -42,12 +42,12 @@ void Activation::activate(float* __restrict__ d_input) {
     );
 
     Kernels::softmax_sum<<<gridSize, BLOCK_SIZE>>>(
-        d_input, d_softmax_sum, length
+        d_input, d_softmax_sum
     );
 
     Kernels::softmax_sum<<<1, BLOCK_SIZE>>>(
-        d_softmax_sum, d_softmax_sum, length
-    );
+        d_softmax_sum, d_softmax_sum
+    );
 
     Kernels::softmax_div<<<gridSize, BLOCK_SIZE>>>(
         d_input, d_input, d_softmax_sum, length
diff --git a/test/kernels/test_activation_functions.cu b/test/kernels/test_activation_functions.cu
index 41dfe90..72b0e73 100644
--- a/test/kernels/test_activation_functions.cu
+++ b/test/kernels/test_activation_functions.cu
@@ -4,6 +4,7 @@
 #include <vector>
 
 #include "activation_functions.cuh"
+#include "cuda_helper.cuh"
 
 TEST(ActivationFunctionsTest, SigmoidSanityCheck) {
     cudaError_t cudaStatus;
@@ -89,12 +90,46 @@ TEST(ActivationFunctionsTest, SoftmaxExpTest) {
 TEST(ActivationFunctionsTest, SoftmaxSumTest) {
     cudaError_t cudaStatus;
-    std::vector<float> input = {5886928896.0f,     1.06102872080384e+16f,
-                                28771323215872.0f, 2204012904448.0f,
-                                308226162688.0f,   63922983927808.0f};
+    const int n = 10;
+    std::vector<float> input(n);
+    for (int i = 0; i < n; i++) {
+        input[i] = i;
+    }
+
+    const float expected = n * (n - 1) / 2;
 
     float* d_input;
+    float* d_sum;
 
-    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * 6);
+    const int gridSize = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
+
+    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * n);
     EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    cudaStatus = cudaMalloc((void**)&d_sum, sizeof(float) * n);
+    EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    cudaStatus =
+        cudaMemcpy(d_input, input.data(), sizeof(float) * n, cudaMemcpyHostToDevice);
+    EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    CUDANet::Kernels::softmax_sum<<<gridSize, BLOCK_SIZE>>>(
+        d_input, d_sum
+    );
+
+    CUDANet::Kernels::softmax_sum<<<1, BLOCK_SIZE>>>(
+        d_sum, d_sum
+    );
+
+    CUDANet::Kernels::softmax_sum<<<1, BLOCK_SIZE>>>(
+        d_sum, d_sum
+    );
+
+    std::vector<float> sum(n);
+    cudaStatus = cudaMemcpy(
+        sum.data(), d_sum, sizeof(float) * n, cudaMemcpyDeviceToHost
+    );
+    EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    EXPECT_FLOAT_EQ(expected, sum[0]);
 }
\ No newline at end of file
diff --git a/test/layers/test_activation.cu b/test/layers/test_activation.cu
index cf2b0d8..db860ef 100644
--- a/test/layers/test_activation.cu
+++ b/test/layers/test_activation.cu
@@ -58,6 +58,8 @@ TEST(ActivationTest, SoftmaxTest2) {
         EXPECT_NEAR(output[i], expected[i], 1e-5f);
     }
 
+    std::cout << sum << std::endl;
+
     EXPECT_NEAR(sum, 1.0f, 1e-5f);
 
     cudaFree(d_input);
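
Note on the fix, with a standalone sketch for sanity-checking it: the patched
softmax_sum is a standard shared-memory tree reduction, and the change drops
the two-elements-per-thread load (along with the now-unused width parameter w)
so each thread loads exactly one element, matching how the launches in
activation.cu size their grids. The program below reproduces the post-patch
kernel outside the test suite. The BLOCK_SIZE value, the kernel tail that
writes partial_sum[0] to d_output[blockIdx.x], and the zero-padding of the
input to a full block are assumptions of this sketch, not shown in the hunks
above.

// sketch.cu -- standalone check of the post-patch softmax_sum reduction.
// Build: nvcc sketch.cu -o sketch
#include <cstdio>
#include <cuda_runtime.h>

#define BLOCK_SIZE 256

__global__ void softmax_sum(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output
) {
    __shared__ float partial_sum[BLOCK_SIZE];

    // One element per thread, matching the patched indexing.
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    partial_sum[threadIdx.x] = d_vector[i];
    __syncthreads();

    // Tree reduction in shared memory: halve the active threads each step.
    for (int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) {
            partial_sum[threadIdx.x] += partial_sum[threadIdx.x + s];
        }
        __syncthreads();
    }

    // Assumed tail: thread 0 publishes the block's partial sum.
    if (threadIdx.x == 0) {
        d_output[blockIdx.x] = partial_sum[0];
    }
}

int main() {
    const int n = 10;

    // The kernel loads blockDim.x elements per block unconditionally, so
    // zero-pad the input to a full block (an assumption of this sketch;
    // padding with zeros leaves the sum unchanged).
    float h_input[BLOCK_SIZE] = {0.0f};
    for (int i = 0; i < n; i++) {
        h_input[i] = (float)i;
    }

    float* d_input;
    float* d_sum;
    cudaMalloc((void**)&d_input, sizeof(float) * BLOCK_SIZE);
    cudaMalloc((void**)&d_sum, sizeof(float));
    cudaMemcpy(d_input, h_input, sizeof(float) * BLOCK_SIZE,
               cudaMemcpyHostToDevice);

    // One block covers n <= BLOCK_SIZE; larger inputs need the extra
    // single-block pass over the per-block partial sums, as in activation.cu.
    softmax_sum<<<1, BLOCK_SIZE>>>(d_input, d_sum);

    float sum = 0.0f;
    cudaMemcpy(&sum, d_sum, sizeof(float), cudaMemcpyDeviceToHost);
    printf("sum = %.1f (expected 45.0)\n", sum);  // 0 + 1 + ... + 9 = 45

    cudaFree(d_input);
    cudaFree(d_sum);
    return 0;
}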