diff --git a/src/layers/batch_norm.cu b/src/layers/batch_norm.cu
index 27f8876..b9f9fa9 100644
--- a/src/layers/batch_norm.cu
+++ b/src/layers/batch_norm.cu
@@ -42,8 +42,9 @@ BatchNorm::BatchNorm(
     CUDA_CHECK(cudaMalloc((void **)&d_biases, sizeof(float) * inputChannels));
 
     d_length = nullptr;
+    float length = (float) inputSize * inputSize;
     CUDA_CHECK(cudaMalloc((void **)&d_length, sizeof(float)));
-    CUDA_CHECK(cudaMemset(d_length, inputSize * inputSize, sizeof(float)));
+    CUDA_CHECK(cudaMemcpy(d_length, &length, sizeof(float), cudaMemcpyHostToDevice));
 
     d_epsilon = nullptr;
     float epsilon = 1e-5f;
@@ -124,21 +125,12 @@ float *BatchNorm::forward(const float *d_input) {
 
     for (int i = 0; i < inputChannels; i++) {
         // Compute mean
-        // Sum over all values
-        Utils::sum(
+        Utils::mean(
             d_input + i * inputSize * inputSize,
             d_mean,
-            inputSize * inputSize
-        );
-
-        // Divide sum by length -> mean
-        Kernels::vec_scalar_div<<<...>>>(
-            d_mean,
-            d_mean,
             d_length,
             inputSize * inputSize
         );
-        CUDA_CHECK(cudaGetLastError());
 
         // Subtract mean from input
         Kernels::vec_scalar_sub<<<...>>>(
diff --git a/test/layers/test_batch_norm.cu b/test/layers/test_batch_norm.cu
index c791d33..da54619 100644
--- a/test/layers/test_batch_norm.cu
+++ b/test/layers/test_batch_norm.cu
@@ -71,7 +71,7 @@ TEST(BatchNormLayerTest, BatchNormSmallForwardTest) {
     // std::cout << "BatchNorm: " << std::endl;
     for (int i = 0; i < output.size(); i++) {
-        EXPECT_EQ(output[i], expected[i]);
+        EXPECT_NEAR(output[i], expected[i], 1e-5);
         // std::cout << output[i] << " ";
     }
     // std::cout << std::endl;
 
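
Note on the d_length fix: cudaMemset(ptr, value, count) fills count bytes, each with the low byte of value, so the removed call stored the bit pattern 0x10101010 for a 4x4 input (a garbage float around 2.8e-29) rather than 16.0f. The new code builds the float on the host and copies its bytes to the device. A minimal stand-alone repro, using illustrative values that are not project code:

// memset_vs_memcpy.cu -- illustrative repro, not project code.
#include <cstdio>
#include <cuda_runtime.h>

int main() {
    const int inputSize = 4;                 // pretend a 4x4 feature map
    float *d_length = nullptr;
    cudaMalloc((void **)&d_length, sizeof(float));

    // Old approach: cudaMemset writes the low byte of 16 (0x10) into each of
    // the four bytes, yielding the bit pattern 0x10101010, not the float 16.0f.
    cudaMemset(d_length, inputSize * inputSize, sizeof(float));
    float bad;
    cudaMemcpy(&bad, d_length, sizeof(float), cudaMemcpyDeviceToHost);

    // Fixed approach: form the float on the host, copy its bytes to the device.
    float length = (float)inputSize * inputSize;
    cudaMemcpy(d_length, &length, sizeof(float), cudaMemcpyHostToDevice);
    float good;
    cudaMemcpy(&good, d_length, sizeof(float), cudaMemcpyDeviceToHost);

    printf("cudaMemset stored %g, cudaMemcpy stored %g\n", bad, good);
    cudaFree(d_length);
    return 0;
}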
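
Note on the forward() hunk: collapsing Utils::sum plus Kernels::vec_scalar_div into a single Utils::mean call removes one kernel launch per channel. The diff shows only the call site, so the following is a rough sketch of what such a helper could look like, assuming the four-argument signature visible above; the kernel name, single-block reduction, and launch shape are guesses, not the project's actual implementation:

// mean_sketch.cu -- hypothetical sketch of a Utils::mean matching the call site.
#include <cuda_runtime.h>

namespace Kernels {
// Single-block reduction: sums n floats, then divides by *d_length.
// Assumes blockDim.x is a power of two.
__global__ void mean_kernel(const float *d_in, float *d_out,
                            const float *d_length, int n) {
    extern __shared__ float sdata[];
    float sum = 0.0f;
    // Stride loop so one block covers any n.
    for (int i = threadIdx.x; i < n; i += blockDim.x)
        sum += d_in[i];
    sdata[threadIdx.x] = sum;
    __syncthreads();
    // Tree reduction in shared memory.
    for (int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) sdata[threadIdx.x] += sdata[threadIdx.x + s];
        __syncthreads();
    }
    if (threadIdx.x == 0) *d_out = sdata[0] / *d_length;
}
} // namespace Kernels

namespace Utils {
void mean(const float *d_in, float *d_mean, const float *d_length, int n) {
    const int block = 256;
    Kernels::mean_kernel<<<1, block, block * sizeof(float)>>>(d_in, d_mean,
                                                              d_length, n);
}
} // namespace Utils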
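
Note on the test change: the normalized output comes out of GPU float arithmetic with an epsilon term, which will not generally match a hand-computed expected value bit-for-bit, so EXPECT_EQ on floats is brittle and EXPECT_NEAR with a 1e-5 tolerance is the appropriate check. A tiny illustration with made-up numbers:

// tolerance_demo.cc -- illustrative only; values are invented for the demo.
#include <cmath>
#include <gtest/gtest.h>

TEST(FloatCompareDemo, NearVsEq) {
    // Device-style normalization math vs. a hand-derived reference constant.
    float computed = (1.0f - 0.5f) / std::sqrt(0.25f + 1e-5f);
    float expected = 0.99998f;
    // EXPECT_EQ(computed, expected);        // may fail on a last-ULP difference
    EXPECT_NEAR(computed, expected, 1e-5);   // passes within tolerance
}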