Fix more CUDA memcheck errors

This commit is contained in:
2024-04-21 22:56:55 +02:00
parent 58af95eb25
commit 26cea9b12c
3 changed files with 25 additions and 15 deletions

View File

@@ -65,21 +65,15 @@ void Dense::initializeBiases() {
float* Dense::forward(const float* d_input) { float* Dense::forward(const float* d_input) {
// CUDANet::Utils::clear(d_output, outputSize);
// CUDA_CHECK(cudaPeekAtLastError());
std::cout << "Dense::forward" << std::endl;
Kernels::mat_vec_mul<<<forwardGridSize, BLOCK_SIZE>>>( Kernels::mat_vec_mul<<<forwardGridSize, BLOCK_SIZE>>>(
d_weights, d_input, d_output, inputSize, outputSize d_weights, d_input, d_output, inputSize, outputSize
); );
CUDA_CHECK(cudaPeekAtLastError()); CUDA_CHECK(cudaGetLastError());
Kernels::vec_vec_add<<<biasGridSize, BLOCK_SIZE>>>( Kernels::vec_vec_add<<<biasGridSize, BLOCK_SIZE>>>(
d_biases, d_output, d_output, outputSize d_biases, d_output, d_output, outputSize
); );
CUDA_CHECK(cudaPeekAtLastError()); CUDA_CHECK(cudaGetLastError());
activation.activate(d_output); activation.activate(d_output);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());

View File

@@ -66,5 +66,4 @@ TEST(AvgPoolingLayerTest, AvgPoolForwardTest) {
} }
cudaFree(d_input); cudaFree(d_input);
cudaFree(d_output);
} }

View File

@@ -4,6 +4,9 @@
#include <vector> #include <vector>
TEST(ConcatLayerTest, Init) { TEST(ConcatLayerTest, Init) {
cudaError_t cudaStatus;
std::vector<float> inputA = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f}; std::vector<float> inputA = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};
std::vector<float> inputB = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f}; std::vector<float> inputB = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f};
@@ -11,27 +14,41 @@ TEST(ConcatLayerTest, Init) {
float* d_inputA; float* d_inputA;
float* d_inputB; float* d_inputB;
cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
cudaMalloc((void**)&d_inputB, sizeof(float) * 6); cudaStatus = cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
cudaMemcpy( EXPECT_EQ(cudaStatus, cudaSuccess);
cudaStatus = cudaMalloc((void**)&d_inputB, sizeof(float) * 6);
EXPECT_EQ(cudaStatus, cudaSuccess);
cudaStatus = cudaMemcpy(
d_inputA, inputA.data(), sizeof(float) * 5, cudaMemcpyHostToDevice d_inputA, inputA.data(), sizeof(float) * 5, cudaMemcpyHostToDevice
); );
cudaMemcpy( EXPECT_EQ(cudaStatus, cudaSuccess);
cudaStatus = cudaMemcpy(
d_inputB, inputB.data(), sizeof(float) * 6, cudaMemcpyHostToDevice d_inputB, inputB.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
); );
EXPECT_EQ(cudaStatus, cudaSuccess);
float* d_output = concat.forward(d_inputA, d_inputB); float* d_output = concat.forward(d_inputA, d_inputB);
std::vector<float> output(11); std::vector<float> output(11);
cudaMemcpy( cudaStatus = cudaMemcpy(
output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost
); );
EXPECT_EQ(cudaStatus, cudaSuccess);
for (int i = 0; i < 5; ++i) { for (int i = 0; i < 5; ++i) {
EXPECT_EQ(output[i], inputA[i]); EXPECT_EQ(output[i], inputA[i]);
} }
for (int i = 0; i < 6; ++i) { for (int i = 0; i < 6; ++i) {
EXPECT_EQ(output[i + 5], inputB[i]); EXPECT_EQ(output[i + 5], inputB[i]);
} }
cudaFree(d_output);
cudaStatus = cudaFree(d_inputA);
EXPECT_EQ(cudaStatus, cudaSuccess);
cudaStatus = cudaFree(d_inputB);
EXPECT_EQ(cudaStatus, cudaSuccess);
} }