mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Fix more cuda memcheck errors
This commit is contained in:
@@ -65,21 +65,15 @@ void Dense::initializeBiases() {
|
|||||||
|
|
||||||
float* Dense::forward(const float* d_input) {
|
float* Dense::forward(const float* d_input) {
|
||||||
|
|
||||||
// CUDANet::Utils::clear(d_output, outputSize);
|
|
||||||
|
|
||||||
// CUDA_CHECK(cudaPeekAtLastError());
|
|
||||||
|
|
||||||
std::cout << "Dense::forward" << std::endl;
|
|
||||||
|
|
||||||
Kernels::mat_vec_mul<<<forwardGridSize, BLOCK_SIZE>>>(
|
Kernels::mat_vec_mul<<<forwardGridSize, BLOCK_SIZE>>>(
|
||||||
d_weights, d_input, d_output, inputSize, outputSize
|
d_weights, d_input, d_output, inputSize, outputSize
|
||||||
);
|
);
|
||||||
CUDA_CHECK(cudaPeekAtLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
Kernels::vec_vec_add<<<biasGridSize, BLOCK_SIZE>>>(
|
Kernels::vec_vec_add<<<biasGridSize, BLOCK_SIZE>>>(
|
||||||
d_biases, d_output, d_output, outputSize
|
d_biases, d_output, d_output, outputSize
|
||||||
);
|
);
|
||||||
CUDA_CHECK(cudaPeekAtLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
activation.activate(d_output);
|
activation.activate(d_output);
|
||||||
CUDA_CHECK(cudaDeviceSynchronize());
|
CUDA_CHECK(cudaDeviceSynchronize());
|
||||||
|
|||||||
@@ -66,5 +66,4 @@ TEST(AvgPoolingLayerTest, AvgPoolForwardTest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
cudaFree(d_input);
|
cudaFree(d_input);
|
||||||
cudaFree(d_output);
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,9 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
TEST(ConcatLayerTest, Init) {
|
TEST(ConcatLayerTest, Init) {
|
||||||
|
|
||||||
|
cudaError_t cudaStatus;
|
||||||
|
|
||||||
std::vector<float> inputA = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};
|
std::vector<float> inputA = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f};
|
||||||
std::vector<float> inputB = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f};
|
std::vector<float> inputB = {0.123f, 0.321f, 0.456f, 0.789f, 0.654f, 0.123f};
|
||||||
|
|
||||||
@@ -11,27 +14,41 @@ TEST(ConcatLayerTest, Init) {
|
|||||||
|
|
||||||
float* d_inputA;
|
float* d_inputA;
|
||||||
float* d_inputB;
|
float* d_inputB;
|
||||||
cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
|
|
||||||
cudaMalloc((void**)&d_inputB, sizeof(float) * 6);
|
cudaStatus = cudaMalloc((void**)&d_inputA, sizeof(float) * 5);
|
||||||
cudaMemcpy(
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
|
||||||
|
cudaStatus = cudaMalloc((void**)&d_inputB, sizeof(float) * 6);
|
||||||
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
|
||||||
|
cudaStatus = cudaMemcpy(
|
||||||
d_inputA, inputA.data(), sizeof(float) * 5, cudaMemcpyHostToDevice
|
d_inputA, inputA.data(), sizeof(float) * 5, cudaMemcpyHostToDevice
|
||||||
);
|
);
|
||||||
cudaMemcpy(
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
|
||||||
|
cudaStatus = cudaMemcpy(
|
||||||
d_inputB, inputB.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
|
d_inputB, inputB.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
|
||||||
);
|
);
|
||||||
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
|
||||||
float* d_output = concat.forward(d_inputA, d_inputB);
|
float* d_output = concat.forward(d_inputA, d_inputB);
|
||||||
|
|
||||||
std::vector<float> output(11);
|
std::vector<float> output(11);
|
||||||
cudaMemcpy(
|
cudaStatus = cudaMemcpy(
|
||||||
output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost
|
output.data(), d_output, sizeof(float) * 11, cudaMemcpyDeviceToHost
|
||||||
);
|
);
|
||||||
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
|
||||||
for (int i = 0; i < 5; ++i) {
|
for (int i = 0; i < 5; ++i) {
|
||||||
EXPECT_EQ(output[i], inputA[i]);
|
EXPECT_EQ(output[i], inputA[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < 6; ++i) {
|
for (int i = 0; i < 6; ++i) {
|
||||||
EXPECT_EQ(output[i + 5], inputB[i]);
|
EXPECT_EQ(output[i + 5], inputB[i]);
|
||||||
}
|
}
|
||||||
cudaFree(d_output);
|
|
||||||
|
cudaStatus = cudaFree(d_inputA);
|
||||||
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
|
cudaStatus = cudaFree(d_inputB);
|
||||||
|
EXPECT_EQ(cudaStatus, cudaSuccess);
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user