Add toplevel CUDANet namespace

This commit is contained in:
2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -25,7 +25,7 @@ TEST(ActivationsTest, SigmoidSanityCheck) {
cudaStatus = cudaMemcpy(d_input, input, sizeof(float) * 3, cudaMemcpyHostToDevice);
EXPECT_EQ(cudaStatus, cudaSuccess);
Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
CUDANet::Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
cudaStatus = cudaDeviceSynchronize();
EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -44,7 +44,7 @@ TEST(MatMulTest, MatVecMulTest) {
int THREADS_PER_BLOCK = std::max(w, h);
int BLOCKS = 1;
Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
CUDANet::Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
cudaStatus = cudaDeviceSynchronize();
EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -51,7 +51,7 @@ TEST(PaddingTest, SimplePaddingTest) {
int THREADS_PER_BLOCK = 64;
int BLOCKS = paddedSize / THREADS_PER_BLOCK + 1;
Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
CUDANet::Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
d_input, d_padded, w, h, n, p
);
cudaStatus = cudaDeviceSynchronize();