Add toplevel CUDANet namespace

2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -1,7 +1,7 @@
 #ifndef CUDANET_ACTIVATIONS_H
 #define CUDANET_ACTIVATIONS_H

-namespace Kernels {
+namespace CUDANet::Kernels {

 /**
  * @brief Sigmoid activation function kernel
@@ -23,6 +23,6 @@ sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
 __global__ void
 relu(const float* __restrict__ src, float* __restrict__ dst, int len);

-} // namespace Kernels
+} // namespace CUDANet::Kernels

 #endif // CUDANET_ACTIVATIONS_H
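The rename uses a C++17 nested namespace definition (namespace CUDANet::Kernels { ... }), so callers now spell out the full path when launching kernels. A minimal host-side sketch of what a launch looks like under the new name; the helper function, grid sizing, and synchronization are illustrative assumptions, not part of this commit:

#include <cuda_runtime.h>

#include "activations.cuh"

// Hypothetical helper (not from this commit): launches the relu kernel through
// the new fully qualified CUDANet::Kernels namespace.
void applyRelu(const float* d_src, float* d_dst, int len) {
    const int threads = 256;
    const int blocks  = (len + threads - 1) / threads;
    CUDANet::Kernels::relu<<<blocks, threads>>>(d_src, d_dst, len);
    cudaDeviceSynchronize();
}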

View File

@@ -1,11 +1,11 @@
 #ifndef CUDANET_CONVOLUTION_H
 #define CUDANET_CONVOLUTION_H

-namespace Kernels {
+namespace CUDANet::Kernels {

 /**
  * @brief Kernel that pads the input matrix with zeros
  *
  * @param d_input Device pointer to the input matrix (as vector)
  * @param d_padded Device pointer to the padded matrix (as vector)
  * @param w Width of the input matrix
@@ -14,17 +14,17 @@ namespace Kernels {
  * @param p Padding size
  */
 __global__ void padding(
-    const float* d_input,
-    float* d_padded,
-    int w,
-    int h,
-    int n,
-    int p
+    const float* __restrict__ d_input,
+    float* __restrict__ d_padded,
+    const unsigned int w,
+    const unsigned int h,
+    const unsigned int n,
+    const unsigned int p
 );

 /**
  * @brief Convolution kernel
  *
  * @param d_input Device pointer to the input matrix
  * @param d_kernel Device pointer to the convolution kernel
  * @param d_output Device pointer to the output matrix
@@ -36,17 +36,17 @@ __global__ void padding(
  * @param outputSize Width and height of the output matrix
  */
 __global__ void convolution(
-    const float* d_input,
-    const float* d_kernel,
-    float* d_output,
-    int inputSize,
-    int nChannels,
-    int kernelSize,
-    int stride,
-    int nFilters,
-    int outputSize
+    const float* __restrict__ d_input,
+    const float* __restrict__ d_kernel,
+    float* __restrict__ d_output,
+    const unsigned int inputSize,
+    const unsigned int nChannels,
+    const unsigned int kernelSize,
+    const unsigned int stride,
+    const unsigned int nFilters,
+    const unsigned int outputSize
 );

-} // namespace Kernels
+} // namespace CUDANet::Kernels

 #endif // CUDANET_CONVOLUTION_H

View File

@@ -1,11 +1,11 @@
 #ifndef CUDANET_MATMUL_H
 #define CUDANET_MATMUL_H

-namespace Kernels {
+namespace CUDANet::Kernels {

 /**
  * @brief Matrix vector multiplication kernel
  *
  * @param d_matrix Device pointer to matrix
  * @param d_vector Device pointer to vector
  * @param d_output Device pointer to output vector
@@ -13,28 +13,41 @@ namespace Kernels {
  * @param h Height of the matrix
  */
 __global__ void mat_vec_mul(
-    const float* d_matrix,
-    const float* d_vector,
-    float* d_output,
-    int w,
-    int h
+    const float* __restrict__ d_matrix,
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w,
+    const unsigned int h
 );

 /**
  * @brief Vector vector addition kernel
  *
  * @param d_vector1 Device pointer to first vector
  * @param d_vector2 Device pointer to second vector
  * @param d_output Device pointer to output vector
  * @param w Length of the vectors
  */
 __global__ void vec_vec_add(
-    const float* d_vector1,
-    const float* d_vector2,
-    float* d_output,
-    int w
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
 );

-} // namespace Kernels
+/**
+ * @brief Sum reduction kernel
+ *
+ * @param d_vector Device pointer to vector
+ * @param d_output Device pointer to output vector
+ * @param w Length of the vector
+ */
+__global__ void reduce_sum(
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w
+);
+
+} // namespace CUDANet::Kernels

 #endif // CUDANET_MATMUL_H
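The new reduce_sum declaration takes a vector, an output buffer, and a length, which matches the usual pattern of each block producing one partial sum. A hedged host-side sketch of how such a kernel is typically driven, assuming reduce_sum writes one partial sum per block into d_output and that BLOCK_SIZE comes from cuda_helper.cuh; the driver itself is not part of this commit and overwrites its input buffer:

#include <utility>

#include <cuda_runtime.h>

#include "cuda_helper.cuh"
#include "matmul.cuh"

// Hypothetical driver (assumption): launch reduce_sum repeatedly, shrinking
// n elements down to one partial sum per block on every pass.
float sumOnDevice(float* d_data, unsigned int w) {
    float* d_partial = nullptr;
    cudaMalloc(&d_partial, sizeof(float) * ((w + BLOCK_SIZE - 1) / BLOCK_SIZE));

    float*       d_in  = d_data;
    float*       d_out = d_partial;
    unsigned int n     = w;
    while (n > 1) {
        unsigned int blocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::reduce_sum<<<blocks, BLOCK_SIZE>>>(d_in, d_out, n);
        std::swap(d_in, d_out);
        n = blocks;
    }

    float result = 0.0f;
    cudaMemcpy(&result, d_in, sizeof(float), cudaMemcpyDeviceToHost);
    cudaFree(d_partial);
    return result;
}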

View File

@@ -8,7 +8,7 @@
 #include "convolution.cuh"
 #include "ilayer.cuh"

-namespace Layers {
+namespace CUDANet::Layers {

 /**
  * @brief 2D convolutional layer
@@ -125,6 +125,6 @@ class Conv2d : public ILayer {
     void toCuda();
 };

-} // namespace Layers
+} // namespace CUDANet::Layers

 #endif // CUDANET_CONV_LAYER_H

View File

@@ -7,7 +7,7 @@
 #include "ilayer.cuh"

-namespace Layers {
+namespace CUDANet::Layers {

 /**
  * @brief Dense (fully connected) layer
@@ -53,8 +53,8 @@ class Dense : public ILayer {
     void setBiases(const float* biases);

   private:
-    int inputSize;
-    int outputSize;
+    unsigned int inputSize;
+    unsigned int outputSize;

     float* d_output;
@@ -67,8 +67,8 @@ class Dense : public ILayer {
     Layers::Activation activation;

     // Precompute kernel launch parameters
-    int forwardGridSize;
-    int biasGridSize;
+    unsigned int forwardGridSize;
+    unsigned int biasGridSize;

     /**
      * @brief Initialize the weights to zeros
@@ -89,6 +89,6 @@ class Dense : public ILayer {
     void toCuda();
 };

-} // namespace Layers
+} // namespace CUDANet::Layers

 #endif // CUDANET_DENSE_LAYER_H

View File

@@ -4,7 +4,7 @@
 #include <vector>

-namespace Layers {
+namespace CUDANet::Layers {

 /**
  * @brief Activation functions
@@ -88,6 +88,6 @@ class ILayer {
     Layers::Activation activation;
 };

-} // namespace Layers
+} // namespace CUDANet::Layers

 #endif // CUDANET_I_LAYERH

View File

@@ -3,7 +3,7 @@
 #include <ilayer.cuh>

-namespace Layers {
+namespace CUDANet::Layers {

 /**
  * @brief Input layer, just copies the input to the device
@@ -45,6 +45,6 @@ class Input : public ILayer {
     float* d_output;
 };

-} // namespace Layers
+} // namespace CUDANet::Layers

 #endif // CUDANET_INPUT_LAYER_H

View File

@@ -2,7 +2,7 @@
 #include "activations.cuh"

-__global__ void Kernels::sigmoid(
+__global__ void CUDANet::Kernels::sigmoid(
     const float* __restrict__ src,
     float* __restrict__ dst,
     int len
@@ -15,8 +15,11 @@ __global__ void Kernels::sigmoid(
     }
 }

-__global__ void
-Kernels::relu(const float* __restrict__ src, float* __restrict__ dst, int len) {
+__global__ void CUDANet::Kernels::relu(
+    const float* __restrict__ src,
+    float* __restrict__ dst,
+    int len
+) {
     int stride = gridDim.x * blockDim.x;
     int tid = blockDim.x * blockIdx.x + threadIdx.x;

View File

@@ -1,6 +1,7 @@
-#include "convolution.cuh"
 #include <iostream>
+
+#include "convolution.cuh"

 /*
 Pads matrix width x height x n_channels to width + 2 * padding x height + 2 *
 padding x n_channels Matrix is represented as a pointer to a vector
@@ -47,13 +48,13 @@ pre-allocated)
 n: Number of channels in input matrix
 p: Padding
 */
-__global__ void Kernels::padding(
-    const float* d_input,
-    float* d_padded,
-    int w,
-    int h,
-    int n,
-    int p
+__global__ void CUDANet::Kernels::padding(
+    const float* __restrict__ d_input,
+    float* __restrict__ d_padded,
+    const unsigned int w,
+    const unsigned int h,
+    const unsigned int n,
+    const unsigned int p
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
@@ -78,16 +79,16 @@ __global__ void Kernels::padding(
     }
 }

-__global__ void Kernels::convolution(
-    const float* d_input,
-    const float* d_kernel,
-    float* d_output,
-    int inputSize,
-    int nChannels,
-    int kernelSize,
-    int stride,
-    int nFilters,
-    int outputSize
+__global__ void CUDANet::Kernels::convolution(
+    const float* __restrict__ d_input,
+    const float* __restrict__ d_kernel,
+    float* __restrict__ d_output,
+    const unsigned int inputSize,
+    const unsigned int nChannels,
+    const unsigned int kernelSize,
+    const unsigned int stride,
+    const unsigned int nFilters,
+    const unsigned int outputSize
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;

View File

@@ -1,14 +1,12 @@
 #include "cuda_helper.cuh"
 #include "matmul.cuh"

-#define SHARED_SIZE 128 * 4
-
-__global__ void Kernels::mat_vec_mul(
+__global__ void CUDANet::Kernels::mat_vec_mul(
     const float* __restrict__ d_matrix,
     const float* __restrict__ d_vector,
     float* __restrict__ d_output,
-    int w,
-    int h
+    const unsigned int w,
+    const unsigned int h
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
@@ -16,9 +14,8 @@ __global__ void Kernels::mat_vec_mul(
     float temp = 0.0f;

 #pragma unroll
-    for (unsigned int i = 0; i < (w + BLOCK_SIZE - 1) / BLOCK_SIZE; i++)
-    {
+    for (unsigned int i = 0; i < (w + BLOCK_SIZE - 1) / BLOCK_SIZE; i++) {
         if (i * BLOCK_SIZE + threadIdx.x < w) {
             shared[threadIdx.x] = d_vector[i * BLOCK_SIZE + threadIdx.x];
         } else {
@@ -27,22 +24,22 @@ __global__ void Kernels::mat_vec_mul(
         __syncthreads();

-        for (unsigned int j = 0; j < BLOCK_SIZE; j++)
-        {
+#pragma unroll
+        for (unsigned int j = 0; j < BLOCK_SIZE; j++) {
             temp += d_matrix[tid * w + i * BLOCK_SIZE + j] * shared[j];
         }

         __syncthreads();
     }

     d_output[tid] = temp;
 }

-__global__ void Kernels::vec_vec_add(
-    const float* d_vector1,
-    const float* d_vector2,
-    float* d_output,
-    int w
+__global__ void CUDANet::Kernels::vec_vec_add(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
     if (tid >= w) {
@@ -50,3 +47,15 @@ __global__ void Kernels::vec_vec_add(
     }
     d_output[tid] = d_vector1[tid] + d_vector2[tid];
 }
+
+__global__ void CUDANet::Kernels::reduce_sum(
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w
+) {
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+    __shared__ float shared[BLOCK_SIZE];
+    shared[threadIdx.x] = d_vector[tid];
+    __syncthreads();
+}
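As committed, reduce_sum only stages one element per thread into shared memory and synchronizes; it does not yet combine values or write to d_output, so it reads as a work-in-progress. For context, the conventional way to finish this kind of kernel is a guarded load, a shared-memory tree reduction, and a per-block write-back. The following sketch assumes blockDim.x equals BLOCK_SIZE and is a power of two; it is an illustration, not code from this commit:

// Sketch of a typical block-level sum reduction (assumption, not from the diff).
__global__ void reduce_sum_sketch(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int w
) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;

    __shared__ float shared[BLOCK_SIZE];
    // Guarded load so threads past the end of the vector contribute zero.
    shared[threadIdx.x] = tid < w ? d_vector[tid] : 0.0f;
    __syncthreads();

    // Tree reduction: halve the number of active threads each step.
    for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) {
            shared[threadIdx.x] += shared[threadIdx.x + s];
        }
        __syncthreads();
    }

    // Thread 0 writes this block's partial sum.
    if (threadIdx.x == 0) {
        d_output[blockIdx.x] = shared[0];
    }
}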

View File

@@ -7,6 +7,8 @@
 #include "cuda_helper.cuh"
 #include "matmul.cuh"

+using namespace CUDANet;
+
 Layers::Conv2d::Conv2d(
     int inputSize,
     int inputChannels,
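The layer sources keep their existing Layers::-qualified member definitions; the added using namespace CUDANet; directive is what lets that shorter qualification keep resolving to CUDANet::Layers after the rename. A minimal, self-contained illustration of the lookup with hypothetical names (not from this commit):

namespace CUDANet::Layers {
struct Example {
    explicit Example(int n);
    int size;
};
}  // namespace CUDANet::Layers

using namespace CUDANet;

// Resolves to CUDANet::Layers::Example::Example via the using-directive,
// mirroring how Layers::Conv2d::Conv2d is defined in this file.
Layers::Example::Example(int n) : size(n) {}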

View File

@@ -10,6 +10,8 @@
 #include "dense.cuh"
 #include "matmul.cuh"

+using namespace CUDANet;
+
 Layers::Dense::Dense(
     int inputSize,
     int outputSize,

View File

@@ -1,6 +1,8 @@
 #include "cuda_helper.cuh"
 #include "input.cuh"

+using namespace CUDANet;
+
 Layers::Input::Input(int inputSize) : inputSize(inputSize) {
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * inputSize));

View File

@@ -25,7 +25,7 @@ TEST(ActivationsTest, SigmoidSanityCheck) {
     cudaStatus = cudaMemcpy(d_input, input, sizeof(float) * 3, cudaMemcpyHostToDevice);
     EXPECT_EQ(cudaStatus, cudaSuccess);

-    Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
+    CUDANet::Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);

     cudaStatus = cudaDeviceSynchronize();
     EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -44,7 +44,7 @@ TEST(MatMulTest, MatVecMulTest) {
     int THREADS_PER_BLOCK = std::max(w, h);
     int BLOCKS = 1;

-    Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
+    CUDANet::Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);

     cudaStatus = cudaDeviceSynchronize();
     EXPECT_EQ(cudaStatus, cudaSuccess);

View File

@@ -51,7 +51,7 @@ TEST(PaddingTest, SimplePaddingTest) {
     int THREADS_PER_BLOCK = 64;
     int BLOCKS = paddedSize / THREADS_PER_BLOCK + 1;

-    Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
+    CUDANet::Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
         d_input, d_padded, w, h, n, p
     );
     cudaStatus = cudaDeviceSynchronize();

View File

@@ -7,20 +7,20 @@
 class Conv2dTest : public ::testing::Test {
   protected:
-    Layers::Conv2d commonTestSetup(
+    CUDANet::Layers::Conv2d commonTestSetup(
         int inputSize,
         int inputChannels,
         int kernelSize,
         int stride,
-        Layers::Padding padding,
+        CUDANet::Layers::Padding padding,
         int numFilters,
-        Layers::Activation activation,
+        CUDANet::Layers::Activation activation,
         std::vector<float>& input,
         float* kernels,
         float*& d_input
     ) {
         // Create Conv2d layer
-        Layers::Conv2d conv2d(
+        CUDANet::Layers::Conv2d conv2d(
             inputSize, inputChannels, kernelSize, stride, padding, numFilters,
             activation
         );
@@ -53,13 +53,13 @@ class Conv2dTest : public ::testing::Test {
 };

 TEST_F(Conv2dTest, SimpleTest) {
     int inputSize = 4;
     int inputChannels = 1;
     int kernelSize = 2;
     int stride = 1;
-    Layers::Padding padding = Layers::Padding::VALID;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::VALID;
     int numFilters = 1;
-    Layers::Activation activation = Layers::Activation::NONE;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;

     std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
                                 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
@@ -74,7 +74,7 @@ TEST_F(Conv2dTest, SimpleTest) {
     float* d_input;
     float* d_output;

-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -102,13 +102,13 @@ TEST_F(Conv2dTest, SimpleTest) {
 }

 TEST_F(Conv2dTest, PaddedTest) {
     int inputSize = 5;
     int inputChannels = 3;
     int kernelSize = 3;
     int stride = 1;
-    Layers::Padding padding = Layers::Padding::SAME;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
     int numFilters = 2;
-    Layers::Activation activation = Layers::Activation::NONE;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;

     // clang-format off
     std::vector<float> input = {
@@ -163,7 +163,7 @@ TEST_F(Conv2dTest, PaddedTest) {
     float* d_input;
     float* d_output;

-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -177,7 +177,8 @@ TEST_F(Conv2dTest, PaddedTest) {
     );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
+            numFilters,
         cudaMemcpyDeviceToHost
     );
@@ -202,13 +203,13 @@ TEST_F(Conv2dTest, PaddedTest) {
 }

 TEST_F(Conv2dTest, StridedPaddedConvolution) {
     int inputSize = 5;
     int inputChannels = 2;
     int kernelSize = 3;
     int stride = 2;
     int numFilters = 2;
-    Layers::Padding padding = Layers::Padding::SAME;
-    Layers::Activation activation = Layers::Activation::RELU;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::RELU;

     // clang-format off
     std::vector<float> input = {
@@ -248,7 +249,7 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
     float* d_input;
     float* d_output;

-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -262,7 +263,8 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
     );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
+            numFilters,
         cudaMemcpyDeviceToHost
     );

View File

@@ -8,17 +8,17 @@
 class DenseLayerTest : public ::testing::Test {
   protected:
-    Layers::Dense commonTestSetup(
+    CUDANet::Layers::Dense commonTestSetup(
         int inputSize,
         int outputSize,
         std::vector<float>& input,
         float* weights,
         float* biases,
         float*& d_input,
-        Layers::Activation activation
+        CUDANet::Layers::Activation activation
     ) {
         // Create Dense layer
-        Layers::Dense denseLayer(inputSize, outputSize, activation);
+        CUDANet::Layers::Dense denseLayer(inputSize, outputSize, activation);

         // Set weights and biases
         denseLayer.setWeights(weights);
@@ -52,8 +52,8 @@ TEST_F(DenseLayerTest, Init) {
             int inputSize = i;
             int outputSize = j;

-            Layers::Dense denseLayer(
-                inputSize, outputSize, Layers::Activation::SIGMOID
+            CUDANet::Layers::Dense denseLayer(
+                inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
             );
         }
     }
@@ -73,8 +73,8 @@ TEST_F(DenseLayerTest, setWeights) {
     };
     // clang-format on

-    Layers::Dense denseLayer(
-        inputSize, outputSize, Layers::Activation::SIGMOID
+    CUDANet::Layers::Dense denseLayer(
+        inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
     );

     denseLayer.setWeights(weights.data());
@@ -99,9 +99,9 @@ TEST_F(DenseLayerTest, ForwardUnitWeightMatrixLinear) {
     float* d_input;
     float* d_output;

-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::NONE
+        CUDANet::Layers::Activation::NONE
     );

     d_output = denseLayer.forward(d_input);
@@ -140,9 +140,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixRelu) {
     float* d_input;
     float* d_output;

-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::RELU
+        CUDANet::Layers::Activation::RELU
     );

     d_output = denseLayer.forward(d_input);
@@ -185,9 +185,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) {
     float* d_input;
     float* d_output;

-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::SIGMOID
+        CUDANet::Layers::Activation::SIGMOID
     );

     d_output = denseLayer.forward(d_input);

View File

@@ -1,16 +1,16 @@
 #include <gtest/gtest.h>

-#include "input.cuh"
 #include "cuda_helper.cuh"
+#include "input.cuh"

 TEST(InputLayerTest, Init) {
     std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};

-    Layers::Input inputLayer(6);
+    CUDANet::Layers::Input inputLayer(6);

     float* d_output = inputLayer.forward(input.data());

     std::vector<float> output(6);
-    CUDA_CHECK(cudaMemcpy(output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaMemcpy(
+        output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
+    ));

     EXPECT_EQ(input, output);
 }