Mirror of https://github.com/lordmathis/CUDANet.git (synced 2025-11-06 09:44:28 +00:00)
Add toplevel CUDANet namespace
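This commit moves every kernel and layer declaration under a single top-level CUDANet namespace. The `namespace CUDANet::Kernels { ... }` spelling used throughout is C++17 nested-namespace definition syntax; a minimal sketch of the equivalence (illustrative, not repository code):

// C++17 nested-namespace definition...
namespace CUDANet::Kernels {
__global__ void relu(const float* __restrict__ src, float* __restrict__ dst, int len);
} // namespace CUDANet::Kernels

// ...is shorthand for the pre-C++17 spelling:
namespace CUDANet {
namespace Kernels {
__global__ void relu(const float* __restrict__ src, float* __restrict__ dst, int len);
} // namespace Kernels
} // namespace CUDANet

Call sites then spell out the full path, e.g. CUDANet::Kernels::relu<<<blocks, threads>>>(d_src, d_dst, len); the updated tests below follow exactly this pattern.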
@@ -1,7 +1,7 @@
 #ifndef CUDANET_ACTIVATIONS_H
 #define CUDANET_ACTIVATIONS_H
 
-namespace Kernels {
+namespace CUDANet::Kernels {
 
 /**
  * @brief Sigmoid activation function kernel
@@ -23,6 +23,6 @@ sigmoid(const float* __restrict__ src, float* __restrict__ dst, int len);
 __global__ void
 relu(const float* __restrict__ src, float* __restrict__ dst, int len);
 
-} // namespace Kernels
+} // namespace CUDANet::Kernels
 
 #endif // CUDANET_ACTIVATIONS_H
@@ -1,7 +1,7 @@
 #ifndef CUDANET_CONVOLUTION_H
 #define CUDANET_CONVOLUTION_H
 
-namespace Kernels {
+namespace CUDANet::Kernels {
 
 /**
  * @brief Kernel that pads the input matrix with zeros
@@ -14,12 +14,12 @@ namespace Kernels {
  * @param p Padding size
  */
 __global__ void padding(
-    const float* d_input,
-    float* d_padded,
-    int w,
-    int h,
-    int n,
-    int p
+    const float* __restrict__ d_input,
+    float* __restrict__ d_padded,
+    const unsigned int w,
+    const unsigned int h,
+    const unsigned int n,
+    const unsigned int p
 );
 
 /**
@@ -36,17 +36,17 @@ __global__ void padding(
  * @param outputSize Width and height of the output matrix
  */
 __global__ void convolution(
-    const float* d_input,
-    const float* d_kernel,
-    float* d_output,
-    int inputSize,
-    int nChannels,
-    int kernelSize,
-    int stride,
-    int nFilters,
-    int outputSize
+    const float* __restrict__ d_input,
+    const float* __restrict__ d_kernel,
+    float* __restrict__ d_output,
+    const unsigned int inputSize,
+    const unsigned int nChannels,
+    const unsigned int kernelSize,
+    const unsigned int stride,
+    const unsigned int nFilters,
+    const unsigned int outputSize
 );
 
-} // namespace Kernels
+} // namespace CUDANet::Kernels
 
 #endif // CUDANET_CONVOLUTION_H
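The signature changes here do two independent things: `__restrict__` promises the compiler that the pointers never alias one another, which lets it avoid forced reloads from memory, and `const unsigned int` documents that the dimensions are non-negative and immutable inside the kernel. A minimal illustration of the aliasing promise (hypothetical kernel, not from this repository):

// With __restrict__, the compiler may keep src[i] in a register across both
// statements, because it may assume dst[i] can never overwrite it.
__global__ void scale_and_add(
    const float* __restrict__ src, float* __restrict__ dst, const unsigned int n
) {
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        dst[i] = src[i] * 2.0f;  // first use of src[i]
        dst[i] += src[i];        // reused without a reload from global memory
    }
}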
@@ -1,7 +1,7 @@
 #ifndef CUDANET_MATMUL_H
 #define CUDANET_MATMUL_H
 
-namespace Kernels {
+namespace CUDANet::Kernels {
 
 /**
  * @brief Matrix vector multiplication kernel
@@ -13,11 +13,11 @@ namespace Kernels {
  * @param h Height of the matrix
  */
 __global__ void mat_vec_mul(
-    const float* d_matrix,
-    const float* d_vector,
-    float* d_output,
-    int w,
-    int h
+    const float* __restrict__ d_matrix,
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w,
+    const unsigned int h
 );
 
 /**
@@ -29,12 +29,25 @@ __global__ void mat_vec_mul(
  * @param w Length of the vectors
  */
 __global__ void vec_vec_add(
-    const float* d_vector1,
-    const float* d_vector2,
-    float* d_output,
-    int w
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
 );
 
-} // namespace Kernels
+/**
+ * @brief Sum reduction kernel
+ *
+ * @param d_vector Device pointer to vector
+ * @param d_output Device pointer to output vector
+ * @param w Length of the vector
+ */
+__global__ void reduce_sum(
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w
+);
+
+} // namespace CUDANet::Kernels
 
 #endif // CUDANET_MATMUL_H
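reduce_sum is only declared here; its definition below stages one block-sized tile into shared memory, so each launch plausibly yields one partial sum per block. A hypothetical host-side driver under that assumption (the helper name, the BLOCK_SIZE constant, and the ping-pong strategy are all assumptions, not part of this commit; the library defines its own BLOCK_SIZE, used in matmul.cu):

#include <utility>  // std::swap

#include "matmul.cuh"

constexpr unsigned int BLOCK_SIZE = 256;  // assumed to match the library's value

// Repeatedly reduce: each round shrinks w values to ceil(w / BLOCK_SIZE)
// partial sums, ping-ponging between two device buffers.
float sum_on_device(float* d_in, float* d_scratch, unsigned int w) {
    while (w > 1) {
        unsigned int blocks = (w + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::reduce_sum<<<blocks, BLOCK_SIZE>>>(d_in, d_scratch, w);
        std::swap(d_in, d_scratch);  // partial sums become next round's input
        w = blocks;
    }
    float result;
    cudaMemcpy(&result, d_in, sizeof(float), cudaMemcpyDeviceToHost);
    return result;
}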
@@ -8,7 +8,7 @@
 #include "convolution.cuh"
 #include "ilayer.cuh"
 
-namespace Layers {
+namespace CUDANet::Layers {
 
 /**
  * @brief 2D convolutional layer
@@ -125,6 +125,6 @@ class Conv2d : public ILayer {
     void toCuda();
 };
 
-} // namespace Layers
+} // namespace CUDANet::Layers
 
 #endif // CUDANET_CONV_LAYER_H
@@ -7,7 +7,7 @@
 
 #include "ilayer.cuh"
 
-namespace Layers {
+namespace CUDANet::Layers {
 
 /**
  * @brief Dense (fully connected) layer
@@ -53,8 +53,8 @@ class Dense : public ILayer {
     void setBiases(const float* biases);
 
   private:
-    int inputSize;
-    int outputSize;
+    unsigned int inputSize;
+    unsigned int outputSize;
 
     float* d_output;
 
@@ -67,8 +67,8 @@ class Dense : public ILayer {
     Layers::Activation activation;
 
     // Precompute kernel launch parameters
-    int forwardGridSize;
-    int biasGridSize;
+    unsigned int forwardGridSize;
+    unsigned int biasGridSize;
 
     /**
      * @brief Initialize the weights to zeros
@@ -89,6 +89,6 @@ class Dense : public ILayer {
     void toCuda();
 };
 
-} // namespace Layers
+} // namespace CUDANet::Layers
 
 #endif // CUDANET_DENSE_LAYER_H
@@ -4,7 +4,7 @@
 
 #include <vector>
 
-namespace Layers {
+namespace CUDANet::Layers {
 
 /**
  * @brief Activation functions
@@ -88,6 +88,6 @@ class ILayer {
     Layers::Activation activation;
 };
 
-} // namespace Layers
+} // namespace CUDANet::Layers
 
 #endif // CUDANET_I_LAYERH
@@ -3,7 +3,7 @@
 
 #include <ilayer.cuh>
 
-namespace Layers {
+namespace CUDANet::Layers {
 
 /**
  * @brief Input layer, just copies the input to the device
@@ -45,6 +45,6 @@ class Input : public ILayer {
     float* d_output;
 };
 
-} // namespace Layers
+} // namespace CUDANet::Layers
 
 #endif // CUDANET_INPUT_LAYER_H
@@ -2,7 +2,7 @@
 
 #include "activations.cuh"
 
-__global__ void Kernels::sigmoid(
+__global__ void CUDANet::Kernels::sigmoid(
     const float* __restrict__ src,
     float* __restrict__ dst,
    int len
@@ -15,8 +15,11 @@ __global__ void Kernels::sigmoid(
     }
 }
 
-__global__ void
-Kernels::relu(const float* __restrict__ src, float* __restrict__ dst, int len) {
+__global__ void CUDANet::Kernels::relu(
+    const float* __restrict__ src,
+    float* __restrict__ dst,
+    int len
+) {
     int stride = gridDim.x * blockDim.x;
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
 
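Both activation kernels compute a grid-wide stride alongside the thread id, the standard grid-stride loop pattern: a launch of any size covers an array of any length because each thread walks the array in steps of gridDim.x * blockDim.x. A minimal sketch of the pattern (illustrative; the loop bodies themselves are elided by the diff):

__global__ void relu_like(
    const float* __restrict__ src, float* __restrict__ dst, int len
) {
    int stride = gridDim.x * blockDim.x;
    int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Each thread handles elements tid, tid + stride, tid + 2*stride, ...
    for (int i = tid; i < len; i += stride) {
        dst[i] = fmaxf(src[i], 0.0f);
    }
}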
@@ -1,6 +1,7 @@
-#include "convolution.cuh"
 #include <iostream>
 
+#include "convolution.cuh"
+
 /*
 Pads matrix width x height x n_channels to width + 2 * padding x height + 2 *
 padding x n_channels Matrix is represented as a pointer to a vector
@@ -47,13 +48,13 @@ pre-allocated)
 n: Number of channels in input matrix
 p: Padding
 */
-__global__ void Kernels::padding(
-    const float* d_input,
-    float* d_padded,
-    int w,
-    int h,
-    int n,
-    int p
+__global__ void CUDANet::Kernels::padding(
+    const float* __restrict__ d_input,
+    float* __restrict__ d_padded,
+    const unsigned int w,
+    const unsigned int h,
+    const unsigned int n,
+    const unsigned int p
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -78,16 +79,16 @@ __global__ void Kernels::padding(
     }
 }
 
-__global__ void Kernels::convolution(
-    const float* d_input,
-    const float* d_kernel,
-    float* d_output,
-    int inputSize,
-    int nChannels,
-    int kernelSize,
-    int stride,
-    int nFilters,
-    int outputSize
+__global__ void CUDANet::Kernels::convolution(
+    const float* __restrict__ d_input,
+    const float* __restrict__ d_kernel,
+    float* __restrict__ d_output,
+    const unsigned int inputSize,
+    const unsigned int nChannels,
+    const unsigned int kernelSize,
+    const unsigned int stride,
+    const unsigned int nFilters,
+    const unsigned int outputSize
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
 
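For reference, the relationship between inputSize, kernelSize, stride, and outputSize follows standard convolution arithmetic; this is not spelled out in the diff itself, and the SAME case assumes the input was first run through the padding kernel above:

// VALID padding: the kernel must fit entirely inside the input.
unsigned int output_size_valid(unsigned int inputSize, unsigned int kernelSize, unsigned int stride) {
    return (inputSize - kernelSize) / stride + 1;
}

// SAME padding: pad so the output keeps the (strided) input size.
unsigned int output_size_same(unsigned int inputSize, unsigned int stride) {
    return (inputSize + stride - 1) / stride;  // ceil(inputSize / stride)
}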
@@ -1,14 +1,12 @@
 #include "cuda_helper.cuh"
 #include "matmul.cuh"
 
-#define SHARED_SIZE 128 * 4
-
-__global__ void Kernels::mat_vec_mul(
+__global__ void CUDANet::Kernels::mat_vec_mul(
     const float* __restrict__ d_matrix,
     const float* __restrict__ d_vector,
     float* __restrict__ d_output,
-    int w,
-    int h
+    const unsigned int w,
+    const unsigned int h
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
 
@@ -16,9 +14,8 @@ __global__ void Kernels::mat_vec_mul(
 
     float temp = 0.0f;
 
 #pragma unroll
-    for (unsigned int i = 0; i < (w + BLOCK_SIZE - 1) / BLOCK_SIZE; i++)
-    {
+    for (unsigned int i = 0; i < (w + BLOCK_SIZE - 1) / BLOCK_SIZE; i++) {
         if (i * BLOCK_SIZE + threadIdx.x < w) {
             shared[threadIdx.x] = d_vector[i * BLOCK_SIZE + threadIdx.x];
         } else {
@@ -27,8 +24,8 @@ __global__ void Kernels::mat_vec_mul(
 
         __syncthreads();
 
-        for (unsigned int j = 0; j < BLOCK_SIZE; j++)
-        {
+#pragma unroll
+        for (unsigned int j = 0; j < BLOCK_SIZE; j++) {
             temp += d_matrix[tid * w + i * BLOCK_SIZE + j] * shared[j];
         }
 
@@ -38,11 +35,11 @@ __global__ void Kernels::mat_vec_mul(
     d_output[tid] = temp;
 }
 
-__global__ void Kernels::vec_vec_add(
-    const float* d_vector1,
-    const float* d_vector2,
-    float* d_output,
-    int w
+__global__ void CUDANet::Kernels::vec_vec_add(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
 ) {
     int tid = blockDim.x * blockIdx.x + threadIdx.x;
     if (tid >= w) {
@@ -50,3 +47,15 @@ __global__ void Kernels::vec_vec_add(
     }
     d_output[tid] = d_vector1[tid] + d_vector2[tid];
 }
+
+__global__ void CUDANet::Kernels::reduce_sum(
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w
+) {
+    int tid = blockDim.x * blockIdx.x + threadIdx.x;
+
+    __shared__ float shared[BLOCK_SIZE];
+    shared[threadIdx.x] = d_vector[tid];
+    __syncthreads();
+}
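As committed, reduce_sum only loads one value per thread into shared memory and synchronizes; the combination step is still missing, presumably left for a follow-up commit. A common way to finish it is an in-block tree reduction, sketched here under the assumptions that d_output has one slot per block and blockDim.x equals BLOCK_SIZE and is a power of two (this is a sketch, not the author's implementation):

__global__ void reduce_sum_sketch(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int w
) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;

    __shared__ float shared[BLOCK_SIZE];
    shared[threadIdx.x] = tid < w ? d_vector[tid] : 0.0f;  // zero-pad the tail
    __syncthreads();

    // Halve the number of active threads each step: BLOCK_SIZE/2, /4, ...
    for (unsigned int s = blockDim.x / 2; s > 0; s >>= 1) {
        if (threadIdx.x < s) {
            shared[threadIdx.x] += shared[threadIdx.x + s];
        }
        __syncthreads();
    }

    // Thread 0 publishes this block's partial sum.
    if (threadIdx.x == 0) {
        d_output[blockIdx.x] = shared[0];
    }
}

Note that the committed version indexes d_vector[tid] with no bounds check, so callers would need w to be a multiple of the block size; the tid < w guard above avoids that.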
@@ -7,6 +7,8 @@
 #include "cuda_helper.cuh"
 #include "matmul.cuh"
 
+using namespace CUDANet;
+
 Layers::Conv2d::Conv2d(
     int inputSize,
     int inputChannels,
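Rather than rewriting every qualified name in the .cu files, the commit drops a file-level `using namespace CUDANet;` after the includes. Name lookup then resolves the existing `Layers::` qualifiers through the new top-level namespace, so the out-of-class definitions compile unchanged. A self-contained illustration of the mechanism (hypothetical class name, not repository code):

namespace CUDANet::Layers {
struct Example {  // hypothetical stand-in for Conv2d / Dense / Input
    explicit Example(int n);
};
} // namespace CUDANet::Layers

using namespace CUDANet;

// `Layers` is found inside CUDANet via the using-directive, so this defines
// CUDANet::Layers::Example::Example -- the same reason the unchanged
// `Layers::Conv2d::Conv2d(...)` definition above still compiles.
Layers::Example::Example(int n) { (void)n; }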
@@ -10,6 +10,8 @@
 #include "dense.cuh"
 #include "matmul.cuh"
 
+using namespace CUDANet;
+
 Layers::Dense::Dense(
     int inputSize,
     int outputSize,
@@ -1,6 +1,8 @@
 #include "cuda_helper.cuh"
 #include "input.cuh"
 
+using namespace CUDANet;
+
 Layers::Input::Input(int inputSize) : inputSize(inputSize) {
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * inputSize));
@@ -25,7 +25,7 @@ TEST(ActivationsTest, SigmoidSanityCheck) {
     cudaStatus = cudaMemcpy(d_input, input, sizeof(float) * 3, cudaMemcpyHostToDevice);
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
-    Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
+    CUDANet::Kernels::sigmoid<<<1, 3>>>(d_input, d_output, 3);
     cudaStatus = cudaDeviceSynchronize();
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
@@ -44,7 +44,7 @@ TEST(MatMulTest, MatVecMulTest) {
     int THREADS_PER_BLOCK = std::max(w, h);
     int BLOCKS = 1;
 
-    Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
+    CUDANet::Kernels::mat_vec_mul<<<BLOCKS, THREADS_PER_BLOCK, sizeof(float) * w>>>(d_matrix, d_vector, d_output, w, h);
     cudaStatus = cudaDeviceSynchronize();
     EXPECT_EQ(cudaStatus, cudaSuccess);
 
@@ -51,7 +51,7 @@ TEST(PaddingTest, SimplePaddingTest) {
     int THREADS_PER_BLOCK = 64;
     int BLOCKS = paddedSize / THREADS_PER_BLOCK + 1;
 
-    Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
+    CUDANet::Kernels::padding<<<BLOCKS, THREADS_PER_BLOCK>>>(
         d_input, d_padded, w, h, n, p
     );
     cudaStatus = cudaDeviceSynchronize();
@@ -7,20 +7,20 @@
 
 class Conv2dTest : public ::testing::Test {
   protected:
-    Layers::Conv2d commonTestSetup(
+    CUDANet::Layers::Conv2d commonTestSetup(
         int inputSize,
         int inputChannels,
         int kernelSize,
         int stride,
-        Layers::Padding padding,
+        CUDANet::Layers::Padding padding,
         int numFilters,
-        Layers::Activation activation,
+        CUDANet::Layers::Activation activation,
         std::vector<float>& input,
         float* kernels,
         float*& d_input
     ) {
         // Create Conv2d layer
-        Layers::Conv2d conv2d(
+        CUDANet::Layers::Conv2d conv2d(
             inputSize, inputChannels, kernelSize, stride, padding, numFilters,
             activation
         );
@@ -57,9 +57,9 @@ TEST_F(Conv2dTest, SimpleTest) {
     int inputChannels = 1;
     int kernelSize = 2;
     int stride = 1;
-    Layers::Padding padding = Layers::Padding::VALID;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::VALID;
     int numFilters = 1;
-    Layers::Activation activation = Layers::Activation::NONE;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;
 
     std::vector<float> input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f,
                                 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f,
@@ -74,7 +74,7 @@ TEST_F(Conv2dTest, SimpleTest) {
     float* d_input;
     float* d_output;
 
-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -106,9 +106,9 @@ TEST_F(Conv2dTest, PaddedTest) {
     int inputChannels = 3;
     int kernelSize = 3;
     int stride = 1;
-    Layers::Padding padding = Layers::Padding::SAME;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
     int numFilters = 2;
-    Layers::Activation activation = Layers::Activation::NONE;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::NONE;
 
     // clang-format off
     std::vector<float> input = {
@@ -163,7 +163,7 @@ TEST_F(Conv2dTest, PaddedTest) {
     float* d_input;
     float* d_output;
 
-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -177,7 +177,8 @@ TEST_F(Conv2dTest, PaddedTest) {
     );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
+            numFilters,
         cudaMemcpyDeviceToHost
     );
 
@@ -207,8 +208,8 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
     int kernelSize = 3;
     int stride = 2;
     int numFilters = 2;
-    Layers::Padding padding = Layers::Padding::SAME;
-    Layers::Activation activation = Layers::Activation::RELU;
+    CUDANet::Layers::Padding padding = CUDANet::Layers::Padding::SAME;
+    CUDANet::Layers::Activation activation = CUDANet::Layers::Activation::RELU;
 
     // clang-format off
     std::vector<float> input = {
@@ -248,7 +249,7 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
     float* d_input;
     float* d_output;
 
-    Layers::Conv2d conv2d = commonTestSetup(
+    CUDANet::Layers::Conv2d conv2d = commonTestSetup(
         inputSize, inputChannels, kernelSize, stride, padding, numFilters,
         activation, input, kernels.data(), d_input
     );
@@ -262,7 +263,8 @@ TEST_F(Conv2dTest, StridedPaddedConvolution) {
     );
     cudaMemcpy(
         output.data(), d_output,
-        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() * numFilters,
+        sizeof(float) * conv2d.getOutputSize() * conv2d.getOutputSize() *
+            numFilters,
         cudaMemcpyDeviceToHost
     );
 
@@ -8,17 +8,17 @@
 
 class DenseLayerTest : public ::testing::Test {
   protected:
-    Layers::Dense commonTestSetup(
+    CUDANet::Layers::Dense commonTestSetup(
         int inputSize,
         int outputSize,
         std::vector<float>& input,
         float* weights,
         float* biases,
         float*& d_input,
-        Layers::Activation activation
+        CUDANet::Layers::Activation activation
     ) {
         // Create Dense layer
-        Layers::Dense denseLayer(inputSize, outputSize, activation);
+        CUDANet::Layers::Dense denseLayer(inputSize, outputSize, activation);
 
         // Set weights and biases
         denseLayer.setWeights(weights);
@@ -52,8 +52,8 @@ TEST_F(DenseLayerTest, Init) {
         int inputSize = i;
         int outputSize = j;
 
-        Layers::Dense denseLayer(
-            inputSize, outputSize, Layers::Activation::SIGMOID
+        CUDANet::Layers::Dense denseLayer(
+            inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
         );
     }
 }
@@ -73,8 +73,8 @@ TEST_F(DenseLayerTest, setWeights) {
     };
     // clang-format on
 
-    Layers::Dense denseLayer(
-        inputSize, outputSize, Layers::Activation::SIGMOID
+    CUDANet::Layers::Dense denseLayer(
+        inputSize, outputSize, CUDANet::Layers::Activation::SIGMOID
     );
 
     denseLayer.setWeights(weights.data());
@@ -99,9 +99,9 @@ TEST_F(DenseLayerTest, ForwardUnitWeightMatrixLinear) {
     float* d_input;
     float* d_output;
 
-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::NONE
+        CUDANet::Layers::Activation::NONE
     );
     d_output = denseLayer.forward(d_input);
 
@@ -140,9 +140,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixRelu) {
     float* d_input;
     float* d_output;
 
-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::RELU
+        CUDANet::Layers::Activation::RELU
     );
 
     d_output = denseLayer.forward(d_input);
@@ -185,9 +185,9 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrixSigmoid) {
     float* d_input;
     float* d_output;
 
-    Layers::Dense denseLayer = commonTestSetup(
+    CUDANet::Layers::Dense denseLayer = commonTestSetup(
         inputSize, outputSize, input, weights.data(), biases.data(), d_input,
-        Layers::Activation::SIGMOID
+        CUDANet::Layers::Activation::SIGMOID
     );
 
     d_output = denseLayer.forward(d_input);
@@ -1,16 +1,16 @@
 #include <gtest/gtest.h>
 
-#include "input.cuh"
 #include "cuda_helper.cuh"
+#include "input.cuh"
 
 TEST(InputLayerTest, Init) {
     std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};
-    Layers::Input inputLayer(6);
+    CUDANet::Layers::Input inputLayer(6);
     float* d_output = inputLayer.forward(input.data());
 
     std::vector<float> output(6);
-    CUDA_CHECK(cudaMemcpy(output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost));
+    CUDA_CHECK(cudaMemcpy(
+        output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
+    ));
     EXPECT_EQ(input, output);
 }
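Taken together, client code now addresses the library exclusively through the top-level namespace. A minimal forward-pass sketch using only constructors and methods that appear in these tests (weight values and error handling omitted; an illustration, not a shipped example):

#include <vector>

#include "dense.cuh"
#include "input.cuh"

int main() {
    std::vector<float> x = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};

    // Copy the host input to the device...
    CUDANet::Layers::Input inputLayer(6);
    float* d_input = inputLayer.forward(x.data());

    // ...then run it through a dense layer (weights and biases would be set
    // via setWeights / setBiases first, as commonTestSetup does above).
    CUDANet::Layers::Dense dense(6, 4, CUDANet::Layers::Activation::RELU);
    float* d_output = dense.forward(d_input);

    (void)d_output;  // device pointer to the 4 activated outputs
    return 0;
}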