Add support for non-square matrices

This commit is contained in:
2024-05-20 15:20:43 +02:00
parent 6f8b5f4081
commit 74098b24e3
21 changed files with 314 additions and 299 deletions

View File

@@ -2,6 +2,7 @@
#define CUDANET_CONVOLUTION_H
#include <cuda_runtime.h>
#include "layer.cuh"
namespace CUDANet::Kernels {
@@ -24,13 +25,13 @@ __global__ void convolution(
const float* __restrict__ d_kernel,
const float* __restrict__ d_bias,
float* __restrict__ d_output,
const int inputSize,
const dim2d inputSize,
const int nChannels,
const int paddingSize,
const int kernelSize,
const int stride,
const dim2d paddingSize,
const dim2d kernelSize,
const dim2d stride,
const int nFilters,
const int outputSize
const dim2d outputSize
);
} // namespace CUDANet::Kernels

View File

@@ -2,27 +2,28 @@
#define CUDANET_POOLING_H
#include <cuda_runtime.h>
#include "layer.cuh"
namespace CUDANet::Kernels {
__global__ void max_pooling(
const float* __restrict__ d_input,
float* __restrict__ d_output,
const int inputSize,
const int outputSize,
const dim2d inputSize,
const dim2d outputSize,
const int nChannels,
const int poolingSize,
const int stride
const dim2d poolingSize,
const dim2d stride
);
__global__ void avg_pooling(
const float* __restrict__ d_input,
float* __restrict__ d_output,
const int inputSize,
const int outputSize,
const dim2d inputSize,
const dim2d outputSize,
const int nChannels,
const int poolingSize,
const int stride
const dim2d poolingSize,
const dim2d stride
);
} // namespace CUDANet::Kernels

View File

@@ -9,10 +9,10 @@ namespace CUDANet::Layers {
class AvgPooling2D : public SequentialLayer {
public:
AvgPooling2D(
int inputSize,
dim2d inputSize,
int nChannels,
int poolingSize,
int stride,
dim2d poolingSize,
dim2d stride,
ActivationType activationType
);
~AvgPooling2D();
@@ -28,18 +28,18 @@ class AvgPooling2D : public SequentialLayer {
/**
* @brief Get input size
*
*
* @return int input size
*/
int getInputSize();
private:
int inputSize;
int nChannels;
int poolingSize;
int stride;
dim2d inputSize;
int nChannels;
dim2d poolingSize;
dim2d stride;
int outputSize;
dim2d outputSize;
float* d_output;

View File

@@ -10,7 +10,7 @@ namespace CUDANet::Layers {
class BatchNorm2D : public WeightedLayer {
public:
BatchNorm2D(int inputSize, int inputChannels, float epsilon, ActivationType activationType);
BatchNorm2D(dim2d inputSize, int inputChannels, float epsilon, ActivationType activationType);
~BatchNorm2D();
@@ -66,7 +66,7 @@ class BatchNorm2D : public WeightedLayer {
private:
int inputSize;
dim2d inputSize;
int inputChannels;
int gridSize;

View File

@@ -28,12 +28,12 @@ class Conv2d : public WeightedLayer {
* 'SOFTMAX' or 'NONE')
*/
Conv2d(
int inputSize,
dim2d inputSize,
int inputChannels,
int kernelSize,
int stride,
dim2d kernelSize,
dim2d stride,
int numFilters,
int paddingSize,
dim2d paddingSize,
ActivationType activationType
);
@@ -98,23 +98,23 @@ class Conv2d : public WeightedLayer {
*
* @return int
*/
int getPaddingSize() {
dim2d getPaddingSize() {
return paddingSize;
}
private:
// Inputs
int inputSize;
int inputChannels;
dim2d inputSize;
int inputChannels;
// Outputs
int outputSize;
dim2d outputSize;
// Kernel
int kernelSize;
int stride;
int paddingSize;
int numFilters;
dim2d kernelSize;
dim2d stride;
dim2d paddingSize;
int numFilters;
// Kernels
std::vector<float> weights;

View File

@@ -81,8 +81,8 @@ class Dense : public WeightedLayer {
int getInputSize();
private:
unsigned int inputSize;
unsigned int outputSize;
int inputSize;
int outputSize;
float* d_output;
@@ -95,8 +95,8 @@ class Dense : public WeightedLayer {
Layers::Activation* activation;
// Precompute kernel launch parameters
unsigned int forwardGridSize;
unsigned int biasGridSize;
int forwardGridSize;
int biasGridSize;
/**
* @brief Initialize the weights to zeros

View File

@@ -7,6 +7,8 @@
#define CUDANET_SAME_PADDING(inputSize, kernelSize, stride) \
((stride - 1) * inputSize - stride + kernelSize) / 2;
typedef std::pair<int, int> dim2d;
namespace CUDANet::Layers {
/**

View File

@@ -9,10 +9,10 @@ namespace CUDANet::Layers {
class MaxPooling2D : public SequentialLayer {
public:
MaxPooling2D(
int inputSize,
dim2d inputSize,
int nChannels,
int poolingSize,
int stride,
dim2d poolingSize,
dim2d stride,
ActivationType activationType
);
~MaxPooling2D();
@@ -28,18 +28,18 @@ class MaxPooling2D : public SequentialLayer {
/**
* @brief Get input size
*
*
* @return int input size
*/
int getInputSize();
private:
int inputSize;
int nChannels;
int poolingSize;
int stride;
dim2d inputSize;
int nChannels;
dim2d poolingSize;
dim2d stride;
int outputSize;
dim2d outputSize;
float* d_output;

View File

@@ -26,7 +26,7 @@ struct TensorInfo {
class Model {
public:
Model(const int inputSize, const int inputChannels, const int outputSize);
Model(const dim2d inputSize, const int inputChannels, const int outputSize);
Model(const Model& other);
~Model();
@@ -43,7 +43,7 @@ class Model {
Layers::Input* inputLayer;
Layers::Output* outputLayer;
int inputSize;
dim2d inputSize;
int inputChannels;
int outputSize;