Allocate activation on heap

This commit is contained in:
2024-04-22 18:59:16 +02:00
parent 26cea9b12c
commit a32c737785
10 changed files with 17 additions and 15 deletions

View File

@@ -38,7 +38,7 @@ class AvgPooling2D : public SequentialLayer {
float* d_output; float* d_output;
Activation activation; Activation* activation;
}; };
} // namespace CUDANet::Layers } // namespace CUDANet::Layers

View File

@@ -120,8 +120,7 @@ class Conv2d : public WeightedLayer {
float* d_weights; float* d_weights;
float* d_biases; float* d_biases;
// Kernels Activation* activation;
Activation activation;
/** /**
* @brief Initialize weights of the convolutional layer with zeros * @brief Initialize weights of the convolutional layer with zeros

View File

@@ -77,7 +77,7 @@ class Dense : public WeightedLayer {
std::vector<float> weights; std::vector<float> weights;
std::vector<float> biases; std::vector<float> biases;
Layers::Activation activation; Layers::Activation* activation;
// Precompute kernel launch parameters // Precompute kernel launch parameters
unsigned int forwardGridSize; unsigned int forwardGridSize;

View File

@@ -38,7 +38,7 @@ class MaxPooling2D : public SequentialLayer {
float* d_output; float* d_output;
Activation activation; Activation* activation;
}; };
} // namespace CUDANet::Layers } // namespace CUDANet::Layers

View File

@@ -77,4 +77,3 @@ void Activation::activate(float* d_input) {
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());
} }

View File

@@ -18,7 +18,7 @@ AvgPooling2D::AvgPooling2D(
outputSize = (inputSize - poolingSize) / stride + 1; outputSize = (inputSize - poolingSize) / stride + 1;
activation = activation =
Activation(activationType, outputSize * outputSize * nChannels); new Activation(activationType, outputSize * outputSize * nChannels);
d_output = nullptr; d_output = nullptr;
CUDA_CHECK(cudaMalloc( CUDA_CHECK(cudaMalloc(
@@ -28,6 +28,7 @@ AvgPooling2D::AvgPooling2D(
AvgPooling2D::~AvgPooling2D() { AvgPooling2D::~AvgPooling2D() {
cudaFree(d_output); cudaFree(d_output);
delete activation;
} }
float* AvgPooling2D::forward(const float* d_input) { float* AvgPooling2D::forward(const float* d_input) {
@@ -44,7 +45,7 @@ float* AvgPooling2D::forward(const float* d_input) {
); );
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());
activation.activate(d_output); activation->activate(d_output);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());
return d_output; return d_output;

View File

@@ -29,7 +29,7 @@ Conv2d::Conv2d(
outputSize = (inputSize - kernelSize + 2 * paddingSize) / stride + 1; outputSize = (inputSize - kernelSize + 2 * paddingSize) / stride + 1;
activation = Activation( activation = new Activation(
activationType, outputSize * outputSize * numFilters activationType, outputSize * outputSize * numFilters
); );
@@ -62,6 +62,7 @@ Conv2d::~Conv2d() {
cudaFree(d_output); cudaFree(d_output);
cudaFree(d_weights); cudaFree(d_weights);
cudaFree(d_biases); cudaFree(d_biases);
delete activation;
} }
void Conv2d::initializeWeights() { void Conv2d::initializeWeights() {
@@ -123,7 +124,7 @@ float* Conv2d::forward(const float* d_input) {
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());
// Apply activation // Apply activation
activation.activate(d_output); activation->activate(d_output);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());

View File

@@ -45,14 +45,14 @@ Dense::Dense(
(std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE; (std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE; biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
activation = Activation(activationType, outputSize); activation = new Activation(activationType, outputSize);
} }
Dense::~Dense() { Dense::~Dense() {
// Free GPU memory
cudaFree(d_output); cudaFree(d_output);
cudaFree(d_weights); cudaFree(d_weights);
cudaFree(d_biases); cudaFree(d_biases);
delete activation;
} }
void Dense::initializeWeights() { void Dense::initializeWeights() {
@@ -75,7 +75,7 @@ float* Dense::forward(const float* d_input) {
); );
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());
activation.activate(d_output); activation->activate(d_output);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());
return d_output; return d_output;

View File

@@ -17,7 +17,7 @@ MaxPooling2D::MaxPooling2D(
outputSize = (inputSize - 1) / stride + 1; outputSize = (inputSize - 1) / stride + 1;
activation = Activation( activation = new Activation(
activationType, outputSize * outputSize * nChannels activationType, outputSize * outputSize * nChannels
); );
@@ -30,6 +30,7 @@ MaxPooling2D::MaxPooling2D(
MaxPooling2D::~MaxPooling2D() { MaxPooling2D::~MaxPooling2D() {
cudaFree(d_output); cudaFree(d_output);
delete activation;
} }
@@ -47,7 +48,7 @@ float* MaxPooling2D::forward(const float* d_input) {
); );
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());
activation.activate(d_output); activation->activate(d_output);
CUDA_CHECK(cudaDeviceSynchronize()); CUDA_CHECK(cudaDeviceSynchronize());
return d_output; return d_output;

View File

@@ -31,6 +31,7 @@ void Utils::max(float* d_vec, float* d_max, const unsigned int length) {
CUDA_CHECK(cudaGetLastError()); CUDA_CHECK(cudaGetLastError());
int remaining = grid_size; int remaining = grid_size;
while (remaining > 1) { while (remaining > 1) {
int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE; int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining); CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);