Allocate activation on heap

commit a32c737785
parent 26cea9b12c
2024-04-22 18:59:16 +02:00

10 changed files with 17 additions and 15 deletions


@@ -77,4 +77,3 @@ void Activation::activate(float* d_input) {
     CUDA_CHECK(cudaDeviceSynchronize());
 }
-


@@ -18,7 +18,7 @@ AvgPooling2D::AvgPooling2D(
     outputSize = (inputSize - poolingSize) / stride + 1;
     activation =
-        Activation(activationType, outputSize * outputSize * nChannels);
+        new Activation(activationType, outputSize * outputSize * nChannels);
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc(
@@ -28,6 +28,7 @@ AvgPooling2D::AvgPooling2D(
 AvgPooling2D::~AvgPooling2D() {
     cudaFree(d_output);
+    delete activation;
 }

 float* AvgPooling2D::forward(const float* d_input) {
@@ -44,7 +45,7 @@ float* AvgPooling2D::forward(const float* d_input) {
     );
     CUDA_CHECK(cudaGetLastError());
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
     return d_output;
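The same three-part change repeats in every file below: the activation member becomes a pointer, each constructor allocates it with new, each destructor releases it with delete, and call sites switch from . to ->. A condensed sketch of the pattern, using a stand-in Activation class (only the constructor and activate() signatures are taken from the diff; everything else is simplified):

    // Stand-in for CUDANet's Activation; only the interface the diff
    // exercises is reproduced here.
    class Activation {
      public:
        Activation(int activationType, int size) {}
        void activate(float* d_data) {}
    };

    class Layer {
      public:
        Layer(int activationType, int outputSize)
            // The member now holds a heap allocation instead of a
            // by-value object.
            : activation(new Activation(activationType, outputSize)) {}

        ~Layer() {
            delete activation;  // matching release added in each destructor
        }

        float* forward(float* d_output) {
            activation->activate(d_output);  // '.' becomes '->'
            return d_output;
        }

      private:
        Activation* activation;
    };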


@@ -29,7 +29,7 @@ Conv2d::Conv2d(
     outputSize = (inputSize - kernelSize + 2 * paddingSize) / stride + 1;
-    activation = Activation(
+    activation = new Activation(
         activationType, outputSize * outputSize * numFilters
     );
@@ -62,6 +62,7 @@ Conv2d::~Conv2d() {
     cudaFree(d_output);
     cudaFree(d_weights);
     cudaFree(d_biases);
+    delete activation;
 }

 void Conv2d::initializeWeights() {
@@ -123,7 +124,7 @@ float* Conv2d::forward(const float* d_input) {
     CUDA_CHECK(cudaGetLastError());
     // Apply activation
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
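As an aside, the outputSize expression above is the usual convolution output-size formula. A hypothetical compile-time check of two illustrative cases (the function name is made up; the arithmetic mirrors the line in the diff):

    constexpr int convOutputSize(
        int inputSize, int kernelSize, int paddingSize, int stride
    ) {
        return (inputSize - kernelSize + 2 * paddingSize) / stride + 1;
    }

    // 3x3 kernel, padding 1, stride 1 preserves the spatial size ...
    static_assert(convOutputSize(28, 3, 1, 1) == 28, "same-size convolution");
    // ... while stride 2, no padding roughly halves it: (28 - 3) / 2 + 1 = 13.
    static_assert(convOutputSize(28, 3, 0, 2) == 13, "strided convolution");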


@@ -45,14 +45,14 @@ Dense::Dense(
         (std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
     biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
-    activation = Activation(activationType, outputSize);
+    activation = new Activation(activationType, outputSize);
 }

 Dense::~Dense() {
     // Free GPU memory
     cudaFree(d_output);
     cudaFree(d_weights);
     cudaFree(d_biases);
+    delete activation;
 }

 void Dense::initializeWeights() {
@@ -75,7 +75,7 @@ float* Dense::forward(const float* d_input) {
     );
     CUDA_CHECK(cudaGetLastError());
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
     return d_output;
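One caveat the diff leaves open: with a raw owning pointer, the compiler-generated copy constructor and copy assignment would leave two layers deleting the same Activation. A std::unique_ptr variant would get the same heap allocation without the hand-written delete; this is an alternative sketch (reusing the stand-in Activation above), not what the commit does:

    #include <memory>

    class DenseSketch {
      public:
        DenseSketch(int activationType, int outputSize)
            : activation(
                  std::make_unique<Activation>(activationType, outputSize)) {}
        // No explicit destructor: unique_ptr frees the Activation
        // automatically, and accidental copies fail to compile instead
        // of double-deleting.
      private:
        std::unique_ptr<Activation> activation;
    };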


@@ -17,7 +17,7 @@ MaxPooling2D::MaxPooling2D(
     outputSize = (inputSize - 1) / stride + 1;
-    activation = Activation(
+    activation = new Activation(
         activationType, outputSize * outputSize * nChannels
     );
@@ -30,6 +30,7 @@ MaxPooling2D::MaxPooling2D(
 MaxPooling2D::~MaxPooling2D() {
     cudaFree(d_output);
+    delete activation;
 }
@@ -47,7 +48,7 @@ float* MaxPooling2D::forward(const float* d_input) {
     );
     CUDA_CHECK(cudaGetLastError());
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
     return d_output;


@@ -31,6 +31,7 @@ void Utils::max(float* d_vec, float* d_max, const unsigned int length) {
     CUDA_CHECK(cudaGetLastError());
     int remaining = grid_size;
     while (remaining > 1) {
+        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
+        CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);
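The page cuts off inside this last hunk, but the visible lines show the shape of the multi-pass reduction: (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE is integer ceiling division, so each pass launches just enough blocks to cover the remaining partial maxima. A sketch of how the loop presumably continues; the update of remaining is not visible in the hunk and is an assumption:

    int remaining = grid_size;
    while (remaining > 1) {
        // Ceiling division: e.g. remaining = 1000, BLOCK_SIZE = 256 -> 4 blocks.
        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(
            d_max, d_max, remaining
        );
        CUDA_CHECK(cudaGetLastError());
        // Each block writes one partial max, so the next pass reduces
        // blocks_needed values (assumed; this line lies past the visible hunk).
        remaining = blocks_needed;
    }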