mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Allocate activation on heap
This commit is contained in:
@@ -77,4 +77,3 @@ void Activation::activate(float* d_input) {
|
||||
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ AvgPooling2D::AvgPooling2D(
|
||||
outputSize = (inputSize - poolingSize) / stride + 1;
|
||||
|
||||
activation =
|
||||
Activation(activationType, outputSize * outputSize * nChannels);
|
||||
new Activation(activationType, outputSize * outputSize * nChannels);
|
||||
|
||||
d_output = nullptr;
|
||||
CUDA_CHECK(cudaMalloc(
|
||||
@@ -28,6 +28,7 @@ AvgPooling2D::AvgPooling2D(
|
||||
|
||||
AvgPooling2D::~AvgPooling2D() {
|
||||
cudaFree(d_output);
|
||||
delete activation;
|
||||
}
|
||||
|
||||
float* AvgPooling2D::forward(const float* d_input) {
|
||||
@@ -44,7 +45,7 @@ float* AvgPooling2D::forward(const float* d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
activation.activate(d_output);
|
||||
activation->activate(d_output);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
return d_output;
|
||||
|
||||
@@ -29,7 +29,7 @@ Conv2d::Conv2d(
|
||||
|
||||
outputSize = (inputSize - kernelSize + 2 * paddingSize) / stride + 1;
|
||||
|
||||
activation = Activation(
|
||||
activation = new Activation(
|
||||
activationType, outputSize * outputSize * numFilters
|
||||
);
|
||||
|
||||
@@ -62,6 +62,7 @@ Conv2d::~Conv2d() {
|
||||
cudaFree(d_output);
|
||||
cudaFree(d_weights);
|
||||
cudaFree(d_biases);
|
||||
delete activation;
|
||||
}
|
||||
|
||||
void Conv2d::initializeWeights() {
|
||||
@@ -123,7 +124,7 @@ float* Conv2d::forward(const float* d_input) {
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Apply activation
|
||||
activation.activate(d_output);
|
||||
activation->activate(d_output);
|
||||
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
|
||||
@@ -45,14 +45,14 @@ Dense::Dense(
|
||||
(std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
|
||||
activation = Activation(activationType, outputSize);
|
||||
activation = new Activation(activationType, outputSize);
|
||||
}
|
||||
|
||||
Dense::~Dense() {
|
||||
// Free GPU memory
|
||||
cudaFree(d_output);
|
||||
cudaFree(d_weights);
|
||||
cudaFree(d_biases);
|
||||
delete activation;
|
||||
}
|
||||
|
||||
void Dense::initializeWeights() {
|
||||
@@ -75,7 +75,7 @@ float* Dense::forward(const float* d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
activation.activate(d_output);
|
||||
activation->activate(d_output);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
return d_output;
|
||||
|
||||
@@ -17,7 +17,7 @@ MaxPooling2D::MaxPooling2D(
|
||||
|
||||
outputSize = (inputSize - 1) / stride + 1;
|
||||
|
||||
activation = Activation(
|
||||
activation = new Activation(
|
||||
activationType, outputSize * outputSize * nChannels
|
||||
);
|
||||
|
||||
@@ -30,6 +30,7 @@ MaxPooling2D::MaxPooling2D(
|
||||
|
||||
MaxPooling2D::~MaxPooling2D() {
|
||||
cudaFree(d_output);
|
||||
delete activation;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +48,7 @@ float* MaxPooling2D::forward(const float* d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
activation.activate(d_output);
|
||||
activation->activate(d_output);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
return d_output;
|
||||
|
||||
@@ -31,6 +31,7 @@ void Utils::max(float* d_vec, float* d_max, const unsigned int length) {
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
int remaining = grid_size;
|
||||
|
||||
while (remaining > 1) {
|
||||
int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);
|
||||
|
||||
Reference in New Issue
Block a user