Allocate activation on heap
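What changed: every layer's Activation member moves from a value (Activation activation;) to a heap-allocated pointer (Activation* activation;). Each constructor now calls new Activation(...), each forward() calls activation->activate(...) instead of activation.activate(...), and each destructor gains a matching delete activation;. The value member was assigned in the constructor body (activation = Activation(...)), which requires Activation to be default-constructible and assignable; the pointer form drops that requirement and defers construction until the layer knows its output size.

A minimal sketch of the resulting ownership pattern, in plain C++ with a stand-in Activation (the real class launches CUDA kernels; the class and member names mirror the diff, everything else here is illustrative):

// Sketch only: stand-in Activation, no CUDA. Shows the new/delete pairing
// this commit introduces in each layer.
#include <cstdio>

enum class ActivationType { NONE, RELU, SIGMOID };

class Activation {
  public:
    Activation(ActivationType type, unsigned int length)
        : type(type), length(length) {}
    void activate(float* /*d_input*/) { /* would launch a kernel here */ }

  private:
    ActivationType type;
    unsigned int   length;
};

class AvgPooling2D {
  public:
    AvgPooling2D(unsigned int outputSize, unsigned int nChannels,
                 ActivationType activationType) {
        // Heap allocation in the constructor, once the output size is known
        activation =
            new Activation(activationType, outputSize * outputSize * nChannels);
    }
    ~AvgPooling2D() {
        delete activation;  // pairs with the new in the constructor
    }

    // Copying the layer would double-delete the raw pointer; the diff does
    // not show this, but a fuller version would delete the copy operations.
    AvgPooling2D(const AvgPooling2D&)            = delete;
    AvgPooling2D& operator=(const AvgPooling2D&) = delete;

  private:
    Activation* activation;
};

int main() {
    AvgPooling2D pool(16, 3, ActivationType::RELU);
    std::puts("constructed and destroyed; delete pairs with new");
    return 0;
}

A std::unique_ptr<Activation> would express the same ownership without the hand-written delete; a sketch of that variant follows the Dense hunks below.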
@@ -38,7 +38,7 @@ class AvgPooling2D : public SequentialLayer {
 
     float* d_output;
 
-    Activation activation;
+    Activation* activation;
 };
 
 } // namespace CUDANet::Layers
@@ -120,8 +120,7 @@ class Conv2d : public WeightedLayer {
     float* d_weights;
     float* d_biases;
 
-    // Kernels
-    Activation activation;
+    Activation* activation;
 
     /**
      * @brief Initialize weights of the convolutional layer with zeros
@@ -77,7 +77,7 @@ class Dense : public WeightedLayer {
    std::vector<float> weights;
    std::vector<float> biases;
 
-    Layers::Activation activation;
+    Layers::Activation* activation;
 
    // Precompute kernel launch parameters
    unsigned int forwardGridSize;
@@ -38,7 +38,7 @@ class MaxPooling2D : public SequentialLayer {
 
     float* d_output;
 
-    Activation activation;
+    Activation* activation;
 };
 
 } // namespace CUDANet::Layers
@@ -77,4 +77,3 @@ void Activation::activate(float* d_input) {
 
     CUDA_CHECK(cudaDeviceSynchronize());
 }
-
@@ -18,7 +18,7 @@ AvgPooling2D::AvgPooling2D(
     outputSize = (inputSize - poolingSize) / stride + 1;
 
     activation =
-        Activation(activationType, outputSize * outputSize * nChannels);
+        new Activation(activationType, outputSize * outputSize * nChannels);
 
     d_output = nullptr;
     CUDA_CHECK(cudaMalloc(
@@ -28,6 +28,7 @@ AvgPooling2D::AvgPooling2D(
 
 AvgPooling2D::~AvgPooling2D() {
     cudaFree(d_output);
+    delete activation;
 }
 
 float* AvgPooling2D::forward(const float* d_input) {
@@ -44,7 +45,7 @@ float* AvgPooling2D::forward(const float* d_input) {
     );
     CUDA_CHECK(cudaGetLastError());
 
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
 
     return d_output;
@@ -29,7 +29,7 @@ Conv2d::Conv2d(
 
     outputSize = (inputSize - kernelSize + 2 * paddingSize) / stride + 1;
 
-    activation = Activation(
+    activation = new Activation(
         activationType, outputSize * outputSize * numFilters
     );
 
@@ -62,6 +62,7 @@ Conv2d::~Conv2d() {
     cudaFree(d_output);
     cudaFree(d_weights);
     cudaFree(d_biases);
+    delete activation;
 }
 
 void Conv2d::initializeWeights() {
@@ -123,7 +124,7 @@ float* Conv2d::forward(const float* d_input) {
     CUDA_CHECK(cudaGetLastError());
 
     // Apply activation
-    activation.activate(d_output);
+    activation->activate(d_output);
 
     CUDA_CHECK(cudaDeviceSynchronize());
 
@@ -45,14 +45,14 @@ Dense::Dense(
         (std::max(inputSize, outputSize) + BLOCK_SIZE - 1) / BLOCK_SIZE;
     biasGridSize = (outputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
 
-    activation = Activation(activationType, outputSize);
+    activation = new Activation(activationType, outputSize);
 }
 
 Dense::~Dense() {
-    // Free GPU memory
     cudaFree(d_output);
     cudaFree(d_weights);
     cudaFree(d_biases);
+    delete activation;
 }
 
 void Dense::initializeWeights() {
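For contrast, and not what the commit does: the same heap ownership expressed with std::unique_ptr, which removes the manual delete from each destructor. Names mirror the Dense hunk above; the Activation here is a stand-in:

// Sketch only: unique_ptr variant of the same ownership. Requires C++14
// for std::make_unique.
#include <memory>

enum class ActivationType { NONE, RELU };

class Activation {
  public:
    Activation(ActivationType /*type*/, unsigned int /*length*/) {}
    void activate(float* /*d_input*/) {}
};

class Dense {
  public:
    Dense(unsigned int outputSize, ActivationType activationType)
        : activation(
              std::make_unique<Activation>(activationType, outputSize)) {}
    // No hand-written destructor: unique_ptr releases the Activation, and
    // the implicitly deleted copy operations prevent a double free.

  private:
    std::unique_ptr<Activation> activation;
};

int main() {
    Dense dense(128, ActivationType::RELU);
    return 0;
}

The raw-pointer form in the diff does keep the allocation explicit, which is stylistically consistent with the cudaMalloc/cudaFree pairs already in these destructors.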
@@ -75,7 +75,7 @@ float* Dense::forward(const float* d_input) {
    );
    CUDA_CHECK(cudaGetLastError());
 
-    activation.activate(d_output);
+    activation->activate(d_output);
    CUDA_CHECK(cudaDeviceSynchronize());
 
    return d_output;
@@ -17,7 +17,7 @@ MaxPooling2D::MaxPooling2D(
 
     outputSize = (inputSize - 1) / stride + 1;
 
-    activation = Activation(
+    activation = new Activation(
         activationType, outputSize * outputSize * nChannels
     );
 
@@ -30,6 +30,7 @@ MaxPooling2D::MaxPooling2D(
 
 MaxPooling2D::~MaxPooling2D() {
     cudaFree(d_output);
+    delete activation;
 }
 
 
@@ -47,7 +48,7 @@ float* MaxPooling2D::forward(const float* d_input) {
     );
     CUDA_CHECK(cudaGetLastError());
 
-    activation.activate(d_output);
+    activation->activate(d_output);
     CUDA_CHECK(cudaDeviceSynchronize());
 
     return d_output;
@@ -31,6 +31,7 @@ void Utils::max(float* d_vec, float* d_max, const unsigned int length) {
    CUDA_CHECK(cudaGetLastError());
 
    int remaining = grid_size;
+
    while (remaining > 1) {
        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
        CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);
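The final hunk only inserts a blank line, but the loop around it is worth spelling out: Utils::max reduces a device vector to a single maximum by launching max_reduce repeatedly, each pass collapsing `remaining` values into one partial maximum per block, until a single value is left in d_max[0]. A hedged sketch of the whole driver under two assumptions: the kernel below is a stand-in that approximates CUDANet::Kernels::max_reduce, and the loop body ends with remaining = blocks_needed; (the hunk stops before the loop body closes):

// Sketch only: stand-in reduction kernel plus the host driver pattern from
// the hunk above. BLOCK_SIZE and the kernel body are assumptions.
#include <cfloat>
#include <cuda_runtime.h>

#define BLOCK_SIZE 256

// Each block reduces up to BLOCK_SIZE elements of `in` to one partial
// maximum in out[blockIdx.x]. BLOCK_SIZE must be a power of two.
__global__ void max_reduce(const float* in, float* out, unsigned int len) {
    __shared__ float s[BLOCK_SIZE];
    unsigned int tid = threadIdx.x;
    unsigned int i   = blockIdx.x * blockDim.x + tid;
    s[tid] = (i < len) ? in[i] : -FLT_MAX;
    __syncthreads();
    for (unsigned int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
        if (tid < stride) s[tid] = fmaxf(s[tid], s[tid + stride]);
        __syncthreads();
    }
    if (tid == 0) out[blockIdx.x] = s[0];
}

// Host driver: first pass reduces the input into grid_size partial maxima,
// then repeated in-place passes shrink them until one value remains.
void vec_max(const float* d_vec, float* d_max, unsigned int length) {
    int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
    max_reduce<<<grid_size, BLOCK_SIZE>>>(d_vec, d_max, length);

    int remaining = grid_size;

    while (remaining > 1) {
        int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
        // In-place pass, launched the same way as in the diff. Note: when
        // blocks_needed > 1, one block can read slots another block writes;
        // a ping-pong buffer would avoid that overlap.
        max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);
        remaining = blocks_needed;  // assumed update; outside the hunk
    }
}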