Allocate activation on heap

This commit is contained in:
2024-04-22 18:59:16 +02:00
parent 26cea9b12c
commit a32c737785
10 changed files with 17 additions and 15 deletions

View File

@@ -31,6 +31,7 @@ void Utils::max(float* d_vec, float* d_max, const unsigned int length) {
CUDA_CHECK(cudaGetLastError());
int remaining = grid_size;
while (remaining > 1) {
int blocks_needed = (remaining + BLOCK_SIZE - 1) / BLOCK_SIZE;
CUDANet::Kernels::max_reduce<<<blocks_needed, BLOCK_SIZE>>>(d_max, d_max, remaining);