WIP Migrate Dense layer

This commit is contained in:
2025-11-18 21:12:47 +01:00
parent 64eac7050b
commit 7f203b8947
14 changed files with 116 additions and 221 deletions

View File

@@ -45,4 +45,24 @@ void CUDA::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
}
/**
 * @brief Run the dense-layer forward pass on the device.
 *
 * Launches Kernels::mat_vec_mul on (weights, input) writing into output,
 * then Kernels::vec_vec_add on (biases, output) writing back into output,
 * and blocks until the device has finished before returning.
 * Presumably this yields output = weights * input + biases; the kernels are
 * defined elsewhere - TODO confirm against their implementation.
 *
 * @param weights     Tensor accessed as float data; first kernel operand.
 * @param biases      Tensor accessed as float data; added into output.
 * @param input       Tensor accessed as float data; second kernel operand.
 * @param output      Tensor accessed as float data; written by both kernels.
 * @param input_size  Forwarded to mat_vec_mul as its first size argument.
 * @param output_size Forwarded to mat_vec_mul as its second size argument and
 *                    to vec_vec_add as its element count.
 * @return Reference to the same output tensor that was passed in.
 */
CUDANet::Tensor& CUDA::dense(CUDANet::Tensor &weights, CUDANet::Tensor &biases, CUDANet::Tensor &input, CUDANet::Tensor &output, size_t input_size, size_t output_size) {
    // Ceil-divide so a partially filled trailing block is still launched.
    // NOTE(review): the mat-vec grid is sized by the larger of the two
    // dimensions - confirm that matches mat_vec_mul's indexing scheme.
    const auto larger_dim     = std::max(input_size, output_size);
    const auto mat_vec_blocks = (larger_dim + BLOCK_SIZE - 1) / BLOCK_SIZE;
    const auto bias_blocks    = (output_size + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Stage 1: matrix-vector kernel, result stored in output.
    auto weights_ptr = weights.data<float>();
    auto input_ptr   = input.data<float>();
    Kernels::mat_vec_mul<<<mat_vec_blocks, BLOCK_SIZE>>>(
        weights_ptr, input_ptr, output.data<float>(), input_size, output_size
    );
    CUDA_CHECK(cudaGetLastError());

    // Stage 2: add biases in place (output is both an operand and the destination).
    auto biases_ptr = biases.data<float>();
    Kernels::vec_vec_add<<<bias_blocks, BLOCK_SIZE>>>(
        biases_ptr, output.data<float>(), output.data<float>(), output_size
    );
    CUDA_CHECK(cudaGetLastError());

    // Wait for both kernels to complete before handing the tensor back.
    CUDA_CHECK(cudaDeviceSynchronize());

    return output;
}