Use shared memory for mat vec mul kernel

This commit is contained in:
2024-03-13 22:13:11 +01:00
parent 09480e42e5
commit 77004c16be
4 changed files with 77 additions and 7 deletions

View File

@@ -51,7 +51,7 @@ void Layers::Dense::initializeBiases() {
}
float* Layers::Dense::forward(const float* d_input) {
- Kernels::mat_vec_mul<<<1, outputSize>>>(
+ Kernels::mat_vec_mul<<<1, std::max(inputSize, outputSize), sizeof(float) * inputSize>>>(
d_weights, d_input, d_output, inputSize, outputSize
);