Use shared memory in the mat_vec_mul (matrix-vector multiplication) kernel

2025-11-06 01:34:22 +00:00 · 2024-03-13 22:13:11 +01:00
parent 09480e42e5
commit 77004c16be
4 changed files with 77 additions and 7 deletions
--- a/src/layers/dense.cu
+++ b/src/layers/dense.cu
@@ -51,7 +51,7 @@ void Layers::Dense::initializeBiases() {
 }

 float* Layers::Dense::forward(const float* d_input) {
-    Kernels::mat_vec_mul<<<1, outputSize>>>(
+    Kernels::mat_vec_mul<<<1, std::max(inputSize, outputSize), sizeof(float) * inputSize>>>(
        d_weights, d_input, d_output, inputSize, outputSize
    );