Use shared-memory tiling for the matrix-vector multiplication kernel

This commit is contained in:
2024-03-15 23:33:09 +01:00
parent 88f7fff217
commit dc86cddeb7
4 changed files with 54 additions and 24 deletions

View File

@@ -4,6 +4,10 @@
#include <cuda_runtime.h>
#include <cstdio>
/**
 * @brief Fallback value for BLOCK_SIZE
 *
 * Takes effect only when BLOCK_SIZE has not already been defined before this
 * point (for example via -DBLOCK_SIZE=<n> on the compiler command line), in
 * which case the earlier definition is kept.
 *
 * NOTE(review): presumably the threads-per-block count / shared-memory tile
 * width used by the kernels -- confirm at the use sites.
 */
#if !defined(BLOCK_SIZE)
#define BLOCK_SIZE 128
#endif // !defined(BLOCK_SIZE)
/**
* @brief CUDA error checking macro
*