mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-06 01:34:22 +00:00
Use shared-memory tiling for the matrix-vector multiplication kernel
This commit is contained in:
@@ -66,6 +66,10 @@ class Dense : public ILayer {
|
||||
|
||||
Layers::Activation activation;
|
||||
|
||||
// Precompute kernel launch parameters
|
||||
int forwardGridSize;
|
||||
int biasGridSize;
|
||||
|
||||
/**
|
||||
* @brief Initialize the weights to zeros
|
||||
*
|
||||
|
||||
@@ -4,6 +4,10 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <cstdio>
|
||||
|
||||
#ifndef BLOCK_SIZE
|
||||
#define BLOCK_SIZE 128
|
||||
#endif // BLOCK_SIZE
|
||||
|
||||
/**
|
||||
* @brief CUDA error checking macro
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user