mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-06 01:34:22 +00:00
Use shared memory for mat vec mul kernel
This commit is contained in:
@@ -51,7 +51,7 @@ void Layers::Dense::initializeBiases() {
|
||||
}
|
||||
|
||||
float* Layers::Dense::forward(const float* d_input) {
|
||||
-    Kernels::mat_vec_mul<<<1, outputSize>>>(
|
||||
+    Kernels::mat_vec_mul<<<1, std::max(inputSize, outputSize), sizeof(float) * inputSize>>>(
|
||||
d_weights, d_input, d_output, inputSize, outputSize
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user