diff --git a/include/kernels/matmul.cuh b/include/kernels/matmul.cuh
index 75100ba..de70f1f 100644
--- a/include/kernels/matmul.cuh
+++ b/include/kernels/matmul.cuh
@@ -62,6 +62,17 @@ __global__ void vec_scalar_sub(
     const unsigned int w
 );
 
+/**
+ * @brief Set the first w elements of a vector to 0.0f
+ *
+ * @param d_vector Pointer to the vector that is overwritten by the kernel
+ * @param w Number of elements to zero; threads with index >= w do nothing
+ */
+__global__ void clear(
+    float* __restrict__ d_vector,
+    const unsigned int w
+);
+
 } // namespace CUDANet::Kernels
 
 #endif // CUDANET_MATMUL_H
\ No newline at end of file
diff --git a/src/kernels/matmul.cu b/src/kernels/matmul.cu
index 190b8b4..80e3c00 100644
--- a/src/kernels/matmul.cu
+++ b/src/kernels/matmul.cu
@@ -83,4 +83,17 @@ __global__ void Kernels::vec_scalar_sub(
         return;
     }
     d_output[tid] = d_vector[tid] - d_scalar[0];
+}
+
+// Writes 0.0f to d_vector[0..w), one element per thread, indexed along x only.
+__global__ void Kernels::clear(
+    float* __restrict__ d_vector,
+    const unsigned int w
+) {
+    // Unsigned like w: a signed index turns negative above INT_MAX yet still passes the guard.
+    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    if (tid >= w) {
+        return;
+    }
+    d_vector[tid] = 0.0f;
 }
\ No newline at end of file