#pragma once #include namespace CUDANet::Kernels { template __global__ void sigmoid( const T* __restrict__ src, T* __restrict__ dst, const unsigned int len ); template __global__ void relu( const T* __restrict__ src, T* __restrict__ dst, const unsigned int len ); } // namespace CUDANet::Kernels