mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-12-23 14:54:28 +00:00
Refactor CUDA kernels and tensor operations for type generality
This commit is contained in:
@@ -4,29 +4,18 @@
|
||||
|
||||
namespace CUDANet::Kernels {
|
||||
|
||||
/**
|
||||
* @brief Sigmoid activation function kernel
|
||||
*
|
||||
* @param src Pointer to the source array
|
||||
* @param dst Pointer to the destination array
|
||||
* @param len Length of the arrays
|
||||
*/
|
||||
|
||||
template <typename T>
|
||||
__global__ void sigmoid(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const T* __restrict__ src,
|
||||
T* __restrict__ dst,
|
||||
const unsigned int len
|
||||
);
|
||||
|
||||
/**
|
||||
* @brief Relu activation function kernel
|
||||
*
|
||||
* @param src Pointer to the source array
|
||||
* @param dst Pointer to the destination array
|
||||
* @param len Length of the arrays
|
||||
*/
|
||||
template <typename T>
|
||||
__global__ void relu(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const T* __restrict__ src,
|
||||
T* __restrict__ dst,
|
||||
const unsigned int len
|
||||
);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user