Refactor CUDA kernels and tensor operations for type generality

This commit is contained in:
2025-11-26 20:47:55 +01:00
parent 13d3d38b68
commit 9ff214d759
14 changed files with 818 additions and 297 deletions

View File

@@ -4,29 +4,18 @@
namespace CUDANet::Kernels {
/**
* @brief Sigmoid activation function kernel (declaration only; the definition is not part of this view)
*
* @tparam T Element type of the source and destination arrays
* @param src Pointer to the source array (read-only, declared `__restrict__`)
* @param dst Pointer to the destination array (declared `__restrict__`)
* @param len Length of the arrays (presumably an element count, not bytes - confirm against the definition)
*
* NOTE(review): the parameter list below carries both `float` and `T` versions of
* `src`/`dst`. This looks like the removed and added sides of the diff rendered
* together, with the `T` lines being the current signature - confirm against the
* actual header.
*/
template <typename T>
__global__ void sigmoid(
const float* __restrict__ src,
float* __restrict__ dst,
const T* __restrict__ src,
T* __restrict__ dst,
const unsigned int len
);
/**
* @brief Relu activation function kernel (declaration only; the definition is not part of this view)
*
* @tparam T Element type of the source and destination arrays
* @param src Pointer to the source array (read-only, declared `__restrict__`)
* @param dst Pointer to the destination array (declared `__restrict__`)
* @param len Length of the arrays (presumably an element count, not bytes - confirm against the definition)
*
* NOTE(review): the parameter list below carries both `float` and `T` versions of
* `src`/`dst`. This looks like the removed and added sides of the diff rendered
* together, with the `T` lines being the current signature - confirm against the
* actual header.
*/
template <typename T>
__global__ void relu(
const float* __restrict__ src,
float* __restrict__ dst,
const T* __restrict__ src,
T* __restrict__ dst,
const unsigned int len
);