Refactor CUDA kernels and tensor operations for type generality

This commit is contained in:
2025-11-26 20:47:55 +01:00
parent 13d3d38b68
commit 9ff214d759
14 changed files with 818 additions and 297 deletions

View File

@@ -5,11 +5,12 @@
namespace CUDANet::Kernels {
template <typename T>
__global__ void convolution(
const float* __restrict__ d_input,
const float* __restrict__ d_kernel,
const float* __restrict__ d_bias,
float* __restrict__ d_output,
const T* __restrict__ d_input,
const T* __restrict__ d_kernel,
const T* __restrict__ d_bias,
T* __restrict__ d_output,
const Shape input_shape,
const Shape padding_shape,
const Shape kernel_shape,