Refactor CUDA kernels and tensor operations for type generality

2025-12-23 14:54:28 +00:00 · 2025-11-26 20:47:55 +01:00
parent 13d3d38b68
commit 9ff214d759
14 changed files with 818 additions and 297 deletions
--- a/include/backend/cuda/kernels/convolution.cuh
+++ b/include/backend/cuda/kernels/convolution.cuh
@@ -5,11 +5,12 @@

 namespace CUDANet::Kernels {

+template <typename T>
 __global__ void convolution(
-    const float* __restrict__ d_input,
-    const float* __restrict__ d_kernel,
-    const float* __restrict__ d_bias,
-    float* __restrict__ d_output,
+    const T* __restrict__ d_input,
+    const T* __restrict__ d_kernel,
+    const T* __restrict__ d_bias,
+    T* __restrict__ d_output,
    const Shape input_shape,
    const Shape padding_shape,
    const Shape kernel_shape,