Add toplevel CUDANet namespace

2025-12-23 14:54:28 +00:00 · 2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions
--- a/include/kernels/matmul.cuh
+++ b/include/kernels/matmul.cuh
@@ -1,11 +1,11 @@
 #ifndef CUDANET_MATMUL_H
 #define CUDANET_MATMUL_H

-namespace Kernels {
+namespace CUDANet::Kernels {

 /**
 * @brief Matrix vector multiplication kernel
- * 
+ *
 * @param d_matrix Device pointer to matrix
 * @param d_vector Device pointer to vector
 * @param d_output Device pointer to output vector
@@ -13,28 +13,41 @@ namespace Kernels {
 * @param h Height of the matrix
 */
 __global__ void mat_vec_mul(
-    const float* d_matrix,
-    const float* d_vector,
-    float*       d_output,
-    int          w,
-    int          h
+    const float* __restrict__ d_matrix,
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w,
+    const unsigned int h
 );

 /**
 * @brief Vector vector addition kernel
- * 
+ *
 * @param d_vector1 Device pointer to first vector
 * @param d_vector2 Device pointer to second vector
 * @param d_output Device pointer to output vector
 * @param w Length of the vectors
 */
 __global__ void vec_vec_add(
-    const float* d_vector1,
-    const float* d_vector2,
-    float*       d_output,
-    int          w
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
 );

-}  // namespace Kernels
+/**
+ * @brief Vector sum reduction kernel
+ *
+ * @param d_vector Device pointer to vector
+ * @param d_output Device pointer to output vector
+ * @param w Length of the vector
+ */
+__global__ void reduce_sum(
+    const float* __restrict__ d_vector,
+    float* __restrict__ d_output,
+    const unsigned int w
+);
+
+}  // namespace CUDANet::Kernels

 #endif  // CUDANET_MATMUL_H