Implement vector mean calculation

2025-12-23 06:44:24 +00:00 · 2024-04-28 22:04:15 +02:00
parent 26c12dafc0
commit 0ab623fa23
4 changed files with 79 additions and 1 deletions
--- a/include/kernels/matmul.cuh
+++ b/include/kernels/matmul.cuh
@@ -37,6 +37,29 @@ __global__ void vec_vec_add(
    const unsigned int w
 );

+/**
+ * @brief Vector vector subtraction kernel
+ * 
+ * Each thread handles one element, indexed by
+ * blockDim.x * blockIdx.x + threadIdx.x.
+ *
+ * @param d_vector1 Vector to subtract from (read-only)
+ * @param d_vector2 Vector being subtracted (read-only)
+ * @param d_output Receives d_vector1[i] - d_vector2[i] for every i < w
+ * @param w Number of elements; threads whose index is >= w do nothing
+ */
+__global__ void vec_vec_sub(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
+);
+
+/**
+ * @brief Vector vector element-wise multiplication kernel
+ *
+ * Each thread handles one element, indexed by
+ * blockDim.x * blockIdx.x + threadIdx.x.
+ *
+ * @param d_vector1 First factor vector (read-only)
+ * @param d_vector2 Second factor vector (read-only)
+ * @param d_output Receives d_vector1[i] * d_vector2[i] for every i < w
+ * @param w Number of elements; threads whose index is >= w do nothing
+ */
+__global__ void vec_vec_mul(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
+);
+
 /**
 * @brief Sub scalar from each element of the vector
 * 
--- a/include/utils/vector.cuh
+++ b/include/utils/vector.cuh
@@ -31,13 +31,30 @@ void sum(float *d_vec, float *d_sum, const unsigned int length);


 /**
- * @brief Utility function that returns the max of a vector
+ * @brief Get the max of a vector
 * 
 * @param d_vec Pointer to the vector
 * @param length Length of the vector
 */
 void max(float *d_vec, float *d_max, const unsigned int length);

+/**
+ * @brief Compute the mean of a vector
+ * 
+ * @param d_vec Pointer to the vector
+ * @param d_mean Pointer that receives the mean (sum of the elements / length)
+ * @param length Length of the vector; also used as the divisor
+ */
+void mean(float *d_vec, float *d_mean, const unsigned int length);
+
+/**
+ * @brief Compute the variance of a vector
+ * 
+ * @note Not implemented yet: the body of the definition is only a TODO.
+ *
+ * @param d_vec Pointer to the vector
+ * @param d_var Pointer meant to receive the variance (not written yet)
+ * @param length Length of the vector
+ */
+void var(float *d_vec, float *d_var, const unsigned int length);

 }  // namespace CUDANet::Utils

--- a/src/kernels/matmul.cu
+++ b/src/kernels/matmul.cu
@@ -36,6 +36,32 @@ __global__ void Kernels::vec_vec_add(
    d_output[tid] = d_vector1[tid] + d_vector2[tid];
 }

+// Element-wise difference: d_output[i] = d_vector1[i] - d_vector2[i] for i < w.
+// One thread per element; the index is derived from the x dimension only.
+__global__ void Kernels::vec_vec_sub(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
+) {
+    // Keep the index unsigned like w: a signed int would turn negative for
+    // indices above INT_MAX and still pass the (unsigned) bounds check below.
+    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    if (tid >= w) {
+        // Surplus thread in the last block: nothing to do.
+        return;
+    }
+    d_output[tid] = d_vector1[tid] - d_vector2[tid];
+}
+
+// Element-wise product: d_output[i] = d_vector1[i] * d_vector2[i] for i < w.
+// One thread per element; the index is derived from the x dimension only.
+__global__ void Kernels::vec_vec_mul(
+    const float* __restrict__ d_vector1,
+    const float* __restrict__ d_vector2,
+    float* __restrict__ d_output,
+    const unsigned int w
+) {
+    // Keep the index unsigned like w: a signed int would turn negative for
+    // indices above INT_MAX and still pass the (unsigned) bounds check below.
+    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;
+    if (tid >= w) {
+        // Surplus thread in the last block: nothing to do.
+        return;
+    }
+    d_output[tid] = d_vector1[tid] * d_vector2[tid];
+}
+
 __global__ void Kernels::vec_scalar_sub(
    const float* __restrict__ d_src,
    float* __restrict__ d_out,
--- a/src/utils/vector.cu
+++ b/src/utils/vector.cu
@@ -60,3 +60,15 @@ void Utils::sum(float* d_vec, float* d_sum, const unsigned int length) {
        remaining = blocks_needed;
    }
 }
+
+// Single-thread helper: divides the float at d_value by count, in place.
+static __global__ void mean_divide_in_place(
+    float* d_value,
+    const unsigned int count
+) {
+    *d_value = *d_value / count;
+}
+
+/*
+ * Mean of a vector: reduce with Utils::sum, then divide the total by length.
+ *
+ * d_vec    Pointer to the vector
+ * d_mean   Receives the mean
+ * length   Number of elements; also the divisor
+ *
+ * The previous version handed Utils::sum the address of a host stack
+ * variable and then dereferenced d_mean on the host. The reduction now
+ * writes straight into d_mean and the division runs in a kernel, so neither
+ * pointer is touched by host code.
+ *
+ * NOTE(review): assumes Utils::sum leaves the total in d_sum[0] and that
+ * d_mean meets whatever size Utils::sum requires of its output buffer --
+ * confirm against Utils::sum.
+ */
+void Utils::mean(float* d_vec, float* d_mean, const unsigned int length) {
+    Utils::sum(d_vec, d_mean, length);
+
+    // Same (default) stream as this launch uses, so it is ordered after any
+    // default-stream work issued by Utils::sum.
+    mean_divide_in_place<<<1, 1>>>(d_mean, length);
+}
+
+/*
+ * Variance of a vector -- not implemented yet.
+ *
+ * d_vec    Pointer to the vector
+ * d_var    Meant to receive the variance; currently left untouched
+ * length   Length of the vector
+ *
+ * The parameter list matches the declaration in include/utils/vector.cuh
+ * (d_vec, d_var, length); a qualified definition with a different parameter
+ * list does not match any declaration and fails to compile.
+ */
+void Utils::var(float* d_vec, float* d_var, const unsigned int length) {
+    // TODO: compute the variance of d_vec and store it through d_var.
+}