Add running mean and running var to batchnorm

2024-08-25 19:05:10 +02:00
parent 1136ca452f
commit 9704d0d53e
8 changed files with 205 additions and 71 deletions

View File

@@ -141,7 +141,6 @@ __global__ void vec_exp(
* @param src Device pointer to source vector
* @param dst Device pointer to destination vector
* @param len Length of the vector
* @return __global__
*/
__global__ void vec_sqrt(
const float* __restrict__ src,
@@ -149,6 +148,23 @@ __global__ void vec_sqrt(
const unsigned int len
);
/**
* @brief Scales the vector by 1/sqrt(scale + epsilon)
*
* @param src Device pointer to source vector
* @param dst Device pointer to destination vector
* @param scale Device pointer to the scale values
* @param epsilon Device pointer to the epsilon value
* @param len Length of the vector
*/
__global__ void vec_scale(
const float* __restrict__ src,
float* __restrict__ dst,
const float* __restrict__ scale,
const float* epsilon,
const unsigned int len
);
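
A plausible element-wise implementation of the `vec_scale` kernel declared above is sketched below. This is not taken from the commit; it assumes `scale` holds one value per element of `src` and that `epsilon` points to a single scalar in device memory.

```cuda
// Sketch only, not the commit's implementation: assumes `scale` holds one
// value per element and `epsilon` points to a single scalar on the device.
__global__ void vec_scale(
    const float* __restrict__ src,
    float* __restrict__ dst,
    const float* __restrict__ scale,
    const float* epsilon,
    const unsigned int len
) {
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < len) {
        // dst = src * 1/sqrt(scale + epsilon), as described in the brief above
        dst[i] = src[i] * rsqrtf(scale[i] + *epsilon);
    }
}
```
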
/**
* @brief Max reduction kernel
*

View File

@@ -50,6 +50,20 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
*/
std::vector<float> getBiases();
/**
* @brief Set the Running Mean
*
* @param running_mean_input Pointer to the running mean values
*/
void setRunningMean(const float* running_mean_input);
/**
* @brief Set the Running Var
*
* @param running_var_input Pointer to the running variance values
*/
void setRunningVar(const float* running_var_input);
/**
* @brief Get output size
*
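
A minimal sketch of how `setRunningMean` and `setRunningVar` might be implemented, assuming `running_mean`/`running_var` are host-side `std::vector<float>` members sized to the channel count and `d_running_mean`/`d_running_var` are pre-allocated device buffers (those member names appear in the hunks below; the copy logic itself is an assumption):

```cpp
// Sketch only; requires <algorithm> and <cuda_runtime.h>. Assumes the inputs
// point to one value per channel on the host and that the device buffers are
// already allocated.
void BatchNorm2d::setRunningMean(const float* running_mean_input) {
    std::copy(running_mean_input,
              running_mean_input + running_mean.size(),
              running_mean.begin());
    cudaMemcpy(d_running_mean, running_mean.data(),
               running_mean.size() * sizeof(float), cudaMemcpyHostToDevice);
}

void BatchNorm2d::setRunningVar(const float* running_var_input) {
    std::copy(running_var_input,
              running_var_input + running_var.size(),
              running_var.begin());
    cudaMemcpy(d_running_var, running_var.data(),
               running_var.size() * sizeof(float), cudaMemcpyHostToDevice);
}
```

In inference-only use, a caller would presumably load pretrained statistics once, e.g. `layer.setRunningMean(stats.data())`, before running forward passes.
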
@@ -75,9 +89,8 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
float* d_output;
float* d_mean;
float* d_mean_sub;
float* d_sqrt_var;
float* d_running_mean;
float* d_running_var;
float* d_length;
float* d_epsilon;
@@ -88,8 +101,8 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
std::vector<float> weights;
std::vector<float> biases;
std::vector<float> mean;
std::vector<float> sqrt_var;
std::vector<float> running_mean;
std::vector<float> running_var;
Activation* activation;
@@ -109,13 +122,13 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
* @brief Initialize the running mean of the batchnorm layer with zeros
*
*/
void initializeMean();
void initializeRunningMean();
/**
* @brief Initialize the running variance of the batchnorm layer with ones
*
*/
void initializeSqrtVar();
void initializeRunningVar();
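
A minimal sketch of the two initializers, consistent with the doc comments above (running mean zero-initialized, running variance one-initialized); only the names and semantics come from the header, the bodies are assumptions:

```cpp
// Sketch only; requires <algorithm>. Copying the initialized values to
// d_running_mean / d_running_var is assumed to happen elsewhere
// (e.g. together with the weights and biases).
void BatchNorm2d::initializeRunningMean() {
    std::fill(running_mean.begin(), running_mean.end(), 0.0f);
}

void BatchNorm2d::initializeRunningVar() {
    std::fill(running_var.begin(), running_var.end(), 1.0f);
}
```
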
/**
* @brief Copy weights and biases to the device