Implement batch norm layer

2024-04-28 19:58:00 +02:00
parent 3320f610db
commit f60d62f6bd
4 changed files with 340 additions and 3 deletions

@@ -38,7 +38,7 @@ __global__ void vec_vec_add(
 );
 /**
- * @brief Add scalar to each element of the vector
+ * @brief Subtract scalar from each element of the vector
  *
  * @param d_vector
  * @param d_scalar
@@ -54,7 +54,23 @@ __global__ void vec_scalar_sub(
 );
 /**
- * @brief Softmax activation function kernel
+ * @brief Add scalar to each element of the vector
+ *
+ * @param d_src
+ * @param d_out
+ * @param d_scalar
+ * @param len
+ * @return __global__
+ */
+__global__ void vec_scalar_add(
+    const float* __restrict__ d_src,
+    float* __restrict__ d_out,
+    const float* __restrict__ d_scalar,
+    const unsigned int len
+);
+/**
+ * @brief Divide each element of the vector by a scalar
+ *
  * @param src Pointer to the source array
  * @param dst Pointer to the destination array
@@ -68,7 +84,23 @@ __global__ void vec_scalar_div(
 );
 /**
- * @brief Softmax activation exponentiation kernel
+ * @brief Multiply each element of the vector by a scalar
+ *
+ * @param d_src
+ * @param d_out
+ * @param d_scalar
+ * @param len
+ * @return __global__
+ */
+__global__ void vec_scalar_mul(
+    const float* __restrict__ d_src,
+    float* __restrict__ d_out,
+    const float* __restrict__ d_scalar,
+    const unsigned int len
+);
+/**
+ * @brief Exponentiate each element of the vector
+ *
  * @param src Pointer to the source array
  * @param dst Pointer to the destination array

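The definitions of the two new kernels live in the corresponding .cu source file, which is not among the hunks shown here. As a minimal sketch, assuming a standard one-thread-per-element launch and that d_scalar points to a single device-resident float, vec_scalar_add could look like this (the commit's actual implementation may differ):

__global__ void vec_scalar_add(
    const float* __restrict__ d_src,
    float* __restrict__ d_out,
    const float* __restrict__ d_scalar,
    const unsigned int len
) {
    // One thread per element; threads past the end do nothing.
    unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < len) {
        // Broadcast the single scalar value across the vector.
        d_out[i] = d_src[i] + *d_scalar;
    }
}

vec_scalar_mul would be identical apart from the operator. Taking the scalar as a device pointer rather than by value lets the operand stay on the GPU between kernel launches.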

@@ -0,0 +1,123 @@
#ifndef CUDANET_BATCH_NORM_H
#define CUDANET_BATCH_NORM_H

#include <vector>

#include "activation.cuh"
#include "layer.cuh"

namespace CUDANet::Layers {

class BatchNorm : public WeightedLayer {
  public:
    BatchNorm(int inputSize, int inputChannels, ActivationType activationType);

    ~BatchNorm();

    /**
     * @brief Compute the forward pass of the batchnorm layer
     *
     * @param d_input Device pointer to the input
     * @return float* Device pointer to the output
     */
    float* forward(const float* d_input);

    /**
     * @brief Set the weights of the batchnorm layer
     *
     * @param weights_input Pointer to the weights
     */
    void setWeights(const float* weights_input);

    /**
     * @brief Get the weights of the batchnorm layer
     *
     * @return std::vector<float>
     */
    std::vector<float> getWeights();

    /**
     * @brief Set the biases of the batchnorm layer
     *
     * @param biases_input Pointer to the biases
     */
    void setBiases(const float* biases_input);

    /**
     * @brief Get the biases of the batchnorm layer
     *
     * @return std::vector<float>
     */
    std::vector<float> getBiases();

    /**
     * @brief Get output size
     *
     * @return int output size
     */
    int getOutputSize();

    /**
     * @brief Get input size
     *
     * @return int input size
     */
    int getInputSize();

  private:
    int inputSize;
    int inputChannels;
    int gridSize;

    float* d_output;
    float* d_mean;
    float* d_sqrt_var;
    float* d_weights;
    float* d_biases;

    std::vector<float> weights;
    std::vector<float> biases;
    std::vector<float> mean;
    std::vector<float> sqrt_var;

    Activation* activation;

    /**
     * @brief Initialize weights of the batchnorm layer with zeros
     */
    void initializeWeights();

    /**
     * @brief Initialize biases of the batchnorm layer with zeros
     */
    void initializeBiases();

    /**
     * @brief Initialize mean of the batchnorm layer with zeros
     */
    void initializeMean();

    /**
     * @brief Initialize sqrt of variance of the batchnorm layer with ones
     */
    void initializeSqrtVar();

    /**
     * @brief Copy weights and biases to the device
     */
    void toCuda();
};

}  // namespace CUDANet::Layers

#endif  // CUDANET_BATCH_NORM_H
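The batchnorm .cu implementation is likewise not part of this view. As a rough sketch of how forward could compose the vec_scalar kernels declared above: assume a channels-first layout with inputSize * inputSize elements per channel, one mean/sqrt_var/weight/bias value per channel, that vec_scalar_div and vec_scalar_mul share vec_scalar_add's (src, out, scalar, len) signature, and that vec_scalar_sub operates in place. BLOCK_SIZE is a hypothetical constant; none of this is confirmed by the diff.

float* BatchNorm::forward(const float* d_input) {
    const unsigned int n = inputSize * inputSize;  // elements per channel (assumed)
    for (int c = 0; c < inputChannels; c++) {
        float* dst = d_output + c * n;
        // Start from a copy of the channel so the in-place subtract
        // does not modify the input.
        cudaMemcpy(dst, d_input + c * n, n * sizeof(float),
                   cudaMemcpyDeviceToDevice);
        // x - mean[c]  (assumed in-place signature: vector, scalar, len)
        vec_scalar_sub<<<gridSize, BLOCK_SIZE>>>(dst, d_mean + c, n);
        // (x - mean[c]) / sqrt_var[c]
        vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(dst, dst, d_sqrt_var + c, n);
        // gamma[c] * x_hat
        vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(dst, dst, d_weights + c, n);
        // + beta[c]
        vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(dst, dst, d_biases + c, n);
    }
    // The stored Activation is presumably applied to d_output here;
    // its exact API is not visible in this diff.
    return d_output;
}

This mirrors the usual inference-time formula y = gamma * (x - mean) / sqrt(var + eps) + beta, with d_sqrt_var holding the precomputed per-channel sqrt(var + eps).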