Add running mean and running var to batchnorm

2024-08-25 19:05:10 +02:00
parent 1136ca452f
commit 9704d0d53e
8 changed files with 205 additions and 71 deletions

View File

@@ -141,7 +141,6 @@ __global__ void vec_exp(
* @param src Device pointer to source vector
* @param dst Device pointer to destination vector
* @param len Length of the vector
* @return __global__
*/
__global__ void vec_sqrt(
const float* __restrict__ src,
@@ -149,6 +148,23 @@ __global__ void vec_sqrt(
const unsigned int len
);
/**
* @brief Scales the vector by 1/sqrt(scale + epsilon)
*
* @param src Device pointer to source vector
* @param dst Device pointer to destination vector
* @param scale Device pointer to the scale values
* @param epsilon Device pointer to the epsilon value
* @param len Length of the vector
*/
__global__ void vec_scale(
const float* __restrict__ src,
float* __restrict__ dst,
const float* __restrict__ scale,
const float* epsilon,
const unsigned int len
);
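
A plausible element-wise implementation of the `vec_scale` kernel declared above is sketched below. This is not taken from the commit; it assumes `scale` holds one value per element of `src` and that `epsilon` points to a single scalar in device memory.

```cuda
// Sketch only, not the commit's implementation: assumes `scale` holds one
// value per element and `epsilon` points to a single scalar on the device.
__global__ void vec_scale(
    const float* __restrict__ src,
    float* __restrict__ dst,
    const float* __restrict__ scale,
    const float* epsilon,
    const unsigned int len
) {
    const unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < len) {
        // dst = src * 1/sqrt(scale + epsilon), as described in the brief above
        dst[i] = src[i] * rsqrtf(scale[i] + *epsilon);
    }
}
```
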
/**
* @brief Max reduction kernel
*

View File

@@ -50,6 +50,20 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
*/
std::vector<float> getBiases();
/**
* @brief Set the Running Mean
*
* @param running_mean_input Pointer to the running mean values
*/
void setRunningMean(const float* running_mean_input);
/**
* @brief Set the Running Var
*
* @param running_var_input Pointer to the running variance values
*/
void setRunningVar(const float* running_var_input);
/**
* @brief Get output size
*
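
A minimal sketch of how `setRunningMean` and `setRunningVar` might be implemented, assuming `running_mean`/`running_var` are host-side `std::vector<float>` members sized to the channel count and `d_running_mean`/`d_running_var` are pre-allocated device buffers (those member names appear in the hunks below; the copy logic itself is an assumption):

```cpp
// Sketch only; requires <algorithm> and <cuda_runtime.h>. Assumes the inputs
// point to one value per channel on the host and that the device buffers are
// already allocated.
void BatchNorm2d::setRunningMean(const float* running_mean_input) {
    std::copy(running_mean_input,
              running_mean_input + running_mean.size(),
              running_mean.begin());
    cudaMemcpy(d_running_mean, running_mean.data(),
               running_mean.size() * sizeof(float), cudaMemcpyHostToDevice);
}

void BatchNorm2d::setRunningVar(const float* running_var_input) {
    std::copy(running_var_input,
              running_var_input + running_var.size(),
              running_var.begin());
    cudaMemcpy(d_running_var, running_var.data(),
               running_var.size() * sizeof(float), cudaMemcpyHostToDevice);
}
```

In inference-only use, a caller would presumably load pretrained statistics once, e.g. `layer.setRunningMean(stats.data())`, before running forward passes.
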
@@ -75,9 +89,8 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
float* d_output;
float* d_mean;
float* d_mean_sub;
float* d_sqrt_var;
float* d_running_mean;
float* d_running_var;
float* d_length;
float* d_epsilon;
@@ -88,8 +101,8 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
std::vector<float> weights;
std::vector<float> biases;
std::vector<float> mean;
std::vector<float> sqrt_var;
std::vector<float> running_mean;
std::vector<float> running_var;
Activation* activation;
@@ -109,13 +122,13 @@ class BatchNorm2d : public WeightedLayer, public TwoDLayer {
* @brief Initialize the running mean of the batchnorm layer with zeros
*
*/
void initializeMean();
void initializeRunningMean();
/**
* @brief Initialize the running variance of the batchnorm layer with ones
*
*/
void initializeSqrtVar();
void initializeRunningVar();
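
A minimal sketch of the two initializers, consistent with the doc comments above (running mean zero-initialized, running variance one-initialized); only the names and semantics come from the header, the bodies are assumptions:

```cpp
// Sketch only; requires <algorithm>. Copying the initialized values to
// d_running_mean / d_running_var is assumed to happen elsewhere
// (e.g. together with the weights and biases).
void BatchNorm2d::initializeRunningMean() {
    std::fill(running_mean.begin(), running_mean.end(), 0.0f);
}

void BatchNorm2d::initializeRunningVar() {
    std::fill(running_var.begin(), running_var.end(), 1.0f);
}
```
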
/**
* @brief Copy weights and biases to the device