Implement vector variance function

This commit is contained in:
2024-05-14 21:58:23 +02:00
parent 33a3c6afcb
commit 7c48ed86d2
3 changed files with 42 additions and 27 deletions

View File

@@ -49,14 +49,14 @@ void max(const float *d_vec, float *d_max, const unsigned int length);
*/
void mean(const float *d_vec, float *d_mean, float *d_length, int length);
// /**
// * @brief Compute the variance of a vector
// *
// * @param d_vec
// * @param d_var
// * @param length
// */
// void var(float *d_vec, float *d_var, const unsigned int length);
/**
* @brief Compute the variance of a vector
*
* NOTE(review): the implementation multiplies d_vec with itself, sums the
* result and divides by the value behind d_length; it does not subtract a
* mean on its own, so callers presumably pass mean-centered data - confirm.
*
* @param d_vec Pointer to the input vector; it is handed to a kernel launch,
*              so it has to be device-accessible
* @param d_var Pointer receiving the result; it is also used for the
*              intermediate products and sum, so it presumably needs room
*              for `length` floats - TODO confirm
* @param d_length Pointer forwarded as the divisor of the final division
*                 kernel; presumably holds the element count as a float in
*                 device memory - TODO confirm
* @param length Number of elements passed to every kernel / reduction call
*/
void var(float *d_vec, float *d_var, float *d_length, const unsigned int length);
} // namespace CUDANet::Utils

View File

@@ -142,30 +142,12 @@ float *BatchNorm::forward(const float *d_input) {
CUDA_CHECK(cudaGetLastError());
// Compute variance
// Square differences of input - mean
Kernels::vec_vec_mul<<<gridSize, BLOCK_SIZE>>>(
Utils::var(
d_mean_sub,
d_mean_sub,
d_sqrt_var,
inputSize * inputSize
);
CUDA_CHECK(cudaGetLastError());
// Sum over all differences
Utils::sum(
d_sqrt_var,
d_sqrt_var,
inputSize * inputSize
);
// Divide by difference sum / length -> variance
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
d_sqrt_var,
d_sqrt_var,
d_length,
inputSize * inputSize
);
CUDA_CHECK(cudaGetLastError());
// Add epsilon to variance to avoid division by zero
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
@@ -193,6 +175,7 @@ float *BatchNorm::forward(const float *d_input) {
);
CUDA_CHECK(cudaGetLastError());
// Multiply by weights
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
d_output + i * inputSize * inputSize,
d_output + i * inputSize * inputSize,
@@ -201,6 +184,7 @@ float *BatchNorm::forward(const float *d_input) {
);
CUDA_CHECK(cudaGetLastError());
// Add biases
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
d_output + i * inputSize * inputSize,
d_output + i * inputSize * inputSize,

View File

@@ -73,4 +73,35 @@ void Utils::mean(const float* d_vec, float* d_mean, float *d_length, int length)
);
CUDA_CHECK(cudaGetLastError());
}
/**
 * Three-stage variance pipeline operating on pointers that are handed to
 * kernel launches: multiply d_vec with itself, reduce, then divide.
 *
 * NOTE(review): no mean is subtracted here, so d_vec is presumably expected
 * to be mean-centered by the caller - confirm against call sites.
 *
 * @param d_vec     input vector (passed as both operands of vec_vec_mul)
 * @param d_var     output buffer; reused in place by every stage
 * @param d_length  pointer forwarded to vec_scalar_div as the divisor
 * @param length    element count given to each stage
 */
void Utils::var(float* d_vec, float* d_var, float *d_length, const unsigned int length) {
    // Ceil-division so a partially filled trailing block is still launched.
    const int blockCount = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Stage 1: d_vec combined with itself, written to d_var
    // (presumably the element-wise square - verify against the kernel).
    Kernels::vec_vec_mul<<<blockCount, BLOCK_SIZE>>>(d_vec, d_vec, d_var, length);
    CUDA_CHECK(cudaGetLastError());

    // Stage 2: reduction with d_var as both source and destination.
    Utils::sum(d_var, d_var, length);

    // Stage 3: divide d_var in place by the value behind d_length.
    Kernels::vec_scalar_div<<<blockCount, BLOCK_SIZE>>>(d_var, d_var, d_length, length);
    CUDA_CHECK(cudaGetLastError());
}