mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Implement vector variance function
This commit is contained in:
@@ -49,14 +49,14 @@ void max(const float *d_vec, float *d_max, const unsigned int length);
|
||||
*/
|
||||
void mean(const float *d_vec, float *d_mean, float *d_length, int length);
|
||||
|
||||
// /**
|
||||
// * @brief Compute the variance of a vector
|
||||
// *
|
||||
// * @param d_vec
|
||||
// * @param d_var
|
||||
// * @param length
|
||||
// */
|
||||
// void var(float *d_vec, float *d_var, const unsigned int length);
|
||||
/**
 * @brief Compute the variance of a vector
 *
 * NOTE(review): the implementation multiplies d_vec with itself and does not
 * subtract a mean on its own, so the result is a variance only when d_vec
 * already holds mean-subtracted values (the BatchNorm caller passes its
 * mean-subtracted buffer) - confirm for any new caller.
 *
 * @param d_vec Input vector; handed directly to the CUDA kernels, so
 *              presumably a device pointer
 * @param d_var Output buffer; it also receives the intermediate squared
 *              values, so presumably it must hold `length` floats - TODO confirm
 * @param d_length Divisor handed to the scalar division kernel; presumably a
 *                 device pointer to the element count stored as a float -
 *                 verify against callers
 * @param length Number of elements of d_vec to process
 */
|
||||
void var(float *d_vec, float *d_var, float *d_length, const unsigned int length);
|
||||
|
||||
} // namespace CUDANet::Utils
|
||||
|
||||
|
||||
@@ -142,30 +142,12 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Compute variance
|
||||
// Square differences of input - mean
|
||||
Kernels::vec_vec_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
Utils::var(
|
||||
d_mean_sub,
|
||||
d_mean_sub,
|
||||
d_sqrt_var,
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Sum over all differences
|
||||
Utils::sum(
|
||||
d_sqrt_var,
|
||||
d_sqrt_var,
|
||||
inputSize * inputSize
|
||||
);
|
||||
|
||||
// Divide by difference sum / length -> variance
|
||||
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_sqrt_var,
|
||||
d_sqrt_var,
|
||||
d_length,
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Add epsilon to variance to avoid division by zero
|
||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||
@@ -193,6 +175,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Multiply by weights
|
||||
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
d_output + i * inputSize * inputSize,
|
||||
@@ -201,6 +184,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Add biases
|
||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
d_output + i * inputSize * inputSize,
|
||||
|
||||
@@ -73,4 +73,35 @@ void Utils::mean(const float* d_vec, float* d_mean, float *d_length, int length)
|
||||
);
|
||||
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
/**
 * Variance helper: squares the input, sums the squares and divides the
 * result by the value behind d_length.
 *
 * The input is multiplied with itself as-is; no mean is subtracted here, so
 * callers are expected to pass mean-subtracted data (as BatchNorm does).
 *
 * @param d_vec    input vector handed to the kernels (presumably device memory)
 * @param d_var    output buffer, also used for the intermediate squares
 * @param d_length divisor handed to the scalar division kernel
 * @param length   number of elements to process
 */
void Utils::var(float* d_vec, float* d_var, float *d_length, const unsigned int length) {
    // Enough blocks of BLOCK_SIZE threads to cover `length` elements (rounded up)
    const int numBlocks = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Step 1: multiply the vector with itself, result goes to d_var
    Kernels::vec_vec_mul<<<numBlocks, BLOCK_SIZE>>>(d_vec, d_vec, d_var, length);
    CUDA_CHECK(cudaGetLastError());

    // Step 2: sum over all squared values, in place in d_var
    Utils::sum(d_var, d_var, length);

    // Step 3: divide the sum by the length -> variance
    Kernels::vec_scalar_div<<<numBlocks, BLOCK_SIZE>>>(d_var, d_var, d_length, length);
    CUDA_CHECK(cudaGetLastError());
}
|
||||
Reference in New Issue
Block a user