mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Implement vector variance function
This commit is contained in:
@@ -142,30 +142,12 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Compute variance
|
||||
// Square differences of input - mean
|
||||
Kernels::vec_vec_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
Utils::var(
|
||||
d_mean_sub,
|
||||
d_mean_sub,
|
||||
d_sqrt_var,
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Sum over all differences
|
||||
Utils::sum(
|
||||
d_sqrt_var,
|
||||
d_sqrt_var,
|
||||
inputSize * inputSize
|
||||
);
|
||||
|
||||
// Divide by difference sum / length -> variance
|
||||
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_sqrt_var,
|
||||
d_sqrt_var,
|
||||
d_length,
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Add epsilon to variance to avoid division by zero
|
||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||
@@ -193,6 +175,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Multiply by weights
|
||||
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
d_output + i * inputSize * inputSize,
|
||||
@@ -201,6 +184,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Add biases
|
||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
d_output + i * inputSize * inputSize,
|
||||
|
||||
@@ -73,4 +73,35 @@ void Utils::mean(const float* d_vec, float* d_mean, float *d_length, int length)
|
||||
);
|
||||
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
void Utils::var(float* d_vec, float* d_var, float *d_length, const unsigned int length) {
|
||||
|
||||
const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
|
||||
|
||||
Kernels::vec_vec_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_vec,
|
||||
d_vec,
|
||||
d_var,
|
||||
length
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
// Sum over all differences
|
||||
Utils::sum(
|
||||
d_var,
|
||||
d_var,
|
||||
length
|
||||
);
|
||||
|
||||
// Divide by difference sum / length -> variance
|
||||
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_var,
|
||||
d_var,
|
||||
d_length,
|
||||
length
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
}
|
||||
Reference in New Issue
Block a user