mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Implement vector variance function
This commit is contained in:
@@ -49,14 +49,14 @@ void max(const float *d_vec, float *d_max, const unsigned int length);
|
|||||||
*/
|
*/
|
||||||
void mean(const float *d_vec, float *d_mean, float *d_length, int length);
|
void mean(const float *d_vec, float *d_mean, float *d_length, int length);
|
||||||
|
|
||||||
// /**
|
/**
|
||||||
// * @brief Compute the variance of a vector
|
* @brief Compute the variance of a vector
|
||||||
// *
|
*
|
||||||
// * @param d_vec
|
* @param d_vec
|
||||||
// * @param d_var
|
* @param d_var
|
||||||
// * @param length
|
* @param d_length
* @param length
|
||||||
// */
|
*/
|
||||||
// void var(float *d_vec, float *d_var, const unsigned int length);
|
void var(float *d_vec, float *d_var, float *d_length, const unsigned int length);
|
||||||
|
|
||||||
} // namespace CUDANet::Utils
|
} // namespace CUDANet::Utils
|
||||||
|
|
||||||
|
|||||||
@@ -142,30 +142,12 @@ float *BatchNorm::forward(const float *d_input) {
|
|||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
// Compute variance
|
// Compute variance
|
||||||
// Square differences of input - mean
|
Utils::var(
|
||||||
Kernels::vec_vec_mul<<<gridSize, BLOCK_SIZE>>>(
|
|
||||||
d_mean_sub,
|
d_mean_sub,
|
||||||
d_mean_sub,
|
|
||||||
d_sqrt_var,
|
|
||||||
inputSize * inputSize
|
|
||||||
);
|
|
||||||
CUDA_CHECK(cudaGetLastError());
|
|
||||||
|
|
||||||
// Sum over all differences
|
|
||||||
Utils::sum(
|
|
||||||
d_sqrt_var,
|
|
||||||
d_sqrt_var,
|
|
||||||
inputSize * inputSize
|
|
||||||
);
|
|
||||||
|
|
||||||
// Divide by difference sum / length -> variance
|
|
||||||
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
|
|
||||||
d_sqrt_var,
|
|
||||||
d_sqrt_var,
|
d_sqrt_var,
|
||||||
d_length,
|
d_length,
|
||||||
inputSize * inputSize
|
inputSize * inputSize
|
||||||
);
|
);
|
||||||
CUDA_CHECK(cudaGetLastError());
|
|
||||||
|
|
||||||
// Add epsilon to variance to avoid division by zero
|
// Add epsilon to variance to avoid division by zero
|
||||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||||
@@ -193,6 +175,7 @@ float *BatchNorm::forward(const float *d_input) {
|
|||||||
);
|
);
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
|
// Multiply by weights
|
||||||
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
|
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||||
d_output + i * inputSize * inputSize,
|
d_output + i * inputSize * inputSize,
|
||||||
d_output + i * inputSize * inputSize,
|
d_output + i * inputSize * inputSize,
|
||||||
@@ -201,6 +184,7 @@ float *BatchNorm::forward(const float *d_input) {
|
|||||||
);
|
);
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
|
||||||
|
// Add biases
|
||||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||||
d_output + i * inputSize * inputSize,
|
d_output + i * inputSize * inputSize,
|
||||||
d_output + i * inputSize * inputSize,
|
d_output + i * inputSize * inputSize,
|
||||||
|
|||||||
@@ -73,4 +73,35 @@ void Utils::mean(const float* d_vec, float* d_mean, float *d_length, int length)
|
|||||||
);
|
);
|
||||||
|
|
||||||
CUDA_CHECK(cudaGetLastError());
|
CUDA_CHECK(cudaGetLastError());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * @brief Runs the three-step sequence square -> sum -> divide on a vector.
 *
 * The input is multiplied with itself into d_var, d_var is then reduced with
 * Utils::sum, and finally d_var is divided by the value behind d_length.
 *
 * NOTE(review): no mean is subtracted here. The result is a variance only if
 * d_vec already holds mean-centered values (the BatchNorm::forward caller
 * passes its mean-subtracted buffer) -- confirm for any other caller.
 *
 * @param d_vec    Input vector; passed as both operands of vec_vec_mul
 * @param d_var    Output buffer; also used as scratch space by every step
 * @param d_length Divisor handed to vec_scalar_div (presumably the element
 *                 count stored as a float -- TODO confirm)
 * @param length   Number of elements processed by each step
 */
void Utils::var(float* d_vec, float* d_var, float *d_length, const unsigned int length) {
    // Ceiling division: enough blocks of BLOCK_SIZE threads to cover `length`.
    const int numBlocks = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;

    // Step 1: d_var <- d_vec * d_vec
    Kernels::vec_vec_mul<<<numBlocks, BLOCK_SIZE>>>(d_vec, d_vec, d_var, length);
    CUDA_CHECK(cudaGetLastError());

    // Step 2: reduce the products in place
    Utils::sum(d_var, d_var, length);

    // Step 3: divide by the value in d_length
    Kernels::vec_scalar_div<<<numBlocks, BLOCK_SIZE>>>(d_var, d_var, d_length, length);
    CUDA_CHECK(cudaGetLastError());
}
|
||||||
Reference in New Issue
Block a user