mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Implement batch norm test
This commit is contained in:
@@ -129,6 +129,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
&d_mean[i],
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
@@ -136,6 +137,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
&d_sqrt_var[i],
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
Kernels::vec_scalar_mul<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
@@ -143,6 +145,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
&d_weights[i],
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
|
||||
Kernels::vec_scalar_add<<<gridSize, BLOCK_SIZE>>>(
|
||||
d_output + i * inputSize * inputSize,
|
||||
@@ -150,6 +153,7 @@ float *BatchNorm::forward(const float *d_input) {
|
||||
&d_biases[i],
|
||||
inputSize * inputSize
|
||||
);
|
||||
CUDA_CHECK(cudaGetLastError());
|
||||
}
|
||||
|
||||
return d_output;
|
||||
|
||||
Reference in New Issue
Block a user