Implement vector mean calculation

This commit is contained in:
2024-04-28 22:04:15 +02:00
parent 26c12dafc0
commit 0ab623fa23
4 changed files with 79 additions and 1 deletions

View File

@@ -37,6 +37,29 @@ __global__ void vec_vec_add(
const unsigned int w
);
/**
* @brief Element-wise vector subtraction kernel
*
* Each thread handles the single index
* i = blockDim.x * blockIdx.x + threadIdx.x and, when i < w, writes
* d_output[i] = d_vector1[i] - d_vector2[i].
*
* @param d_vector1 Vector to subtract from (read-only)
* @param d_vector2 Vector being subtracted (read-only)
* @param d_output Receives the element-wise difference
* @param w Number of elements to process; threads whose index is >= w do nothing
*/
__global__ void vec_vec_sub(
const float* __restrict__ d_vector1,
const float* __restrict__ d_vector2,
float* __restrict__ d_output,
const unsigned int w
);
/**
* @brief Element-wise vector multiplication kernel
*
* Each thread handles the single index
* i = blockDim.x * blockIdx.x + threadIdx.x and, when i < w, writes
* d_output[i] = d_vector1[i] * d_vector2[i].
*
* @param d_vector1 First factor vector (read-only)
* @param d_vector2 Second factor vector (read-only)
* @param d_output Receives the element-wise product
* @param w Number of elements to process; threads whose index is >= w do nothing
*/
__global__ void vec_vec_mul(
const float* __restrict__ d_vector1,
const float* __restrict__ d_vector2,
float* __restrict__ d_output,
const unsigned int w
);
/**
* @brief Sub scalar from each element of the vector
*

View File

@@ -31,13 +31,30 @@ void sum(float *d_vec, float *d_sum, const unsigned int length);
/**
* @brief Get the max of a vector
*
* @param d_vec Pointer to the vector
* @param d_max Output pointer; presumably receives the maximum value
*              (implementation not visible here - TODO confirm)
* @param length Length of the vector
*/
void max(float *d_vec, float *d_max, const unsigned int length);
/**
* @brief Compute the mean of a vector
*
* The mean is the sum of the elements (obtained via Utils::sum) divided by
* length.
*
* @param d_vec Pointer to the vector (the d_ prefix suggests device memory -
*              TODO confirm against callers)
* @param d_mean Output pointer that receives the mean
* @param length Number of elements in the vector; used as the divisor, so it
*               should be non-zero
*/
void mean(float *d_vec, float *d_mean, const unsigned int length);
/**
* @brief Compute the variance of a vector
*
* NOTE(review): the definition accompanying this declaration is currently an
* empty TODO stub, so no result is produced yet.
*
* @param d_vec Pointer to the vector
* @param d_var Output pointer intended to receive the variance
* @param length Number of elements in the vector
*/
void var(float *d_vec, float *d_var, const unsigned int length);
} // namespace CUDANet::Utils

View File

@@ -36,6 +36,32 @@ __global__ void Kernels::vec_vec_add(
d_output[tid] = d_vector1[tid] + d_vector2[tid];
}
/**
 * @brief Element-wise vector subtraction: d_output[i] = d_vector1[i] - d_vector2[i]
 *
 * Expects a 1D launch with at least w threads in total; every thread handles
 * at most one element, and threads whose index falls outside [0, w) do nothing.
 *
 * @param d_vector1 Vector to subtract from (read-only)
 * @param d_vector2 Vector being subtracted (read-only)
 * @param d_output Receives the element-wise difference
 * @param w Number of elements to process
 */
__global__ void Kernels::vec_vec_sub(
    const float* __restrict__ d_vector1,
    const float* __restrict__ d_vector2,
    float* __restrict__ d_output,
    const unsigned int w
) {
    // Keep the index unsigned, like `w`: a signed int would turn negative for
    // indices >= 2^31 and then address memory in front of the arrays.
    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // The grid rarely divides w evenly, so surplus threads must not write.
    if (tid < w) {
        d_output[tid] = d_vector1[tid] - d_vector2[tid];
    }
}
/**
 * @brief Element-wise vector multiplication: d_output[i] = d_vector1[i] * d_vector2[i]
 *
 * Expects a 1D launch with at least w threads in total; every thread handles
 * at most one element, and threads whose index falls outside [0, w) do nothing.
 *
 * @param d_vector1 First factor vector (read-only)
 * @param d_vector2 Second factor vector (read-only)
 * @param d_output Receives the element-wise product
 * @param w Number of elements to process
 */
__global__ void Kernels::vec_vec_mul(
    const float* __restrict__ d_vector1,
    const float* __restrict__ d_vector2,
    float* __restrict__ d_output,
    const unsigned int w
) {
    // Keep the index unsigned, like `w`: a signed int would turn negative for
    // indices >= 2^31 and then address memory in front of the arrays.
    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // The grid rarely divides w evenly, so surplus threads must not write.
    if (tid < w) {
        d_output[tid] = d_vector1[tid] * d_vector2[tid];
    }
}
__global__ void Kernels::vec_scalar_sub(
const float* __restrict__ d_src,
float* __restrict__ d_out,

View File

@@ -60,3 +60,15 @@ void Utils::sum(float* d_vec, float* d_sum, const unsigned int length) {
remaining = blocks_needed;
}
}
/**
 * @brief Compute the arithmetic mean of a vector
 *
 * The elements are summed with Utils::sum and the total is divided by
 * length. The reduction runs into a temporary device buffer, because
 * Utils::sum is handed a device output pointer (d_sum); the previous code
 * passed the address of a host stack variable and then dereferenced d_mean on
 * the host.
 *
 * @param d_vec Pointer to the input vector; forwarded unchanged to Utils::sum
 * @param d_mean Receives the mean as a single float. The final copy uses
 *               cudaMemcpyDefault, so the runtime infers whether this is a
 *               device or a host address.
 * @param length Number of elements in d_vec. When it is 0 there is nothing to
 *               average and d_mean is left untouched.
 *
 * If a CUDA runtime call fails the function returns early without writing
 * d_mean.
 */
void Utils::mean(float* d_vec, float* d_mean, const unsigned int length) {
    // An empty vector has no mean; avoid dividing by zero.
    if (length == 0) {
        return;
    }

    // Scratch space for the reduction. `length` floats is an upper bound on
    // the number of partial results a reduction over `length` inputs can need.
    // NOTE(review): assumes Utils::sum leaves the total in d_sum[0] - the
    // original code relied on the same thing; confirm against Utils::sum.
    float* d_sum = nullptr;
    if (cudaMalloc(&d_sum, sizeof(float) * length) != cudaSuccess) {
        return;
    }

    Utils::sum(d_vec, d_sum, length);

    // Blocking copy: also waits for the reduction work issued above.
    float h_sum = 0.0f;
    const cudaError_t copy_status =
        cudaMemcpy(&h_sum, d_sum, sizeof(float), cudaMemcpyDeviceToHost);
    cudaFree(d_sum);
    if (copy_status != cudaSuccess) {
        return;
    }

    const float h_mean = h_sum / static_cast<float>(length);
    cudaMemcpy(d_mean, &h_mean, sizeof(float), cudaMemcpyDefault);
}
/**
 * @brief Compute the variance of a vector (not implemented yet)
 *
 * The parameter list mirrors the header declaration
 * `var(float *d_vec, float *d_var, const unsigned int length)`; the previous
 * definition carried an extra `d_mean` parameter and therefore matched no
 * declared function.
 *
 * @param d_vec Pointer to the input vector
 * @param d_var Output pointer intended to receive the variance
 * @param length Number of elements in d_vec
 */
void Utils::var(float* d_vec, float* d_var, const unsigned int length) {
    // TODO: implement; currently a no-op, d_var is left untouched.
    (void)d_vec;
    (void)d_var;
    (void)length;
}