Add toplevel CUDANet namespace

This commit is contained in:
2024-03-17 16:08:53 +01:00
parent dc86cddeb7
commit 0c22fac64e
19 changed files with 183 additions and 149 deletions

View File

@@ -1,11 +1,11 @@
#ifndef CUDANET_MATMUL_H
#define CUDANET_MATMUL_H
namespace Kernels {
namespace CUDANet::Kernels {
/**
* @brief Matrix vector multiplication kernel
*
*
* @param d_matrix Device pointer to matrix
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
@@ -13,28 +13,41 @@ namespace Kernels {
* @param h Height of the matrix
*/
__global__ void mat_vec_mul(
const float* d_matrix,
const float* d_vector,
float* d_output,
int w,
int h
const float* __restrict__ d_matrix,
const float* __restrict__ d_vector,
float* __restrict__ d_output,
const unsigned int w,
const unsigned int h
);
/**
* @brief Vector vector addition kernel
*
*
* @param d_vector1 Device pointer to first vector
* @param d_vector2 Device pointer to second vector
* @param d_output Device pointer to output vector
* @param w Length of the vectors
*/
__global__ void vec_vec_add(
const float* d_vector1,
const float* d_vector2,
float* d_output,
int w
const float* __restrict__ d_vector1,
const float* __restrict__ d_vector2,
float* __restrict__ d_output,
const unsigned int w
);
} // namespace Kernels
/**
* @brief Sum reduction kernel (declaration only)
*
* NOTE(review): judging by the name, this presumably sums the elements of
* d_vector into d_output. The definition is not part of this header, so the
* exact output layout and launch requirements should be confirmed against
* the implementation.
*
* @param d_vector Device pointer to vector
* @param d_output Device pointer to output vector
* @param w Length of the vector
*/
__global__ void reduce_sum(
const float* __restrict__ d_vector,
float* __restrict__ d_output,
const unsigned int w
);
} // namespace CUDANet::Kernels
#endif // CUDANET_MATMUL_H