mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
114 lines
2.6 KiB
Plaintext
114 lines
2.6 KiB
Plaintext
#ifndef CUDANET_MATMUL_H
#define CUDANET_MATMUL_H

namespace CUDANet::Kernels {

/**
 * @brief Matrix vector multiplication kernel
 *
 * @param d_matrix Device pointer to matrix
 * @param d_vector Device pointer to vector
 * @param d_output Device pointer to output vector
 * @param w Width of the matrix
 * @param h Height of the matrix
 */
__global__ void mat_vec_mul(
    const float* __restrict__ d_matrix,
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int w,
    const unsigned int h
);

/**
 * @brief Vector vector addition kernel
 *
 * @param d_vector1 Device pointer to first vector
 * @param d_vector2 Device pointer to second vector
 * @param d_output Device pointer to output vector
 * @param w Length of the vectors
 */
__global__ void vec_vec_add(
    const float* __restrict__ d_vector1,
    const float* __restrict__ d_vector2,
    float* __restrict__ d_output,
    const unsigned int w
);

/**
 * @brief Subtract a scalar from each element of the vector
 *
 * NOTE(review): description follows the kernel name (the previous comment
 * said "add"); the definition is not part of this header - confirm.
 *
 * @param d_src Device pointer to source vector
 * @param d_out Device pointer to output vector
 * @param d_scalar Device pointer to the scalar operand
 * @param len Length of the vectors
 */
__global__ void vec_scalar_sub(
    const float* __restrict__ d_src,
    float* __restrict__ d_out,
    const float* __restrict__ d_scalar,
    const unsigned int len
);

/**
 * @brief Divide each element of the vector by a scalar
 *
 * NOTE(review): description follows the kernel name (the previous comment
 * was a copy of the softmax kernel's); the definition is not part of this
 * header - confirm.
 *
 * @param d_src Device pointer to source vector
 * @param d_out Device pointer to output vector
 * @param d_scalar Device pointer to the scalar operand
 * @param len Length of the vectors
 */
__global__ void vec_scalar_div(
    const float* __restrict__ d_src,
    float* __restrict__ d_out,
    const float* __restrict__ d_scalar,
    const unsigned int len
);

/**
 * @brief Softmax activation exponentiation kernel
 *
 * @param src Pointer to the source array
 * @param dst Pointer to the destination array
 * @param len Length of the arrays
 */
__global__ void vec_exp(
    const float* __restrict__ src,
    float* __restrict__ dst,
    const unsigned int len
);

/**
 * @brief Max reduction kernel
 *
 * @param d_vector Device pointer to vector
 * @param d_output Device pointer to output vector
 * @param len Length of the vector
 */
__global__ void max_reduce(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int len
);

/**
 * @brief Sum reduction kernel
 *
 * @param d_vector Device pointer to vector
 * @param d_output Device pointer to output vector
 * @param len Length of the vector
 */
__global__ void sum_reduce(
    const float* __restrict__ d_vector,
    float* __restrict__ d_output,
    const unsigned int len
);

/**
 * @brief Clear kernel
 *
 * NOTE(review): presumably resets every element of the vector (to zero?);
 * the definition is not part of this header - confirm.
 *
 * @param d_vector Device pointer to vector
 * @param len Length of the vector
 */
__global__ void clear(
    float* __restrict__ d_vector,
    const unsigned int len
);

}  // namespace CUDANet::Kernels

#endif // CUDANET_MATMUL_H