Refactor kernels: replace CUDANet::Kernels:: qualified definitions with a using-directive and unqualified names

This commit is contained in:
2024-03-19 21:37:25 +01:00
parent b6c4b7d2ae
commit 364715ff70
3 changed files with 14 additions and 8 deletions

View File

@@ -3,7 +3,9 @@
#include "activation_functions.cuh"
#include "cuda_helper.cuh"
__global__ void CUDANet::Kernels::sigmoid(
using namespace CUDANet::Kernels;
__global__ void sigmoid(
const float* __restrict__ src,
float* __restrict__ dst,
const unsigned int len
@@ -16,7 +18,7 @@ __global__ void CUDANet::Kernels::sigmoid(
}
}
__global__ void CUDANet::Kernels::relu(
__global__ void relu(
const float* __restrict__ src,
float* __restrict__ dst,
const unsigned int len
@@ -29,7 +31,7 @@ __global__ void CUDANet::Kernels::relu(
}
}
__global__ void CUDANet::Kernels::softmax_exp(
__global__ void softmax_exp(
const float* __restrict__ src,
float* __restrict__ dst,
const unsigned int len
@@ -42,7 +44,7 @@ __global__ void CUDANet::Kernels::softmax_exp(
}
}
__global__ void CUDANet::Kernels::softmax_sum(
__global__ void softmax_sum(
const float* __restrict__ d_vector,
float* __restrict__ d_output,
const unsigned int w
@@ -64,7 +66,7 @@ __global__ void CUDANet::Kernels::softmax_sum(
}
}
__global__ void CUDANet::Kernels::softmax_div(
__global__ void softmax_div(
const float* __restrict__ src,
float* __restrict__ dst,
const float* __restrict__ sum,

View File

@@ -2,7 +2,9 @@
#include "convolution.cuh"
__global__ void CUDANet::Kernels::convolution(
using namespace CUDANet::Kernels;
__global__ void convolution(
const float* __restrict__ d_input,
const float* __restrict__ d_kernel,
float* __restrict__ d_output,

View File

@@ -1,7 +1,9 @@
#include "cuda_helper.cuh"
#include "matmul.cuh"
__global__ void CUDANet::Kernels::mat_vec_mul(
using namespace CUDANet::Kernels;
__global__ void mat_vec_mul(
const float* __restrict__ d_matrix,
const float* __restrict__ d_vector,
float* __restrict__ d_output,
@@ -35,7 +37,7 @@ __global__ void CUDANet::Kernels::mat_vec_mul(
d_output[tid] = temp;
}
__global__ void CUDANet::Kernels::vec_vec_add(
__global__ void vec_vec_add(
const float* __restrict__ d_vector1,
const float* __restrict__ d_vector2,
float* __restrict__ d_output,