mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-05 17:34:21 +00:00
Refactor kernels
This commit is contained in:
@@ -3,7 +3,9 @@
|
||||
#include "activation_functions.cuh"
|
||||
#include "cuda_helper.cuh"
|
||||
|
||||
__global__ void CUDANet::Kernels::sigmoid(
|
||||
using namespace CUDANet::Kernels;
|
||||
|
||||
__global__ void sigmoid(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const unsigned int len
|
||||
@@ -16,7 +18,7 @@ __global__ void CUDANet::Kernels::sigmoid(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void CUDANet::Kernels::relu(
|
||||
__global__ void relu(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const unsigned int len
|
||||
@@ -29,7 +31,7 @@ __global__ void CUDANet::Kernels::relu(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void CUDANet::Kernels::softmax_exp(
|
||||
__global__ void softmax_exp(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const unsigned int len
|
||||
@@ -42,7 +44,7 @@ __global__ void CUDANet::Kernels::softmax_exp(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void CUDANet::Kernels::softmax_sum(
|
||||
__global__ void softmax_sum(
|
||||
const float* __restrict__ d_vector,
|
||||
float* __restrict__ d_output,
|
||||
const unsigned int w
|
||||
@@ -64,7 +66,7 @@ __global__ void CUDANet::Kernels::softmax_sum(
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void CUDANet::Kernels::softmax_div(
|
||||
__global__ void softmax_div(
|
||||
const float* __restrict__ src,
|
||||
float* __restrict__ dst,
|
||||
const float* __restrict__ sum,
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
#include "convolution.cuh"
|
||||
|
||||
__global__ void CUDANet::Kernels::convolution(
|
||||
using namespace CUDANet::Kernels;
|
||||
|
||||
__global__ void convolution(
|
||||
const float* __restrict__ d_input,
|
||||
const float* __restrict__ d_kernel,
|
||||
float* __restrict__ d_output,
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#include "cuda_helper.cuh"
|
||||
#include "matmul.cuh"
|
||||
|
||||
__global__ void CUDANet::Kernels::mat_vec_mul(
|
||||
using namespace CUDANet::Kernels;
|
||||
|
||||
__global__ void mat_vec_mul(
|
||||
const float* __restrict__ d_matrix,
|
||||
const float* __restrict__ d_vector,
|
||||
float* __restrict__ d_output,
|
||||
@@ -35,7 +37,7 @@ __global__ void CUDANet::Kernels::mat_vec_mul(
|
||||
d_output[tid] = temp;
|
||||
}
|
||||
|
||||
__global__ void CUDANet::Kernels::vec_vec_add(
|
||||
__global__ void vec_vec_add(
|
||||
const float* __restrict__ d_vector1,
|
||||
const float* __restrict__ d_vector2,
|
||||
float* __restrict__ d_output,
|
||||
|
||||
Reference in New Issue
Block a user