Restructure cuda backend

This commit is contained in:
2024-09-05 22:23:47 +02:00
parent 65727dfee8
commit f8220f0ec1
19 changed files with 69 additions and 16 deletions

View File

@@ -0,0 +1,28 @@
#include "add.hpp"
#include "matmul.cuh"
#include "cuda_helper.cuh"
using namespace CUDANet::Layers;
void Add::initCUDA() {
d_output = nullptr;
CUDA_CHECK(cudaMalloc((void**)&d_output, sizeof(float) * inputSize));
gridSize = (inputSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
}
void Add::delCUDA() {
cudaFree(d_output);
}
float* Add::forwardCUDA(const float* d_inputA, const float* d_inputB) {
Kernels::vec_vec_add<<<gridSize, BLOCK_SIZE>>>(
d_inputA, d_inputB, d_output, inputSize
);
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
return d_output;
}