Refactor Backend and Layer interfaces

This commit is contained in:
2025-11-18 18:27:57 +01:00
parent 25670f90c4
commit 6340b27055
23 changed files with 154 additions and 201 deletions

View File

@@ -28,12 +28,12 @@ cudaDeviceProp initializeCUDA() {
using namespace CUDANet::Backend;
void* CUDABackend::allocate(size_t bytes) {
void* CUDA::allocate(size_t bytes) {
void* d_ptr = nullptr;
CUDA_CHECK(cudaMalloc(&d_ptr, bytes));
return d_ptr;
}
void CUDABackend::deallocate(void* ptr) {
void CUDA::deallocate(void* ptr) {
CUDA_CHECK(cudaFree(ptr));
}

View File

@@ -5,21 +5,21 @@
using namespace CUDANet::Backend;
void CUDABackend::relu(Tensor &tensor) {
void CUDA::relu(Tensor &tensor) {
int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
Kernels::relu<<<gridSize, BLOCK_SIZE>>>(tensor.data<float>(), tensor.data<float>(), tensor.numel());
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
}
void CUDABackend::sigmoid(Tensor &tensor) {
void CUDA::sigmoid(Tensor &tensor) {
int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
Kernels::sigmoid<<<gridSize, BLOCK_SIZE>>>(tensor.data<float>(), tensor.data<float>(), tensor.numel());
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());
}
void CUDABackend::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
void CUDA::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
// Find max value

View File

@@ -7,7 +7,7 @@
using namespace CUDANet::Backend;
void CUDABackend::print(const CUDANet::Backend::Tensor &input) {
void CUDA::print(const CUDANet::Backend::Tensor &input) {
auto length = input.numel();
std::vector<float> h_vec(input.numel());
@@ -22,11 +22,11 @@ void CUDABackend::print(const CUDANet::Backend::Tensor &input) {
std::cout << std::endl;
}
void CUDABackend::clear(CUDANet::Backend::Tensor &input) {
void CUDA::clear(CUDANet::Backend::Tensor &input) {
CUDA_CHECK(cudaMemset(input.data<float>(), 0, sizeof(float) * input.numel()));
}
void CUDABackend::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) {
void CUDA::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &sum) {
auto length = input.numel();
const int gridSize = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;
@@ -45,7 +45,7 @@ void CUDABackend::sum(const CUDANet::Backend::Tensor &input, CUDANet::Backend::T
}
}
void CUDABackend::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) {
void CUDA::max(const CUDANet::Backend::Tensor &input, CUDANet::Backend::Tensor &max) {
auto length = input.numel();
const int grid_size = (length + BLOCK_SIZE - 1) / BLOCK_SIZE;

View File

@@ -54,7 +54,7 @@ float* AvgPooling2d::forward(const float* input) {
#endif
}
int AvgPooling2d::getOutputSize() {
int AvgPooling2d::get_output_size() {
return outputSize.first * outputSize.second * nChannels;
}

View File

@@ -28,7 +28,7 @@ float* Input::forward(const float* input) {
#endif
}
int Input::getOutputSize() {
int Input::get_output_size() {
return inputSize;
}

View File

@@ -54,7 +54,7 @@ float* MaxPooling2d::forward(const float* input) {
}
int MaxPooling2d::getOutputSize() {
int MaxPooling2d::get_output_size() {
return outputSize.first * outputSize.second * nChannels;
}

View File

@@ -24,7 +24,7 @@ float* Output::forward(const float* input) {
#endif
}
int Output::getOutputSize() {
int Output::get_output_size() {
return inputSize;
}