WIP Migrate Activation to Tensor

This commit is contained in:
2025-11-16 22:17:46 +01:00
parent 64bf9197ff
commit 6133fb20af
7 changed files with 160 additions and 41 deletions

View File

@@ -0,0 +1,69 @@
#include "backend/cuda_backend.cuh"
#include "utils/cuda_helper.cuh"
#include "kernels/activation_functions.cuh"
#include "kernels/matmul.cuh"
#include "utils/vector.cuh"
using namespace CUDANet::Backend;
// Allocates `bytes` bytes of device (GPU) memory and returns the raw pointer.
// Allocation failure aborts via CUDA_CHECK rather than returning nullptr.
void *CUDABackend::allocate(size_t bytes) {
    void *ptr = nullptr;
    CUDA_CHECK(cudaMalloc(&ptr, bytes));
    return ptr;
}
// Frees device memory previously returned by allocate().
// Per the CUDA runtime docs, cudaFree(nullptr) is a no-op, so passing a
// null pointer here is safe.
void CUDABackend::deallocate(void* ptr) {
CUDA_CHECK(cudaFree(ptr));
}
// void CUDABackend::copyToDevice(void* devicePtr, const void* hostPtr, size_t bytes) {
// CUDA_CHECK(cudaMemcpy(devicePtr, hostPtr, bytes, cudaMemcpyHostToDevice));
// CUDA_CHECK(cudaDeviceSynchronize());
// }
// void CUDABackend::copyToHost(void* hostPtr, const void* devicePtr, size_t bytes) {
// CUDA_CHECK(cudaMemcpy(hostPtr, devicePtr, bytes, cudaMemcpyDeviceToHost));
// CUDA_CHECK(cudaDeviceSynchronize());
// }
// Applies ReLU to `tensor` in place (input and output buffers are the same).
// Assumes the tensor holds float data; launches one thread per element.
void CUDABackend::relu(Tensor &tensor) {
    float *d_data = static_cast<float *>(tensor.data());
    size_t n      = tensor.numel();

    // Ceil-divide in size_t, then narrow for the launch configuration.
    int gridSize = static_cast<int>((n + BLOCK_SIZE - 1) / BLOCK_SIZE);

    Kernels::relu<<<gridSize, BLOCK_SIZE>>>(d_data, d_data, n);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
}
// Applies the sigmoid activation to `tensor` in place.
// Assumes the tensor holds float data; launches one thread per element.
void CUDABackend::sigmoid(Tensor &tensor) {
    float *d_data = static_cast<float *>(tensor.data());
    size_t n      = tensor.numel();

    // Ceil-divide in size_t, then narrow for the launch configuration.
    int gridSize = static_cast<int>((n + BLOCK_SIZE - 1) / BLOCK_SIZE);

    Kernels::sigmoid<<<gridSize, BLOCK_SIZE>>>(d_data, d_data, n);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
}
// Computes softmax over the flattened tensor, in place.
// temp_max and temp_sum are scratch tensors receiving the reduction results
// consumed by the scalar-subtract / scalar-divide kernels.
// All kernels run on the default stream, so they execute in issue order.
void CUDABackend::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
    float *d_x   = static_cast<float *>(tensor.data());
    float *d_max = static_cast<float *>(temp_max.data());
    float *d_sum = static_cast<float *>(temp_sum.data());
    size_t n     = tensor.numel();

    int gridSize = static_cast<int>((n + BLOCK_SIZE - 1) / BLOCK_SIZE);

    // Max reduction; subtracting the max keeps exp() numerically stable.
    Utils::max(tensor, temp_max, n);

    // x <- x - max(x)
    Kernels::vec_scalar_sub<<<gridSize, BLOCK_SIZE>>>(d_x, d_x, d_max, n);
    CUDA_CHECK(cudaGetLastError());

    // x <- exp(x)
    Kernels::vec_exp<<<gridSize, BLOCK_SIZE>>>(d_x, d_x, n);
    CUDA_CHECK(cudaGetLastError());

    // Denominator: sum of the exponentials.
    Utils::sum(tensor, temp_sum, n);

    // x <- x / sum(exp(x))
    Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(d_x, d_x, d_sum, n);
    CUDA_CHECK(cudaGetLastError());
    CUDA_CHECK(cudaDeviceSynchronize());
}

View File

@@ -1,3 +1,5 @@
#include <stdexcept>
#include "backend/tensor.hpp"
using namespace CUDANet::Backend;
@@ -9,3 +11,29 @@ Tensor::~Tensor() {
deallocate();
}
// Total element count: the product of all shape dimensions.
// An empty shape yields 1 (the empty product), i.e. a scalar.
size_t Tensor::numel() const {
    size_t count = 1;
    for (auto it = shape.begin(); it != shape.end(); ++it) {
        count *= *it;
    }
    return count;
}
// Size of the tensor's buffer in bytes: numel() * per-element size.
// Throws std::runtime_error when the dtype has no known element size.
size_t Tensor::size() const {
    size_t elementSize = 0;
    switch (dtype) {
        case DType::FLOAT32:
            // sizeof(float) instead of a hard-coded 4: self-documenting and
            // stays correct on any platform the code compiles for.
            elementSize = sizeof(float);
            break;
        default:
            throw std::runtime_error("Unsupported data type");
    }
    return numel() * elementSize;
}
// Returns the raw device pointer backing this tensor.
// NOTE(review): hands out a mutable pointer from a const method —
// constness here is shallow by design, since callers mutate device memory.
void* Tensor::data() const {
return devicePtr;
}