WIP Implement Tensor constructor and destructor

2025-12-22 22:34:22 +00:00 · 2025-11-17 22:36:29 +01:00
parent 6744c8964f
commit d231e515b1
6 changed files with 110 additions and 102 deletions
--- a/src/backends/cuda/layer_ops.cu
+++ b/src/backends/cuda/layer_ops.cu
@@ -0,0 +1,48 @@
+#include "backend/cuda.cuh"
+#include "utils/cuda_helper.cuh"
+#include "kernels/activation_functions.cuh"
+#include "kernels/matmul.cuh"
+
+using namespace CUDANet::Backend;
+
+/**
+ * Run the ReLU kernel over the whole tensor.
+ *
+ * Launches Kernels::relu with the tensor's float buffer passed as both
+ * pointer arguments (presumably source and destination, i.e. the tensor is
+ * updated in place -- confirm against the kernel signature), using enough
+ * BLOCK_SIZE-wide blocks to cover numel() threads. The host blocks until
+ * the device has finished.
+ *
+ * @param tensor Tensor whose buffer is accessed through data<float>().
+ */
+void CUDABackend::relu(Tensor &tensor) {
+    // An empty tensor would produce a grid of 0 blocks, which the CUDA
+    // runtime rejects as an invalid launch configuration. Nothing to do.
+    if (tensor.numel() == 0) {
+        return;
+    }
+
+    // Ceiling division so a partially filled last block is still launched.
+    int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    Kernels::relu<<<gridSize, BLOCK_SIZE>>>(tensor.data<float>(), tensor.data<float>(), tensor.numel());
+    CUDA_CHECK(cudaGetLastError());       // errors from the launch itself
+    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran
+}
+
+/**
+ * Run the sigmoid kernel over the whole tensor.
+ *
+ * Launches Kernels::sigmoid with the tensor's float buffer passed as both
+ * pointer arguments (presumably source and destination, i.e. the tensor is
+ * updated in place -- confirm against the kernel signature), using enough
+ * BLOCK_SIZE-wide blocks to cover numel() threads. The host blocks until
+ * the device has finished.
+ *
+ * @param tensor Tensor whose buffer is accessed through data<float>().
+ */
+void CUDABackend::sigmoid(Tensor &tensor) {
+    // An empty tensor would produce a grid of 0 blocks, which the CUDA
+    // runtime rejects as an invalid launch configuration. Nothing to do.
+    if (tensor.numel() == 0) {
+        return;
+    }
+
+    // Ceiling division so a partially filled last block is still launched.
+    int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
+    Kernels::sigmoid<<<gridSize, BLOCK_SIZE>>>(tensor.data<float>(), tensor.data<float>(), tensor.numel());
+    CUDA_CHECK(cudaGetLastError());       // errors from the launch itself
+    CUDA_CHECK(cudaDeviceSynchronize());  // errors raised while the kernel ran
+}
+
+/**
+ * Compute softmax over the tensor using its own buffer for every step.
+ *
+ * Sequence: max(tensor) -> subtract that value -> exponentiate -> sum(tensor)
+ * -> divide by that sum. Each element-wise kernel receives the tensor's
+ * float buffer as both pointer arguments (presumably source and destination,
+ * i.e. in place -- confirm against the kernel signatures). The host blocks
+ * until the final kernel has finished.
+ *
+ * @param tensor   Tensor to transform; accessed through data<float>().
+ * @param temp_max Scratch tensor handed to max(); its buffer is then passed
+ *                 to vec_scalar_sub as the value to subtract.
+ * @param temp_sum Scratch tensor handed to sum(); its buffer is then passed
+ *                 to vec_scalar_div as the divisor.
+ *
+ * NOTE(review): the required shapes of temp_max / temp_sum are dictated by
+ * max() and sum(), which are not visible here -- verify against them.
+ */
+void CUDABackend::softmax(Tensor &tensor, Tensor &temp_max, Tensor &temp_sum) {
+    // An empty tensor would produce a grid of 0 blocks, which the CUDA
+    // runtime rejects as an invalid launch configuration. Bail out before
+    // running any reduction; the scratch tensors are left untouched.
+    if (tensor.numel() == 0) {
+        return;
+    }
+
+    // Ceiling division so a partially filled last block is still launched.
+    int gridSize = (tensor.numel() + BLOCK_SIZE - 1) / BLOCK_SIZE;
+
+    // Find max value
+    max(tensor, temp_max);
+
+    // Subtract max value to improve numerical stability
+    Kernels::vec_scalar_sub<<<gridSize, BLOCK_SIZE>>>(
+        tensor.data<float>(), tensor.data<float>(), temp_max.data<float>(), tensor.numel()
+    );
+    CUDA_CHECK(cudaGetLastError());
+
+    // Compute exponentials
+    Kernels::vec_exp<<<gridSize, BLOCK_SIZE>>>(
+        tensor.data<float>(), tensor.data<float>(), tensor.numel()
+    );
+    CUDA_CHECK(cudaGetLastError());
+
+    // Find sum
+    sum(tensor, temp_sum);
+
+    // Normalise: divide every exponential by the sum
+    Kernels::vec_scalar_div<<<gridSize, BLOCK_SIZE>>>(
+        tensor.data<float>(), tensor.data<float>(), temp_sum.data<float>(), tensor.numel()
+    );
+    CUDA_CHECK(cudaGetLastError());
+    // Single blocking sync at the end; also surfaces errors raised while
+    // the kernels launched above were running.
+    CUDA_CHECK(cudaDeviceSynchronize());
+}