Change unsigned int to int, add paddingSize kernel parameter, and iterate channels in the outermost convolution loop

2025-12-22 14:24:22 +00:00 · 2024-03-18 19:40:00 +01:00
parent aac0c3a826
commit e6d3757312
3 changed files with 19 additions and 17 deletions
--- a/include/kernels/convolution.cuh
+++ b/include/kernels/convolution.cuh
@@ -39,12 +39,13 @@ __global__ void convolution(
    const float* __restrict__ d_input,
    const float* __restrict__ d_kernel,
    float* __restrict__ d_output,
-    const unsigned int inputSize,
-    const unsigned int nChannels,
-    const unsigned int kernelSize,
-    const unsigned int stride,
-    const unsigned int nFilters,
-    const unsigned int outputSize
+    const int inputSize,
+    const int nChannels,
+    const int paddingSize,
+    const int kernelSize,
+    const int stride,
+    const int nFilters,
+    const int outputSize
 );

 }  // namespace CUDANet::Kernels
--- a/src/kernels/convolution.cu
+++ b/src/kernels/convolution.cu
@@ -83,12 +83,13 @@ __global__ void CUDANet::Kernels::convolution(
    const float* __restrict__ d_input,
    const float* __restrict__ d_kernel,
    float* __restrict__ d_output,
-    const unsigned int inputSize,
-    const unsigned int nChannels,
-    const unsigned int kernelSize,
-    const unsigned int stride,
-    const unsigned int nFilters,
-    const unsigned int outputSize
+    const int inputSize,
+    const int nChannels,
+    const int paddingSize,
+    const int kernelSize,
+    const int stride,
+    const int nFilters,
+    const int outputSize
 ) {
    int tid = blockDim.x * blockIdx.x + threadIdx.x;

@@ -104,9 +105,9 @@ __global__ void CUDANet::Kernels::convolution(
    float sum = 0.0f;

    // Iterate over kernel and input matrix
-    for (int k = 0; k < kernelSize; k++) {
-        for (int l = 0; l < kernelSize; l++) {
-            for (int c = 0; c < nChannels; c++) {
+    for (int c = 0; c < nChannels; c++) {
+        for (int k = 0; k < kernelSize; k++) {
+            for (int l = 0; l < kernelSize; l++) {
                int kernelIndex = f * kernelSize * kernelSize * nChannels +
                                  c * kernelSize * kernelSize + k * kernelSize +
                                  l;
--- a/src/layers/conv2d.cu
+++ b/src/layers/conv2d.cu
@@ -124,8 +124,8 @@ float* Layers::Conv2d::forward(const float* d_input) {
    // Convolve
    THREADS_PER_BLOCK = outputSize * outputSize * numFilters;
    Kernels::convolution<<<1, THREADS_PER_BLOCK>>>(
-        d_padded, d_weights, d_output, inputSize + (2 * paddingSize),
-        inputChannels, kernelSize, stride, numFilters, outputSize
+        d_padded, d_weights, d_output, inputSize + 2 * paddingSize, inputChannels, paddingSize,
+        kernelSize, stride, numFilters, outputSize
    );

    // Add bias