From 035f3b053b095996ab583ca05c752110ea59f86a Mon Sep 17 00:00:00 2001 From: LordMathis Date: Wed, 21 Feb 2024 20:03:04 +0100 Subject: [PATCH] Rename files to .cu and fix IDX2C usage --- CMakeLists.txt | 4 +- include/layers/dense.h | 2 - include/utils/cuda_helper.h | 2 +- src/CMakeLists.txt | 4 +- src/layers/{dense.cpp => dense.cu} | 39 +++++++------- src/utils/{cuda_helper.cpp => cuda_helper.cu} | 0 test/CMakeLists.txt | 4 +- test/layers/{test_dense.cpp => test_dense.cu} | 53 +++++++++++++++---- ...las_fixture.cpp => test_cublas_fixture.cu} | 0 9 files changed, 72 insertions(+), 36 deletions(-) rename src/layers/{dense.cpp => dense.cu} (62%) rename src/utils/{cuda_helper.cpp => cuda_helper.cu} (100%) rename test/layers/{test_dense.cpp => test_dense.cu} (78%) rename test/test_utils/{test_cublas_fixture.cpp => test_cublas_fixture.cu} (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3adf434..258d06c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,8 +9,8 @@ include_directories(${CUDAToolkit_INCLUDE_DIRS}) # Add project source files for the library set(LIBRARY_SOURCES - src/utils/cuda_helper.cpp - src/layers/dense.cpp + src/utils/cuda_helper.cu + src/layers/dense.cu ) set(CMAKE_CUDA_ARCHITECTURES 75) diff --git a/include/layers/dense.h b/include/layers/dense.h index 93073d4..a35f157 100644 --- a/include/layers/dense.h +++ b/include/layers/dense.h @@ -1,5 +1,3 @@ -// fully_connected_layer.h - #ifndef DENSE_LAYER_H #define DENSE_LAYER_H diff --git a/include/utils/cuda_helper.h b/include/utils/cuda_helper.h index 4f8656d..f2776bb 100644 --- a/include/utils/cuda_helper.h +++ b/include/utils/cuda_helper.h @@ -24,7 +24,7 @@ do { \ cublasStatus_t result = call; \ if (result != CUBLAS_STATUS_SUCCESS) { \ fprintf(stderr, "cuBLAS error at %s:%d code=%d\n", \ - __FILE__, __LINE__, static_cast(result)); \ + __FILE__, __LINE__, static_cast(result)); \ exit(EXIT_FAILURE); \ } \ } while (0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 92bf280..2c7d52c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,6 +1,6 @@ -set(LAYER_SOURCES layers/dense.cpp) +set(LAYER_SOURCES layers/dense.cu) add_library(CUDANet - utils/cuda_helper.cpp + utils/cuda_helper.cu ${LAYER_SOURCES} ) diff --git a/src/layers/dense.cpp b/src/layers/dense.cu similarity index 62% rename from src/layers/dense.cpp rename to src/layers/dense.cu index 1c8dfec..a547b20 100644 --- a/src/layers/dense.cpp +++ b/src/layers/dense.cu @@ -4,7 +4,7 @@ #include #include #include -#include +#include Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle) : inputSize(inputSize), outputSize(outputSize), cublasHandle(cublasHandle) { @@ -16,9 +16,12 @@ Layers::Dense::Dense(int inputSize, int outputSize, cublasHandle_t cublasHandle) initializeWeights(); initializeBiases(); + d_weights = nullptr; + d_biases = nullptr; + // Allocate GPU memory for weights and biases CUDA_CHECK(cudaMalloc((void**)&d_weights, sizeof(float) * inputSize * outputSize)); - CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * biases.size())); + CUDA_CHECK(cudaMalloc((void**)&d_biases, sizeof(float) * outputSize)); toCuda(); } @@ -30,43 +33,43 @@ Layers::Dense::~Dense() { } void Layers::Dense::initializeWeights() { - int numWeights = inputSize * outputSize; - std::random_device rd; - std::mt19937 gen(rd()); - std::normal_distribution dist(0.0f, 0.01f); // Xavier initialization - - for (int i = 0; i < outputSize; ++i) { - for (int j = 0; j < inputSize; ++j) { - int idx = IDX2C(i, j, inputSize); - weights[idx] = dist(gen); + for (int j = 0; j < inputSize; ++j) { + for (int i = 0; i < outputSize; ++i) { + int idx = IDX2C(i, j, outputSize); + weights[idx] = 0.0f; } } } void Layers::Dense::initializeBiases() { - std::fill(biases.begin(), biases.end(), 0.1f); + std::fill(biases.begin(), biases.end(), 0.0f); } void Layers::Dense::forward(const float* d_input, float* d_output) { const float alpha = 1.0f; const float beta = 1.0f; - cublasSgemv(cublasHandle, CUBLAS_OP_N, inputSize, outputSize, &alpha, d_weights, inputSize, d_input, 1, &beta, d_output, 1); - cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, d_output, 1); + CUBLAS_CHECK(cublasSgemv(cublasHandle, CUBLAS_OP_N, inputSize, outputSize, &alpha, d_weights, inputSize, d_input, 1, &beta, d_output, 1)); + CUBLAS_CHECK(cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, d_output, 1)); } void Layers::Dense::toCuda() { - CUBLAS_CHECK(cublasSetMatrix(outputSize, inputSize, sizeof(float), weights.data(), inputSize, d_weights, outputSize)); + CUBLAS_CHECK(cublasSetMatrix(outputSize, inputSize, sizeof(float), weights.data(), outputSize, d_weights, outputSize)); CUBLAS_CHECK(cublasSetVector(biases.size(), sizeof(float), biases.data(), 1, d_biases, 1)); } void Layers::Dense::setWeights(const std::vector>& weights_input) { int numWeights = inputSize * outputSize; - for (int i = 0; i < outputSize; ++i) { - for (int j = 0; j < inputSize; ++j) { - int idx = IDX2C(i, j, inputSize); + if (weights.size() != numWeights) { + std::cerr << "Invalid number of weights" << std::endl; + exit(EXIT_FAILURE); + } + + for (int j = 0; j < inputSize; ++j) { + for (int i = 0; i < outputSize; ++i) { + int idx = IDX2C(i, j, outputSize); weights[idx] = weights_input[i][j]; } } diff --git a/src/utils/cuda_helper.cpp b/src/utils/cuda_helper.cu similarity index 100% rename from src/utils/cuda_helper.cpp rename to src/utils/cuda_helper.cu diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ccdf4b9..628fbc6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,10 +1,10 @@ find_package(GTest REQUIRED) include_directories(${GTEST_INCLUDE_DIRS}) -add_executable(test_dense layers/test_dense.cpp) +add_executable(test_dense layers/test_dense.cu) add_library(test_utils - test_utils/test_cublas_fixture.cpp + test_utils/test_cublas_fixture.cu ) target_include_directories(test_utils PUBLIC test_utils) diff --git a/test/layers/test_dense.cpp b/test/layers/test_dense.cu similarity index 78% rename from test/layers/test_dense.cpp rename to test/layers/test_dense.cu index 966acaf..bad3793 100644 --- a/test/layers/test_dense.cpp +++ b/test/layers/test_dense.cu @@ -1,6 +1,7 @@ #include "gtest/gtest.h" #include #include +#include #include "dense.h" #include "test_cublas_fixture.h" @@ -38,6 +39,40 @@ protected: cublasStatus_t cublasStatus; }; +TEST_F(DenseLayerTest, Init) { + + for (int i = 1; i < 100; ++i) { + for (int j = 1; j < 100; ++j) { + + int inputSize = i; + int outputSize = j; + + // std::cout << "Dense layer: input size = " << inputSize << ", output size = " << outputSize << std::endl; + Layers::Dense denseLayer(inputSize, outputSize, cublasHandle); + } + } +} + +TEST_F(DenseLayerTest, setWeights) { + + + int inputSize = 4; + int outputSize = 5; + + std::vector> weights = { + {0.5f, 1.0f, 0.2f, 0.8f}, + {1.2f, 0.3f, 1.5f, 0.4f}, + {0.7f, 1.8f, 0.9f, 0.1f}, + {0.4f, 2.0f, 0.6f, 1.1f}, + {1.3f, 0.5f, 0.0f, 1.7f} + }; + + Layers::Dense denseLayer(inputSize, outputSize, cublasHandle); + + denseLayer.setWeights(weights); + +} + TEST_F(DenseLayerTest, ForwardUnitWeightMatrix) { int inputSize = 3; @@ -80,28 +115,28 @@ TEST_F(DenseLayerTest, ForwardRandomWeightMatrix) { std::vector input = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; std::vector> weights = { - {0.5f, 1.0f, 0.2f, 0.8f}, - {1.2f, 0.3f, 1.5f, 0.4f}, - {0.7f, 1.8f, 0.9f, 0.1f}, - {0.4f, 2.0f, 0.6f, 1.1f}, - {1.3f, 0.5f, 0.0f, 1.7f} - }; + {0.5f, 1.2f, 0.7f, 0.4f, 1.3f}, + {1.0f, 0.3f, 1.8f, 2.0f, 0.5f}, + {0.2f, 1.5f, 0.9f, 0.6f, 0.0f}, + {0.8f, 0.4f, 0.1f, 1.1f, 1.7f} + }; std::vector biases = {0.2f, 0.5f, 0.7f, 1.1f}; float* d_input; - float* d_output; + float* d_output; Layers::Dense denseLayer = commonTestSetup(inputSize, outputSize, input, weights, biases, d_input, d_output); + denseLayer.forward(d_input, d_output); std::vector output(outputSize); cublasStatus = cublasGetVector(outputSize, sizeof(float), d_output, 1, output.data(), 1); EXPECT_EQ(cublasStatus, CUBLAS_STATUS_SUCCESS); - std::vector expectedOutput = {3.4f, 4.4f, 5.6f, 7.4f}; + std::vector expectedOutput = {10.4f, 13.0f, 8.9f, 9.3f}; for (int i = 0; i < outputSize; ++i) { EXPECT_NEAR(output[i], expectedOutput[i], 1e-4); // Allow small tolerance for floating-point comparison } commonTestTeardown(d_input, d_output); -} \ No newline at end of file +} diff --git a/test/test_utils/test_cublas_fixture.cpp b/test/test_utils/test_cublas_fixture.cu similarity index 100% rename from test/test_utils/test_cublas_fixture.cpp rename to test/test_utils/test_cublas_fixture.cu