diff --git a/src/layers/dense.cpp b/src/layers/dense.cpp
index 13e4457..c39d92a 100644
--- a/src/layers/dense.cpp
+++ b/src/layers/dense.cpp
@@ -45,14 +45,20 @@ void Layers::Dense::initializeBiases() {
     }
 }
 
-void Layers::Dense::forward(const float* input, float* output) {
-    // Perform matrix multiplication: output = weights * input + biases
+// Computes d_output = weights * d_input + biases using cuBLAS.
+// Both pointers are handed to cuBLAS unchanged; the test passes cudaMalloc'd buffers.
+void Layers::Dense::forward(const float* d_input, float* d_output) {
     const float alpha = 1.0f;
-    const float beta = 1.0f;
+    // beta = 0 makes cublasSgemv overwrite d_output; with beta = 1 the stale
+    // contents of d_output were added to the result.
+    const float beta = 0.0f;
 
-    cublasSgemv(cublasHandle, CUBLAS_OP_N, inputSize, outputSize, &alpha, d_weights, inputSize, input, 1, &beta, output, 1);
-    // Add biases
-    cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, output, 1);
+    // NOTE(review): with CUBLAS_OP_N and m = inputSize the result vector has
+    // inputSize elements, so this call looks correct only for square layers.
+    // Confirm the d_weights layout before changing m/n/lda or the transpose flag.
+    cublasSgemv(cublasHandle, CUBLAS_OP_N, inputSize, outputSize, &alpha, d_weights, inputSize, d_input, 1, &beta, d_output, 1);
+    // Add the biases to the matrix-vector product.
+    cublasSaxpy(cublasHandle, outputSize, &alpha, d_biases, 1, d_output, 1);
 }
 
 void Layers::Dense::toCuda() {
diff --git a/test/layers/test_dense.cpp b/test/layers/test_dense.cpp
index 31b3d14..e427a60 100644
--- a/test/layers/test_dense.cpp
+++ b/test/layers/test_dense.cpp
@@ -1,4 +1,6 @@
 #include "gtest/gtest.h"
+#include <cuda_runtime.h>  // cudaMalloc, cudaFree
+#include <vector>
 #include "dense.h"
 #include "test_cublas_fixture.h"
 
@@ -9,16 +11,57 @@ protected:
 
 
 TEST_F(DenseLayerTest, Forward) {
-    Layers::Dense denseLayer(3, 2, cublasHandle);
+    cudaError_t cudaStatus;
+    cublasStatus_t cublasStatus;
 
-    // Create input and output arrays
-    float input[3] = {1.0f, 2.0f, 3.0f};
-    float output[2] = {0.0f, 0.0f};
+    int inputSize = 3;
+    int outputSize = 3;
+
+    Layers::Dense denseLayer(inputSize, outputSize, cublasHandle);
+
+    // Initialize a weight matrix
+    std::vector<std::vector<float>> weights(inputSize, std::vector<float>(outputSize, 0.0f));
+    for (int i = 0; i < inputSize; ++i) {
+        for (int j = 0; j < outputSize; ++j) {
+            if (i == j) {
+                weights[i][j] = 1.0f;
+            }
+        }
+    }
+
+    // Set the weights
+    denseLayer.setWeights(weights);
+
+    // Initialize and set a bias vector
+    std::vector<float> biases(outputSize, 1.0f);
+    denseLayer.setBiases(biases);
+
+    std::vector<float> input = {1.0f, 2.0f, 3.0f};
+    std::vector<float> output(outputSize);
+
+    float* d_input = nullptr;
+    float* d_output = nullptr;
+
+    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * input.size());
+    EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    cudaStatus = cudaMalloc((void**)&d_output, sizeof(float) * outputSize);
+    EXPECT_EQ(cudaStatus, cudaSuccess);
+
+    cublasStatus = cublasSetVector(input.size(), sizeof(float), input.data(), 1, d_input, 1);
+    EXPECT_EQ(cublasStatus, CUBLAS_STATUS_SUCCESS);
 
     // Perform forward pass
-    denseLayer.forward(input, output);
+    denseLayer.forward(d_input, d_output);
+
+    cublasStatus = cublasGetVector(outputSize, sizeof(float), d_output, 1, output.data(), 1);
+    EXPECT_EQ(cublasStatus, CUBLAS_STATUS_SUCCESS);
 
-    // Check if the output is a zero vector
-    EXPECT_FLOAT_EQ(output[0], 0.0f);
-    EXPECT_FLOAT_EQ(output[1], 0.0f);
+    // Identity weights and a bias of 1 mean output[i] == input[i] + 1
+    EXPECT_FLOAT_EQ(output[0], 2.0f);
+    EXPECT_FLOAT_EQ(output[1], 3.0f);
+    EXPECT_FLOAT_EQ(output[2], 4.0f);
+
+    cudaFree(d_input);
+    cudaFree(d_output);
 }