Implement output layer

This commit is contained in:
2024-03-21 23:07:46 +01:00
parent e46d5d3f76
commit 90fb104dae
8 changed files with 92 additions and 9 deletions

View File

@@ -1,7 +1,6 @@
#ifndef CUDANET_CONV_LAYER_H
#define CUDANET_CONV_LAYER_H
#include <string>
#include <vector>
#include "activation.cuh"

View File

@@ -1,8 +1,6 @@
#ifndef CUDANET_DENSE_LAYER_H
#define CUDANET_DENSE_LAYER_H
#include <functional>
#include <string>
#include <vector>
#include "layer.cuh"

View File

@@ -2,8 +2,6 @@
#ifndef CUDANET_I_LAYER_H
#define CUDANET_I_LAYER_H
#include <vector>
namespace CUDANet::Layers {
/**

39
include/layers/output.cuh Normal file
View File

@@ -0,0 +1,39 @@
#ifndef CUDANET_OUTPUT_LAYER_H
#define CUDANET_OUTPUT_LAYER_H
#include "layer.cuh"
namespace CUDANet::Layers {
/**
 * @brief Final layer that transfers the network result from device to host.
 */
class Output : public SequentialLayer {
  public:
    /**
     * @brief Create a new Output layer
     *
     * @param inputSize Size of the input vector
     */
    explicit Output(int inputSize);
    /**
     * @brief Destroy the Output layer
     *
     */
    ~Output();

    // Non-copyable: the layer owns a raw host buffer (h_output); a shallow
    // copy would lead to a double-free in the destructor.
    Output(const Output&)            = delete;
    Output& operator=(const Output&) = delete;

    /**
     * @brief Forward pass of the output layer. Just copies the input from device to host
     *
     * @param input Device pointer to the input vector
     * @return Host pointer to the output vector (owned by this layer; valid
     *         until the next forward() call or destruction)
     */
    float* forward(const float* input);
  private:
    int inputSize;
    // Host-side staging buffer, allocated in the constructor.
    float* h_output;
};
} // namespace CUDANet::Layers
#endif // CUDANET_OUTPUT_LAYER_H

View File

@@ -4,7 +4,9 @@
#include <string>
#include <vector>
#include <map>
#include "layer.cuh"
#include "input.cuh"
namespace CUDANet {

22
src/layers/output.cu Normal file
View File

@@ -0,0 +1,22 @@
#include <cstdio>
#include <cstdlib>

#include "cuda_helper.cuh"
#include "output.cuh"
using namespace CUDANet::Layers;
/**
 * @brief Allocate the host-side staging buffer used by forward().
 *
 * @param inputSize Number of floats the layer copies back to the host
 */
Output::Output(int inputSize) : inputSize(inputSize) {
    h_output = (float*) malloc(sizeof(float) * inputSize);
    // Fail fast on allocation failure instead of crashing later inside
    // cudaMemcpy with a null destination pointer.
    if (h_output == nullptr) {
        fprintf(
            stderr, "Output layer: failed to allocate %d floats\n", inputSize
        );
        std::abort();
    }
}
// Releases the host staging buffer allocated in the constructor.
Output::~Output() {
    free(h_output);
}
/**
 * @brief Copy the device-resident result into the layer-owned host buffer.
 *
 * @param input Device pointer holding inputSize floats
 * @return Host pointer to the copied values (owned by this layer)
 */
float* Output::forward(const float* input) {
    // Blocking device-to-host copy; the returned buffer is immediately
    // readable by the caller.
    const size_t bytes = sizeof(float) * inputSize;
    CUDA_CHECK(cudaMemcpy(h_output, input, bytes, cudaMemcpyDeviceToHost));
    return h_output;
}

View File

@@ -1,16 +1,17 @@
#include <cuda_runtime_api.h>
#include <gtest/gtest.h>
#include "cuda_helper.cuh"
#include "input.cuh"
TEST(InputLayerTest, Init) {
TEST(InputLayerTest, InputForward) {
std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};
CUDANet::Layers::Input inputLayer(6);
float* d_output = inputLayer.forward(input.data());
std::vector<float> output(6);
CUDA_CHECK(cudaMemcpy(
cudaError_t cudaStatus = cudaMemcpy(
output.data(), d_output, sizeof(float) * 6, cudaMemcpyDeviceToHost
));
);
EXPECT_EQ(cudaStatus, cudaSuccess);
EXPECT_EQ(input, output);
}

View File

@@ -0,0 +1,24 @@
#include <cuda_runtime_api.h>
#include <gtest/gtest.h>
#include "output.cuh"
// Round-trips known values host -> device -> Output layer -> host and checks
// they come back unchanged.
TEST(OutputLayerTest, OutputForward) {
    cudaError_t cudaStatus;
    std::vector<float> input = {0.573f, 0.619f, 0.732f, 0.055f, 0.243f, 0.316f};

    float* d_input;
    cudaStatus = cudaMalloc((void**)&d_input, sizeof(float) * 6);
    EXPECT_EQ(cudaStatus, cudaSuccess);

    cudaStatus = cudaMemcpy(
        d_input, input.data(), sizeof(float) * 6, cudaMemcpyHostToDevice
    );
    EXPECT_EQ(cudaStatus, cudaSuccess);

    CUDANet::Layers::Output outputLayer(6);
    float* h_output = outputLayer.forward(d_input);

    // forward() is a plain memcpy, so exact float equality is expected here.
    for (int i = 0; i < 6; ++i) {
        EXPECT_EQ(input[i], h_output[i]);
    }

    // Release the device buffer (previously leaked by this test).
    cudaStatus = cudaFree(d_input);
    EXPECT_EQ(cudaStatus, cudaSuccess);
}