Set up CMake to compile the library

2024-02-17 23:07:09 +01:00
parent ac18768297
commit f541e2f7f8
7 changed files with 40 additions and 93 deletions


@@ -1,35 +1,36 @@
-cmake_minimum_required(VERSION 3.12)
+cmake_minimum_required(VERSION 3.17)
-project(CUDANet)
-# Find CUDA
-find_package(CUDA REQUIRED)
-# Add CUDA include directories
-include_directories(${CUDA_INCLUDE_DIRS})
-# Add project source files
-set(SOURCES
-    src/main.cpp
-    src/utils/cuda_helper.cpp
+project(CUDANet
+    LANGUAGES CXX CUDA
+)
+find_package(CUDAToolkit REQUIRED)
+include_directories(${CUDAToolkit_INCLUDE_DIRS})
+# Add project source files for the library
+set(LIBRARY_SOURCES
+    src/utils/cuda_helper.cpp
     src/layers/dense.cpp
 )
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-# Set CUDA architecture (change according to your GPU)
-set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_75)
-# Build executable
-cuda_add_executable(${PROJECT_NAME} ${SOURCES})
+# Build static library
+add_library(${PROJECT_NAME} STATIC ${LIBRARY_SOURCES})
-# Link cuBLAS library
-target_link_libraries(${PROJECT_NAME} ${CUDA_cublas_LIBRARY})
+# Link cuBLAS library to the library
+target_link_libraries(${PROJECT_NAME} CUDA::cublas CUDA::cudart)
-# Set include directories
-target_include_directories(${PROJECT_NAME} PRIVATE
+# Set include directories for the library
+target_include_directories(${PROJECT_NAME} PUBLIC
     ${CMAKE_CURRENT_SOURCE_DIR}/include
     ${CMAKE_CURRENT_SOURCE_DIR}/include/utils
     ${CMAKE_CURRENT_SOURCE_DIR}/include/layers
     ${CMAKE_CURRENT_SOURCE_DIR}/src
 )
 # Set C++ standard
-set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11)
+set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 14)
 # Add testing subdirectory
 add_subdirectory(test)


@@ -1 +1,5 @@
-# CUDANet
+# CUDANet
+
+requirements:
+- CUDA, cuBLAS
+- Google Test
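The README now lists Google Test, and the root CMakeLists.txt adds a test subdirectory. As a rough illustration of what a test under test/ might check, the sketch below computes a host-side reference for a dense forward pass; the weight layout (one row per output neuron), the denseReference helper, and the comparison against Layers::Dense mentioned in the comment are assumptions, not code from the repository.

#include <gtest/gtest.h>

#include <vector>

// Host-side reference for a dense (fully connected) forward pass:
// output[j] = sum_i weights[j][i] * input[i] + biases[j].
static std::vector<float> denseReference(
    const std::vector<std::vector<float>>& weights,
    const std::vector<float>&              biases,
    const std::vector<float>&              input
) {
    std::vector<float> output(biases);
    for (size_t j = 0; j < weights.size(); ++j) {
        for (size_t i = 0; i < input.size(); ++i) {
            output[j] += weights[j][i] * input[i];
        }
    }
    return output;
}

// Placeholder: a real test would feed the same data to Layers::Dense::forward()
// and compare the device result against this reference.
TEST(DenseLayerTest, ReferenceForwardPass) {
    std::vector<std::vector<float>> weights = {{1.0f, 2.0f}, {3.0f, 4.0f}};
    std::vector<float> biases = {0.5f, -0.5f};
    std::vector<float> input  = {1.0f, 1.0f};

    std::vector<float> output = denseReference(weights, biases, input);

    EXPECT_FLOAT_EQ(output[0], 3.5f);
    EXPECT_FLOAT_EQ(output[1], 6.5f);
}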


@@ -15,8 +15,8 @@ namespace Layers {
     ~Dense();
     void forward(const float* input, float* output);
-    virtual void setWeights(const std::vector<std::vector<float>>& weights) = 0;
-    virtual void setBiases(const std::vector<float>& biases) = 0;
+    void setWeights(const std::vector<std::vector<float>>& weights);
+    void setBiases(const std::vector<float>& biases);
   private:
     int inputSize;


@@ -3,6 +3,7 @@
 #define I_LAYER_H
 #include <cublas_v2.h>
+#include <vector>
 namespace Layers {
@@ -11,6 +12,8 @@ namespace Layers {
     virtual ~ILayer() {}
     virtual void forward(const float* input, float* output) = 0;
+    virtual void setWeights(const std::vector<std::vector<float>>& weights) = 0;
+    virtual void setBiases(const std::vector<float>& biases) = 0;
 };
 } // namespace Layers
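For reference, a minimal sketch of how a concrete layer could satisfy this interface. The ILayer declaration is repeated from the header above (header guard and cuBLAS include omitted), while the BiasLayer class, its size check, and the CPU-only forward pass are purely illustrative and not part of the repository.

#include <stdexcept>
#include <vector>

namespace Layers {

// Interface as declared in ilayer.h (abridged for this sketch).
class ILayer {
  public:
    virtual ~ILayer() {}
    virtual void forward(const float* input, float* output) = 0;
    virtual void setWeights(const std::vector<std::vector<float>>& weights) = 0;
    virtual void setBiases(const std::vector<float>& biases) = 0;
};

// Hypothetical CPU-only layer used only to show the override pattern.
class BiasLayer : public ILayer {
  public:
    explicit BiasLayer(int size) : size(size), biases(size, 0.0f) {}

    void forward(const float* input, float* output) override {
        // Add the stored bias to each element of the input.
        for (int i = 0; i < size; ++i) {
            output[i] = input[i] + biases[i];
        }
    }

    void setWeights(const std::vector<std::vector<float>>& /*weights*/) override {
        // A bias-only layer has no weights; a real layer would copy them to the device here.
    }

    void setBiases(const std::vector<float>& newBiases) override {
        if (static_cast<int>(newBiases.size()) != size) {
            throw std::runtime_error("bias size mismatch");
        }
        biases = newBiases;
    }

  private:
    int size;
    std::vector<float> biases;
};

}  // namespace Layers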

src/CMakeLists.txt (new file)

@@ -0,0 +1,6 @@
+set(LAYER_SOURCES layers/dense.cpp)
+add_library(CUDANet
+    utils/cuda_helper.cpp
+    ${LAYER_SOURCES}
+)


@@ -1,6 +1,7 @@
#include "dense.h"
#include "cuda_helper.h"
#include <cstdlib>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <cstdio>
#include <stdexcept>


@@ -1,68 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <cuda_runtime.h>
-#include "cublas_v2.h"
-#include "cuda_helper.h"
-int main() {
-    // Initialize CUDA and get device properties
-    cudaDeviceProp deviceProp = initializeCUDA();
-    // Specify vector size
-    const int N = 5;
-    // Host vectors
-    float *h_A, *h_B, *h_C;
-    // Allocate host memory
-    h_A = (float*)malloc(N * sizeof(float));
-    h_B = (float*)malloc(N * sizeof(float));
-    h_C = (float*)malloc(N * sizeof(float));
-    // Initialize host vectors
-    for (int i = 0; i < N; ++i) {
-        h_A[i] = static_cast<float>(i);
-        h_B[i] = static_cast<float>(2 * i);
-    }
-    // Allocate device memory
-    float *d_A, *d_B, *d_C;
-    cudaMalloc((void**)&d_A, N * sizeof(float));
-    cudaMalloc((void**)&d_B, N * sizeof(float));
-    cudaMalloc((void**)&d_C, N * sizeof(float));
-    // Copy host vectors to device
-    cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice);
-    cudaMemcpy(d_B, h_B, N * sizeof(float), cudaMemcpyHostToDevice);
-    // Create cuBLAS handle
-    cublasHandle_t handle;
-    cublasCreate(&handle);
-    // Perform vector addition: C = A + B
-    const float alpha = 1.0f;
-    const float beta = 1.0f;
-    cublasSaxpy(handle, N, &alpha, d_A, 1, d_B, 1);
-    cublasSaxpy(handle, N, &beta, d_B, 1, d_C, 1);
-    // Copy result from device to host
-    cudaMemcpy(h_C, d_C, N * sizeof(float), cudaMemcpyDeviceToHost);
-    // Display result
-    printf("Result: ");
-    for (int i = 0; i < N; ++i) {
-        printf("%f ", h_C[i]);
-    }
-    printf("\n");
-    // Clean up
-    free(h_A);
-    free(h_B);
-    free(h_C);
-    cudaFree(d_A);
-    cudaFree(d_B);
-    cudaFree(d_C);
-    cublasDestroy(handle);
-    return 0;
-}
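The removed example computed C = A + B with two cublasSaxpy calls, but the second call accumulated into d_C before that buffer had ever been written, so the printed result was undefined. A minimal sketch of the same vector addition with that fixed; error checking and the initializeCUDA() helper from cuda_helper.h are omitted here for brevity, and plain CUDA runtime calls are used instead.

#include <stdio.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>

int main() {
    const int N = 5;
    float h_A[N], h_B[N], h_C[N];
    for (int i = 0; i < N; ++i) {
        h_A[i] = static_cast<float>(i);
        h_B[i] = static_cast<float>(2 * i);
    }

    // Allocate device buffers and upload the inputs.
    float *d_A, *d_C;
    cudaMalloc((void**)&d_A, N * sizeof(float));
    cudaMalloc((void**)&d_C, N * sizeof(float));
    cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice);
    // Seed C with B so that a single saxpy yields C = A + B.
    cudaMemcpy(d_C, h_B, N * sizeof(float), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);
    const float alpha = 1.0f;
    cublasSaxpy(handle, N, &alpha, d_A, 1, d_C, 1);  // d_C = alpha * d_A + d_C

    cudaMemcpy(h_C, d_C, N * sizeof(float), cudaMemcpyDeviceToHost);
    printf("Result: ");
    for (int i = 0; i < N; ++i) {
        printf("%f ", h_C[i]);
    }
    printf("\n");

    cublasDestroy(handle);
    cudaFree(d_A);
    cudaFree(d_C);
    return 0;
}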