mirror of
https://github.com/lordmathis/CUDANet.git
synced 2025-11-06 01:34:22 +00:00
Set up cmake to compile library
This commit is contained in:
@@ -1,35 +1,36 @@
|
|||||||
cmake_minimum_required(VERSION 3.12)
|
cmake_minimum_required(VERSION 3.17)
|
||||||
|
|
||||||
project(CUDANet)
|
project(CUDANet
|
||||||
|
LANGUAGES CXX CUDA
|
||||||
# Find CUDA
|
|
||||||
find_package(CUDA REQUIRED)
|
|
||||||
|
|
||||||
# Add CUDA include directories
|
|
||||||
include_directories(${CUDA_INCLUDE_DIRS})
|
|
||||||
|
|
||||||
# Add project source files
|
|
||||||
set(SOURCES
|
|
||||||
src/main.cpp
|
|
||||||
src/utils/cuda_helper.cpp
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set CUDA architecture (change according to your GPU)
|
find_package(CUDAToolkit REQUIRED)
|
||||||
|
include_directories(${CUDAToolkit_INCLUDE_DIRS})
|
||||||
|
|
||||||
|
# Add project source files for the library
|
||||||
|
set(LIBRARY_SOURCES
|
||||||
|
src/utils/cuda_helper.cpp
|
||||||
|
src/layers/dense.cpp
|
||||||
|
)
|
||||||
|
|
||||||
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_75)
|
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_75)
|
||||||
|
|
||||||
# Build executable
|
# Build static library
|
||||||
cuda_add_executable(${PROJECT_NAME} ${SOURCES})
|
add_library(${PROJECT_NAME} STATIC ${LIBRARY_SOURCES})
|
||||||
|
|
||||||
# Link cuBLAS library
|
# Link cuBLAS library to the library
|
||||||
target_link_libraries(${PROJECT_NAME} ${CUDA_cublas_LIBRARY})
|
target_link_libraries(${PROJECT_NAME} CUDA::cublas CUDA::cudart)
|
||||||
|
|
||||||
# Set include directories
|
# Set include directories for the library
|
||||||
target_include_directories(${PROJECT_NAME} PRIVATE
|
target_include_directories(${PROJECT_NAME} PUBLIC
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/include/utils
|
${CMAKE_CURRENT_SOURCE_DIR}/include/utils
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/include/layers
|
${CMAKE_CURRENT_SOURCE_DIR}/include/layers
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/src
|
${CMAKE_CURRENT_SOURCE_DIR}/src
|
||||||
)
|
)
|
||||||
|
|
||||||
# Set C++ standard
|
set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 14)
|
||||||
set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11)
|
|
||||||
|
# Add testing subdirectory
|
||||||
|
add_subdirectory(test)
|
||||||
@@ -15,8 +15,8 @@ namespace Layers {
|
|||||||
~Dense();
|
~Dense();
|
||||||
|
|
||||||
void forward(const float* input, float* output);
|
void forward(const float* input, float* output);
|
||||||
virtual void setWeights(const std::vector<std::vector<float>>& weights) = 0;
|
void setWeights(const std::vector<std::vector<float>>& weights);
|
||||||
virtual void setBiases(const std::vector<float>& biases) = 0;
|
void setBiases(const std::vector<float>& biases);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int inputSize;
|
int inputSize;
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#define I_LAYER_H
|
#define I_LAYER_H
|
||||||
|
|
||||||
#include <cublas_v2.h>
|
#include <cublas_v2.h>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
namespace Layers {
|
namespace Layers {
|
||||||
|
|
||||||
@@ -11,6 +12,8 @@ namespace Layers {
|
|||||||
virtual ~ILayer() {}
|
virtual ~ILayer() {}
|
||||||
|
|
||||||
virtual void forward(const float* input, float* output) = 0;
|
virtual void forward(const float* input, float* output) = 0;
|
||||||
|
virtual void setWeights(const std::vector<std::vector<float>>& weights) = 0;
|
||||||
|
virtual void setBiases(const std::vector<float>& biases) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace Layers
|
} // namespace Layers
|
||||||
|
|||||||
6
src/CMakeLists.txt
Normal file
6
src/CMakeLists.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
set(LAYER_SOURCES layers/dense.cpp)
|
||||||
|
|
||||||
|
add_library(CUDANet
|
||||||
|
utils/cuda_helper.cpp
|
||||||
|
${LAYER_SOURCES}
|
||||||
|
)
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
#include "dense.h"
|
#include "dense.h"
|
||||||
#include "cuda_helper.h"
|
#include "cuda_helper.h"
|
||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
|
#include <cuda_runtime.h>
|
||||||
#include <cublas_v2.h>
|
#include <cublas_v2.h>
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|||||||
68
src/main.cpp
68
src/main.cpp
@@ -1,68 +0,0 @@
|
|||||||
#include <stdio.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <cuda_runtime.h>
|
|
||||||
#include "cublas_v2.h"
|
|
||||||
#include "cuda_helper.h"
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
// Initialize CUDA and get device properties
|
|
||||||
cudaDeviceProp deviceProp = initializeCUDA();
|
|
||||||
|
|
||||||
// Specify vector size
|
|
||||||
const int N = 5;
|
|
||||||
|
|
||||||
// Host vectors
|
|
||||||
float *h_A, *h_B, *h_C;
|
|
||||||
|
|
||||||
// Allocate host memory
|
|
||||||
h_A = (float*)malloc(N * sizeof(float));
|
|
||||||
h_B = (float*)malloc(N * sizeof(float));
|
|
||||||
h_C = (float*)malloc(N * sizeof(float));
|
|
||||||
|
|
||||||
// Initialize host vectors
|
|
||||||
for (int i = 0; i < N; ++i) {
|
|
||||||
h_A[i] = static_cast<float>(i);
|
|
||||||
h_B[i] = static_cast<float>(2 * i);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Allocate device memory
|
|
||||||
float *d_A, *d_B, *d_C;
|
|
||||||
cudaMalloc((void**)&d_A, N * sizeof(float));
|
|
||||||
cudaMalloc((void**)&d_B, N * sizeof(float));
|
|
||||||
cudaMalloc((void**)&d_C, N * sizeof(float));
|
|
||||||
|
|
||||||
// Copy host vectors to device
|
|
||||||
cudaMemcpy(d_A, h_A, N * sizeof(float), cudaMemcpyHostToDevice);
|
|
||||||
cudaMemcpy(d_B, h_B, N * sizeof(float), cudaMemcpyHostToDevice);
|
|
||||||
|
|
||||||
// Create cuBLAS handle
|
|
||||||
cublasHandle_t handle;
|
|
||||||
cublasCreate(&handle);
|
|
||||||
|
|
||||||
// Perform vector addition: C = A + B
|
|
||||||
const float alpha = 1.0f;
|
|
||||||
const float beta = 1.0f;
|
|
||||||
cublasSaxpy(handle, N, &alpha, d_A, 1, d_B, 1);
|
|
||||||
cublasSaxpy(handle, N, &beta, d_B, 1, d_C, 1);
|
|
||||||
|
|
||||||
// Copy result from device to host
|
|
||||||
cudaMemcpy(h_C, d_C, N * sizeof(float), cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
// Display result
|
|
||||||
printf("Result: ");
|
|
||||||
for (int i = 0; i < N; ++i) {
|
|
||||||
printf("%f ", h_C[i]);
|
|
||||||
}
|
|
||||||
printf("\n");
|
|
||||||
|
|
||||||
// Clean up
|
|
||||||
free(h_A);
|
|
||||||
free(h_B);
|
|
||||||
free(h_C);
|
|
||||||
cudaFree(d_A);
|
|
||||||
cudaFree(d_B);
|
|
||||||
cudaFree(d_C);
|
|
||||||
cublasDestroy(handle);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user