diff --git a/.gitignore b/.gitignore
index 259148f..7170c9a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,5 @@
 *.exe
 *.out
 *.app
+
+build/
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..2563658
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,34 @@
+cmake_minimum_required(VERSION 3.12)
+
+project(CUDANet)
+
+# Find CUDA
+find_package(CUDA REQUIRED)
+
+# Add CUDA include directories
+include_directories(${CUDA_INCLUDE_DIRS})
+
+# Add project source files
+set(SOURCES
+    src/main.cpp
+    src/utils/cuda_helper.cpp
+)
+
+# Set CUDA architecture (change according to your GPU)
+set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -arch=sm_75)
+
+# Build executable
+cuda_add_executable(${PROJECT_NAME} ${SOURCES})
+
+# Link cuBLAS library
+target_link_libraries(${PROJECT_NAME} ${CUDA_cublas_LIBRARY})
+
+# Set include directories
+target_include_directories(${PROJECT_NAME} PRIVATE
+    ${CMAKE_CURRENT_SOURCE_DIR}/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/include/utils
+    ${CMAKE_CURRENT_SOURCE_DIR}/src
+)
+
+# Set C++ standard
+set_property(TARGET ${PROJECT_NAME} PROPERTY CXX_STANDARD 11)
\ No newline at end of file
diff --git a/include/utils/cuda_helper.h b/include/utils/cuda_helper.h
new file mode 100644
--- /dev/null
+++ b/include/utils/cuda_helper.h
@@ -0,0 +1,30 @@
+#ifndef CUDA_HELPER_H
+#define CUDA_HELPER_H
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <cuda_runtime.h>
+
+// CUDA error checking macro.
+// Evaluates `call` once. If it does not return cudaSuccess, prints the source
+// location, the numeric code, the CUDA error string and the failing expression
+// to stderr, then terminates the process with EXIT_FAILURE.
+// The local uses an unusual name so it cannot shadow a variable named inside
+// `call`.
+#define CUDA_CHECK(call) \
+do { \
+    const cudaError_t cudaCheckResult_ = (call); \
+    if (cudaCheckResult_ != cudaSuccess) { \
+        std::fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", \
+                __FILE__, __LINE__, static_cast<int>(cudaCheckResult_), \
+                cudaGetErrorString(cudaCheckResult_), #call); \
+        std::exit(EXIT_FAILURE); \
+    } \
+} while (0)
+
+// Initialize CUDA and return the device properties.
+// Defined in src/utils/cuda_helper.cpp.
+cudaDeviceProp initializeCUDA();
+
+#endif // CUDA_HELPER_H
diff --git a/src/main.cpp b/src/main.cpp
new file mode 100644
--- /dev/null
+++ b/src/main.cpp
@@ -0,0 +1,83 @@
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include <cuda_runtime.h>
+
+#include "cublas_v2.h"
+#include "cuda_helper.h"
+
+// cuBLAS error checking macro.
+// cuBLAS reports cublasStatus_t rather than cudaError_t, so CUDA_CHECK cannot
+// be reused. On failure prints the location, numeric status and the failing
+// expression to stderr, then terminates the process with EXIT_FAILURE.
+#define CUBLAS_CHECK(call) \
+do { \
+    const cublasStatus_t cublasCheckStatus_ = (call); \
+    if (cublasCheckStatus_ != CUBLAS_STATUS_SUCCESS) { \
+        std::fprintf(stderr, "cuBLAS error at %s:%d code=%d \"%s\" \n", \
+                __FILE__, __LINE__, static_cast<int>(cublasCheckStatus_), \
+                #call); \
+        std::exit(EXIT_FAILURE); \
+    } \
+} while (0)
+
+// Adds two small vectors on the GPU with cuBLAS and prints the result.
+int main() {
+    // Initialize CUDA (selects the device and prints its name). The returned
+    // device properties are not needed here.
+    initializeCUDA();
+
+    // Specify vector size
+    const int N = 5;
+    const std::size_t bytes = N * sizeof(float);
+
+    // Host vectors (std::vector owns the storage: no malloc/free bookkeeping)
+    std::vector<float> h_A(N), h_B(N), h_C(N);
+
+    // Initialize host vectors
+    for (int i = 0; i < N; ++i) {
+        h_A[i] = static_cast<float>(i);
+        h_B[i] = static_cast<float>(2 * i);
+    }
+
+    // Allocate device memory
+    float *d_A = nullptr, *d_B = nullptr, *d_C = nullptr;
+    CUDA_CHECK(cudaMalloc((void**)&d_A, bytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_B, bytes));
+    CUDA_CHECK(cudaMalloc((void**)&d_C, bytes));
+
+    // Copy host vectors to device
+    CUDA_CHECK(cudaMemcpy(d_A, h_A.data(), bytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(d_B, h_B.data(), bytes, cudaMemcpyHostToDevice));
+
+    // Create cuBLAS handle
+    cublasHandle_t handle;
+    CUBLAS_CHECK(cublasCreate(&handle));
+
+    // Perform vector addition: C = A + B
+    // cublasSaxpy computes y = alpha * x + y, so it accumulates into whatever
+    // y already holds. d_C comes from cudaMalloc uninitialized, so first copy
+    // A into C and then add B; d_A and d_B are left unmodified.
+    const float alpha = 1.0f;
+    CUBLAS_CHECK(cublasScopy(handle, N, d_A, 1, d_C, 1));
+    CUBLAS_CHECK(cublasSaxpy(handle, N, &alpha, d_B, 1, d_C, 1));
+
+    // Copy result from device to host
+    CUDA_CHECK(cudaMemcpy(h_C.data(), d_C, bytes, cudaMemcpyDeviceToHost));
+
+    // Display result
+    std::printf("Result: ");
+    for (int i = 0; i < N; ++i) {
+        std::printf("%f ", h_C[i]);
+    }
+    std::printf("\n");
+
+    // Clean up
+    CUBLAS_CHECK(cublasDestroy(handle));
+    CUDA_CHECK(cudaFree(d_A));
+    CUDA_CHECK(cudaFree(d_B));
+    CUDA_CHECK(cudaFree(d_C));
+
+    return 0;
+}
diff --git a/src/utils/cuda_helper.cpp b/src/utils/cuda_helper.cpp
new file mode 100644
--- /dev/null
+++ b/src/utils/cuda_helper.cpp
@@ -0,0 +1,28 @@
+#include <cstdio>
+#include <cstdlib>
+
+#include "cuda_helper.h"
+
+// Initialize CUDA and return the device properties.
+// Selects device 0, prints its name to stdout and returns its properties.
+// Terminates the process with EXIT_FAILURE when no device is reported or a
+// CUDA runtime call fails (CUDA_CHECK comes from cuda_helper.h).
+cudaDeviceProp initializeCUDA() {
+    int deviceCount = 0;
+    CUDA_CHECK(cudaGetDeviceCount(&deviceCount));
+
+    if (deviceCount == 0) {
+        std::fprintf(stderr, "No CUDA devices found. Exiting.\n");
+        std::exit(EXIT_FAILURE);
+    }
+
+    int device = 0; // You can modify this to choose a different GPU
+    CUDA_CHECK(cudaSetDevice(device));
+
+    cudaDeviceProp deviceProp;
+    CUDA_CHECK(cudaGetDeviceProperties(&deviceProp, device));
+
+    std::printf("Using CUDA device %d: %s\n", device, deviceProp.name);
+
+    return deviceProp;
+}