diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index 4001c915072c3e235b0799008a75d809b6cfd4f1..22a26d7c5b04ba1f45de5ec9f3387c539ade730b 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -28,21 +28,6 @@
 # cmake_parse_arguments can help us to achieve this goal.
 # https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
 
-
-# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
-# cc_binary(tensor
-#   SRCS
-#   tensor.cc)
-function(cc_binary TARGET_NAME)
-  set(options OPTIONAL)
-  set(oneValueArgs "")
-  set(multiValueArgs SRCS DEPS)
-  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
-  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
-  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
-endfunction(cc_binary)
-
 # cc_library parses tensor.cc and figures out that target also depend on tensor.h.
 # cc_library(tensor
 #   SRCS
@@ -54,14 +39,28 @@ function(cc_library TARGET_NAME)
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS)
   cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
-  if (${cc_library_OPTIONAL} STREQUAL "STATIC")
-    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
-  else()
+  if (${cc_library_OPTIONAL} STREQUAL "SHARED")  # NOTE(review): if OPTIONAL is declared as a boolean option (not visible in this hunk) this is never true - confirm
     add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
+  else()  # every other value builds a static library
+    add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
   endif()
   add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
 endfunction(cc_library)
 
+# cc_binary parses tensor.cc and figures out that target also depends on tensor.h.
+# cc_binary(tensor
+#   SRCS
+#   tensor.cc)
+function(cc_binary TARGET_NAME)
+  set(options OPTIONAL)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)  # SRCS feeds add_executable; DEPS feeds add_dependencies and target_link_libraries
+  cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  add_executable(${TARGET_NAME} ${cc_binary_SRCS})
+  add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
+endfunction(cc_binary)
+
 # The dependency to target tensor implies that if any of
 # tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
 # cc_test(tensor_test
@@ -79,3 +78,52 @@ function(cc_test TARGET_NAME)
   target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
   add_test(${TARGET_NAME} ${TARGET_NAME})
 endfunction(cc_test)
+
+# Suppose that ops.cu includes global functions that take Tensor as
+# their parameters, so ops depends on tensor. This implies that if
+# any of tensor.{h,cc}, ops.{h,cu} is changed, ops needs to be re-built.
+# nv_library(ops
+#   SRCS
+#   ops.cu
+#   DEPS
+#   tensor)
+function(nv_library TARGET_NAME)
+  set(options OPTIONAL STATIC SHARED)  # STATIC is the default; pass SHARED to build a shared library
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  if (nv_library_SHARED)  # option keywords parse to TRUE/FALSE, so test the flag itself
+    cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
+  else()
+    cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
+  endif()
+  add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
+endfunction(nv_library)
+
+function(nv_binary TARGET_NAME)  # CUDA executable built from SRCS and linked against DEPS
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
+  add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
+endfunction(nv_binary)
+
+# The dependency to target ops implies that if any of
+# ops{.h,.cu,_test.cu} is changed, ops_test needs to be re-built.
+# nv_test(ops_test
+#   SRCS
+#   ops_test.cu
+#   DEPS
+#   ops)
+function(nv_test TARGET_NAME)
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
+  cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
+  add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
+  target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
+  add_test(${TARGET_NAME} ${TARGET_NAME})
+endfunction(nv_test)
diff --git a/paddle/majel/test/CMakeLists.txt b/paddle/majel/test/CMakeLists.txt
index 6379a4d6e71fdb5a931591bfd711c1ed60b97e83..68f9059874aed8843da1fc598c7d2e57e9b8bbfe 100644
--- a/paddle/majel/test/CMakeLists.txt
+++ b/paddle/majel/test/CMakeLists.txt
@@ -1,3 +1,7 @@
 cc_test(place_test
   SRCS place_test.cc
   DEPS majel)
+
+if(WITH_GPU)
+  nv_test(cuda_test SRCS cuda_test.cu)
+endif()
diff --git a/paddle/majel/test/cuda_test.cu b/paddle/majel/test/cuda_test.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ebc9a2786e1b03f06200f6217f43e85555787d11
--- /dev/null
+++ b/paddle/majel/test/cuda_test.cu
@@ -0,0 +1,67 @@
+#include <cuda_runtime.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "gtest/gtest.h"
+
+// Aborts the process with a diagnostic when a CUDA runtime call does not
+// return cudaSuccess. The argument is evaluated exactly once.
+#define CHECK_ERR(x)                                             \
+  do {                                                           \
+    cudaError_t check_err_ = (x);                                \
+    if (check_err_ != cudaSuccess) {                             \
+      fprintf(stderr,"%s in %s at line %d\n",                    \
+              cudaGetErrorString(check_err_),__FILE__,__LINE__); \
+      exit(-1);                                                  \
+    }                                                            \
+  } while (0)
+
+// Element-wise sum: d_C[i] = d_A[i] + d_B[i] for every i in [0, n).
+// Threads whose global index is >= n do nothing.
+__global__ void vecAdd (float* d_A, float* d_B, float* d_C, int n) {
+  int i = blockDim.x * blockIdx.x + threadIdx.x;
+  if (i < n) {
+    d_C[i] = d_A[i] + d_B[i];
+  }
+}
+
+// Adds two 10-element vectors on the device and checks the sums on the host.
+TEST(Cuda, Equality) {
+  const int n = 10;
+  const size_t bytes = sizeof(float) * n;
+  // Memory allocation for h_A, h_B and h_C (in the host)
+  float h_A[10] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 0.0f };
+  float h_B[10] = { 0.0f, 9.0f, 8.0f, 7.0f, 6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f };
+  float h_C[10] = { 0.0f };
+  float *d_A = NULL, *d_B = NULL, *d_C = NULL;
+
+  // Memory allocation for d_A, d_B and d_C (in the device)
+  CHECK_ERR(cudaMalloc((void **) &d_A, bytes));
+  CHECK_ERR(cudaMalloc((void **) &d_B, bytes));
+  CHECK_ERR(cudaMalloc((void **) &d_C, bytes));
+
+  // Copying memory to device
+  CHECK_ERR(cudaMemcpy(d_A, h_A, bytes, cudaMemcpyHostToDevice));
+  CHECK_ERR(cudaMemcpy(d_B, h_B, bytes, cudaMemcpyHostToDevice));
+
+  // Calling the kernel with enough blocks to cover all n elements
+  const int threads = 256;
+  const int blocks = (n + threads - 1) / threads;
+  vecAdd<<<blocks, threads>>>(d_A, d_B, d_C, n);
+  // A launch returns no status; launch errors surface via cudaGetLastError().
+  CHECK_ERR(cudaGetLastError());
+
+  // Copying results back to host
+  CHECK_ERR(cudaMemcpy(h_C, d_C, bytes, cudaMemcpyDeviceToHost));
+
+  // Release the device buffers; the checks below only read host memory.
+  CHECK_ERR(cudaFree(d_A));
+  CHECK_ERR(cudaFree(d_B));
+  CHECK_ERR(cudaFree(d_C));
+
+  // Only the first and last pairs sum to 1; every pair in between sums to 11.
+  EXPECT_FLOAT_EQ(h_C[0], 1.0f);
+  for (int i = 1; i < n - 1; ++i) {
+    EXPECT_FLOAT_EQ(h_C[i], 11.0f);
+  }
+  EXPECT_FLOAT_EQ(h_C[n - 1], 1.0f);
+}