提交 ed5bcfe8 编写于 作者: L liaogang

add cu_xx to simplify cmake

上级 5409e5e3
......@@ -28,21 +28,6 @@
# cmake_parse_arguments can help us to achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
# cc_binary(tensor
# SRCS
# tensor.cc)
function(cc_binary TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)
# cc_library parses tensor.cc and figures out that target also depend on tensor.h.
# cc_library(tensor
# SRCS
......@@ -54,14 +39,28 @@ function(cc_library TARGET_NAME)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${cc_library_OPTIONAL} STREQUAL "STATIC")
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
else()
if (${cc_library_OPTIONAL} STREQUAL "SHARED")
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
# cc_binary(tensor
# SRCS
# tensor.cc)
function(cc_binary TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)
# The dependency to target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
# cc_test(tensor_test
......@@ -79,3 +78,52 @@ function(cc_test TARGET_NAME)
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depend on tensor. This implies that if
# any of tensor.{h.cc}, ops.{h,cu} is changed, ops need to be re-built.
# nv_library(ops
# SRCS
# ops.cu
# DEPS
# tensor)
function(nv_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)
function(nv_binary TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
endfunction(nv_binary)
# The dependency to target tensor implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test need to be re-built.
# nv_test(ops_test
# SRCS
# ops_test.cu
# DEPS
# ops)
function(nv_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
cc_test(place_test
SRCS place_test.cc
DEPS majel)
if(WITH_GPU)
nv_test(cuda_test SRCS cuda_test.cu)
endif()
#include <stdio.h>
#include <cuda_runtime.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x) \
if (x != cudaSuccess) { \
fprintf(stderr,"%s in %s at line %d\n", \
cudaGetErrorString(err),__FILE__,__LINE__); \
exit(-1); \
} \
__global__ void vecAdd (float* d_A, float* d_B, float* d_C, int n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
d_C[i] = d_A[i] + d_B[i];
}
}
TEST(Cuda, Equality) {
int n = 10;
// Memory allocation for h_A, h_B and h_C (in the host)
float h_A[10] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0 };
float h_B[10] = { 0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0 };
float h_C[10];
float *d_A, *d_B, *d_C;
// Memory allocation for d_A, d_B and d_C (in the device)
err = cudaMalloc((void **) &d_A, sizeof(float)*n);
CHECK_ERR(err);
err =cudaMalloc((void **) &d_B, sizeof(float)*n);
CHECK_ERR(err);
err =cudaMalloc((void **) &d_C, sizeof(float)*n);
CHECK_ERR(err);
// Copying memory to device
err = cudaMemcpy(d_A, h_A, sizeof(float)*n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
err = cudaMemcpy(d_B, h_B, sizeof(float)*n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
// Calling the kernel
vecAdd<<<ceil(n/256.0), 256>>>(d_A,d_B,d_C,n);
// Copying results back to host
err = cudaMemcpy(h_C, d_C, sizeof(float)*n, cudaMemcpyDeviceToHost);
CHECK_ERR(err);
EXPECT_EQ(h_C[1], 1.0);
for (size_t i = 1; i < n - 1; ++i) {
EXPECT_EQ(h_C[i], 11.0);
}
EXPECT_EQ(h_C[0], 1.0);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册