提交 0a4b540a 编写于 作者: Y Yi Wang 提交者: GitHub

Merge pull request #2174 from gangliao/define_cmake

Define cc_xxx and nv_xxx to simplify cmake
......@@ -92,6 +92,7 @@ include(external/swig) # download, build, install swig
include(external/warpctc) # download, build, install warpctc
include(external/any) # download libn::any
include(generic) # simplify cmake module
include(package) # set paddle packages
include(cpplint) # set paddle c++ style
include(ccache) # set ccache for compilation
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# To simplify the build process of PaddlePaddle, we defined couple of
# fundamental abstractions, e.g., how to build library, binary and
# test in C++, CUDA and Go.
#
# -------------------------------------------
# C++ CUDA C++ Go
# -------------------------------------------
# cc_library nv_library go_library
# cc_binary nv_binary go_binary
# cc_test nv_test go_test
# -------------------------------------------
#
# cmake_parse_arguments can help us to achieve this goal.
# https://cmake.org/cmake/help/v3.0/module/CMakeParseArguments.html
# cc_library parses tensor.cc and figures out that target also depend on tensor.h.
# cc_library(tensor
# SRCS
# tensor.cc
# DEPS
# variant)
function(cc_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${cc_library_OPTIONAL} STREQUAL "SHARED")
add_library(${TARGET_NAME} SHARED ${cc_library_SRCS})
else()
add_library(${TARGET_NAME} STATIC ${cc_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${cc_library_DEPS} ${external_project_dependencies})
endfunction(cc_library)
# cc_binary parses tensor.cc and figures out that target also depend on tensor.h.
# cc_binary(tensor
# SRCS
# tensor.cc)
function(cc_binary TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_binary_SRCS})
add_dependencies(${TARGET_NAME} ${cc_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_binary_DEPS})
endfunction(cc_binary)
# The dependency to target tensor implies that if any of
# tensor{.h,.cc,_test.cc} is changed, tensor_test need to be re-built.
# cc_test(tensor_test
# SRCS
# tensor_test.cc
# DEPS
# tensor)
function(cc_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(cc_test)
# Suppose that ops.cu includes global functions that take Tensor as
# their parameters, so ops depend on tensor. This implies that if
# any of tensor.{h.cc}, ops.{h,cu} is changed, ops need to be re-built.
# nv_library(ops
# SRCS
# ops.cu
# DEPS
# tensor)
function(nv_library TARGET_NAME)
set(options OPTIONAL)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (${nv_library_OPTIONAL} STREQUAL "SHARED")
cuda_add_library(${TARGET_NAME} SHARED ${nv_library_SRCS})
else()
cuda_add_library(${TARGET_NAME} STATIC ${nv_library_SRCS})
endif()
add_dependencies(${TARGET_NAME} ${nv_library_DEPS} ${external_project_dependencies})
endfunction(nv_library)
function(nv_binary TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_binary_SRCS})
add_dependencies(${TARGET_NAME} ${nv_binary_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_binary_DEPS})
endfunction(nv_binary)
# The dependency to target tensor implies that if any of
# ops{.h,.cu,_test.cu} is changed, ops_test need to be re-built.
# nv_test(ops_test
# SRCS
# ops_test.cu
# DEPS
# ops)
function(nv_test TARGET_NAME)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(nv_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cuda_add_executable(${TARGET_NAME} ${nv_test_SRCS})
add_dependencies(${TARGET_NAME} ${nv_test_DEPS} ${external_project_dependencies})
target_link_libraries(${TARGET_NAME} ${nv_test_DEPS} ${GTEST_MAIN_LIBRARIES} ${GTEST_LIBRARIES})
add_test(${TARGET_NAME} ${TARGET_NAME})
endfunction(nv_test)
cmake_minimum_required(VERSION 3.0)
if(${CMAKE_CURRENT_SOURCE_DIR} STREQUAL ${CMAKE_SOURCE_DIR})
# find #include <majel/xx.h>
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_SOURCE_DIR} DIRECTORY)
include_directories(${PARENT_DIR})
# find cmake directory modules
get_filename_component(PARENT_DIR ${PARENT_DIR} DIRECTORY)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${PARENT_DIR}/cmake")
# enable boost
find_package(Boost REQUIRED)
if(NOT Boost_FOUND)
message(FATAL "Cannot find Boost library.")
endif()
include_directories(${Boost_INCLUDE_DIRS})
# enable c++11
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
# enable gtest
set(THIRD_PARTY_PATH ${CMAKE_CURRENT_SOURCE_DIR}/third_party)
set(WITH_TESTING ON)
include(external/gtest)
else()
message("-- Found gtest (include: ${GTEST_INCLUDE_DIR}, library: ${GTEST_LIBRARIES})")
endif()
########################### Build Majel #############################
set(MAJEL_CXX_FILES place.cc)
set(MAJEL_CUDA_FILES "")
if(CUDA_FOUND)
cuda_add_library(majel ${MAJEL_CUDA_FILES} ${MAJEL_CXX_FILES})
else()
add_library(majel ${MAJEL_CXX_FILES})
endif()
add_dependencies(majel ${external_project_dependencies})
#####################################################################
cc_library(majel SRCS place.cc)
if(WITH_TESTING)
add_subdirectory(test)
......
file(GLOB_RECURSE ALL_TEST_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "*.cc")
cc_test(place_test
SRCS place_test.cc
DEPS majel)
add_executable(majel_tests ${ALL_TEST_FILES})
add_dependencies(majel_tests majel)
target_link_libraries(majel_tests
${GTEST_LIBRARIES}
${GTEST_MAIN_LIBRARIES}
majel
)
add_test(majel_tests majel_tests)
if(WITH_GPU)
nv_test(cuda_test SRCS cuda_test.cu)
endif()
#include <cuda_runtime.h>
#include <stdio.h>
#include "gtest/gtest.h"
#define CHECK_ERR(x) \
if (x != cudaSuccess) { \
fprintf(stderr, \
"%s in %s at line %d\n", \
cudaGetErrorString(err), \
__FILE__, \
__LINE__); \
exit(-1); \
}
__global__ void vecAdd(float *d_A, float *d_B, float *d_C, int n) {
int i = blockDim.x * blockIdx.x + threadIdx.x;
if (i < n) {
d_C[i] = d_A[i] + d_B[i];
}
}
TEST(Cuda, Equality) {
int n = 10;
// Memory allocation for h_A, h_B and h_C (in the host)
float h_A[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 0.0};
float h_B[10] = {0.0, 9.0, 8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};
float h_C[10];
float *d_A, *d_B, *d_C;
cudaError_t err;
// Memory allocation for d_A, d_B and d_C (in the device)
err = cudaMalloc((void **)&d_A, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_B, sizeof(float) * n);
CHECK_ERR(err);
err = cudaMalloc((void **)&d_C, sizeof(float) * n);
CHECK_ERR(err);
// Copying memory to device
err = cudaMemcpy(d_A, h_A, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
err = cudaMemcpy(d_B, h_B, sizeof(float) * n, cudaMemcpyHostToDevice);
CHECK_ERR(err);
// Calling the kernel
vecAdd<<<ceil(n / 256.0), 256>>>(d_A, d_B, d_C, n);
// Copying results back to host
err = cudaMemcpy(h_C, d_C, sizeof(float) * n, cudaMemcpyDeviceToHost);
CHECK_ERR(err);
EXPECT_EQ(h_C[0], 1.0);
for (int i = 1; i < n - 1; ++i) {
EXPECT_EQ(h_C[i], 11.0);
}
EXPECT_EQ(h_C[9], 1.0);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册