diff --git a/cmake/copyfile.py b/cmake/copyfile.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ba4d95049dc76d1f6bd5bb67e116d5d3f4ea23b
--- /dev/null
+++ b/cmake/copyfile.py
@@ -0,0 +1,44 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import shutil
+import glob
+
+
+def main():
+    src = sys.argv[1]
+    dst = sys.argv[2]
+    if os.path.isdir(src):  # copy directory
+        pathList = os.path.split(src)
+        dst = os.path.join(dst, pathList[-1])
+        if not os.path.exists(dst):
+            shutil.copytree(src, dst)
+            print("first copy directory: {0} --->>> {1}".format(src, dst))
+        else:
+            shutil.rmtree(dst)
+            shutil.copytree(src, dst)
+            print("overwritten copy directory: {0} --->>> {1}".format(src, dst))
+    else:  # copy file, wildcard
+        if not os.path.exists(dst):
+            os.makedirs(dst)
+        srcFiles = glob.glob(src)
+        for srcFile in srcFiles:
+            shutil.copy(srcFile, dst)
+            print("copy file: {0} --->>> {1}".format(srcFile, dst))
+
+
+if __name__ == "__main__":
+    main()
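Note on the script above: it takes exactly two positional arguments, source then destination. A directory source is copied recursively to dst/<basename>, with any existing copy removed first; any other source is treated as a glob pattern, since the Windows cmd shell does not expand wildcards itself. A minimal usage sketch follows (the paths are illustrative, not taken from this patch):

    import subprocess
    import sys

    # Directory source: recreated as <dst>/<basename(src)>; a stale copy is removed first.
    subprocess.check_call([sys.executable, "cmake/copyfile.py",
                           "third_party/install/mklml/include", "fluid_install_dir"])

    # Non-directory source: treated as a wildcard and expanded by glob.glob().
    subprocess.check_call([sys.executable, "cmake/copyfile.py",
                           "paddle/fluid/memory/*.h",
                           "fluid_install_dir/paddle/fluid/memory"])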
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index bd46bac0b36542082677a950e2afa747242e01df..c3877d2a4a4fee0dd08ca2ae35db0f6c9abb6d41 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -13,6 +13,14 @@
 # limitations under the License.
 
 # make package for paddle fluid shared and static library
+
+if(WIN32)
+    if(NOT PYTHON_EXECUTABLE)
+        FIND_PACKAGE(PythonInterp REQUIRED)
+    endif()
+endif()
+
+set(COPY_SCRIPT_DIR ${PADDLE_SOURCE_DIR}/cmake)
 function(copy TARGET)
     set(options "")
     set(oneValueArgs "")
@@ -26,42 +34,16 @@ function(copy TARGET)
         message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
     endif ()
     math(EXPR len "${copy_lib_SRCS_len} - 1")
-    add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
     foreach (index RANGE ${len})
         list(GET copy_lib_SRCS ${index} src)
         list(GET copy_lib_DSTS ${index} dst)
-        if (WIN32)
-            if(IS_DIRECTORY ${src})
-                get_filename_component(last_path ${src} NAME)
-                string(APPEND dst "/" ${last_path})
-                add_custom_command(TARGET ${TARGET} PRE_BUILD
-                        COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}"
-                        )
-                if(EXISTS ${src})
-                    add_custom_command(TARGET ${TARGET} PRE_BUILD
-                            COMMAND cmake -E copy_directory "${src}" "${dst}"
-                            COMMENT "copying ${src} -> ${dst}")
-                else()
-                    message(WARNING "${src} not exist!")
-                endif()
-            else()
-                # windows cmd shell will not expand wildcard automatically.
-                # below expand the files, and copy them by rules.
-                file(GLOB src_files ${src})
-                if (NOT "${src_files}" STREQUAL "")
-                    list(REMOVE_DUPLICATES src_files)
-                endif ()
-                add_custom_command(TARGET ${TARGET} PRE_BUILD
-                        COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}"
-                        )
-                foreach (src_file ${src_files})
-                    add_custom_command(TARGET ${TARGET} PRE_BUILD
-                            COMMAND ${CMAKE_COMMAND} -E copy "${src_file}" "${dst}"
-                            COMMENT "copying ${src_file} -> ${dst}")
-                endforeach ()
-            endif()
-        else (WIN32)  # not windows
+        if (WIN32)  #windows
+            file(TO_NATIVE_PATH ${src} native_src)
+            file(TO_NATIVE_PATH ${dst} native_dst)
+            add_custom_command(TARGET ${TARGET} POST_BUILD
+                    COMMAND ${PYTHON_EXECUTABLE} ${COPY_SCRIPT_DIR}/copyfile.py ${native_src} ${native_dst})
+        else (WIN32)  #not windows
             add_custom_command(TARGET ${TARGET} PRE_BUILD
                     COMMAND mkdir -p "${dst}"
                     COMMAND cp -r "${src}" "${dst}"
@@ -189,13 +171,12 @@ copy(zlib_lib
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
 set(module "framework")
-if (NOT WIN32)
-    set(framework_lib_deps framework_py_proto)
-endif (NOT WIN32)
+set(framework_lib_deps framework_py_proto)
+
 copy(framework_lib DEPS ${framework_lib_deps}
-        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
-        ${src_dir}/${module}/ir/*.h
-        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h ${src_dir}/${module}/ir/memory_optimize_pass/*.h
+        ${src_dir}/${module}/ir/*.h ${src_dir}/${module}/fleet/*.h
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}/ir/memory_optimize_pass ${dst_dir}/${module}/ir ${dst_dir}/${module}/fleet
 )
 
 set(module "memory")
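Note on the framework_lib change above: copy() pairs SRCS and DSTS strictly by index, which is why each newly added SRCS glob (data_feed.pb.h, ir/memory_optimize_pass/*.h, fleet/*.h) comes with a matching DSTS entry. A small Python sketch of that contract, with illustrative paths rather than the full lists from the patch:

    # copy()'s contract in miniature: SRCS and DSTS are parallel lists paired
    # by index; the CMake function stops with FATAL_ERROR if the lengths differ.
    srcs = ["framework/*.h", "framework/details/*.h", "framework/fleet/*.h"]
    dsts = ["out/framework", "out/framework/details", "out/framework/fleet"]
    assert len(srcs) == len(dsts), "source numbers are not equal to destination numbers"
    for src, dst in zip(srcs, dsts):
        print("copy {0} --->>> {1}".format(src, dst))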
"paddle::platform::GetCUDAComputeCapability, error code : %d, %s", + error_code, CudaErrorWebsite()); return device_prop.major * 10 + device_prop.minor; } @@ -143,20 +155,25 @@ bool TensorCoreAvailable() { int GetCUDAMultiProcessors(int id) { PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count"); int count; - PADDLE_ENFORCE( - cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id), - "cudaDeviceGetAttribute failed in " - "paddle::platform::GetCUDAMultiProcessors"); + auto error_code = + cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id); + PADDLE_ENFORCE(error_code, + "cudaDeviceGetAttribute failed in " + "paddle::platform::GetCUDAMultiProcess, error code : %d, %s", + error_code, CudaErrorWebsite()); return count; } int GetCUDAMaxThreadsPerMultiProcessor(int id) { PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count"); int count; - PADDLE_ENFORCE(cudaDeviceGetAttribute( - &count, cudaDevAttrMaxThreadsPerMultiProcessor, id), - "cudaDeviceGetAttribute failed in " - "paddle::platform::GetCUDAMaxThreadsPerMultiProcessor"); + auto error_code = cudaDeviceGetAttribute( + &count, cudaDevAttrMaxThreadsPerMultiProcessor, id); + PADDLE_ENFORCE( + error_code, + "cudaDeviceGetAttribute failed in paddle::" + "platform::GetCUDAMaxThreadsPerMultiProcessor, error code : %d, %s", + error_code, CudaErrorWebsite()); return count; } @@ -266,37 +283,50 @@ size_t GpuMaxChunkSize() { void GpuMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream) { - PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream), + auto error_code = cudaMemcpyAsync(dst, src, count, kind, stream); + PADDLE_ENFORCE(error_code, "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync " - "(%p -> %p, length: %d)", - src, dst, static_cast(count)); + "(%p -> %p, length: %d) error code : %d, %s", + src, dst, static_cast(count), error_code, + CudaErrorWebsite()); } void GpuMemcpySync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind) { - PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind), - "cudaMemcpy failed in paddle::platform::GpuMemcpySync (%p -> " - "%p, length: %d)", - src, dst, static_cast(count)); + auto error_code = cudaMemcpy(dst, src, count, kind); + PADDLE_ENFORCE(error_code, + "cudaMemcpy failed in paddle::platform::GpuMemcpySync " + "(%p -> %p, length: %d) error code : %d, %s", + src, dst, static_cast(count), error_code, + CudaErrorWebsite()); } void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src, int src_device, size_t count, cudaStream_t stream) { + auto error_code = + cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream); PADDLE_ENFORCE( - cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream), - "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync"); + error_code, + "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync " + "error code : %d, %s", + error_code, CudaErrorWebsite()); } void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src, int src_device, size_t count) { - PADDLE_ENFORCE( - cudaMemcpyPeer(dst, dst_device, src, src_device, count), - "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync"); + auto error_code = cudaMemcpyPeer(dst, dst_device, src, src_device, count); + PADDLE_ENFORCE(error_code, + "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync " + "error code : %d, %s", + error_code, CudaErrorWebsite()); } void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t 
@@ -266,37 +283,50 @@ size_t GpuMaxChunkSize() {
 void GpuMemcpyAsync(void *dst, const void *src, size_t count,
                     enum cudaMemcpyKind kind, cudaStream_t stream) {
-  PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream),
+  auto error_code = cudaMemcpyAsync(dst, src, count, kind, stream);
+  PADDLE_ENFORCE(error_code,
                  "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync "
-                 "(%p -> %p, length: %d)",
-                 src, dst, static_cast<int>(count));
+                 "(%p -> %p, length: %d) error code : %d, %s",
+                 src, dst, static_cast<int>(count), error_code,
+                 CudaErrorWebsite());
 }
 
 void GpuMemcpySync(void *dst, const void *src, size_t count,
                    enum cudaMemcpyKind kind) {
-  PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
-                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync (%p -> "
-                 "%p, length: %d)",
-                 src, dst, static_cast<int>(count));
+  auto error_code = cudaMemcpy(dst, src, count, kind);
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync "
+                 "(%p -> %p, length: %d) error code : %d, %s",
+                 src, dst, static_cast<int>(count), error_code,
+                 CudaErrorWebsite());
 }
 
 void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
                         int src_device, size_t count, cudaStream_t stream) {
+  auto error_code =
+      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream);
   PADDLE_ENFORCE(
-      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream),
-      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync");
+      error_code,
+      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync "
+      "error code : %d, %s",
+      error_code, CudaErrorWebsite());
 }
 
 void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
                        int src_device, size_t count) {
-  PADDLE_ENFORCE(
-      cudaMemcpyPeer(dst, dst_device, src, src_device, count),
-      "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync");
+  auto error_code = cudaMemcpyPeer(dst, dst_device, src, src_device, count);
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync "
+                 "error code : %d, %s",
+                 error_code, CudaErrorWebsite());
 }
 
 void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
-  PADDLE_ENFORCE(cudaMemsetAsync(dst, value, count, stream),
-                 "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync");
+  auto error_code = cudaMemsetAsync(dst, value, count, stream);
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync "
+                 "error code : %d, %s",
+                 error_code, CudaErrorWebsite());
 }
 }  // namespace platform
 }  // namespace paddle
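Every change in this file follows the same pattern: store the cudaError_t returned by the runtime call in error_code, hand it to PADDLE_ENFORCE, and append the numeric code plus the CudaErrorWebsite() link to the message so failures can be looked up in the CUDA documentation. Below is a standalone sketch of that reporting style, written in Python with ctypes purely for illustration (the libcudart.so name and its availability are assumptions, not part of the patch):

    import ctypes

    CUDA_ERROR_WEBSITE = (
        "Please see detail in https://docs.nvidia.com/cuda/cuda-runtime-api"
        "/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c217824"
        "6db0a94a430e0038")

    # Assumes a Linux machine with the CUDA runtime installed.
    cudart = ctypes.CDLL("libcudart.so")
    count = ctypes.c_int()
    error_code = cudart.cudaGetDeviceCount(ctypes.byref(count))  # cudaError_t as int
    if error_code != 0:  # 0 == cudaSuccess; nonzero means the call failed
        raise RuntimeError("cudaGetDeviceCount failed, error code : %d, %s"
                           % (error_code, CUDA_ERROR_WEBSITE))
    print("device count:", count.value)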