Cherry-pick PR#18671, PR#19190 and PR#19225 into Release/1.5 (#19207)

* add function: error code, test=develop
* add function: fix inference lib bug, test=develop
* fix the bug that PYTHON_EXECUTABLE does not exist

Cherry-pick PR#18671, PR#19190 and PR#19225 into Release/1.5 (#19207)
* add function: error code, test=develop
* add function: fix inference lib bug, test=develop
* fix the bug that PYTHON_EXECUTABLE does not exist
1bb013fa · zhouwei25 · liuwei1031 · 1fd0ca82 · 1bb013fa · 1bb013fa
隐藏空白更改
内联并排

Showing 3 changed files with 120 additions and 65 deletions

cmake/copyfile.py cmake/copyfile.py +44 -0

cmake/inference_lib.cmake cmake/inference_lib.cmake +19 -38

paddle/fluid/platform/gpu_info.cc paddle/fluid/platform/gpu_info.cc +57 -27

未找到文件。
--- a/cmake/copyfile.py
+++ b/cmake/copyfile.py
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import shutil
+import glob
+
+
+def main():
+    src = sys.argv[1]
+    dst = sys.argv[2]
+    if os.path.isdir(src):  #copy directory
+        pathList = os.path.split(src)
+        dst = os.path.join(dst, pathList[-1])
+        if not os.path.exists(dst):
+            shutil.copytree(src, dst)
+            print("first copy directory: {0} --->>> {1}".format(src, dst))
+        else:
+            shutil.rmtree(dst)
+            shutil.copytree(src, dst)
+            print("overwritten copy directory: {0} --->>> {1}".format(src, dst))
+    else:  #copy file, wildcard
+        if not os.path.exists(dst):
+            os.makedirs(dst)
+        srcFiles = glob.glob(src)
+        for srcFile in srcFiles:
+            shutil.copy(srcFile, dst)
+            print("copy file: {0} --->>> {1}".format(srcFile, dst))
+
+
+if __name__ == "__main__":  # run the copy only when executed as a script, not on import
+    main()
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -13,6 +13,14 @@
 # limitations under the License.

 # make package for paddle fluid shared and static library
+
+# The Windows branch of copy() runs copyfile.py, so an interpreter is needed.
+if (WIN32 AND NOT PYTHON_EXECUTABLE)
+    find_package(PythonInterp REQUIRED)
+endif ()
+
+# copy() looks for copyfile.py in this directory.
+set(COPY_SCRIPT_DIR ${PADDLE_SOURCE_DIR}/cmake)
 function(copy TARGET)
    set(options "")
    set(oneValueArgs "")
@@ -26,42 +34,16 @@ function(copy TARGET)
        message(FATAL_ERROR "${TARGET} source numbers are not equal to destination numbers")
    endif ()
    math(EXPR len "${copy_lib_SRCS_len} - 1")
-
    add_custom_target(${TARGET} DEPENDS ${copy_lib_DEPS})
    foreach (index RANGE ${len})
        list(GET copy_lib_SRCS ${index} src)
        list(GET copy_lib_DSTS ${index} dst)
-        if (WIN32)
-            if(IS_DIRECTORY ${src})
-                get_filename_component(last_path ${src} NAME)
-                string(APPEND dst "/" ${last_path})
-                add_custom_command(TARGET ${TARGET} PRE_BUILD
-                        COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}"
-                        )
-                if(EXISTS ${src})
-                    add_custom_command(TARGET ${TARGET} PRE_BUILD
-                            COMMAND cmake -E copy_directory "${src}" "${dst}"
-                            COMMENT "copying ${src} -> ${dst}")
-                else()
-                    message(WARNING "${src} not exist!")
-                endif()
-            else()
-                # windows cmd shell will not expand wildcard automatically.
-                # below expand the files, and copy them by rules.
-                file(GLOB src_files ${src})
-                if (NOT "${src_files}" STREQUAL "")
-                    list(REMOVE_DUPLICATES src_files)
-                endif ()
-                add_custom_command(TARGET ${TARGET} PRE_BUILD
-                        COMMAND ${CMAKE_COMMAND} -E make_directory "${dst}"
-                        )
-                foreach (src_file ${src_files})
-                    add_custom_command(TARGET ${TARGET} PRE_BUILD
-                            COMMAND ${CMAKE_COMMAND} -E copy "${src_file}" "${dst}"
-                            COMMENT "copying ${src_file} -> ${dst}")
-                endforeach ()
-            endif()
-        else (WIN32) # not windows
+        if (WIN32)   #windows
+            file(TO_NATIVE_PATH ${src} native_src)
+            file(TO_NATIVE_PATH ${dst} native_dst)
+            add_custom_command(TARGET ${TARGET} POST_BUILD
+                    COMMAND ${PYTHON_EXECUTABLE} ${COPY_SCRIPT_DIR}/copyfile.py ${native_src} ${native_dst})
+        else (WIN32) #not windows
            add_custom_command(TARGET ${TARGET} PRE_BUILD
                    COMMAND mkdir -p "${dst}"
                    COMMAND cp -r "${src}" "${dst}"
@@ -189,13 +171,12 @@ copy(zlib_lib
 set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
 set(dst_dir "${FLUID_INSTALL_DIR}/paddle/fluid")
 set(module "framework")
-if (NOT WIN32)
-    set(framework_lib_deps framework_py_proto)
-endif (NOT WIN32)
+set(framework_lib_deps framework_py_proto)  # now set on every platform; the NOT WIN32 guard is removed
+
 copy(framework_lib DEPS ${framework_lib_deps}
-        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
-        ${src_dir}/${module}/ir/*.h
-        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module}/ir
+        SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/details/*.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h ${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h ${src_dir}/${module}/ir/memory_optimize_pass/*.h
+        ${src_dir}/${module}/ir/*.h ${src_dir}/${module}/fleet/*.h
+        DSTS ${dst_dir}/${module} ${dst_dir}/${module}/details ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}/ir/memory_optimize_pass ${dst_dir}/${module}/ir ${dst_dir}/${module}/fleet  # one destination per SRCS entry, in the same order
         )

 set(module "memory")

--- a/paddle/fluid/platform/gpu_info.cc
+++ b/paddle/fluid/platform/gpu_info.cc
@@ -79,6 +79,12 @@ DEFINE_string(selected_gpus, "",
 namespace paddle {
 namespace platform {

+inline std::string CudaErrorWebsite() {  // hint text passed as the trailing %s of the CUDA failure messages in this file
+  return "Please see detail in https://docs.nvidia.com/cuda/cuda-runtime-api"
+         "/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3f51e3575c217824"
+         "6db0a94a430e0038";
+}
+
 static int GetCUDADeviceCountImpl() {
  const auto *cuda_visible_devices = std::getenv("CUDA_VISIBLE_DEVICES");
  if (cuda_visible_devices != nullptr) {
@@ -92,9 +98,12 @@ static int GetCUDADeviceCountImpl() {
  }

  int count;
+  auto error_code = cudaGetDeviceCount(&count);
  PADDLE_ENFORCE(
-      cudaGetDeviceCount(&count),
-      "cudaGetDeviceCount failed in paddle::platform::GetCUDADeviceCount");
+      error_code,
+      "cudaGetDeviceCount failed in "
+      "paddle::platform::GetCUDADeviceCountImpl, error code : %d, %s",
+      error_code, CudaErrorWebsite());
  return count;
 }

@@ -106,9 +115,12 @@ int GetCUDADeviceCount() {
 int GetCUDAComputeCapability(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  cudaDeviceProp device_prop;
-  PADDLE_ENFORCE(cudaGetDeviceProperties(&device_prop, id),
-                 "cudaGetDeviceProperties failed in "
-                 "paddle::platform::GetCUDAComputeCapability");
+  auto error_code = cudaGetDeviceProperties(&device_prop, id);  // kept in a local: it is both the checked value and the %d argument
+  PADDLE_ENFORCE(
+      error_code,
+      "cudaGetDeviceProperties failed in "
+      "paddle::platform::GetCUDAComputeCapability, error code : %d, %s",
+      error_code, CudaErrorWebsite());
  return device_prop.major * 10 + device_prop.minor;
 }

@@ -143,20 +155,25 @@ bool TensorCoreAvailable() {
 int GetCUDAMultiProcessors(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int count;
-  PADDLE_ENFORCE(
-      cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id),
-      "cudaDeviceGetAttribute failed in "
-      "paddle::platform::GetCUDAMultiProcessors");
+  auto error_code =
+      cudaDeviceGetAttribute(&count, cudaDevAttrMultiProcessorCount, id);
+  PADDLE_ENFORCE(error_code,
+                 "cudaDeviceGetAttribute failed in "
+                 "paddle::platform::GetCUDAMultiProcess, error code : %d, %s",
+                 error_code, CudaErrorWebsite());
  return count;
 }

 int GetCUDAMaxThreadsPerMultiProcessor(int id) {
  PADDLE_ENFORCE_LT(id, GetCUDADeviceCount(), "id must less than GPU count");
  int count;
-  PADDLE_ENFORCE(cudaDeviceGetAttribute(
-                     &count, cudaDevAttrMaxThreadsPerMultiProcessor, id),
-                 "cudaDeviceGetAttribute failed in "
-                 "paddle::platform::GetCUDAMaxThreadsPerMultiProcessor");
+  auto error_code = cudaDeviceGetAttribute(
+      &count, cudaDevAttrMaxThreadsPerMultiProcessor, id);  // kept in a local: it is both the checked value and the %d argument
+  PADDLE_ENFORCE(
+      error_code,
+      "cudaDeviceGetAttribute failed in paddle::"
+      "platform::GetCUDAMaxThreadsPerMultiProcessor, error code : %d, %s",
+      error_code, CudaErrorWebsite());
  return count;
 }

@@ -266,37 +283,50 @@ size_t GpuMaxChunkSize() {

 void GpuMemcpyAsync(void *dst, const void *src, size_t count,
                    enum cudaMemcpyKind kind, cudaStream_t stream) {
-  PADDLE_ENFORCE(cudaMemcpyAsync(dst, src, count, kind, stream),
+  auto error_code = cudaMemcpyAsync(dst, src, count, kind, stream);  // kept in a local: it is both the checked value and a %d argument
+  PADDLE_ENFORCE(error_code,
                 "cudaMemcpyAsync failed in paddle::platform::GpuMemcpyAsync "
-                 "(%p -> %p, length: %d)",
-                 src, dst, static_cast<int>(count));
+                 "(%p -> %p, length: %d) error code : %d, %s",
+                 src, dst, static_cast<int>(count), error_code,  // NOTE(review): count is narrowed from size_t to int for the message only
+                 CudaErrorWebsite());
 }

 void GpuMemcpySync(void *dst, const void *src, size_t count,
                   enum cudaMemcpyKind kind) {
-  PADDLE_ENFORCE(cudaMemcpy(dst, src, count, kind),
-                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync (%p -> "
-                 "%p, length: %d)",
-                 src, dst, static_cast<int>(count));
+  auto error_code = cudaMemcpy(dst, src, count, kind);  // kept in a local: it is both the checked value and a %d argument
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemcpy failed in paddle::platform::GpuMemcpySync "
+                 "(%p -> %p, length: %d) error code : %d, %s",
+                 src, dst, static_cast<int>(count), error_code,  // NOTE(review): count is narrowed from size_t to int for the message only
+                 CudaErrorWebsite());
 }

 void GpuMemcpyPeerAsync(void *dst, int dst_device, const void *src,
                        int src_device, size_t count, cudaStream_t stream) {
+  auto error_code =
+      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream);  // kept in a local: it is both the checked value and the %d argument
  PADDLE_ENFORCE(
-      cudaMemcpyPeerAsync(dst, dst_device, src, src_device, count, stream),
-      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync");
+      error_code,
+      "cudaMemcpyPeerAsync failed in paddle::platform::GpuMemcpyPeerAsync "
+      "error code : %d, %s",
+      error_code, CudaErrorWebsite());
 }

 void GpuMemcpyPeerSync(void *dst, int dst_device, const void *src,
                       int src_device, size_t count) {
-  PADDLE_ENFORCE(
-      cudaMemcpyPeer(dst, dst_device, src, src_device, count),
-      "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync");
+  auto error_code = cudaMemcpyPeer(dst, dst_device, src, src_device, count);  // kept in a local: it is both the checked value and the %d argument
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemcpyPeer failed in paddle::platform::GpuMemcpyPeerSync "
+                 "error code : %d, %s",
+                 error_code, CudaErrorWebsite());
 }

 void GpuMemsetAsync(void *dst, int value, size_t count, cudaStream_t stream) {
-  PADDLE_ENFORCE(cudaMemsetAsync(dst, value, count, stream),
-                 "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync");
+  auto error_code = cudaMemsetAsync(dst, value, count, stream);  // kept in a local: it is both the checked value and the %d argument
+  PADDLE_ENFORCE(error_code,
+                 "cudaMemsetAsync failed in paddle::platform::GpuMemsetAsync "
+                 "error code : %d, %s",
+                 error_code, CudaErrorWebsite());
 }
 }  // namespace platform
 }  // namespace paddle