From 6cc7870517bd1820e82bdb635391bf705820578c Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Mon, 27 Aug 2018 09:42:24 +0800 Subject: [PATCH] fix concat synchronization bug --- paddle/fluid/operators/math/concat.cu | 6 ++++ paddle/fluid/platform/CMakeLists.txt | 5 ++++ paddle/fluid/platform/cpu_info.cc | 21 ++++++++++--- paddle/fluid/platform/device_tracer.h | 10 ++++++- paddle/fluid/platform/dynload/CMakeLists.txt | 2 ++ .../fluid/platform/dynload/dynamic_loader.cc | 3 +- paddle/fluid/platform/enforce.h | 30 +++++++++++++++++-- paddle/fluid/platform/profiler.h | 10 +++++++ 8 files changed, 77 insertions(+), 10 deletions(-) diff --git a/paddle/fluid/operators/math/concat.cu b/paddle/fluid/operators/math/concat.cu index 820e73e7797..342379268be 100644 --- a/paddle/fluid/operators/math/concat.cu +++ b/paddle/fluid/operators/math/concat.cu @@ -177,6 +177,9 @@ class ConcatFunctor { dev_ins_data, dev_ins_col_data, static_cast(inputs_col.size()), out_row, out_col, output->data()); } + // Wait() must be called because `inputs_data` may be destructed before + // kernel ends + context.Wait(); } }; @@ -252,6 +255,9 @@ class ConcatGradFunctor { input.data(), in_row, in_col, dev_outs_col_data, static_cast(outputs_cols.size()), dev_out_gpu_data); } + // Wait() must be called because `outputs_data` may be destructed before + // kernel ends + context.Wait(); } }; diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 75d3856d0dd..e25efebe6c3 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -1,3 +1,4 @@ +if (NOT WIN32) proto_library(profiler_proto SRCS profiler.proto DEPS framework_proto) py_proto_compile(profiler_py_proto SRCS profiler.proto) @@ -10,6 +11,7 @@ add_custom_command(TARGET profiler_py_proto POST_BUILD COMMAND cp *.py ${PADDLE_BINARY_DIR}/python/paddle/fluid/proto/profiler COMMENT "Copy generated python proto into directory paddle/fluid/proto/profiler." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) +endif(NOT WIN32) if(WITH_GPU) nv_library(enforce SRCS enforce.cc) @@ -58,9 +60,12 @@ cc_test(init_test SRCS init_test.cc DEPS device_context) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) nv_test(transform_test SRCS transform_test.cu DEPS memory place device_context) + +if (NOT WIN32) cc_library(device_tracer SRCS device_tracer.cc DEPS boost profiler_proto framework_proto ${GPU_CTX_DEPS}) cc_library(profiler SRCS profiler.cc DEPS device_context device_tracer) cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) +endif(NOT WIN32) nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index fcd658d67cf..2880c09263f 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -22,9 +22,13 @@ limitations under the License. */ #ifdef __APPLE__ #include #include + +#elif defined(_WIN32) +#define NOMINMAX // msvc max/min macro conflict with std::min/max +#include #else #include -#endif +#endif // _WIN32 #include #include "gflags/gflags.h" @@ -32,16 +36,20 @@ limitations under the License. */ DEFINE_double(fraction_of_cpu_memory_to_use, 1, "Default use 100% of CPU memory for PaddlePaddle," "reserve the rest for page tables, etc"); - +#if !defined(_WIN32) DEFINE_uint64(initial_cpu_memory_in_mb, #ifdef PADDLE_WITH_MKLDNN /* Aligned with mozga-intel, MKLDNN need at least 5000 MB * to obtain the best performance*/ - 5000, + 5000ul, #else - 500, + 500ul, #endif "Initial CPU memory for PaddlePaddle, in MD unit."); +#else +DEFINE_uint64(initial_cpu_memory_in_mb, 500ul, + "Initial CPU memory for PaddlePaddle, in MD unit."); +#endif // !defined(_WIN32) DEFINE_double( fraction_of_cuda_pinned_memory_to_use, 0.5, @@ -60,6 +68,11 @@ inline size_t CpuTotalPhysicalMemory() { size_t len = sizeof(size); if (sysctl(mib, 2, &size, &len, NULL, 0) == 0) return (size_t)size; return 0L; +#elif defined(_WIN32) + MEMORYSTATUSEX sMeminfo; + sMeminfo.dwLength = sizeof(sMeminfo); + GlobalMemoryStatusEx(&sMeminfo); + return sMeminfo.ullTotalPhys; #else int64_t pages = sysconf(_SC_PHYS_PAGES); int64_t page_size = sysconf(_SC_PAGE_SIZE); diff --git a/paddle/fluid/platform/device_tracer.h b/paddle/fluid/platform/device_tracer.h index 322996fb4f5..f59fc40b716 100644 --- a/paddle/fluid/platform/device_tracer.h +++ b/paddle/fluid/platform/device_tracer.h @@ -13,7 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#if !defined(_WIN32) #include +#else +#include +#endif // !_WIN32 + #include #include // NOLINT #include @@ -27,12 +32,15 @@ namespace platform { /////////////////////// // WARN: Under Development. Don't depend on it yet. ////////////////////// - +#if !defined(_WIN32) inline uint64_t PosixInNsec() { struct timeval tv; gettimeofday(&tv, nullptr); return 1000 * (static_cast(tv.tv_sec) * 1000000 + tv.tv_usec); } +#else +inline uint64_t PosixInNsec() { return static_cast(0); } +#endif // !_WIN32 // DeviceTracer performs the following tasks: // 1. Register cuda callbacks for various events: kernel, memcpy, etc. diff --git a/paddle/fluid/platform/dynload/CMakeLists.txt b/paddle/fluid/platform/dynload/CMakeLists.txt index 07159d4a12e..5939c500c94 100644 --- a/paddle/fluid/platform/dynload/CMakeLists.txt +++ b/paddle/fluid/platform/dynload/CMakeLists.txt @@ -16,7 +16,9 @@ if (CUPTI_FOUND) list(APPEND CUDA_SRCS cupti.cc) endif(CUPTI_FOUND) nv_library(dynload_cuda SRCS ${CUDA_SRCS} DEPS dynamic_loader) +if (NOT WIN32) cc_library(dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc) +endif(NOT WIN32) if (WITH_MKLML) cc_library(dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml) endif() diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 93bf7c13516..4fbfa6354ab 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -13,8 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/dynload/dynamic_loader.h" -#include - #include #include // NOLINT #include @@ -23,6 +21,7 @@ limitations under the License. */ #include "glog/logging.h" #include "paddle/fluid/platform/dynload/cupti_lib_path.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/port.h" DEFINE_string(cudnn_dir, "", "Specify path for loading libcudnn.so. For instance, " diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index a76ba75f9ee..61a653d9313 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -18,6 +18,11 @@ limitations under the License. */ #include // for __cxa_demangle #endif // __GNUC__ +#if defined(_WIN32) +#define NOMINMAX // msvc max/min macro conflict with std::min/max +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#endif + #ifdef PADDLE_WITH_CUDA #include #include @@ -117,7 +122,12 @@ struct EOFException : public std::exception { // always forces branch prediction of true. // This generates faster binary code. __builtin_expect is since C++11. // For more details, please check https://stackoverflow.com/a/43870188/724872. +#if !defined(_WIN32) #define UNLIKELY(condition) __builtin_expect(static_cast(condition), 0) +#else +// there is no equivalent intrinsics in msvc. +#define UNLIKELY(condition) (condition == 0) +#endif template inline typename std::enable_if::type throw_on_error( @@ -230,6 +240,7 @@ inline void throw_on_error(T e) { throw_on_error(e, ""); } +#if !defined(_WIN32) #define PADDLE_THROW(...) \ do { \ throw ::paddle::platform::EnforceNotMet( \ @@ -248,15 +259,28 @@ inline void throw_on_error(T e) { __FILE__, __LINE__); \ } \ } while (false) -#else -#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__); -#endif #define PADDLE_THROW_EOF() \ do { \ throw ::paddle::platform::EOFException("There is no next data.", __FILE__, \ __LINE__); \ } while (false) + +#else +#define PADDLE_ENFORCE(...) ::paddle::platform::throw_on_error(__VA_ARGS__) +#endif // REPLACE_ENFORCE_GLOG + +#else // !_WIN32 +// disable enforce, caused by the varardic macro exception error +#define PADDLE_THROW(x) \ + do { \ + throw std::make_exception_ptr( \ + std::runtime_error("Windows disable the enforce.")); \ + } while (false) + +#define PADDLE_ENFORCE(x, ...) x +#endif // !_WIN32 + /* * Some enforce helpers here, usage: * int a = 1; diff --git a/paddle/fluid/platform/profiler.h b/paddle/fluid/platform/profiler.h index c99d9c807d1..38630686f7c 100644 --- a/paddle/fluid/platform/profiler.h +++ b/paddle/fluid/platform/profiler.h @@ -69,6 +69,7 @@ void PushEvent(const std::string& name, const DeviceContext* dev_ctx); void PopEvent(const std::string& name, const DeviceContext* dev_ctx); +#if !defined(_WIN32) struct RecordEvent { RecordEvent(const std::string& name, const DeviceContext* dev_ctx); @@ -94,6 +95,15 @@ struct RecordBlock { std::string name_; uint64_t start_ns_; }; +#else +// windows do not support profiler temporarily. +struct RecordEvent { + RecordEvent(const std::string& name, const DeviceContext* dev_ctx) {} +}; +struct RecordBlock { + explicit RecordBlock(int block_id) {} +}; +#endif // Return the event list of all threads. Assumed the returned value calls // event_lists, event_lists[i][j] represents the j-th Event of i-th thread. -- GitLab