From adaec0073d02c0ea55bcabc4671ebfc8dbd3182c Mon Sep 17 00:00:00 2001 From: Zhou Wei <52485244+zhouwei25@users.noreply.github.com> Date: Mon, 22 Feb 2021 16:52:04 +0800 Subject: [PATCH] [2.0Custom OP]Support New Custom OP on Windows (#31063) * [2.0.1]Support New Custom OP on windows * fix CI * fix code style * fix CI * fix CI * fix coverage * fix CI * fix CI --- CMakeLists.txt | 2 + paddle/fluid/extension/include/all.h | 6 + paddle/fluid/extension/include/dll_decl.h | 27 +++ paddle/fluid/extension/include/op_meta_info.h | 44 +++- paddle/fluid/extension/include/tensor.h | 3 +- paddle/fluid/extension/src/op_meta_info.cc | 13 +- paddle/fluid/extension/src/tensor.cc | 116 +++++----- paddle/fluid/framework/CMakeLists.txt | 7 +- .../fluid/platform/dynload/dynamic_loader.cc | 3 - paddle/scripts/paddle_build.bat | 22 +- python/paddle/fluid/tests/CMakeLists.txt | 9 +- .../fluid/tests/custom_op/CMakeLists.txt | 49 +++-- .../fluid/tests/custom_op/test_dispatch.py | 12 +- .../custom_op/test_simple_custom_op_jit.py | 13 +- .../custom_op/test_simple_custom_op_setup.py | 13 +- python/paddle/fluid/tests/custom_op/utils.py | 4 +- .../utils/cpp_extension/cpp_extension.py | 148 +++++++++++-- .../utils/cpp_extension/extension_utils.py | 208 ++++++++++++------ python/requirements.txt | 3 +- python/setup.py.in | 24 +- 20 files changed, 523 insertions(+), 203 deletions(-) create mode 100644 paddle/fluid/extension/include/dll_decl.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bd9605a1abb..f24513d605c 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -335,6 +335,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG") +add_definitions(-DPADDLE_DLL_EXPORT) + if(ON_INFER) # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF message(STATUS "On inference mode, will take place some specific optimization.") diff --git a/paddle/fluid/extension/include/all.h b/paddle/fluid/extension/include/all.h index 5aa61f8203e..e2a3bc38c5f 100644 --- a/paddle/fluid/extension/include/all.h +++ b/paddle/fluid/extension/include/all.h @@ -18,6 +18,12 @@ limitations under the License. */ #error C++11 or later compatible compiler is required to use Paddle. #endif +#ifdef _WIN32 +#ifndef NOMINMAX +#define NOMINMAX // msvc max/min macro conflict with std::min/max +#endif +#endif + #include "paddle/fluid/extension/include/dispatch.h" #include "paddle/fluid/extension/include/dtype.h" #include "paddle/fluid/extension/include/op_meta_info.h" diff --git a/paddle/fluid/extension/include/dll_decl.h b/paddle/fluid/extension/include/dll_decl.h new file mode 100644 index 00000000000..3dbea5e6dff --- /dev/null +++ b/paddle/fluid/extension/include/dll_decl.h @@ -0,0 +1,27 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#if defined(_WIN32) +#ifndef PD_DLL_DECL +#ifdef PADDLE_DLL_EXPORT +#define PD_DLL_DECL __declspec(dllexport) +#else +#define PD_DLL_DECL __declspec(dllimport) +#endif // PADDLE_DLL_EXPORT +#endif // PD_DLL_DECL +#else +#define PD_DLL_DECL +#endif // _WIN32 diff --git a/paddle/fluid/extension/include/op_meta_info.h b/paddle/fluid/extension/include/op_meta_info.h index 920049e2390..c16f61374f7 100644 --- a/paddle/fluid/extension/include/op_meta_info.h +++ b/paddle/fluid/extension/include/op_meta_info.h @@ -14,12 +14,14 @@ limitations under the License. */ #pragma once +#include #include #include #include #include +#include "paddle/fluid/extension/include/dll_decl.h" #include "paddle/fluid/extension/include/tensor.h" /** @@ -31,7 +33,7 @@ limitations under the License. */ namespace paddle { namespace framework { -class OpMetaInfoHelper; +class PD_DLL_DECL OpMetaInfoHelper; } // namespace framework using Tensor = paddle::Tensor; @@ -43,6 +45,26 @@ using Tensor = paddle::Tensor; classname& operator=(const classname&) = delete; \ classname& operator=(classname&&) = delete +#if defined _WIN32 +#define HANDLE_THE_ERROR try { +#define END_HANDLE_THE_ERROR \ + } \ + catch (const std::exception& e) { \ + std::cerr << e.what() << std::endl; \ + throw e; \ + } +#else +#define HANDLE_THE_ERROR +#define END_HANDLE_THE_ERROR +#endif + +#define PD_THROW(err_msg) \ + do { \ + HANDLE_THE_ERROR \ + throw std::runtime_error(err_msg); \ + END_HANDLE_THE_ERROR \ + } while (0) + ///////////////// Util Define and Function //////////////// inline std::string Grad(const std::string& var_name) { @@ -106,7 +128,7 @@ struct KernelFuncImpl { attr_idx + 1>( inputs, attrs, pargs..., arg); } catch (boost::bad_any_cast&) { - throw std::runtime_error( + PD_THROW( "Attribute cast error in custom operator. Expected int value."); } } @@ -220,7 +242,7 @@ struct InferDtypeFuncImpl { ////////////////////// Op Meta Info ////////////////////// -class OpMetaInfo { +class PD_DLL_DECL OpMetaInfo { public: explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {} OpMetaInfo& Inputs(std::vector&& inputs); @@ -246,7 +268,7 @@ class OpMetaInfo { //////////////// Op Meta Info Map ///////////////// -class OpMetaInfoMap { +class PD_DLL_DECL OpMetaInfoMap { public: // this function's impl should keep in header file. // if move to cc file, meta info can not be added @@ -270,14 +292,14 @@ class OpMetaInfoMap { //////////////// Op Meta Info Builder ///////////////// -class OpMetaInfoBuilder { +class PD_DLL_DECL OpMetaInfoBuilder { public: explicit OpMetaInfoBuilder(std::string&& name); OpMetaInfoBuilder& Inputs(std::vector&& inputs); OpMetaInfoBuilder& Outputs(std::vector&& outputs); - OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func); - OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func); - OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func); + OpMetaInfoBuilder& SetKernelFn(KernelFunc func); + OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func); + OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func); OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name); private: @@ -317,8 +339,12 @@ void LoadCustomOperatorLib(const std::string& dso_name); extern "C" { #endif +#if defined(_WIN32) // C-API to get global OpMetaInfoMap. -paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap(); +__declspec(dllexport) inline paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() { + return paddle::OpMetaInfoMap::Instance(); +} +#endif // _WIN32 #ifdef __cplusplus } diff --git a/paddle/fluid/extension/include/tensor.h b/paddle/fluid/extension/include/tensor.h index a5ce0d1a585..47af4dc70a1 100644 --- a/paddle/fluid/extension/include/tensor.h +++ b/paddle/fluid/extension/include/tensor.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include +#include "paddle/fluid/extension/include/dll_decl.h" #include "paddle/fluid/extension/include/dtype.h" #include "paddle/fluid/extension/include/place.h" @@ -23,7 +24,7 @@ namespace paddle { namespace framework { class CustomTensorUtils; } // namespace framework -class Tensor { +class PD_DLL_DECL Tensor { public: /// \brief Construct a Tensor on target Place for CustomOp. /// Generally it's only used for user to create Tensor. diff --git a/paddle/fluid/extension/src/op_meta_info.cc b/paddle/fluid/extension/src/op_meta_info.cc index f31723e5ac8..0273dfd5d07 100644 --- a/paddle/fluid/extension/src/op_meta_info.cc +++ b/paddle/fluid/extension/src/op_meta_info.cc @@ -78,17 +78,17 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs( return *this; } -OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) { +OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) { info_ptr_->SetKernelFn(std::forward(func)); return *this; } -OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) { +OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) { info_ptr_->SetInferShapeFn(std::forward(func)); return *this; } -OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) { +OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) { info_ptr_->SetInferDtypeFn(std::forward(func)); return *this; } @@ -114,10 +114,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) { } } // namespace paddle +#ifdef __cplusplus extern "C" { +#endif +#ifndef _WIN32 +// C-API to get global OpMetaInfoMap. paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() { return paddle::OpMetaInfoMap::Instance(); } +#endif +#ifdef __cplusplus } // end extern "C" +#endif diff --git a/paddle/fluid/extension/src/tensor.cc b/paddle/fluid/extension/src/tensor.cc index 11d505a5aab..39ed2748641 100644 --- a/paddle/fluid/extension/src/tensor.cc +++ b/paddle/fluid/extension/src/tensor.cc @@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const { return target; } -template Tensor Tensor::copy_to( +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor Tensor::copy_to( const PlaceType &target_place) const; -template Tensor Tensor::copy_to( +template PD_DLL_DECL Tensor Tensor::copy_to( const PlaceType &target_place) const; -template Tensor Tensor::copy_to( +template PD_DLL_DECL Tensor Tensor::copy_to( const PlaceType &target_place) const; -template Tensor Tensor::copy_to( - const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; -template Tensor Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; +template PD_DLL_DECL Tensor +Tensor::copy_to(const PlaceType &target_place) const; -template float *Tensor::data() const; -template double *Tensor::data() const; -template int64_t *Tensor::data() const; -template int32_t *Tensor::data() const; -template uint8_t *Tensor::data() const; -template int8_t *Tensor::data() const; -template paddle::platform::float16 *Tensor::data() - const; -template paddle::platform::bfloat16 *Tensor::data() - const; -template paddle::platform::complex128 * +template PD_DLL_DECL float *Tensor::data() const; +template PD_DLL_DECL double *Tensor::data() const; +template PD_DLL_DECL int64_t *Tensor::data() const; +template PD_DLL_DECL int32_t *Tensor::data() const; +template PD_DLL_DECL uint8_t *Tensor::data() const; +template PD_DLL_DECL int8_t *Tensor::data() const; +template PD_DLL_DECL paddle::platform::float16 * +Tensor::data() const; +template PD_DLL_DECL paddle::platform::bfloat16 * +Tensor::data() const; +template PD_DLL_DECL paddle::platform::complex128 * Tensor::data() const; -template paddle::platform::complex64 * +template PD_DLL_DECL paddle::platform::complex64 * Tensor::data() const; -template int16_t *Tensor::data() const; -template bool *Tensor::data() const; +template PD_DLL_DECL int16_t *Tensor::data() const; +template PD_DLL_DECL bool *Tensor::data() const; -template float *Tensor::mutable_data(); -template double *Tensor::mutable_data(); -template int64_t *Tensor::mutable_data(); -template int32_t *Tensor::mutable_data(); -template uint8_t *Tensor::mutable_data(); -template int8_t *Tensor::mutable_data(); -template paddle::platform::float16 * +template PD_DLL_DECL float *Tensor::mutable_data(); +template PD_DLL_DECL double *Tensor::mutable_data(); +template PD_DLL_DECL int64_t *Tensor::mutable_data(); +template PD_DLL_DECL int32_t *Tensor::mutable_data(); +template PD_DLL_DECL uint8_t *Tensor::mutable_data(); +template PD_DLL_DECL int8_t *Tensor::mutable_data(); +template PD_DLL_DECL paddle::platform::float16 * Tensor::mutable_data(); -template paddle::platform::bfloat16 * +template PD_DLL_DECL paddle::platform::bfloat16 * Tensor::mutable_data(); -template paddle::platform::complex128 * +template PD_DLL_DECL paddle::platform::complex128 * Tensor::mutable_data(); -template paddle::platform::complex64 * +template PD_DLL_DECL paddle::platform::complex64 * Tensor::mutable_data(); -template int16_t *Tensor::mutable_data(); -template bool *Tensor::mutable_data(); +template PD_DLL_DECL int16_t *Tensor::mutable_data(); +template PD_DLL_DECL bool *Tensor::mutable_data(); -template float *Tensor::mutable_data(const PlaceType &place); -template double *Tensor::mutable_data(const PlaceType &place); -template int64_t *Tensor::mutable_data(const PlaceType &place); -template int32_t *Tensor::mutable_data(const PlaceType &place); -template uint8_t *Tensor::mutable_data(const PlaceType &place); -template int8_t *Tensor::mutable_data(const PlaceType &place); -template paddle::platform::float16 * +template PD_DLL_DECL float *Tensor::mutable_data(const PlaceType &place); +template PD_DLL_DECL double *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int64_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int32_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL uint8_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL int8_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL paddle::platform::float16 * Tensor::mutable_data(const PlaceType &place); -template paddle::platform::bfloat16 * +template PD_DLL_DECL paddle::platform::bfloat16 * Tensor::mutable_data(const PlaceType &place); -template paddle::platform::complex128 * +template PD_DLL_DECL paddle::platform::complex128 * Tensor::mutable_data(const PlaceType &place); -template paddle::platform::complex64 * +template PD_DLL_DECL paddle::platform::complex64 * Tensor::mutable_data(const PlaceType &place); -template int16_t *Tensor::mutable_data(const PlaceType &place); -template bool *Tensor::mutable_data(const PlaceType &place); +template PD_DLL_DECL int16_t *Tensor::mutable_data( + const PlaceType &place); +template PD_DLL_DECL bool *Tensor::mutable_data(const PlaceType &place); std::vector Tensor::shape() const { GET_CASTED_TENSOR diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 14179172db2..b037c111865 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -345,9 +345,12 @@ if (LINUX) endif() if (WIN32) + set(FLUID_FRAMEWORK_IMPORT_LIB + ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.lib + CACHE INTERNAL "Fluid framework lib") set(FLUID_FRAMEWORK_SHARED_LIB - ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll - CACHE INTERNAL "Fluid framework lib") + ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.dll + CACHE INTERNAL "Fluid framework dll") endif() if(APPLE) diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index c347d82d1d1..6669d18f75c 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -416,9 +416,6 @@ void* GetOpDsoHandle(const std::string& dso_name) { #if defined(__APPLE__) || defined(__OSX__) PADDLE_THROW(platform::errors::Unimplemented( "Create custom cpp op outside framework do not support Apple.")); -#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA) - PADDLE_THROW(platform::errors::Unimplemented( - "Create custom cpp op outside framework do not support Windows.")); #else return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name); #endif diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index eb356b58693..8050e881a48 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -114,23 +114,24 @@ rem ------pre install python requirement---------- where python where pip pip install wheel --user -pip install -r %work_dir%\python\requirements.txt --user -pip install -r %work_dir%\python\unittest_py\requirements.txt --user +pip install --force-reinstall -r %work_dir%\python\requirements.txt --user +pip install --force-reinstall -r %work_dir%\python\unittest_py\requirements.txt --user if %ERRORLEVEL% NEQ 0 ( echo pip install requirements.txt failed! exit /b 7 ) rem ------pre install clcache and init config---------- -pip install clcache --user +rem pip install clcache --user +pip uninstall -y clcache :: set USE_CLCACHE to enable clcache -set USE_CLCACHE=1 +rem set USE_CLCACHE=1 :: In some scenarios, CLCACHE_HARDLINK can save one file copy. -set CLCACHE_HARDLINK=1 +rem set CLCACHE_HARDLINK=1 :: If it takes more than 1000s to obtain the right to use the cache, an error will be reported -set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 +rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000 :: set maximum cache size to 20G -clcache.exe -M 21474836480 +rem clcache.exe -M 21474836480 rem ------show summary of current environment---------- cmake --version @@ -281,7 +282,7 @@ echo Build third_party successfully! set build_times=1 :build_paddle :: reset clcache zero stats for collect PR's actual hit rate -clcache.exe -z +rem clcache.exe -z echo Build Paddle the %build_times% time: if "%WITH_CLCACHE%"=="OFF" ( @@ -305,7 +306,7 @@ echo 0 > %cache_dir%\error_code.txt type %cache_dir%\error_code.txt :: ci will collect clcache hit rate -goto :collect_clcache_hits +rem goto :collect_clcache_hits goto:eof @@ -346,13 +347,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt @ECHO ON pip uninstall -y paddlepaddle pip uninstall -y paddlepaddle-gpu -pip install -U %PADDLE_WHL_FILE_WIN% --user +pip install %PADDLE_WHL_FILE_WIN% --user if %ERRORLEVEL% NEQ 0 ( call paddle_winci\Scripts\deactivate.bat 2>NUL echo pip install whl package failed! exit /b 1 ) + set CUDA_VISIBLE_DEVICES=0 python %work_dir%\paddle\scripts\installation_validate.py goto:eof diff --git a/python/paddle/fluid/tests/CMakeLists.txt b/python/paddle/fluid/tests/CMakeLists.txt index bee49945f00..60be92b892f 100644 --- a/python/paddle/fluid/tests/CMakeLists.txt +++ b/python/paddle/fluid/tests/CMakeLists.txt @@ -9,7 +9,14 @@ endforeach() add_subdirectory(unittests) add_subdirectory(book) -if(NOT APPLE AND NOT WIN32) +# TODO: support New Custom OP on Mac +if(Linux) add_subdirectory(custom_op) endif() + +# Windows CPU machine doesn't have CUDA, can't compile .cu file +# if(WIN32 AND WITH_GPU) +# add_subdirectory(custom_op) +# endif() + set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/custom_op/CMakeLists.txt b/python/paddle/fluid/tests/custom_op/CMakeLists.txt index 9b89e5ceda5..0daf662f551 100644 --- a/python/paddle/fluid/tests/custom_op/CMakeLists.txt +++ b/python/paddle/fluid/tests/custom_op/CMakeLists.txt @@ -1,3 +1,36 @@ +# New custom OP can support Windows/Linux now +# 'test_simple_custom_op_jit/test_simple_custom_op_setup' compile .cc and .cu file +py_test(test_simple_custom_op_setup SRCS test_simple_custom_op_setup.py) +py_test(test_simple_custom_op_jit SRCS test_simple_custom_op_jit.py) + +# Compiling shared library will cost some time, but running process is very fast. +set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250) +set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180) + +py_test(test_sysconfig SRCS test_sysconfig.py) + +# 'test_dispatch' compile .cc file +py_test(test_dispatch SRCS test_dispatch.py) +set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180) + +if(NOT Linux) + return() +endif() + +# TODO(zhouwei): support test_check_abi and abi check on Windows +py_test(test_check_abi SRCS test_check_abi.py) + +# Old custom OP only support Linux, only run on Linux +py_test(test_custom_op SRCS test_custom_op.py) +py_test(test_jit_load SRCS test_jit_load.py) +py_test(test_setup_install SRCS test_setup_install.py) +py_test(test_setup_build SRCS test_setup_build.py) + +set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180) +set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180) +set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180) + + if(WITH_ROCM) hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared) elseif(WITH_GPU) @@ -18,19 +51,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES) LIST(REMOVE_ITEM TARGET_LIBRARIES glog) LIST(REMOVE_ITEM TARGET_LIBRARIES gflags) set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES} ) - -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() - -# Compiling .so will cost some time, but running process is very fast. -set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180) -set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180) -set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180) -set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180) - -set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250) -set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180) diff --git a/python/paddle/fluid/tests/custom_op/test_dispatch.py b/python/paddle/fluid/tests/custom_op/test_dispatch.py index 1766a6042f3..aaca7333561 100644 --- a/python/paddle/fluid/tests/custom_op/test_dispatch.py +++ b/python/paddle/fluid/tests/custom_op/test_dispatch.py @@ -16,8 +16,18 @@ import os import unittest import paddle import numpy as np -from paddle.utils.cpp_extension import load +from paddle.utils.cpp_extension import load, get_build_directory from utils import paddle_includes, extra_compile_args +from paddle.utils.cpp_extension.extension_utils import run_cmd + +# Because the shared lib already exists in the cache dir, +# it will not be compiled again unless the cache dir is cleared. +if os.name == 'nt': + cmd = 'rmdir {} /s/q'.format(get_build_directory()) +else: + cmd = 'rm -rf {}'.format(get_build_directory()) + +run_cmd(cmd, True) dispatch_op = load( name='dispatch_op', diff --git a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py index 2c0dc1a4ca6..2832e8070d1 100644 --- a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py +++ b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py @@ -13,13 +13,24 @@ # limitations under the License. import os +import subprocess import unittest import paddle import numpy as np -from paddle.utils.cpp_extension import load +from paddle.utils.cpp_extension import load, get_build_directory +from paddle.utils.cpp_extension.extension_utils import run_cmd from utils import paddle_includes, extra_compile_args from test_simple_custom_op_setup import relu2_dynamic, relu2_static +# Because the shared lib already exists in the cache dir, +# it will not be compiled again unless the cache dir is cleared. +if os.name == 'nt': + cmd = 'rmdir {} /s/q'.format(get_build_directory()) +else: + cmd = 'rm -rf {}'.format(get_build_directory()) + +run_cmd(cmd, True) + # Compile and load custom op Just-In-Time. custom_module = load( name='simple_jit_relu2', diff --git a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py index cfa2db0ba24..f312508d393 100644 --- a/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py +++ b/python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py @@ -91,7 +91,12 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): def setUp(self): cur_dir = os.path.dirname(os.path.abspath(__file__)) # compile, install the custom op egg into site-packages under background - cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir) + if os.name == 'nt': + cmd = 'cd /d {} && python setup_install_simple.py install'.format( + cur_dir) + else: + cmd = 'cd {} && python setup_install_simple.py install'.format( + cur_dir) run_cmd(cmd) # NOTE(Aurelius84): Normally, it's no need to add following codes for users. @@ -99,7 +104,11 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase): # sys.path has been updated. So we update it manually. # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3 - site_dir = site.getsitepackages()[0] + if os.name == 'nt': + # NOTE(zhouwei25): getsitepackages on windows will return a list: [python install dir, site packages dir] + site_dir = site.getsitepackages()[1] + else: + site_dir = site.getsitepackages()[0] custom_egg_path = [ x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x ] diff --git a/python/paddle/fluid/tests/custom_op/utils.py b/python/paddle/fluid/tests/custom_op/utils.py index f293c751942..52b294dc72b 100644 --- a/python/paddle/fluid/tests/custom_op/utils.py +++ b/python/paddle/fluid/tests/custom_op/utils.py @@ -23,8 +23,8 @@ site_packages_path = get_python_lib() # paddle include directory. Because the following path is generated after insalling # PaddlePaddle whl. So here we specific `include_dirs` to avoid errors in CI. paddle_includes = [ - os.path.join(site_packages_path, 'paddle/include'), - os.path.join(site_packages_path, 'paddle/include/third_party') + os.path.join(site_packages_path, 'paddle', 'include'), + os.path.join(site_packages_path, 'paddle', 'include', 'third_party') ] # TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON, diff --git a/python/paddle/utils/cpp_extension/cpp_extension.py b/python/paddle/utils/cpp_extension/cpp_extension.py index 121c1626125..8c0893b16cf 100644 --- a/python/paddle/utils/cpp_extension/cpp_extension.py +++ b/python/paddle/utils/cpp_extension/cpp_extension.py @@ -17,16 +17,25 @@ import six import sys import textwrap import copy +import re import setuptools from setuptools.command.easy_install import easy_install from setuptools.command.build_ext import build_ext from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context -from .extension_utils import is_cuda_file, prepare_unix_cflags, add_std_without_repeat, get_build_directory +from .extension_utils import is_cuda_file, prepare_unix_cflags, prepare_win_cflags, add_std_without_repeat, get_build_directory from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from -from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS -from .extension_utils import use_new_custom_op_load_method +from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME +from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS + +# Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default, +# The solution is: 1.User add function PyInit_[name] 2. set not to export +# refer to https://stackoverflow.com/questions/34689210/error-exporting-symbol-when-building-python-c-extension-in-windows +if IS_WINDOWS and six.PY3: + from distutils.command.build_ext import build_ext as _du_build_ext + from unittest.mock import Mock + _du_build_ext.get_export_symbols = Mock(return_value=None) CUDA_HOME = find_cuda_home() @@ -112,7 +121,7 @@ def CppExtension(sources, *args, **kwargs): sources(list[str]): The C++/CUDA source file names args(list[options]): list of config options used to compile shared library kwargs(dict[option]): dict of config options used to compile shared library - + Returns: Extension: An instance of setuptools.Extension """ @@ -137,7 +146,7 @@ def CUDAExtension(sources, *args, **kwargs): sources(list[str]): The C++/CUDA source file names args(list[options]): list of config options used to compile shared library kwargs(dict[option]): dict of config options used to compile shared library - + Returns: Extension: An instance of setuptools.Extension """ @@ -191,12 +200,12 @@ class BuildExtension(build_ext, object): def __init__(self, *args, **kwargs): """ Attributes is initialized with following oreder: - + 1. super(self).__init__() 2. initialize_options(self) 3. the reset of current __init__() 4. finalize_options(self) - + So, it is recommended to set attribute value in `finalize_options`. """ super(BuildExtension, self).__init__(*args, **kwargs) @@ -225,15 +234,17 @@ class BuildExtension(build_ext, object): for compiler in ['cxx', 'nvcc']: if compiler not in extension.extra_compile_args: extension.extra_compile_args[compiler] = [] - # add determine compile flags - add_compile_flag(extension, '-std=c++11') # Consider .cu, .cu.cc as valid source extensions. self.compiler.src_extensions += ['.cu', '.cu.cc'] # Save the original _compile method for later. - if self.compiler.compiler_type == 'msvc' or IS_WINDOWS: - raise NotImplementedError("Not support on MSVC currently.") + if self.compiler.compiler_type == 'msvc': + self.compiler._cpp_extensions += ['.cu', '.cuh'] + original_compile = self.compiler.compile + original_spawn = self.compiler.spawn else: + # add determine compile flags + add_compile_flag(extension, '-std=c++11') original_compile = self.compiler._compile def unix_custom_single_compiler(obj, src, ext, cc_args, extra_postargs, @@ -268,6 +279,81 @@ class BuildExtension(build_ext, object): # restore original_compiler self.compiler.compiler_so = original_compiler + def win_custom_single_compiler(sources, + output_dir=None, + macros=None, + include_dirs=None, + debug=0, + extra_preargs=None, + extra_postargs=None, + depends=None): + + self.cflags = copy.deepcopy(extra_postargs) + extra_postargs = None + + def win_custom_spawn(cmd): + # Using regex to modify compile options + compile_options = self.compiler.compile_options + for i in range(len(cmd)): + if re.search('/MD', cmd[i]) is not None: + cmd[i] = '/MT' + if re.search('/W[1-4]', cmd[i]) is not None: + cmd[i] = '/W0' + + # Using regex to match src, obj and include files + src_regex = re.compile('/T(p|c)(.*)') + src_list = [ + m.group(2) for m in (src_regex.match(elem) for elem in cmd) + if m + ] + + obj_regex = re.compile('/Fo(.*)') + obj_list = [ + m.group(1) for m in (obj_regex.match(elem) for elem in cmd) + if m + ] + + include_regex = re.compile(r'((\-|\/)I.*)') + include_list = [ + m.group(1) + for m in (include_regex.match(elem) for elem in cmd) if m + ] + + assert len(src_list) == 1 and len(obj_list) == 1 + src = src_list[0] + obj = obj_list[0] + if is_cuda_file(src): + assert CUDA_HOME is not None + nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc') + if isinstance(self.cflags, dict): + cflags = self.cflags['nvcc'] + elif isinstance(self.cflags, list): + cflags = self.cflags + else: + cflags = [] + + cflags = prepare_win_cflags(cflags) + ['--use-local-env'] + for flag in MSVC_COMPILE_FLAGS: + cflags = ['-Xcompiler', flag] + cflags + cmd = [nvcc_cmd, '-c', src, '-o', obj + ] + include_list + cflags + elif isinstance(self.cflags, dict): + cflags = MSVC_COMPILE_FLAGS + self.cflags['cxx'] + cmd += cflags + elif isinstance(self.cflags, list): + cflags = MSVC_COMPILE_FLAGS + self.cflags + cmd += cflags + + return original_spawn(cmd) + + try: + self.compiler.spawn = win_custom_spawn + return original_compile(sources, output_dir, macros, + include_dirs, debug, extra_preargs, + extra_postargs, depends) + finally: + self.compiler.spawn = original_spawn + def object_filenames_with_cuda(origina_func, build_directory): """ Decorated the function to add customized naming machanism. @@ -280,10 +366,13 @@ class BuildExtension(build_ext, object): objects = origina_func(source_filenames, strip_dir, output_dir) for i, source in enumerate(source_filenames): - # modify xx.o -> xx.cu.o + # modify xx.o -> xx.cu.o/xx.cu.obj if is_cuda_file(source): old_obj = objects[i] - objects[i] = old_obj[:-1] + 'cu.o' + if self.compiler.compiler_type == 'msvc': + objects[i] = old_obj[:-3] + 'cu.obj' + else: + objects[i] = old_obj[:-1] + 'cu.o' # if user set build_directory, output objects there. if build_directory is not None: objects = [ @@ -300,10 +389,13 @@ class BuildExtension(build_ext, object): return wrapper # customized compile process - self.compiler._compile = unix_custom_single_compiler + if self.compiler.compiler_type == 'msvc': + self.compiler.compile = win_custom_single_compiler + else: + self.compiler._compile = unix_custom_single_compiler + self.compiler.object_filenames = object_filenames_with_cuda( self.compiler.object_filenames, self.build_lib) - self._record_op_info() print("Compiling user custom op, it will cost a few seconds.....") @@ -333,15 +425,21 @@ class BuildExtension(build_ext, object): compiler = self.compiler.compiler_cxx[0] elif IS_WINDOWS: compiler = os.environ.get('CXX', 'cl') - raise NotImplementedError("We don't support Windows Currently.") else: compiler = os.environ.get('CXX', 'c++') check_abi_compatibility(compiler) + # Warn user if VC env is activated but `DISTUILS_USE_SDK` is not set. + if IS_WINDOWS and 'VSCMD_ARG_TGT_ARCH' in os.environ and 'DISTUTILS_USE_SDK' not in os.environ: + msg = ( + 'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.' + 'This may lead to multiple activations of the VC env.' + 'Please set `DISTUTILS_USE_SDK=1` and try again.') + raise UserWarning(msg) def _record_op_info(self): """ - Record custum op inforomation. + Record custum op inforomation. """ # parse shared library abs path outputs = self.get_outputs() @@ -380,7 +478,13 @@ class EasyInstallCommand(easy_install, object): # .so shared library to another name. for egg_file in self.outputs: filename, ext = os.path.splitext(egg_file) - if ext == '.so': + will_rename = False + if OS_NAME.startswith('linux') and ext == '.so': + will_rename = True + elif IS_WINDOWS and ext == '.pyd': + will_rename = True + + if will_rename: new_so_path = filename + "_pd_" + ext if not os.path.exists(new_so_path): os.rename(r'%s' % egg_file, r'%s' % new_so_path) @@ -425,7 +529,7 @@ def load(name, extra_include_paths(list[str]): additional include path used to search header files. Default None. build_directory(str): specific directory path to put shared library file. If set None, - it will use `PADDLE_EXTENSION_DIR` from os.environ. Use + it will use `PADDLE_EXTENSION_DIR` from os.environ. Use `paddle.utils.cpp_extension.get_build_directory()` to see the location. interpreter(str): alias or full interpreter path to specific which one to use if have installed multiple. If set None, will use `python` as default interpreter. @@ -448,6 +552,10 @@ def load(name, # ensure to use abs path build_directory = os.path.abspath(build_directory) + # Will load shared library from 'path' on windows + if IS_WINDOWS: + os.environ['path'] = build_directory + ';' + os.environ['path'] + log_v("build_directory: {}".format(build_directory), verbose) file_path = os.path.join(build_directory, "setup.py") @@ -460,7 +568,7 @@ def load(name, log_v("additonal compile_flags: [{}]".format(' '.join(compile_flags)), verbose) - # write setup.py file and compile it + # write setup.py file and compile it _write_setup_file(name, sources, file_path, extra_include_paths, compile_flags, extra_ldflags, verbose) _jit_compile(file_path, interpreter, verbose) diff --git a/python/paddle/utils/cpp_extension/extension_utils.py b/python/paddle/utils/cpp_extension/extension_utils.py index 52c17d77bd4..f4a801fe3ec 100644 --- a/python/paddle/utils/cpp_extension/extension_utils.py +++ b/python/paddle/utils/cpp_extension/extension_utils.py @@ -38,9 +38,19 @@ logger = logging.getLogger("utils.cpp_extension") OS_NAME = sys.platform IS_WINDOWS = OS_NAME.startswith('win') -NVCC_COMPILE_FLAGS = [ - '-ccbin', 'cc', '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO', - '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC' + +MSVC_COMPILE_FLAGS = [ + '/MT', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018', + '/wd4190', '/EHsc', '/w', '/DPADDLE_WITH_CUDA', '/DEIGEN_USE_GPU', + '/DNDEBUG' +] + +MSVC_LINK_FLAGS = [ + '/MACHINE:X64', 'paddle_framework.lib', 'cudadevrt.lib', 'cudart_static.lib' +] + +COMMON_NVCC_FLAGS = [ + '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO', '-O3' ] GCC_MINI_VERSION = (5, 4, 0) @@ -81,8 +91,8 @@ information USING_NEW_CUSTOM_OP_LOAD_METHOD = True -# NOTE(chenweihang): In order to be compatible with -# the two custom op define method, after removing +# NOTE(chenweihang): In order to be compatible with +# the two custom op define method, after removing # old method, we can remove them together def use_new_custom_op_load_method(*args): global USING_NEW_CUSTOM_OP_LOAD_METHOD @@ -210,7 +220,21 @@ def prepare_unix_cflags(cflags): """ Prepare all necessary compiled flags for nvcc compiling CUDA files. """ - cflags = NVCC_COMPILE_FLAGS + cflags + get_cuda_arch_flags(cflags) + cflags = COMMON_NVCC_FLAGS + [ + '-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', + '-DNVCC' + ] + cflags + get_cuda_arch_flags(cflags) + + return cflags + + +def prepare_win_cflags(cflags): + """ + Prepare all necessary compiled flags for nvcc compiling CUDA files. + """ + cflags = COMMON_NVCC_FLAGS + [ + '-DGOOGLE_GLOG_DLL_DECL', '-DBOOST_HAS_STATIC_ASSERT', '-w' + ] + cflags + get_cuda_arch_flags(cflags) return cflags @@ -238,7 +262,7 @@ def get_cuda_arch_flags(cflags): def normalize_extension_kwargs(kwargs, use_cuda=False): - """ + """ Normalize include_dirs, library_dir and other attributes in kwargs. """ assert isinstance(kwargs, dict) @@ -252,52 +276,36 @@ def normalize_extension_kwargs(kwargs, use_cuda=False): library_dirs.extend(find_paddle_libraries(use_cuda)) kwargs['library_dirs'] = library_dirs - # add runtime library dirs - runtime_library_dirs = kwargs.get('runtime_library_dirs', []) - runtime_library_dirs.extend(find_paddle_libraries(use_cuda)) - kwargs['runtime_library_dirs'] = runtime_library_dirs + if IS_WINDOWS: + # TODO(zhouwei): may append compile flags in future + pass + # append link flags + extra_link_args = kwargs.get('extra_link_args', []) + extra_link_args.extend(MSVC_LINK_FLAGS) + kwargs['extra_link_args'] = extra_link_args + else: + # append compile flags + extra_compile_args = kwargs.get('extra_compile_args', []) + extra_compile_args.extend(['-g', '-w']) # diable warnings + kwargs['extra_compile_args'] = extra_compile_args - # append compile flags - extra_compile_args = kwargs.get('extra_compile_args', []) - extra_compile_args.extend(['-g', '-w']) # diable warnings - kwargs['extra_compile_args'] = extra_compile_args + # append link flags + extra_link_args = kwargs.get('extra_link_args', []) + extra_link_args.append('-lpaddle_framework') + if use_cuda: + extra_link_args.append('-lcudart') - # append link flags - extra_link_args = kwargs.get('extra_link_args', []) - extra_link_args.append('-lpaddle_framework') - if use_cuda: - extra_link_args.append('-lcudart') + kwargs['extra_link_args'] = extra_link_args - kwargs['extra_link_args'] = extra_link_args + # add runtime library dirs + runtime_library_dirs = kwargs.get('runtime_library_dirs', []) + runtime_library_dirs.extend(find_paddle_libraries(use_cuda)) + kwargs['runtime_library_dirs'] = runtime_library_dirs kwargs['language'] = 'c++' return kwargs -def find_paddle_includes(use_cuda=False): - """ - Return Paddle necessary include dir path. - """ - # pythonXX/site-packages/paddle/include - paddle_include_dir = get_include() - third_party_dir = os.path.join(paddle_include_dir, 'third_party') - - include_dirs = [paddle_include_dir, third_party_dir] - - return include_dirs - - -def find_cuda_includes(): - - cuda_home = find_cuda_home() - if cuda_home is None: - raise ValueError( - "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it." - ) - - return [os.path.join(cuda_home, 'lib64')] - - def find_cuda_home(): """ Use heuristic method to find cuda path @@ -315,19 +323,22 @@ def find_cuda_home(): if six.PY3: nvcc_path = nvcc_path.decode() nvcc_path = nvcc_path.rstrip('\r\n') + log_v(nvcc_path) # for example: /usr/local/cuda/bin/nvcc cuda_home = os.path.dirname(os.path.dirname(nvcc_path)) except: if IS_WINDOWS: # search from default NVIDIA GPU path candidate_paths = glob.glob( - 'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*') + 'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*.*' + ) if len(candidate_paths) > 0: cuda_home = candidate_paths[0] else: cuda_home = "/usr/local/cuda" # step 3. check whether path is valid - if not os.path.exists(cuda_home) and core.is_compiled_with_cuda(): + if cuda_home and not os.path.exists( + cuda_home) and core.is_compiled_with_cuda(): cuda_home = None warnings.warn( "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it." @@ -336,15 +347,65 @@ def find_cuda_home(): return cuda_home +def find_cuda_includes(): + """ + Use heuristic method to find cuda include path + """ + cuda_home = find_cuda_home() + if cuda_home is None: + raise ValueError( + "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it." + ) + + return [os.path.join(cuda_home, 'include')] + + +def find_paddle_includes(use_cuda=False): + """ + Return Paddle necessary include dir path. + """ + # pythonXX/site-packages/paddle/include + paddle_include_dir = get_include() + third_party_dir = os.path.join(paddle_include_dir, 'third_party') + include_dirs = [paddle_include_dir, third_party_dir] + + #TODO(zhouwei): because eigen need cuda_runtime.h + #So, extend cuda_include_dir always + cuda_include_dir = find_cuda_includes() + include_dirs.extend(cuda_include_dir) + + return include_dirs + + +def find_cuda_libraries(): + """ + Use heuristic method to find cuda static lib path + """ + cuda_home = find_cuda_home() + if cuda_home is None: + raise ValueError( + "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it." + ) + if IS_WINDOWS: + cuda_lib_dir = [os.path.join(cuda_home, 'lib', 'x64')] + else: + cuda_lib_dir = [os.path.join(cuda_home, 'lib64')] + + return cuda_lib_dir + + def find_paddle_libraries(use_cuda=False): """ Return Paddle necessary library dir path. """ # pythonXX/site-packages/paddle/libs paddle_lib_dirs = [get_lib()] - if use_cuda: - cuda_dirs = find_cuda_includes() - paddle_lib_dirs.extend(cuda_dirs) + + #TODO(zhouwei): because eigen need cuda_runtime.h + #So, extend cuda_lib_dir always + cuda_lib_dir = find_cuda_libraries() + paddle_lib_dirs.extend(cuda_lib_dir) + return paddle_lib_dirs @@ -374,12 +435,14 @@ def get_build_directory(verbose=False): root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR') if root_extensions_directory is None: dir_name = "paddle_extensions" - if OS_NAME.startswith('linux'): - root_extensions_directory = os.path.join( - os.path.expanduser('~/.cache'), dir_name) - else: - # TODO(Aurelius84): consider wind32/macOs - raise NotImplementedError("Only support Linux now.") + root_extensions_directory = os.path.join( + os.path.expanduser('~/.cache'), dir_name) + if IS_WINDOWS: + root_extensions_directory = os.path.normpath( + root_extensions_directory) + elif OS_NAME.startswith('darwin'): + # TODO(Aurelius84): consider macOs + raise NotImplementedError("Not support Mac now.") log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.". format(root_extensions_directory), verbose) @@ -410,10 +473,13 @@ def parse_op_info(op_name): def _import_module_from_library(module_name, build_directory, verbose=False): """ - Load .so shared library and import it as callable python module. + Load shared library and import it as callable python module. """ - # TODO(Aurelius84): Consider file suffix is .dll on Windows Platform. - ext_path = os.path.join(build_directory, module_name + '.so') + if IS_WINDOWS: + dynamic_suffix = '.pyd' + else: + dynamic_suffix = '.so' + ext_path = os.path.join(build_directory, module_name + dynamic_suffix) if not os.path.exists(ext_path): raise FileNotFoundError("Extension path: {} does not exist.".format( ext_path)) @@ -565,12 +631,12 @@ def _write_setup_file(name, def list2str(args): """ - Convert list[str] into string. For example: [x, y] -> "['x', 'y']" + Convert list[str] into string. For example: ['x', 'y'] -> "['x', 'y']" """ if args is None: return '[]' assert isinstance(args, (list, tuple)) - args = ["'{}'".format(arg) for arg in args] - return '[' + ','.join(args) + ']' + args = ["{}".format(arg) for arg in args] + return repr(args) def _jit_compile(file_path, interpreter=None, verbose=False): @@ -583,7 +649,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False): if interpreter is None: interpreter = 'python' try: - py_path = subprocess.check_output(['which', interpreter]) + which = 'where' if IS_WINDOWS else 'which' + py_path = subprocess.check_output([which, interpreter]) py_version = subprocess.check_output([interpreter, '-V']) if six.PY3: py_path = py_path.decode() @@ -596,8 +663,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False): 'Failed to check Python interpreter with `{}`, errors: {}'.format( interpreter, error)) - compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter, - setup_file) + if IS_WINDOWS: + compile_cmd = 'cd /d {} && {} {} build'.format(ext_dir, interpreter, + setup_file) + else: + compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter, + setup_file) + print("Compiling user custom op, it will cost a few seconds.....") run_cmd(compile_cmd, verbose) @@ -682,7 +754,7 @@ def check_abi_compatibility(compiler, verbose=False): try: if OS_NAME.startswith('linux'): version_info = subprocess.check_output( - [compiler, '-dumpfullversion']) + [compiler, '-dumpfullversion', '-dumpversion']) if six.PY3: version_info = version_info.decode() version = version_info.strip().split('.') @@ -694,8 +766,8 @@ def check_abi_compatibility(compiler, verbose=False): warnings.warn( ABI_INCOMPATIBILITY_WARNING.format( user_compiler=compiler, version=version_info.strip())) - # TODO(Aurelius84): check version compatibility on windows elif IS_WINDOWS: + # TODO(zhouwei): support check abi compatibility on windows warnings.warn("We don't support Windows now.") except Exception: _, error, _ = sys.exc_info() @@ -714,7 +786,7 @@ def _expected_compiler_current_platform(): return expect_compilers -def log_v(info, verbose): +def log_v(info, verbose=True): """ Print log information on stdout. """ diff --git a/python/requirements.txt b/python/requirements.txt index 77232f4fd71..e89b3ede94f 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5" numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows" numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows" protobuf>=3.1.0 -gast>=0.3.3 +gast>=0.3.3 ; platform_system != "Windows" +gast==0.3.3 ; platform_system == "Windows" Pillow six decorator diff --git a/python/setup.py.in b/python/setup.py.in index d5c098aa9e3..43a74d191d8 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -335,11 +335,16 @@ if '${WITH_XPU_BKCL}' == 'ON': shutil.copy('${XPU_BKCL_LIB}', libs_path) package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}'] -# copy libfuild_framework.so to libs -if os.name != 'nt' and sys.platform != 'darwin': - paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}' - shutil.copy(paddle_framework_lib, libs_path) - package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name] +# copy libpaddle_framework.so to libs on linux +if sys.platform.startswith('linux'): + shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path) + package_data['paddle.libs'] += ['libpaddle_framework.so'] + +# copy paddle_framework.lib/paddle_framework.dll to libs on windows +if os.name == 'nt': + shutil.copy('${FLUID_FRAMEWORK_IMPORT_LIB}', libs_path) + shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path) + package_data['paddle.libs'] += ['paddle_framework.lib', 'paddle_framework.dll'] # remove unused paddle/libs/__init__.py if os.path.isfile(libs_path+'/__init__.py'): @@ -410,9 +415,9 @@ if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON': class InstallCommand(InstallCommandBase): def finalize_options(self): ret = InstallCommandBase.finalize_options(self) - self.install_headers = os.path.join(self.install_purelib, 'paddle', - 'include') self.install_lib = self.install_platlib + self.install_headers = os.path.join(self.install_platlib, 'paddle', + 'include') return ret @@ -463,11 +468,6 @@ class InstallHeaders(Command): return self.copy_file(header, install_dir) def run(self): - # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows - if os.name == 'nt' or sys.platform == 'darwin': - if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON': - self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb') - return hdrs = self.distribution.headers if not hdrs: return -- GitLab