You need to sign in or sign up before continuing.
未验证 提交 adaec007 编写于 作者: Z Zhou Wei 提交者: GitHub

[2.0Custom OP]Support New Custom OP on Windows (#31063)

* [2.0.1]Support New Custom OP on windows

* fix CI

* fix code style

* fix CI

* fix CI

* fix coverage

* fix CI

* fix CI
上级 2168f08a
......@@ -335,6 +335,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
add_definitions(-DPADDLE_DLL_EXPORT)
if(ON_INFER)
# you can turn off the paddle fluid and inference lib by setting ON_INFER=OFF
message(STATUS "On inference mode, will take place some specific optimization.")
......
......@@ -18,6 +18,12 @@ limitations under the License. */
#error C++11 or later compatible compiler is required to use Paddle.
#endif
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX // msvc max/min macro conflict with std::min/max
#endif
#endif
#include "paddle/fluid/extension/include/dispatch.h"
#include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/op_meta_info.h"
......
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// PD_DLL_DECL: decorator placed on declarations of the extension API.
#if !defined(_WIN32)
// Outside Windows the decorator expands to nothing.
#define PD_DLL_DECL
#elif !defined(PD_DLL_DECL)
// On Windows a PD_DLL_DECL that is already defined is left untouched;
// otherwise it becomes dllexport when PADDLE_DLL_EXPORT is defined and
// dllimport when it is not.
#if defined(PADDLE_DLL_EXPORT)
#define PD_DLL_DECL __declspec(dllexport)
#else
#define PD_DLL_DECL __declspec(dllimport)
#endif  // PADDLE_DLL_EXPORT
#endif  // !_WIN32 / PD_DLL_DECL
......@@ -14,12 +14,14 @@ limitations under the License. */
#pragma once
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include <boost/any.hpp>
#include "paddle/fluid/extension/include/dll_decl.h"
#include "paddle/fluid/extension/include/tensor.h"
/**
......@@ -31,7 +33,7 @@ limitations under the License. */
namespace paddle {
namespace framework {
class OpMetaInfoHelper;
class PD_DLL_DECL OpMetaInfoHelper;
} // namespace framework
using Tensor = paddle::Tensor;
......@@ -43,6 +45,26 @@ using Tensor = paddle::Tensor;
classname& operator=(const classname&) = delete; \
classname& operator=(classname&&) = delete
// HANDLE_THE_ERROR / END_HANDLE_THE_ERROR bracket a statement sequence.
// When _WIN32 is defined they expand to a try/catch that prints the
// exception's what() to std::cerr and then rethrows it; otherwise both
// expand to nothing.
//
// The catch handler uses a bare `throw;` so the exception that is currently
// being handled propagates with its original dynamic type. `throw e;` would
// instead throw a copy of static type std::exception, and a caller catching
// std::runtime_error would no longer match.
#if defined _WIN32
#define HANDLE_THE_ERROR try {
#define END_HANDLE_THE_ERROR            \
  }                                     \
  catch (const std::exception& e) {     \
    std::cerr << e.what() << std::endl; \
    throw;                              \
  }
#else
#define HANDLE_THE_ERROR
#define END_HANDLE_THE_ERROR
#endif

// PD_THROW(err_msg): throws std::runtime_error(err_msg) from inside the
// HANDLE_THE_ERROR bracket. Wrapped in do { } while (0) so the macro behaves
// as a single statement.
#define PD_THROW(err_msg)              \
  do {                                 \
    HANDLE_THE_ERROR                   \
    throw std::runtime_error(err_msg); \
    END_HANDLE_THE_ERROR               \
  } while (0)
///////////////// Util Define and Function ////////////////
inline std::string Grad(const std::string& var_name) {
......@@ -106,7 +128,7 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
attr_idx + 1>(
inputs, attrs, pargs..., arg);
} catch (boost::bad_any_cast&) {
throw std::runtime_error(
PD_THROW(
"Attribute cast error in custom operator. Expected int value.");
}
}
......@@ -220,7 +242,7 @@ struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
////////////////////// Op Meta Info //////////////////////
class OpMetaInfo {
class PD_DLL_DECL OpMetaInfo {
public:
explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
......@@ -246,7 +268,7 @@ class OpMetaInfo {
//////////////// Op Meta Info Map /////////////////
class OpMetaInfoMap {
class PD_DLL_DECL OpMetaInfoMap {
public:
// This function's implementation must be kept in the header file.
// If it is moved to a .cc file, meta info cannot be added
......@@ -270,14 +292,14 @@ class OpMetaInfoMap {
//////////////// Op Meta Info Builder /////////////////
class OpMetaInfoBuilder {
class PD_DLL_DECL OpMetaInfoBuilder {
public:
explicit OpMetaInfoBuilder(std::string&& name);
OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func);
OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
private:
......@@ -317,8 +339,12 @@ void LoadCustomOperatorLib(const std::string& dso_name);
extern "C" {
#endif
#if defined(_WIN32)
// C-API to get global OpMetaInfoMap.
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap();
__declspec(dllexport) inline paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
return paddle::OpMetaInfoMap::Instance();
}
#endif // _WIN32
#ifdef __cplusplus
}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <memory>
#include <vector>
#include "paddle/fluid/extension/include/dll_decl.h"
#include "paddle/fluid/extension/include/dtype.h"
#include "paddle/fluid/extension/include/place.h"
......@@ -23,7 +24,7 @@ namespace paddle {
namespace framework {
class CustomTensorUtils;
} // namespace framework
class Tensor {
class PD_DLL_DECL Tensor {
public:
/// \brief Construct a Tensor on target Place for CustomOp.
/// Generally it's only used for user to create Tensor.
......
......@@ -78,17 +78,17 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) {
OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) {
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
return *this;
}
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) {
OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
return *this;
}
......@@ -114,10 +114,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) {
}
} // namespace paddle
// Give the accessor below C linkage when this file is compiled as C++.
#ifdef __cplusplus
extern "C" {
#endif
// This out-of-line definition is compiled only when _WIN32 is not defined.
#ifndef _WIN32
// C-API to get global OpMetaInfoMap.
// Returns the reference produced by paddle::OpMetaInfoMap::Instance().
paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
return paddle::OpMetaInfoMap::Instance();
}
#endif
#ifdef __cplusplus
} // end extern "C"
#endif
......@@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
return target;
}
template Tensor Tensor::copy_to<paddle::platform::float16>(
template PD_DLL_DECL Tensor
Tensor::copy_to<paddle::platform::float16>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::bfloat16>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::bfloat16>(
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex64>(
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex128>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<paddle::platform::complex128>(
const PlaceType &target_place) const;
template Tensor Tensor::copy_to<float>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<double>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int64_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int32_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<uint8_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int8_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<int16_t>(const PlaceType &target_place) const;
template Tensor Tensor::copy_to<bool>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<float>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<double>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int64_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int32_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<uint8_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int8_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<int16_t>(const PlaceType &target_place) const;
template PD_DLL_DECL Tensor
Tensor::copy_to<bool>(const PlaceType &target_place) const;
template float *Tensor::data<float>() const;
template double *Tensor::data<double>() const;
template int64_t *Tensor::data<int64_t>() const;
template int32_t *Tensor::data<int32_t>() const;
template uint8_t *Tensor::data<uint8_t>() const;
template int8_t *Tensor::data<int8_t>() const;
template paddle::platform::float16 *Tensor::data<paddle::platform::float16>()
const;
template paddle::platform::bfloat16 *Tensor::data<paddle::platform::bfloat16>()
const;
template paddle::platform::complex128 *
template PD_DLL_DECL float *Tensor::data<float>() const;
template PD_DLL_DECL double *Tensor::data<double>() const;
template PD_DLL_DECL int64_t *Tensor::data<int64_t>() const;
template PD_DLL_DECL int32_t *Tensor::data<int32_t>() const;
template PD_DLL_DECL uint8_t *Tensor::data<uint8_t>() const;
template PD_DLL_DECL int8_t *Tensor::data<int8_t>() const;
template PD_DLL_DECL paddle::platform::float16 *
Tensor::data<paddle::platform::float16>() const;
template PD_DLL_DECL paddle::platform::bfloat16 *
Tensor::data<paddle::platform::bfloat16>() const;
template PD_DLL_DECL paddle::platform::complex128 *
Tensor::data<paddle::platform::complex128>() const;
template paddle::platform::complex64 *
template PD_DLL_DECL paddle::platform::complex64 *
Tensor::data<paddle::platform::complex64>() const;
template int16_t *Tensor::data<int16_t>() const;
template bool *Tensor::data<bool>() const;
template PD_DLL_DECL int16_t *Tensor::data<int16_t>() const;
template PD_DLL_DECL bool *Tensor::data<bool>() const;
template float *Tensor::mutable_data<float>();
template double *Tensor::mutable_data<double>();
template int64_t *Tensor::mutable_data<int64_t>();
template int32_t *Tensor::mutable_data<int32_t>();
template uint8_t *Tensor::mutable_data<uint8_t>();
template int8_t *Tensor::mutable_data<int8_t>();
template paddle::platform::float16 *
template PD_DLL_DECL float *Tensor::mutable_data<float>();
template PD_DLL_DECL double *Tensor::mutable_data<double>();
template PD_DLL_DECL int64_t *Tensor::mutable_data<int64_t>();
template PD_DLL_DECL int32_t *Tensor::mutable_data<int32_t>();
template PD_DLL_DECL uint8_t *Tensor::mutable_data<uint8_t>();
template PD_DLL_DECL int8_t *Tensor::mutable_data<int8_t>();
template PD_DLL_DECL paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>();
template paddle::platform::bfloat16 *
template PD_DLL_DECL paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>();
template paddle::platform::complex128 *
template PD_DLL_DECL paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>();
template paddle::platform::complex64 *
template PD_DLL_DECL paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>();
template int16_t *Tensor::mutable_data<int16_t>();
template bool *Tensor::mutable_data<bool>();
template PD_DLL_DECL int16_t *Tensor::mutable_data<int16_t>();
template PD_DLL_DECL bool *Tensor::mutable_data<bool>();
template float *Tensor::mutable_data<float>(const PlaceType &place);
template double *Tensor::mutable_data<double>(const PlaceType &place);
template int64_t *Tensor::mutable_data<int64_t>(const PlaceType &place);
template int32_t *Tensor::mutable_data<int32_t>(const PlaceType &place);
template uint8_t *Tensor::mutable_data<uint8_t>(const PlaceType &place);
template int8_t *Tensor::mutable_data<int8_t>(const PlaceType &place);
template paddle::platform::float16 *
template PD_DLL_DECL float *Tensor::mutable_data<float>(const PlaceType &place);
template PD_DLL_DECL double *Tensor::mutable_data<double>(
const PlaceType &place);
template PD_DLL_DECL int64_t *Tensor::mutable_data<int64_t>(
const PlaceType &place);
template PD_DLL_DECL int32_t *Tensor::mutable_data<int32_t>(
const PlaceType &place);
template PD_DLL_DECL uint8_t *Tensor::mutable_data<uint8_t>(
const PlaceType &place);
template PD_DLL_DECL int8_t *Tensor::mutable_data<int8_t>(
const PlaceType &place);
template PD_DLL_DECL paddle::platform::float16 *
Tensor::mutable_data<paddle::platform::float16>(const PlaceType &place);
template paddle::platform::bfloat16 *
template PD_DLL_DECL paddle::platform::bfloat16 *
Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType &place);
template paddle::platform::complex128 *
template PD_DLL_DECL paddle::platform::complex128 *
Tensor::mutable_data<paddle::platform::complex128>(const PlaceType &place);
template paddle::platform::complex64 *
template PD_DLL_DECL paddle::platform::complex64 *
Tensor::mutable_data<paddle::platform::complex64>(const PlaceType &place);
template int16_t *Tensor::mutable_data<int16_t>(const PlaceType &place);
template bool *Tensor::mutable_data<bool>(const PlaceType &place);
template PD_DLL_DECL int16_t *Tensor::mutable_data<int16_t>(
const PlaceType &place);
template PD_DLL_DECL bool *Tensor::mutable_data<bool>(const PlaceType &place);
std::vector<int> Tensor::shape() const {
GET_CASTED_TENSOR
......
......@@ -345,9 +345,12 @@ if (LINUX)
endif()
if (WIN32)
set(FLUID_FRAMEWORK_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll
set(FLUID_FRAMEWORK_IMPORT_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.lib
CACHE INTERNAL "Fluid framework lib")
set(FLUID_FRAMEWORK_SHARED_LIB
${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.dll
CACHE INTERNAL "Fluid framework dll")
endif()
if(APPLE)
......
......@@ -416,9 +416,6 @@ void* GetOpDsoHandle(const std::string& dso_name) {
#if defined(__APPLE__) || defined(__OSX__)
PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Apple."));
#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
PADDLE_THROW(platform::errors::Unimplemented(
"Create custom cpp op outside framework do not support Windows."));
#else
return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
#endif
......
......@@ -114,23 +114,24 @@ rem ------pre install python requirement----------
where python
where pip
pip install wheel --user
pip install -r %work_dir%\python\requirements.txt --user
pip install -r %work_dir%\python\unittest_py\requirements.txt --user
pip install --force-reinstall -r %work_dir%\python\requirements.txt --user
pip install --force-reinstall -r %work_dir%\python\unittest_py\requirements.txt --user
if %ERRORLEVEL% NEQ 0 (
echo pip install requirements.txt failed!
exit /b 7
)
rem ------pre install clcache and init config----------
pip install clcache --user
rem pip install clcache --user
pip uninstall -y clcache
:: set USE_CLCACHE to enable clcache
set USE_CLCACHE=1
rem set USE_CLCACHE=1
:: In some scenarios, CLCACHE_HARDLINK can save one file copy.
set CLCACHE_HARDLINK=1
rem set CLCACHE_HARDLINK=1
:: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
:: set maximum cache size to 20G
clcache.exe -M 21474836480
rem clcache.exe -M 21474836480
rem ------show summary of current environment----------
cmake --version
......@@ -281,7 +282,7 @@ echo Build third_party successfully!
set build_times=1
:build_paddle
:: reset clcache zero stats for collect PR's actual hit rate
clcache.exe -z
rem clcache.exe -z
echo Build Paddle the %build_times% time:
if "%WITH_CLCACHE%"=="OFF" (
......@@ -305,7 +306,7 @@ echo 0 > %cache_dir%\error_code.txt
type %cache_dir%\error_code.txt
:: ci will collect clcache hit rate
goto :collect_clcache_hits
rem goto :collect_clcache_hits
goto:eof
......@@ -346,13 +347,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt
@ECHO ON
pip uninstall -y paddlepaddle
pip uninstall -y paddlepaddle-gpu
pip install -U %PADDLE_WHL_FILE_WIN% --user
pip install %PADDLE_WHL_FILE_WIN% --user
if %ERRORLEVEL% NEQ 0 (
call paddle_winci\Scripts\deactivate.bat 2>NUL
echo pip install whl package failed!
exit /b 1
)
set CUDA_VISIBLE_DEVICES=0
python %work_dir%\paddle\scripts\installation_validate.py
goto:eof
......
......@@ -9,7 +9,14 @@ endforeach()
add_subdirectory(unittests)
add_subdirectory(book)
if(NOT APPLE AND NOT WIN32)
# TODO: support New Custom OP on Mac
if(Linux)
add_subdirectory(custom_op)
endif()
# Windows CPU machine doesn't have CUDA, can't compile .cu file
# if(WIN32 AND WITH_GPU)
# add_subdirectory(custom_op)
# endif()
set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
# New custom OP can support Windows/Linux now
# 'test_simple_custom_op_jit/test_simple_custom_op_setup' compile .cc and .cu file
py_test(test_simple_custom_op_setup SRCS test_simple_custom_op_setup.py)
py_test(test_simple_custom_op_jit SRCS test_simple_custom_op_jit.py)
# Compiling shared library will cost some time, but running process is very fast.
set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
py_test(test_sysconfig SRCS test_sysconfig.py)
# 'test_dispatch' compile .cc file
py_test(test_dispatch SRCS test_dispatch.py)
set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
if(NOT Linux)
return()
endif()
# TODO(zhouwei): support test_check_abi and abi check on Windows
py_test(test_check_abi SRCS test_check_abi.py)
# Old custom OP only support Linux, only run on Linux
py_test(test_custom_op SRCS test_custom_op.py)
py_test(test_jit_load SRCS test_jit_load.py)
py_test(test_setup_install SRCS test_setup_install.py)
py_test(test_setup_build SRCS test_setup_build.py)
set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
if(WITH_ROCM)
hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
elseif(WITH_GPU)
......@@ -18,19 +51,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES)
LIST(REMOVE_ITEM TARGET_LIBRARIES glog)
LIST(REMOVE_ITEM TARGET_LIBRARIES gflags)
set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES} )
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# Compiling .so will cost some time, but running process is very fast.
set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
......@@ -16,8 +16,18 @@ import os
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import run_cmd
# Because the shared lib already exists in the cache dir,
# it will not be compiled again unless the cache dir is cleared.
if os.name == 'nt':
cmd = 'rmdir {} /s/q'.format(get_build_directory())
else:
cmd = 'rm -rf {}'.format(get_build_directory())
run_cmd(cmd, True)
dispatch_op = load(
name='dispatch_op',
......
......@@ -13,13 +13,24 @@
# limitations under the License.
import os
import subprocess
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
from test_simple_custom_op_setup import relu2_dynamic, relu2_static
# Because the shared lib already exists in the cache dir,
# it will not be compiled again unless the cache dir is cleared.
if os.name == 'nt':
cmd = 'rmdir {} /s/q'.format(get_build_directory())
else:
cmd = 'rm -rf {}'.format(get_build_directory())
run_cmd(cmd, True)
# Compile and load custom op Just-In-Time.
custom_module = load(
name='simple_jit_relu2',
......
......@@ -91,7 +91,12 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
def setUp(self):
cur_dir = os.path.dirname(os.path.abspath(__file__))
# compile, install the custom op egg into site-packages under background
cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir)
if os.name == 'nt':
cmd = 'cd /d {} && python setup_install_simple.py install'.format(
cur_dir)
else:
cmd = 'cd {} && python setup_install_simple.py install'.format(
cur_dir)
run_cmd(cmd)
# NOTE(Aurelius84): Normally, it's no need to add following codes for users.
......@@ -99,6 +104,10 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
# sys.path has been updated. So we update it manually.
# See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
if os.name == 'nt':
# NOTE(zhouwei25): getsitepackages on windows will return a list: [python install dir, site packages dir]
site_dir = site.getsitepackages()[1]
else:
site_dir = site.getsitepackages()[0]
custom_egg_path = [
x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x
......
......@@ -23,8 +23,8 @@ site_packages_path = get_python_lib()
# paddle include directory. Because the following path is generated after installing
# PaddlePaddle whl. So here we specify `include_dirs` to avoid errors in CI.
paddle_includes = [
os.path.join(site_packages_path, 'paddle/include'),
os.path.join(site_packages_path, 'paddle/include/third_party')
os.path.join(site_packages_path, 'paddle', 'include'),
os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
]
# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
......
......@@ -17,16 +17,25 @@ import six
import sys
import textwrap
import copy
import re
import setuptools
from setuptools.command.easy_install import easy_install
from setuptools.command.build_ext import build_ext
from .extension_utils import find_cuda_home, normalize_extension_kwargs, add_compile_flag, bootstrap_context
from .extension_utils import is_cuda_file, prepare_unix_cflags, add_std_without_repeat, get_build_directory
from .extension_utils import is_cuda_file, prepare_unix_cflags, prepare_win_cflags, add_std_without_repeat, get_build_directory
from .extension_utils import _import_module_from_library, CustomOpInfo, _write_setup_file, _jit_compile, parse_op_name_from
from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS
from .extension_utils import use_new_custom_op_load_method
from .extension_utils import check_abi_compatibility, log_v, IS_WINDOWS, OS_NAME
from .extension_utils import use_new_custom_op_load_method, MSVC_COMPILE_FLAGS
# Note(zhouwei): On windows, it will export function 'PyInit_[name]' by default,
# The solution is: 1.User add function PyInit_[name] 2. set not to export
# refer to https://stackoverflow.com/questions/34689210/error-exporting-symbol-when-building-python-c-extension-in-windows
if IS_WINDOWS and six.PY3:
from distutils.command.build_ext import build_ext as _du_build_ext
from unittest.mock import Mock
_du_build_ext.get_export_symbols = Mock(return_value=None)
CUDA_HOME = find_cuda_home()
......@@ -225,15 +234,17 @@ class BuildExtension(build_ext, object):
for compiler in ['cxx', 'nvcc']:
if compiler not in extension.extra_compile_args:
extension.extra_compile_args[compiler] = []
# add determine compile flags
add_compile_flag(extension, '-std=c++11')
# Consider .cu, .cu.cc as valid source extensions.
self.compiler.src_extensions += ['.cu', '.cu.cc']
# Save the original _compile method for later.
if self.compiler.compiler_type == 'msvc' or IS_WINDOWS:
raise NotImplementedError("Not support on MSVC currently.")
if self.compiler.compiler_type == 'msvc':
self.compiler._cpp_extensions += ['.cu', '.cuh']
original_compile = self.compiler.compile
original_spawn = self.compiler.spawn
else:
# add determine compile flags
add_compile_flag(extension, '-std=c++11')
original_compile = self.compiler._compile
def unix_custom_single_compiler(obj, src, ext, cc_args, extra_postargs,
......@@ -268,6 +279,81 @@ class BuildExtension(build_ext, object):
# restore original_compiler
self.compiler.compiler_so = original_compiler
def win_custom_single_compiler(sources,
                               output_dir=None,
                               macros=None,
                               include_dirs=None,
                               debug=0,
                               extra_preargs=None,
                               extra_postargs=None,
                               depends=None):
    """
    Replacement for the compiler's ``compile`` method that sends CUDA
    sources through nvcc.

    A deep copy of ``extra_postargs`` is stored on ``self.cflags`` and the
    original ``compile`` is called with ``extra_postargs=None``; the stored
    flags are re-applied per command inside ``win_custom_spawn``. All other
    arguments are forwarded unchanged. ``self.compiler.spawn`` is swapped
    for ``win_custom_spawn`` only while the original ``compile`` runs and is
    restored in a ``finally`` clause.

    ``self.cflags`` may be a dict (keys ``'cxx'`` and ``'nvcc'`` are read),
    a list (used for both compilers), or anything else (no user flags).
    """
    self.cflags = copy.deepcopy(extra_postargs)
    extra_postargs = None

    def win_custom_spawn(cmd):
        """
        Rewrite a single command line, then run it with the original spawn.
        """
        # Switch the runtime-library flag to /MT and the warning level to /W0.
        # Only whole arguments are rewritten: a substring search would also
        # overwrite any source path, include dir or /Fo path that merely
        # contains "/MD" or "/W3".
        for i, arg in enumerate(cmd):
            if re.match(r'^/MDd?$', arg) is not None:
                cmd[i] = '/MT'
            elif re.match(r'^/W[1-4]$', arg) is not None:
                cmd[i] = '/W0'

        # Recover the source file, object file and include arguments.
        src_regex = re.compile('/T(p|c)(.*)')
        src_list = [
            m.group(2) for m in (src_regex.match(elem) for elem in cmd) if m
        ]

        obj_regex = re.compile('/Fo(.*)')
        obj_list = [
            m.group(1) for m in (obj_regex.match(elem) for elem in cmd) if m
        ]

        include_regex = re.compile(r'((\-|\/)I.*)')
        include_list = [
            m.group(1) for m in (include_regex.match(elem) for elem in cmd)
            if m
        ]

        # Exactly one source and one object are expected per command.
        assert len(src_list) == 1 and len(obj_list) == 1
        src = src_list[0]
        obj = obj_list[0]

        if is_cuda_file(src):
            # CUDA source: discard the original command and build an nvcc one.
            assert CUDA_HOME is not None
            nvcc_cmd = os.path.join(CUDA_HOME, 'bin', 'nvcc')
            if isinstance(self.cflags, dict):
                cflags = self.cflags['nvcc']
            elif isinstance(self.cflags, list):
                cflags = self.cflags
            else:
                cflags = []

            cflags = prepare_win_cflags(cflags) + ['--use-local-env']
            # Each MSVC flag is passed to the host compiler via -Xcompiler;
            # prepending keeps the original (reversed) ordering.
            for flag in MSVC_COMPILE_FLAGS:
                cflags = ['-Xcompiler', flag] + cflags
            cmd = [nvcc_cmd, '-c', src, '-o', obj] + include_list + cflags
        elif isinstance(self.cflags, dict):
            cmd += MSVC_COMPILE_FLAGS + self.cflags['cxx']
        elif isinstance(self.cflags, list):
            cmd += MSVC_COMPILE_FLAGS + self.cflags

        return original_spawn(cmd)

    try:
        self.compiler.spawn = win_custom_spawn
        return original_compile(sources, output_dir, macros, include_dirs,
                                debug, extra_preargs, extra_postargs, depends)
    finally:
        self.compiler.spawn = original_spawn
def object_filenames_with_cuda(origina_func, build_directory):
"""
Decorate the function to add a customized naming mechanism.
......@@ -280,9 +366,12 @@ class BuildExtension(build_ext, object):
objects = origina_func(source_filenames, strip_dir,
output_dir)
for i, source in enumerate(source_filenames):
# modify xx.o -> xx.cu.o
# modify xx.o -> xx.cu.o/xx.cu.obj
if is_cuda_file(source):
old_obj = objects[i]
if self.compiler.compiler_type == 'msvc':
objects[i] = old_obj[:-3] + 'cu.obj'
else:
objects[i] = old_obj[:-1] + 'cu.o'
# if user set build_directory, output objects there.
if build_directory is not None:
......@@ -300,10 +389,13 @@ class BuildExtension(build_ext, object):
return wrapper
# customized compile process
if self.compiler.compiler_type == 'msvc':
self.compiler.compile = win_custom_single_compiler
else:
self.compiler._compile = unix_custom_single_compiler
self.compiler.object_filenames = object_filenames_with_cuda(
self.compiler.object_filenames, self.build_lib)
self._record_op_info()
print("Compiling user custom op, it will cost a few seconds.....")
......@@ -333,11 +425,17 @@ class BuildExtension(build_ext, object):
compiler = self.compiler.compiler_cxx[0]
elif IS_WINDOWS:
compiler = os.environ.get('CXX', 'cl')
raise NotImplementedError("We don't support Windows Currently.")
else:
compiler = os.environ.get('CXX', 'c++')
check_abi_compatibility(compiler)
# Warn user if VC env is activated but `DISTUTILS_USE_SDK` is not set.
if IS_WINDOWS and 'VSCMD_ARG_TGT_ARCH' in os.environ and 'DISTUTILS_USE_SDK' not in os.environ:
msg = (
'It seems that the VC environment is activated but DISTUTILS_USE_SDK is not set.'
'This may lead to multiple activations of the VC env.'
'Please set `DISTUTILS_USE_SDK=1` and try again.')
raise UserWarning(msg)
def _record_op_info(self):
"""
......@@ -380,7 +478,13 @@ class EasyInstallCommand(easy_install, object):
# .so shared library to another name.
for egg_file in self.outputs:
filename, ext = os.path.splitext(egg_file)
if ext == '.so':
will_rename = False
if OS_NAME.startswith('linux') and ext == '.so':
will_rename = True
elif IS_WINDOWS and ext == '.pyd':
will_rename = True
if will_rename:
new_so_path = filename + "_pd_" + ext
if not os.path.exists(new_so_path):
os.rename(r'%s' % egg_file, r'%s' % new_so_path)
......@@ -448,6 +552,10 @@ def load(name,
# ensure to use abs path
build_directory = os.path.abspath(build_directory)
# Will load shared library from 'path' on windows
if IS_WINDOWS:
os.environ['path'] = build_directory + ';' + os.environ['path']
log_v("build_directory: {}".format(build_directory), verbose)
file_path = os.path.join(build_directory, "setup.py")
......
......@@ -38,9 +38,19 @@ logger = logging.getLogger("utils.cpp_extension")
OS_NAME = sys.platform
IS_WINDOWS = OS_NAME.startswith('win')
NVCC_COMPILE_FLAGS = [
'-ccbin', 'cc', '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO',
'-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC'
MSVC_COMPILE_FLAGS = [
'/MT', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018',
'/wd4190', '/EHsc', '/w', '/DPADDLE_WITH_CUDA', '/DEIGEN_USE_GPU',
'/DNDEBUG'
]
MSVC_LINK_FLAGS = [
'/MACHINE:X64', 'paddle_framework.lib', 'cudadevrt.lib', 'cudart_static.lib'
]
COMMON_NVCC_FLAGS = [
'-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO', '-O3'
]
GCC_MINI_VERSION = (5, 4, 0)
......@@ -210,7 +220,21 @@ def prepare_unix_cflags(cflags):
"""
Prepare all necessary compiled flags for nvcc compiling CUDA files.
"""
cflags = NVCC_COMPILE_FLAGS + cflags + get_cuda_arch_flags(cflags)
cflags = COMMON_NVCC_FLAGS + [
'-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
'-DNVCC'
] + cflags + get_cuda_arch_flags(cflags)
return cflags
def prepare_win_cflags(cflags):
    """
    Assemble the nvcc flag list for compiling CUDA files on Windows.

    The result is, in order: COMMON_NVCC_FLAGS, three fixed flags, the
    caller's ``cflags``, and whatever ``get_cuda_arch_flags(cflags)`` returns.
    """
    fixed_flags = ['-DGOOGLE_GLOG_DLL_DECL', '-DBOOST_HAS_STATIC_ASSERT', '-w']
    # Build the prefix before asking for the arch flags, matching the
    # left-to-right evaluation of the original single expression.
    merged = COMMON_NVCC_FLAGS + fixed_flags + cflags
    merged = merged + get_cuda_arch_flags(cflags)
    return merged
......@@ -252,11 +276,14 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['library_dirs'] = library_dirs
# add runtime library dirs
runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['runtime_library_dirs'] = runtime_library_dirs
if IS_WINDOWS:
# TODO(zhouwei): may append compile flags in future
pass
# append link flags
extra_link_args = kwargs.get('extra_link_args', [])
extra_link_args.extend(MSVC_LINK_FLAGS)
kwargs['extra_link_args'] = extra_link_args
else:
# append compile flags
extra_compile_args = kwargs.get('extra_compile_args', [])
extra_compile_args.extend(['-g', '-w']) # disable warnings
......@@ -270,34 +297,15 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
kwargs['extra_link_args'] = extra_link_args
# add runtime library dirs
runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
kwargs['runtime_library_dirs'] = runtime_library_dirs
kwargs['language'] = 'c++'
return kwargs
def find_paddle_includes(use_cuda=False):
"""
Return Paddle necessary include dir path.
"""
# pythonXX/site-packages/paddle/include
paddle_include_dir = get_include()
third_party_dir = os.path.join(paddle_include_dir, 'third_party')
include_dirs = [paddle_include_dir, third_party_dir]
return include_dirs
def find_cuda_includes():
cuda_home = find_cuda_home()
if cuda_home is None:
raise ValueError(
"Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
)
return [os.path.join(cuda_home, 'lib64')]
def find_cuda_home():
"""
Use heuristic method to find cuda path
......@@ -315,19 +323,22 @@ def find_cuda_home():
if six.PY3:
nvcc_path = nvcc_path.decode()
nvcc_path = nvcc_path.rstrip('\r\n')
log_v(nvcc_path)
# for example: /usr/local/cuda/bin/nvcc
cuda_home = os.path.dirname(os.path.dirname(nvcc_path))
except:
if IS_WINDOWS:
# search from default NVIDIA GPU path
candidate_paths = glob.glob(
'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*.*'
)
if len(candidate_paths) > 0:
cuda_home = candidate_paths[0]
else:
cuda_home = "/usr/local/cuda"
# step 3. check whether path is valid
if not os.path.exists(cuda_home) and core.is_compiled_with_cuda():
if cuda_home and not os.path.exists(
cuda_home) and core.is_compiled_with_cuda():
cuda_home = None
warnings.warn(
"Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it."
......@@ -336,15 +347,65 @@ def find_cuda_home():
return cuda_home
def find_cuda_includes():
    """
    Locate the CUDA header directory.

    Returns:
        list[str]: a one-element list holding ``<cuda_home>/include``,
        where ``cuda_home`` is the value reported by ``find_cuda_home()``.

    Raises:
        ValueError: if ``find_cuda_home()`` returns None.
    """
    cuda_root = find_cuda_home()
    if cuda_root is not None:
        return [os.path.join(cuda_root, 'include')]
    raise ValueError(
        "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
    )
def find_paddle_includes(use_cuda=False):
    """
    Collect the include directories needed to build against Paddle.

    The list contains the Paddle include directory, its ``third_party``
    sub-directory, and the CUDA include directory. ``use_cuda`` is accepted
    but not consulted: the CUDA directory is appended unconditionally, so a
    ValueError raised by ``find_cuda_includes()`` propagates to the caller.
    """
    # pythonXX/site-packages/paddle/include
    paddle_root = get_include()
    include_dirs = [paddle_root, os.path.join(paddle_root, 'third_party')]

    # TODO(zhouwei): Eigen needs cuda_runtime.h, so the CUDA include
    # directory is always added.
    include_dirs += find_cuda_includes()
    return include_dirs
def find_cuda_libraries():
    """
    Locate the CUDA library directory.

    Returns:
        list[str]: a one-element list holding ``<cuda_home>/lib/x64`` when
        IS_WINDOWS is true, otherwise ``<cuda_home>/lib64``.

    Raises:
        ValueError: if ``find_cuda_home()`` returns None.
    """
    cuda_root = find_cuda_home()
    if cuda_root is None:
        raise ValueError(
            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
        )

    # The library sub-directory layout differs between the two platforms.
    lib_subdir = ('lib', 'x64') if IS_WINDOWS else ('lib64', )
    return [os.path.join(cuda_root, *lib_subdir)]
def find_paddle_libraries(use_cuda=False):
"""
Return Paddle necessary library dir path.
"""
# pythonXX/site-packages/paddle/libs
paddle_lib_dirs = [get_lib()]
if use_cuda:
cuda_dirs = find_cuda_includes()
paddle_lib_dirs.extend(cuda_dirs)
#TODO(zhouwei): because eigen need cuda_runtime.h
#So, extend cuda_lib_dir always
cuda_lib_dir = find_cuda_libraries()
paddle_lib_dirs.extend(cuda_lib_dir)
return paddle_lib_dirs
......@@ -374,12 +435,14 @@ def get_build_directory(verbose=False):
root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR')
if root_extensions_directory is None:
dir_name = "paddle_extensions"
if OS_NAME.startswith('linux'):
root_extensions_directory = os.path.join(
os.path.expanduser('~/.cache'), dir_name)
else:
# TODO(Aurelius84): consider wind32/macOs
raise NotImplementedError("Only support Linux now.")
if IS_WINDOWS:
root_extensions_directory = os.path.normpath(
root_extensions_directory)
elif OS_NAME.startswith('darwin'):
# TODO(Aurelius84): consider macOs
raise NotImplementedError("Not support Mac now.")
log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
format(root_extensions_directory), verbose)
......@@ -410,10 +473,13 @@ def parse_op_info(op_name):
def _import_module_from_library(module_name, build_directory, verbose=False):
"""
Load .so shared library and import it as callable python module.
Load shared library and import it as callable python module.
"""
# TODO(Aurelius84): Consider file suffix is .dll on Windows Platform.
ext_path = os.path.join(build_directory, module_name + '.so')
if IS_WINDOWS:
dynamic_suffix = '.pyd'
else:
dynamic_suffix = '.so'
ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
if not os.path.exists(ext_path):
raise FileNotFoundError("Extension path: {} does not exist.".format(
ext_path))
......@@ -565,12 +631,12 @@ def _write_setup_file(name,
def list2str(args):
"""
Convert list[str] into string. For example: [x, y] -> "['x', 'y']"
Convert list[str] into string. For example: ['x', 'y'] -> "['x', 'y']"
"""
if args is None: return '[]'
assert isinstance(args, (list, tuple))
args = ["'{}'".format(arg) for arg in args]
return '[' + ','.join(args) + ']'
args = ["{}".format(arg) for arg in args]
return repr(args)
def _jit_compile(file_path, interpreter=None, verbose=False):
......@@ -583,7 +649,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
if interpreter is None:
interpreter = 'python'
try:
py_path = subprocess.check_output(['which', interpreter])
which = 'where' if IS_WINDOWS else 'which'
py_path = subprocess.check_output([which, interpreter])
py_version = subprocess.check_output([interpreter, '-V'])
if six.PY3:
py_path = py_path.decode()
......@@ -596,8 +663,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
'Failed to check Python interpreter with `{}`, errors: {}'.format(
interpreter, error))
if IS_WINDOWS:
compile_cmd = 'cd /d {} && {} {} build'.format(ext_dir, interpreter,
setup_file)
else:
compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter,
setup_file)
print("Compiling user custom op, it will cost a few seconds.....")
run_cmd(compile_cmd, verbose)
......@@ -682,7 +754,7 @@ def check_abi_compatibility(compiler, verbose=False):
try:
if OS_NAME.startswith('linux'):
version_info = subprocess.check_output(
[compiler, '-dumpfullversion'])
[compiler, '-dumpfullversion', '-dumpversion'])
if six.PY3:
version_info = version_info.decode()
version = version_info.strip().split('.')
......@@ -694,8 +766,8 @@ def check_abi_compatibility(compiler, verbose=False):
warnings.warn(
ABI_INCOMPATIBILITY_WARNING.format(
user_compiler=compiler, version=version_info.strip()))
# TODO(Aurelius84): check version compatibility on windows
elif IS_WINDOWS:
# TODO(zhouwei): support check abi compatibility on windows
warnings.warn("We don't support Windows now.")
except Exception:
_, error, _ = sys.exc_info()
......@@ -714,7 +786,7 @@ def _expected_compiler_current_platform():
return expect_compilers
def log_v(info, verbose):
def log_v(info, verbose=True):
"""
Print log information on stdout.
"""
......
......@@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5"
numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows"
numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows"
protobuf>=3.1.0
gast>=0.3.3
gast>=0.3.3 ; platform_system != "Windows"
gast==0.3.3 ; platform_system == "Windows"
Pillow
six
decorator
......
......@@ -335,11 +335,16 @@ if '${WITH_XPU_BKCL}' == 'ON':
shutil.copy('${XPU_BKCL_LIB}', libs_path)
package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}']
# copy libfuild_framework.so to libs
if os.name != 'nt' and sys.platform != 'darwin':
paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}'
shutil.copy(paddle_framework_lib, libs_path)
package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name]
# copy libpaddle_framework.so to libs on linux
if sys.platform.startswith('linux'):
shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['libpaddle_framework.so']
# copy paddle_framework.lib/paddle_framework.dll to libs on windows
if os.name == 'nt':
shutil.copy('${FLUID_FRAMEWORK_IMPORT_LIB}', libs_path)
shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
package_data['paddle.libs'] += ['paddle_framework.lib', 'paddle_framework.dll']
# remove unused paddle/libs/__init__.py
if os.path.isfile(libs_path+'/__init__.py'):
......@@ -410,9 +415,9 @@ if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
class InstallCommand(InstallCommandBase):
def finalize_options(self):
ret = InstallCommandBase.finalize_options(self)
self.install_headers = os.path.join(self.install_purelib, 'paddle',
'include')
self.install_lib = self.install_platlib
self.install_headers = os.path.join(self.install_platlib, 'paddle',
'include')
return ret
......@@ -463,11 +468,6 @@ class InstallHeaders(Command):
return self.copy_file(header, install_dir)
def run(self):
# only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
if os.name == 'nt' or sys.platform == 'darwin':
if '${WITH_GPU}' == 'ON' or '${WITH_ROCM}' == 'ON':
self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
return
hdrs = self.distribution.headers
if not hdrs:
return
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册