Commit d3e60959 (unverified) in Crayon鑫 / Paddle, forked from PaddlePaddle / Paddle

[Cherry-pick] The Second part of new custom op extension in 2.0.1 (#31237)

Authored by Chen Weihang on Feb 26, 2021; committed via GitHub on Feb 26, 2021.
Parent commit: 34092ab3

Showing 32 changed files with 1,705 additions and 504 deletions.
Changed files:

  CMakeLists.txt (+2 / -0)
  cmake/generic.cmake (+6 / -8)
  paddle/fluid/extension/include/all.h (+6 / -0)
  paddle/fluid/extension/include/dll_decl.h (+27 / -0, new file)
  paddle/fluid/extension/include/op_meta_info.h (+82 / -28)
  paddle/fluid/extension/include/tensor.h (+2 / -1)
  paddle/fluid/extension/src/op_meta_info.cc (+19 / -3)
  paddle/fluid/extension/src/tensor.cc (+65 / -51)
  paddle/fluid/framework/CMakeLists.txt (+41 / -4)
  paddle/fluid/framework/custom_operator.cc (+118 / -14)
  paddle/fluid/platform/dynload/dynamic_loader.cc (+0 / -3)
  paddle/scripts/paddle_build.bat (+40 / -10)
  python/paddle/fluid/tests/CMakeLists.txt (+3 / -1)
  python/paddle/fluid/tests/custom_op/CMakeLists.txt (+44 / -17)
  python/paddle/fluid/tests/custom_op/attr_test_op.cc (+182 / -0, new file)
  python/paddle/fluid/tests/custom_op/custom_relu_op.cc (+5 / -25)
  python/paddle/fluid/tests/custom_op/custom_relu_op.cu (+1 / -21)
  python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc (+3 / -3)
  python/paddle/fluid/tests/custom_op/custom_relu_setup.py (+5 / -2)
  python/paddle/fluid/tests/custom_op/multi_out_test_op.cc (+76 / -0, new file)
  python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py (+67 / -0, new file)
  python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py (+89 / -0)
  python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py (+108 / -19)
  python/paddle/fluid/tests/custom_op/test_dispatch_jit.py (+11 / -2)
  python/paddle/fluid/tests/custom_op/test_jit_load.py (+2 / -1)
  python/paddle/fluid/tests/custom_op/test_multi_out_jit.py (+47 / -72)
  python/paddle/fluid/tests/custom_op/utils.py (+2 / -2)
  python/paddle/utils/cpp_extension/__init__.py (+1 / -2)
  python/paddle/utils/cpp_extension/cpp_extension.py (+424 / -112)
  python/paddle/utils/cpp_extension/extension_utils.py (+208 / -90)
  python/requirements.txt (+2 / -1)
  python/setup.py.in (+17 / -12)
CMakeLists.txt

@@ -293,6 +293,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 
+add_definitions(-DPADDLE_DLL_EXPORT)
+
 if(ON_INFER)
     # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF
     message(STATUS "On inference mode, will take place some specific optimization.")
cmake/generic.cmake

@@ -792,17 +792,15 @@ function(py_test TARGET_NAME)
     if(WITH_COVERAGE)
       add_test(NAME ${TARGET_NAME}
                COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
-               FLAGS_cpu_deterministic=true
-               PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+               FLAGS_cpu_deterministic=true ${py_test_ENVS}
                COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
                ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS}
                WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     else()
       add_test(NAME ${TARGET_NAME}
                COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
-               FLAGS_cpu_deterministic=true
-               PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+               FLAGS_cpu_deterministic=true ${py_test_ENVS}
                ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
                WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
     endif()
paddle/fluid/extension/include/all.h

@@ -18,6 +18,12 @@ limitations under the License. */
 #error C++11 or later compatible compiler is required to use Paddle.
 #endif
 
+#ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#endif
+#endif
+
 #include "paddle/fluid/extension/include/dispatch.h"
 #include "paddle/fluid/extension/include/dtype.h"
 #include "paddle/fluid/extension/include/op_meta_info.h"
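A side note on the NOMINMAX guard above (background, not part of the diff): windows.h defines min and max as function-style macros, which breaks std::min/std::max in any op source that ends up including both. Roughly:

// With NOMINMAX defined before <windows.h> is pulled in (as all.h now
// guarantees), the standard algorithms keep working on MSVC.
#define NOMINMAX
// #include <windows.h>  // would otherwise turn std::min into a macro clash
#include <algorithm>

int smaller(int a, int b) { return std::min(a, b); }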
paddle/fluid/extension/include/dll_decl.h (new file, mode 100644)

// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#if defined(_WIN32)
#ifndef PD_DLL_DECL
#ifdef PADDLE_DLL_EXPORT
#define PD_DLL_DECL __declspec(dllexport)
#else
#define PD_DLL_DECL __declspec(dllimport)
#endif  // PADDLE_DLL_EXPORT
#endif  // PD_DLL_DECL
#else
#define PD_DLL_DECL
#endif  // _WIN32
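For orientation (not part of the commit): the macro is consumed by annotating every type or function that must be visible across the DLL boundary. A minimal sketch; ExportedHelper and exported_free_function are illustrative names, not Paddle code.

#include "paddle/fluid/extension/include/dll_decl.h"

// Built inside the framework (which defines PADDLE_DLL_EXPORT), this
// expands to __declspec(dllexport) on Windows; a custom-op build that
// only includes the header gets dllimport; elsewhere it expands to nothing.
class PD_DLL_DECL ExportedHelper {
 public:
  int Value() const { return 42; }
};

PD_DLL_DECL int exported_free_function();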
paddle/fluid/extension/include/op_meta_info.h

@@ -14,12 +14,14 @@ limitations under the License. */
 #pragma once
 
+#include <iostream>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include <boost/any.hpp>
 
+#include "paddle/fluid/extension/include/dll_decl.h"
 #include "paddle/fluid/extension/include/tensor.h"

@@ -31,7 +33,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class OpMetaInfoHelper;
+class PD_DLL_DECL OpMetaInfoHelper;
 }  // namespace framework
 
 using Tensor = paddle::Tensor;

@@ -43,6 +45,26 @@ using Tensor = paddle::Tensor;
   classname& operator=(const classname&) = delete; \
   classname& operator=(classname&&) = delete
 
+#if defined _WIN32
+#define HANDLE_THE_ERROR try {
+#define END_HANDLE_THE_ERROR            \
+  }                                     \
+  catch (const std::exception& e) {     \
+    std::cerr << e.what() << std::endl; \
+    throw e;                            \
+  }
+#else
+#define HANDLE_THE_ERROR
+#define END_HANDLE_THE_ERROR
+#endif
+
+#define PD_THROW(err_msg)              \
+  do {                                 \
+    HANDLE_THE_ERROR                   \
+    throw std::runtime_error(err_msg); \
+    END_HANDLE_THE_ERROR               \
+  } while (0)
+
 ///////////////// Util Define and Function ////////////////
 
 inline std::string Grad(const std::string& var_name) {

@@ -59,6 +81,26 @@ inline std::string Grad(const std::string& var_name) {
 using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs,
                                            std::vector<boost::any> attrs);
 
+#define PD_SPECIALIZE_ComputeCallHelper(attr_type)                          \
+  template <typename... Tail>                                               \
+  struct ComputeCallHelper<attr_type, Tail...> {                            \
+    template <int in_idx, int attr_idx, typename... PreviousArgs>           \
+    static Return Compute(std::vector<Tensor> inputs,                       \
+                          std::vector<boost::any> attrs,                    \
+                          const PreviousArgs&... pargs) {                   \
+      try {                                                                 \
+        attr_type arg = boost::any_cast<attr_type>(attrs[attr_idx]);        \
+        return ComputeCallHelper<Tail...>::template Compute<in_idx,         \
+                                                            attr_idx + 1>(  \
+            inputs, attrs, pargs..., arg);                                  \
+      } catch (boost::bad_any_cast&) {                                      \
+        PD_THROW(                                                           \
+            "Attribute cast error in custom operator. Expected " #attr_type \
+            " value.");                                                     \
+      }                                                                     \
+    }                                                                       \
+  }
+
 template <typename T>
 struct TypeTag {};

@@ -92,26 +134,20 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
     }
   };
 
-  // TODO(chenweihang): add support for attribute input
-  // int attribute input (not used now)
-  template <typename... Tail>
-  struct ComputeCallHelper<int, Tail...> {
-    template <int in_idx, int attr_idx, typename... PreviousArgs>
-    static Return Compute(std::vector<Tensor> inputs,
-                          std::vector<boost::any> attrs,
-                          const PreviousArgs&... pargs) {
-      try {
-        int arg = boost::any_cast<int>(attrs[attr_idx]);
-        return ComputeCallHelper<Tail...>::template Compute<in_idx,
-                                                            attr_idx + 1>(
-            inputs, attrs, pargs..., arg);
-      } catch (boost::bad_any_cast&) {
-        throw std::runtime_error(
-            "Attribute cast error in custom operator. Expected int value.");
-      }
-    }
-  };
+  PD_SPECIALIZE_ComputeCallHelper(bool);
+  PD_SPECIALIZE_ComputeCallHelper(int);
+  PD_SPECIALIZE_ComputeCallHelper(float);
+  PD_SPECIALIZE_ComputeCallHelper(int64_t);
+  PD_SPECIALIZE_ComputeCallHelper(std::string);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<int>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<float>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<int64_t>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<std::string>);
+  // TODO(chenweihang): support other attribute type if needed.
+  // Why not support other attribute type here?
+  // - boost::blank, std::vector<bool> and std::vector<double>
+  //   are not used in op
+  // - BlockDesc* and std::vector<BlockDesc*> are used in framework
 
   // end: base template
   template <typename T>
   struct ComputeCallHelper<TypeTag<T>> {

@@ -220,13 +256,26 @@ struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
 ////////////////////// Op Meta Info //////////////////////
 
-class OpMetaInfo {
+class PD_DLL_DECL OpMetaInfo {
  public:
   explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
+
+  // format: {"<name1>", "<name2>", ...}
   OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
+
+  // format: {"<name1>", "<name2>", ...}
   OpMetaInfo& Outputs(std::vector<std::string>&& outputs);
+
+  // format: {"<name1>:<type1>", "<name1>:<type1>", ...}
+  OpMetaInfo& Attrs(std::vector<std::string>&& attrs);
+
+  // format: PD_KERNEL(...)
   OpMetaInfo& SetKernelFn(KernelFunc&& func);
+
+  // format: PD_INFER_SHAPE(...)
   OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func);
+
+  // format: PD_INFER_DTYPE(...)
   OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func);
 
  private:

@@ -246,7 +295,7 @@ class OpMetaInfo {
 //////////////// Op Meta Info Map /////////////////
 
-class OpMetaInfoMap {
+class PD_DLL_DECL OpMetaInfoMap {
  public:
   // this function's impl should keep in header file.
   // if move to cc file, meta info can not be added

@@ -270,14 +319,15 @@ class OpMetaInfoMap {
 //////////////// Op Meta Info Builder /////////////////
 
-class OpMetaInfoBuilder {
+class PD_DLL_DECL OpMetaInfoBuilder {
  public:
   explicit OpMetaInfoBuilder(std::string&& name);
   OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
   OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
-  OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func);
-  OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func);
-  OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func);
+  OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
+  OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
+  OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
+  OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
   OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
 
  private:

@@ -317,8 +367,12 @@ void LoadCustomOperatorLib(const std::string& dso_name);
 extern "C" {
 #endif
 
+#if defined(_WIN32)
 // C-API to get global OpMetaInfoMap.
-paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap();
+__declspec(dllexport) inline paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
+  return paddle::OpMetaInfoMap::Instance();
+}
+#endif  // _WIN32
 
 #ifdef __cplusplus
 }
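To make the PD_SPECIALIZE_ComputeCallHelper machinery above concrete, here is a small self-contained analogue (simplified names, no Paddle dependency; AttrUnpacker is hypothetical, not the library's type): each specialization casts one boost::any attribute to its static type and recurses until the empty pack invokes the kernel, mirroring how the generated ComputeCallHelper specializations thread attr_idx through the call.

#include <boost/any.hpp>
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Simplified analogue of the ComputeCallHelper recursion: peel one
// attribute per specialization, cast it from boost::any to its static
// type, and grow the argument pack until it is complete.
template <typename... Rest>
struct AttrUnpacker;

template <typename Head, typename... Rest>
struct AttrUnpacker<Head, Rest...> {
  template <typename Fn, typename... Prev>
  static void Run(const Fn& fn, const std::vector<boost::any>& attrs,
                  std::size_t idx, const Prev&... prev) {
    try {
      Head arg = boost::any_cast<Head>(attrs[idx]);
      AttrUnpacker<Rest...>::Run(fn, attrs, idx + 1, prev..., arg);
    } catch (boost::bad_any_cast&) {
      // The real header reports this through PD_THROW.
      throw std::runtime_error("attribute cast error in custom operator");
    }
  }
};

template <>
struct AttrUnpacker<> {
  template <typename Fn, typename... Prev>
  static void Run(const Fn& fn, const std::vector<boost::any>&, std::size_t,
                  const Prev&... prev) {
    fn(prev...);  // all attributes decoded, invoke the kernel body
  }
};

int main() {
  std::vector<boost::any> attrs{1, 3.14f, std::string("name")};
  AttrUnpacker<int, float, std::string>::Run(
      [](int i, float f, const std::string& s) {
        std::cout << i << " " << f << " " << s << "\n";
      },
      attrs, 0);
}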
paddle/fluid/extension/include/tensor.h

@@ -16,6 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <vector>
+#include "paddle/fluid/extension/include/dll_decl.h"
 #include "paddle/fluid/extension/include/dtype.h"
 #include "paddle/fluid/extension/include/place.h"

@@ -23,7 +24,7 @@ namespace paddle {
 namespace framework {
 class CustomTensorUtils;
 }  // namespace framework
-class Tensor {
+class PD_DLL_DECL Tensor {
  public:
   /// \brief Construct a Tensor on target Place for CustomOp.
   /// Generally it's only used for user to create Tensor.
paddle/fluid/extension/src/op_meta_info.cc

@@ -32,6 +32,10 @@ OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) {
   outputs_ = std::forward<std::vector<std::string>>(outputs);
   return *this;
 }
+OpMetaInfo& OpMetaInfo::Attrs(std::vector<std::string>&& attrs) {
+  attrs_ = std::forward<std::vector<std::string>>(attrs);
+  return *this;
+}
 OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) {
   kernel_fn_ = std::forward<KernelFunc>(func);
   return *this;

@@ -78,17 +82,22 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
+  info_ptr_->Attrs(std::forward<std::vector<std::string>>(attrs));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
   info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
   info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
   info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
   return *this;
 }

@@ -114,10 +123,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) {
 }
 }  // namespace paddle
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
+#ifndef _WIN32
 // C-API to get global OpMetaInfoMap.
 paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
   return paddle::OpMetaInfoMap::Instance();
 }
+#endif
 
+#ifdef __cplusplus
 }  // end extern "C"
+#endif
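One detail worth calling out in the hunk above: the builder setters changed from KernelFunc&& to pass-by-value. A standalone sketch of why (Builder and MyKernel are stand-ins, not Paddle code): a by-value parameter binds to both lvalues and rvalues, so a named function pointer can now be registered directly.

#include <vector>

struct Tensor {};  // stand-in for paddle::Tensor
using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor>);

struct Builder {
  // By-value accepts lvalues and rvalues alike; the old `KernelFunc&&`
  // overload rejected a named (lvalue) function pointer.
  Builder& SetKernelFn(KernelFunc func) {
    fn_ = func;
    return *this;
  }
  KernelFunc fn_ = nullptr;
};

std::vector<Tensor> MyKernel(std::vector<Tensor> ins) { return ins; }

int main() {
  Builder b;
  KernelFunc named = &MyKernel;
  b.SetKernelFn(named);      // lvalue: ok by value, ill-formed with &&
  b.SetKernelFn(&MyKernel);  // rvalue: ok either way
}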
paddle/fluid/extension/src/tensor.cc

@@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
   return target;
 }

(Every explicit instantiation below previously lacked PD_DLL_DECL; the hunk adds the export macro to each one, so only the post-change lines are listed.)

template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::float16>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::bfloat16>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex64>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::complex128>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<float>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<double>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<int64_t>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<int32_t>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<uint8_t>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<int8_t>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<int16_t>(
    const PlaceType& target_place) const;
template PD_DLL_DECL Tensor Tensor::copy_to<bool>(
    const PlaceType& target_place) const;

template PD_DLL_DECL float* Tensor::data<float>() const;
template PD_DLL_DECL double* Tensor::data<double>() const;
template PD_DLL_DECL int64_t* Tensor::data<int64_t>() const;
template PD_DLL_DECL int32_t* Tensor::data<int32_t>() const;
template PD_DLL_DECL uint8_t* Tensor::data<uint8_t>() const;
template PD_DLL_DECL int8_t* Tensor::data<int8_t>() const;
template PD_DLL_DECL paddle::platform::float16*
Tensor::data<paddle::platform::float16>() const;
template PD_DLL_DECL paddle::platform::bfloat16*
Tensor::data<paddle::platform::bfloat16>() const;
template PD_DLL_DECL paddle::platform::complex128*
Tensor::data<paddle::platform::complex128>() const;
template PD_DLL_DECL paddle::platform::complex64*
Tensor::data<paddle::platform::complex64>() const;
template PD_DLL_DECL int16_t* Tensor::data<int16_t>() const;
template PD_DLL_DECL bool* Tensor::data<bool>() const;

template PD_DLL_DECL float* Tensor::mutable_data<float>();
template PD_DLL_DECL double* Tensor::mutable_data<double>();
template PD_DLL_DECL int64_t* Tensor::mutable_data<int64_t>();
template PD_DLL_DECL int32_t* Tensor::mutable_data<int32_t>();
template PD_DLL_DECL uint8_t* Tensor::mutable_data<uint8_t>();
template PD_DLL_DECL int8_t* Tensor::mutable_data<int8_t>();
template PD_DLL_DECL paddle::platform::float16*
Tensor::mutable_data<paddle::platform::float16>();
template PD_DLL_DECL paddle::platform::bfloat16*
Tensor::mutable_data<paddle::platform::bfloat16>();
template PD_DLL_DECL paddle::platform::complex128*
Tensor::mutable_data<paddle::platform::complex128>();
template PD_DLL_DECL paddle::platform::complex64*
Tensor::mutable_data<paddle::platform::complex64>();
template PD_DLL_DECL int16_t* Tensor::mutable_data<int16_t>();
template PD_DLL_DECL bool* Tensor::mutable_data<bool>();

template PD_DLL_DECL float* Tensor::mutable_data<float>(const PlaceType& place);
template PD_DLL_DECL double* Tensor::mutable_data<double>(
    const PlaceType& place);
template PD_DLL_DECL int64_t* Tensor::mutable_data<int64_t>(
    const PlaceType& place);
template PD_DLL_DECL int32_t* Tensor::mutable_data<int32_t>(
    const PlaceType& place);
template PD_DLL_DECL uint8_t* Tensor::mutable_data<uint8_t>(
    const PlaceType& place);
template PD_DLL_DECL int8_t* Tensor::mutable_data<int8_t>(
    const PlaceType& place);
template PD_DLL_DECL paddle::platform::float16*
Tensor::mutable_data<paddle::platform::float16>(const PlaceType& place);
template PD_DLL_DECL paddle::platform::bfloat16*
Tensor::mutable_data<paddle::platform::bfloat16>(const PlaceType& place);
template PD_DLL_DECL paddle::platform::complex128*
Tensor::mutable_data<paddle::platform::complex128>(const PlaceType& place);
template PD_DLL_DECL paddle::platform::complex64*
Tensor::mutable_data<paddle::platform::complex64>(const PlaceType& place);
template PD_DLL_DECL int16_t* Tensor::mutable_data<int16_t>(
    const PlaceType& place);
template PD_DLL_DECL bool* Tensor::mutable_data<bool>(const PlaceType& place);

std::vector<int> Tensor::shape() const {
  GET_CASTED_TENSOR
  ...
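The uniform edit in this file reflects an MSVC rule: an explicit instantiation definition must itself carry the export attribute, or its symbols never reach the DLL's export table. A generic sketch under that assumption (Foo and EXAMPLE_DECL are illustrative, not Paddle code):

#if defined(_WIN32)
#define EXAMPLE_DECL __declspec(dllexport)
#else
#define EXAMPLE_DECL
#endif

template <typename T>
struct Foo {
  T value{};
};

// Without the macro, MSVC compiles the instantiation but does not export
// its symbols, and a client linking against the import library fails with
// unresolved externals -- the same failure this hunk fixes for
// Tensor::copy_to / data / mutable_data.
template struct EXAMPLE_DECL Foo<int>;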
paddle/fluid/framework/CMakeLists.txt

@@ -321,9 +321,9 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
 configure_file(commit.h.in commit.h)
 
-cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor)
+cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor memory enforce)
 cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor)
-cc_library(custom_operator SRCS custom_operator.cc DEPS operator op_registry device_context dynamic_loader custom_tensor op_meta_info)
+cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper custom_tensor op_meta_info)
 cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
 
 set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)

@@ -346,9 +346,12 @@ if (LINUX)
 endif()
 
 if(WIN32)
+  set(FLUID_FRAMEWORK_IMPORT_LIB
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.lib
+      CACHE INTERNAL "Fluid framework lib")
   set(FLUID_FRAMEWORK_SHARED_LIB
-      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll
-      CACHE INTERNAL "Fluid framework lib")
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.dll
+      CACHE INTERNAL "Fluid framework dll")
 endif()
 
 if(APPLE)

@@ -359,3 +362,37 @@ endif()
 if (WITH_TESTING)
     set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
 endif()
+
+# New custom op extension mechanism related
+# if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
+set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
+
+cc_library(paddle_custom_op_shared
+    SHARED SRCS custom_operator.cc ../extension/src/tensor.cc ../extension/src/op_meta_info.cc
+    ${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
+    DEPS ${PADDLE_CUSTOM_OP_MODULES})
+
+get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
+target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
+
+if (LINUX)
+  set(PADDLE_CUSTOM_OP_SHARED_LIB
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_custom_op.so
+      CACHE INTERNAL "Paddle custom op lib")
+endif()
+
+if (WIN32)
+  set(PADDLE_CUSTOM_OP_SHARED_LIB
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.lib
+      CACHE INTERNAL "Paddle custom op lib")
+  set(PADDLE_CUSTOM_OP_SHARED_LIB
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.dll
+      CACHE INTERNAL "Paddle custom op dll")
+endif()
+
+if (APPLE)
+  set(PADDLE_CUSTOM_OP_SHARED_LIB
+      ${PADDLE_BINARY_DIR}/paddle/fluid/framework/paddle_custom_op.dylib
+      CACHE INTERNAL "Paddle custom op lib")
+endif()
paddle/fluid/framework/custom_operator.cc

@@ -73,6 +73,24 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
   return std::find(vec.cbegin(), vec.cend(), name) != vec.cend();
 }
 
+std::vector<std::string> ParseAttrStr(const std::string& attr) {
+  auto split_pos = attr.find_first_of(":");
+  PADDLE_ENFORCE_NE(split_pos, std::string::npos,
+                    platform::errors::InvalidArgument(
+                        "Invalid attribute string format. Attribute string "
+                        "format is `<name>:<type>`."));
+
+  std::vector<std::string> rlt;
+  // 1. name
+  rlt.emplace_back(string::trim_spaces(attr.substr(0, split_pos)));
+  // 2. type
+  rlt.emplace_back(string::trim_spaces(attr.substr(split_pos + 1)));
+
+  VLOG(1) << "attr name: " << rlt[0] << ", attr type str: " << rlt[1];
+
+  return rlt;
+}
+
 }  // namespace detail
 
 ////////////////// Kernel Define ////////////////////

@@ -81,7 +99,8 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
 static void RunKernelFunc(const framework::ExecutionContext& ctx,
                           const paddle::KernelFunc& func,
                           const std::vector<std::string>& inputs,
-                          const std::vector<std::string>& outputs) {
+                          const std::vector<std::string>& outputs,
+                          const std::vector<std::string>& attrs) {
   VLOG(1) << "Custom Operator: Start run KernelFunc.";
   std::vector<paddle::Tensor> custom_ins;
   for (auto& in_name : inputs) {

@@ -98,10 +117,43 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
     custom_ins.emplace_back(custom_in);
   }
 
-  std::vector<boost::any> attrs;
+  std::vector<boost::any> custom_attrs;
+  for (auto& attr_str : attrs) {
+    auto attr_name_and_type = detail::ParseAttrStr(attr_str);
+    auto attr_name = attr_name_and_type[0];
+    auto attr_type_str = attr_name_and_type[1];
+    if (attr_type_str == "bool") {
+      custom_attrs.emplace_back(ctx.Attr<bool>(attr_name));
+    } else if (attr_type_str == "int") {
+      custom_attrs.emplace_back(ctx.Attr<int>(attr_name));
+    } else if (attr_type_str == "float") {
+      custom_attrs.emplace_back(ctx.Attr<float>(attr_name));
+    } else if (attr_type_str == "int64_t") {
+      custom_attrs.emplace_back(ctx.Attr<int64_t>(attr_name));
+    } else if (attr_type_str == "std::string") {
+      custom_attrs.emplace_back(ctx.Attr<std::string>(attr_name));
+    } else if (attr_type_str == "std::vector<int>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<int>>(attr_name));
+    } else if (attr_type_str == "std::vector<float>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<float>>(attr_name));
+    } else if (attr_type_str == "std::vector<int64_t>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<int64_t>>(attr_name));
+    } else if (attr_type_str == "std::vector<std::string>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<std::string>>(attr_name));
+    } else {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Unsupported `%s` type value as custom attribute now. "
+          "Supported data types include `bool`, `int`, `float`, "
+          "`int64_t`, `std::string`, `std::vector<int>`, "
+          "`std::vector<float>`, `std::vector<int64_t>, "
+          "`std::vector<std::string>`, Please check whether "
+          "the attribute data type and data type string are matched.",
+          attr_type_str));
+    }
+  }
 
   VLOG(1) << "Run ComputeFunc.";
-  auto outs = func(custom_ins, attrs);
+  auto outs = func(custom_ins, custom_attrs);
 
   VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
   for (size_t i = 0; i < outputs.size(); ++i) {

@@ -164,7 +216,51 @@ class CustomOpMaker : public OpProtoAndCheckerMaker {
     for (auto& out_name : outputs_) {
       AddOutput(out_name, "The output " + out_name + "of Custom Operator.");
     }
-    // TODO(chenweihang): support attrs in later PR
+    for (auto& attr : attrs_) {
+      auto attr_name_and_type = detail::ParseAttrStr(attr);
+      auto attr_name = attr_name_and_type[0];
+      auto attr_type_str = attr_name_and_type[1];
+      if (attr_type_str == "bool") {
+        AddAttr<bool>(attr_name, "custom operator bool attribute.")
+            .SetDefault(false);
+      } else if (attr_type_str == "int") {
+        AddAttr<int>(attr_name, "custom operator int attribute.").SetDefault(1);
+      } else if (attr_type_str == "float") {
+        AddAttr<float>(attr_name, "custom operator float attribute.")
+            .SetDefault(1.0f);
+      } else if (attr_type_str == "int64_t") {
+        AddAttr<int64_t>(attr_name, "custom operator int64_t attribute.")
+            .SetDefault(1);
+      } else if (attr_type_str == "std::string") {
+        AddAttr<std::string>(attr_name, "custom operator int attribute.")
+            .SetDefault("");
+      } else if (attr_type_str == "std::vector<int>") {
+        AddAttr<std::vector<int>>(attr_name,
+                                  "custom operator std::vector<int> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<float>") {
+        AddAttr<std::vector<float>>(
+            attr_name, "custom operator std::vector<float> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<int64_t>") {
+        AddAttr<std::vector<int64_t>>(
+            attr_name, "custom operator std::vector<int64_t> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<std::string>") {
+        AddAttr<std::vector<std::string>>(
+            attr_name, "custom operator std::vector<std::string> attribute.")
+            .SetDefault({});
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported `%s` type value as custom attribute now. "
+            "Supported data types include `bool`, `int`, `float`, "
+            "`int64_t`, `std::string`, `std::vector<int>`, "
+            "`std::vector<float>`, `std::vector<int64_t>, "
+            "`std::vector<std::string>`, Please check whether "
+            "the attribute data type and data type string are matched.",
+            attr_type_str));
+      }
+    }
     AddComment(R"DOC(
 Custom Operator.

@@ -227,7 +323,7 @@ class CustomGradOpMaker<OpDesc> : public SingleGradOpMaker<OpDesc> {
       VLOG(1) << "Custom Operator: GradOpDescMaker - output: " << out_name;
       grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
     }
-    // TODO(chenweihang): support attrs in later PR
+    grad_op->SetAttrMap(this->Attrs());
   }
 
  private:

@@ -287,7 +383,7 @@ class CustomGradOpMaker<imperative::OpBase>
       VLOG(1) << "Custom Operator: GradOpBaseMaker - output: " << out_name;
       grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
     }
-    // TODO(chenweihang): support attrs in later PR
+    grad_op->SetAttrMap(this->Attrs());
   }
 
  private:

@@ -303,21 +399,24 @@ void RegisterOperatorKernelWithPlace(const std::string& name,
                                      const paddle::KernelFunc& kernel_func,
                                      const proto::VarType::Type type,
                                      const PlaceType& place,
                                      const std::vector<std::string>& inputs,
-                                     const std::vector<std::string>& outputs) {
+                                     const std::vector<std::string>& outputs,
+                                     const std::vector<std::string>& attrs) {
   OpKernelType key(type, CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place));
   VLOG(1) << "Custom Operator: op kernel key: " << key;
   OperatorWithKernel::AllOpKernels()[name][key] =
-      [kernel_func, inputs, outputs](const framework::ExecutionContext& ctx) {
+      [kernel_func, inputs, outputs, attrs](
+          const framework::ExecutionContext& ctx) {
         VLOG(1) << "Custom Operator: run custom kernel func in lambda.";
-        RunKernelFunc(ctx, kernel_func, inputs, outputs);
+        RunKernelFunc(ctx, kernel_func, inputs, outputs, attrs);
       };
 }
 
 void RegisterOperatorKernel(const std::string& name,
                             const paddle::KernelFunc& kernel_func,
                             const std::vector<std::string>& inputs,
-                            const std::vector<std::string>& outputs) {
+                            const std::vector<std::string>& outputs,
+                            const std::vector<std::string>& attrs) {
   VLOG(1) << "Custom Operator: op name in kernel: " << name;
   // NOTE [ Dummy Op Kernel Key ]
   // TODO(chenweihang): Because execute engine need get device context based

@@ -325,9 +424,11 @@ void RegisterOperatorKernel(const std::string& name,
   // device. But this is not entirely correct, if user only give a cpu kernel,
   // but call api in gpu device, it will cause error.
   RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
-                                  PlaceType::kCPU, inputs, outputs);
+                                  PlaceType::kCPU, inputs, outputs, attrs);
 #ifdef PADDLE_WITH_CUDA
   RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
-                                  PlaceType::kGPU, inputs, outputs);
+                                  PlaceType::kGPU, inputs, outputs, attrs);
 #endif
 }
 
 void RegisterOperatorWithMetaInfo(

@@ -350,6 +451,8 @@ void RegisterOperatorWithMetaInfo(
           << string::join_strings(op_inputs, ',');
   VLOG(1) << "Custom Operator: forward, op outputs: "
           << string::join_strings(op_outputs, ',');
+  VLOG(1) << "Custom Operator: forward, op attrs: "
+          << string::join_strings(op_attrs, ',');
 
   // Op
   info.creator_ = [](const std::string& op_name, const VariableNameMap& inputs,

@@ -426,7 +529,7 @@ void RegisterOperatorWithMetaInfo(
   };
 
   // Kernel func
-  RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs);
+  RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs);
 
   // If grad op or double grad op exists
   std::string cur_op_name = op_name;

@@ -436,6 +539,7 @@ void RegisterOperatorWithMetaInfo(
     auto& grad_op_name = OpMetaInfoHelper::GetOpName(cur_grad_op);
     auto& grad_op_inputs = OpMetaInfoHelper::GetInputs(cur_grad_op);
     auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op);
+    auto& grad_op_attrs = OpMetaInfoHelper::GetAttrs(cur_grad_op);
    auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op);
 
     VLOG(1) << "Custom Operator: backward, op name: " << grad_op_name;

@@ -489,7 +593,7 @@ void RegisterOperatorWithMetaInfo(
     // Kernel func
     RegisterOperatorKernel(grad_op_name, grad_kernel_fn, grad_op_inputs,
-                           grad_op_outputs);
+                           grad_op_outputs, grad_op_attrs);
 
     // update current info
     OpInfoMap::Instance().Insert(cur_op_name, info);
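The detail::ParseAttrStr helper added above is small enough to mirror in isolation. A self-contained approximation (using std::invalid_argument in place of PADDLE_ENFORCE_NE, and a local trim lambda in place of string::trim_spaces, both of which are Paddle internals):

#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// Minimal analogue of detail::ParseAttrStr: split "<name>:<type>" once on
// the first colon and trim surrounding whitespace from both halves.
std::vector<std::string> ParseAttrStr(const std::string& attr) {
  auto pos = attr.find_first_of(':');
  if (pos == std::string::npos) {
    throw std::invalid_argument(
        "Invalid attribute string format. Expected `<name>:<type>`.");
  }
  auto trim = [](std::string s) {
    const char* ws = " \t";
    s.erase(0, s.find_first_not_of(ws));
    s.erase(s.find_last_not_of(ws) + 1);
    return s;
  };
  return {trim(attr.substr(0, pos)), trim(attr.substr(pos + 1))};
}

int main() {
  auto parsed = ParseAttrStr("int_attr: int");
  std::cout << parsed[0] << " / " << parsed[1] << "\n";  // int_attr / int
}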
paddle/fluid/platform/dynload/dynamic_loader.cc

@@ -378,9 +378,6 @@ void* GetOpDsoHandle(const std::string& dso_name) {
 #if defined(__APPLE__) || defined(__OSX__)
   PADDLE_THROW(platform::errors::Unimplemented(
       "Create custom cpp op outside framework do not support Apple."));
-#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "Create custom cpp op outside framework do not support Windows."));
 #else
   return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
 #endif
paddle/scripts/paddle_build.bat

@@ -114,23 +114,25 @@ rem ------pre install python requirement----------
 where python
 where pip
 pip install wheel --user
-pip install -r %work_dir%\python\requirements.txt --user
+pip install -r %work_dir%\python\unittest_py\requirements.txt --user
+pip install -r %work_dir%\python\requirements.txt --user
 if %ERRORLEVEL% NEQ 0 (
     echo pip install requirements.txt failed!
     exit /b 7
 )
 
 rem ------pre install clcache and init config----------
-pip install clcache --user
+rem pip install clcache --user
+pip uninstall -y clcache
 :: set USE_CLCACHE to enable clcache
-set USE_CLCACHE=1
+rem set USE_CLCACHE=1
 :: In some scenarios, CLCACHE_HARDLINK can save one file copy.
-set CLCACHE_HARDLINK=1
+rem set CLCACHE_HARDLINK=1
 :: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
-set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
+rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
 :: set maximum cache size to 20G
-clcache.exe -M 21474836480
+rem clcache.exe -M 21474836480
 
 rem ------show summary of current environment----------
 python %work_dir%\tools\summary_env.py

@@ -194,11 +196,28 @@ set start=%start:~4,10%
 @ECHO ON
 if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
-set PATH=%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
+set CUDA_PATH=%CUDA_TOOLKIT_ROOT_DIR%
+set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
 
 rem ------set third_party cache dir------
+
+: clear third party cache every once in a while
+for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%#
+set day_now=%datetime:~6,2%
+set day_before=-1
+set /p day_before=<%cache_dir%\day.txt
+if %day_now% NEQ %day_before% (
+    echo %day_now% > %cache_dir%\day.txt
+    type %cache_dir%\day.txt
+    if %day_now% EQU 25 (
+        rmdir %cache_dir%\third_party_GPU/ /s/q
+        rmdir %cache_dir%\third_party/ /s/q
+    )
+    if %day_now% EQU 10 (
+        rmdir %cache_dir%\third_party_GPU/ /s/q
+        rmdir %cache_dir%\third_party/ /s/q
+    )
+)
+
 if "%WITH_TPCACHE%"=="OFF" (
     set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party
     goto :cmake_impl

@@ -263,6 +282,9 @@ echo Build third_party successfully!
 set build_times=1
 :build_paddle
+:: reset clcache zero stats for collect PR's actual hit rate
+rem clcache.exe -z
+
 echo Build Paddle the %build_times% time:
 if "%WITH_CLCACHE%"=="OFF" (
     msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln

@@ -281,6 +303,11 @@ if %ERRORLEVEL% NEQ 0 (
 )
 
 echo Build Paddle successfully!
+echo 0 > %cache_dir%\error_code.txt
+type %cache_dir%\error_code.txt
+
+:: ci will collect clcache hit rate
+rem goto :collect_clcache_hits
 
 goto :eof

@@ -319,13 +346,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt
 @ECHO ON
 pip uninstall -y paddlepaddle
 pip uninstall -y paddlepaddle-gpu
-pip install -U %PADDLE_WHL_FILE_WIN% --user
+pip install %PADDLE_WHL_FILE_WIN% --user
 if %ERRORLEVEL% NEQ 0 (
     call paddle_winci\Scripts\deactivate.bat 2>NUL
     echo pip install whl package failed!
     exit /b 1
 )
+set CUDA_VISIBLE_DEVICES=0
 python %work_dir%\paddle\scripts\installation_validate.py
 goto :eof

@@ -383,7 +411,7 @@ if "%WITH_GPU%"=="ON" (
 :parallel_test_base_gpu
 echo ========================================
-echo Running GPU unit tests...
+echo Running GPU unit tests in parallel way ...
 echo ========================================
 
 setlocal enabledelayedexpansion

@@ -451,6 +479,7 @@ goto:eof
 echo ========================================
 echo Running CPU unit tests in parallel way ...
 echo ========================================
+ctest.exe -E "(%disable_ut_quickly%)" -LE %nightly_label% --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4
 
 goto :eof

@@ -622,6 +651,7 @@ taskkill /f /im vctip.exe 2>NUL
 taskkill /f /im cvtres.exe 2>NUL
 taskkill /f /im rc.exe 2>NUL
 wmic process where name="op_function_generator.exe" call terminate 2>NUL
+wmic process where name="python.exe" call terminate 2>NUL
 taskkill /f /im python.exe 2>NUL
 echo 0 > %cache_dir%\error_code.txt
 type %cache_dir%\error_code.txt
python/paddle/fluid/tests/CMakeLists.txt

@@ -9,7 +9,9 @@ endforeach()
 add_subdirectory(unittests)
 add_subdirectory(book)
 
-if(NOT APPLE AND NOT WIN32)
+# TODO: support New Custom OP on Mac
+if(NOT APPLE)
     add_subdirectory(custom_op)
 endif()
 
 set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
python/paddle/fluid/tests/custom_op/CMakeLists.txt

-if(WITH_GPU)
+# New custom OP can support Windows/Linux now
+if(WITH_GPU)
+    # 'test_custom_relu_op_setup/jit' compile .cc and .cu file
+    py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
+    py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
+
+    # Compiling shared library will cost some time, but running process is very fast.
+    set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
+    set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
+endif()
+
+py_test(test_sysconfig SRCS test_sysconfig.py)
+
+# 'test_dispatch' compile .cc file
+py_test(test_dispatch_jit SRCS test_dispatch_jit.py)
+set_tests_properties(test_dispatch_jit PROPERTIES TIMEOUT 120)
+
+py_test(test_multi_out_jit SRCS test_multi_out_jit.py)
+set_tests_properties(test_multi_out_jit PROPERTIES TIMEOUT 120)
+
+py_test(test_custom_attrs_jit SRCS test_custom_attrs_jit.py)
+set_tests_properties(test_custom_attrs_jit PROPERTIES TIMEOUT 120)
+
+if(NOT LINUX)
+    return()
+endif()
+
+# TODO(zhouwei): support test_check_abi and abi check on Windows
+py_test(test_check_abi SRCS test_check_abi.py)
+
+# Old custom OP only support Linux, only run on Linux
+py_test(test_custom_op SRCS test_custom_op.py)
+py_test(test_jit_load SRCS test_jit_load.py)
+py_test(test_setup_install SRCS test_setup_install.py)
+py_test(test_setup_build SRCS test_setup_build.py)
+
+set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
+set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
+set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
+
+if(WITH_ROCM)
+    hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
+elseif(WITH_GPU)
     nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
 else()
     cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)

@@ -16,19 +59,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES)
 LIST(REMOVE_ITEM TARGET_LIBRARIES glog)
 LIST(REMOVE_ITEM TARGET_LIBRARIES gflags)
 set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES})
 
-file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
-string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
-
-foreach(src ${TEST_OPS})
-    py_test(${src} SRCS ${src}.py)
-endforeach()
-
-# Compiling .so will cost some time, but running process is very fast.
-set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
-set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
-set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
-
-set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
-set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
-set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
python/paddle/fluid/tests/custom_op/attr_test_op.cc (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdlib>
#include <iostream>
#include <vector>

#include "paddle/extension.h"

template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}

std::vector<paddle::Tensor> AttrTestForward(
    const paddle::Tensor& x,
    bool bool_attr,
    int int_attr,
    float float_attr,
    int64_t int64_attr,
    std::string str_attr,
    std::vector<int> int_vec_attr,
    std::vector<float> float_vec_attr,
    std::vector<int64_t> int64_vec_attr,
    std::vector<std::string> str_vec_attr) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
      }));

  // Check attrs value
  if (bool_attr != true) {
    throw std::runtime_error("bool_attr value error.");
  }
  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }
  if (std::abs(float_attr - 3.14) > 1e-6) {
    throw std::runtime_error("float_attr value error.");
  }
  if (int64_attr != 10000000000) {
    throw std::runtime_error("int64_attr value error.");
  }
  if (str_attr != "StrAttr") {
    throw std::runtime_error("str_attr value error.");
  }

  if (int_vec_attr.size() != 3) {
    throw std::runtime_error("int_vec_attr size error.");
  } else {
    for (auto& value : int_vec_attr) {
      if (value != 10) {
        throw std::runtime_error("int_vec_attr value error.");
      }
    }
  }

  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  } else {
    for (auto& value : float_vec_attr) {
      if (std::abs(value - 3.14) > 1e-6) {
        throw std::runtime_error("float_vec_attr value error.");
      }
    }
  }

  if (int64_vec_attr.size() != 3) {
    throw std::runtime_error("int64_vec_attr size error.");
  } else {
    for (auto& value : int64_vec_attr) {
      if (value != 10000000000) {
        throw std::runtime_error("int64_vec_attr value error.");
      }
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  } else {
    for (auto& value : str_vec_attr) {
      if (value != "StrAttr") {
        throw std::runtime_error("str_vec_attr value error.");
      }
    }
  }

  return {out};
}

// The attrs of backward op must be the subset of attrs of forward op
std::vector<paddle::Tensor> AttrTestBackward(
    const paddle::Tensor& grad_out,
    int int_attr,
    std::vector<float> float_vec_attr,
    std::vector<std::string> str_vec_attr) {
  auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU);
  grad_x.reshape(grad_out.shape());

  PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
                               assign_cpu_kernel<data_t>(
                                   grad_out.data<data_t>(),
                                   grad_x.mutable_data<data_t>(),
                                   grad_out.size());
                             }));

  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }

  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  } else {
    for (auto& value : float_vec_attr) {
      if (std::abs(value - 3.14) > 1e-6) {
        throw std::runtime_error("float_vec_attr value error.");
      }
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  } else {
    for (auto& value : str_vec_attr) {
      if (value != "StrAttr") {
        throw std::runtime_error("str_vec_attr value error.");
      }
    }
  }

  return {grad_x};
}

std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  return {x_shape};
}

std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
  return {x_dtype};
}

PD_BUILD_OP("attr_test")
    .Inputs({"X"})
    .Outputs({"Out"})
    .Attrs({"bool_attr: bool",
            "int_attr: int",
            "float_attr: float",
            "int64_attr: int64_t",
            "str_attr: std::string",
            "int_vec_attr: std::vector<int>",
            "float_vec_attr: std::vector<float>",
            "int64_vec_attr: std::vector<int64_t>",
            "str_vec_attr: std::vector<std::string>"})
    .SetKernelFn(PD_KERNEL(AttrTestForward))
    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType))
    .SetBackwardOp("attr_test_grad")
    .Inputs({paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .Attrs({"int_attr: int",
            "float_vec_attr: std::vector<float>",
            "str_vec_attr: std::vector<std::string>"})
    .SetKernelFn(PD_KERNEL(AttrTestBackward));
python/paddle/fluid/tests/custom_op/relu_op_simple.cc → python/paddle/fluid/tests/custom_op/custom_relu_op.cc

@@ -17,13 +17,6 @@
 #include "paddle/extension.h"
 
-template <typename data_t>
-void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
-  for (int i = 0; i < x_numel; ++i) {
-    out_data[i] = value;
-  }
-}
-
 template <typename data_t>
 void relu_cpu_forward_kernel(const data_t* x_data,
                              data_t* out_data,

@@ -53,21 +46,8 @@ std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
         relu_cpu_forward_kernel<data_t>(
             x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
       }));
-  // fake multi output: Fake_float64 with float64 dtype
-  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
-  fake_float64.reshape(x.shape());
-
-  fill_constant_cpu_kernel<double>(
-      fake_float64.mutable_data<double>(x.place()), x.size(), 0.);
-
-  // fake multi output: ZFake_int32 with int32 dtype
-  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
-  zfake_int32.reshape(x.shape());
-
-  fill_constant_cpu_kernel<int32_t>(
-      zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);
-
-  return {out, fake_float64, zfake_int32};
+
+  return {out};
 }
 
 std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,

@@ -117,16 +97,16 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
 }
 
 std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
-  return {x_shape, x_shape, x_shape};
+  return {x_shape};
 }
 
 std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
-  return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
+  return {x_dtype};
 }
 
-PD_BUILD_OP("relu2")
+PD_BUILD_OP("custom_relu")
     .Inputs({"X"})
-    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
+    .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(ReluForward))
     .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
python/paddle/fluid/tests/custom_op/relu_op_simple.cu → python/paddle/fluid/tests/custom_op/custom_relu_op.cu

@@ -14,16 +14,6 @@
 #include "paddle/extension.h"
 
-template <typename data_t>
-__global__ void fill_constant_cuda_kernel(data_t* y, const int num, data_t value) {
-  int gid = blockIdx.x * blockDim.x + threadIdx.x;
-  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
-    y[i] = value;
-  }
-}
-
 template <typename data_t>
 __global__ void relu_cuda_forward_kernel(const data_t* x, data_t* y,

@@ -57,18 +47,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
         relu_cuda_forward_kernel<data_t><<<grid, block>>>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
       }));
-  // fake multi output: Fake_1
-  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kGPU);
-  fake_float64.reshape(x.shape());
-  fill_constant_cuda_kernel<double><<<grid, block>>>(
-      fake_float64.mutable_data<double>(x.place()), numel, 0.);
-  // fake multi output: ZFake_1
-  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kGPU);
-  zfake_int32.reshape(x.shape());
-  fill_constant_cuda_kernel<int32_t><<<grid, block>>>(
-      zfake_int32.mutable_data<int32_t>(x.place()), numel, 1);
-
-  return {out, fake_float64, zfake_int32};
+
+  return {out};
 }
 
 std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
python/paddle/fluid/tests/custom_op/relu_op3_simple.cc → python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc

@@ -29,11 +29,11 @@ std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
 std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
 
-// Reuse codes in `relu_op_simple.cc/cu` to register another custom operator
+// Reuse codes in `custom_relu_op.cc/cu` to register another custom operator
 // to test jointly compile multi operators at same time.
-PD_BUILD_OP("relu3")
+PD_BUILD_OP("custom_relu_dup")
     .Inputs({"X"})
-    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
+    .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(ReluForward))
     .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
python/paddle/fluid/tests/custom_op/setup_install_simple.py → python/paddle/fluid/tests/custom_op/custom_relu_setup.py

@@ -17,11 +17,14 @@ import os
 from utils import paddle_includes, extra_compile_args
 from paddle.utils.cpp_extension import CUDAExtension, setup
 
+# custom_relu_op_dup.cc is only used for multi ops test,
+# not a new op, if you want to test only one op, remove this
+# source file
 setup(
-    name='simple_setup_relu2',
+    name='custom_relu_module_setup',
     ext_modules=CUDAExtension(  # test for not specific name here.
         sources=[
-            'relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'
+            'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
         ],  # test for multi ops
         include_dirs=paddle_includes,
         extra_compile_args=extra_compile_args))
python/paddle/fluid/tests/custom_op/multi_out_test_op.cc
0 → 100644
浏览文件 @
d3e60959
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}

template <typename data_t>
void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = value;
  }
}

std::vector<paddle::Tensor> MultiOutCPU(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
      }));

  // fake multi output: Fake_float64 with float64 dtype
  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
  fake_float64.reshape(x.shape());
  fill_constant_cpu_kernel<double>(
      fake_float64.mutable_data<double>(x.place()), x.size(), 0.);

  // fake multi output: ZFake_int32 with int32 dtype
  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
  zfake_int32.reshape(x.shape());
  fill_constant_cpu_kernel<int32_t>(
      zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);

  return {out, fake_float64, zfake_int32};
}

std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  return {x_shape, x_shape, x_shape};
}

std::vector<paddle::DataType> InferDtype(paddle::DataType x_dtype) {
  return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
}

PD_BUILD_OP("multi_out")
    .Inputs({"X"})
    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
    .SetKernelFn(PD_KERNEL(MultiOutCPU))
    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
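Because `multi_out` declares three outputs, `InferShape` and `InferDtype` must each return three entries and callers receive a list of three tensors. A minimal dynamic-graph sketch (mirroring `test_multi_out_jit.py` later in this diff):

    import numpy as np
    import paddle
    from paddle.utils.cpp_extension import load

    # JIT-compile the C++ file above into a loadable module.
    multi_out_module = load(name='multi_out_jit', sources=['multi_out_test_op.cc'])

    x = paddle.to_tensor(np.random.uniform(-1, 1, [4, 8]).astype('float32'))
    out, zero_float64, one_int32 = multi_out_module.multi_out(x)
    # Fake_float64 is all zeros (float64); ZFake_int32 is all ones (int32).
    assert str(zero_float64.numpy().dtype) == 'float64'
    assert str(one_int32.numpy().dtype) == 'int32'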
python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
0 → 100644 (new file)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np

import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import run_cmd

# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format(get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
custom_attrs = load(
    name='custom_attrs_jit',
    sources=['attr_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)


class TestJitCustomAttrs(unittest.TestCase):
    def test_attr_value(self):
        paddle.set_device('cpu')
        # prepare test value
        bool_attr = True
        int_attr = 10
        float_attr = 3.14
        int64_attr = 10000000000
        str_attr = "StrAttr"
        int_vec_attr = [10, 10, 10]
        float_vec_attr = [3.14, 3.14, 3.14]
        int64_vec_attr = [10000000000, 10000000000, 10000000000]
        str_vec_attr = ["StrAttr", "StrAttr", "StrAttr"]

        x = paddle.ones([2, 2], dtype='float32')
        x.stop_gradient = False
        out = custom_attrs.attr_test(
            x, bool_attr, int_attr, float_attr, int64_attr, str_attr,
            int_vec_attr, float_vec_attr, int64_vec_attr, str_vec_attr)
        out.stop_gradient = False
        out.backward()

        self.assertTrue(np.array_equal(x.numpy(), out.numpy()))


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
0 → 100644 (new file)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import subprocess
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static

# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
    get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this
# source file
custom_module = load(
    name='custom_relu_module_jit',
    sources=['custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)


class TestJITLoad(unittest.TestCase):
    def setUp(self):
        self.custom_ops = [
            custom_module.custom_relu, custom_module.custom_relu_dup
        ]
        self.dtypes = ['float32', 'float64']
        self.devices = ['cpu', 'gpu']

    def test_static(self):
        for device in self.devices:
            for dtype in self.dtypes:
                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                for custom_op in self.custom_ops:
                    out = custom_relu_static(custom_op, device, dtype, x)
                    pd_out = custom_relu_static(custom_op, device, dtype, x,
                                                False)
                    self.assertTrue(
                        np.array_equal(out, pd_out),
                        "custom op out: {},\n paddle api out: {}".format(
                            out, pd_out))

    def test_dynamic(self):
        for device in self.devices:
            for dtype in self.dtypes:
                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                for custom_op in self.custom_ops:
                    out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
                                                      x)
                    pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device,
                                                            dtype, x, False)
                    self.assertTrue(
                        np.array_equal(out, pd_out),
                        "custom op out: {},\n paddle api out: {}".format(
                            out, pd_out))
                    self.assertTrue(
                        np.array_equal(x_grad, pd_x_grad),
                        "custom op x grad: {},\n paddle api x grad: {}".format(
                            x_grad, pd_x_grad))


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py → python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
...
...
@@ -23,13 +23,13 @@ import numpy as np
 from paddle.utils.cpp_extension.extension_utils import run_cmd

-def relu2_dynamic(func, device, dtype, np_x, use_func=True):
+def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     paddle.set_device(device)

     t = paddle.to_tensor(np_x)
     t.stop_gradient = False

-    out = func(t)[0] if use_func else paddle.nn.functional.relu(t)
+    out = func(t) if use_func else paddle.nn.functional.relu(t)
     out.stop_gradient = False

     out.backward()
...
...
@@ -37,7 +37,12 @@ def relu2_dynamic(func, device, dtype, np_x, use_func=True):
     return out.numpy(), t.grad

-def relu2_static(func, device, dtype, np_x, use_func=True):
+def custom_relu_static(func, device, dtype, np_x, use_func=True,
+                       test_infer=False):
     paddle.enable_static()
     paddle.set_device(device)
...
...
@@ -45,8 +50,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
     with static.program_guard(static.Program()):
         x = static.data(name='X', shape=[None, 8], dtype=dtype)
         x.stop_gradient = False
-        # out, fake_float64, fake_int32
-        out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
+        out = func(x) if use_func else paddle.nn.functional.relu(x)
         static.append_backward(out)

         exe = static.Executor()
...
...
@@ -60,7 +64,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
     return out_v

-def relu2_static_pe(func, device, dtype, np_x, use_func=True):
+def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
     paddle.enable_static()
     paddle.set_device(device)
...
...
@@ -69,7 +73,7 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
     with static.program_guard(static.Program()):
         x = static.data(name='X', shape=[None, 8], dtype=dtype)
         x.stop_gradient = False
-        out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
+        out = func(x) if use_func else paddle.nn.functional.relu(x)
         static.append_backward(out)

         exe = static.Executor()
...
...
@@ -87,11 +91,58 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
     return out_v

+def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
+    paddle.set_device(device)
+
+    with static.scope_guard(static.Scope()):
+        with static.program_guard(static.Program()):
+            # simple module
+            data = static.data(
+                name='data', shape=[None, 1, 28, 28], dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+
+            hidden = static.nn.fc(data, size=128)
+            hidden = func(hidden)
+            hidden = static.nn.fc(hidden, size=128)
+            predict = static.nn.fc(hidden, size=10, activation='softmax')
+            loss = paddle.nn.functional.cross_entropy(input=hidden, label=label)
+            avg_loss = paddle.mean(loss)
+
+            opt = paddle.optimizer.SGD(learning_rate=0.1)
+            opt.minimize(avg_loss)
+
+            # run start up model
+            exe = static.Executor()
+            exe.run(static.default_startup_program())
+
+            # train
+            for i in range(4):
+                avg_loss_v = exe.run(static.default_main_program(),
+                                     feed={'data': np_data,
+                                           'label': np_label},
+                                     fetch_list=[avg_loss])
+
+            # save inference model
+            static.save_inference_model(path_prefix, [data], [predict], exe)
+
+            # get train predict value
+            predict_v = exe.run(static.default_main_program(),
+                                feed={'data': np_data,
+                                      'label': np_label},
+                                fetch_list=[predict])
+
+    return predict_v
+
 class TestNewCustomOpSetUpInstall(unittest.TestCase):
     def setUp(self):
         cur_dir = os.path.dirname(os.path.abspath(__file__))
         # compile, install the custom op egg into site-packages under background
-        cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir)
+        if os.name == 'nt':
+            cmd = 'cd /d {} && python custom_relu_setup.py install'.format(
+                cur_dir)
+        else:
+            cmd = 'cd {} && python custom_relu_setup.py install'.format(cur_dir)
         run_cmd(cmd)

         # NOTE(Aurelius84): Normally, it's no need to add following codes for users.
...
...
@@ -99,28 +150,42 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         # sys.path has been updated. So we update it manually.

         # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
-        site_dir = site.getsitepackages()[0]
+        if os.name == 'nt':
+            # NOTE(zhouwei25): getsitepackages on windows will return a list: [python install dir, site packages dir]
+            site_dir = site.getsitepackages()[1]
+        else:
+            site_dir = site.getsitepackages()[0]
         custom_egg_path = [
-            x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x
+            x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x
         ]
         assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
             custom_egg_path)
         sys.path.append(os.path.join(site_dir, custom_egg_path[0]))

         # usage: import the package directly
-        import simple_setup_relu2
-        self.custom_ops = [simple_setup_relu2.relu2, simple_setup_relu2.relu3]
+        import custom_relu_module_setup
+        # `custom_relu_dup` is the same as `custom_relu`
+        self.custom_ops = [
+            custom_relu_module_setup.custom_relu,
+            custom_relu_module_setup.custom_relu_dup
+        ]

         self.dtypes = ['float32', 'float64']
         self.devices = ['cpu', 'gpu']

+        # config seed
+        SEED = 2021
+        paddle.seed(SEED)
+        paddle.framework.random._manual_program_seed(SEED)
+
     def test_static(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out = relu2_static(custom_op, device, dtype, x)
-                    pd_out = relu2_static(custom_op, device, dtype, x, False)
+                    out = custom_relu_static(custom_op, device, dtype, x)
+                    pd_out = custom_relu_static(custom_op, device, dtype, x,
+                                                False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
...
@@ -131,8 +196,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out = relu2_static_pe(custom_op, device, dtype, x)
-                    pd_out = relu2_static_pe(custom_op, device, dtype, x, False)
+                    out = custom_relu_static_pe(custom_op, device, dtype, x)
+                    pd_out = custom_relu_static_pe(custom_op, device, dtype, x,
+                                                   False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
...
@@ -143,9 +209,10 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
-                    pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
-                                                      x, False)
+                    out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
+                                                      x)
+                    pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device,
+                                                            dtype, x, False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
...
@@ -155,6 +222,28 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
"custom op x grad: {},
\n
paddle api x grad: {}"
.
format
(
x_grad
,
pd_x_grad
))
def
test_static_save_and_load_inference_model
(
self
):
paddle
.
enable_static
()
np_data
=
np
.
random
.
random
((
1
,
1
,
28
,
28
)).
astype
(
"float32"
)
np_label
=
np
.
random
.
random
((
1
,
1
)).
astype
(
"int64"
)
path_prefix
=
"custom_op_inference/custom_relu"
for
device
in
self
.
devices
:
predict
=
custom_relu_static_inference
(
self
.
custom_ops
[
0
],
device
,
np_data
,
np_label
,
path_prefix
)
# load inference model
with
static
.
scope_guard
(
static
.
Scope
()):
exe
=
static
.
Executor
()
[
inference_program
,
feed_target_names
,
fetch_targets
]
=
static
.
load_inference_model
(
path_prefix
,
exe
)
predict_infer
=
exe
.
run
(
inference_program
,
feed
=
{
feed_target_names
[
0
]:
np_data
},
fetch_list
=
fetch_targets
)
self
.
assertTrue
(
np
.
array_equal
(
predict
,
predict_infer
),
"custom op predict: {},
\n
custom op infer predict: {}"
.
format
(
predict
,
predict_infer
))
paddle
.
disable_static
()
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/custom_op/test_dispatch.py → python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
...
...
@@ -16,14 +16,23 @@ import os
 import unittest
 import paddle
 import numpy as np
-from paddle.utils.cpp_extension import load
+from paddle.utils.cpp_extension import load, get_build_directory
 from utils import paddle_includes, extra_compile_args
+from paddle.utils.cpp_extension.extension_utils import run_cmd
+
+# Because Windows don't use docker, the shared lib already exists in the
+# cache dir, it will not be compiled again unless the shared lib is removed.
+file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory())
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)

 dispatch_op = load(
     name='dispatch_op',
     sources=['dispatch_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args)  # add for Coverage CI
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    verbose=True)

 class TestJitDispatch(unittest.TestCase):
...
...
python/paddle/fluid/tests/custom_op/test_jit_load.py
...
...
@@ -29,7 +29,8 @@ custom_module = load(
     sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
     interpreter='python',  # add for unittest
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI,
+    extra_cuda_cflags=extra_compile_args,  # add for split cpp/cuda flags
     verbose=True  # add for unittest
 )
...
...
python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py → python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
...
...
@@ -13,81 +13,54 @@
 # limitations under the License.

 import os
-import subprocess
 import unittest
-import paddle
 import numpy as np
+import paddle
-from paddle.utils.cpp_extension import load
+from paddle.utils.cpp_extension import load, get_build_directory
+from paddle.utils.cpp_extension.extension_utils import run_cmd
 from utils import paddle_includes, extra_compile_args
-from test_simple_custom_op_setup import relu2_dynamic, relu2_static
+
+# Because Windows don't use docker, the shared lib already exists in the
+# cache dir, it will not be compiled again unless the shared lib is removed.
+file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory())
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)

 # Compile and load custom op Just-In-Time.
-custom_module = load(
-    name='simple_jit_relu2',
-    sources=['relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'],
+multi_out_module = load(
+    name='multi_out_jit',
+    sources=['multi_out_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args)  # add for Coverage CI
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    verbose=True)

-class TestJITLoad(unittest.TestCase):
-    def setUp(self):
-        self.custom_ops = [custom_module.relu2, custom_module.relu3]
-        self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
-
-    def test_static(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                for custom_op in self.custom_ops:
-                    out = relu2_static(custom_op, device, dtype, x)
-                    pd_out = relu2_static(custom_op, device, dtype, x, False)
-                    self.assertTrue(
-                        np.array_equal(out, pd_out),
-                        "custom op out: {},\n paddle api out: {}".format(
-                            out, pd_out))
-
-    def test_dynamic(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                for custom_op in self.custom_ops:
-                    out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
-                    pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
-                                                      x, False)
-                    self.assertTrue(
-                        np.array_equal(out, pd_out),
-                        "custom op out: {},\n paddle api out: {}".format(
-                            out, pd_out))
-                    self.assertTrue(
-                        np.array_equal(x_grad, pd_x_grad),
-                        "custom op x grad: {},\n paddle api x grad: {}".format(
-                            x_grad, pd_x_grad))

 class TestMultiOutputDtypes(unittest.TestCase):
     def setUp(self):
-        self.custom_op = custom_module.relu2
+        self.custom_op = multi_out_module.multi_out
         self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
+        self.devices = ['cpu']

-    def test_static(self):
-        paddle.enable_static()
-        for device in self.devices:
-            for dtype in self.dtypes:
-                res = self.run_static(device, dtype)
-                self.check_multi_outputs(res)
-        paddle.disable_static()
-
-    def run_static(self, device, dtype):
-        paddle.set_device(device)
-        x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-
-        with paddle.static.scope_guard(paddle.static.Scope()):
-            with paddle.static.program_guard(paddle.static.Program()):
-                x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
-                outs = self.custom_op(x)
-
-                exe = paddle.static.Executor()
-                exe.run(paddle.static.default_startup_program())
-                res = exe.run(paddle.static.default_main_program(),
-                              feed={'X': x_data},
-                              fetch_list=outs)
-
-                return res
+    def test_dynamic(self):
+        for device in self.devices:
+            for dtype in self.dtypes:
+                paddle.set_device(device)
+                x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+                x = paddle.to_tensor(x_data)
+                outs = self.custom_op(x)
+
+                self.assertTrue(len(outs) == 3)
+                self.check_multi_outputs(outs, True)

     def check_multi_outputs(self, outs, is_dynamic=False):
         out, zero_float64, one_int32 = outs
...
...
@@ -103,22 +76,24 @@ class TestMultiOutputDtypes(unittest.TestCase):
         self.assertTrue(
             np.array_equal(one_int32, np.ones([4, 8]).astype('int32')))

+    def run_static(self, device, dtype):
+        paddle.set_device(device)
+        x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+
+        with paddle.static.scope_guard(paddle.static.Scope()):
+            with paddle.static.program_guard(paddle.static.Program()):
+                x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
+                outs = self.custom_op(x)
+
+                exe = paddle.static.Executor()
+                exe.run(paddle.static.default_startup_program())
+                res = exe.run(paddle.static.default_main_program(),
+                              feed={'X': x_data},
+                              fetch_list=outs)
+
+                return res
+
+    def test_static(self):
+        paddle.enable_static()
+        for device in self.devices:
+            for dtype in self.dtypes:
+                res = self.run_static(device, dtype)
+                self.check_multi_outputs(res)
+        paddle.disable_static()

     def test_dynamic(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 paddle.set_device(device)
                 x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 x = paddle.to_tensor(x_data)
                 outs = self.custom_op(x)

                 self.assertTrue(len(outs) == 3)
                 self.check_multi_outputs(outs, True)

 if __name__ == '__main__':
...
...
python/paddle/fluid/tests/custom_op/utils.py
...
...
@@ -23,8 +23,8 @@ site_packages_path = get_python_lib()
 # paddle include directory. Because the following path is generated after installing
 # PaddlePaddle whl. So here we specify `include_dirs` to avoid errors in CI.
 paddle_includes = [
-    os.path.join(site_packages_path, 'paddle/include'),
-    os.path.join(site_packages_path, 'paddle/include/third_party')
+    os.path.join(site_packages_path, 'paddle', 'include'),
+    os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
 ]
# TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
...
...
python/paddle/utils/cpp_extension/__init__.py
...
...
@@ -25,6 +25,5 @@ from . import cpp_extension
 from . import extension_utils

 __all__ = [
-    'CppExtension', 'CUDAExtension', 'BuildExtension', 'load', 'setup',
-    'get_build_directory'
+    'CppExtension', 'CUDAExtension', 'load', 'setup', 'get_build_directory'
 ]
python/paddle/utils/cpp_extension/cpp_extension.py
(This diff is collapsed.)
python/paddle/utils/cpp_extension/extension_utils.py
...
...
@@ -16,7 +16,6 @@ import os
 import re
 import six
 import sys
-import copy
 import glob
 import logging
 import collections
...
...
@@ -38,11 +37,17 @@ logger = logging.getLogger("utils.cpp_extension")
 OS_NAME = sys.platform
 IS_WINDOWS = OS_NAME.startswith('win')
-NVCC_COMPILE_FLAGS = [
-    '-ccbin', 'cc', '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO',
-    '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC'
-]
+
+MSVC_COMPILE_FLAGS = [
+    '/MT', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018',
+    '/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL',
+    '/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO'
+]
+
+MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_framework.lib']
+
+COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']

 GCC_MINI_VERSION = (5, 4, 0)
 # Give warning if using wrong compiler
 WRONG_COMPILER_WARNING = '''
...
...
@@ -80,9 +85,17 @@ information
 '''

 USING_NEW_CUSTOM_OP_LOAD_METHOD = True

+DEFAULT_OP_ATTR_NAMES = [
+    core.op_proto_and_checker_maker.kOpRoleAttrName(),
+    core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
+    core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
+    core.op_proto_and_checker_maker.kOpCreationCallstackAttrName(),
+    core.op_proto_and_checker_maker.kOpDeviceAttrName()
+]

 # NOTE(chenweihang): In order to be compatible with
 # the two custom op define method, after removing
 # old method, we can remove them together
 def use_new_custom_op_load_method(*args):
     global USING_NEW_CUSTOM_OP_LOAD_METHOD
...
@@ -206,11 +219,23 @@ class CustomOpInfo:
return
next
(
reversed
(
self
.
op_info_map
.
items
()))
def
prepare_unix_cflags
(
cflags
):
def
prepare_unix_c
uda
flags
(
cflags
):
"""
Prepare all necessary compiled flags for nvcc compiling CUDA files.
"""
cflags
=
NVCC_COMPILE_FLAGS
+
cflags
+
get_cuda_arch_flags
(
cflags
)
cflags
=
COMMON_NVCC_FLAGS
+
[
'-ccbin'
,
'cc'
,
'-Xcompiler'
,
'-fPIC'
,
'-w'
,
'--expt-relaxed-constexpr'
,
'-DNVCC'
]
+
cflags
+
get_cuda_arch_flags
(
cflags
)
return
cflags
def
prepare_win_cudaflags
(
cflags
):
"""
Prepare all necessary compiled flags for nvcc compiling CUDA files.
"""
cflags
=
COMMON_NVCC_FLAGS
+
[
'-w'
]
+
cflags
+
get_cuda_arch_flags
(
cflags
)
return
cflags
...
...
@@ -238,13 +263,14 @@ def get_cuda_arch_flags(cflags):
 def normalize_extension_kwargs(kwargs, use_cuda=False):
     """
     Normalize include_dirs, library_dir and other attributes in kwargs.
     """
     assert isinstance(kwargs, dict)
     # append necessary include dir path of paddle
     include_dirs = kwargs.get('include_dirs', [])
     include_dirs.extend(find_paddle_includes(use_cuda))
     kwargs['include_dirs'] = include_dirs

     # append necessary lib path of paddle
...
...
@@ -252,50 +278,46 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
     library_dirs.extend(find_paddle_libraries(use_cuda))
     kwargs['library_dirs'] = library_dirs

-    # add runtime library dirs
-    runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
-    runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
-    kwargs['runtime_library_dirs'] = runtime_library_dirs
-
-    # append compile flags
+    # append compile flags and check settings of compiler
     extra_compile_args = kwargs.get('extra_compile_args', [])
-    extra_compile_args.extend(['-g', '-w'])  # diable warnings
-    kwargs['extra_compile_args'] = extra_compile_args
-
-    # append link flags
-    extra_link_args = kwargs.get('extra_link_args', [])
-    extra_link_args.append('-lpaddle_framework')
-    if use_cuda:
-        extra_link_args.append('-lcudart')
-    kwargs['extra_link_args'] = extra_link_args
-
-    kwargs['language'] = 'c++'
-    return kwargs
-
-
-def find_paddle_includes(use_cuda=False):
-    """
-    Return Paddle necessary include dir path.
-    """
-    # pythonXX/site-packages/paddle/include
-    paddle_include_dir = get_include()
-    third_party_dir = os.path.join(paddle_include_dir, 'third_party')
-
-    include_dirs = [paddle_include_dir, third_party_dir]
-
-    return include_dirs
-
-
-def find_cuda_includes():
-    cuda_home = find_cuda_home()
-    if cuda_home is None:
-        raise ValueError(
-            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
-        )
-
-    return [os.path.join(cuda_home, 'lib64')]
+    if isinstance(extra_compile_args, dict):
+        for compiler in ['cxx', 'nvcc']:
+            if compiler not in extra_compile_args:
+                extra_compile_args[compiler] = []
+
+    if IS_WINDOWS:
+        # TODO(zhouwei): may append compile flags in future
+        pass
+        # append link flags
+        extra_link_args = kwargs.get('extra_link_args', [])
+        extra_link_args.extend(MSVC_LINK_FLAGS)
+        if use_cuda:
+            extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
+        kwargs['extra_link_args'] = extra_link_args
+    else:
+        # append compile flags
+        add_compile_flag(extra_compile_args, ['-g', '-w'])  # disable warnings
+
+        # append link flags
+        extra_link_args = kwargs.get('extra_link_args', [])
+        if use_new_custom_op_load_method():
+            extra_link_args.append('-lpaddle_custom_op')
+        else:
+            extra_link_args.append('-lpaddle_framework')
+        if use_cuda:
+            extra_link_args.append('-lcudart')
+        kwargs['extra_link_args'] = extra_link_args
+
+        # add runtime library dirs
+        runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
+        runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
+        kwargs['runtime_library_dirs'] = runtime_library_dirs
+
+    kwargs['extra_compile_args'] = extra_compile_args
+
+    kwargs['language'] = 'c++'
+    return kwargs
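Net effect: `extra_compile_args` may now arrive either as a flat list or as a `{'cxx': [...], 'nvcc': [...]}` dict, and the link and runtime settings diverge per platform. An illustrative sketch of the Linux branch under the new load method (the expected values are assumptions spelled out from the code above, not output of a real run):

    kwargs = {'extra_compile_args': {'cxx': ['-O2']}}
    # After normalize_extension_kwargs(kwargs, use_cuda=True) one would expect:
    #   kwargs['extra_compile_args'] == {'cxx': ['-O2', '-g', '-w'], 'nvcc': ['-g', '-w']}
    #   kwargs['extra_link_args']   == ['-lpaddle_custom_op', '-lcudart']
    # plus include_dirs / library_dirs / runtime_library_dirs extended with
    # the detected Paddle and CUDA directories.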
 def find_cuda_home():
...
...
@@ -315,19 +337,22 @@ def find_cuda_home():
             if six.PY3:
                 nvcc_path = nvcc_path.decode()
             nvcc_path = nvcc_path.rstrip('\r\n')
+
             # for example: /usr/local/cuda/bin/nvcc
             cuda_home = os.path.dirname(os.path.dirname(nvcc_path))
     except:
         if IS_WINDOWS:
             # search from default NVIDIA GPU path
             candidate_paths = glob.glob(
-                'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
+                'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*.*')
             if len(candidate_paths) > 0:
                 cuda_home = candidate_paths[0]
         else:
             cuda_home = "/usr/local/cuda"
     # step 3. check whether path is valid
-    if not os.path.exists(cuda_home) and core.is_compiled_with_cuda():
+    if cuda_home and not os.path.exists(
+            cuda_home) and core.is_compiled_with_cuda():
         cuda_home = None
         warnings.warn(
             "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it."
...
...
@@ -336,27 +361,73 @@ def find_cuda_home():
     return cuda_home

+def find_cuda_includes():
+    """
+    Use heuristic method to find cuda include path
+    """
+    cuda_home = find_cuda_home()
+    if cuda_home is None:
+        raise ValueError(
+            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
+        )
+
+    return [os.path.join(cuda_home, 'include')]

 def find_paddle_includes(use_cuda=False):
     """
     Return Paddle necessary include dir path.
     """
     # pythonXX/site-packages/paddle/include
     paddle_include_dir = get_include()
     third_party_dir = os.path.join(paddle_include_dir, 'third_party')

     include_dirs = [paddle_include_dir, third_party_dir]

+    if use_cuda:
+        cuda_include_dir = find_cuda_includes()
+        include_dirs.extend(cuda_include_dir)
+
     return include_dirs

+def find_cuda_libraries():
+    """
+    Use heuristic method to find cuda static lib path
+    """
+    cuda_home = find_cuda_home()
+    if cuda_home is None:
+        raise ValueError(
+            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
+        )
+    if IS_WINDOWS:
+        cuda_lib_dir = [os.path.join(cuda_home, 'lib', 'x64')]
+    else:
+        cuda_lib_dir = [os.path.join(cuda_home, 'lib64')]
+
+    return cuda_lib_dir

 def find_paddle_libraries(use_cuda=False):
     """
     Return Paddle necessary library dir path.
     """
     # pythonXX/site-packages/paddle/libs
     paddle_lib_dirs = [get_lib()]
+
     if use_cuda:
-        cuda_dirs = find_cuda_includes()
-        paddle_lib_dirs.extend(cuda_dirs)
+        cuda_lib_dir = find_cuda_libraries()
+        paddle_lib_dirs.extend(cuda_lib_dir)
+
     return paddle_lib_dirs

-def add_compile_flag(extension, flag):
-    extra_compile_args = copy.deepcopy(extension.extra_compile_args)
+def add_compile_flag(extra_compile_args, flags):
+    assert isinstance(flags, list)
     if isinstance(extra_compile_args, dict):
         for args in extra_compile_args.values():
-            args.append(flag)
+            args.extend(flags)
     else:
-        extra_compile_args.append(flag)
-
-    extension.extra_compile_args = extra_compile_args
+        extra_compile_args.extend(flags)
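A quick sketch of the new in-place behavior for both accepted shapes (the helper is repeated here so the snippet runs standalone):

    def add_compile_flag(extra_compile_args, flags):
        # same logic as the function above
        assert isinstance(flags, list)
        if isinstance(extra_compile_args, dict):
            for args in extra_compile_args.values():
                args.extend(flags)
        else:
            extra_compile_args.extend(flags)

    flat = ['-O2']
    add_compile_flag(flat, ['-g', '-w'])
    print(flat)   # ['-O2', '-g', '-w']

    split = {'cxx': [], 'nvcc': ['-DNVCC']}
    add_compile_flag(split, ['-g', '-w'])
    print(split)  # {'cxx': ['-g', '-w'], 'nvcc': ['-DNVCC', '-g', '-w']}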
 def is_cuda_file(path):
...
...
@@ -369,17 +440,34 @@ def is_cuda_file(path):
 def get_build_directory(verbose=False):
     """
-    Return paddle extension root directory, default specific by `PADDLE_EXTENSION_DIR`
+    Return paddle extension root directory to put shared library. It could be specified by
+    ``export PADDLE_EXTENSION_DIR=XXX`` . If not set, ``~/.cache/paddle_extension`` will be used
+    by default.
+
+    Returns:
+        The root directory of compiling customized operators.
+
+    Examples:
+
+    .. code-block:: python
+
+        from paddle.utils.cpp_extension import get_build_directory
+        build_dir = get_build_directory()
+        print(build_dir)
+
     """
     root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR')
     if root_extensions_directory is None:
         dir_name = "paddle_extensions"
-        if OS_NAME.startswith('linux'):
-            root_extensions_directory = os.path.join(
-                os.path.expanduser('~/.cache'), dir_name)
-        else:
-            # TODO(Aurelius84): consider wind32/macOs
-            raise NotImplementedError("Only support Linux now.")
+        root_extensions_directory = os.path.join(
+            os.path.expanduser('~/.cache'), dir_name)
+        if IS_WINDOWS:
+            root_extensions_directory = os.path.normpath(
+                root_extensions_directory)
+        elif OS_NAME.startswith('darwin'):
+            # TODO(Aurelius84): consider macOs
+            raise NotImplementedError("Not support Mac now.")

         log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
               format(root_extensions_directory), verbose)
...
...
@@ -404,16 +492,22 @@ def parse_op_info(op_name):
     in_names = [x.name for x in op_proto.inputs]
     out_names = [x.name for x in op_proto.outputs]
+    attr_names = [
+        x.name for x in op_proto.attrs if x.name not in DEFAULT_OP_ATTR_NAMES
+    ]

-    return in_names, out_names
+    return in_names, out_names, attr_names

 def _import_module_from_library(module_name, build_directory, verbose=False):
     """
-    Load .so shared library and import it as callable python module.
+    Load shared library and import it as callable python module.
     """
-    # TODO(Aurelius84): Consider file suffix is .dll on Windows Platform.
-    ext_path = os.path.join(build_directory, module_name + '.so')
+    if IS_WINDOWS:
+        dynamic_suffix = '.pyd'
+    else:
+        dynamic_suffix = '.so'
+    ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
    if not os.path.exists(ext_path):
        raise FileNotFoundError("Extension path: {} does not exist.".format(
            ext_path))
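So the resolved extension path now depends on the platform; for a hypothetical module the loader would look for:

    import os

    build_directory = '/tmp/build'     # hypothetical build dir
    module_name = 'custom_attrs_jit'   # hypothetical module name
    for suffix in ('.so', '.pyd'):     # Linux/macOS vs. Windows
        print(os.path.join(build_directory, module_name + suffix))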
...
...
@@ -448,7 +542,7 @@ def _generate_python_module(module_name,
 def _custom_api_content(op_name):
-    params_str, ins_str, outs_str = _get_api_inputs_str(op_name)
+    params_str, ins_str, attrs_str, outs_str = _get_api_inputs_str(op_name)

     API_TEMPLATE = textwrap.dedent("""
         from paddle.fluid.layer_helper import LayerHelper
...
...
@@ -456,8 +550,9 @@ def _custom_api_content(op_name):
         def {op_name}({inputs}):
             helper = LayerHelper("{op_name}", **locals())

-            # prepare inputs and output
+            # prepare inputs and outputs
             ins = {ins}
+            attrs = {attrs}
             outs = {{}}
             out_names = {out_names}
             for out_name in out_names:
...
...
@@ -465,7 +560,7 @@ def _custom_api_content(op_name):
                 # in runtime.
                 outs[out_name] = helper.create_variable(dtype='float32')

-            helper.append_op(type="{op_name}", inputs=ins, outputs=outs)
+            helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)

             res = [outs[out_name] for out_name in out_names]
...
...
@@ -474,7 +569,11 @@ def _custom_api_content(op_name):
     # generate python api file
     api_content = API_TEMPLATE.format(
-        op_name=op_name, inputs=params_str, ins=ins_str, out_names=outs_str)
+        op_name=op_name,
+        inputs=params_str,
+        ins=ins_str,
+        attrs=attrs_str,
+        out_names=outs_str)

     return api_content
...
...
@@ -505,22 +604,30 @@ def _get_api_inputs_str(op_name):
"""
Returns string of api parameters and inputs dict.
"""
in_names
,
out_names
=
parse_op_info
(
op_name
)
in_names
,
out_names
,
attr_names
=
parse_op_info
(
op_name
)
# e.g: x, y, z
params_str
=
','
.
join
([
p
.
lower
()
for
p
in
in_names
])
param_names
=
in_names
+
attr_names
params_str
=
','
.
join
([
p
.
lower
()
for
p
in
param_names
])
# e.g: {'X': x, 'Y': y, 'Z': z}
ins_str
=
"{%s}"
%
','
.
join
(
[
"'{}' : {}"
.
format
(
in_name
,
in_name
.
lower
())
for
in_name
in
in_names
])
# e.g: {'num': n}
attrs_str
=
"{%s}"
%
","
.
join
([
"'{}' : {}"
.
format
(
attr_name
,
attr_name
.
lower
())
for
attr_name
in
attr_names
])
# e.g: ['Out', 'Index']
outs_str
=
"[%s]"
%
','
.
join
([
"'{}'"
.
format
(
name
)
for
name
in
out_names
])
return
params_str
,
ins_str
,
outs_str
return
params_str
,
ins_str
,
attrs_str
,
outs_str
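For a hypothetical operator registered with input `X`, one attribute `num`, and output `Out` (names invented for illustration), the four strings would come out roughly as:

    # Hypothetical op: Inputs({"X"}), one attribute "num", Outputs({"Out"}).
    params_str = 'x,num'         # python signature becomes: def my_op(x, num)
    ins_str = "{'X' : x}"        # inputs dict passed to helper.append_op
    attrs_str = "{'num' : num}"  # attrs dict passed to helper.append_op
    outs_str = "['Out']"         # output names used to create variables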
 def _write_setup_file(name,
                       sources,
                       file_path,
+                      build_dir,
                       include_dirs,
-                      compile_flags,
+                      extra_cxx_cflags,
+                      extra_cuda_cflags,
                       link_args,
                       verbose=False):
     """
...
...
@@ -530,18 +637,21 @@ def _write_setup_file(name,
     import os
     from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup
     from paddle.utils.cpp_extension import get_build_directory
+    from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
+
+    use_new_custom_op_load_method({use_new_method})

     setup(
         name='{name}',
         ext_modules=[
             {prefix}Extension(
                 sources={sources},
                 include_dirs={include_dirs},
-                extra_compile_args={extra_compile_args},
+                extra_compile_args={{'cxx':{extra_cxx_cflags}, 'nvcc':{extra_cuda_cflags}}},
                 extra_link_args={extra_link_args})],
         cmdclass={{"build_ext" : BuildExtension.with_options(
-            output_dir=get_build_directory(),
-            no_python_abi_suffix=True,
-            use_new_method={use_new_method})
+            output_dir=r'{build_dir}',
+            no_python_abi_suffix=True)
         }})""").lstrip()

     with_cuda = False
...
...
@@ -554,8 +664,10 @@ def _write_setup_file(name,
         prefix='CUDA' if with_cuda else 'Cpp',
         sources=list2str(sources),
         include_dirs=list2str(include_dirs),
-        extra_compile_args=list2str(compile_flags),
+        extra_cxx_cflags=list2str(extra_cxx_cflags),
+        extra_cuda_cflags=list2str(extra_cuda_cflags),
         extra_link_args=list2str(link_args),
+        build_dir=build_dir,
         use_new_method=use_new_custom_op_load_method())

     log_v('write setup.py into {}'.format(file_path), verbose)
...
...
@@ -565,12 +677,12 @@ def _write_setup_file(name,
 def list2str(args):
     """
-    Convert list[str] into string. For example: [x, y] -> "['x', 'y']"
+    Convert list[str] into string. For example: ['x', 'y'] -> "['x', 'y']"
     """
     if args is None: return '[]'
     assert isinstance(args, (list, tuple))
-    args = ["'{}'".format(arg) for arg in args]
-    return '[' + ','.join(args) + ']'
+    args = ["{}".format(arg) for arg in args]
+    return repr(args)
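The switch to `repr` keeps quoting and escaping correct without hand-building the literal; for example:

    args = ['-O3', '-DNVCC']
    formatted = ["{}".format(arg) for arg in args]
    print(repr(formatted))  # prints ['-O3', '-DNVCC'] — a valid Python list literal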
 def _jit_compile(file_path, interpreter=None, verbose=False):
...
...
@@ -583,7 +695,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
     if interpreter is None:
         interpreter = 'python'
     try:
-        py_path = subprocess.check_output(['which', interpreter])
+        which = 'where' if IS_WINDOWS else 'which'
+        py_path = subprocess.check_output([which, interpreter])
         py_version = subprocess.check_output([interpreter, '-V'])
         if six.PY3:
             py_path = py_path.decode()
...
...
@@ -596,8 +709,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
             'Failed to check Python interpreter with `{}`, errors: {}'.format(
                 interpreter, error))

-    compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter, setup_file)
+    if IS_WINDOWS:
+        compile_cmd = 'cd /d {} && {} {} build'.format(ext_dir, interpreter,
+                                                       setup_file)
+    else:
+        compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter,
+                                                    setup_file)

     print("Compiling user custom op, it will cost a few seconds.....")
     run_cmd(compile_cmd, verbose)
...
...
@@ -682,7 +800,7 @@ def check_abi_compatibility(compiler, verbose=False):
     try:
         if OS_NAME.startswith('linux'):
             version_info = subprocess.check_output(
-                [compiler, '-dumpfullversion'])
+                [compiler, '-dumpfullversion', '-dumpversion'])
             if six.PY3:
                 version_info = version_info.decode()
             version = version_info.strip().split('.')
...
...
@@ -694,8 +812,8 @@ def check_abi_compatibility(compiler, verbose=False):
             warnings.warn(
                 ABI_INCOMPATIBILITY_WARNING.format(
                     user_compiler=compiler, version=version_info.strip()))
-    # TODO(Aurelius84): check version compatibility on windows
     elif IS_WINDOWS:
+        # TODO(zhouwei): support check abi compatibility on windows
         warnings.warn("We don't support Windows now.")
     except Exception:
         _, error, _ = sys.exc_info()
...
...
@@ -714,7 +832,7 @@ def _expected_compiler_current_platform():
     return expect_compilers

-def log_v(info, verbose):
+def log_v(info, verbose=True):
     """
     Print log information on stdout.
     """
...
...
python/requirements.txt
...
...
@@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5"
numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows"
numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows"
protobuf>=3.1.0
-gast==0.3.3
+gast>=0.3.3 ; platform_system != "Windows"
+gast==0.3.3 ; platform_system == "Windows"
Pillow
six
decorator
...
...
python/setup.py.in
...
...
@@ -334,11 +334,21 @@ if '${WITH_XPU_BKCL}' == 'ON':
     shutil.copy('${XPU_BKCL_LIB}', libs_path)
     package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}']

-# copy libfuild_framework.so to libs
-if os.name != 'nt' and sys.platform != 'darwin':
-    paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}'
-    shutil.copy(paddle_framework_lib, libs_path)
-    package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name]
+# copy libpaddle_framework.so to libs on linux
+if sys.platform.startswith('linux'):
+    shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['libpaddle_framework.so']
+
+# copy libpaddle_custom_op.so to libs on linux
+if sys.platform.startswith('linux'):
+    shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['libpaddle_custom_op.so']
+
+# copy paddle_framework.lib/paddle_framework.dll to libs on windows
+if os.name == 'nt':
+    shutil.copy('${FLUID_FRAMEWORK_IMPORT_LIB}', libs_path)
+    shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['paddle_framework.lib', 'paddle_framework.dll']

 # remove unused paddle/libs/__init__.py
 if os.path.isfile(libs_path+'/__init__.py'):
...
@@ -409,9 +419,9 @@ if '${WITH_GPU}' == 'ON':
class InstallCommand(InstallCommandBase):
def finalize_options(self):
ret = InstallCommandBase.finalize_options(self)
self.install_headers = os.path.join(self.install_purelib, 'paddle',
'include')
self.install_lib = self.install_platlib
self.install_headers = os.path.join(self.install_platlib, 'paddle',
'include')
return ret
...
...
@@ -462,11 +472,6 @@ class InstallHeaders(Command):
         return self.copy_file(header, install_dir)

     def run(self):
-        # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
-        if os.name == 'nt' or sys.platform == 'darwin':
-            if '${WITH_GPU}' == 'ON':
-                self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
-            return
         hdrs = self.distribution.headers
         if not hdrs:
             return
...
...