PaddlePaddle/Paddle — commit d3e60959 (unverified)

[Cherry-pick] The Second part of new custom op extension in 2.0.1 (#31237)

Authored by Chen Weihang on Feb 26, 2021; committed via GitHub on Feb 26, 2021.
Parent: 34092ab3
Showing 32 changed files with 1,705 additions and 504 deletions (+1705 −504).
CMakeLists.txt                                                      +2   −0
cmake/generic.cmake                                                 +6   −8
paddle/fluid/extension/include/all.h                                +6   −0
paddle/fluid/extension/include/dll_decl.h                           +27  −0
paddle/fluid/extension/include/op_meta_info.h                       +82  −28
paddle/fluid/extension/include/tensor.h                             +2   −1
paddle/fluid/extension/src/op_meta_info.cc                          +19  −3
paddle/fluid/extension/src/tensor.cc                                +65  −51
paddle/fluid/framework/CMakeLists.txt                               +41  −4
paddle/fluid/framework/custom_operator.cc                           +118 −14
paddle/fluid/platform/dynload/dynamic_loader.cc                     +0   −3
paddle/scripts/paddle_build.bat                                     +40  −10
python/paddle/fluid/tests/CMakeLists.txt                            +3   −1
python/paddle/fluid/tests/custom_op/CMakeLists.txt                  +44  −17
python/paddle/fluid/tests/custom_op/attr_test_op.cc                 +182 −0
python/paddle/fluid/tests/custom_op/custom_relu_op.cc               +5   −25
python/paddle/fluid/tests/custom_op/custom_relu_op.cu               +1   −21
python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc           +3   −3
python/paddle/fluid/tests/custom_op/custom_relu_setup.py            +5   −2
python/paddle/fluid/tests/custom_op/multi_out_test_op.cc            +76  −0
python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py        +67  −0
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py      +89  −0
python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py    +108 −19
python/paddle/fluid/tests/custom_op/test_dispatch_jit.py            +11  −2
python/paddle/fluid/tests/custom_op/test_jit_load.py                +2   −1
python/paddle/fluid/tests/custom_op/test_multi_out_jit.py           +47  −72
python/paddle/fluid/tests/custom_op/utils.py                        +2   −2
python/paddle/utils/cpp_extension/__init__.py                       +1   −2
python/paddle/utils/cpp_extension/cpp_extension.py                  +424 −112
python/paddle/utils/cpp_extension/extension_utils.py                +208 −90
python/requirements.txt                                             +2   −1
python/setup.py.in                                                  +17  −12
CMakeLists.txt

@@ -293,6 +293,8 @@ set(PADDLE_PYTHON_BUILD_DIR "${CMAKE_CURRENT_BINARY_DIR}/python/build")
 set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O3 -g -DNDEBUG")
 
+add_definitions(-DPADDLE_DLL_EXPORT)
+
 if(ON_INFER)
     # you can trun off the paddle fluid and inference lib by set ON_INFER=OFF
     message(STATUS "On inference mode, will take place some specific optimization.")
cmake/generic.cmake

@@ -792,17 +792,15 @@ function(py_test TARGET_NAME)
   if(WITH_COVERAGE)
     add_test(NAME ${TARGET_NAME}
              COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
-             FLAGS_cpu_deterministic=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+             FLAGS_cpu_deterministic=true ${py_test_ENVS}
              COVERAGE_FILE=${PADDLE_BINARY_DIR}/python-coverage.data
              ${PYTHON_EXECUTABLE} -m coverage run --branch -p ${py_test_SRCS} ${py_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   else()
     add_test(NAME ${TARGET_NAME}
              COMMAND ${CMAKE_COMMAND} -E env FLAGS_init_allocated_mem=true FLAGS_cudnn_deterministic=true
-             FLAGS_cpu_deterministic=true PYTHONPATH=${PADDLE_BINARY_DIR}/python ${py_test_ENVS}
+             FLAGS_cpu_deterministic=true ${py_test_ENVS}
              ${PYTHON_EXECUTABLE} -u ${py_test_SRCS} ${py_test_ARGS}
              WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
   endif()
paddle/fluid/extension/include/all.h

@@ -18,6 +18,12 @@ limitations under the License. */
 #error C++11 or later compatible compiler is required to use Paddle.
 #endif
 
+#ifdef _WIN32
+#ifndef NOMINMAX
+#define NOMINMAX  // msvc max/min macro conflict with std::min/max
+#endif
+#endif
+
 #include "paddle/fluid/extension/include/dispatch.h"
 #include "paddle/fluid/extension/include/dtype.h"
 #include "paddle/fluid/extension/include/op_meta_info.h"
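For context, NOMINMAX matters because windows.h otherwise defines function-style min/max macros that shadow the standard library. A minimal illustration of the conflict (demo code, not part of the commit):

#include <algorithm>
#include <windows.h>  // without NOMINMAX this defines max(a, b) as a macro

int clamp_to_zero(int v) {
  // A plain std::max(v, 0) call would be mangled by the macro; the extra
  // parentheses suppress macro expansion. Defining NOMINMAX first, as the
  // hunk above does, makes the workaround unnecessary.
  return (std::max)(v, 0);
}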
paddle/fluid/extension/include/dll_decl.h (new file, mode 100644)

// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#if defined(_WIN32)
#ifndef PD_DLL_DECL
#ifdef PADDLE_DLL_EXPORT
#define PD_DLL_DECL __declspec(dllexport)
#else
#define PD_DLL_DECL __declspec(dllimport)
#endif  // PADDLE_DLL_EXPORT
#endif  // PD_DLL_DECL
#else
#define PD_DLL_DECL
#endif  // _WIN32
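A sketch of how this macro plays out on Windows: the Paddle build itself defines PADDLE_DLL_EXPORT (see the CMakeLists.txt change above), so annotated symbols are exported from the DLL, while user code compiling against the same headers sees dllimport. WidgetDemo below is a hypothetical class for illustration:

#include "paddle/fluid/extension/include/dll_decl.h"

// Expands to __declspec(dllexport) inside the Paddle build, to
// __declspec(dllimport) in consumer code, and to nothing off Windows.
class PD_DLL_DECL WidgetDemo {
 public:
  int value() const { return 42; }
};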
paddle/fluid/extension/include/op_meta_info.h

@@ -14,12 +14,14 @@ limitations under the License. */
 #pragma once
 
 #include <iostream>
 #include <string>
 #include <unordered_map>
 #include <vector>
 
 #include <boost/any.hpp>
 
+#include "paddle/fluid/extension/include/dll_decl.h"
 #include "paddle/fluid/extension/include/tensor.h"

@@ -31,7 +33,7 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
-class OpMetaInfoHelper;
+class PD_DLL_DECL OpMetaInfoHelper;
 }  // namespace framework
 
 using Tensor = paddle::Tensor;

@@ -43,6 +45,26 @@ using Tensor = paddle::Tensor;
   classname& operator=(const classname&) = delete; \
   classname& operator=(classname&&) = delete
 
+#if defined _WIN32
+#define HANDLE_THE_ERROR try {
+#define END_HANDLE_THE_ERROR            \
+  }                                     \
+  catch (const std::exception& e) {     \
+    std::cerr << e.what() << std::endl; \
+    throw e;                            \
+  }
+#else
+#define HANDLE_THE_ERROR
+#define END_HANDLE_THE_ERROR
+#endif
+
+#define PD_THROW(err_msg)              \
+  do {                                 \
+    HANDLE_THE_ERROR                   \
+    throw std::runtime_error(err_msg); \
+    END_HANDLE_THE_ERROR               \
+  } while (0)
+
 ///////////////// Util Define and Function ////////////////
 
 inline std::string Grad(const std::string& var_name) {
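The Windows-only try/catch wrapper presumably exists so the message still reaches stderr before the exception crosses the DLL boundary, where MSVC does not propagate it reliably. Spelled out, PD_THROW("some error") expands under _WIN32 to:

do {
  try {
    throw std::runtime_error("some error");
  } catch (const std::exception& e) {
    std::cerr << e.what() << std::endl;  // log before re-throwing across the DLL edge
    throw e;
  }
} while (0);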
@@ -59,6 +81,26 @@ inline std::string Grad(const std::string& var_name) {
 using KernelFunc = std::vector<Tensor> (*)(std::vector<Tensor> inputs,
                                            std::vector<boost::any> attrs);
 
+#define PD_SPECIALIZE_ComputeCallHelper(attr_type)                          \
+  template <typename... Tail>                                               \
+  struct ComputeCallHelper<attr_type, Tail...> {                            \
+    template <int in_idx, int attr_idx, typename... PreviousArgs>           \
+    static Return Compute(std::vector<Tensor> inputs,                       \
+                          std::vector<boost::any> attrs,                    \
+                          const PreviousArgs&... pargs) {                   \
+      try {                                                                 \
+        attr_type arg = boost::any_cast<attr_type>(attrs[attr_idx]);        \
+        return ComputeCallHelper<Tail...>::template Compute<in_idx,         \
+                                                            attr_idx + 1>(  \
+            inputs, attrs, pargs..., arg);                                  \
+      } catch (boost::bad_any_cast&) {                                      \
+        PD_THROW(                                                           \
+            "Attribute cast error in custom operator. Expected " #attr_type \
+            " value.");                                                     \
+      }                                                                     \
+    }                                                                       \
+  }
+
 template <typename T>
 struct TypeTag {};

@@ -92,26 +134,20 @@ struct KernelFuncImpl<Return (*)(Args...), impl_fn> {
   }
 };
 
-  // TODO(chenweihang): add support for attribute input
-  // int attribute input (not used now)
-  template <typename... Tail>
-  struct ComputeCallHelper<int, Tail...> {
-    template <int in_idx, int attr_idx, typename... PreviousArgs>
-    static Return Compute(std::vector<Tensor> inputs,
-                          std::vector<boost::any> attrs,
-                          const PreviousArgs&... pargs) {
-      try {
-        int arg = boost::any_cast<int>(attrs[attr_idx]);
-        return ComputeCallHelper<Tail...>::template Compute<in_idx,
-                                                            attr_idx + 1>(
-            inputs, attrs, pargs..., arg);
-      } catch (boost::bad_any_cast&) {
-        throw std::runtime_error(
-            "Attribute cast error in custom operator. Expected int value.");
-      }
-    }
-  };
+  PD_SPECIALIZE_ComputeCallHelper(bool);
+  PD_SPECIALIZE_ComputeCallHelper(int);
+  PD_SPECIALIZE_ComputeCallHelper(float);
+  PD_SPECIALIZE_ComputeCallHelper(int64_t);
+  PD_SPECIALIZE_ComputeCallHelper(std::string);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<int>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<float>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<int64_t>);
+  PD_SPECIALIZE_ComputeCallHelper(std::vector<std::string>);
+  // TODO(chenweihang): support other attribute type if needed.
+  // Why not support other attribute type here?
+  // - boost::blank, std::vector<bool> and std::vector<double>
+  //   are not used in op
+  // - BlockDesc* and std::vector<BlockDesc*> are used in framework
 
   // end: base template
   template <typename T>
   struct ComputeCallHelper<TypeTag<T>> {

@@ -220,13 +256,26 @@ struct InferDtypeFuncImpl<Return (*)(Args...), impl_fn> {
 ////////////////////// Op Meta Info //////////////////////
 
-class OpMetaInfo {
+class PD_DLL_DECL OpMetaInfo {
  public:
   explicit OpMetaInfo(const std::string& op_name) : name_(op_name) {}
+
+  // format: {"<name1>", "<name2>", ...}
   OpMetaInfo& Inputs(std::vector<std::string>&& inputs);
+
+  // format: {"<name1>", "<name2>", ...}
   OpMetaInfo& Outputs(std::vector<std::string>&& outputs);
+
+  // format: {"<name1>:<type1>", "<name1>:<type1>", ...}
+  OpMetaInfo& Attrs(std::vector<std::string>&& attrs);
+
+  // format: PD_KERNEL(...)
   OpMetaInfo& SetKernelFn(KernelFunc&& func);
+
+  // format: PD_INFER_SHAPE(...)
   OpMetaInfo& SetInferShapeFn(InferShapeFunc&& func);
+
+  // format: PD_INFER_DTYPE(...)
   OpMetaInfo& SetInferDtypeFn(InferDtypeFunc&& func);
 
  private:

@@ -246,7 +295,7 @@ class OpMetaInfo {
 //////////////// Op Meta Info Map /////////////////
 
-class OpMetaInfoMap {
+class PD_DLL_DECL OpMetaInfoMap {
  public:
   // this function's impl should keep in header file.
   // if move to cc file, meta info can not be added

@@ -270,14 +319,15 @@ class OpMetaInfoMap {
 //////////////// Op Meta Info Builder /////////////////
 
-class OpMetaInfoBuilder {
+class PD_DLL_DECL OpMetaInfoBuilder {
  public:
   explicit OpMetaInfoBuilder(std::string&& name);
   OpMetaInfoBuilder& Inputs(std::vector<std::string>&& inputs);
   OpMetaInfoBuilder& Outputs(std::vector<std::string>&& outputs);
-  OpMetaInfoBuilder& SetKernelFn(KernelFunc&& func);
-  OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc&& func);
-  OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc&& func);
+  OpMetaInfoBuilder& Attrs(std::vector<std::string>&& attrs);
+  OpMetaInfoBuilder& SetKernelFn(KernelFunc func);
+  OpMetaInfoBuilder& SetInferShapeFn(InferShapeFunc func);
+  OpMetaInfoBuilder& SetInferDtypeFn(InferDtypeFunc func);
   OpMetaInfoBuilder& SetBackwardOp(const std::string& bwd_op_name);
 
  private:
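Taken together, the builder now accepts an attribute list and plain (by-value) function pointers. A hedged sketch of a registration written directly against this API; custom_scale, ScaleKernel, ScaleInferShape, and ScaleInferDtype are hypothetical user-side names, and the PD_BUILD_OP macro used in the test files below is the intended entry point:

// Illustration of the builder chain only, not code from this commit.
static paddle::OpMetaInfoBuilder scale_op_demo =
    paddle::OpMetaInfoBuilder("custom_scale")
        .Inputs({"X"})
        .Outputs({"Out"})
        .Attrs({"scale: float"})  // "<name>:<type>" strings, parsed by the framework
        .SetKernelFn(PD_KERNEL(ScaleKernel))
        .SetInferShapeFn(PD_INFER_SHAPE(ScaleInferShape))
        .SetInferDtypeFn(PD_INFER_DTYPE(ScaleInferDtype));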
@@ -317,8 +367,12 @@ void LoadCustomOperatorLib(const std::string& dso_name);
 extern "C" {
 #endif
 
+#if defined(_WIN32)
 // C-API to get global OpMetaInfoMap.
-paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap();
+__declspec(dllexport) inline paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
+  return paddle::OpMetaInfoMap::Instance();
+}
+#endif  // _WIN32
 
 #ifdef __cplusplus
 }
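The framework pulls the registry out of the user's shared library through this C symbol. A hedged sketch of the lookup on Linux (the real code path is LoadCustomOperatorLib together with GetOpDsoHandle in dynamic_loader.cc; LoadDemo is hypothetical):

#include <dlfcn.h>

using GetMapFn = paddle::OpMetaInfoMap& (*)();

void LoadDemo(const char* so_path) {
  void* handle = dlopen(so_path, RTLD_NOW);   // load the custom op library
  auto get_map = reinterpret_cast<GetMapFn>(
      dlsym(handle, "PD_GetOpMetaInfoMap"));  // C linkage, so the name is unmangled
  paddle::OpMetaInfoMap& registry = get_map();  // everything PD_BUILD_OP registered
  (void)registry;
}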
paddle/fluid/extension/include/tensor.h

@@ -16,6 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <vector>
 
+#include "paddle/fluid/extension/include/dll_decl.h"
 #include "paddle/fluid/extension/include/dtype.h"
 #include "paddle/fluid/extension/include/place.h"

@@ -23,7 +24,7 @@ namespace paddle {
 namespace framework {
 class CustomTensorUtils;
 }  // namespace framework
-class Tensor {
+class PD_DLL_DECL Tensor {
  public:
   /// \brief Construct a Tensor on target Place for CustomOp.
   /// Generally it's only used for user to create Tensor.
paddle/fluid/extension/src/op_meta_info.cc

@@ -32,6 +32,10 @@ OpMetaInfo& OpMetaInfo::Outputs(std::vector<std::string>&& outputs) {
   outputs_ = std::forward<std::vector<std::string>>(outputs);
   return *this;
 }
+OpMetaInfo& OpMetaInfo::Attrs(std::vector<std::string>&& attrs) {
+  attrs_ = std::forward<std::vector<std::string>>(attrs);
+  return *this;
+}
 OpMetaInfo& OpMetaInfo::SetKernelFn(KernelFunc&& func) {
   kernel_fn_ = std::forward<KernelFunc>(func);
   return *this;

@@ -78,17 +82,22 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::Outputs(
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::Attrs(std::vector<std::string>&& attrs) {
+  info_ptr_->Attrs(std::forward<std::vector<std::string>>(attrs));
+  return *this;
+}
+
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetKernelFn(KernelFunc func) {
   info_ptr_->SetKernelFn(std::forward<KernelFunc>(func));
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferShapeFn(InferShapeFunc func) {
   info_ptr_->SetInferShapeFn(std::forward<InferShapeFunc>(func));
   return *this;
 }
 
-OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc&& func) {
+OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) {
   info_ptr_->SetInferDtypeFn(std::forward<InferDtypeFunc>(func));
   return *this;
 }

@@ -114,10 +123,17 @@ void LoadCustomOperatorLib(const std::string& dso_name) {
 }
 }  // namespace paddle
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
+#ifndef _WIN32
 // C-API to get global OpMetaInfoMap.
 paddle::OpMetaInfoMap& PD_GetOpMetaInfoMap() {
   return paddle::OpMetaInfoMap::Instance();
 }
+#endif
 
 #ifdef __cplusplus
 }  // end extern "C"
 #endif
paddle/fluid/extension/src/tensor.cc

@@ -207,73 +207,87 @@ Tensor Tensor::copy_to(const PlaceType &target_place) const {
   return target;
 }

Every explicit template instantiation in this file gains the PD_DLL_DECL export macro. The pattern, shown for one instantiation from each of the four families:

-template Tensor Tensor::copy_to<paddle::platform::float16>(
-    const PlaceType& target_place) const;
+template PD_DLL_DECL Tensor Tensor::copy_to<paddle::platform::float16>(
+    const PlaceType& target_place) const;

-template float* Tensor::data<float>() const;
+template PD_DLL_DECL float* Tensor::data<float>() const;

-template float* Tensor::mutable_data<float>();
+template PD_DLL_DECL float* Tensor::mutable_data<float>();

-template float* Tensor::mutable_data<float>(const PlaceType& place);
+template PD_DLL_DECL float* Tensor::mutable_data<float>(const PlaceType& place);

The same change is applied across all four families (copy_to, data, mutable_data with and without a PlaceType argument) for the full dtype list: paddle::platform::float16, paddle::platform::bfloat16, paddle::platform::complex64, paddle::platform::complex128, float, double, int64_t, int32_t, uint8_t, int8_t, int16_t, and bool.

 std::vector<int> Tensor::shape() const {
   GET_CASTED_TENSOR
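The instantiations need exporting because user kernels call these templates across the shared-library boundary. A minimal consumer-side sketch (FillOnes is a hypothetical helper):

#include "paddle/fluid/extension/include/tensor.h"

// Both calls below resolve against the explicit instantiations above,
// which must therefore be visible in the DLL's export table.
void FillOnes(paddle::Tensor* t) {
  float* data = t->mutable_data<float>();
  for (int64_t i = 0; i < t->size(); ++i) data[i] = 1.0f;
}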
paddle/fluid/framework/CMakeLists.txt

@@ -321,9 +321,9 @@ message(STATUS "branch: ${PADDLE_BRANCH}")
 configure_file(commit.h.in commit.h)
 
-cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor)
+cc_library(custom_tensor SRCS ../extension/src/tensor.cc DEPS lod_tensor memory enforce)
 cc_library(op_meta_info SRCS ../extension/src/op_meta_info.cc DEPS custom_tensor)
-cc_library(custom_operator SRCS custom_operator.cc DEPS operator op_registry device_context dynamic_loader custom_tensor op_meta_info)
+cc_library(custom_operator SRCS custom_operator.cc DEPS tensor attribute framework_proto op_registry operator dynamic_loader string_helper custom_tensor op_meta_info)
 cc_test(custom_tensor_test SRCS custom_tensor_test.cc DEPS custom_tensor glog)
 
 set(FLUID_FRAMEWORK_MODULES proto_desc memory lod_tensor executor data_feed_proto layer dynamic_loader custom_operator)

@@ -346,9 +346,12 @@ if (LINUX)
 endif()
 
 if (WIN32)
     set(FLUID_FRAMEWORK_IMPORT_LIB
         ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.lib
         CACHE INTERNAL "Fluid framework lib")
     set(FLUID_FRAMEWORK_SHARED_LIB
-        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_framework.dll
-        CACHE INTERNAL "Fluid framework lib")
+        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_framework.dll
+        CACHE INTERNAL "Fluid framework dll")
 endif()
 
 if (APPLE)

@@ -359,3 +362,37 @@ endif()
 if (WITH_TESTING)
     set_tests_properties(selected_rows_test PROPERTIES TIMEOUT 120)
 endif()
+
+# New custom op extension mechanism related
+# if not deps `layer`, will cause: undefined symbol: _ZN6paddle10imperative7VarBase9name_set_
+set(PADDLE_CUSTOM_OP_MODULES custom_tensor op_meta_info custom_operator layer)
+
+cc_library(paddle_custom_op_shared
+    SHARED SRCS custom_operator.cc ../extension/src/tensor.cc ../extension/src/op_meta_info.cc
+    ${CMAKE_SOURCE_DIR}/paddle/fluid/imperative/layer.cc
+    DEPS ${PADDLE_CUSTOM_OP_MODULES})
+get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+set_target_properties(paddle_custom_op_shared PROPERTIES OUTPUT_NAME paddle_custom_op)
+target_link_libraries(paddle_custom_op_shared ${os_dependency_modules})
+
+if (LINUX)
+    set(PADDLE_CUSTOM_OP_SHARED_LIB
+        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/libpaddle_custom_op.so
+        CACHE INTERNAL "Paddle custom op lib")
+endif()
+
+if (WIN32)
+    set(PADDLE_CUSTOM_OP_SHARED_LIB
+        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.lib
+        CACHE INTERNAL "Paddle custom op lib")
+    set(PADDLE_CUSTOM_OP_SHARED_LIB
+        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/${CMAKE_BUILD_TYPE}/paddle_custom_op.dll
+        CACHE INTERNAL "Paddle custom op dll")
+endif()
+
+if (APPLE)
+    set(PADDLE_CUSTOM_OP_SHARED_LIB
+        ${PADDLE_BINARY_DIR}/paddle/fluid/framework/paddle_custom_op.dylib
+        CACHE INTERNAL "Paddle custom op lib")
+endif()
paddle/fluid/framework/custom_operator.cc

@@ -73,6 +73,24 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
   return std::find(vec.cbegin(), vec.cend(), name) != vec.cend();
 }
 
+std::vector<std::string> ParseAttrStr(const std::string& attr) {
+  auto split_pos = attr.find_first_of(":");
+  PADDLE_ENFORCE_NE(split_pos, std::string::npos,
+                    platform::errors::InvalidArgument(
+                        "Invalid attribute string format. Attribute string "
+                        "format is `<name>:<type>`."));
+
+  std::vector<std::string> rlt;
+  // 1. name
+  rlt.emplace_back(string::trim_spaces(attr.substr(0, split_pos)));
+  // 2. type
+  rlt.emplace_back(string::trim_spaces(attr.substr(split_pos + 1)));
+
+  VLOG(1) << "attr name: " << rlt[0] << ", attr type str: " << rlt[1];
+
+  return rlt;
+}
+
 }  // namespace detail
 
 ////////////////// Kernel Define ////////////////////
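ParseAttrStr splits on the first colon and trims both halves, so "scale: float" becomes {"scale", "float"}. A self-contained sketch of the same split, minus the PADDLE_ENFORCE error handling (SplitAttrDemo is illustrative, not part of the commit):

#include <string>
#include <vector>

std::vector<std::string> SplitAttrDemo(const std::string& attr) {
  auto pos = attr.find_first_of(":");  // the real code rejects pos == npos
  auto trim = [](std::string s) {
    s.erase(0, s.find_first_not_of(" \t"));
    s.erase(s.find_last_not_of(" \t") + 1);
    return s;
  };
  return {trim(attr.substr(0, pos)), trim(attr.substr(pos + 1))};
}
// SplitAttrDemo("scale: float") yields {"scale", "float"}.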
@@ -81,7 +99,8 @@ inline bool IsMemberOf(const std::vector<std::string>& vec,
 static void RunKernelFunc(const framework::ExecutionContext& ctx,
                           const paddle::KernelFunc& func,
                           const std::vector<std::string>& inputs,
-                          const std::vector<std::string>& outputs) {
+                          const std::vector<std::string>& outputs,
+                          const std::vector<std::string>& attrs) {
   VLOG(1) << "Custom Operator: Start run KernelFunc.";
   std::vector<paddle::Tensor> custom_ins;
   for (auto& in_name : inputs) {

@@ -98,10 +117,43 @@ static void RunKernelFunc(const framework::ExecutionContext& ctx,
     custom_ins.emplace_back(custom_in);
   }
 
-  std::vector<boost::any> attrs;
+  std::vector<boost::any> custom_attrs;
+  for (auto& attr_str : attrs) {
+    auto attr_name_and_type = detail::ParseAttrStr(attr_str);
+    auto attr_name = attr_name_and_type[0];
+    auto attr_type_str = attr_name_and_type[1];
+    if (attr_type_str == "bool") {
+      custom_attrs.emplace_back(ctx.Attr<bool>(attr_name));
+    } else if (attr_type_str == "int") {
+      custom_attrs.emplace_back(ctx.Attr<int>(attr_name));
+    } else if (attr_type_str == "float") {
+      custom_attrs.emplace_back(ctx.Attr<float>(attr_name));
+    } else if (attr_type_str == "int64_t") {
+      custom_attrs.emplace_back(ctx.Attr<int64_t>(attr_name));
+    } else if (attr_type_str == "std::string") {
+      custom_attrs.emplace_back(ctx.Attr<std::string>(attr_name));
+    } else if (attr_type_str == "std::vector<int>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<int>>(attr_name));
+    } else if (attr_type_str == "std::vector<float>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<float>>(attr_name));
+    } else if (attr_type_str == "std::vector<int64_t>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<int64_t>>(attr_name));
+    } else if (attr_type_str == "std::vector<std::string>") {
+      custom_attrs.emplace_back(ctx.Attr<std::vector<std::string>>(attr_name));
+    } else {
+      PADDLE_THROW(platform::errors::Unimplemented(
+          "Unsupported `%s` type value as custom attribute now. "
+          "Supported data types include `bool`, `int`, `float`, "
+          "`int64_t`, `std::string`, `std::vector<int>`, "
+          "`std::vector<float>`, `std::vector<int64_t>, "
+          "`std::vector<std::string>`, Please check whether "
+          "the attribute data type and data type string are matched.",
+          attr_type_str));
+    }
+  }
 
   VLOG(1) << "Run ComputeFunc.";
-  auto outs = func(custom_ins, attrs);
+  auto outs = func(custom_ins, custom_attrs);
 
   VLOG(1) << "Custom Operator: Share outputs into ExecutionContext.";
   for (size_t i = 0; i < outputs.size(); ++i) {
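Attributes travel between the two halves of the mechanism as boost::any: this function packs each ctx.Attr<T>() into the vector, and the PD_SPECIALIZE_ComputeCallHelper specializations in op_meta_info.h unpack them with boost::any_cast<T> in declaration order, so the two type lists must agree exactly. A minimal round trip:

#include <boost/any.hpp>
#include <vector>

void AttrRoundTripDemo() {
  std::vector<boost::any> attrs;
  attrs.emplace_back(3.14f);  // pack side: what RunKernelFunc does with ctx.Attr<float>(...)
  float scale = boost::any_cast<float>(attrs[0]);  // unpack side: ComputeCallHelper<float, ...>
  (void)scale;
  // boost::any_cast<double>(attrs[0]) would throw boost::bad_any_cast here,
  // which the specialization's catch block turns into a readable PD_THROW error.
}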
@@ -164,7 +216,51 @@ class CustomOpMaker : public OpProtoAndCheckerMaker {
     for (auto& out_name : outputs_) {
       AddOutput(out_name, "The output " + out_name + "of Custom Operator.");
     }
-    // TODO(chenweihang): support attrs in later PR
+    for (auto& attr : attrs_) {
+      auto attr_name_and_type = detail::ParseAttrStr(attr);
+      auto attr_name = attr_name_and_type[0];
+      auto attr_type_str = attr_name_and_type[1];
+      if (attr_type_str == "bool") {
+        AddAttr<bool>(attr_name, "custom operator bool attribute.")
+            .SetDefault(false);
+      } else if (attr_type_str == "int") {
+        AddAttr<int>(attr_name, "custom operator int attribute.").SetDefault(1);
+      } else if (attr_type_str == "float") {
+        AddAttr<float>(attr_name, "custom operator float attribute.")
+            .SetDefault(1.0f);
+      } else if (attr_type_str == "int64_t") {
+        AddAttr<int64_t>(attr_name, "custom operator int64_t attribute.")
+            .SetDefault(1);
+      } else if (attr_type_str == "std::string") {
+        AddAttr<std::string>(attr_name, "custom operator int attribute.")
+            .SetDefault("");
+      } else if (attr_type_str == "std::vector<int>") {
+        AddAttr<std::vector<int>>(attr_name,
+                                  "custom operator std::vector<int> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<float>") {
+        AddAttr<std::vector<float>>(
+            attr_name, "custom operator std::vector<float> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<int64_t>") {
+        AddAttr<std::vector<int64_t>>(
+            attr_name, "custom operator std::vector<int64_t> attribute.")
+            .SetDefault({});
+      } else if (attr_type_str == "std::vector<std::string>") {
+        AddAttr<std::vector<std::string>>(
+            attr_name, "custom operator std::vector<std::string> attribute.")
+            .SetDefault({});
+      } else {
+        PADDLE_THROW(platform::errors::Unimplemented(
+            "Unsupported `%s` type value as custom attribute now. "
+            "Supported data types include `bool`, `int`, `float`, "
+            "`int64_t`, `std::string`, `std::vector<int>`, "
+            "`std::vector<float>`, `std::vector<int64_t>, "
+            "`std::vector<std::string>`, Please check whether "
+            "the attribute data type and data type string are matched.",
+            attr_type_str));
+      }
+    }
     AddComment(R"DOC(
 Custom Operator.

@@ -227,7 +323,7 @@ class CustomGradOpMaker<OpDesc> : public SingleGradOpMaker<OpDesc> {
       VLOG(1) << "Custom Operator: GradOpDescMaker - output: " << out_name;
       grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
     }
-    // TODO(chenweihang): support attrs in later PR
+    grad_op->SetAttrMap(this->Attrs());
   }
 
  private:

@@ -287,7 +383,7 @@ class CustomGradOpMaker<imperative::OpBase>
       VLOG(1) << "Custom Operator: GradOpBaseMaker - output: " << out_name;
       grad_op->SetOutput(out_name, this->InputGrad(detail::NoGrad(out_name)));
     }
-    // TODO(chenweihang): support attrs in later PR
+    grad_op->SetAttrMap(this->Attrs());
   }
 
  private:

@@ -303,21 +399,24 @@ void RegisterOperatorKernelWithPlace(const std::string& name,
                                      const proto::VarType::Type type,
                                      const PlaceType& place,
                                      const std::vector<std::string>& inputs,
-                                     const std::vector<std::string>& outputs) {
+                                     const std::vector<std::string>& outputs,
+                                     const std::vector<std::string>& attrs) {
   OpKernelType key(type, CustomTensorUtils::ConvertEnumPlaceToInnerPlace(place));
   VLOG(1) << "Custom Operator: op kernel key: " << key;
   OperatorWithKernel::AllOpKernels()[name][key] =
-      [kernel_func, inputs, outputs](const framework::ExecutionContext& ctx) {
+      [kernel_func, inputs, outputs,
+       attrs](const framework::ExecutionContext& ctx) {
         VLOG(1) << "Custom Operator: run custom kernel func in lambda.";
-        RunKernelFunc(ctx, kernel_func, inputs, outputs);
+        RunKernelFunc(ctx, kernel_func, inputs, outputs, attrs);
       };
 }
 
 void RegisterOperatorKernel(const std::string& name,
                             const paddle::KernelFunc& kernel_func,
                             const std::vector<std::string>& inputs,
-                            const std::vector<std::string>& outputs) {
+                            const std::vector<std::string>& outputs,
+                            const std::vector<std::string>& attrs) {
   VLOG(1) << "Custom Operator: op name in kernel: " << name;
   // NOTE [ Dummy Op Kernel Key ]
   // TODO(chenweihang): Because execute engine need get device context based

@@ -325,9 +424,11 @@ void RegisterOperatorKernel(const std::string& name,
   // device. But this is not entirely correct, if user only give a cpu kernel,
   // but call api in gpu device, it will cause error.
   RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
-                                  PlaceType::kCPU, inputs, outputs);
+                                  PlaceType::kCPU, inputs, outputs, attrs);
 #ifdef PADDLE_WITH_CUDA
   RegisterOperatorKernelWithPlace(name, kernel_func, proto::VarType::RAW,
-                                  PlaceType::kGPU, inputs, outputs);
+                                  PlaceType::kGPU, inputs, outputs, attrs);
 #endif
 }
 
 void RegisterOperatorWithMetaInfo(

@@ -350,6 +451,8 @@ void RegisterOperatorWithMetaInfo(
           << string::join_strings(op_inputs, ',');
   VLOG(1) << "Custom Operator: forward, op outputs: "
           << string::join_strings(op_outputs, ',');
+  VLOG(1) << "Custom Operator: forward, op attrs: "
+          << string::join_strings(op_attrs, ',');
 
   // Op
   info.creator_ = [](const std::string& op_name, const VariableNameMap& inputs,

@@ -426,7 +529,7 @@ void RegisterOperatorWithMetaInfo(
   };
 
   // Kernel func
-  RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs);
+  RegisterOperatorKernel(op_name, kernel_fn, op_inputs, op_outputs, op_attrs);
 
   // If grad op or double grad op exists
   std::string cur_op_name = op_name;

@@ -436,6 +539,7 @@ void RegisterOperatorWithMetaInfo(
     auto& grad_op_name = OpMetaInfoHelper::GetOpName(cur_grad_op);
     auto& grad_op_inputs = OpMetaInfoHelper::GetInputs(cur_grad_op);
     auto& grad_op_outputs = OpMetaInfoHelper::GetOutputs(cur_grad_op);
+    auto& grad_op_attrs = OpMetaInfoHelper::GetAttrs(cur_grad_op);
     auto& grad_kernel_fn = OpMetaInfoHelper::GetKernelFn(cur_grad_op);
 
     VLOG(1) << "Custom Operator: backward, op name: " << grad_op_name;

@@ -489,7 +593,7 @@ void RegisterOperatorWithMetaInfo(
     // Kernel func
     RegisterOperatorKernel(grad_op_name, grad_kernel_fn, grad_op_inputs,
-                           grad_op_outputs);
+                           grad_op_outputs, grad_op_attrs);
 
     // update current info
     OpInfoMap::Instance().Insert(cur_op_name, info);
paddle/fluid/platform/dynload/dynamic_loader.cc

@@ -378,9 +378,6 @@ void* GetOpDsoHandle(const std::string& dso_name) {
 #if defined(__APPLE__) || defined(__OSX__)
   PADDLE_THROW(platform::errors::Unimplemented(
       "Create custom cpp op outside framework do not support Apple."));
-#elif defined(_WIN32) && defined(PADDLE_WITH_CUDA)
-  PADDLE_THROW(platform::errors::Unimplemented(
-      "Create custom cpp op outside framework do not support Windows."));
 #else
   return GetDsoHandleFromSearchPath(FLAGS_op_dir, dso_name);
 #endif
paddle/scripts/paddle_build.bat

@@ -114,23 +114,25 @@ rem ------pre install python requirement----------
 where python
 where pip
 pip install wheel --user
-pip install -r %work_dir%\python\requirements.txt --user
+pip install -r %work_dir%\python\unittest_py\requirements.txt --user
+pip install -r %work_dir%\python\requirements.txt --user
 if %ERRORLEVEL% NEQ 0 (
     echo pip install requirements.txt failed!
     exit /b 7
 )
 
 rem ------pre install clcache and init config----------
-pip install clcache --user
+rem pip install clcache --user
+pip uninstall -y clcache
 :: set USE_CLCACHE to enable clcache
-set USE_CLCACHE=1
+rem set USE_CLCACHE=1
 :: In some scenarios, CLCACHE_HARDLINK can save one file copy.
-set CLCACHE_HARDLINK=1
+rem set CLCACHE_HARDLINK=1
 :: If it takes more than 1000s to obtain the right to use the cache, an error will be reported
-set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
+rem set CLCACHE_OBJECT_CACHE_TIMEOUT_MS=1000000
 :: set maximum cache size to 20G
-clcache.exe -M 21474836480
+rem clcache.exe -M 21474836480
 
 rem ------show summary of current environment----------
 python %work_dir%\tools\summary_env.py

@@ -194,11 +196,28 @@ set start=%start:~4,10%
 @ECHO ON
 if not defined CUDA_TOOLKIT_ROOT_DIR set CUDA_TOOLKIT_ROOT_DIR=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.0
-set PATH=%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
+set CUDA_PATH=%CUDA_TOOLKIT_ROOT_DIR%
+set PATH=%TENSORRT_ROOT:/=\%\lib;%CUDA_TOOLKIT_ROOT_DIR%\bin;%CUDA_TOOLKIT_ROOT_DIR%\libnvvp;%PATH%
 
 rem ------set third_party cache dir------
+: clear third party cache every once in a while
+for /F %%# in ('wmic os get localdatetime^|findstr 20') do set datetime=%%#
+set day_now=%datetime:~6,2%
+set day_before=-1
+set /p day_before=<%cache_dir%\day.txt
+if %day_now% NEQ %day_before% (
+    echo %day_now% > %cache_dir%\day.txt
+    type %cache_dir%\day.txt
+    if %day_now% EQU 25 (
+        rmdir %cache_dir%\third_party_GPU/ /s/q
+        rmdir %cache_dir%\third_party/ /s/q
+    )
+    if %day_now% EQU 10 (
+        rmdir %cache_dir%\third_party_GPU/ /s/q
+        rmdir %cache_dir%\third_party/ /s/q
+    )
+)
 if "%WITH_TPCACHE%"=="OFF" (
     set THIRD_PARTY_PATH=%work_dir:\=/%/build/third_party
     goto :cmake_impl

@@ -263,6 +282,9 @@ echo Build third_party successfully!
 set build_times=1
 :build_paddle
+:: reset clcache zero stats for collect PR's actual hit rate
+rem clcache.exe -z
+
 echo Build Paddle the %build_times% time:
 if "%WITH_CLCACHE%"=="OFF" (
     msbuild /m:%PARALLEL_PROJECT_COUNT% /p:Configuration=Release /verbosity:minimal paddle.sln

@@ -281,6 +303,11 @@ if %ERRORLEVEL% NEQ 0 (
 )
 echo Build Paddle successfully!
+echo 0 > %cache_dir%\error_code.txt
+type %cache_dir%\error_code.txt
+
+:: ci will collect clcache hit rate
+rem goto :collect_clcache_hits
 
 goto :eof

@@ -319,13 +346,14 @@ set /p PADDLE_WHL_FILE_WIN=< whl_file.txt
 @ECHO ON
 pip uninstall -y paddlepaddle
 pip uninstall -y paddlepaddle-gpu
-pip install -U %PADDLE_WHL_FILE_WIN% --user
+pip install %PADDLE_WHL_FILE_WIN% --user
 if %ERRORLEVEL% NEQ 0 (
     call paddle_winci\Scripts\deactivate.bat 2>NUL
     echo pip install whl package failed!
     exit /b 1
 )
+set CUDA_VISIBLE_DEVICES=0
 python %work_dir%\paddle\scripts\installation_validate.py
 goto :eof

@@ -383,7 +411,7 @@ if "%WITH_GPU%"=="ON" (
 :parallel_test_base_gpu
 echo ========================================
-echo Running GPU unit tests...
+echo Running GPU unit tests in parallel way ...
 echo ========================================
 
 setlocal enabledelayedexpansion

@@ -451,6 +479,7 @@ goto:eof
 echo ========================================
 echo Running CPU unit tests in parallel way ...
 echo ========================================
+ctest.exe -E "(%disable_ut_quickly%)" -LE %nightly_label% --output-on-failure -C Release -j 8 --repeat until-pass:4 after-timeout:4
 
 goto :eof

@@ -622,6 +651,7 @@ taskkill /f /im vctip.exe 2>NUL
 taskkill /f /im cvtres.exe 2>NUL
 taskkill /f /im rc.exe 2>NUL
 wmic process where name="op_function_generator.exe" call terminate 2>NUL
+wmic process where name="python.exe" call terminate 2>NUL
 taskkill /f /im python.exe 2>NUL
 echo 0 > %cache_dir%\error_code.txt
 type %cache_dir%\error_code.txt
python/paddle/fluid/tests/CMakeLists.txt

@@ -9,7 +9,9 @@ endforeach()
 add_subdirectory(unittests)
 add_subdirectory(book)
 
-if(NOT APPLE AND NOT WIN32)
+# TODO: support New Custom OP on Mac
+if(NOT APPLE)
     add_subdirectory(custom_op)
 endif()
 
 set_tests_properties(test_beam_search_decoder PROPERTIES TIMEOUT 120)
python/paddle/fluid/tests/custom_op/CMakeLists.txt

+# New custom OP can support Windows/Linux now
 if(WITH_GPU)
+    # 'test_custom_relu_op_setup/jit' compile .cc and .cu file
+    py_test(test_custom_relu_op_setup SRCS test_custom_relu_op_setup.py)
+    py_test(test_custom_relu_op_jit SRCS test_custom_relu_op_jit.py)
+
+    # Compiling shared library will cost some time, but running process is very fast.
+    set_tests_properties(test_custom_relu_op_setup PROPERTIES TIMEOUT 250)
+    set_tests_properties(test_custom_relu_op_jit PROPERTIES TIMEOUT 180)
+endif()
+
+py_test(test_sysconfig SRCS test_sysconfig.py)
+
+# 'test_dispatch' compile .cc file
+py_test(test_dispatch_jit SRCS test_dispatch_jit.py)
+set_tests_properties(test_dispatch_jit PROPERTIES TIMEOUT 120)
+
+py_test(test_multi_out_jit SRCS test_multi_out_jit.py)
+set_tests_properties(test_multi_out_jit PROPERTIES TIMEOUT 120)
+
+py_test(test_custom_attrs_jit SRCS test_custom_attrs_jit.py)
+set_tests_properties(test_custom_attrs_jit PROPERTIES TIMEOUT 120)
+
+if(NOT LINUX)
+    return()
+endif()
+
+# TODO(zhouwei): support test_check_abi and abi check on Windows
+py_test(test_check_abi SRCS test_check_abi.py)
+
+# Old custom OP only support Linux, only run on Linux
+py_test(test_custom_op SRCS test_custom_op.py)
+py_test(test_jit_load SRCS test_jit_load.py)
+py_test(test_setup_install SRCS test_setup_install.py)
+py_test(test_setup_build SRCS test_setup_build.py)
+
+set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
+set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
+set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
+
+if(WITH_ROCM)
+    hip_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
+elseif(WITH_GPU)
     nv_library(relu_op_shared SHARED SRCS relu_op.cc relu_op.cu DEPS paddle_framework_shared)
 else()
     cc_library(relu_op_shared SHARED SRCS relu_op.cc DEPS paddle_framework_shared)

@@ -16,19 +59,3 @@ get_target_property(TARGET_LIBRARIES relu_op_shared LINK_LIBRARIES)
 LIST(REMOVE_ITEM TARGET_LIBRARIES glog)
 LIST(REMOVE_ITEM TARGET_LIBRARIES gflags)
 set_property(TARGET relu_op_shared PROPERTY LINK_LIBRARIES ${TARGET_LIBRARIES})
 
-file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
-string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
-foreach(src ${TEST_OPS})
-    py_test(${src} SRCS ${src}.py)
-endforeach()
-
-# Compiling .so will cost some time, but running process is very fast.
-set_tests_properties(test_jit_load PROPERTIES TIMEOUT 180)
-set_tests_properties(test_setup_install PROPERTIES TIMEOUT 180)
-set_tests_properties(test_setup_build PROPERTIES TIMEOUT 180)
-set_tests_properties(test_dispatch PROPERTIES TIMEOUT 180)
-set_tests_properties(test_simple_custom_op_setup PROPERTIES TIMEOUT 250)
-set_tests_properties(test_simple_custom_op_jit PROPERTIES TIMEOUT 180)
python/paddle/fluid/tests/custom_op/attr_test_op.cc (new file, mode 100644)

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <cstdlib>
#include <iostream>
#include <vector>

#include "paddle/extension.h"

template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}

std::vector<paddle::Tensor> AttrTestForward(
    const paddle::Tensor& x,
    bool bool_attr,
    int int_attr,
    float float_attr,
    int64_t int64_attr,
    std::string str_attr,
    std::vector<int> int_vec_attr,
    std::vector<float> float_vec_attr,
    std::vector<int64_t> int64_vec_attr,
    std::vector<std::string> str_vec_attr) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(), x.size());
      }));

  // Check attrs value
  if (bool_attr != true) {
    throw std::runtime_error("bool_attr value error.");
  }
  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }
  if (std::abs(float_attr - 3.14) > 1e-6) {
    throw std::runtime_error("float_attr value error.");
  }
  if (int64_attr != 10000000000) {
    throw std::runtime_error("int64_attr value error.");
  }
  if (str_attr != "StrAttr") {
    throw std::runtime_error("str_attr value error.");
  }

  if (int_vec_attr.size() != 3) {
    throw std::runtime_error("int_vec_attr size error.");
  } else {
    for (auto& value : int_vec_attr) {
      if (value != 10) {
        throw std::runtime_error("int_vec_attr value error.");
      }
    }
  }

  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  } else {
    for (auto& value : float_vec_attr) {
      if (std::abs(value - 3.14) > 1e-6) {
        throw std::runtime_error("float_vec_attr value error.");
      }
    }
  }

  if (int64_vec_attr.size() != 3) {
    throw std::runtime_error("int64_vec_attr size error.");
  } else {
    for (auto& value : int64_vec_attr) {
      if (value != 10000000000) {
        throw std::runtime_error("int64_vec_attr value error.");
      }
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  } else {
    for (auto& value : str_vec_attr) {
      if (value != "StrAttr") {
        throw std::runtime_error("str_vec_attr value error.");
      }
    }
  }

  return {out};
}

// The attrs of backward op must be the subset of attrs of forward op
std::vector<paddle::Tensor> AttrTestBackward(
    const paddle::Tensor& grad_out,
    int int_attr,
    std::vector<float> float_vec_attr,
    std::vector<std::string> str_vec_attr) {
  auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU);
  grad_x.reshape(grad_out.shape());

  PD_DISPATCH_FLOATING_TYPES(grad_out.type(), "assign_cpu_kernel", ([&] {
                               assign_cpu_kernel<data_t>(
                                   grad_out.data<data_t>(),
                                   grad_x.mutable_data<data_t>(),
                                   grad_out.size());
                             }));

  if (int_attr != 10) {
    throw std::runtime_error("int_attr value error.");
  }

  if (float_vec_attr.size() != 3) {
    throw std::runtime_error("float_vec_attr size error.");
  } else {
    for (auto& value : float_vec_attr) {
      if (std::abs(value - 3.14) > 1e-6) {
        throw std::runtime_error("float_vec_attr value error.");
      }
    }
  }

  if (str_vec_attr.size() != 3) {
    throw std::runtime_error("str_vec_attr size error.");
  } else {
    for (auto& value : str_vec_attr) {
      if (value != "StrAttr") {
        throw std::runtime_error("str_vec_attr value error.");
      }
    }
  }

  return {grad_x};
}

std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  return {x_shape};
}

std::vector<paddle::DataType> InferDType(paddle::DataType x_dtype) {
  return {x_dtype};
}

PD_BUILD_OP("attr_test")
    .Inputs({"X"})
    .Outputs({"Out"})
    .Attrs({"bool_attr: bool",
            "int_attr: int",
            "float_attr: float",
            "int64_attr: int64_t",
            "str_attr: std::string",
            "int_vec_attr: std::vector<int>",
            "float_vec_attr: std::vector<float>",
            "int64_vec_attr: std::vector<int64_t>",
            "str_vec_attr: std::vector<std::string>"})
    .SetKernelFn(PD_KERNEL(AttrTestForward))
    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(InferDType))
    .SetBackwardOp("attr_test_grad")
    .Inputs({paddle::Grad("Out")})
    .Outputs({paddle::Grad("X")})
    .Attrs({"int_attr: int",
            "float_vec_attr: std::vector<float>",
            "str_vec_attr: std::vector<std::string>"})
    .SetKernelFn(PD_KERNEL(AttrTestBackward));
python/paddle/fluid/tests/custom_op/relu_op_simple.cc → python/paddle/fluid/tests/custom_op/custom_relu_op.cc

@@ -17,13 +17,6 @@
 #include "paddle/extension.h"
 
-template <typename data_t>
-void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
-  for (int i = 0; i < x_numel; ++i) {
-    out_data[i] = value;
-  }
-}
-
 template <typename data_t>
 void relu_cpu_forward_kernel(const data_t* x_data,
                              data_t* out_data,

@@ -53,21 +46,8 @@ std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
         relu_cpu_forward_kernel<data_t>(
             x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
       }));
-  // fake multi output: Fake_float64 with float64 dtype
-  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
-  fake_float64.reshape(x.shape());
-  fill_constant_cpu_kernel<double>(
-      fake_float64.mutable_data<double>(x.place()), x.size(), 0.);
-
-  // fake multi output: ZFake_int32 with int32 dtype
-  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
-  zfake_int32.reshape(x.shape());
-  fill_constant_cpu_kernel<int32_t>(
-      zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);
-
-  return {out, fake_float64, zfake_int32};
+
+  return {out};
 }
 
 std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,

@@ -117,16 +97,16 @@ std::vector<paddle::Tensor> ReluBackward(const paddle::Tensor& x,
 }
 
 std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape) {
-  return {x_shape, x_shape, x_shape};
+  return {x_shape};
 }
 
 std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype) {
-  return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
+  return {x_dtype};
 }
 
-PD_BUILD_OP("relu2")
+PD_BUILD_OP("custom_relu")
     .Inputs({"X"})
-    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
+    .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(ReluForward))
     .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
     .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
python/paddle/fluid/tests/custom_op/relu_op_simple.cu → python/paddle/fluid/tests/custom_op/custom_relu_op.cu

@@ -14,16 +14,6 @@
 #include "paddle/extension.h"
 
-template <typename data_t>
-__global__ void fill_constant_cuda_kernel(data_t* y, const int num, data_t value) {
-  int gid = blockIdx.x * blockDim.x + threadIdx.x;
-  for (int i = gid; i < num; i += blockDim.x * gridDim.x) {
-    y[i] = value;
-  }
-}
-
 template <typename data_t>
 __global__ void relu_cuda_forward_kernel(const data_t* x, data_t* y,

@@ -57,18 +47,8 @@ std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
         relu_cuda_forward_kernel<data_t><<<grid, block>>>(
             x.data<data_t>(), out.mutable_data<data_t>(x.place()), numel);
       }));
-  // fake multi output: Fake_1
-  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kGPU);
-  fake_float64.reshape(x.shape());
-  fill_constant_cuda_kernel<double><<<grid, block>>>(
-      fake_float64.mutable_data<double>(x.place()), numel, 0.);
-  // fake multi output: ZFake_1
-  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kGPU);
-  zfake_int32.reshape(x.shape());
-  fill_constant_cuda_kernel<int32_t><<<grid, block>>>(
-      zfake_int32.mutable_data<int32_t>(x.place()), numel, 1);
-
-  return {out, fake_float64, zfake_int32};
+
+  return {out};
 }
 
 std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
python/paddle/fluid/tests/custom_op/relu_op3_simple.cc → python/paddle/fluid/tests/custom_op/custom_relu_op_dup.cc

@@ -29,11 +29,11 @@ std::vector<std::vector<int64_t>> ReluInferShape(std::vector<int64_t> x_shape);
 std::vector<paddle::DataType> ReluInferDType(paddle::DataType x_dtype);
 
-// Reuse codes in `relu_op_simple.cc/cu` to register another custom operator
+// Reuse codes in `custom_relu_op.cc/cu` to register another custom operator
 // to test jointly compile multi operators at same time.
-PD_BUILD_OP("relu3")
+PD_BUILD_OP("custom_relu_dup")
     .Inputs({"X"})
-    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
+    .Outputs({"Out"})
     .SetKernelFn(PD_KERNEL(ReluForward))
     .SetInferShapeFn(PD_INFER_SHAPE(ReluInferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(ReluInferDType))
python/paddle/fluid/tests/custom_op/setup_install_simple.py → python/paddle/fluid/tests/custom_op/custom_relu_setup.py

@@ -17,11 +17,14 @@ import os
 from utils import paddle_includes, extra_compile_args
 from paddle.utils.cpp_extension import CUDAExtension, setup
 
+# custom_relu_op_dup.cc is only used for multi ops test,
+# not a new op, if you want to test only one op, remove this
+# source file
 setup(
-    name='simple_setup_relu2',
+    name='custom_relu_module_setup',
     ext_modules=CUDAExtension(  # test for not specific name here.
         sources=[
-            'relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'
+            'custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'
         ],  # test for multi ops
         include_dirs=paddle_includes,
         extra_compile_args=extra_compile_args))
python/paddle/fluid/tests/custom_op/multi_out_test_op.cc
0 → 100644
浏览文件 @
d3e60959
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "paddle/extension.h"
template <typename data_t>
void assign_cpu_kernel(const data_t* x_data,
                       data_t* out_data,
                       int64_t x_numel) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = x_data[i];
  }
}

template <typename data_t>
void fill_constant_cpu_kernel(data_t* out_data, int64_t x_numel, data_t value) {
  for (int i = 0; i < x_numel; ++i) {
    out_data[i] = value;
  }
}

std::vector<paddle::Tensor> MultiOutCPU(const paddle::Tensor& x) {
  auto out = paddle::Tensor(paddle::PlaceType::kCPU);
  out.reshape(x.shape());

  PD_DISPATCH_FLOATING_TYPES(
      x.type(), "assign_cpu_kernel", ([&] {
        assign_cpu_kernel<data_t>(
            x.data<data_t>(), out.mutable_data<data_t>(x.place()), x.size());
      }));

  // fake multi output: Fake_float64 with float64 dtype
  auto fake_float64 = paddle::Tensor(paddle::PlaceType::kCPU);
  fake_float64.reshape(x.shape());
  fill_constant_cpu_kernel<double>(
      fake_float64.mutable_data<double>(x.place()), x.size(), 0.);

  // fake multi output: ZFake_int32 with int32 dtype
  auto zfake_int32 = paddle::Tensor(paddle::PlaceType::kCPU);
  zfake_int32.reshape(x.shape());
  fill_constant_cpu_kernel<int32_t>(
      zfake_int32.mutable_data<int32_t>(x.place()), x.size(), 1);

  return {out, fake_float64, zfake_int32};
}

std::vector<std::vector<int64_t>> InferShape(std::vector<int64_t> x_shape) {
  return {x_shape, x_shape, x_shape};
}

std::vector<paddle::DataType> InferDtype(paddle::DataType x_dtype) {
  return {x_dtype, paddle::DataType::FLOAT64, paddle::DataType::INT32};
}

PD_BUILD_OP("multi_out")
    .Inputs({"X"})
    .Outputs({"Out", "Fake_float64", "ZFake_int32"})
    .SetKernelFn(PD_KERNEL(MultiOutCPU))
    .SetInferShapeFn(PD_INFER_SHAPE(InferShape))
    .SetInferDtypeFn(PD_INFER_DTYPE(InferDtype));
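A hedged sketch of exercising this operator after a JIT build (the committed version is test_multi_out_jit.py, further down in this diff; include paths are omitted here for brevity):

# Hedged sketch: JIT-compile multi_out_test_op.cc and check the fake outputs.
import numpy as np
import paddle
from paddle.utils.cpp_extension import load

multi_out_module = load(name='multi_out_jit', sources=['multi_out_test_op.cc'])

x = paddle.to_tensor(np.random.uniform(-1, 1, [4, 8]).astype('float32'))
out, zero_float64, one_int32 = multi_out_module.multi_out(x)
np.testing.assert_array_equal(zero_float64.numpy(),
                              np.zeros([4, 8], dtype='float64'))
np.testing.assert_array_equal(one_int32.numpy(),
                              np.ones([4, 8], dtype='int32'))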
python/paddle/fluid/tests/custom_op/test_custom_attrs_jit.py
0 → 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import numpy as np

import paddle
from paddle.utils.cpp_extension import load, get_build_directory
from utils import paddle_includes, extra_compile_args
from paddle.utils.cpp_extension.extension_utils import run_cmd

# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_attrs_jit\\custom_attrs_jit.pyd'.format(get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
custom_attrs = load(
    name='custom_attrs_jit',
    sources=['attr_test_op.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)


class TestJitCustomAttrs(unittest.TestCase):
    def test_attr_value(self):
        paddle.set_device('cpu')
        # prepare test value
        bool_attr = True
        int_attr = 10
        float_attr = 3.14
        int64_attr = 10000000000
        str_attr = "StrAttr"
        int_vec_attr = [10, 10, 10]
        float_vec_attr = [3.14, 3.14, 3.14]
        int64_vec_attr = [10000000000, 10000000000, 10000000000]
        str_vec_attr = ["StrAttr", "StrAttr", "StrAttr"]

        x = paddle.ones([2, 2], dtype='float32')
        x.stop_gradient = False
        out = custom_attrs.attr_test(x, bool_attr, int_attr, float_attr,
                                     int64_attr, str_attr, int_vec_attr,
                                     float_vec_attr, int64_vec_attr,
                                     str_vec_attr)
        out.stop_gradient = False
        out.backward()

        self.assertTrue(np.array_equal(x.numpy(), out.numpy()))


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/custom_op/test_custom_relu_op_jit.py
0 → 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import subprocess
import unittest
import paddle
import numpy as np
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_compile_args
from test_custom_relu_op_setup import custom_relu_dynamic, custom_relu_static

# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
file = '{}\\custom_relu_module_jit\\custom_relu_module_jit.pyd'.format(
    get_build_directory())
if os.name == 'nt' and os.path.isfile(file):
    cmd = 'del {}'.format(file)
    run_cmd(cmd, True)

# Compile and load custom op Just-In-Time.
# custom_relu_op_dup.cc is only used for multi ops test,
# not a new op, if you want to test only one op, remove this
# source file
custom_module = load(
    name='custom_relu_module_jit',
    sources=['custom_relu_op.cc', 'custom_relu_op.cu', 'custom_relu_op_dup.cc'],
    extra_include_paths=paddle_includes,  # add for Coverage CI
    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
    extra_cuda_cflags=extra_compile_args,  # add for Coverage CI
    verbose=True)


class TestJITLoad(unittest.TestCase):
    def setUp(self):
        self.custom_ops = [
            custom_module.custom_relu, custom_module.custom_relu_dup
        ]
        self.dtypes = ['float32', 'float64']
        self.devices = ['cpu', 'gpu']

    def test_static(self):
        for device in self.devices:
            for dtype in self.dtypes:
                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                for custom_op in self.custom_ops:
                    out = custom_relu_static(custom_op, device, dtype, x)
                    pd_out = custom_relu_static(custom_op, device, dtype, x,
                                                False)
                    self.assertTrue(
                        np.array_equal(out, pd_out),
                        "custom op out: {},\n paddle api out: {}".format(
                            out, pd_out))

    def test_dynamic(self):
        for device in self.devices:
            for dtype in self.dtypes:
                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                for custom_op in self.custom_ops:
                    out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
                                                      x)
                    pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device,
                                                            dtype, x, False)
                    self.assertTrue(
                        np.array_equal(out, pd_out),
                        "custom op out: {},\n paddle api out: {}".format(
                            out, pd_out))
                    self.assertTrue(
                        np.array_equal(x_grad, pd_x_grad),
                        "custom op x grad: {},\n paddle api x grad: {}".format(
                            x_grad, pd_x_grad))


if __name__ == '__main__':
    unittest.main()
python/paddle/fluid/tests/custom_op/test_simple_custom_op_setup.py → python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
...
@@ -23,13 +23,13 @@ import numpy as np
 from paddle.utils.cpp_extension.extension_utils import run_cmd


-def relu2_dynamic(func, device, dtype, np_x, use_func=True):
+def custom_relu_dynamic(func, device, dtype, np_x, use_func=True):
     paddle.set_device(device)

     t = paddle.to_tensor(np_x)
     t.stop_gradient = False

-    out = func(t)[0] if use_func else paddle.nn.functional.relu(t)
+    out = func(t) if use_func else paddle.nn.functional.relu(t)
     out.stop_gradient = False

     out.backward()
...
@@ -37,7 +37,12 @@ def relu2_dynamic(func, device, dtype, np_x, use_func=True):
     return out.numpy(), t.grad


-def relu2_static(func, device, dtype, np_x, use_func=True):
+def custom_relu_static(func,
+                       device,
+                       dtype,
+                       np_x,
+                       use_func=True,
+                       test_infer=False):
     paddle.enable_static()
     paddle.set_device(device)
...
@@ -45,8 +50,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
     with static.program_guard(static.Program()):
         x = static.data(name='X', shape=[None, 8], dtype=dtype)
         x.stop_gradient = False
-        # out, fake_float64, fake_int32
-        out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
+        out = func(x) if use_func else paddle.nn.functional.relu(x)
         static.append_backward(out)

         exe = static.Executor()
...
@@ -60,7 +64,7 @@ def relu2_static(func, device, dtype, np_x, use_func=True):
     return out_v


-def relu2_static_pe(func, device, dtype, np_x, use_func=True):
+def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
     paddle.enable_static()
     paddle.set_device(device)
...
@@ -69,7 +73,7 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
     with static.program_guard(static.Program()):
         x = static.data(name='X', shape=[None, 8], dtype=dtype)
         x.stop_gradient = False
-        out = func(x)[0] if use_func else paddle.nn.functional.relu(x)
+        out = func(x) if use_func else paddle.nn.functional.relu(x)
         static.append_backward(out)

         exe = static.Executor()
...
@@ -87,11 +91,58 @@ def relu2_static_pe(func, device, dtype, np_x, use_func=True):
     return out_v


+def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
+    paddle.set_device(device)
+
+    with static.scope_guard(static.Scope()):
+        with static.program_guard(static.Program()):
+            # simple module
+            data = static.data(
+                name='data', shape=[None, 1, 28, 28], dtype='float32')
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
+
+            hidden = static.nn.fc(data, size=128)
+            hidden = func(hidden)
+            hidden = static.nn.fc(hidden, size=128)
+            predict = static.nn.fc(hidden, size=10, activation='softmax')
+            loss = paddle.nn.functional.cross_entropy(input=hidden, label=label)
+            avg_loss = paddle.mean(loss)
+
+            opt = paddle.optimizer.SGD(learning_rate=0.1)
+            opt.minimize(avg_loss)
+
+            # run start up model
+            exe = static.Executor()
+            exe.run(static.default_startup_program())
+
+            # train
+            for i in range(4):
+                avg_loss_v = exe.run(static.default_main_program(),
+                                     feed={'data': np_data,
+                                           'label': np_label},
+                                     fetch_list=[avg_loss])
+
+            # save inference model
+            static.save_inference_model(path_prefix, [data], [predict], exe)
+
+            # get train predict value
+            predict_v = exe.run(static.default_main_program(),
+                                feed={'data': np_data,
+                                      'label': np_label},
+                                fetch_list=[predict])
+
+    return predict_v
+
+
 class TestNewCustomOpSetUpInstall(unittest.TestCase):
     def setUp(self):
         cur_dir = os.path.dirname(os.path.abspath(__file__))
         # compile, install the custom op egg into site-packages under background
-        cmd = 'cd {} && python setup_install_simple.py install'.format(cur_dir)
+        if os.name == 'nt':
+            cmd = 'cd /d {} && python custom_relu_setup.py install'.format(
+                cur_dir)
+        else:
+            cmd = 'cd {} && python custom_relu_setup.py install'.format(cur_dir)
         run_cmd(cmd)

         # NOTE(Aurelius84): Normally, it's no need to add following codes for users.
...
@@ -99,28 +150,42 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
         # sys.path has been updated. So we update it manually.

         # See: https://stackoverflow.com/questions/56974185/import-runtime-installed-module-using-pip-in-python-3
-        site_dir = site.getsitepackages()[0]
+        if os.name == 'nt':
+            # NOTE(zhouwei25): getsitepackages on windows will return a list: [python install dir, site packages dir]
+            site_dir = site.getsitepackages()[1]
+        else:
+            site_dir = site.getsitepackages()[0]
         custom_egg_path = [
-            x for x in os.listdir(site_dir) if 'simple_setup_relu2' in x
+            x for x in os.listdir(site_dir) if 'custom_relu_module_setup' in x
         ]
         assert len(custom_egg_path) == 1, "Matched egg number is %d." % len(
             custom_egg_path)
         sys.path.append(os.path.join(site_dir, custom_egg_path[0]))

         # usage: import the package directly
-        import simple_setup_relu2
-        self.custom_ops = [simple_setup_relu2.relu2, simple_setup_relu2.relu3]
+        import custom_relu_module_setup
+        # `custom_relu_dup` is the same as `custom_relu`
+        self.custom_ops = [
+            custom_relu_module_setup.custom_relu,
+            custom_relu_module_setup.custom_relu_dup
+        ]

         self.dtypes = ['float32', 'float64']
         self.devices = ['cpu', 'gpu']

+        # config seed
+        SEED = 2021
+        paddle.seed(SEED)
+        paddle.framework.random._manual_program_seed(SEED)
+
     def test_static(self):
         for device in self.devices:
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out = relu2_static(custom_op, device, dtype, x)
-                    pd_out = relu2_static(custom_op, device, dtype, x, False)
+                    out = custom_relu_static(custom_op, device, dtype, x)
+                    pd_out = custom_relu_static(custom_op, device, dtype, x,
+                                                False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
@@ -131,8 +196,9 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out = relu2_static_pe(custom_op, device, dtype, x)
-                    pd_out = relu2_static_pe(custom_op, device, dtype, x, False)
+                    out = custom_relu_static_pe(custom_op, device, dtype, x)
+                    pd_out = custom_relu_static_pe(custom_op, device, dtype, x,
+                                                   False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
@@ -143,9 +209,10 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
             for dtype in self.dtypes:
                 x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
                 for custom_op in self.custom_ops:
-                    out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
-                    pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
-                                                      x, False)
+                    out, x_grad = custom_relu_dynamic(custom_op, device, dtype,
+                                                      x)
+                    pd_out, pd_x_grad = custom_relu_dynamic(custom_op, device,
+                                                            dtype, x, False)
                     self.assertTrue(
                         np.array_equal(out, pd_out),
                         "custom op out: {},\n paddle api out: {}".format(
...
@@ -155,6 +222,28 @@ class TestNewCustomOpSetUpInstall(unittest.TestCase):
                         "custom op x grad: {},\n paddle api x grad: {}".format(
                             x_grad, pd_x_grad))

+    def test_static_save_and_load_inference_model(self):
+        paddle.enable_static()
+        np_data = np.random.random((1, 1, 28, 28)).astype("float32")
+        np_label = np.random.random((1, 1)).astype("int64")
+        path_prefix = "custom_op_inference/custom_relu"
+        for device in self.devices:
+            predict = custom_relu_static_inference(
+                self.custom_ops[0], device, np_data, np_label, path_prefix)
+            # load inference model
+            with static.scope_guard(static.Scope()):
+                exe = static.Executor()
+                [inference_program, feed_target_names,
+                 fetch_targets] = static.load_inference_model(path_prefix, exe)
+                predict_infer = exe.run(inference_program,
+                                        feed={feed_target_names[0]: np_data},
+                                        fetch_list=fetch_targets)
+                self.assertTrue(
+                    np.array_equal(predict, predict_infer),
+                    "custom op predict: {},\n custom op infer predict: {}".
+                    format(predict, predict_infer))
+        paddle.disable_static()
+

 if __name__ == '__main__':
     unittest.main()
python/paddle/fluid/tests/custom_op/test_dispatch.py → python/paddle/fluid/tests/custom_op/test_dispatch_jit.py
...
@@ -16,14 +16,23 @@ import os
 import unittest
 import paddle
 import numpy as np
-from paddle.utils.cpp_extension import load
+from paddle.utils.cpp_extension import load, get_build_directory
 from utils import paddle_includes, extra_compile_args
+from paddle.utils.cpp_extension.extension_utils import run_cmd
+
+# Because Windows don't use docker, the shared lib already exists in the
+# cache dir, it will not be compiled again unless the shared lib is removed.
+file = '{}\\dispatch_op\\dispatch_op.pyd'.format(get_build_directory())
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)

 dispatch_op = load(
     name='dispatch_op',
     sources=['dispatch_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args)  # add for Coverage CI
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    verbose=True)


 class TestJitDispatch(unittest.TestCase):
...
python/paddle/fluid/tests/custom_op/test_jit_load.py
...
@@ -29,7 +29,8 @@ custom_module = load(
     sources=['relu_op.cc', 'relu_op.cu', 'relu_op3.cc', 'relu_op3.cu'],
     interpreter='python',  # add for unittest
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args,  # add for Coverage CI
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI,
+    extra_cuda_cflags=extra_compile_args,  # add for split cpp/cuda flags
     verbose=True  # add for unittest
 )
...
python/paddle/fluid/tests/custom_op/test_simple_custom_op_jit.py → python/paddle/fluid/tests/custom_op/test_multi_out_jit.py
...
@@ -13,81 +13,54 @@
 # limitations under the License.

 import os
+import subprocess
 import unittest
-import paddle
 import numpy as np
+
+import paddle
-from paddle.utils.cpp_extension import load
+from paddle.utils.cpp_extension import load, get_build_directory
+from paddle.utils.cpp_extension.extension_utils import run_cmd
 from utils import paddle_includes, extra_compile_args
-from test_simple_custom_op_setup import relu2_dynamic, relu2_static
+
+# Because Windows don't use docker, the shared lib already exists in the
+# cache dir, it will not be compiled again unless the shared lib is removed.
+file = '{}\\multi_out_jit\\multi_out_jit.pyd'.format(get_build_directory())
+if os.name == 'nt' and os.path.isfile(file):
+    cmd = 'del {}'.format(file)
+    run_cmd(cmd, True)

 # Compile and load custom op Just-In-Time.
-custom_module = load(
-    name='simple_jit_relu2',
-    sources=['relu_op_simple.cc', 'relu_op_simple.cu', 'relu_op3_simple.cc'],
+multi_out_module = load(
+    name='multi_out_jit',
+    sources=['multi_out_test_op.cc'],
     extra_include_paths=paddle_includes,  # add for Coverage CI
-    extra_cflags=extra_compile_args)  # add for Coverage CI
-
-
-class TestJITLoad(unittest.TestCase):
-    def setUp(self):
-        self.custom_ops = [custom_module.relu2, custom_module.relu3]
-        self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
-
-    def test_static(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                for custom_op in self.custom_ops:
-                    out = relu2_static(custom_op, device, dtype, x)
-                    pd_out = relu2_static(custom_op, device, dtype, x, False)
-                    self.assertTrue(
-                        np.array_equal(out, pd_out),
-                        "custom op out: {},\n paddle api out: {}".format(
-                            out, pd_out))
-
-    def test_dynamic(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                for custom_op in self.custom_ops:
-                    out, x_grad = relu2_dynamic(custom_op, device, dtype, x)
-                    pd_out, pd_x_grad = relu2_dynamic(custom_op, device, dtype,
-                                                      x, False)
-                    self.assertTrue(
-                        np.array_equal(out, pd_out),
-                        "custom op out: {},\n paddle api out: {}".format(
-                            out, pd_out))
-                    self.assertTrue(
-                        np.array_equal(x_grad, pd_x_grad),
-                        "custom op x grad: {},\n paddle api x grad: {}".format(
-                            x_grad, pd_x_grad))
+    extra_cxx_cflags=extra_compile_args,  # add for Coverage CI
+    verbose=True)


 class TestMultiOutputDtypes(unittest.TestCase):
     def setUp(self):
-        self.custom_op = custom_module.relu2
+        self.custom_op = multi_out_module.multi_out
         self.dtypes = ['float32', 'float64']
-        self.devices = ['cpu', 'gpu']
+        self.devices = ['cpu']

-    def test_static(self):
-        paddle.enable_static()
-        for device in self.devices:
-            for dtype in self.dtypes:
-                res = self.run_static(device, dtype)
-                self.check_multi_outputs(res)
-        paddle.disable_static()
-
-    def run_static(self, device, dtype):
-        paddle.set_device(device)
-        x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-
-        with paddle.static.scope_guard(paddle.static.Scope()):
-            with paddle.static.program_guard(paddle.static.Program()):
-                x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
-                outs = self.custom_op(x)
-
-                exe = paddle.static.Executor()
-                exe.run(paddle.static.default_startup_program())
-                res = exe.run(paddle.static.default_main_program(),
-                              feed={'X': x_data},
-                              fetch_list=outs)
-
-        return res
+    def test_dynamic(self):
+        for device in self.devices:
+            for dtype in self.dtypes:
+                paddle.set_device(device)
+                x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+                x = paddle.to_tensor(x_data)
+                outs = self.custom_op(x)
+
+                self.assertTrue(len(outs) == 3)
+                self.check_multi_outputs(outs, True)

     def check_multi_outputs(self, outs, is_dynamic=False):
         out, zero_float64, one_int32 = outs
...
@@ -103,22 +76,24 @@ class TestMultiOutputDtypes(unittest.TestCase):
         self.assertTrue(
             np.array_equal(one_int32, np.ones([4, 8]).astype('int32')))

-    def test_dynamic(self):
-        for device in self.devices:
-            for dtype in self.dtypes:
-                paddle.set_device(device)
-                x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
-                x = paddle.to_tensor(x_data)
-                outs = self.custom_op(x)
-
-                self.assertTrue(len(outs) == 3)
-                self.check_multi_outputs(outs, True)
+    def run_static(self, device, dtype):
+        paddle.set_device(device)
+        x_data = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
+
+        with paddle.static.scope_guard(paddle.static.Scope()):
+            with paddle.static.program_guard(paddle.static.Program()):
+                x = paddle.static.data(name='X', shape=[None, 8], dtype=dtype)
+                outs = self.custom_op(x)
+
+                exe = paddle.static.Executor()
+                exe.run(paddle.static.default_startup_program())
+                res = exe.run(paddle.static.default_main_program(),
+                              feed={'X': x_data},
+                              fetch_list=outs)
+
+        return res
+
+    def test_static(self):
+        paddle.enable_static()
+        for device in self.devices:
+            for dtype in self.dtypes:
+                res = self.run_static(device, dtype)
+                self.check_multi_outputs(res)
+        paddle.disable_static()

 if __name__ == '__main__':
...
python/paddle/fluid/tests/custom_op/utils.py
...
@@ -23,8 +23,8 @@ site_packages_path = get_python_lib()
 # paddle include directory. Because the following path is generated after insalling
 # PaddlePaddle whl. So here we specific `include_dirs` to avoid errors in CI.
 paddle_includes = [
-    os.path.join(site_packages_path, 'paddle/include'),
-    os.path.join(site_packages_path, 'paddle/include/third_party')
+    os.path.join(site_packages_path, 'paddle', 'include'),
+    os.path.join(site_packages_path, 'paddle', 'include', 'third_party')
 ]

 # TODO(Aurelius84): Memory layout is different if build paddle with PADDLE_WITH_MKLDNN=ON,
...
python/paddle/utils/cpp_extension/__init__.py
...
@@ -25,6 +25,5 @@ from . import cpp_extension
 from . import extension_utils

 __all__ = [
-    'CppExtension', 'CUDAExtension', 'BuildExtension', 'load', 'setup',
-    'get_build_directory'
+    'CppExtension', 'CUDAExtension', 'load', 'setup', 'get_build_directory'
 ]
python/paddle/utils/cpp_extension/cpp_extension.py
This diff is collapsed.
python/paddle/utils/cpp_extension/extension_utils.py
...
@@ -16,7 +16,6 @@ import os
 import re
 import six
 import sys
-import copy
 import glob
 import logging
 import collections
...
@@ -38,11 +37,17 @@ logger = logging.getLogger("utils.cpp_extension")
 OS_NAME = sys.platform
 IS_WINDOWS = OS_NAME.startswith('win')
-NVCC_COMPILE_FLAGS = [
-    '-ccbin', 'cc', '-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-DPADDLE_USE_DSO',
-    '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr', '-O3', '-DNVCC'
+
+MSVC_COMPILE_FLAGS = [
+    '/MT', '/wd4819', '/wd4251', '/wd4244', '/wd4267', '/wd4275', '/wd4018',
+    '/wd4190', '/EHsc', '/w', '/DGOOGLE_GLOG_DLL_DECL',
+    '/DBOOST_HAS_STATIC_ASSERT', '/DNDEBUG', '/DPADDLE_USE_DSO'
 ]
+
+MSVC_LINK_FLAGS = ['/MACHINE:X64', 'paddle_framework.lib']
+
+COMMON_NVCC_FLAGS = ['-DPADDLE_WITH_CUDA', '-DEIGEN_USE_GPU', '-O3']
+
 GCC_MINI_VERSION = (5, 4, 0)
 # Give warning if using wrong compiler
 WRONG_COMPILER_WARNING = '''
...
@@ -80,9 +85,17 @@ information
 '''

 USING_NEW_CUSTOM_OP_LOAD_METHOD = True

+DEFAULT_OP_ATTR_NAMES = [
+    core.op_proto_and_checker_maker.kOpRoleAttrName(),
+    core.op_proto_and_checker_maker.kOpRoleVarAttrName(),
+    core.op_proto_and_checker_maker.kOpNameScopeAttrName(),
+    core.op_proto_and_checker_maker.kOpCreationCallstackAttrName(),
+    core.op_proto_and_checker_maker.kOpDeviceAttrName()
+]

 # NOTE(chenweihang): In order to be compatible with
 # the two custom op define method, after removing
 # old method, we can remove them together
 def use_new_custom_op_load_method(*args):
     global USING_NEW_CUSTOM_OP_LOAD_METHOD
...
@@ -206,11 +219,23 @@ class CustomOpInfo:
         return next(reversed(self.op_info_map.items()))


-def prepare_unix_cflags(cflags):
+def prepare_unix_cudaflags(cflags):
     """
     Prepare all necessary compiled flags for nvcc compiling CUDA files.
     """
-    cflags = NVCC_COMPILE_FLAGS + cflags + get_cuda_arch_flags(cflags)
+    cflags = COMMON_NVCC_FLAGS + [
+        '-ccbin', 'cc', '-Xcompiler', '-fPIC', '-w', '--expt-relaxed-constexpr',
+        '-DNVCC'
+    ] + cflags + get_cuda_arch_flags(cflags)

     return cflags


+def prepare_win_cudaflags(cflags):
+    """
+    Prepare all necessary compiled flags for nvcc compiling CUDA files.
+    """
+    cflags = COMMON_NVCC_FLAGS + ['-w'] + cflags + get_cuda_arch_flags(cflags)
+
+    return cflags
+
...
@@ -238,13 +263,14 @@ def get_cuda_arch_flags(cflags):

 def normalize_extension_kwargs(kwargs, use_cuda=False):
     """
     Normalize include_dirs, library_dir and other attributes in kwargs.
     """
     assert isinstance(kwargs, dict)
+
     # append necessary include dir path of paddle
     include_dirs = kwargs.get('include_dirs', [])
     include_dirs.extend(find_paddle_includes(use_cuda))
+
     kwargs['include_dirs'] = include_dirs

     # append necessary lib path of paddle
...
@@ -252,50 +278,46 @@ def normalize_extension_kwargs(kwargs, use_cuda=False):
     library_dirs.extend(find_paddle_libraries(use_cuda))
     kwargs['library_dirs'] = library_dirs

-    # add runtime library dirs
-    runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
-    runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
-    kwargs['runtime_library_dirs'] = runtime_library_dirs
-
-    # append compile flags
+    # append compile flags and check settings of compiler
     extra_compile_args = kwargs.get('extra_compile_args', [])
-    extra_compile_args.extend(['-g', '-w'])  # diable warnings
-    kwargs['extra_compile_args'] = extra_compile_args
-
-    # append link flags
-    extra_link_args = kwargs.get('extra_link_args', [])
-    extra_link_args.append('-lpaddle_framework')
-    if use_cuda:
-        extra_link_args.append('-lcudart')
-    kwargs['extra_link_args'] = extra_link_args
-
-    kwargs['language'] = 'c++'
-    return kwargs
-
-
-def find_paddle_includes(use_cuda=False):
-    """
-    Return Paddle necessary include dir path.
-    """
-    # pythonXX/site-packages/paddle/include
-    paddle_include_dir = get_include()
-    third_party_dir = os.path.join(paddle_include_dir, 'third_party')
-    include_dirs = [paddle_include_dir, third_party_dir]
-    return include_dirs
-
-
-def find_cuda_includes():
-    cuda_home = find_cuda_home()
-    if cuda_home is None:
-        raise ValueError(
-            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
-        )
-    return [os.path.join(cuda_home, 'lib64')]
+    if isinstance(extra_compile_args, dict):
+        for compiler in ['cxx', 'nvcc']:
+            if compiler not in extra_compile_args:
+                extra_compile_args[compiler] = []
+
+    if IS_WINDOWS:
+        # TODO(zhouwei): may append compile flags in future
+        pass
+        # append link flags
+        extra_link_args = kwargs.get('extra_link_args', [])
+        extra_link_args.extend(MSVC_LINK_FLAGS)
+        if use_cuda:
+            extra_link_args.extend(['cudadevrt.lib', 'cudart_static.lib'])
+        kwargs['extra_link_args'] = extra_link_args
+    else:
+        # append compile flags
+        add_compile_flag(extra_compile_args, ['-g', '-w'])  # disable warnings
+
+        # append link flags
+        extra_link_args = kwargs.get('extra_link_args', [])
+        if use_new_custom_op_load_method():
+            extra_link_args.append('-lpaddle_custom_op')
+        else:
+            extra_link_args.append('-lpaddle_framework')
+        if use_cuda:
+            extra_link_args.append('-lcudart')
+        kwargs['extra_link_args'] = extra_link_args
+
+    # add runtime library dirs
+    runtime_library_dirs = kwargs.get('runtime_library_dirs', [])
+    runtime_library_dirs.extend(find_paddle_libraries(use_cuda))
+    kwargs['runtime_library_dirs'] = runtime_library_dirs
+
+    kwargs['extra_compile_args'] = extra_compile_args

+    kwargs['language'] = 'c++'
+    return kwargs


 def find_cuda_home():
...
@@ -315,19 +337,22 @@ def find_cuda_home():
             if six.PY3:
                 nvcc_path = nvcc_path.decode()
             nvcc_path = nvcc_path.rstrip('\r\n')
+
             # for example: /usr/local/cuda/bin/nvcc
             cuda_home = os.path.dirname(os.path.dirname(nvcc_path))
     except:
         if IS_WINDOWS:
             # search from default NVIDIA GPU path
             candidate_paths = glob.glob(
-                'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
+                'C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v*.*')
             if len(candidate_paths) > 0:
                 cuda_home = candidate_paths[0]
         else:
             cuda_home = "/usr/local/cuda"
     # step 3. check whether path is valid
-    if not os.path.exists(cuda_home) and core.is_compiled_with_cuda():
+    if cuda_home and not os.path.exists(
+            cuda_home) and core.is_compiled_with_cuda():
         cuda_home = None
         warnings.warn(
             "Not found CUDA runtime, please use `export CUDA_HOME= XXX` to specific it."
...
@@ -336,27 +361,73 @@ def find_cuda_home():
     return cuda_home


+def find_cuda_includes():
+    """
+    Use heuristic method to find cuda include path
+    """
+    cuda_home = find_cuda_home()
+    if cuda_home is None:
+        raise ValueError(
+            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
+        )
+
+    return [os.path.join(cuda_home, 'include')]
+
+
+def find_paddle_includes(use_cuda=False):
+    """
+    Return Paddle necessary include dir path.
+    """
+    # pythonXX/site-packages/paddle/include
+    paddle_include_dir = get_include()
+    third_party_dir = os.path.join(paddle_include_dir, 'third_party')
+    include_dirs = [paddle_include_dir, third_party_dir]
+
+    if use_cuda:
+        cuda_include_dir = find_cuda_includes()
+        include_dirs.extend(cuda_include_dir)
+
+    return include_dirs
+
+
+def find_cuda_libraries():
+    """
+    Use heuristic method to find cuda static lib path
+    """
+    cuda_home = find_cuda_home()
+    if cuda_home is None:
+        raise ValueError(
+            "Not found CUDA runtime, please use `export CUDA_HOME=XXX` to specific it."
+        )
+    if IS_WINDOWS:
+        cuda_lib_dir = [os.path.join(cuda_home, 'lib', 'x64')]
+    else:
+        cuda_lib_dir = [os.path.join(cuda_home, 'lib64')]
+
+    return cuda_lib_dir
+
+
 def find_paddle_libraries(use_cuda=False):
     """
     Return Paddle necessary library dir path.
     """
     # pythonXX/site-packages/paddle/libs
     paddle_lib_dirs = [get_lib()]
+
     if use_cuda:
-        cuda_dirs = find_cuda_includes()
-        paddle_lib_dirs.extend(cuda_dirs)
+        cuda_lib_dir = find_cuda_libraries()
+        paddle_lib_dirs.extend(cuda_lib_dir)
+
     return paddle_lib_dirs


-def add_compile_flag(extension, flag):
-    extra_compile_args = copy.deepcopy(extension.extra_compile_args)
+def add_compile_flag(extra_compile_args, flags):
+    assert isinstance(flags, list)
     if isinstance(extra_compile_args, dict):
         for args in extra_compile_args.values():
-            args.append(flag)
+            args.extend(flags)
     else:
-        extra_compile_args.append(flag)
-
-    extension.extra_compile_args = extra_compile_args
+        extra_compile_args.extend(flags)
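The reworked add_compile_flag now mutates a plain list or a per-compiler dict in place instead of a setuptools Extension, which is why the `import copy` removal above is safe. A standalone sketch of the behavior (the helper body is copied from the diff; the driver lines are illustrative):

# Standalone sketch: exercise the new helper on a {'cxx': ..., 'nvcc': ...} dict.
def add_compile_flag(extra_compile_args, flags):
    assert isinstance(flags, list)
    if isinstance(extra_compile_args, dict):
        for args in extra_compile_args.values():
            args.extend(flags)
    else:
        extra_compile_args.extend(flags)

cflags = {'cxx': ['-O2'], 'nvcc': []}
add_compile_flag(cflags, ['-g', '-w'])
print(cflags)  # {'cxx': ['-O2', '-g', '-w'], 'nvcc': ['-g', '-w']}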
 def is_cuda_file(path):
...
@@ -369,17 +440,34 @@ def is_cuda_file(path):

 def get_build_directory(verbose=False):
     """
-    Return paddle extension root directory, default specific by `PADDLE_EXTENSION_DIR`
+    Return paddle extension root directory to put shared library. It could be specified by
+    ``export PADDLE_EXTENSION_DIR=XXX`` . If not set, ``~/.cache/paddle_extension`` will be used
+    by default.
+
+    Returns:
+        The root directory of compiling customized operators.
+
+    Examples:
+
+    .. code-block:: python
+
+        from paddle.utils.cpp_extension import get_build_directory
+        build_dir = get_build_directory()
+        print(build_dir)
+
     """
     root_extensions_directory = os.environ.get('PADDLE_EXTENSION_DIR')
     if root_extensions_directory is None:
         dir_name = "paddle_extensions"
-        if OS_NAME.startswith('linux'):
-            root_extensions_directory = os.path.join(
-                os.path.expanduser('~/.cache'), dir_name)
-        else:
-            # TODO(Aurelius84): consider wind32/macOs
-            raise NotImplementedError("Only support Linux now.")
+        root_extensions_directory = os.path.join(
+            os.path.expanduser('~/.cache'), dir_name)
+        if IS_WINDOWS:
+            root_extensions_directory = os.path.normpath(
+                root_extensions_directory)
+        elif OS_NAME.startswith('darwin'):
+            # TODO(Aurelius84): consider macOs
+            raise NotImplementedError("Not support Mac now.")

         log_v("$PADDLE_EXTENSION_DIR is not set, using path: {} by default.".
               format(root_extensions_directory), verbose)
...
@@ -404,16 +492,22 @@ def parse_op_info(op_name):
     in_names = [x.name for x in op_proto.inputs]
     out_names = [x.name for x in op_proto.outputs]
+    attr_names = [
+        x.name for x in op_proto.attrs if x.name not in DEFAULT_OP_ATTR_NAMES
+    ]

-    return in_names, out_names
+    return in_names, out_names, attr_names


 def _import_module_from_library(module_name, build_directory, verbose=False):
     """
-    Load .so shared library and import it as callable python module.
+    Load shared library and import it as callable python module.
     """
-    # TODO(Aurelius84): Consider file suffix is .dll on Windows Platform.
-    ext_path = os.path.join(build_directory, module_name + '.so')
+    if IS_WINDOWS:
+        dynamic_suffix = '.pyd'
+    else:
+        dynamic_suffix = '.so'
+    ext_path = os.path.join(build_directory, module_name + dynamic_suffix)
     if not os.path.exists(ext_path):
         raise FileNotFoundError(
             "Extension path: {} does not exist.".format(ext_path))
...
@@ -448,7 +542,7 @@ def _generate_python_module(module_name,

 def _custom_api_content(op_name):
-    params_str, ins_str, outs_str = _get_api_inputs_str(op_name)
+    params_str, ins_str, attrs_str, outs_str = _get_api_inputs_str(op_name)

     API_TEMPLATE = textwrap.dedent("""
         from paddle.fluid.layer_helper import LayerHelper
...
@@ -456,8 +550,9 @@ def _custom_api_content(op_name):
         def {op_name}({inputs}):
             helper = LayerHelper("{op_name}", **locals())

-            # prepare inputs and output
+            # prepare inputs and outputs
             ins = {ins}
+            attrs = {attrs}
             outs = {{}}
             out_names = {out_names}
             for out_name in out_names:
...
@@ -465,7 +560,7 @@ def _custom_api_content(op_name):
                 # in runtime.
                 outs[out_name] = helper.create_variable(dtype='float32')

-            helper.append_op(type="{op_name}", inputs=ins, outputs=outs)
+            helper.append_op(type="{op_name}", inputs=ins, outputs=outs, attrs=attrs)

             res = [outs[out_name] for out_name in out_names]
...
@@ -474,7 +569,11 @@ def _custom_api_content(op_name):
     # generate python api file
     api_content = API_TEMPLATE.format(
-        op_name=op_name, inputs=params_str, ins=ins_str, out_names=outs_str)
+        op_name=op_name,
+        inputs=params_str,
+        ins=ins_str,
+        attrs=attrs_str,
+        out_names=outs_str)

     return api_content
...
@@ -505,22 +604,30 @@ def _get_api_inputs_str(op_name):
     """
     Returns string of api parameters and inputs dict.
     """
-    in_names, out_names = parse_op_info(op_name)
+    in_names, out_names, attr_names = parse_op_info(op_name)
     # e.g: x, y, z
-    params_str = ','.join([p.lower() for p in in_names])
+    param_names = in_names + attr_names
+    params_str = ','.join([p.lower() for p in param_names])
     # e.g: {'X': x, 'Y': y, 'Z': z}
     ins_str = "{%s}" % ','.join(
         ["'{}' : {}".format(in_name, in_name.lower()) for in_name in in_names])
+    # e.g: {'num': n}
+    attrs_str = "{%s}" % ",".join([
+        "'{}' : {}".format(attr_name, attr_name.lower())
+        for attr_name in attr_names
+    ])
     # e.g: ['Out', 'Index']
     outs_str = "[%s]" % ','.join(["'{}'".format(name) for name in out_names])

-    return params_str, ins_str, outs_str
+    return params_str, ins_str, attrs_str, outs_str
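To make the new attrs plumbing concrete, a standalone sketch of the strings this function would now build for a hypothetical op with inputs ['X'], attrs ['num'] and outputs ['Out'] (plain string formatting only, no Paddle calls; the op and its names are assumptions for illustration):

in_names, attr_names, out_names = ['X'], ['num'], ['Out']

param_names = in_names + attr_names
params_str = ','.join([p.lower() for p in param_names])
ins_str = "{%s}" % ','.join(
    ["'{}' : {}".format(n, n.lower()) for n in in_names])
attrs_str = "{%s}" % ",".join(
    ["'{}' : {}".format(a, a.lower()) for a in attr_names])
outs_str = "[%s]" % ','.join(["'{}'".format(n) for n in out_names])

print(params_str)  # x,num
print(ins_str)     # {'X' : x}
print(attrs_str)   # {'num' : num}
print(outs_str)    # ['Out']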
 def _write_setup_file(name,
                       sources,
                       file_path,
                       build_dir,
                       include_dirs,
-                      compile_flags,
+                      extra_cxx_cflags,
+                      extra_cuda_cflags,
                       link_args,
                       verbose=False):
     """
...
@@ -530,18 +637,21 @@
     import os
     from paddle.utils.cpp_extension import CppExtension, CUDAExtension, BuildExtension, setup
     from paddle.utils.cpp_extension import get_build_directory
+    from paddle.utils.cpp_extension.extension_utils import use_new_custom_op_load_method
+
+    use_new_custom_op_load_method({use_new_method})

     setup(
         name='{name}',
         ext_modules=[
             {prefix}Extension(
                 sources={sources},
                 include_dirs={include_dirs},
-                extra_compile_args={extra_compile_args},
+                extra_compile_args={{'cxx':{extra_cxx_cflags}, 'nvcc':{extra_cuda_cflags}}},
                 extra_link_args={extra_link_args})],
         cmdclass={{"build_ext" : BuildExtension.with_options(
-            output_dir=get_build_directory(),
-            no_python_abi_suffix=True,
-            use_new_method={use_new_method})
+            output_dir=r'{build_dir}',
+            no_python_abi_suffix=True)
         }})""").lstrip()

     with_cuda = False
...
@@ -554,8 +664,10 @@
         prefix='CUDA' if with_cuda else 'Cpp',
         sources=list2str(sources),
         include_dirs=list2str(include_dirs),
-        extra_compile_args=list2str(compile_flags),
+        extra_cxx_cflags=list2str(extra_cxx_cflags),
+        extra_cuda_cflags=list2str(extra_cuda_cflags),
         extra_link_args=list2str(link_args),
+        build_dir=build_dir,
         use_new_method=use_new_custom_op_load_method())

     log_v('write setup.py into {}'.format(file_path), verbose)
...
@@ -565,12 +677,12 @@

 def list2str(args):
     """
-    Convert list[str] into string. For example: [x, y] -> "['x', 'y']"
+    Convert list[str] into string. For example: ['x', 'y'] -> "['x', 'y']"
     """
     if args is None: return '[]'
     assert isinstance(args, (list, tuple))
-    args = ["'{}'".format(arg) for arg in args]
-    return '[' + ','.join(args) + ']'
+    args = ["{}".format(arg) for arg in args]
+    return repr(args)
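The switch to repr keeps the documented behavior while letting Python handle the quoting; a quick self-contained check (the function body is copied from the diff, the calls are illustrative):

# Behavior sketch of the new list2str.
def list2str(args):
    if args is None: return '[]'
    assert isinstance(args, (list, tuple))
    args = ["{}".format(arg) for arg in args]
    return repr(args)

print(list2str(['x', 'y']))    # "['x', 'y']"
print(list2str(None))          # "[]"
print(list2str(('-g', '-w')))  # "['-g', '-w']"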
 def _jit_compile(file_path, interpreter=None, verbose=False):
...
@@ -583,7 +695,8 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
     if interpreter is None:
         interpreter = 'python'
     try:
-        py_path = subprocess.check_output(['which', interpreter])
+        which = 'where' if IS_WINDOWS else 'which'
+        py_path = subprocess.check_output([which, interpreter])
         py_version = subprocess.check_output([interpreter, '-V'])
         if six.PY3:
             py_path = py_path.decode()
...
@@ -596,8 +709,13 @@ def _jit_compile(file_path, interpreter=None, verbose=False):
             'Failed to check Python interpreter with `{}`, errors: {}'.format(
                 interpreter, error))

-    compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter,
-                                                setup_file)
+    if IS_WINDOWS:
+        compile_cmd = 'cd /d {} && {} {} build'.format(ext_dir, interpreter,
+                                                       setup_file)
+    else:
+        compile_cmd = 'cd {} && {} {} build'.format(ext_dir, interpreter,
+                                                    setup_file)
     print("Compiling user custom op, it will cost a few seconds.....")
     run_cmd(compile_cmd, verbose)
...
@@ -682,7 +800,7 @@
     try:
         if OS_NAME.startswith('linux'):
             version_info = subprocess.check_output(
-                [compiler, '-dumpfullversion'])
+                [compiler, '-dumpfullversion', '-dumpversion'])
             if six.PY3:
                 version_info = version_info.decode()
             version = version_info.strip().split('.')
...
@@ -694,8 +812,8 @@
                 warnings.warn(
                     ABI_INCOMPATIBILITY_WARNING.format(
                         user_compiler=compiler, version=version_info.strip()))
-    # TODO(Aurelius84): check version compatibility on windows
     elif IS_WINDOWS:
+        # TODO(zhouwei): support check abi compatibility on windows
         warnings.warn("We don't support Windows now.")
     except Exception:
         _, error, _ = sys.exc_info()
...
@@ -714,7 +832,7 @@ def _expected_compiler_current_platform():
     return expect_compilers


-def log_v(info, verbose):
+def log_v(info, verbose=True):
     """
     Print log information on stdout.
     """
...
python/requirements.txt
...
@@ -3,7 +3,8 @@ numpy>=1.13, <=1.16.4 ; python_version<"3.5"
 numpy>=1.13 ; python_version>="3.5" and platform_system != "Windows"
 numpy>=1.13, <=1.19.3 ; python_version>="3.5" and platform_system == "Windows"
 protobuf>=3.1.0
-gast==0.3.3
+gast>=0.3.3 ; platform_system != "Windows"
+gast==0.3.3 ; platform_system == "Windows"
 Pillow
 six
 decorator
...
python/setup.py.in
...
@@ -334,11 +334,21 @@ if '${WITH_XPU_BKCL}' == 'ON':
         shutil.copy('${XPU_BKCL_LIB}', libs_path)
         package_data['paddle.libs']+=['${XPU_BKCL_LIB_NAME}']

-# copy libfuild_framework.so to libs
-if os.name != 'nt' and sys.platform != 'darwin':
-    paddle_framework_lib='${FLUID_FRAMEWORK_SHARED_LIB}'
-    shutil.copy(paddle_framework_lib, libs_path)
-    package_data['paddle.libs'] += [('libpaddle_framework' if os.name != 'nt' else 'paddle_framework') + ext_name]
+# copy libpaddle_framework.so to libs on linux
+if sys.platform.startswith('linux'):
+    shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['libpaddle_framework.so']
+
+# copy libpaddle_custom_op.so to libs on linux
+if sys.platform.startswith('linux'):
+    shutil.copy('${PADDLE_CUSTOM_OP_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['libpaddle_custom_op.so']
+
+# copy paddle_framework.lib/paddle_framework.dll to libs on windows
+if os.name == 'nt':
+    shutil.copy('${FLUID_FRAMEWORK_IMPORT_LIB}', libs_path)
+    shutil.copy('${FLUID_FRAMEWORK_SHARED_LIB}', libs_path)
+    package_data['paddle.libs'] += ['paddle_framework.lib', 'paddle_framework.dll']

 # remove unused paddle/libs/__init__.py
 if os.path.isfile(libs_path+'/__init__.py'):
...
@@ -409,9 +419,9 @@ if '${WITH_GPU}' == 'ON':
 class InstallCommand(InstallCommandBase):
     def finalize_options(self):
         ret = InstallCommandBase.finalize_options(self)
-        self.install_headers = os.path.join(self.install_purelib, 'paddle',
-                                            'include')
+        self.install_lib = self.install_platlib
+        self.install_headers = os.path.join(self.install_platlib, 'paddle',
+                                            'include')
         return ret
...
@@ -462,11 +472,6 @@ class InstallHeaders(Command):
         return self.copy_file(header, install_dir)

     def run(self):
-        # only copy third_party/cudaErrorMessage.pb for cudaErrorMessage on mac or windows
-        if os.name == 'nt' or sys.platform == 'darwin':
-            if '${WITH_GPU}' == 'ON':
-                self.mkdir_and_copy_file('${cudaerror_INCLUDE_DIR}/cudaErrorMessage.pb')
-            return
         hdrs = self.distribution.headers
         if not hdrs:
             return
...