Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
fbedf77e
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fbedf77e
编写于
7月 18, 2022
作者:
王
王明冬
提交者:
GitHub
7月 18, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add ipu support for standalone executor. (#44342)
上级
04e55582
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
78 addition
and
7 deletion
+78
-7
paddle/fluid/framework/new_executor/data_transfer.cc
paddle/fluid/framework/new_executor/data_transfer.cc
+1
-0
paddle/fluid/framework/new_executor/interpretercore.cc
paddle/fluid/framework/new_executor/interpretercore.cc
+6
-0
paddle/fluid/framework/new_executor/new_executor_defs.h
paddle/fluid/framework/new_executor/new_executor_defs.h
+1
-1
paddle/fluid/framework/new_executor/stream_analyzer.cc
paddle/fluid/framework/new_executor/stream_analyzer.cc
+3
-2
paddle/fluid/framework/op_registry.h
paddle/fluid/framework/op_registry.h
+6
-0
paddle/fluid/operators/memcpy_d2h_op.cc
paddle/fluid/operators/memcpy_d2h_op.cc
+28
-0
paddle/fluid/operators/memcpy_h2d_op.cc
paddle/fluid/operators/memcpy_h2d_op.cc
+29
-0
paddle/fluid/operators/memcpy_h2d_op.h
paddle/fluid/operators/memcpy_h2d_op.h
+1
-1
paddle/fluid/platform/device_event_base.h
paddle/fluid/platform/device_event_base.h
+1
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+2
-2
未找到文件。
paddle/fluid/framework/new_executor/data_transfer.cc
浏览文件 @
fbedf77e
...
@@ -315,6 +315,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
...
@@ -315,6 +315,7 @@ std::shared_ptr<OperatorBase> TransferDevice(const std::string& var_name,
op_type
=
kMemcpyH2D
;
op_type
=
kMemcpyH2D
;
int
dst_place_type
=
platform
::
is_gpu_place
(
dst_place
)
?
0
int
dst_place_type
=
platform
::
is_gpu_place
(
dst_place
)
?
0
:
platform
::
is_npu_place
(
dst_place
)
?
1
:
platform
::
is_npu_place
(
dst_place
)
?
1
:
platform
::
is_ipu_place
(
dst_place
)
?
3
:
platform
::
is_xpu_place
(
dst_place
)
?
2
:
platform
::
is_xpu_place
(
dst_place
)
?
2
:
-
1
;
:
-
1
;
attr_map
=
{{
"dst_place_type"
,
dst_place_type
}};
attr_map
=
{{
"dst_place_type"
,
dst_place_type
}};
...
...
paddle/fluid/framework/new_executor/interpretercore.cc
浏览文件 @
fbedf77e
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/os_info.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
#include "paddle/fluid/platform/profiler/supplement_tracing.h"
#include "paddle/phi/common/place.h"
#include "paddle/phi/core/kernel_context.h"
#include "paddle/phi/core/kernel_context.h"
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
...
@@ -475,8 +476,13 @@ void InterpreterCore::Convert(
...
@@ -475,8 +476,13 @@ void InterpreterCore::Convert(
BuildSkipShareLoDInfo
();
BuildSkipShareLoDInfo
();
for
(
size_t
i
=
0
;
i
<
vec_instruction_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
vec_instruction_
.
size
();
++
i
)
{
#ifdef PADDLE_WITH_IPU
gc_event_
.
emplace_back
(
phi
::
CPUPlace
(),
0
);
#else
gc_event_
.
emplace_back
(
vec_instruction_
[
i
].
DeviceContext
().
GetPlace
(),
gc_event_
.
emplace_back
(
vec_instruction_
[
i
].
DeviceContext
().
GetPlace
(),
platform
::
GenerateDeviceEventFlag
());
platform
::
GenerateDeviceEventFlag
());
#endif
}
}
bool
inplaced
=
false
;
bool
inplaced
=
false
;
for
(
auto
inst
:
vec_instruction_
)
{
for
(
auto
inst
:
vec_instruction_
)
{
...
...
paddle/fluid/framework/new_executor/new_executor_defs.h
浏览文件 @
fbedf77e
...
@@ -390,7 +390,7 @@ static bool IsCpuOp(const Instruction& instr) {
...
@@ -390,7 +390,7 @@ static bool IsCpuOp(const Instruction& instr) {
// is supported heterogeneous place
// is supported heterogeneous place
static
bool
IsSupportedHetePlace
(
const
phi
::
Place
&
place
)
{
static
bool
IsSupportedHetePlace
(
const
phi
::
Place
&
place
)
{
return
platform
::
is_gpu_place
(
place
)
||
platform
::
is_npu_place
(
place
)
||
return
platform
::
is_gpu_place
(
place
)
||
platform
::
is_npu_place
(
place
)
||
platform
::
is_xpu_place
(
place
);
platform
::
is_xpu_place
(
place
)
||
platform
::
is_ipu_place
(
place
)
;
}
}
}
// namespace interpreter
}
// namespace interpreter
...
...
paddle/fluid/framework/new_executor/stream_analyzer.cc
浏览文件 @
fbedf77e
...
@@ -204,8 +204,9 @@ bool StreamAnalyzer::IsDirectRun(Instruction& cur_instr,
...
@@ -204,8 +204,9 @@ bool StreamAnalyzer::IsDirectRun(Instruction& cur_instr,
const
Instruction
&
next_instr
)
{
const
Instruction
&
next_instr
)
{
if
(
&
cur_instr
.
DeviceContext
()
==
&
next_instr
.
DeviceContext
())
return
true
;
if
(
&
cur_instr
.
DeviceContext
()
==
&
next_instr
.
DeviceContext
())
return
true
;
// xpu memcpy kerenl is synchronous.
// xpu&ipu memcpy kerenl is synchronous.
if
(
platform
::
is_xpu_place
(
place_
))
return
true
;
if
(
platform
::
is_ipu_place
(
place_
)
||
platform
::
is_xpu_place
(
place_
))
return
true
;
// npu d2h kernel is asynchronous.
// npu d2h kernel is asynchronous.
if
(
platform
::
is_npu_place
(
place_
))
{
if
(
platform
::
is_npu_place
(
place_
))
{
...
...
paddle/fluid/framework/op_registry.h
浏览文件 @
fbedf77e
...
@@ -408,6 +408,12 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
...
@@ -408,6 +408,12 @@ struct OpKernelRegistrarFunctorEx<PlaceType,
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)
__VA_ARGS__)
#define REGISTER_OP_IPU_KERNEL_FUNCTOR(op_type, ...) \
REGISTER_OP_KERNEL_EX( \
op_type, IPU, ::paddle::platform::IPUPlace, DEFAULT_TYPE, \
::paddle::framework::OpKernelType::kDefaultCustomizedTypeValue, \
__VA_ARGS__)
/**
/**
* Macro to mark what Operator and Kernel
* Macro to mark what Operator and Kernel
* we will use and tell the compiler to
* we will use and tell the compiler to
...
...
paddle/fluid/operators/memcpy_d2h_op.cc
浏览文件 @
fbedf77e
...
@@ -233,3 +233,31 @@ REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_d2h,
...
@@ -233,3 +233,31 @@ REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_d2h,
int16_t
,
int16_t
,
ops
::
MemcpyD2HKernel
);
ops
::
MemcpyD2HKernel
);
#endif
#endif
#ifdef PADDLE_WITH_IPU
REGISTER_OP_IPU_KERNEL_FUNCTOR
(
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
ops
::
MemcpyD2HKernel
,
int8_t
,
ops
::
MemcpyD2HKernel
,
uint8_t
,
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int16_t
,
ops
::
MemcpyD2HKernel
);
#endif
paddle/fluid/operators/memcpy_h2d_op.cc
浏览文件 @
fbedf77e
...
@@ -100,6 +100,7 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -100,6 +100,7 @@ class MemcpyH2DOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"0. CUDAPinnedPlace/CPU <->CUDAPlace"
"0. CUDAPinnedPlace/CPU <->CUDAPlace"
"1. NPUPinnedPlace/CPU <-> NPUPlace"
"1. NPUPinnedPlace/CPU <-> NPUPlace"
"2. CPU <->XPUPlace"
"2. CPU <->XPUPlace"
"3. CPU <->IPUPlace"
"Other place type is Unimplemented and will cause ERROR."
);
"Other place type is Unimplemented and will cause ERROR."
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
MemcpyD2H Operator.
MemcpyD2H Operator.
...
@@ -233,3 +234,31 @@ REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_h2d,
...
@@ -233,3 +234,31 @@ REGISTER_OP_NPU_KERNEL_FUNCTOR(memcpy_h2d,
int16_t
,
int16_t
,
ops
::
MemcpyH2DKernel
);
ops
::
MemcpyH2DKernel
);
#endif
#endif
#ifdef PADDLE_WITH_IPU
REGISTER_OP_IPU_KERNEL_FUNCTOR
(
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
ops
::
MemcpyH2DKernel
,
int8_t
,
ops
::
MemcpyH2DKernel
,
uint8_t
,
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int16_t
,
ops
::
MemcpyH2DKernel
);
#endif
paddle/fluid/operators/memcpy_h2d_op.h
浏览文件 @
fbedf77e
...
@@ -50,7 +50,7 @@ class MemcpyH2DFunctor {
...
@@ -50,7 +50,7 @@ class MemcpyH2DFunctor {
lod_tensor
.
dtype
(),
lod_tensor
.
dtype
(),
phi
::
Stream
(
reinterpret_cast
<
phi
::
StreamId
>
(
stream
)));
phi
::
Stream
(
reinterpret_cast
<
phi
::
StreamId
>
(
stream
)));
if
(
dst_place_type_
==
0
||
dst_place_type_
==
1
||
dst_place_type_
==
2
)
{
if
(
dst_place_type_
>=
0
&&
dst_place_type_
<=
3
)
{
framework
::
TensorCopy
(
framework
::
TensorCopy
(
lod_tensor
,
dev_ctx_
.
GetPlace
(),
dev_ctx_
,
&
out_tensor
);
lod_tensor
,
dev_ctx_
.
GetPlace
(),
dev_ctx_
,
&
out_tensor
);
}
else
{
}
else
{
...
...
paddle/fluid/platform/device_event_base.h
浏览文件 @
fbedf77e
...
@@ -64,7 +64,7 @@ class DeviceEvent {
...
@@ -64,7 +64,7 @@ class DeviceEvent {
"Required type < %d, but received type = %d"
,
"Required type < %d, but received type = %d"
,
MaxDeviceTypes
,
MaxDeviceTypes
,
type_id_
));
type_id_
));
// TODO(Aurelius84): only support CPU/CUDA
, need consider XPU/NPU later
// TODO(Aurelius84): only support CPU/CUDA
/XPU/NPU.
PADDLE_ENFORCE_LT
(
type_id_
,
PADDLE_ENFORCE_LT
(
type_id_
,
4
,
4
,
platform
::
errors
::
Unavailable
(
platform
::
errors
::
Unavailable
(
...
...
python/paddle/fluid/executor.py
浏览文件 @
fbedf77e
...
@@ -1388,8 +1388,8 @@ class Executor(object):
...
@@ -1388,8 +1388,8 @@ class Executor(object):
program
=
pruned_program
program
=
pruned_program
def
_can_use_interpreter_core
(
program
,
place
):
def
_can_use_interpreter_core
(
program
,
place
):
if
core
.
is_compiled_with_mlu
()
or
core
.
is_compiled_with_ipu
(
if
core
.
is_compiled_with_mlu
()
or
isinstance
(
)
or
isinstance
(
place
,
core
.
CustomPlace
):
place
,
core
.
CustomPlace
):
return
False
return
False
compiled
=
isinstance
(
program
,
compiler
.
CompiledProgram
)
compiled
=
isinstance
(
program
,
compiler
.
CompiledProgram
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录