Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b5af9575
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
b5af9575
编写于
10月 29, 2021
作者:
W
wanghuancoder
提交者:
GitHub
10月 29, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix some bug in new executor (#36822)
* fix some bug in new executor, test=develop * fix error message, test=develop
上级
be55bac3
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
91 addition
and
63 deletion
+91
-63
paddle/fluid/framework/new_executor/interpretercore.cc
paddle/fluid/framework/new_executor/interpretercore.cc
+5
-3
paddle/fluid/framework/new_executor/interpretercore.h
paddle/fluid/framework/new_executor/interpretercore.h
+3
-3
paddle/fluid/framework/new_executor/interpretercore_util.cc
paddle/fluid/framework/new_executor/interpretercore_util.cc
+4
-2
paddle/fluid/framework/new_executor/standalone_executor.cc
paddle/fluid/framework/new_executor/standalone_executor.cc
+2
-2
paddle/fluid/framework/new_executor/standalone_executor.h
paddle/fluid/framework/new_executor/standalone_executor.h
+3
-3
paddle/fluid/operators/controlflow/fetch_v2_op.cc
paddle/fluid/operators/controlflow/fetch_v2_op.cc
+14
-8
paddle/fluid/operators/memcpy_d2h_op.cc
paddle/fluid/operators/memcpy_d2h_op.cc
+24
-15
paddle/fluid/operators/memcpy_h2d_op.cc
paddle/fluid/operators/memcpy_h2d_op.cc
+24
-15
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+4
-4
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+8
-8
未找到文件。
paddle/fluid/framework/new_executor/interpretercore.cc
浏览文件 @
b5af9575
...
@@ -79,12 +79,13 @@ void InterpreterCore::AddFetch(const std::vector<std::string>& fetch_names) {
...
@@ -79,12 +79,13 @@ void InterpreterCore::AddFetch(const std::vector<std::string>& fetch_names) {
}
}
paddle
::
framework
::
FetchList
InterpreterCore
::
Run
(
paddle
::
framework
::
FetchList
InterpreterCore
::
Run
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
)
{
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
)
{
auto
FeedInput
=
[
&
]
{
auto
FeedInput
=
[
&
]
{
for
(
size_t
i
=
0
;
i
<
feed_names_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
feed_names_
.
size
();
++
i
)
{
auto
*
feed_var
=
global_scope_
->
Var
(
feed_names_
[
i
]);
auto
*
feed_var
=
global_scope_
->
Var
(
feed_names_
[
i
]);
auto
feed_tensor
=
feed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
feed_tensor
=
feed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
feed_tensor
->
ShareDataWith
(
feed_tensors
[
i
]);
feed_tensor
->
ShareDataWith
(
feed_tensors
[
i
]);
feed_tensor
->
set_lod
(
feed_tensors
[
i
].
lod
());
}
}
};
};
...
@@ -495,7 +496,7 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
...
@@ -495,7 +496,7 @@ void InterpreterCore::CheckGC(const Instruction& instr) {
}
}
void
InterpreterCore
::
DryRunPrepare
(
void
InterpreterCore
::
DryRunPrepare
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
)
{
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
)
{
auto
FeedInput
=
[
&
]
{
auto
FeedInput
=
[
&
]
{
for
(
size_t
i
=
0
;
i
<
feed_names_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
feed_names_
.
size
();
++
i
)
{
auto
*
feed_var
=
global_scope_
->
FindVar
(
feed_names_
[
i
]);
auto
*
feed_var
=
global_scope_
->
FindVar
(
feed_names_
[
i
]);
...
@@ -504,6 +505,7 @@ void InterpreterCore::DryRunPrepare(
...
@@ -504,6 +505,7 @@ void InterpreterCore::DryRunPrepare(
auto
feed_tensor
=
feed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
feed_tensor
=
feed_var
->
GetMutable
<
framework
::
LoDTensor
>
();
feed_tensor
->
ShareDataWith
(
feed_tensors
[
i
]);
feed_tensor
->
ShareDataWith
(
feed_tensors
[
i
]);
feed_tensor
->
set_lod
(
feed_tensors
[
i
].
lod
());
}
}
};
};
...
@@ -525,7 +527,7 @@ void InterpreterCore::DryRunPrepare(
...
@@ -525,7 +527,7 @@ void InterpreterCore::DryRunPrepare(
}
}
const
CostInfo
&
InterpreterCore
::
DryRun
(
const
CostInfo
&
InterpreterCore
::
DryRun
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
)
{
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
)
{
DryRunPrepare
(
feed_tensors
);
DryRunPrepare
(
feed_tensors
);
// DryRun may be called many times.
// DryRun may be called many times.
dry_run_profiler_
.
Reset
();
dry_run_profiler_
.
Reset
();
...
...
paddle/fluid/framework/new_executor/interpretercore.h
浏览文件 @
b5af9575
...
@@ -46,9 +46,9 @@ class InterpreterCore {
...
@@ -46,9 +46,9 @@ class InterpreterCore {
const
std
::
vector
<
std
::
string
>&
fetch_names
);
const
std
::
vector
<
std
::
string
>&
fetch_names
);
paddle
::
framework
::
FetchList
Run
(
paddle
::
framework
::
FetchList
Run
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
);
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
);
const
CostInfo
&
DryRun
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
);
const
CostInfo
&
DryRun
(
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
);
private:
private:
void
Convert
();
void
Convert
();
...
@@ -65,7 +65,7 @@ class InterpreterCore {
...
@@ -65,7 +65,7 @@ class InterpreterCore {
void
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
);
void
ExecuteInstructionList
(
const
std
::
vector
<
Instruction
>&
vec_instr
);
void
DryRunPrepare
(
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
);
void
DryRunPrepare
(
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
);
void
CheckGC
(
const
Instruction
&
instr
);
void
CheckGC
(
const
Instruction
&
instr
);
...
...
paddle/fluid/framework/new_executor/interpretercore_util.cc
浏览文件 @
b5af9575
...
@@ -287,7 +287,7 @@ void build_op_func_list(const platform::Place& place,
...
@@ -287,7 +287,7 @@ void build_op_func_list(const platform::Place& place,
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
auto
var
=
var_name_item
.
second
[
i
];
auto
var
=
var_name_item
.
second
[
i
];
auto
&
var_name
=
inputs_names
[
var_name_item
.
first
].
at
(
i
);
auto
&
var_name
=
inputs_names
[
var_name_item
.
first
].
at
(
i
);
auto
tensor_in
=
static_cast
<
const
Tensor
*>
(
&
(
var
->
Get
<
LoDTensor
>
())
);
auto
tensor_in
=
GetLoDTensorOrSelectedRowsValueFromVar
(
*
var
);
if
(
!
tensor_in
->
IsInitialized
())
{
if
(
!
tensor_in
->
IsInitialized
())
{
continue
;
continue
;
}
}
...
@@ -296,7 +296,9 @@ void build_op_func_list(const platform::Place& place,
...
@@ -296,7 +296,9 @@ void build_op_func_list(const platform::Place& place,
->
GetKernelTypeForVar
(
var_name_item
.
first
,
*
tensor_in
,
->
GetKernelTypeForVar
(
var_name_item
.
first
,
*
tensor_in
,
expected_kernel_key
);
expected_kernel_key
);
if
(
platform
::
is_same_place
(
kernel_type_for_var
.
place_
,
if
(
platform
::
is_same_place
(
kernel_type_for_var
.
place_
,
expected_kernel_key
.
place_
))
{
expected_kernel_key
.
place_
)
||
(
is_cuda_pinned_place
(
kernel_type_for_var
.
place_
)
&&
is_cpu_place
(
expected_kernel_key
.
place_
)))
{
// record no need data transformer input var_id
// record no need data transformer input var_id
VLOG
(
3
)
<<
op
->
Type
()
<<
" found no data_transform var: "
<<
var_name
VLOG
(
3
)
<<
op
->
Type
()
<<
" found no data_transform var: "
<<
var_name
<<
" with id: "
<<
var_name
;
<<
" with id: "
<<
var_name
;
...
...
paddle/fluid/framework/new_executor/standalone_executor.cc
浏览文件 @
b5af9575
...
@@ -47,7 +47,7 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
...
@@ -47,7 +47,7 @@ StandaloneExecutor::StandaloneExecutor(const platform::Place& place,
paddle
::
framework
::
FetchList
StandaloneExecutor
::
Run
(
paddle
::
framework
::
FetchList
StandaloneExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
,
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
,
const
std
::
vector
<
std
::
string
>&
fetch_names
)
{
const
std
::
vector
<
std
::
string
>&
fetch_names
)
{
auto
core
=
GetInterpreterCore
(
feed_names
,
fetch_names
);
auto
core
=
GetInterpreterCore
(
feed_names
,
fetch_names
);
...
@@ -56,7 +56,7 @@ paddle::framework::FetchList StandaloneExecutor::Run(
...
@@ -56,7 +56,7 @@ paddle::framework::FetchList StandaloneExecutor::Run(
const
CostInfo
&
StandaloneExecutor
::
DryRun
(
const
CostInfo
&
StandaloneExecutor
::
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
)
{
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
)
{
auto
core
=
GetInterpreterCore
(
feed_names
,
{});
auto
core
=
GetInterpreterCore
(
feed_names
,
{});
auto
&
cost_info
=
core
->
DryRun
(
feed_tensors
);
auto
&
cost_info
=
core
->
DryRun
(
feed_tensors
);
...
...
paddle/fluid/framework/new_executor/standalone_executor.h
浏览文件 @
b5af9575
...
@@ -28,7 +28,7 @@ class ExecutorBase {
...
@@ -28,7 +28,7 @@ class ExecutorBase {
virtual
~
ExecutorBase
()
{}
virtual
~
ExecutorBase
()
{}
virtual
paddle
::
framework
::
FetchList
Run
(
virtual
paddle
::
framework
::
FetchList
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
,
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
,
const
std
::
vector
<
std
::
string
>&
fetch_names
)
=
0
;
const
std
::
vector
<
std
::
string
>&
fetch_names
)
=
0
;
};
};
...
@@ -42,11 +42,11 @@ class StandaloneExecutor : public ExecutorBase {
...
@@ -42,11 +42,11 @@ class StandaloneExecutor : public ExecutorBase {
virtual
paddle
::
framework
::
FetchList
Run
(
virtual
paddle
::
framework
::
FetchList
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
,
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
,
const
std
::
vector
<
std
::
string
>&
fetch_names
);
const
std
::
vector
<
std
::
string
>&
fetch_names
);
const
CostInfo
&
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
CostInfo
&
DryRun
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
Tensor
>&
feed_tensors
);
const
std
::
vector
<
framework
::
LoD
Tensor
>&
feed_tensors
);
private:
private:
void
BuildVariableOuterScope
(
const
framework
::
ProgramDesc
&
pdesc
,
void
BuildVariableOuterScope
(
const
framework
::
ProgramDesc
&
pdesc
,
...
...
paddle/fluid/operators/controlflow/fetch_v2_op.cc
浏览文件 @
b5af9575
...
@@ -128,9 +128,12 @@ class FetchV2Kernel {
...
@@ -128,9 +128,12 @@ class FetchV2Kernel {
if
(
fetch_var
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
fetch_var
->
IsType
<
framework
::
LoDTensor
>
())
{
auto
&
src_item
=
fetch_var
->
Get
<
framework
::
LoDTensor
>
();
auto
&
src_item
=
fetch_var
->
Get
<
framework
::
LoDTensor
>
();
auto
*
dst_item
=
&
(
BOOST_GET
(
framework
::
LoDTensor
,
fetch_list
->
at
(
col
)));
auto
*
dst_item
=
&
(
BOOST_GET
(
framework
::
LoDTensor
,
fetch_list
->
at
(
col
)));
PADDLE_ENFORCE_EQ
(
platform
::
is_cpu_place
(
src_item
.
place
()),
true
,
bool
check_place
=
platform
::
is_cpu_place
(
src_item
.
place
())
||
platform
::
errors
::
InvalidArgument
(
platform
::
is_cuda_pinned_place
(
src_item
.
place
());
"Tensor's place of input(X) must be CPUPlace."
));
PADDLE_ENFORCE_EQ
(
check_place
,
true
,
platform
::
errors
::
InvalidArgument
(
"Tensor's place of input(X) must "
"be CPUPlace or CUDAPinnedPlace."
));
if
(
deepcopy
)
{
if
(
deepcopy
)
{
DeepCopy
(
src_item
,
fetch_var_name
,
dst_item
);
DeepCopy
(
src_item
,
fetch_var_name
,
dst_item
);
}
else
{
}
else
{
...
@@ -188,8 +191,11 @@ REGISTER_OPERATOR(
...
@@ -188,8 +191,11 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
fetch_v2
,
float
,
ops
::
FetchV2Kernel
,
double
,
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
ops
::
FetchV2Kernel
,
int
,
ops
::
FetchV2Kernel
,
fetch_v2
,
float
,
ops
::
FetchV2Kernel
,
double
,
ops
::
FetchV2Kernel
,
int8_t
,
int64_t
,
ops
::
FetchV2Kernel
,
bool
,
ops
::
FetchV2Kernel
,
uint8_t
,
ops
::
FetchV2Kernel
,
int
,
ops
::
FetchV2Kernel
,
ops
::
FetchV2Kernel
,
plat
::
float16
,
int64_t
,
ops
::
FetchV2Kernel
,
bool
,
ops
::
FetchV2Kernel
,
ops
::
FetchV2Kernel
);
paddle
::
platform
::
bfloat16
,
ops
::
FetchV2Kernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
FetchV2Kernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
FetchV2Kernel
,
plat
::
float16
,
ops
::
FetchV2Kernel
,
int16_t
,
ops
::
FetchV2Kernel
);
paddle/fluid/operators/memcpy_d2h_op.cc
浏览文件 @
b5af9575
...
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
...
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
int8_t
,
ops
::
MemcpyD2HKernel
,
uint8_t
,
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
ops
::
MemcpyD2HKernel
);
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int16_t
,
ops
::
MemcpyD2HKernel
);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
int8_t
,
ops
::
MemcpyD2HKernel
,
uint8_t
,
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
ops
::
MemcpyD2HKernel
);
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int16_t
,
ops
::
MemcpyD2HKernel
);
#endif
#endif
#ifdef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_ASCEND_CL
REGISTER_OP_NPU_KERNEL_FUNCTOR
(
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
REGISTER_OP_NPU_KERNEL_FUNCTOR
(
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
memcpy_d2h
,
float
,
ops
::
MemcpyD2HKernel
,
double
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
int8_t
,
ops
::
MemcpyD2HKernel
,
uint8_t
,
ops
::
MemcpyD2HKernel
,
int
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int64_t
,
ops
::
MemcpyD2HKernel
,
bool
,
ops
::
MemcpyD2HKernel
);
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyD2HKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyD2HKernel
,
plat
::
float16
,
ops
::
MemcpyD2HKernel
,
int16_t
,
ops
::
MemcpyD2HKernel
);
#endif
#endif
paddle/fluid/operators/memcpy_h2d_op.cc
浏览文件 @
b5af9575
...
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
...
@@ -125,24 +125,33 @@ REGISTER_OPERATOR(
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
framework
::
OpDesc
>
,
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
paddle
::
framework
::
EmptyGradOpMaker
<
paddle
::
imperative
::
OpBase
>
);
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
REGISTER_OP_CPU_KERNEL_FUNCTOR
(
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
int8_t
,
ops
::
MemcpyH2DKernel
,
uint8_t
,
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
ops
::
MemcpyH2DKernel
);
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int16_t
,
ops
::
MemcpyH2DKernel
);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
REGISTER_OP_CUDA_KERNEL_FUNCTOR
(
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
int8_t
,
ops
::
MemcpyH2DKernel
,
uint8_t
,
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
ops
::
MemcpyH2DKernel
);
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int16_t
,
ops
::
MemcpyH2DKernel
);
#endif
#endif
#ifdef PADDLE_WITH_ASCEND_CL
#ifdef PADDLE_WITH_ASCEND_CL
REGISTER_OP_NPU_KERNEL_FUNCTOR
(
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
REGISTER_OP_NPU_KERNEL_FUNCTOR
(
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
memcpy_h2d
,
float
,
ops
::
MemcpyH2DKernel
,
double
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
int8_t
,
ops
::
MemcpyH2DKernel
,
uint8_t
,
ops
::
MemcpyH2DKernel
,
int
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int64_t
,
ops
::
MemcpyH2DKernel
,
bool
,
ops
::
MemcpyH2DKernel
);
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
bfloat16
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
float
>
,
ops
::
MemcpyH2DKernel
,
paddle
::
platform
::
complex
<
double
>
,
ops
::
MemcpyH2DKernel
,
plat
::
float16
,
ops
::
MemcpyH2DKernel
,
int16_t
,
ops
::
MemcpyH2DKernel
);
#endif
#endif
paddle/fluid/pybind/pybind.cc
浏览文件 @
b5af9575
...
@@ -2046,7 +2046,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -2046,7 +2046,7 @@ All parameter, weight, gradient are variables in Paddle.
[](
StandaloneExecutor
&
self
,
[](
StandaloneExecutor
&
self
,
const
std
::
unordered_map
<
std
::
string
,
py
::
array
>
&
input_dict
,
const
std
::
unordered_map
<
std
::
string
,
py
::
array
>
&
input_dict
,
std
::
vector
<
std
::
string
>
fetch_names
)
{
std
::
vector
<
std
::
string
>
fetch_names
)
{
std
::
vector
<
framework
::
Tensor
>
feed_tensors
;
std
::
vector
<
framework
::
LoD
Tensor
>
feed_tensors
;
std
::
vector
<
std
::
string
>
feed_names
;
std
::
vector
<
std
::
string
>
feed_names
;
for
(
auto
&
item
:
input_dict
)
{
for
(
auto
&
item
:
input_dict
)
{
...
@@ -2066,10 +2066,10 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -2066,10 +2066,10 @@ All parameter, weight, gradient are variables in Paddle.
})
})
.
def
(
"run"
,
.
def
(
"run"
,
[](
StandaloneExecutor
&
self
,
[](
StandaloneExecutor
&
self
,
const
std
::
unordered_map
<
std
::
string
,
framework
::
Tensor
>
const
std
::
unordered_map
<
std
::
string
,
framework
::
LoD
Tensor
>
&
input_dict
,
&
input_dict
,
std
::
vector
<
std
::
string
>
fetch_names
)
{
std
::
vector
<
std
::
string
>
fetch_names
)
{
std
::
vector
<
framework
::
Tensor
>
feed_tensors
;
std
::
vector
<
framework
::
LoD
Tensor
>
feed_tensors
;
std
::
vector
<
std
::
string
>
feed_names
;
std
::
vector
<
std
::
string
>
feed_names
;
for
(
auto
&
item
:
input_dict
)
{
for
(
auto
&
item
:
input_dict
)
{
...
@@ -2087,7 +2087,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -2087,7 +2087,7 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"dry_run"
,
.
def
(
"dry_run"
,
[](
StandaloneExecutor
&
self
,
[](
StandaloneExecutor
&
self
,
const
std
::
unordered_map
<
std
::
string
,
py
::
array
>
&
input_dict
)
{
const
std
::
unordered_map
<
std
::
string
,
py
::
array
>
&
input_dict
)
{
std
::
vector
<
framework
::
Tensor
>
feed_tensors
;
std
::
vector
<
framework
::
LoD
Tensor
>
feed_tensors
;
std
::
vector
<
std
::
string
>
feed_names
;
std
::
vector
<
std
::
string
>
feed_names
;
for
(
auto
&
item
:
input_dict
)
{
for
(
auto
&
item
:
input_dict
)
{
...
...
python/paddle/fluid/executor.py
浏览文件 @
b5af9575
...
@@ -485,10 +485,11 @@ handler = FetchHandlerExample(var_dict=var_dict)
...
@@ -485,10 +485,11 @@ handler = FetchHandlerExample(var_dict=var_dict)
class
_StandaloneExecutor
(
object
):
class
_StandaloneExecutor
(
object
):
def
__init__
(
self
,
place
,
main_program
):
def
__init__
(
self
,
place
,
main_program
,
scope
):
self
.
_place
=
core
.
Place
()
self
.
_place
=
core
.
Place
()
self
.
_place
.
set_place
(
place
)
self
.
_place
.
set_place
(
place
)
self
.
_main_program
=
main_program
self
.
_main_program
=
main_program
self
.
_scope
=
scope
self
.
_new_exe
=
self
.
_create_new_executor
()
self
.
_new_exe
=
self
.
_create_new_executor
()
def
run
(
self
,
feed
,
fetch_list
,
return_numpy
=
True
):
def
run
(
self
,
feed
,
fetch_list
,
return_numpy
=
True
):
...
@@ -522,9 +523,8 @@ class _StandaloneExecutor(object):
...
@@ -522,9 +523,8 @@ class _StandaloneExecutor(object):
def
_create_new_executor
(
self
):
def
_create_new_executor
(
self
):
# NOTE: It's a trick to set empty start_up program.
# NOTE: It's a trick to set empty start_up program.
startup_program
=
Program
()
startup_program
=
Program
()
outer_scope
=
global_scope
()
new_exe
=
core
.
StandaloneExecutor
(
self
.
_place
,
startup_program
.
desc
,
new_exe
=
core
.
StandaloneExecutor
(
self
.
_place
,
startup_program
.
desc
,
self
.
_main_program
.
desc
,
outer
_scope
)
self
.
_main_program
.
desc
,
self
.
_scope
)
return
new_exe
return
new_exe
...
@@ -585,11 +585,11 @@ class _ExecutorCache(object):
...
@@ -585,11 +585,11 @@ class _ExecutorCache(object):
self
.
_place
=
place
self
.
_place
=
place
self
.
_cached_executors
=
{}
self
.
_cached_executors
=
{}
def
run
(
self
,
program
,
feed
,
fetch_list
,
return_numpy
=
True
):
def
run
(
self
,
program
,
scope
,
feed
,
fetch_list
,
return_numpy
=
True
):
new_exe
=
self
.
_get_exe_from_cache
(
program
)
new_exe
=
self
.
_get_exe_from_cache
(
program
,
scope
)
return
new_exe
.
run
(
feed
,
fetch_list
,
return_numpy
)
return
new_exe
.
run
(
feed
,
fetch_list
,
return_numpy
)
def
_get_exe_from_cache
(
self
,
program
):
def
_get_exe_from_cache
(
self
,
program
,
scope
):
"""
"""
Return cached _StandaloneExecutor instance. If not found, create associated
Return cached _StandaloneExecutor instance. If not found, create associated
_StandaloneExecutor instance with given program and cache it.
_StandaloneExecutor instance with given program and cache it.
...
@@ -598,7 +598,7 @@ class _ExecutorCache(object):
...
@@ -598,7 +598,7 @@ class _ExecutorCache(object):
program
,
Program
),
"Required type(Program), but received {}"
.
format
(
program
,
Program
),
"Required type(Program), but received {}"
.
format
(
type
(
program
).
__name__
)
type
(
program
).
__name__
)
if
program
not
in
self
.
_cached_executors
:
if
program
not
in
self
.
_cached_executors
:
new_exe
=
_StandaloneExecutor
(
self
.
_place
,
program
)
new_exe
=
_StandaloneExecutor
(
self
.
_place
,
program
,
scope
)
self
.
_cached_executors
[
program
]
=
new_exe
self
.
_cached_executors
[
program
]
=
new_exe
return
self
.
_cached_executors
[
program
]
return
self
.
_cached_executors
[
program
]
...
@@ -1297,7 +1297,7 @@ class Executor(object):
...
@@ -1297,7 +1297,7 @@ class Executor(object):
# NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `,
# NOTE: This is an experimental feature. If `export FLAGS_USE_STANDALONE_EXECUTOR=1 `,
# use StandaloneExecutor to run the program.
# use StandaloneExecutor to run the program.
if
self
.
_enable_interpreter_core
and
not
program
.
_is_start_up_program_
:
if
self
.
_enable_interpreter_core
and
not
program
.
_is_start_up_program_
:
return
self
.
_executor_cache
.
run
(
program
,
feed
,
fetch_list
,
return
self
.
_executor_cache
.
run
(
program
,
scope
,
feed
,
fetch_list
,
return_numpy
)
return_numpy
)
# use_prune can be overrided by putting optimize_ops in fetch_list
# use_prune can be overrided by putting optimize_ops in fetch_list
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录