Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
39278731
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
39278731
编写于
4月 07, 2023
作者:
K
kangguangli
提交者:
GitHub
4月 07, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Executor] remove run_program branch (#52471)
* remove run_program * remove FLAGS_USE_STANDALONE_EXECUTOR
上级
47c740e7
变更
16
隐藏空白更改
内联
并排
Showing
16 changed file
with
122 addition
and
284 deletion
+122
-284
python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py
...dle/distributed/auto_parallel/tuner/optimization_tuner.py
+1
-3
python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
...ibuted/passes/auto_parallel_data_parallel_optimization.py
+3
-6
python/paddle/distributed/passes/auto_parallel_grad_clip.py
python/paddle/distributed/passes/auto_parallel_grad_clip.py
+1
-5
python/paddle/distributed/passes/auto_parallel_sharding.py
python/paddle/distributed/passes/auto_parallel_sharding.py
+1
-2
python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py
.../passes/auto_parallel_supplement_explicit_dependencies.py
+1
-4
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+1
-133
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+1
-14
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+0
-19
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
.../paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
+1
-3
python/paddle/fluid/tests/unittests/test_eager_run_program.py
...on/paddle/fluid/tests/unittests/test_eager_run_program.py
+1
-8
python/paddle/fluid/tests/unittests/test_run_program_op.py
python/paddle/fluid/tests/unittests/test_run_program_op.py
+2
-12
python/paddle/jit/translated_layer.py
python/paddle/jit/translated_layer.py
+1
-8
test/custom_op/CMakeLists.txt
test/custom_op/CMakeLists.txt
+0
-9
test/standalone_executor/test_standalone_controlflow.py
test/standalone_executor/test_standalone_controlflow.py
+42
-9
test/standalone_executor/test_standalone_executor.py
test/standalone_executor/test_standalone_executor.py
+58
-49
test/standalone_executor/test_standalone_multiply_write.py
test/standalone_executor/test_standalone_multiply_write.py
+8
-0
未找到文件。
python/paddle/distributed/auto_parallel/tuner/optimization_tuner.py
浏览文件 @
39278731
...
@@ -473,9 +473,7 @@ class OptimizationTuner:
...
@@ -473,9 +473,7 @@ class OptimizationTuner:
parent_env
=
copy
.
copy
(
os
.
environ
.
copy
())
parent_env
=
copy
.
copy
(
os
.
environ
.
copy
())
# env flags need for profile
# env flags need for profile
new_env
=
{
new_env
=
{}
"FLAGS_USE_STANDALONE_EXECUTOR"
:
"False"
,
}
new_env
.
update
(
parent_env
)
new_env
.
update
(
parent_env
)
# TODO if any rank hang or fail, kill all processes
# TODO if any rank hang or fail, kill all processes
...
...
python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
浏览文件 @
39278731
...
@@ -34,7 +34,6 @@ from paddle.distributed.auto_parallel.utils import (
...
@@ -34,7 +34,6 @@ from paddle.distributed.auto_parallel.utils import (
ring_id_to_process_group
,
ring_id_to_process_group
,
)
)
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
from
paddle.fluid.executor
import
_is_enable_standalone_executor
from
paddle.static
import
default_main_program
from
paddle.static
import
default_main_program
from
paddle.utils
import
unique_name
from
paddle.utils
import
unique_name
...
@@ -97,8 +96,7 @@ class DataParallelOptimizationPass(PassBase):
...
@@ -97,8 +96,7 @@ class DataParallelOptimizationPass(PassBase):
self
.
global_rank
=
int
(
self
.
get_attr
(
"global_rank"
))
self
.
global_rank
=
int
(
self
.
get_attr
(
"global_rank"
))
self
.
use_sharding
=
self
.
get_attr
(
"use_sharding"
)
self
.
use_sharding
=
self
.
get_attr
(
"use_sharding"
)
self
.
coalesce_prefix
=
'coalesce_grad'
self
.
coalesce_prefix
=
'coalesce_grad'
if
_is_enable_standalone_executor
():
self
.
gradient_sync_stream
=
"gradient_sync_stream"
self
.
gradient_sync_stream
=
"gradient_sync_stream"
with
paddle
.
static
.
program_guard
(
main_program
,
startup_program
):
with
paddle
.
static
.
program_guard
(
main_program
,
startup_program
):
self
.
_analyze_program
()
self
.
_analyze_program
()
...
@@ -316,8 +314,7 @@ class DataParallelOptimizationPass(PassBase):
...
@@ -316,8 +314,7 @@ class DataParallelOptimizationPass(PassBase):
def
_calc_wait_comms
(
self
):
def
_calc_wait_comms
(
self
):
if
_is_enable_standalone_executor
():
return
return
block
=
default_main_program
().
global_block
()
block
=
default_main_program
().
global_block
()
...
@@ -602,7 +599,7 @@ class DataParallelOptimizationPass(PassBase):
...
@@ -602,7 +599,7 @@ class DataParallelOptimizationPass(PassBase):
# multiple stream executor(standalone exe). This function just for standalone exe. Refactor here
# multiple stream executor(standalone exe). This function just for standalone exe. Refactor here
# in future when only one executor stay.
# in future when only one executor stay.
if
not
_is_enable_standalone_executor
()
or
len
(
grad_groups
)
==
0
:
if
len
(
grad_groups
)
==
0
:
return
return
block
=
default_main_program
().
global_block
()
block
=
default_main_program
().
global_block
()
...
...
python/paddle/distributed/passes/auto_parallel_grad_clip.py
浏览文件 @
39278731
...
@@ -18,7 +18,6 @@ import numpy as np
...
@@ -18,7 +18,6 @@ import numpy as np
import
paddle
import
paddle
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
from
paddle.fluid.executor
import
_is_enable_standalone_executor
from
..auto_parallel.dist_attribute
import
OperatorDistAttr
,
TensorDistAttr
from
..auto_parallel.dist_attribute
import
OperatorDistAttr
,
TensorDistAttr
from
..auto_parallel.operators.common
import
(
from
..auto_parallel.operators.common
import
(
...
@@ -460,10 +459,7 @@ class ClipGradByGloblNormPass(PassBase):
...
@@ -460,10 +459,7 @@ class ClipGradByGloblNormPass(PassBase):
)
)
self
.
clip_helper
.
_init_dist_attr
(
allreduce_op
)
self
.
clip_helper
.
_init_dist_attr
(
allreduce_op
)
if
(
if
insert_leaf_fill_constant_node
:
_is_enable_standalone_executor
()
and
insert_leaf_fill_constant_node
):
# NOTE add naive deps for global norm sync in graph exe
# NOTE add naive deps for global norm sync in graph exe
j
=
idx
-
1
j
=
idx
-
1
...
...
python/paddle/distributed/passes/auto_parallel_sharding.py
浏览文件 @
39278731
...
@@ -35,7 +35,6 @@ from paddle.distributed.auto_parallel.utils import (
...
@@ -35,7 +35,6 @@ from paddle.distributed.auto_parallel.utils import (
set_var_dist_attr
,
set_var_dist_attr
,
)
)
from
paddle.distributed.fleet.meta_optimizers.sharding.utils
import
get_var_size
from
paddle.distributed.fleet.meta_optimizers.sharding.utils
import
get_var_size
from
paddle.fluid.executor
import
_is_enable_standalone_executor
from
paddle.framework
import
core
from
paddle.framework
import
core
from
paddle.static
import
default_main_program
,
default_startup_program
from
paddle.static
import
default_main_program
,
default_startup_program
from
paddle.utils
import
unique_name
from
paddle.utils
import
unique_name
...
@@ -1168,7 +1167,7 @@ class ShardingPass(PassBase):
...
@@ -1168,7 +1167,7 @@ class ShardingPass(PassBase):
P.S. this overlap pass is ONLY adapted for standalone executor (graph based) and stream awared allocator.
P.S. this overlap pass is ONLY adapted for standalone executor (graph based) and stream awared allocator.
"""
"""
if
not
_is_enable_standalone_executor
()
or
(
not
self
.
enable_overlap
)
:
if
not
self
.
enable_overlap
:
return
return
self
.
grad_comm_group_stream_pairs
=
[]
self
.
grad_comm_group_stream_pairs
=
[]
...
...
python/paddle/distributed/passes/auto_parallel_supplement_explicit_dependencies.py
浏览文件 @
39278731
...
@@ -21,7 +21,6 @@ from paddle.distributed.auto_parallel.utils import (
...
@@ -21,7 +21,6 @@ from paddle.distributed.auto_parallel.utils import (
OpRole
,
OpRole
,
insert_dependencies_for_vars
,
insert_dependencies_for_vars
,
)
)
from
paddle.fluid.executor
import
_is_enable_standalone_executor
from
.auto_parallel_sharding
import
ShardingPass
,
_supported_optimizer_type
from
.auto_parallel_sharding
import
ShardingPass
,
_supported_optimizer_type
from
.pass_base
import
PassBase
,
register_pass
from
.pass_base
import
PassBase
,
register_pass
...
@@ -70,9 +69,7 @@ class AutoParalSupplementDepPass(PassBase):
...
@@ -70,9 +69,7 @@ class AutoParalSupplementDepPass(PassBase):
def
_apply_single_impl
(
self
,
main_program
,
startup_program
,
context
):
def
_apply_single_impl
(
self
,
main_program
,
startup_program
,
context
):
# TODO general this pass for all case.
# TODO general this pass for all case.
if
not
_is_enable_standalone_executor
or
not
_sharding_pass_applied
(
if
not
_sharding_pass_applied
(
context
):
context
):
return
return
self
.
_dist_context
=
self
.
get_attr
(
"dist_context"
,
None
)
self
.
_dist_context
=
self
.
get_attr
(
"dist_context"
,
None
)
...
...
python/paddle/fluid/executor.py
浏览文件 @
39278731
...
@@ -493,14 +493,6 @@ def _to_name_str(var):
...
@@ -493,14 +493,6 @@ def _to_name_str(var):
return
_to_str
(
var
)
return
_to_str
(
var
)
def
_is_enable_standalone_executor
():
return
(
framework
.
_enable_standalone_executor_
is
None
or
framework
.
_enable_standalone_executor_
in
[
1
,
'1'
,
True
,
'True'
,
'true'
]
)
def
_is_dy2st_enable_standalone_executor
():
def
_is_dy2st_enable_standalone_executor
():
return
framework
.
_dy2st_enable_standalone_executor_
in
[
return
framework
.
_dy2st_enable_standalone_executor_
in
[
1
,
1
,
...
@@ -1004,8 +996,6 @@ class Executor:
...
@@ -1004,8 +996,6 @@ class Executor:
"__auto_checkpoint_executor__"
"__auto_checkpoint_executor__"
)
)
# NOTE: Whether to use experimental executor `StandaloneExecutor`.
self
.
_enable_interpreter_core
=
_is_enable_standalone_executor
()
self
.
_executor_cache
=
_ExecutorCache
()
self
.
_executor_cache
=
_ExecutorCache
()
self
.
_fleet_executor
=
None
self
.
_fleet_executor
=
None
...
@@ -1605,9 +1595,7 @@ class Executor:
...
@@ -1605,9 +1595,7 @@ class Executor:
return
True
return
True
if
self
.
_enable_interpreter_core
and
_can_use_interpreter_core
(
if
_can_use_interpreter_core
(
program
,
self
.
place
):
program
,
self
.
place
):
if
feed
is
None
:
if
feed
is
None
:
feed
=
{}
feed
=
{}
...
@@ -1685,132 +1673,12 @@ class Executor:
...
@@ -1685,132 +1673,12 @@ class Executor:
acp
.
_auto_checkpoint
(
self
,
program
)
acp
.
_auto_checkpoint
(
self
,
program
)
# For backward compatibility, run directly.
if
not
compiled
:
return
self
.
_run_program
(
program
,
feed
=
feed
,
fetch_list
=
fetch_list
,
feed_var_name
=
feed_var_name
,
fetch_var_name
=
fetch_var_name
,
scope
=
scope
,
return_numpy
=
return_numpy
,
use_program_cache
=
use_program_cache
,
)
program
.
_compile
(
scope
,
self
.
place
)
program
.
_compile
(
scope
,
self
.
place
)
assert
(
assert
(
program
.
_is_inference
program
.
_is_inference
),
f
"Program must have _is_inference = True, but get
{
program
.
_is_inference
}
"
),
f
"Program must have _is_inference = True, but get
{
program
.
_is_inference
}
"
return
self
.
_run_inference
(
program
.
_executor
,
feed
)
return
self
.
_run_inference
(
program
.
_executor
,
feed
)
def
_run_program
(
self
,
program
,
feed
,
fetch_list
,
feed_var_name
,
fetch_var_name
,
scope
,
return_numpy
,
use_program_cache
,
):
from
paddle.optimizer.lr
import
LRScheduler
if
feed
is
None
:
feed
=
{}
elif
isinstance
(
feed
,
(
list
,
tuple
)):
assert
len
(
feed
)
==
1
,
"Not compiled with data parallel"
feed
=
feed
[
0
]
if
not
isinstance
(
feed
,
dict
):
raise
TypeError
(
"feed requires dict as its Parameter. But you passed in %s"
%
(
type
(
feed
))
)
assert
program
is
not
None
,
"The program should not be Empty"
if
not
isinstance
(
program
,
Program
):
raise
TypeError
(
"Executor requires Program as its Parameter. But you passed in %s"
%
(
type
(
program
))
)
if
not
isinstance
(
fetch_var_name
,
str
):
raise
TypeError
(
"The name of fetch variable requires string as its Parameter. But you passed in %s"
%
(
type
(
fetch_var_name
))
)
if
use_program_cache
:
cache_key
=
_get_strong_program_cache_key
(
program
,
feed
,
fetch_list
)
cached_program
=
self
.
_get_program_cache
(
cache_key
)
cached_ctx
=
self
.
_get_ctx_cache
(
cache_key
)
cached_scope
=
self
.
_get_scope_cache
(
cache_key
)
if
cached_program
is
None
:
cached_program
=
_add_feed_fetch_ops
(
program
=
program
,
feed
=
feed
,
fetch_list
=
fetch_list
,
feed_var_name
=
feed_var_name
,
fetch_var_name
=
fetch_var_name
,
)
self
.
_add_program_cache
(
cache_key
,
cached_program
)
fetch_list_str
=
list
(
map
(
_to_name_str
,
fetch_list
))
cached_ctx
=
self
.
_default_executor
.
prepare
(
cached_program
.
desc
,
0
,
fetch_list_str
,
False
)
# currently, we cache program, vars, sub_scope here
# we suppose that in a life cycle of training, a user
# will not create many programs. So, here the basic
# rule of caching is to cache all unseen (program, var, scope)
# when a user use use_program_cache.
cached_scope
=
scope
.
new_scope
()
self
.
_default_executor
.
create_variables
(
cached_program
.
desc
,
cached_scope
,
0
)
self
.
_add_ctx_cache
(
cache_key
,
cached_ctx
)
self
.
_add_scope_cache
(
cache_key
,
cached_scope
)
program
=
cached_program
ctx
=
cached_ctx
scope
=
cached_scope
else
:
program
=
_add_feed_fetch_ops
(
program
=
program
,
feed
=
feed
,
fetch_list
=
fetch_list
,
feed_var_name
=
feed_var_name
,
fetch_var_name
=
fetch_var_name
,
)
self
.
_feed_data
(
program
,
feed
,
feed_var_name
,
scope
)
if
hasattr
(
program
,
'lr_schedulerr'
):
assert
isinstance
(
program
.
lr_scheduler
,
LRScheduler
),
"must be LRScheduler"
lr_scheduler
=
program
.
lr_scheduler
lr_value
=
lr_scheduler
()
lr_var
=
program
.
global_block
().
vars
[
lr_scheduler
.
_var_name
]
data
=
np
.
array
([
lr_value
]).
astype
(
convert_dtype
(
lr_var
.
dtype
))
tensor
=
core
.
get_variable_tensor
(
scope
,
lr_scheduler
.
_var_name
)
tensor
.
set
(
data
,
self
.
place
)
if
not
use_program_cache
:
self
.
_default_executor
.
run
(
program
.
desc
,
scope
,
0
,
True
,
True
,
[
fetch_var_name
]
)
else
:
self
.
_default_executor
.
run_prepared_ctx
(
ctx
,
scope
,
False
,
False
,
False
)
arr
=
scope
.
find_var
(
fetch_var_name
).
get_fetch_list
()
tensors
=
arr
.
_move_to_list
()
if
return_numpy
:
return
as_numpy
(
tensors
)
else
:
return
tensors
def
_run_inference
(
self
,
exe
,
feed
):
def
_run_inference
(
self
,
exe
,
feed
):
return
exe
.
run
(
feed
)
return
exe
.
run
(
feed
)
...
...
python/paddle/fluid/framework.py
浏览文件 @
39278731
...
@@ -116,9 +116,7 @@ _already_patch_eager_tensor = False
...
@@ -116,9 +116,7 @@ _already_patch_eager_tensor = False
_already_patch_varbase
=
False
_already_patch_varbase
=
False
_current_cuda_graph_mode
=
None
_current_cuda_graph_mode
=
None
_global_flags_
=
core
.
globals
()
_global_flags_
=
core
.
globals
()
_enable_standalone_executor_
=
os
.
environ
.
get
(
'FLAGS_USE_STANDALONE_EXECUTOR'
,
None
)
_dy2st_enable_standalone_executor_
=
os
.
environ
.
get
(
_dy2st_enable_standalone_executor_
=
os
.
environ
.
get
(
'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR'
,
1
'FLAGS_DY2ST_USE_STANDALONE_EXECUTOR'
,
1
)
)
...
@@ -270,17 +268,6 @@ ipu_index_attr_name = 'ipu_index'
...
@@ -270,17 +268,6 @@ ipu_index_attr_name = 'ipu_index'
ipu_stage_attr_name
=
'ipu_stage'
ipu_stage_attr_name
=
'ipu_stage'
@
signature_safe_contextmanager
def
_enable_standalone_executor
(
enable
=
True
):
global
_enable_standalone_executor_
original_
=
_enable_standalone_executor_
_enable_standalone_executor_
=
enable
try
:
yield
finally
:
_enable_standalone_executor_
=
original_
@
signature_safe_contextmanager
@
signature_safe_contextmanager
def
ipu_shard_guard
(
index
=-
1
,
stage
=-
1
):
def
ipu_shard_guard
(
index
=-
1
,
stage
=-
1
):
"""
"""
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
39278731
...
@@ -1150,25 +1150,6 @@ if(WITH_GLOO)
...
@@ -1150,25 +1150,6 @@ if(WITH_GLOO)
PROPERTIES TIMEOUT 120
)
PROPERTIES TIMEOUT 120
)
endif
()
endif
()
if
(
$ENV{USE_STANDALONE_EXECUTOR}
)
# these test will fail in some server due to PR#42149, temporarily set it use old executor.
set_tests_properties
(
test_apply_pass_to_program
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_buffer_shared_memory_reuse_pass
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_imperative_optimizer
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_imperative_star_gan_with_gradient_penalty
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_switch_autotune
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_imperative_mnist_sorted_gradient
PROPERTIES ENVIRONMENT FLAGS_USE_STANDALONE_EXECUTOR=0
)
endif
()
set
(
TEST_CINN_OPS
set
(
TEST_CINN_OPS
test_softmax_op
test_softmax_op
test_expand_v2_op
test_expand_v2_op
...
...
python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt
浏览文件 @
39278731
...
@@ -201,9 +201,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
...
@@ -201,9 +201,7 @@ if(WITH_GPU AND TENSORRT_FOUND)
set_tests_properties
(
test_trt_conv3d_op PROPERTIES TIMEOUT 60
)
set_tests_properties
(
test_trt_conv3d_op PROPERTIES TIMEOUT 60
)
set_tests_properties
(
test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60
)
set_tests_properties
(
test_trt_conv3d_transpose_op PROPERTIES TIMEOUT 60
)
set_tests_properties
(
test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30
)
set_tests_properties
(
test_trt_nearest_interp_v2_op PROPERTIES TIMEOUT 30
)
set_tests_properties
(
set_tests_properties
(
test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60
)
test_trt_multiclass_nms3_op PROPERTIES TIMEOUT 60 ENVIRONMENT
FLAGS_USE_STANDALONE_EXECUTOR=0
)
if
(
WITH_MKLDNN
if
(
WITH_MKLDNN
AND TENSORRT_FOUND
AND TENSORRT_FOUND
...
...
python/paddle/fluid/tests/unittests/test_eager_run_program.py
浏览文件 @
39278731
...
@@ -20,10 +20,6 @@ import paddle
...
@@ -20,10 +20,6 @@ import paddle
from
paddle
import
_legacy_C_ops
from
paddle
import
_legacy_C_ops
from
paddle.fluid
import
core
from
paddle.fluid
import
core
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.executor
import
(
_is_dy2st_enable_standalone_executor
,
_is_enable_standalone_executor
,
)
from
paddle.fluid.framework
import
Variable
from
paddle.fluid.framework
import
Variable
...
@@ -140,10 +136,7 @@ class TestRunProgram(unittest.TestCase):
...
@@ -140,10 +136,7 @@ class TestRunProgram(unittest.TestCase):
[
out
.
name
+
'@GRAD'
],
[
out
.
name
+
'@GRAD'
],
]
]
use_interpretorcore
=
(
use_interpretorcore
=
True
_is_enable_standalone_executor
()
and
_is_dy2st_enable_standalone_executor
()
)
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
if
use_interpretorcore
:
if
use_interpretorcore
:
attrs
.
extend
(
attrs
.
extend
(
...
...
python/paddle/fluid/tests/unittests/test_run_program_op.py
浏览文件 @
39278731
...
@@ -21,10 +21,6 @@ import paddle
...
@@ -21,10 +21,6 @@ import paddle
from
paddle
import
_legacy_C_ops
,
fluid
from
paddle
import
_legacy_C_ops
,
fluid
from
paddle.fluid
import
core
,
framework
from
paddle.fluid
import
core
,
framework
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.executor
import
(
_is_dy2st_enable_standalone_executor
,
_is_enable_standalone_executor
,
)
from
paddle.fluid.framework
import
global_var
from
paddle.fluid.framework
import
global_var
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -240,10 +236,7 @@ class RunProgramOpTest(unittest.TestCase):
...
@@ -240,10 +236,7 @@ class RunProgramOpTest(unittest.TestCase):
self
.
program_desc
,
self
.
fwd_op_num
,
len
(
outputs
[
'Out'
])
self
.
program_desc
,
self
.
fwd_op_num
,
len
(
outputs
[
'Out'
])
)
)
use_interpretorcore
=
(
use_interpretorcore
=
True
_is_enable_standalone_executor
()
and
_is_dy2st_enable_standalone_executor
()
)
self
.
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
self
.
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
if
use_interpretorcore
:
if
use_interpretorcore
:
self
.
attrs
.
extend
(
self
.
attrs
.
extend
(
...
@@ -292,10 +285,7 @@ class RunProgramOpTest(unittest.TestCase):
...
@@ -292,10 +285,7 @@ class RunProgramOpTest(unittest.TestCase):
self
.
program_desc
,
self
.
fwd_op_num
,
len
(
outputs
[
'Out'
])
self
.
program_desc
,
self
.
fwd_op_num
,
len
(
outputs
[
'Out'
])
)
)
use_interpretorcore
=
(
use_interpretorcore
=
True
_is_enable_standalone_executor
()
and
_is_dy2st_enable_standalone_executor
()
)
self
.
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
self
.
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
if
use_interpretorcore
:
if
use_interpretorcore
:
self
.
attrs
.
extend
(
self
.
attrs
.
extend
(
...
...
python/paddle/jit/translated_layer.py
浏览文件 @
39278731
...
@@ -21,10 +21,6 @@ import paddle
...
@@ -21,10 +21,6 @@ import paddle
from
paddle
import
_legacy_C_ops
from
paddle
import
_legacy_C_ops
from
paddle.fluid
import
backward
,
core
,
framework
,
unique_name
from
paddle.fluid
import
backward
,
core
,
framework
,
unique_name
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.dygraph.base
import
switch_to_static_graph
from
paddle.fluid.executor
import
(
_is_dy2st_enable_standalone_executor
,
_is_enable_standalone_executor
,
)
from
paddle.fluid.framework
import
OpProtoHolder
,
_non_static_mode
from
paddle.fluid.framework
import
OpProtoHolder
,
_non_static_mode
from
paddle.jit.dy2static.partial_program
import
(
from
paddle.jit.dy2static.partial_program
import
(
LazyInitialized
,
LazyInitialized
,
...
@@ -976,10 +972,7 @@ def _run_dygraph(instance, input, program_holder):
...
@@ -976,10 +972,7 @@ def _run_dygraph(instance, input, program_holder):
)
)
)
)
use_interpretorcore
=
(
use_interpretorcore
=
True
_is_enable_standalone_executor
()
and
_is_dy2st_enable_standalone_executor
()
)
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
attrs
.
extend
((
'use_interpretorcore'
,
use_interpretorcore
))
if
use_interpretorcore
:
if
use_interpretorcore
:
attrs
.
extend
(
attrs
.
extend
(
...
...
test/custom_op/CMakeLists.txt
浏览文件 @
39278731
...
@@ -11,15 +11,6 @@ if(WITH_TESTING)
...
@@ -11,15 +11,6 @@ if(WITH_TESTING)
set_tests_properties
(
test_custom_relu_op_jit PROPERTIES TIMEOUT 180
)
set_tests_properties
(
test_custom_relu_op_jit PROPERTIES TIMEOUT 180
)
set_tests_properties
(
test_custom_relu_model PROPERTIES TIMEOUT 180
)
set_tests_properties
(
test_custom_relu_model PROPERTIES TIMEOUT 180
)
set_tests_properties
(
test_context_pool PROPERTIES TIMEOUT 180
)
set_tests_properties
(
test_context_pool PROPERTIES TIMEOUT 180
)
if
(
$ENV{USE_STANDALONE_EXECUTOR}
)
# these test will fail in some server due to PR#42149, temporarily set it use old executor.
set_tests_properties
(
test_custom_relu_op_setup PROPERTIES ENVIRONMENT
FLAGS_USE_STANDALONE_EXECUTOR=0
)
set_tests_properties
(
test_custom_relu_model PROPERTIES ENVIRONMENT
FLAGS_USE_STANDALONE_EXECUTOR=0
)
endif
()
endif
()
endif
()
if
(
WITH_GPU AND WITH_DISTRIBUTE
)
if
(
WITH_GPU AND WITH_DISTRIBUTE
)
...
...
test/standalone_executor/test_standalone_controlflow.py
浏览文件 @
39278731
...
@@ -17,7 +17,7 @@ import unittest
...
@@ -17,7 +17,7 @@ import unittest
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
paddle.fluid
import
core
,
framework
from
paddle.fluid
import
core
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
Program
,
program_guard
paddle
.
enable_static
()
paddle
.
enable_static
()
...
@@ -25,7 +25,7 @@ paddle.enable_static()
...
@@ -25,7 +25,7 @@ paddle.enable_static()
# test the compatibility of new executor: run old
# test the compatibility of new executor: run old
# and new executor twice and check the result.
# and new executor twice and check the result.
# please override the _get_feeds() and build_prgram()
# please override the _get_feeds() and build_prgram()
, run_dygraph_once()
class
TestCompatibility
(
unittest
.
TestCase
):
class
TestCompatibility
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
place
=
(
self
.
place
=
(
...
@@ -78,26 +78,53 @@ class TestCompatibility(unittest.TestCase):
...
@@ -78,26 +78,53 @@ class TestCompatibility(unittest.TestCase):
ret
.
append
(
exe
.
run
(
main_program
,
feed
=
feed
,
fetch_list
=
fetch_vars
))
ret
.
append
(
exe
.
run
(
main_program
,
feed
=
feed
,
fetch_list
=
fetch_vars
))
return
ret
return
ret
def
run_raw_executor
(
self
,
feed
):
def
run_dygraph_once
(
self
,
feed
):
with
framework
.
_enable_standalone_executor
(
False
):
x
=
paddle
.
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.1
)
out
=
self
.
_run
(
feed
)
y
=
paddle
.
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.23
)
if
x
<
y
:
out
=
[
paddle
.
tensor
.
fill_constant
(
shape
=
[
1
,
2
],
dtype
=
'int32'
,
value
=
1
).
numpy
(),
paddle
.
tensor
.
fill_constant
(
shape
=
[
2
,
3
],
dtype
=
'bool'
,
value
=
True
).
numpy
(),
]
else
:
out
=
[
paddle
.
tensor
.
fill_constant
(
shape
=
[
3
,
4
],
dtype
=
'float32'
,
value
=
3
).
numpy
(),
paddle
.
tensor
.
fill_constant
(
shape
=
[
4
,
5
],
dtype
=
'int64'
,
value
=
2
).
numpy
(),
]
return
out
return
out
def
run_dygraph
(
self
,
feed
):
ret
=
[]
for
_
in
range
(
self
.
iter_run
):
ret
.
append
(
self
.
run_dygraph_once
(
feed
))
return
ret
def
run_new_executor
(
self
,
feed
):
def
run_new_executor
(
self
,
feed
):
with
framework
.
_enable_standalone_executor
(
True
):
out
=
self
.
_run
(
feed
)
out
=
self
.
_run
(
feed
)
return
out
return
out
def
test_with_feed
(
self
):
def
test_with_feed
(
self
):
feed
=
self
.
_get_feed
()
feed
=
self
.
_get_feed
()
paddle
.
enable_static
()
res
=
self
.
run_new_executor
(
feed
)
res
=
self
.
run_new_executor
(
feed
)
gt
=
self
.
run_raw_executor
(
feed
)
paddle
.
disable_static
()
gt
=
self
.
run_dygraph
(
feed
)
for
x
,
y
in
zip
(
gt
,
res
):
for
x
,
y
in
zip
(
gt
,
res
):
if
isinstance
(
x
,
list
):
if
isinstance
(
x
,
list
):
for
tx
,
ty
in
zip
(
x
,
y
):
for
tx
,
ty
in
zip
(
x
,
y
):
np
.
testing
.
assert_array_equal
(
tx
,
ty
)
np
.
testing
.
assert_array_equal
(
tx
,
ty
)
elif
isinstance
(
x
,
np
.
ndarray
):
elif
isinstance
(
x
,
np
.
ndarray
):
np
.
testing
.
assert_array_equal
(
tx
,
t
y
)
np
.
testing
.
assert_array_equal
(
x
,
y
)
else
:
else
:
raise
Exception
(
"Not Implement!"
)
raise
Exception
(
"Not Implement!"
)
...
@@ -129,6 +156,12 @@ class TestWhile(TestCompatibility):
...
@@ -129,6 +156,12 @@ class TestWhile(TestCompatibility):
exe
=
paddle
.
static
.
Executor
(
paddle
.
CPUPlace
())
exe
=
paddle
.
static
.
Executor
(
paddle
.
CPUPlace
())
return
main_program
,
startup_program
,
i
return
main_program
,
startup_program
,
i
def
run_dygraph_once
(
self
,
feed
):
i
=
1
while
i
<
10
:
i
=
i
+
1
return
[
i
]
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
test/standalone_executor/test_standalone_executor.py
浏览文件 @
39278731
...
@@ -23,7 +23,7 @@ import unittest
...
@@ -23,7 +23,7 @@ import unittest
import
numpy
as
np
import
numpy
as
np
import
paddle
import
paddle
from
paddle.fluid
import
core
,
framework
from
paddle.fluid
import
core
from
paddle.fluid.core
import
StandaloneExecutor
from
paddle.fluid.core
import
StandaloneExecutor
from
paddle.profiler
import
profiler
from
paddle.profiler
import
profiler
...
@@ -143,16 +143,15 @@ class ExecutorStatisticsTestCase(unittest.TestCase):
...
@@ -143,16 +143,15 @@ class ExecutorStatisticsTestCase(unittest.TestCase):
scope
=
paddle
.
static
.
Scope
()
scope
=
paddle
.
static
.
Scope
()
with
paddle
.
static
.
scope_guard
(
scope
):
with
paddle
.
static
.
scope_guard
(
scope
):
with
framework
.
_enable_standalone_executor
(
enable
):
exe
=
paddle
.
static
.
Executor
(
self
.
place
)
exe
=
paddle
.
static
.
Executor
(
self
.
place
)
helper_profiler
=
profiler
.
Profiler
(
helper_profiler
=
profiler
.
Profiler
(
targets
=
[
profiler
.
ProfilerTarget
.
CPU
],
scheduler
=
(
1
,
2
)
targets
=
[
profiler
.
ProfilerTarget
.
CPU
],
scheduler
=
(
1
,
2
)
)
)
helper_profiler
.
start
()
helper_profiler
.
start
()
for
i
in
range
(
self
.
iter_n
):
for
i
in
range
(
self
.
iter_n
):
exe
.
run
(
main_program
,
fetch_list
=
fetch_list
)
exe
.
run
(
main_program
,
fetch_list
=
fetch_list
)
helper_profiler
.
step
()
helper_profiler
.
step
()
helper_profiler
.
stop
()
helper_profiler
.
stop
()
self
.
assertTrue
(
os
.
path
.
exists
(
self
.
perf_path
))
self
.
assertTrue
(
os
.
path
.
exists
(
self
.
perf_path
))
with
open
(
self
.
perf_path
,
'r'
)
as
load_f
:
with
open
(
self
.
perf_path
,
'r'
)
as
load_f
:
...
@@ -183,15 +182,14 @@ class MultiStreamModelTestCase(unittest.TestCase):
...
@@ -183,15 +182,14 @@ class MultiStreamModelTestCase(unittest.TestCase):
paddle
.
seed
(
2020
)
paddle
.
seed
(
2020
)
main_program
,
startup_program
,
fetch_list
=
build_program
()
main_program
,
startup_program
,
fetch_list
=
build_program
()
with
framework
.
_enable_standalone_executor
(
use_new_executor
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
exe
=
paddle
.
static
.
Executor
(
self
.
place
)
exe
=
paddle
.
static
.
Executor
(
self
.
place
)
outs
=
[]
outs
=
[]
for
i
in
range
(
self
.
iter_n
):
for
i
in
range
(
self
.
iter_n
):
outs
.
append
(
outs
.
append
(
exe
.
run
(
main_program
,
scope
=
scope
,
fetch_list
=
fetch_list
)
exe
.
run
(
main_program
,
scope
=
scope
,
fetch_list
=
fetch_list
)
)
)
print
(
outs
)
print
(
outs
)
return
outs
return
outs
...
@@ -249,30 +247,46 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
...
@@ -249,30 +247,46 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
return
outs
return
outs
def
run_raw_executor
(
self
,
feed
,
use_compiled
=
False
):
def
run_dygraph
(
self
,
feed
):
with
framework
.
_enable_standalone_executor
(
False
):
def
run_once
(
is_double
):
# run construct program 1
paddle
.
seed
(
2020
)
out1
=
self
.
_run
(
a
=
feed
[
'a'
]
feed
,
use_str
=
False
,
is_double
=
False
,
use_compiled
=
use_compiled
a
=
paddle
.
to_tensor
(
a
,
dtype
=
'float32'
)
)
b
=
paddle
.
ones
([
2
,
2
])
*
2
# run construct program 2 with same executor
t
=
paddle
.
nn
.
Linear
(
2
,
2
)(
a
)
out2
=
self
.
_run
(
c
=
t
+
b
feed
,
use_str
=
True
,
is_double
=
True
,
use_compiled
=
use_compiled
if
is_double
:
)
c
=
c
+
c
return
c
.
numpy
()
return
[
out1
,
out2
]
out1
=
[]
for
i
in
range
(
self
.
iter_run
):
out1
.
append
(
run_once
(
False
))
out2
=
[]
for
i
in
range
(
self
.
iter_run
):
out2
.
append
(
run_once
(
True
))
return
[
out1
,
out2
]
def
run_new_executor
(
self
,
feed
,
use_compiled
=
False
):
def
run_new_executor
(
self
,
feed
,
use_compiled
=
False
):
with
framework
.
_enable_standalone_executor
():
# run construct program 1
out
=
self
.
run_raw_executor
(
feed
,
use_compiled
=
use_compiled
)
out1
=
self
.
_run
(
return
out
feed
,
use_str
=
False
,
is_double
=
False
,
use_compiled
=
use_compiled
)
# run construct program 2 with same executor
out2
=
self
.
_run
(
feed
,
use_str
=
True
,
is_double
=
True
,
use_compiled
=
use_compiled
)
return
[
out1
,
out2
]
def
test_with_feed
(
self
):
def
test_with_feed
(
self
):
data
=
np
.
ones
([
2
,
2
],
dtype
=
"float32"
)
data
=
np
.
ones
([
2
,
2
],
dtype
=
"float32"
)
feed
=
{
"a"
:
data
,
'fake_input'
:
data
}
feed
=
{
"a"
:
data
,
'fake_input'
:
data
}
res
=
self
.
run_new_executor
(
feed
)
with
paddle
.
fluid
.
framework
.
_static_guard
():
gt
=
self
.
run_raw_executor
(
feed
)
res
=
self
.
run_new_executor
(
feed
)
with
paddle
.
fluid
.
dygraph
.
guard
():
gt
=
self
.
run_dygraph
(
feed
)
for
x
,
y
in
zip
(
gt
,
res
):
for
x
,
y
in
zip
(
gt
,
res
):
np
.
testing
.
assert_array_equal
(
x
,
y
)
np
.
testing
.
assert_array_equal
(
x
,
y
)
...
@@ -280,8 +294,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
...
@@ -280,8 +294,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
feed
=
[{
'a'
:
np
.
ones
([
2
,
2
],
dtype
=
"float32"
)}]
feed
=
[{
'a'
:
np
.
ones
([
2
,
2
],
dtype
=
"float32"
)}]
with
self
.
assertRaises
(
TypeError
):
with
self
.
assertRaises
(
TypeError
):
with
framework
.
_enable_standalone_executor
():
self
.
_run
(
feed
[
0
],
add_wrong_fetch
=
True
)
self
.
_run
(
feed
[
0
],
add_wrong_fetch
=
True
)
def
test_empty_program
(
self
):
def
test_empty_program
(
self
):
program
=
paddle
.
static
.
Program
()
program
=
paddle
.
static
.
Program
()
...
@@ -291,8 +304,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
...
@@ -291,8 +304,7 @@ class SwitchExecutorInterfaceWithFeed(unittest.TestCase):
for
i
in
range
(
10
):
for
i
in
range
(
10
):
print
(
i
,
flush
=
1
)
print
(
i
,
flush
=
1
)
with
framework
.
_enable_standalone_executor
():
out
=
exe
.
run
(
program
,
feed
=
None
)
out
=
exe
.
run
(
program
,
feed
=
None
)
class
TestException
(
unittest
.
TestCase
):
class
TestException
(
unittest
.
TestCase
):
...
@@ -328,8 +340,7 @@ class TestException(unittest.TestCase):
...
@@ -328,8 +340,7 @@ class TestException(unittest.TestCase):
return
out
return
out
def
run_new_executor
(
self
,
feed
):
def
run_new_executor
(
self
,
feed
):
with
framework
.
_enable_standalone_executor
():
out
=
self
.
_run
(
feed
)
out
=
self
.
_run
(
feed
)
return
out
return
out
def
test_exception
(
self
):
def
test_exception
(
self
):
...
@@ -399,13 +410,11 @@ class TestInplaceApiWithDataTransform(unittest.TestCase):
...
@@ -399,13 +410,11 @@ class TestInplaceApiWithDataTransform(unittest.TestCase):
with
paddle
.
fluid
.
device_guard
(
"cpu"
):
with
paddle
.
fluid
.
device_guard
(
"cpu"
):
x
=
paddle
.
increment
(
x
)
x
=
paddle
.
increment
(
x
)
exe
=
paddle
.
static
.
Executor
(
paddle
.
CUDAPlace
(
0
))
exe
=
paddle
.
static
.
Executor
(
paddle
.
CUDAPlace
(
0
))
with
framework
.
_enable_standalone_executor
():
for
i
in
range
(
10
):
(
a
,)
=
exe
.
run
(
for
i
in
range
(
10
):
paddle
.
static
.
default_main_program
(),
fetch_list
=
[
x
]
(
a
,)
=
exe
.
run
(
)
paddle
.
static
.
default_main_program
(),
fetch_list
=
[
x
]
self
.
assertEqual
(
a
[
0
],
1
)
)
self
.
assertEqual
(
a
[
0
],
1
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
test/standalone_executor/test_standalone_multiply_write.py
浏览文件 @
39278731
...
@@ -39,6 +39,14 @@ class TestMultiplyWrite(TestCompatibility):
...
@@ -39,6 +39,14 @@ class TestMultiplyWrite(TestCompatibility):
paddle
.
assign
(
inp2
,
out
)
paddle
.
assign
(
inp2
,
out
)
return
main_program
,
startup_program
,
out
return
main_program
,
startup_program
,
out
def
run_dygraph_once
(
self
,
feed
):
out
=
paddle
.
full
((
1
,),
1
)
inp1
=
paddle
.
full
((
1
,),
2
)
inp2
=
paddle
.
full
((
1
,),
3
)
paddle
.
assign
(
inp1
,
out
)
paddle
.
assign
(
inp2
,
out
)
return
[
out
.
numpy
()]
def
setUp
(
self
):
def
setUp
(
self
):
self
.
place
=
paddle
.
CPUPlace
()
self
.
place
=
paddle
.
CPUPlace
()
self
.
iter_run
=
5
self
.
iter_run
=
5
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录