Unverified commit 38865fcd authored by kangguangli, committed by GitHub

remove flags_enable_parallel_graph (#51375)

Parent 559de39a
@@ -59,10 +59,6 @@ PADDLE_DEFINE_EXPORTED_string(
     "",
     "Profiler filename for PE, which generated by gperftools."
     "Only valid when compiled `WITH_PRIFILER=ON`. Empty if disable.");
-PADDLE_DEFINE_EXPORTED_bool(
-    enable_parallel_graph,
-    false,
-    "Force disable parallel graph execution mode if set false.");
 namespace paddle {
 namespace framework {
@@ -1276,9 +1272,7 @@ bool ParallelExecutor::EnableParallelGraphExecution(
     const ir::Graph &graph,
     const ExecutionStrategy &exec_strategy,
     const BuildStrategy &build_strategy) const {
-  if (!FLAGS_enable_parallel_graph) {
-    return false;
-  }
+  return false;
   bool enable_parallel_graph = true;
...
@@ -1689,21 +1689,7 @@ class Executor:
             )
             return False
-        # Unsupported case 2: parallel graph
-        if core.globals()['FLAGS_enable_parallel_graph'] in [
-            1,
-            '1',
-            True,
-            'True',
-            'true',
-        ]:
-            warnings.warn(
-                "Standalone executor is not used for parallel graph",
-                UserWarning,
-            )
-            return False
-        # Unsupported case 3: inference
+        # Unsupported case 2: inference
         if compiled_program._is_inference:
             warnings.warn(
                 "Standalone executor is not used for inference",
@@ -1711,7 +1697,7 @@ class Executor:
             )
             return False
-        # Unsupported case 4: async mode
+        # Unsupported case 3: async mode
         if (
             compiled_program._build_strategy is not None
             and compiled_program._build_strategy.async_mode
@@ -1722,7 +1708,7 @@ class Executor:
             )
             return False
-        # Unsupported case 5: CUDA Graph
+        # Unsupported case 4: CUDA Graph
         if (
             compiled_program._build_strategy is not None
             and compiled_program._build_strategy.allow_cuda_graph_capture
...
@@ -886,12 +886,8 @@ add_subdirectory(standalone_executor)
 set_tests_properties(
   test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass
-  test_data_norm_op
-  test_dataloader_keep_order
-  test_dataloader_unkeep_order
-  test_parallel_ssa_graph_inference_feed_partial_data
-  test_buffer_shared_memory_reuse_pass
-  PROPERTIES LABELS "RUN_TYPE=DIST")
+  test_data_norm_op test_dataloader_keep_order test_dataloader_unkeep_order
+  test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST")
 set_tests_properties(
   test_sync_batch_norm_op
   test_inplace_abn_op
@@ -1018,8 +1014,6 @@ set_tests_properties(test_imperative_selected_rows_to_lod_tensor
                      PROPERTIES TIMEOUT 200)
 set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120)
 set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120)
-set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data
-                     PROPERTIES TIMEOUT 120)
 set_tests_properties(test_tensordot PROPERTIES TIMEOUT 200)
 set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT
                                                               120)
...
@@ -40,7 +40,6 @@ class TestFleetWithASPSharding(unittest.TestCase):
         os.environ["PADDLE_TRAINERS_NUM"] = "1"
         os.environ["PADDLE_TRAINER_ID"] = "0"
-        os.environ['FLAGS_enable_parallel_graph'] = "0"
         os.environ['FLAGS_fraction_of_gpu_memory_to_use'] = "0.1"
         os.environ['FLAGS_sync_nccl_allreduce'] = "1"
         os.environ['FLAGS_eager_delete_tensor_gb'] = "0"
...
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest

import numpy as np

os.environ['FLAGS_enable_parallel_graph'] = str(1)

import os

from parallel_executor_test_base import DeviceType, TestParallelExecutorBase
from simple_nets import init_data, simple_fc_net

import paddle.fluid.core as core


class TestMNIST(TestParallelExecutorBase):
    @classmethod
    def setUpClass(cls):
        os.environ['CPU_NUM'] = str(4)

    # simple_fc
    def check_simple_fc_convergence(self, use_device, use_reduce=False):
        if use_device == DeviceType.CUDA and not core.is_compiled_with_cuda():
            return

        img, label = init_data()

        self.check_network_convergence(
            simple_fc_net,
            feed_dict={"image": img, "label": label},
            use_device=use_device,
            use_reduce=use_reduce,
        )

    def test_simple_fc(self):
        # use_device
        self.check_simple_fc_convergence(True)

    def check_simple_fc_parallel_accuracy(self, use_device):
        if use_device and not core.is_compiled_with_cuda():
            return

        img, label = init_data()
        single_first_loss, single_last_loss, _ = self.check_network_convergence(
            method=simple_fc_net,
            feed_dict={"image": img, "label": label},
            use_device=use_device,
            use_parallel_executor=False,
        )
        (
            parallel_first_loss,
            parallel_last_loss,
            _,
        ) = self.check_network_convergence(
            method=simple_fc_net,
            feed_dict={"image": img, "label": label},
            use_device=use_device,
            use_parallel_executor=True,
        )

        self.assertAlmostEqual(
            np.mean(parallel_first_loss),
            single_first_loss,
            delta=1e-6,
        )
        self.assertAlmostEqual(
            np.mean(parallel_last_loss), single_last_loss, delta=1e-6
        )

    def test_simple_fc_parallel_accuracy(self):
        self.check_simple_fc_parallel_accuracy(DeviceType.CUDA)


if __name__ == '__main__':
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest

import paddle.fluid as fluid

fluid.core.globals()['FLAGS_enable_parallel_graph'] = 1

if __name__ == '__main__':
    unittest.main()
@@ -1492,7 +1492,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'float16_gpu_test',
     'test_dict',
     'test_bilinear_tensor_product_op',
-    'test_parallel_executor_pg',
     'test_assert',
     'test_smooth_l1_loss_op',
     'sequence_padding_test',
@@ -1508,7 +1507,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
     'test_inplace_abn_op',
     'test_parallel_executor_seresnext_base_gpu',
     'test_parallel_executor_seresnext_with_fuse_all_reduce_gpu',
-    'test_parallel_ssa_graph_inference_feed_partial_data',
     'test_parallel_executor_seresnext_with_reduce_gpu',
     'test_data_norm_op',
     'test_install_check',
@@ -2351,7 +2349,6 @@ TETRAD_PARALLEL_JOB = [
     'test_transforms',
     'test_sum_op',
     'test_scatter_op',
-    'test_parallel_executor_pg',
     'test_mix_precision_all_reduce_fuse',
     'test_tensorrt_engine_op',
     'test_zeropad2d',
@@ -2657,7 +2654,6 @@ TWO_PARALLEL_JOB = [
     'test_sigmoid_cross_entropy_with_logits_op',
     'test_regularizer_api',
     'test_lrn_op',
-    'test_parallel_ssa_graph_inference_feed_partial_data',
     'test_lod_reset_op',
     'test_install_check',
     'test_anchor_generator_op',
...
@@ -370,7 +370,6 @@ STATIC_MODE_TESTING_LIST = [
    'test_pairwise_distance',
    'test_parallel_executor_drop_scope',
    'test_parallel_executor_run_load_infer_program',
-   'test_parallel_ssa_graph_inference_feed_partial_data',
    'test_parameter',
    'test_partial_concat_op',
    'test_partial_eager_deletion_transformer',
@@ -650,7 +649,6 @@ STATIC_MODE_TESTING_LIST = [
    'test_fused_multihead_matmul_op',
    'test_ir_inplace_pass',
    'test_mix_precision_all_reduce_fuse',
-   'test_parallel_executor_pg',
    'test_rank_attention_op',
    'test_fleet_base',
    'test_fleet_graph_executor',
...
@@ -32,7 +32,6 @@ disable_wingpu_test="^test_model$|\
 ^test_multiprocess_dataloader_iterable_dataset_dynamic$|\
 ^test_parallel_executor_feed_persistable_var$|\
 ^test_parallel_executor_inference_feed_partial_data$|\
-^test_parallel_ssa_graph_inference_feed_partial_data$|\
 ^test_py_reader_combination$|\
 ^test_py_reader_pin_memory$|\
 ^test_py_reader_push_pop$|\
@@ -145,7 +144,6 @@ disable_win_inference_test="^trt_quant_int8_yolov3_r50_test$|\
 ^test_py_reader_push_pop$|\
 ^test_parallel_executor_feed_persistable_var$|\
 ^test_parallel_executor_inference_feed_partial_data$|\
-^test_parallel_ssa_graph_inference_feed_partial_data$|\
 ^test_reader_reset$|\
 ^test_parallel_executor_seresnext_base_gpu$|\
 ^test_py_reader_pin_memory$|\
...