From c8429d36e091054e780cb9b1021896af34ccec07 Mon Sep 17 00:00:00 2001 From: Steffy-zxf <48793257+Steffy-zxf@users.noreply.github.com> Date: Thu, 25 Nov 2021 22:47:37 +0800 Subject: [PATCH] [cherry-pick 2.2]fix data parallel when VOCAB var in program (#37546) * fix data parallel when VOCAB var in program * fix ci coverage --- python/paddle/fluid/dygraph/parallel.py | 3 ++ .../fluid/tests/unittests/CMakeLists.txt | 1 + .../unittests/test_faster_tokenizer_op.py | 28 +++++++++++++++++++ 3 files changed, 32 insertions(+) diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 7dd8d38aa70..4c2ee53ff7e 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -365,6 +365,9 @@ def sync_params_buffers(model, if getattr(param, "no_sync", False): continue + if param.type == core.VarDesc.VarType.VOCAB: + continue + model_vars.append(param.detach()) if len(model_vars) == 0: return diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 18c19266aca..878313c283a 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -554,6 +554,7 @@ py_test_modules(test_imperative_static_runner_mnist MODULES test_imperative_stat py_test_modules(test_imperative_static_runner_while MODULES test_imperative_static_runner_while ENVS FLAGS_cudnn_deterministic=1) set_tests_properties(test_conv2d_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") +set_tests_properties(test_faster_tokenizer_op PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_op_depthwise_conv PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv2d_api PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") set_tests_properties(test_conv_nn_grad PROPERTIES LABELS "RUN_TYPE=EXCLUSIVE") diff --git a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py index 496f3505ec4..dab56e162f3 100755 --- a/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py +++ b/python/paddle/fluid/tests/unittests/test_faster_tokenizer_op.py @@ -388,6 +388,34 @@ class TestBertTokenizerOp(unittest.TestCase): exe.run(paddle.static.default_main_program(), feed={'x': self.text}) paddle.disable_static() + def test_data_parallel(self): + self.max_seq_len = 128 + self.pad_to_max_seq_len = True + self.is_split_into_words = False + + model = paddle.DataParallel(self.faster_tokenizer) + input_ids, token_type_ids = model( + text=self.text_tensor, + do_lower_case=self.bert_tokenizer.do_lower_case, + max_seq_len=self.max_seq_len, + pad_to_max_seq_len=self.pad_to_max_seq_len, + is_split_into_words=self.is_split_into_words) + input_ids = input_ids.numpy() + token_type_ids = token_type_ids.numpy() + + encoded_inputs = self.bert_tokenizer( + self.text, + max_seq_len=self.max_seq_len, + pad_to_max_seq_len=self.pad_to_max_seq_len, + is_split_into_words=self.is_split_into_words) + py_input_ids = np.array(encoded_inputs[0]["input_ids"]).reshape([1, -1]) + py_token_type_ids = np.array(encoded_inputs[0][ + "token_type_ids"]).reshape([1, -1]) + self.assertTrue(np.allclose(input_ids, py_input_ids, rtol=0, atol=0.01)) + self.assertTrue( + np.allclose( + token_type_ids, py_token_type_ids, rtol=0, atol=0.01)) + if __name__ == '__main__': unittest.main() -- GitLab