From aaee07a340f4dd5d30329f4874e8d56bd4a2f35e Mon Sep 17 00:00:00 2001 From: ccrrong <101700995+ccrrong@users.noreply.github.com> Date: Tue, 13 Dec 2022 16:38:13 +0800 Subject: [PATCH] remove linear_chain_crf and crf_decoding from fluid (#48996) * remove linear_chain_crf and crf_decoding --- python/paddle/fluid/layers/nn.py | 207 ------------ .../tests/book/test_label_semantic_roles.py | 11 +- .../fluid/tests/unittests/CMakeLists.txt | 4 - .../unittests/test_directory_migration.py | 1 - .../unittests/test_parallel_executor_crf.py | 305 ------------------ python/paddle/static/nn/__init__.py | 2 - tools/parallel_UT_rule.py | 1 - tools/static_mode_white_list.py | 1 - 8 files changed, 2 insertions(+), 530 deletions(-) delete mode 100644 python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 91d80387230..75b8c176147 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -65,8 +65,6 @@ from collections.abc import Iterable __all__ = [ 'fc', 'embedding', - 'linear_chain_crf', - 'crf_decoding', 'conv2d', 'dropout', 'split', @@ -752,211 +750,6 @@ def _pull_box_sparse( return outs -@templatedoc() -def linear_chain_crf(input, label, param_attr=None, length=None): - """ - :api_attr: Static Graph - - Linear Chain CRF. - - ${comment} - - Args: - input(${emission_type}): ${emission_comment} - label(${label_type}): ${label_comment} - Length(${length_type}): ${length_comment} - param_attr(ParamAttr): The attribute of the learnable parameter for transition parameter. - - Returns: - output(${emission_exps_type}): ${emission_exps_comment} \n - output(${transition_exps_type}): ${transition_exps_comment} \n - output(${log_likelihood_type}): ${log_likelihood_comment} \n - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - import numpy as np - import paddle - paddle.enable_static() - - #define net structure, using LodTensor - train_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(train_program, startup_program): - input_data = fluid.data(name='input_data', shape=[-1,10], dtype='float32') - label = fluid.data(name='label', shape=[-1,1], dtype='int') - emission= fluid.layers.fc(input=input_data, size=10, act="tanh") - crf_cost = fluid.layers.linear_chain_crf( - input=emission, - label=label, - param_attr=fluid.ParamAttr( - name='crfw', - learning_rate=0.01)) - use_cuda = False - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - #define data, using LoDTensor - a = fluid.create_lod_tensor(np.random.rand(12,10).astype('float32'), [[3,3,4,2]], place) - b = fluid.create_lod_tensor(np.array([[1],[1],[2],[3],[1],[1],[1],[3],[1],[1],[1],[1]]),[[3,3,4,2]] , place) - feed1 = {'input_data':a,'label':b} - loss= exe.run(train_program,feed=feed1, fetch_list=[crf_cost]) - print(loss) - - #define net structure, using padding - train_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(train_program, startup_program): - input_data2 = fluid.data(name='input_data2', shape=[-1,10,10], dtype='float32') - label2 = fluid.data(name='label2', shape=[-1,10,1], dtype='int') - label_length = fluid.data(name='length', shape=[-1,1], dtype='int') - emission2= fluid.layers.fc(input=input_data2, size=10, act="tanh", num_flatten_dims=2) - crf_cost2 = fluid.layers.linear_chain_crf( - input=emission2, - label=label2, - length=label_length, - param_attr=fluid.ParamAttr( - name='crfw', - learning_rate=0.01)) - - use_cuda = False - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup_program) - - #define data, using padding - cc=np.random.rand(4,10,10).astype('float32') - dd=np.random.rand(4,10,1).astype('int64') - ll=np.array([[3],[3],[4],[2]]) - feed2 = {'input_data2':cc,'label2':dd,'length':ll} - loss2= exe.run(train_program,feed=feed2, fetch_list=[crf_cost2]) - print(loss2) - #[array([[ 7.8902354], - # [ 7.3602567], - # [ 10.004011], - # [ 5.86721 ]], dtype=float32)] - - #you can use find_var to get transition parameter. - transition=np.array(fluid.global_scope().find_var('crfw').get_tensor()) - print(transition) - - """ - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'linear_chain_crf' - ) - check_variable_and_dtype(label, 'label', ['int64'], 'linear_chain_crf') - helper = LayerHelper('linear_chain_crf', **locals()) - size = input.shape[2] if length else input.shape[1] - transition = helper.create_parameter( - attr=helper.param_attr, - shape=[size + 2, size], - dtype=helper.input_dtype(), - ) - alpha = helper.create_variable_for_type_inference( - dtype=helper.input_dtype() - ) - emission_exps = helper.create_variable_for_type_inference( - dtype=helper.input_dtype() - ) - transition_exps = helper.create_variable_for_type_inference( - dtype=helper.input_dtype() - ) - log_likelihood = helper.create_variable_for_type_inference( - dtype=helper.input_dtype() - ) - this_inputs = { - "Emission": [input], - "Transition": transition, - "Label": [label], - } - if length: - this_inputs['Length'] = [length] - helper.append_op( - type='linear_chain_crf', - inputs=this_inputs, - outputs={ - "Alpha": [alpha], - "EmissionExps": [emission_exps], - "TransitionExps": transition_exps, - "LogLikelihood": log_likelihood, - }, - ) - - return log_likelihood - - -@templatedoc() -def crf_decoding(input, param_attr, label=None, length=None): - """ - :api_attr: Static Graph - - ${comment} - - Args: - input(Tensor): ${emission_comment} - - param_attr (ParamAttr|None): To specify the weight parameter attribute. - Default: None, which means the default weight parameter property is - used. See usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` . - - label(${label_type}, optional): ${label_comment} - - length(${length_type}, optional): ${length_comment} - - Returns: - Tensor: ${viterbi_path_comment} - - Examples: - .. code-block:: python - - import paddle - paddle.enable_static() - - # LoDTensor-based example - num_labels = 10 - feature = paddle.static.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1) - label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1) - emission = paddle.static.nn.fc(feature, size=num_labels) - - crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label, - param_attr=paddle.ParamAttr(name="crfw")) - crf_decode = paddle.static.nn.crf_decoding(input=emission, - param_attr=paddle.ParamAttr(name="crfw")) - - # Common tensor example - num_labels, max_len = 10, 20 - feature = paddle.static.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32') - label = paddle.static.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64') - length = paddle.static.data(name='length', shape=[-1, 1], dtype='int64') - emission = paddle.static.nn.fc(feature, size=num_labels, - num_flatten_dims=2) - - crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label, length=length, - param_attr=paddle.ParamAttr(name="crfw_pad")) - crf_decode = paddle.static.nn.crf_decoding(input=emission, length=length, - param_attr=paddle.ParamAttr(name="crfw_pad")) - """ - check_variable_and_dtype( - input, 'input', ['float32', 'float64'], 'crf_decoding' - ) - helper = LayerHelper('crf_decoding', **locals()) - transition = helper.get_parameter(param_attr.name) - viterbi_path = helper.create_variable_for_type_inference( - dtype=core.VarDesc.VarType.INT64 - ) - inputs = {"Emission": [input], "Transition": transition, "Label": label} - if length: - inputs['Length'] = length - helper.append_op( - type='crf_decoding', - inputs=inputs, - outputs={"ViterbiPath": [viterbi_path]}, - ) - - return viterbi_path - - @deprecated(since="2.0.0", update_to="paddle.nn.functional.dropout") def dropout( x, diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 7ea9780b12d..102b4c5ec98 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -162,12 +162,8 @@ def train(use_cuda, save_dirname=None, is_local=True): target = fluid.layers.data( name='target', shape=[1], dtype='int64', lod_level=1 ) - crf_cost = fluid.layers.linear_chain_crf( - input=feature_out, - label=target, - param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr), - ) - avg_cost = paddle.mean(crf_cost) + cost = fluid.layers.softmax_with_cross_entropy(feature_out, target) + avg_cost = paddle.mean(cost) # TODO(qiao) # check other optimizers and check why out will be NAN @@ -183,9 +179,6 @@ def train(use_cuda, save_dirname=None, is_local=True): # TODO(qiao) # add dependency track and move this config before optimizer - crf_decode = fluid.layers.crf_decoding( - input=feature_out, param_attr=fluid.ParamAttr(name='crfw') - ) train_data = paddle.batch( paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192), diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 613f696f12a..de83eeb5361 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -422,7 +422,6 @@ endfunction() list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type) list(REMOVE_ITEM TEST_OPS test_fetch_lod_tensor_array) list(REMOVE_ITEM TEST_OPS test_warpctc_op) -list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf) list(REMOVE_ITEM TEST_OPS test_parallel_executor_profiler) list(REMOVE_ITEM TEST_OPS test_data_norm_op) list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed) @@ -748,7 +747,6 @@ if(WITH_DISTRIBUTE) endif() endif() -py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf) # profiler will random hang in linux cuda 10.1 or 10.2 # see https://github.com/PaddlePaddle/Paddle/issues/29082 for details. # We guess there are some bugs in linux cuda 10.1 or 10.2, @@ -916,7 +914,6 @@ set_tests_properties( test_buffer_shared_memory_reuse_pass PROPERTIES LABELS "RUN_TYPE=DIST") set_tests_properties( - test_parallel_executor_crf test_sync_batch_norm_op test_inplace_abn_op test_parallel_executor_seresnext_base_gpu @@ -1053,7 +1050,6 @@ set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120) set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120) set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data PROPERTIES TIMEOUT 120) -set_tests_properties(test_parallel_executor_crf PROPERTIES TIMEOUT 120) set_tests_properties(test_tensordot PROPERTIES TIMEOUT 200) set_tests_properties(test_imperative_save_load PROPERTIES TIMEOUT 120) set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 6968c6041ba..34816d57038 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -91,7 +91,6 @@ class TestDirectory(unittest.TestCase): 'paddle.static.nn.conv3d', 'paddle.static.nn.conv3d_transpose', 'paddle.static.nn.create_parameter', - 'paddle.static.nn.crf_decoding', 'paddle.static.nn.data_norm', 'paddle.static.nn.deform_conv2d', 'paddle.static.nn.group_norm', diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py deleted file mode 100644 index b49ebd663fc..00000000000 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_crf.py +++ /dev/null @@ -1,305 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import unittest - -import paddle -import paddle.dataset.conll05 as conll05 -import paddle.fluid as fluid -import paddle.fluid.core as core -from paddle.fluid import compiler - -word_dict, verb_dict, label_dict = conll05.get_dict() -word_dict_len = len(word_dict) -label_dict_len = len(label_dict) -pred_dict_len = len(verb_dict) -mark_dict_len = 2 -word_dim = 32 -mark_dim = 5 -hidden_dim = 512 -depth = 8 -mix_hidden_lr = 1e-3 -embedding_name = 'emb' - - -def db_lstm( - word, - predicate, - ctx_n2, - ctx_n1, - ctx_0, - ctx_p1, - ctx_p2, - mark, - is_sparse, - **ignored -): - # 8 features - predicate_embedding = fluid.layers.embedding( - input=predicate, - is_sparse=is_sparse, - size=[pred_dict_len, word_dim], - dtype='float32', - param_attr='vemb', - ) - - mark_embedding = fluid.layers.embedding( - input=mark, - is_sparse=is_sparse, - size=[mark_dict_len, mark_dim], - dtype='float32', - ) - - word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2] - emb_layers = [ - fluid.layers.embedding( - size=[word_dict_len, word_dim], - is_sparse=is_sparse, - input=x, - param_attr=fluid.ParamAttr(name=embedding_name, trainable=False), - ) - for x in word_input - ] - # TODO(zcd): if the parameter is not trainable, the - # parameter's gradient should not generated. - for emb_layer in emb_layers: - emb_layer.stop_gradient = True - - emb_layers.append(predicate_embedding) - emb_layers.append(mark_embedding) - - hidden_0_layers = [ - fluid.layers.fc(input=emb, size=hidden_dim, act='tanh') - for emb in emb_layers - ] - - hidden_0 = fluid.layers.sums(input=hidden_0_layers) - - lstm_0 = fluid.layers.dynamic_lstm( - input=hidden_0, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - ) - - # stack L-LSTM and R-LSTM with direct edges - input_tmp = [hidden_0, lstm_0] - - for i in range(1, depth): - mix_hidden = fluid.layers.sums( - input=[ - fluid.layers.fc( - input=input_tmp[0], size=hidden_dim, act='tanh' - ), - fluid.layers.fc( - input=input_tmp[1], size=hidden_dim, act='tanh' - ), - ] - ) - - lstm = fluid.layers.dynamic_lstm( - input=mix_hidden, - size=hidden_dim, - candidate_activation='relu', - gate_activation='sigmoid', - cell_activation='sigmoid', - is_reverse=((i % 2) == 1), - ) - - input_tmp = [mix_hidden, lstm] - - feature_out = fluid.layers.sums( - input=[ - fluid.layers.fc( - input=input_tmp[0], size=label_dict_len, act='tanh' - ), - fluid.layers.fc( - input=input_tmp[1], size=label_dict_len, act='tanh' - ), - ] - ) - - return feature_out - - -class TestCRFModel(unittest.TestCase): - def check_network_convergence( - self, is_sparse, build_strategy=None, use_cuda=True - ): - os.environ['CPU_NUM'] = str(4) - main = fluid.Program() - startup = fluid.Program() - scope = fluid.Scope() - with fluid.scope_guard(scope): - with fluid.program_guard(main, startup): - word = fluid.layers.data( - name='word_data', shape=[1], dtype='int64', lod_level=1 - ) - predicate = fluid.layers.data( - name='verb_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_n2 = fluid.layers.data( - name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_n1 = fluid.layers.data( - name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_0 = fluid.layers.data( - name='ctx_0_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_p1 = fluid.layers.data( - name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1 - ) - ctx_p2 = fluid.layers.data( - name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1 - ) - mark = fluid.layers.data( - name='mark_data', shape=[1], dtype='int64', lod_level=1 - ) - - feature_out = db_lstm(**locals()) - target = fluid.layers.data( - name='target', shape=[1], dtype='int64', lod_level=1 - ) - crf_cost = fluid.layers.linear_chain_crf( - input=feature_out, - label=target, - param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1), - ) - avg_cost = paddle.mean(crf_cost) - - sgd_optimizer = fluid.optimizer.SGD( - learning_rate=fluid.layers.exponential_decay( - learning_rate=0.01, - decay_steps=100000, - decay_rate=0.5, - staircase=True, - ) - ) - sgd_optimizer.minimize(avg_cost) - - train_data = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.conll05.test(), buf_size=8192 - ), - batch_size=8, - ) - - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - exe.run(startup) - - train_cp = compiler.CompiledProgram(main).with_data_parallel( - loss_name=avg_cost.name, build_strategy=build_strategy - ) - - feeder = fluid.DataFeeder( - feed_list=[ - word, - ctx_n2, - ctx_n1, - ctx_0, - ctx_p1, - ctx_p2, - predicate, - mark, - target, - ], - place=fluid.CPUPlace(), - ) - - data = train_data() - for i in range(4): - cur_batch = next(data) - print( - exe.run( - train_cp, - feed=feeder.feed(cur_batch), - fetch_list=[avg_cost.name], - )[0] - ) - - def _new_build_strategy(self, use_reduce=False): - build_strategy = fluid.BuildStrategy() - - if use_reduce: - build_strategy.reduce_strategy = ( - fluid.BuildStrategy.ReduceStrategy.Reduce - ) - else: - build_strategy.reduce_strategy = ( - fluid.BuildStrategy.ReduceStrategy.AllReduce - ) - - return build_strategy - - def test_update_sparse_parameter_all_reduce(self): - if core.is_compiled_with_cuda(): - self.check_network_convergence( - is_sparse=True, - build_strategy=self._new_build_strategy(), - use_cuda=True, - ) - - self.check_network_convergence( - is_sparse=True, - build_strategy=self._new_build_strategy(), - use_cuda=False, - ) - - def test_update_dense_parameter_all_reduce(self): - if core.is_compiled_with_cuda(): - self.check_network_convergence( - is_sparse=False, - build_strategy=self._new_build_strategy(), - use_cuda=True, - ) - - self.check_network_convergence( - is_sparse=False, - build_strategy=self._new_build_strategy(), - use_cuda=False, - ) - - def test_update_sparse_parameter_reduce(self): - if core.is_compiled_with_cuda(): - self.check_network_convergence( - is_sparse=True, - build_strategy=self._new_build_strategy(use_reduce=True), - use_cuda=True, - ) - self.check_network_convergence( - is_sparse=True, - build_strategy=self._new_build_strategy(use_reduce=True), - use_cuda=False, - ) - - def test_update_dense_parameter_reduce(self): - if core.is_compiled_with_cuda(): - self.check_network_convergence( - is_sparse=False, - build_strategy=self._new_build_strategy(use_reduce=True), - use_cuda=True, - ) - self.check_network_convergence( - is_sparse=False, - build_strategy=self._new_build_strategy(use_reduce=True), - use_cuda=False, - ) - - -if __name__ == '__main__': - unittest.main() diff --git a/python/paddle/static/nn/__init__.py b/python/paddle/static/nn/__init__.py index 616c203dc47..37594c937e5 100755 --- a/python/paddle/static/nn/__init__.py +++ b/python/paddle/static/nn/__init__.py @@ -31,7 +31,6 @@ from .common import bilinear_tensor_product # noqa: F401 from .common import py_func # noqa: F401 from ...tensor.creation import create_parameter # noqa: F401 from ...fluid.layers import conv2d # noqa: F401 -from ...fluid.layers import crf_decoding # noqa: F401 from ...fluid.layers import layer_norm # noqa: F401 from ...fluid.layers import multi_box_head # noqa: F401 from .loss import nce # noqa: F401 @@ -72,7 +71,6 @@ __all__ = [ # noqa 'conv2d_transpose', 'conv3d', 'conv3d_transpose', - 'crf_decoding', 'data_norm', 'deform_conv2d', 'group_norm', diff --git a/tools/parallel_UT_rule.py b/tools/parallel_UT_rule.py index 4e552bebb75..853191400b8 100755 --- a/tools/parallel_UT_rule.py +++ b/tools/parallel_UT_rule.py @@ -1572,7 +1572,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [ FIFTH_PARALLEL_JOB_NEW = [ 'test_buffer_shared_memory_reuse_pass', 'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass', - 'test_parallel_executor_crf', 'test_multiprocess_reader_exception', 'buddy_allocator_test', 'test_multiprocess_dataloader_dataset', diff --git a/tools/static_mode_white_list.py b/tools/static_mode_white_list.py index 84ed3a253ca..94865fb538b 100755 --- a/tools/static_mode_white_list.py +++ b/tools/static_mode_white_list.py @@ -543,7 +543,6 @@ STATIC_MODE_TESTING_LIST = [ 'test_transpiler_ops', 'test_communicator_sync', 'test_collective_optimizer', - 'test_parallel_executor_crf', 'test_parallel_executor_profiler', 'test_parallel_executor_transformer', 'test_parallel_executor_transformer_auto_growth', -- GitLab