Unverified Commit aaee07a3 authored by ccrrong, committed by GitHub

remove linear_chain_crf and crf_decoding from fluid (#48996)

* remove linear_chain_crf and crf_decoding
Parent 265a54aa
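Migration note: fluid no longer ships a drop-in replacement for these two layers. For decoding, paddle.text.viterbi_decode (available since Paddle 2.2) covers the Viterbi path computation, and for the training-time CRF loss, PaddleNLP's paddlenlp.layers (LinearChainCrf, LinearChainCrfLoss) is one option. A minimal decoding sketch, assuming dynamic-graph mode and random tensors purely for illustration:

import paddle

# emission (unary) scores: [batch_size, seq_len, num_tags]
batch_size, seq_len, num_tags = 2, 4, 3
emission = paddle.rand((batch_size, seq_len, num_tags), dtype='float32')
# transition scores between tags: [num_tags, num_tags]
transition = paddle.rand((num_tags, num_tags), dtype='float32')
# actual length of each sequence in the batch
lengths = paddle.to_tensor([3, 4], dtype='int64')
# returns the best score and the best tag path for each sequence
scores, paths = paddle.text.viterbi_decode(
    emission, transition, lengths, include_bos_eos_tag=False)
print(paths)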
@@ -65,8 +65,6 @@ from collections.abc import Iterable
__all__ = [
'fc',
'embedding',
'linear_chain_crf',
'crf_decoding',
'conv2d',
'dropout',
'split',
@@ -752,211 +750,6 @@ def _pull_box_sparse(
return outs
@templatedoc()
def linear_chain_crf(input, label, param_attr=None, length=None):
"""
:api_attr: Static Graph
Linear Chain CRF.
${comment}
Args:
input(${emission_type}): ${emission_comment}
label(${label_type}): ${label_comment}
length(${length_type}): ${length_comment}
param_attr(ParamAttr): The attribute of the learnable transition parameter.
Returns:
output(${log_likelihood_type}): ${log_likelihood_comment}
Examples:
.. code-block:: python
import paddle.fluid as fluid
import numpy as np
import paddle
paddle.enable_static()
#define net structure, using LoDTensor
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
input_data = fluid.data(name='input_data', shape=[-1,10], dtype='float32')
label = fluid.data(name='label', shape=[-1,1], dtype='int64')
emission = fluid.layers.fc(input=input_data, size=10, act="tanh")
crf_cost = fluid.layers.linear_chain_crf(
input=emission,
label=label,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=0.01))
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
#define data, using LoDTensor
a = fluid.create_lod_tensor(np.random.rand(12,10).astype('float32'), [[3,3,4,2]], place)
b = fluid.create_lod_tensor(np.array([[1],[1],[2],[3],[1],[1],[1],[3],[1],[1],[1],[1]]).astype('int64'), [[3,3,4,2]], place)
feed1 = {'input_data': a, 'label': b}
loss = exe.run(train_program, feed=feed1, fetch_list=[crf_cost])
print(loss)
#define net structure, using padding
train_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
input_data2 = fluid.data(name='input_data2', shape=[-1,10,10], dtype='float32')
label2 = fluid.data(name='label2', shape=[-1,10,1], dtype='int64')
label_length = fluid.data(name='length', shape=[-1,1], dtype='int64')
emission2 = fluid.layers.fc(input=input_data2, size=10, act="tanh", num_flatten_dims=2)
crf_cost2 = fluid.layers.linear_chain_crf(
input=emission2,
label=label2,
length=label_length,
param_attr=fluid.ParamAttr(
name='crfw',
learning_rate=0.01))
use_cuda = False
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
#define data, using padding
cc = np.random.rand(4,10,10).astype('float32')
dd = np.random.randint(0, 10, size=(4,10,1)).astype('int64')
ll = np.array([[3],[3],[4],[2]]).astype('int64')
feed2 = {'input_data2': cc, 'label2': dd, 'length': ll}
loss2 = exe.run(train_program, feed=feed2, fetch_list=[crf_cost2])
print(loss2)
#[array([[ 7.8902354],
# [ 7.3602567],
# [ 10.004011],
# [ 5.86721 ]], dtype=float32)]
#you can use find_var to get the transition parameter.
transition=np.array(fluid.global_scope().find_var('crfw').get_tensor())
print(transition)
"""
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'linear_chain_crf'
)
check_variable_and_dtype(label, 'label', ['int64'], 'linear_chain_crf')
helper = LayerHelper('linear_chain_crf', **locals())
size = input.shape[2] if length else input.shape[1]
transition = helper.create_parameter(
attr=helper.param_attr,
shape=[size + 2, size],
dtype=helper.input_dtype(),
)
alpha = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()
)
emission_exps = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()
)
transition_exps = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()
)
log_likelihood = helper.create_variable_for_type_inference(
dtype=helper.input_dtype()
)
this_inputs = {
"Emission": [input],
"Transition": transition,
"Label": [label],
}
if length:
this_inputs['Length'] = [length]
helper.append_op(
type='linear_chain_crf',
inputs=this_inputs,
outputs={
"Alpha": [alpha],
"EmissionExps": [emission_exps],
"TransitionExps": transition_exps,
"LogLikelihood": log_likelihood,
},
)
return log_likelihood
@templatedoc()
def crf_decoding(input, param_attr, label=None, length=None):
"""
:api_attr: Static Graph
${comment}
Args:
input(Tensor): ${emission_comment}
param_attr (ParamAttr): The parameter attribute of the transition parameter,
    usually shared with the ``linear_chain_crf`` layer that learned it. See
    usage for details in :ref:`api_paddle_fluid_param_attr_ParamAttr` .
label(${label_type}, optional): ${label_comment}
length(${length_type}, optional): ${length_comment}
Returns:
Tensor: ${viterbi_path_comment}
Examples:
.. code-block:: python
import paddle
paddle.enable_static()
# LoDTensor-based example
num_labels = 10
feature = paddle.static.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
emission = paddle.static.nn.fc(feature, size=num_labels)
crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label,
param_attr=paddle.ParamAttr(name="crfw"))
crf_decode = paddle.static.nn.crf_decoding(input=emission,
param_attr=paddle.ParamAttr(name="crfw"))
# Common tensor example
num_labels, max_len = 10, 20
feature = paddle.static.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
label = paddle.static.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
length = paddle.static.data(name='length', shape=[-1, 1], dtype='int64')
emission = paddle.static.nn.fc(feature, size=num_labels,
num_flatten_dims=2)
crf_cost = paddle.fluid.layers.linear_chain_crf(input=emission, label=label, length=length,
param_attr=paddle.ParamAttr(name="crfw_pad"))
crf_decode = paddle.static.nn.crf_decoding(input=emission, length=length,
param_attr=paddle.ParamAttr(name="crfw_pad"))
"""
check_variable_and_dtype(
input, 'input', ['float32', 'float64'], 'crf_decoding'
)
helper = LayerHelper('crf_decoding', **locals())
transition = helper.get_parameter(param_attr.name)
viterbi_path = helper.create_variable_for_type_inference(
dtype=core.VarDesc.VarType.INT64
)
inputs = {"Emission": [input], "Transition": transition, "Label": label}
if length:
inputs['Length'] = length
helper.append_op(
type='crf_decoding',
inputs=inputs,
outputs={"ViterbiPath": [viterbi_path]},
)
return viterbi_path
@deprecated(since="2.0.0", update_to="paddle.nn.functional.dropout")
def dropout(
x,
......
@@ -162,12 +162,8 @@ def train(use_cuda, save_dirname=None, is_local=True):
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1
)
crf_cost = fluid.layers.linear_chain_crf(
input=feature_out,
label=target,
param_attr=fluid.ParamAttr(name='crfw', learning_rate=mix_hidden_lr),
)
avg_cost = paddle.mean(crf_cost)
cost = fluid.layers.softmax_with_cross_entropy(feature_out, target)
avg_cost = paddle.mean(cost)
# TODO(qiao)
# check other optimizers and check why the output will be NaN
@@ -183,9 +179,6 @@ def train(use_cuda, save_dirname=None, is_local=True):
# TODO(qiao)
# add dependency track and move this config before optimizer
crf_decode = fluid.layers.crf_decoding(
input=feature_out, param_attr=fluid.ParamAttr(name='crfw')
)
train_data = paddle.batch(
paddle.reader.shuffle(paddle.dataset.conll05.test(), buf_size=8192),
......
@@ -422,7 +422,6 @@ endfunction()
list(REMOVE_ITEM TEST_OPS test_feed_data_check_shape_type)
list(REMOVE_ITEM TEST_OPS test_fetch_lod_tensor_array)
list(REMOVE_ITEM TEST_OPS test_warpctc_op)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_crf)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_profiler)
list(REMOVE_ITEM TEST_OPS test_data_norm_op)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_fetch_feed)
@@ -748,7 +747,6 @@ if(WITH_DISTRIBUTE)
endif()
endif()
py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf)
# profiler will random hang in linux cuda 10.1 or 10.2
# see https://github.com/PaddlePaddle/Paddle/issues/29082 for details.
# We guess there are some bugs in linux cuda 10.1 or 10.2,
@@ -916,7 +914,6 @@ set_tests_properties(
test_buffer_shared_memory_reuse_pass
PROPERTIES LABELS "RUN_TYPE=DIST")
set_tests_properties(
test_parallel_executor_crf
test_sync_batch_norm_op
test_inplace_abn_op
test_parallel_executor_seresnext_base_gpu
@@ -1053,7 +1050,6 @@ set_tests_properties(test_index_select_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_index_add_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_ssa_graph_inference_feed_partial_data
PROPERTIES TIMEOUT 120)
set_tests_properties(test_parallel_executor_crf PROPERTIES TIMEOUT 120)
set_tests_properties(test_tensordot PROPERTIES TIMEOUT 200)
set_tests_properties(test_imperative_save_load PROPERTIES TIMEOUT 120)
set_tests_properties(test_partial_eager_deletion_transformer PROPERTIES TIMEOUT
......
@@ -91,7 +91,6 @@ class TestDirectory(unittest.TestCase):
'paddle.static.nn.conv3d',
'paddle.static.nn.conv3d_transpose',
'paddle.static.nn.create_parameter',
'paddle.static.nn.crf_decoding',
'paddle.static.nn.data_norm',
'paddle.static.nn.deform_conv2d',
'paddle.static.nn.group_norm',
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import unittest
import paddle
import paddle.dataset.conll05 as conll05
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid import compiler
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_dict_len = len(verb_dict)
mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3
embedding_name = 'emb'
def db_lstm(
word,
predicate,
ctx_n2,
ctx_n1,
ctx_0,
ctx_p1,
ctx_p2,
mark,
is_sparse,
**ignored
):
# 8 features
predicate_embedding = fluid.layers.embedding(
input=predicate,
is_sparse=is_sparse,
size=[pred_dict_len, word_dim],
dtype='float32',
param_attr='vemb',
)
mark_embedding = fluid.layers.embedding(
input=mark,
is_sparse=is_sparse,
size=[mark_dict_len, mark_dim],
dtype='float32',
)
word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
emb_layers = [
fluid.layers.embedding(
size=[word_dict_len, word_dim],
is_sparse=is_sparse,
input=x,
param_attr=fluid.ParamAttr(name=embedding_name, trainable=False),
)
for x in word_input
]
# TODO(zcd): if the parameter is not trainable, the
# parameter's gradient should not be generated.
for emb_layer in emb_layers:
emb_layer.stop_gradient = True
emb_layers.append(predicate_embedding)
emb_layers.append(mark_embedding)
hidden_0_layers = [
fluid.layers.fc(input=emb, size=hidden_dim, act='tanh')
for emb in emb_layers
]
hidden_0 = fluid.layers.sums(input=hidden_0_layers)
lstm_0 = fluid.layers.dynamic_lstm(
input=hidden_0,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
)
# stack L-LSTM and R-LSTM with direct edges
input_tmp = [hidden_0, lstm_0]
for i in range(1, depth):
mix_hidden = fluid.layers.sums(
input=[
fluid.layers.fc(
input=input_tmp[0], size=hidden_dim, act='tanh'
),
fluid.layers.fc(
input=input_tmp[1], size=hidden_dim, act='tanh'
),
]
)
lstm = fluid.layers.dynamic_lstm(
input=mix_hidden,
size=hidden_dim,
candidate_activation='relu',
gate_activation='sigmoid',
cell_activation='sigmoid',
is_reverse=((i % 2) == 1),
)
input_tmp = [mix_hidden, lstm]
feature_out = fluid.layers.sums(
input=[
fluid.layers.fc(
input=input_tmp[0], size=label_dict_len, act='tanh'
),
fluid.layers.fc(
input=input_tmp[1], size=label_dict_len, act='tanh'
),
]
)
return feature_out
class TestCRFModel(unittest.TestCase):
def check_network_convergence(
self, is_sparse, build_strategy=None, use_cuda=True
):
os.environ['CPU_NUM'] = str(4)
main = fluid.Program()
startup = fluid.Program()
scope = fluid.Scope()
with fluid.scope_guard(scope):
with fluid.program_guard(main, startup):
word = fluid.layers.data(
name='word_data', shape=[1], dtype='int64', lod_level=1
)
predicate = fluid.layers.data(
name='verb_data', shape=[1], dtype='int64', lod_level=1
)
ctx_n2 = fluid.layers.data(
name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1
)
ctx_n1 = fluid.layers.data(
name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1
)
ctx_0 = fluid.layers.data(
name='ctx_0_data', shape=[1], dtype='int64', lod_level=1
)
ctx_p1 = fluid.layers.data(
name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1
)
ctx_p2 = fluid.layers.data(
name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1
)
mark = fluid.layers.data(
name='mark_data', shape=[1], dtype='int64', lod_level=1
)
feature_out = db_lstm(**locals())
target = fluid.layers.data(
name='target', shape=[1], dtype='int64', lod_level=1
)
crf_cost = fluid.layers.linear_chain_crf(
input=feature_out,
label=target,
param_attr=fluid.ParamAttr(name='crfw', learning_rate=1e-1),
)
avg_cost = paddle.mean(crf_cost)
sgd_optimizer = fluid.optimizer.SGD(
learning_rate=fluid.layers.exponential_decay(
learning_rate=0.01,
decay_steps=100000,
decay_rate=0.5,
staircase=True,
)
)
sgd_optimizer.minimize(avg_cost)
train_data = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.conll05.test(), buf_size=8192
),
batch_size=8,
)
place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup)
train_cp = compiler.CompiledProgram(main).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy
)
feeder = fluid.DataFeeder(
feed_list=[
word,
ctx_n2,
ctx_n1,
ctx_0,
ctx_p1,
ctx_p2,
predicate,
mark,
target,
],
place=fluid.CPUPlace(),
)
data = train_data()
for i in range(4):
cur_batch = next(data)
print(
exe.run(
train_cp,
feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name],
)[0]
)
def _new_build_strategy(self, use_reduce=False):
build_strategy = fluid.BuildStrategy()
if use_reduce:
build_strategy.reduce_strategy = (
fluid.BuildStrategy.ReduceStrategy.Reduce
)
else:
build_strategy.reduce_strategy = (
fluid.BuildStrategy.ReduceStrategy.AllReduce
)
return build_strategy
def test_update_sparse_parameter_all_reduce(self):
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=True,
build_strategy=self._new_build_strategy(),
use_cuda=True,
)
self.check_network_convergence(
is_sparse=True,
build_strategy=self._new_build_strategy(),
use_cuda=False,
)
def test_update_dense_parameter_all_reduce(self):
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=False,
build_strategy=self._new_build_strategy(),
use_cuda=True,
)
self.check_network_convergence(
is_sparse=False,
build_strategy=self._new_build_strategy(),
use_cuda=False,
)
def test_update_sparse_parameter_reduce(self):
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=True,
build_strategy=self._new_build_strategy(use_reduce=True),
use_cuda=True,
)
self.check_network_convergence(
is_sparse=True,
build_strategy=self._new_build_strategy(use_reduce=True),
use_cuda=False,
)
def test_update_dense_parameter_reduce(self):
if core.is_compiled_with_cuda():
self.check_network_convergence(
is_sparse=False,
build_strategy=self._new_build_strategy(use_reduce=True),
use_cuda=True,
)
self.check_network_convergence(
is_sparse=False,
build_strategy=self._new_build_strategy(use_reduce=True),
use_cuda=False,
)
if __name__ == '__main__':
unittest.main()
@@ -31,7 +31,6 @@ from .common import bilinear_tensor_product # noqa: F401
from .common import py_func # noqa: F401
from ...tensor.creation import create_parameter # noqa: F401
from ...fluid.layers import conv2d # noqa: F401
from ...fluid.layers import crf_decoding # noqa: F401
from ...fluid.layers import layer_norm # noqa: F401
from ...fluid.layers import multi_box_head # noqa: F401
from .loss import nce # noqa: F401
@@ -72,7 +71,6 @@ __all__ = [ # noqa
'conv2d_transpose',
'conv3d',
'conv3d_transpose',
'crf_decoding',
'data_norm',
'deform_conv2d',
'group_norm',
......
@@ -1572,7 +1572,6 @@ FOURTH_HIGH_PARALLEL_JOB_NEW = [
FIFTH_PARALLEL_JOB_NEW = [
'test_buffer_shared_memory_reuse_pass',
'test_buffer_shared_memory_reuse_pass_and_fuse_optimization_op_pass',
'test_parallel_executor_crf',
'test_multiprocess_reader_exception',
'buddy_allocator_test',
'test_multiprocess_dataloader_dataset',
......
@@ -543,7 +543,6 @@ STATIC_MODE_TESTING_LIST = [
'test_transpiler_ops',
'test_communicator_sync',
'test_collective_optimizer',
'test_parallel_executor_crf',
'test_parallel_executor_profiler',
'test_parallel_executor_transformer',
'test_parallel_executor_transformer_auto_growth',
......