Unverified commit b5ab8979, authored by liu zhengxi, committed by GitHub

[remove fluid] Remove fluid APIs (#48641)
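The deletions below follow the pattern of this cleanup series: code built on fluid-only ops that have no paddle 2.x drop-in (beam_search, beam_search_decode, sequence_expand, the LoD array ops) is removed outright, while generic calls already sit in their 2.x homes, as the surviving lines show. A rough sketch of the mapping, assuming the usual legacy fluid names on the left:

# legacy fluid call                  -> paddle 2.x call visible in this diff
# fluid.layers.less_than             -> paddle.less_than
# fluid.layers.While                 -> paddle.static.nn.control_flow.While
# fluid.layers.topk                  -> paddle.topk
# fluid.layers.softmax               -> paddle.nn.functional.softmax
# fluid.layers.reshape               -> paddle.reshape
# fluid.layers.beam_search(_decode)  -> removed, no 2.x drop-in op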

Parent b01f979b
@@ -1719,161 +1719,6 @@ def wrap_decoder(
return predict
def fast_decode(
src_vocab_size,
trg_vocab_size,
max_in_len,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
dropout_rate,
weight_sharing,
beam_size,
max_out_len,
eos_idx,
):
"""
Use beam search to decode. Caches will be used to store states of history
steps which can make the decoding faster.
"""
enc_output = wrap_encoder(
src_vocab_size,
max_in_len,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
dropout_rate,
weight_sharing,
)
start_tokens, init_scores, trg_src_attn_bias = make_all_inputs(
fast_decoder_data_input_fields
)
def beam_search():
max_len = layers.fill_constant(
shape=[1], dtype=start_tokens.dtype, value=max_out_len
)
step_idx = layers.fill_constant(
shape=[1], dtype=start_tokens.dtype, value=0
)
cond = paddle.less_than(x=step_idx, y=max_len)
while_op = paddle.static.nn.control_flow.While(cond)
# array states will be stored for each step.
ids = layers.array_write(
paddle.reshape(start_tokens, (-1, 1)), step_idx
)
scores = layers.array_write(init_scores, step_idx)
# cell states will be overwritten at each step.
# caches contain the states of previous steps to reduce redundant
# computation in the decoder.
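# Each cache starts with a zero-length time axis (shape [-1, 0, d_model]);
# the decoder appends one step of keys/values per iteration, so step t
# attends over the t entries cached so far.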
caches = [
{
"k": layers.fill_constant_batch_size_like(
input=start_tokens,
shape=[-1, 0, d_model],
dtype=enc_output.dtype,
value=0,
),
"v": layers.fill_constant_batch_size_like(
input=start_tokens,
shape=[-1, 0, d_model],
dtype=enc_output.dtype,
value=0,
),
}
for i in range(n_layer)
]
with while_op.block():
pre_ids = layers.array_read(array=ids, i=step_idx)
pre_ids = paddle.reshape(pre_ids, (-1, 1, 1))
pre_scores = layers.array_read(array=scores, i=step_idx)
# sequence_expand can gather sequences according to lod, so it is used
# in beam search to select the states that belong to the surviving ids.
pre_src_attn_bias = layers.sequence_expand(
x=trg_src_attn_bias, y=pre_scores
)
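# e.g. with a (hypothetical) lod [[0, 2, 5]] on pre_scores, row 0 of the
# source-side states is tiled twice and row 1 three times, one copy per
# surviving beam branch.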
pre_enc_output = layers.sequence_expand(x=enc_output, y=pre_scores)
pre_caches = [
{
"k": layers.sequence_expand(x=cache["k"], y=pre_scores),
"v": layers.sequence_expand(x=cache["v"], y=pre_scores),
}
for cache in caches
]
pre_pos = layers.elementwise_mul(
x=layers.fill_constant_batch_size_like(
input=pre_enc_output, # can't use pre_ids here since it has lod
value=1,
shape=[-1, 1, 1],
dtype=pre_ids.dtype,
),
y=layers.increment(x=step_idx, value=1.0, in_place=False),
axis=0,
)
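# pre_pos broadcasts the current position, step_idx + 1, over the batch;
# it feeds the decoder's positional encoding for this single step.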
logits = wrap_decoder(
trg_vocab_size,
max_in_len,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
dropout_rate,
weight_sharing,
dec_inputs=(pre_ids, pre_pos, None, pre_src_attn_bias),
enc_output=pre_enc_output,
caches=pre_caches,
)
logits = paddle.reshape(logits, (-1, trg_vocab_size))
topk_scores, topk_indices = paddle.topk(
x=paddle.nn.functional.softmax(logits), k=beam_size
)
accu_scores = layers.elementwise_add(
x=paddle.log(topk_scores),
y=paddle.reshape(pre_scores, shape=[-1]),
axis=0,
)
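# candidate score = log(top-k prob) + accumulated score of the beam it
# extends; beams compete on total log-probability.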
# beam_search op uses lod to distinguish branches.
topk_indices = layers.lod_reset(topk_indices, pre_ids)
selected_ids, selected_scores = layers.beam_search(
pre_ids=pre_ids,
pre_scores=pre_scores,
ids=topk_indices,
scores=accu_scores,
beam_size=beam_size,
end_id=eos_idx,
)
layers.increment(x=step_idx, value=1.0, in_place=True)
# update states
layers.array_write(selected_ids, i=step_idx, array=ids)
layers.array_write(selected_scores, i=step_idx, array=scores)
layers.assign(pre_src_attn_bias, trg_src_attn_bias)
layers.assign(pre_enc_output, enc_output)
for i in range(n_layer):
layers.assign(pre_caches[i]["k"], caches[i]["k"])
layers.assign(pre_caches[i]["v"], caches[i]["v"])
length_cond = paddle.less_than(x=step_idx, y=max_len)
finish_cond = paddle.logical_not(layers.is_empty(x=selected_ids))
paddle.logical_and(x=length_cond, y=finish_cond, out=cond)
finished_ids, finished_scores = layers.beam_search_decode(
ids, scores, beam_size=beam_size, end_id=eos_idx
)
return finished_ids, finished_scores
finished_ids, finished_scores = beam_search()
return finished_ids, finished_scores
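As a plain illustration of the scoring step that the removed softmax/topk/beam_search pipeline above performs, here is a minimal numpy sketch (a hypothetical helper, not the operator's implementation; it ignores lod bookkeeping and end-token pruning):

import numpy as np

def beam_step(logits, prev_scores, beam_size):
    # logits: [num_live_beams, vocab_size]; prev_scores: [num_live_beams]
    probs = np.exp(logits - logits.max(axis=-1, keepdims=True))
    probs /= probs.sum(axis=-1, keepdims=True)     # softmax per beam
    cand = np.log(probs) + prev_scores[:, None]    # accumulated log-prob
    flat = cand.reshape(-1)
    top = np.argsort(flat)[::-1][:beam_size]       # best beam_size overall
    parent = top // logits.shape[1]                # which beam each came from
    token = top % logits.shape[1]                  # which word extends it
    return parent, token, flat[top]

# toy usage: 2 live beams over a 5-word vocab, keep the best 2
parents, tokens, scores = beam_step(
    np.random.randn(2, 5), np.array([-0.1, -0.7]), beam_size=2
)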
def get_model(is_dist, is_async):
sum_cost, avg_cost, predict, token_num = transformer(
ModelHyperParams.src_vocab_size,
...
@@ -16,10 +16,7 @@ import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.op import Operator
@@ -118,50 +115,5 @@ class TestBeamSearchDecodeOpGPU(TestBeamSearchDecodeOp):
self.place = core.CUDAPlace(0)
class TestBeamSearchDecodeOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
def test_id_Variable():
# the input ids must be Variable
test_ids = np.random.randint(1, 5, [5, 1]).astype("int64")
scores = paddle.tensor.create_array(dtype='float32')
fluid.layers.beam_search_decode(
test_ids, scores, beam_size=5, end_id=0
)
self.assertRaises(TypeError, test_id_Variable)
def test_score_Variable():
# the input scores must be Variable
ids = paddle.tensor.create_array(dtype='int64')
test_scores = np.random.uniform(1, 5, [5, 1]).astype("float32")
fluid.layers.beam_search_decode(
ids, test_scores, beam_size=5, end_id=0
)
self.assertRaises(TypeError, test_score_Variable)
def test_id_dtype():
# the dtype of input ids must be int64
type_ids = paddle.tensor.create_array(dtype='float32')
scores = paddle.tensor.create_array(dtype='float32')
fluid.layers.beam_search_decode(
type_ids, scores, beam_size=5, end_id=0
)
self.assertRaises(TypeError, test_id_dtype)
def test_score_dtype():
# the dtype of input scores must be float32
ids = paddle.tensor.create_array(dtype='int64')
type_scores = paddle.tensor.create_array(dtype='int64')
fluid.layers.beam_search_decode(
ids, type_scores, beam_size=5, end_id=0
)
self.assertRaises(TypeError, test_score_dtype)
if __name__ == '__main__':
    unittest.main()
@@ -16,10 +16,7 @@ import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.framework import Program, program_guard
from paddle.fluid.op import Operator
@@ -302,119 +299,5 @@ class BeamSearchOpTester6(BeamSearchOpTester):
self.output_parent_idx = np.array([0, 1, 2, 3])
class TestBeamSearchOpError(unittest.TestCase):
def test_errors(self):
with program_guard(Program(), Program()):
pre_ids = fluid.data(
name='pre_id', shape=[1], lod_level=2, dtype='int64'
)
pre_scores = fluid.data(
name='pre_scores', shape=[1], lod_level=2, dtype='float32'
)
probs = fluid.data(name='probs', shape=[10000], dtype='float32')
topk_scores, topk_indices = paddle.topk(probs, k=4)
accu_scores = fluid.layers.elementwise_add(
x=paddle.log(x=topk_scores),
y=paddle.reshape(pre_scores, shape=[-1]),
axis=0,
)
def test_preids_Variable():
# the input pre_ids must be Variable
preids_data = np.random.randint(1, 5, [5, 1]).astype("int64")
fluid.layers.beam_search(
pre_ids=preids_data,
pre_scores=pre_scores,
ids=topk_indices,
scores=accu_scores,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_preids_Variable)
def test_prescores_Variable():
# the input pre_scores must be Variable
prescores_data = np.random.uniform(1, 5, [5, 1]).astype(
"float32"
)
fluid.layers.beam_search(
pre_ids=pre_ids,
pre_scores=prescores_data,
ids=topk_indices,
scores=accu_scores,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_prescores_Variable)
def test_ids_Variable():
# the input ids must be Variable or None
ids_data = np.random.randint(1, 5, [5, 1]).astype("int64")
fluid.layers.beam_search(
pre_ids=pre_ids,
pre_scores=pre_scores,
ids=ids_data,
scores=accu_scores,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_ids_Variable)
def test_scores_Variable():
# the input scores must be Variable
scores_data = np.random.uniform(1, 5, [5, 1]).astype("float32")
fluid.layers.beam_search(
pre_ids=pre_ids,
pre_scores=pre_scores,
ids=topk_indices,
scores=scores_data,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_scores_Variable)
def test_preids_dtype():
# the dtype of input pre_ids must be int64
preids_type_data = fluid.data(
name='preids_type_data',
shape=[1],
lod_level=2,
dtype='float32',
)
fluid.layers.beam_search(
pre_ids=preids_type_data,
pre_scores=pre_scores,
ids=topk_indices,
scores=accu_scores,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_preids_dtype)
def test_prescores_dtype():
# the dtype of input pre_scores must be float32
prescores_type_data = fluid.data(
name='prescores_type_data',
shape=[1],
lod_level=2,
dtype='int64',
)
fluid.layers.beam_search(
pre_ids=pre_ids,
pre_scores=prescores_type_data,
ids=topk_indices,
scores=accu_scores,
beam_size=4,
end_id=1,
)
self.assertRaises(TypeError, test_prescores_dtype)
if __name__ == '__main__':
    unittest.main()
@@ -141,15 +141,7 @@ class Decoder:
**kwargs
):
output_layer = kwargs.pop("output_layer", None)
if self.decoding_strategy == "train_greedy":
# for teacher-forcing MLE pre-training
helper = layers.TrainingHelper(**kwargs)
elif self.decoding_strategy == "infer_sample":
helper = layers.SampleEmbeddingHelper(**kwargs)
elif self.decoding_strategy == "infer_greedy":
helper = layers.GreedyEmbeddingHelper(**kwargs)
if self.decoding_strategy == "beam_search":
beam_size = kwargs.get("beam_size", 4) beam_size = kwargs.get("beam_size", 4)
encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch( encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
encoder_output, beam_size encoder_output, beam_size
......
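BeamSearchDecoder.tile_beam_merge_with_batch, kept in the surviving branch above, repeats each batch entry beam_size times and folds the beam dimension into the batch dimension, so the decoder can treat beams as extra batch rows. A minimal numpy sketch of that reshaping (hypothetical helper reusing the name for clarity; the real API operates on framework tensors):

import numpy as np

def tile_beam_merge_with_batch(x, beam_size):
    # [batch, ...] -> [batch * beam_size, ...]; np.repeat keeps the
    # beam_size copies of each example adjacent in the merged batch.
    return np.repeat(x, beam_size, axis=0)

enc = np.arange(6).reshape(3, 2)  # batch=3, hidden=2
merged = tile_beam_merge_with_batch(enc, beam_size=2)
print(merged.shape)  # (6, 2)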