diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index c0ad3e3bea7d71bbd923d274c84a4ef522adeeac..ec20712cac76043c7632aaa060e07153bdde4fe7 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -958,7 +958,7 @@ class BeamSearchDecoder(Decoder): x = nn.unsqueeze(x, [1]) # [batch_size, 1, ...] expand_times = [1] * len(x.shape) expand_times[1] = beam_size - x = nn.expand(x, expand_times) # [batch_size, beam_size, ...] + x = paddle.tile(x, expand_times) # [batch_size, beam_size, ...] x = nn.transpose(x, list(range(2, len(x.shape))) + [0, 1]) # [..., batch_size, beam_size] # use 0 to copy to avoid wrong shape @@ -1024,7 +1024,7 @@ class BeamSearchDecoder(Decoder): x = nn.unsqueeze(x, [1]) expand_times = [1] * len(x.shape) expand_times[1] = self.beam_size - x = nn.expand(x, expand_times) + x = paddle.tile(x, expand_times) return x def _mask_probs(self, probs, finished): @@ -1050,7 +1050,7 @@ class BeamSearchDecoder(Decoder): # TODO: use where_op finished = tensor.cast(finished, dtype=probs.dtype) probs = nn.elementwise_mul( - nn.expand(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]), + paddle.tile(nn.unsqueeze(finished, [2]), [1, 1, self.vocab_size]), self.noend_mask_tensor, axis=-1) - nn.elementwise_mul( probs, (finished - 1), axis=0) @@ -1080,7 +1080,7 @@ class BeamSearchDecoder(Decoder): batch_size, indices.dtype) if batch_size.dtype != indices.dtype else batch_size batch_size.stop_gradient = True # TODO: remove this - batch_pos = nn.expand( + batch_pos = paddle.tile( nn.unsqueeze( tensor.range( 0, batch_size, 1, dtype=indices.dtype), [1]), @@ -1140,12 +1140,11 @@ class BeamSearchDecoder(Decoder): init_cell_states = map_structure(self._expand_to_beam_size, initial_cell_states) - # TODO: use fill_constant when support variable shape - init_inputs = nn.expand( - nn.unsqueeze( - nn.expand(self.start_token_tensor, [self.batch_size]), [1]), - [1, self.beam_size]) - log_probs = nn.expand( + init_inputs = paddle.full( + shape=[self.batch_size, self.beam_size], + fill_value=self.start_token_tensor, + dtype=self.start_token_tensor.dtype) + log_probs = paddle.tile( tensor.assign( np.array( [[0.] + [-self.kinf] * (self.beam_size - 1)], @@ -1213,7 +1212,7 @@ class BeamSearchDecoder(Decoder): scores = log_probs scores = nn.reshape(scores, [-1, self.beam_size * self.vocab_size]) # TODO: add grad for topk then this beam search can be used to train - topk_scores, topk_indices = nn.topk(input=scores, k=self.beam_size) + topk_scores, topk_indices = paddle.topk(x=scores, k=self.beam_size) beam_indices = nn.elementwise_floordiv(topk_indices, self.vocab_size_tensor) token_indices = nn.elementwise_mod(topk_indices, self.vocab_size_tensor)