diff --git a/PaddleNLP/paddlenlp/seq2vec/encoder.py b/PaddleNLP/paddlenlp/seq2vec/encoder.py
index 728d4e97760fdf8bb189f8a8fb1160f7e79ac955..3b8dc67e331932d31c27bfdb67b67d3adf416b7d 100644
--- a/PaddleNLP/paddlenlp/seq2vec/encoder.py
+++ b/PaddleNLP/paddlenlp/seq2vec/encoder.py
@@ -129,13 +129,13 @@ class CNNEncoder(nn.Layer):
         self._activation = conv_layer_activation
         self._output_dim = output_dim
 
-        self.convs = [
+        self.convs = paddle.nn.LayerList([
             nn.Conv2D(
                 in_channels=1,
                 out_channels=self._num_filter,
                 kernel_size=(i, self._emb_dim),
                 **kwargs) for i in self._ngram_filter_sizes
-        ]
+        ])
 
         maxpool_output_dim = self._num_filter * len(self._ngram_filter_sizes)
         if self._output_dim:
diff --git a/PaddleNLP/paddlenlp/transformers/roberta/modeling.py b/PaddleNLP/paddlenlp/transformers/roberta/modeling.py
index fc797cfee5ff2ddca44257b503906c769cdf9791..d14296568887ab50cb5ad9e81b1334554491fa88 100644
--- a/PaddleNLP/paddlenlp/transformers/roberta/modeling.py
+++ b/PaddleNLP/paddlenlp/transformers/roberta/modeling.py
@@ -50,8 +50,10 @@ class RobertaEmbeddings(nn.Layer):
 
     def forward(self, input_ids, token_type_ids=None, position_ids=None):
         if position_ids is None:
             # maybe need use shape op to unify static graph and dynamic graph
-            seq_length = input_ids.shape[1]
-            position_ids = paddle.arange(0, seq_length, dtype="int64")
+            ones = paddle.ones_like(input_ids, dtype="int64")
+            seq_length = paddle.cumsum(ones, axis=1)
+            position_ids = seq_length - ones
+            position_ids.stop_gradient = True
         if token_type_ids is None:
             token_type_ids = paddle.zeros_like(input_ids, dtype="int64")
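
Note: the following is a minimal sketch, not part of the patch, illustrating the intent of the two changes. It assumes a paddle 2.x install; the `Holder` class, the tensor shapes, and the filter sizes are made up purely for demonstration.

# Sketch only (assumes paddle 2.x); Holder and the sample values are hypothetical.
import paddle
import paddle.nn as nn

# 1) Wrapping sub-layers in paddle.nn.LayerList registers their parameters with
#    the parent Layer, so the optimizer can see the conv weights; a plain Python
#    list of layers is invisible to .parameters().
class Holder(nn.Layer):
    def __init__(self, convs):
        super().__init__()
        self.convs = convs

convs_plain = [nn.Conv2D(1, 4, kernel_size=(k, 8)) for k in (2, 3)]
convs_list = nn.LayerList([nn.Conv2D(1, 4, kernel_size=(k, 8)) for k in (2, 3)])
print(len(Holder(convs_plain).parameters()))  # 0 -- parameters not registered
print(len(Holder(convs_list).parameters()))   # 4 -- two weights + two biases

# 2) The cumsum-based position ids avoid reading input_ids.shape[1] in Python,
#    producing the same [0, 1, ..., seq_len-1] per row while remaining traceable
#    when the model is run in static-graph mode.
input_ids = paddle.to_tensor([[5, 7, 9, 2], [3, 1, 4, 1]])
ones = paddle.ones_like(input_ids, dtype="int64")
position_ids = paddle.cumsum(ones, axis=1) - ones
print(position_ids)  # [[0, 1, 2, 3], [0, 1, 2, 3]]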