diff --git a/PaddleNLP/Research/Dialogue-PLATO/plato/models/unified_transformer.py b/PaddleNLP/Research/Dialogue-PLATO/plato/models/unified_transformer.py
index a95eb9dfbf1975f75fe81abab104bdb37147a01c..cff48e33417f6fc7b8e01afaa4cf5b62d4c1cce0 100644
--- a/PaddleNLP/Research/Dialogue-PLATO/plato/models/unified_transformer.py
+++ b/PaddleNLP/Research/Dialogue-PLATO/plato/models/unified_transformer.py
@@ -267,11 +267,12 @@ class UnifiedTransformer(ModelBase):
         Create attention mask.
 
         @param : input_mask
-        @type : Variable(shape: [batch_size, max_seq_len, 1])
+        @type : Variable(shape: [batch_size, max_seq_len])
 
         @param : auto_regressive
         @type : bool
         """
+        input_mask = fluid.layers.unsqueeze(input=input_mask, axes=[2])
         seq_len = input_mask.shape[1]
 
         input_mask = layers.cast(input_mask, self._dtype)
diff --git a/PaddleNLP/Research/Dialogue-PLATO/plato/modules/embedder.py b/PaddleNLP/Research/Dialogue-PLATO/plato/modules/embedder.py
index d67c4a2977826dd478c19b801163ebf0d04e40bd..bfebcc875473de4d73f85c35bd5d9ce6c4b4502b 100644
--- a/PaddleNLP/Research/Dialogue-PLATO/plato/modules/embedder.py
+++ b/PaddleNLP/Research/Dialogue-PLATO/plato/modules/embedder.py
@@ -67,10 +67,10 @@ def main():
     place = fluid.CPUPlace()
     with fluid.dygraph.guard(place):
         model = Embedder("Embedder", 10, 20, 20, 20, 20)
-        token_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64"))
-        pos_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64"))
-        type_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64"))
-        turn_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10, 1]).astype("int64"))
+        token_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
+        pos_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
+        type_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
+        turn_inp = fluid.dygraph.to_variable(np.random.randint(0, 19, [10, 10]).astype("int64"))
         out = model(token_inp, pos_inp, type_inp, turn_inp)
         print(out)
diff --git a/PaddleNLP/Research/Dialogue-PLATO/run.py b/PaddleNLP/Research/Dialogue-PLATO/run.py
index b0daeb8bc94bf491f3b5dd7d1b8e5c863e99119f..a45410d8b2e3b014a0ccf5c6b0224112b353e88f 100644
--- a/PaddleNLP/Research/Dialogue-PLATO/run.py
+++ b/PaddleNLP/Research/Dialogue-PLATO/run.py
@@ -99,7 +99,6 @@ def main():
     test_loader = DataLoader(test_dataset, hparams.Trainer, collate_fn=collate_fn, is_test=hparams.do_infer)
 
     def to_tensor(array):
-        array = np.expand_dims(array, -1)
         return fluid.dygraph.to_variable(array)
 
     if hparams.use_data_distributed:
diff --git a/PaddleSpeech/DeepVoice3/deepvoice3_paddle/data.py b/PaddleSpeech/DeepVoice3/deepvoice3_paddle/data.py
index 9bdda194a5720435d075169f3bdf0b78e7279b7a..bb9d62a31bfe44297fc53644d2d5cc4ea77ebc09 100644
--- a/PaddleSpeech/DeepVoice3/deepvoice3_paddle/data.py
+++ b/PaddleSpeech/DeepVoice3/deepvoice3_paddle/data.py
@@ -273,7 +273,6 @@ def create_batch(batch):
 
     x_batch = np.array(
         [_pad(x[0], max_input_len) for x in batch], dtype=np.int64)
-    x_batch = np.expand_dims(x_batch, axis=-1)
 
     mel_batch = np.array(
         [_pad_2d(
@@ -318,7 +317,7 @@ def create_batch(batch):
     done = np.expand_dims(np.expand_dims(done, axis=1), axis=1)
 
     if multi_speaker:
-        speaker_ids = np.expand_dims(np.array([x[3] for x in batch]), axis=-1)
+        speaker_ids = np.array([x[3] for x in batch])
         return (x_batch, input_lengths, mel_batch, y_batch, text_positions,
                 frame_positions, done, target_lengths, speaker_ids)
     else:
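The PLATO and DeepVoice3 data changes above share one pattern: int64 index arrays and masks are now fed as plain `[batch_size, seq_len]` tensors, and any trailing singleton axis is re-added inside the model only where it is still needed (here, before building the attention mask). Below is a minimal sketch of that shape handling in fluid dygraph; the toy batch size, sequence length, mask values, and the outer-product mask construction are illustrative and not taken from the repo:

```python
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

# Toy [batch_size, max_seq_len] padding mask, following the new 2-D convention.
np_mask = np.array([[1, 1, 1, 0],
                    [1, 1, 0, 0]]).astype("float32")

with fluid.dygraph.guard(fluid.CPUPlace()):
    input_mask = fluid.dygraph.to_variable(np_mask)             # shape [2, 4]
    # The model restores the trailing axis internally, as in the
    # unified_transformer.py change above.
    input_mask = layers.unsqueeze(input=input_mask, axes=[2])   # shape [2, 4, 1]
    # One way to turn it into a pairwise [batch, seq, seq] attention mask
    # (an outer product; not necessarily the repo's exact implementation).
    mask = layers.matmul(x=input_mask, y=input_mask, transpose_y=True)
    print(mask.shape)  # [2, 4, 4]
```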
diff --git a/PaddleSpeech/DeepVoice3/deepvoice3_paddle/deepvoice3.py b/PaddleSpeech/DeepVoice3/deepvoice3_paddle/deepvoice3.py
index 83b5b2ef8122457fa130301b85ff5433e514e756..4dda07c1fe53df7a040b435e1ac0c7840ad5b840 100644
--- a/PaddleSpeech/DeepVoice3/deepvoice3_paddle/deepvoice3.py
+++ b/PaddleSpeech/DeepVoice3/deepvoice3_paddle/deepvoice3.py
@@ -206,7 +206,7 @@ class Encoder(dg.Layer):
         Encode text sequence.
 
         Args:
-            x (Variable): Shape(B, T_enc, 1), dtype: int64. Ihe input text
+            x (Variable): Shape(B, T_enc), dtype: int64. Ihe input text
                 indices. T_enc means the timesteps of decoder input x.
             speaker_embed (Variable, optional): Shape(Batch_size, speaker_dim),
                 dtype: float32. Speaker embeddings. This arg is not None only
@@ -1228,7 +1228,7 @@ class DeepVoiceTTS(dg.Layer):
                 valid lengths for each example in text_sequences.
             mel_inputs (Variable): Shape(B, C_mel, 1, T_mel), ground truth
                 mel-spectrogram, which is used as decoder inputs when training.
-            speaker_indices (Variable, optional): Shape(Batch_size, 1),
+            speaker_indices (Variable, optional): Shape(Batch_size),
                 dtype: int64. Speaker index for each example. This arg is not
                 None only when the model is a multispeaker model.
             text_positions (Variable): Shape(B, T_enc, 1), dtype: int64.
diff --git a/dygraph/ocr_recognition/train.py b/dygraph/ocr_recognition/train.py
index a8d32849c30b3325b7c66bbf3a7bac670bce9eaf..a27d2fb75d286c37346fe5cec2aa8c50d2f2c55c 100644
--- a/dygraph/ocr_recognition/train.py
+++ b/dygraph/ocr_recognition/train.py
@@ -433,7 +433,7 @@ class OCRAttention(fluid.dygraph.Layer):
 
         decoder_boot = self.fc(backward_first)
 
-        label_in = fluid.layers.reshape(label_in, [-1, 1], inplace=False)
+        label_in = fluid.layers.reshape(label_in, [-1], inplace=False)
         trg_embedding = self.embedding(label_in)
 
         trg_embedding = fluid.layers.reshape(
diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py
index 1f1d40b40c6fd14c3998d6e1b39ac745e8c1fe70..fac2b267c7f81c7caffcdf382fbfc85ae23c5171 100644
--- a/dygraph/ptb_lm/ptb_dy.py
+++ b/dygraph/ptb_lm/ptb_dy.py
@@ -360,7 +360,7 @@ def train_ptb_lm():
         train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
         for batch_id, batch in enumerate(train_data_iter):
             x_data, y_data = batch
-            x_data = x_data.reshape((-1, num_steps, 1))
+            x_data = x_data.reshape((-1, num_steps))
             y_data = y_data.reshape((-1, 1))
             x = to_variable(x_data)
             y = to_variable(y_data)
@@ -399,7 +399,7 @@ def train_ptb_lm():
         start_time = time.time()
         for batch_id, batch in enumerate(train_data_iter):
             x_data, y_data = batch
-            x_data = x_data.reshape((-1, num_steps, 1))
+            x_data = x_data.reshape((-1, num_steps))
             y_data = y_data.reshape((-1, 1))
             x = to_variable(x_data)
             y = to_variable(y_data)
diff --git a/dygraph/sentiment/main.py b/dygraph/sentiment/main.py
index e141d2cf8f2d50efae17ede78ecc990aa76095ec..a9d327cb81a82b055a3287865bf6c1428f8cfd9e 100755
--- a/dygraph/sentiment/main.py
+++ b/dygraph/sentiment/main.py
@@ -162,7 +162,7 @@ def train():
                            'constant',
                            constant_values=(args.vocab_size)) for x in data
-                ]).astype('int64').reshape(-1, 1))
+                ]).astype('int64').reshape(-1))
                 label = to_variable(
                     np.array([x[1] for x in data]).astype('int64').reshape(
                         args.batch_size, 1))
@@ -206,7 +206,7 @@ def train():
                     eval_label = to_variable(
                         np.array([x[1] for x in eval_data]).astype(
                             'int64').reshape(args.batch_size, 1))
-                    eval_doc = to_variable(eval_np_doc.reshape(-1, 1))
+                    eval_doc = to_variable(eval_np_doc.reshape(-1))
                     eval_avg_cost, eval_prediction, eval_acc = model(
                         eval_doc, eval_label)
                     eval_np_mask = (
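The dygraph changes above (OCR attention, PTB LM, sentiment training loop) drop the same trailing singleton axis from integer index inputs, while the label tensors that feed the loss keep their `(-1, 1)` shape. A minimal sketch of the new feeding convention, assuming illustrative batch and vocab sizes that are not from the repo:

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

batch_size, num_steps, vocab_size = 4, 6, 1000  # illustrative only

# Fake a batch of token ids and next-token labels.
x_data = np.random.randint(0, vocab_size, size=(batch_size, num_steps)).astype("int64")
y_data = np.random.randint(0, vocab_size, size=(batch_size * num_steps,)).astype("int64")

with fluid.dygraph.guard(fluid.CPUPlace()):
    # Old convention: x_data.reshape((-1, num_steps, 1)); the extra axis is gone now.
    x = to_variable(x_data.reshape((-1, num_steps)))
    # Labels keep the trailing axis: fluid's cross_entropy expects
    # label shape [..., 1] when soft_label is False.
    y = to_variable(y_data.reshape((-1, 1)))
    print(x.shape, y.shape)  # [4, 6] [24, 1]
```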
diff --git a/dygraph/sentiment/nets.py b/dygraph/sentiment/nets.py
index 6dd607855bb2b6d6f1293e4f3d31621fbdbee53d..8e732376575336aef83af5d8a56dbba1569d16cb 100755
--- a/dygraph/sentiment/nets.py
+++ b/dygraph/sentiment/nets.py
@@ -114,7 +114,7 @@ class CNN(fluid.dygraph.Layer):
 
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
+        o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
@@ -155,7 +155,7 @@ class BOW(fluid.dygraph.Layer):
 
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
-        o_np_mask = (inputs.numpy() != self.dict_dim).astype('float32')
+        o_np_mask = (np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
@@ -205,7 +205,7 @@ class GRU(fluid.dygraph.Layer):
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
         o_np_mask = to_variable(
-            inputs.numpy() != self.dict_dim).astype('float32')
+            np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
@@ -258,7 +258,7 @@ class BiGRU(fluid.dygraph.Layer):
     def forward(self, inputs, label=None):
         emb = self.embedding(inputs)
         o_np_mask = to_variable(
-            inputs.numpy() != self.dict_dim).astype('float32')
+            np.expand_dims(inputs.numpy(), -1) != self.dict_dim).astype('float32')
         mask_emb = fluid.layers.expand(
             to_variable(o_np_mask), [1, self.hid_dim])
         emb = emb * mask_emb
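In nets.py the document indices now arrive without the trailing axis (see the `reshape(-1)` changes in main.py above), so the numpy padding mask would come out 1-D; the added `np.expand_dims(..., -1)` restores the `[N, 1]` shape that `fluid.layers.expand` needs in order to tile the mask across the hidden dimension. A small sketch of that masking step, with hypothetical sizes that are not from the repo:

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph import to_variable

dict_dim, hid_dim = 100, 8  # hypothetical vocab size and hidden width
ids = np.array([3, 17, dict_dim, dict_dim]).astype("int64")  # dict_dim marks padding

with fluid.dygraph.guard(fluid.CPUPlace()):
    inputs = to_variable(ids)  # now 1-D, shape [4]
    # Without expand_dims the mask would be 1-D and could not be tiled to
    # [N, hid_dim]; the extra axis makes it [N, 1] as before.
    o_np_mask = (np.expand_dims(inputs.numpy(), -1) != dict_dim).astype('float32')
    mask_emb = fluid.layers.expand(to_variable(o_np_mask), [1, hid_dim])
    print(mask_emb.shape)  # [4, 8], with all-zero rows at the padding positions
```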