diff --git a/paddlepalm/_downloader.py b/paddlepalm/_downloader.py
index 52521e812ce279139ba72b87827cbdfdf6881965..1b8de4b629a491148e43b71f96cb70c0542d15d4 100644
--- a/paddlepalm/_downloader.py
+++ b/paddlepalm/_downloader.py
@@ -33,6 +33,7 @@ ssl._create_default_https_context = ssl._create_unverified_context
 _items = {
     'pretrain': {'ernie-en-uncased-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz',
                  'bert-en-uncased-large': 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz',
+                 'bert-en-uncased-base': 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz',
                  'utils': None},
     'reader': {'utils': None},
     'backbone': {'utils': None},
@@ -90,7 +91,7 @@ def _download(item, scope, path, silent=False):
         tar.extractall(path = data_dir)
         tar.close()
         os.remove(filename)
-        if scope == 'bert-en-uncased-large':
+        if scope.startswith('bert'):
             source_path = data_dir + '/' + data_name.split('.')[0]
             fileList = os.listdir(source_path)
             for file in fileList:
diff --git a/paddlepalm/backbone/bert.py b/paddlepalm/backbone/bert.py
index 74f772c44fab376bd411011f8ca82b59cef4f0df..d3592a5526447694e8a14d01dee2b9987740b2ed 100644
--- a/paddlepalm/backbone/bert.py
+++ b/paddlepalm/backbone/bert.py
@@ -52,9 +52,9 @@ class Model(backbone):

     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32']}

     @property
@@ -73,7 +73,7 @@ class Model(backbone):
         self._emb_dtype = 'float32'

         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -84,14 +84,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)

-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))

-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
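
The backbone hunks above (and the ernie.py ones that follow) all migrate from fluid.layers.embedding to fluid.embedding, which is why token_ids, position_ids and segment_ids lose their trailing dimension of 1: the 1.6-style lookup takes plain [batch, seq_len] int64 ids and appends the embedding dimension itself, while input_mask stays rank 3 because it is consumed as a float mask rather than as lookup ids. A minimal sketch of the new call, assuming a paddlepaddle 1.6.x install; the vocabulary and hidden sizes are illustrative, not taken from any config in this repo:

    import numpy as np
    import paddle.fluid as fluid

    # ids no longer carry the old [-1, -1, 1] shape under the 1.6 API
    ids = fluid.data(name='token_ids', shape=[-1, -1], dtype='int64')
    emb = fluid.embedding(input=ids,
                          size=[30522, 768],  # [vocab_size, emb_size], illustrative
                          dtype='float32',
                          padding_idx=0)      # padding id must be 0, as the backbone comments note

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    out, = exe.run(feed={'token_ids': np.zeros([2, 5], dtype='int64')},
                   fetch_list=[emb])
    print(out.shape)  # (2, 5, 768): the embedding dim is appended to the id shape
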
diff --git a/paddlepalm/backbone/ernie.py b/paddlepalm/backbone/ernie.py
index 1e471537cf9485f533dbc4f048662f0a5bc30c60..ded196385112513d001c6db4505cdc3883592984 100644
--- a/paddlepalm/backbone/ernie.py
+++ b/paddlepalm/backbone/ernie.py
@@ -62,11 +62,11 @@ class Model(backbone):

     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32'],
-                "task_ids": [[-1,-1, 1], 'int64']}
+                "task_ids": [[-1,-1], 'int64']}

     @property
     def outputs_attr(self):
@@ -85,7 +85,7 @@ class Model(backbone):
         task_ids = inputs['task_ids']

         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -96,14 +96,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)

-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))

-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
@@ -113,7 +113,7 @@ class Model(backbone):
         emb_out = emb_out + position_emb_out
         emb_out = emb_out + sent_emb_out

-        task_emb_out = fluid.layers.embedding(
+        task_emb_out = fluid.embedding(
             task_ids,
             size=[self._task_types, self._emb_size],
             dtype=self._emb_dtype,
diff --git a/paddlepalm/mtl_controller.py b/paddlepalm/mtl_controller.py
index e2657587a2091397af85eda08fc65da11b2d0e38..286fd2759a6f56fba9e480daab21367682cfcabf 100755
--- a/paddlepalm/mtl_controller.py
+++ b/paddlepalm/mtl_controller.py
@@ -454,7 +454,7 @@ class Controller(object):

         # compute loss
         task_id_var = net_inputs['__task_id']
-        task_id_vec = layers.one_hot(task_id_var, num_instances)
+        task_id_vec = fluid.one_hot(task_id_var, num_instances)
         losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
         loss = layers.reduce_sum(task_id_vec * losses)
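
The same deprecation pattern shows up in the controller: fluid.one_hot is the 1.6-style entry point and, unlike fluid.layers.one_hot, it does not insist on an input whose last dimension is 1. A small sketch of the call, again assuming paddle 1.6.x; depth=3 stands in for num_instances and is purely illustrative:

    import numpy as np
    import paddle.fluid as fluid

    task_id = fluid.data(name='__task_id', shape=[1], dtype='int64')
    # fluid.one_hot appends the depth axis to the input shape
    task_id_vec = fluid.one_hot(task_id, depth=3)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    out, = exe.run(feed={'__task_id': np.array([2], dtype='int64')},
                   fetch_list=[task_id_vec])
    print(out)  # [[0. 0. 1.]]
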
"position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], + "task_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'] } diff --git a/paddlepalm/reader/mlm.py b/paddlepalm/reader/mlm.py index eb09c5e438c1348fc70d6f45d8a3af5e07201eea..e4dff3477f3ffd56864bcbaed439f7bd03716377 100644 --- a/paddlepalm/reader/mlm.py +++ b/paddlepalm/reader/mlm.py @@ -60,13 +60,13 @@ class Reader(reader): @property def outputs_attr(self): - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "task_ids": [[-1, -1, 1], 'int64'], - "mask_label": [[-1, 1], 'int64'], - "mask_pos": [[-1, 1], 'int64'], + "task_ids": [[-1, -1], 'int64'], + "mask_label": [[-1], 'int64'], + "mask_pos": [[-1], 'int64'], } diff --git a/paddlepalm/reader/mrc.py b/paddlepalm/reader/mrc.py index 6cac89adca1b8244d271cab4605b6a834a7faa37..2906b97ecb591fd6cc65f3a246c6d88e87dfccb8 100644 --- a/paddlepalm/reader/mrc.py +++ b/paddlepalm/reader/mrc.py @@ -68,21 +68,21 @@ class Reader(reader): @property def outputs_attr(self): if self._is_training: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "start_positions": [[-1, 1], 'int64'], - "end_positions": [[-1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'] + "start_positions": [[-1], 'int64'], + "end_positions": [[-1], 'int64'], + "task_ids": [[-1, -1], 'int64'] } else: - return {"token_ids": [[-1, -1, 1], 'int64'], - "position_ids": [[-1, -1, 1], 'int64'], - "segment_ids": [[-1, -1, 1], 'int64'], - "task_ids": [[-1, -1, 1], 'int64'], + return {"token_ids": [[-1, -1], 'int64'], + "position_ids": [[-1, -1], 'int64'], + "segment_ids": [[-1, -1], 'int64'], + "task_ids": [[-1, -1], 'int64'], "input_mask": [[-1, -1, 1], 'float32'], - "unique_ids": [[-1, 1], 'int64'] + "unique_ids": [[-1], 'int64'] } @property diff --git a/paddlepalm/reader/utils/batching4bert.py b/paddlepalm/reader/utils/batching4bert.py index daeb25ae9e0fd2dfd4abe021453a71ccd790d562..96998b21e0fe97c18776fef827d290e4c2a89525 100644 --- a/paddlepalm/reader/utils/batching4bert.py +++ b/paddlepalm/reader/utils/batching4bert.py @@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3): sent[token_index] = MASK mask_flag = True mask_pos.append(sent_index * max_len + token_index) - mask_label = np.array(mask_label).astype("int64").reshape([-1, 1]) - mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1]) + mask_label = np.array(mask_label).astype("int64").reshape([-1]) + mask_pos = np.array(mask_pos).astype("int64").reshape([-1]) return batch_tokens, mask_label, mask_pos @@ -96,7 +96,7 @@ def prepare_batch_data(insts, # or unique id for i in range(3, len(insts[0]), 1): labels = [inst[i] for inst in insts] - labels = np.array(labels).astype("int64").reshape([-1, 1]) + labels = np.array(labels).astype("int64").reshape([-1]) labels_list.append(labels) # First step: do mask without padding if mask_id >= 0: @@ -154,14 +154,14 @@ def pad_batch_data(insts, inst_data = np.array([ list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts ]) - return_list += 
diff --git a/paddlepalm/reader/utils/batching4bert.py b/paddlepalm/reader/utils/batching4bert.py
index daeb25ae9e0fd2dfd4abe021453a71ccd790d562..96998b21e0fe97c18776fef827d290e4c2a89525 100644
--- a/paddlepalm/reader/utils/batching4bert.py
+++ b/paddlepalm/reader/utils/batching4bert.py
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos

@@ -96,7 +96,7 @@ def prepare_batch_data(insts,
     # or unique id
     for i in range(3, len(insts[0]), 1):
         labels = [inst[i] for inst in insts]
-        labels = np.array(labels).astype("int64").reshape([-1, 1])
+        labels = np.array(labels).astype("int64").reshape([-1])
         labels_list.append(labels)
     # First step: do mask without padding
     if mask_id >= 0:
@@ -154,14 +154,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
diff --git a/paddlepalm/reader/utils/batching4ernie.py b/paddlepalm/reader/utils/batching4ernie.py
index d3d13573c38af3d7d6e7027cbff06969b449b722..7a7f86890c5d01b0c36ec0f3aeefec1b3135128a 100644
--- a/paddlepalm/reader/utils/batching4ernie.py
+++ b/paddlepalm/reader/utils/batching4ernie.py
@@ -113,8 +113,8 @@ def mask(batch_tokens,

         pre_sent_len = len(sent)

-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos

@@ -136,7 +136,7 @@ def pad_batch_data(insts,
     inst_data = np.array(
         [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts])
-    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+    return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
@@ -145,7 +145,7 @@ def pad_batch_data(insts,
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
@@ -165,7 +165,7 @@ def pad_batch_data(insts,

     if return_seq_lens:
         seq_lens = np.array([len(inst) for inst in insts])
-        return_list += [seq_lens.astype("int64").reshape([-1, 1])]
+        return_list += [seq_lens.astype("int64").reshape([-1])]

     return return_list if len(return_list) > 1 else return_list[0]
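
On the batching side, pad_batch_data now hands back rank-2 [batch, max_len] id tensors instead of [batch, max_len, 1]. A self-contained sketch of that padding step; pad_ids is not a helper from the repo, just an illustration of the new reshape:

    import numpy as np

    def pad_ids(insts, pad_idx=0):
        # pad every instance to the longest one, return [batch, max_len]
        max_len = max(len(inst) for inst in insts)
        inst_data = np.array([list(inst) + [pad_idx] * (max_len - len(inst))
                              for inst in insts])
        return inst_data.astype('int64').reshape([-1, max_len])

    print(pad_ids([[101, 2023, 102], [101, 102]]).shape)  # (2, 3)
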
diff --git a/paddlepalm/reader/utils/mlm_batching.py b/paddlepalm/reader/utils/mlm_batching.py
index 991d02d3b50c9b3c10b0cebd3d12f6762cb91f01..8d6061d42a4ea096d17e4aeb6d3df394e852deb6 100644
--- a/paddlepalm/reader/utils/mlm_batching.py
+++ b/paddlepalm/reader/utils/mlm_batching.py
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos

@@ -147,14 +147,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
         input_mask_data = np.array([[1] * len(inst) + [0] *
diff --git a/paddlepalm/reader/utils/reader4ernie.py b/paddlepalm/reader/utils/reader4ernie.py
index 37b6396dd80a6e158bf06e295894a2094dfd16f6..a57a747efaf1df70372d402e8777dbff791f224a 100644
--- a/paddlepalm/reader/utils/reader4ernie.py
+++ b/paddlepalm/reader/utils/reader4ernie.py
@@ -479,17 +479,17 @@ class ClassifyReader(BaseReader):
         batch_labels = [record.label_id for record in batch_records]
         if self.is_classify:
             batch_labels = np.array(batch_labels).astype("int64").reshape(
-                [-1, 1])
+                [-1])
         elif self.is_regression:
             batch_labels = np.array(batch_labels).astype("float32").reshape(
-                [-1, 1])
+                [-1])

         if batch_records[0].qid:
             batch_qids = [record.qid for record in batch_records]
             batch_qids = np.array(batch_qids).astype("int64").reshape(
-                [-1, 1])
+                [-1])
         else:
-            batch_qids = np.array([]).astype("int64").reshape([-1, 1])
+            batch_qids = np.array([]).astype("int64").reshape([-1])

         # padding
         padded_token_ids, input_mask = pad_batch_data(
@@ -908,19 +908,19 @@ class MRCReader(BaseReader):
                 record.end_position for record in batch_records
             ]
             batch_start_position = np.array(batch_start_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
             batch_end_position = np.array(batch_end_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
         else:
             batch_size = len(batch_token_ids)
             batch_start_position = np.zeros(
-                shape=[batch_size, 1], dtype="int64")
-            batch_end_position = np.zeros(shape=[batch_size, 1], dtype="int64")
+                shape=[batch_size], dtype="int64")
+            batch_end_position = np.zeros(shape=[batch_size], dtype="int64")

         batch_unique_ids = [record.unique_id for record in batch_records]
         batch_unique_ids = np.array(batch_unique_ids).astype("int64").reshape(
-            [-1, 1])
+            [-1])

         # padding
         padded_token_ids, input_mask = pad_batch_data(
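
mask_label and mask_pos likewise become flat rank-1 vectors. That fits how they are built: each mask_pos entry is a flat offset sent_index * max_len + token_index into the batch's token stream, so a gather over the flattened tokens needs no trailing dimension. An illustrative numpy check:

    import numpy as np

    batch, max_len = 2, 5
    tokens = np.arange(batch * max_len).reshape([batch, max_len])
    mask_pos = np.array([1 * max_len + 3], dtype='int64')  # row 1, column 3
    print(tokens.reshape([-1])[mask_pos])                  # [8], i.e. tokens[1, 3]
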
diff --git a/paddlepalm/task_paradigm/cls.py b/paddlepalm/task_paradigm/cls.py
index 6cbacf79dd12622c4d952c29040c0c42768e2d11..2893dc33ce833f597d1f04311f8728d15112e606 100644
--- a/paddlepalm/task_paradigm/cls.py
+++ b/paddlepalm/task_paradigm/cls.py
@@ -43,7 +43,7 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"label_ids": [[-1, 1], 'int64']}
+            reader = {"label_ids": [[-1], 'int64']}
         else:
             reader = {}
         bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']}
@@ -75,8 +75,9 @@ class TaskParadigm(task_paradigm):
                 name=scope_name+"cls_out_b", initializer=fluid.initializer.Constant(0.)))

         if self._is_training:
-            loss = fluid.layers.softmax_with_cross_entropy(
-                logits=logits, label=label_ids)
+            inputs = fluid.layers.softmax(logits)
+            loss = fluid.layers.cross_entropy(
+                input=inputs, label=label_ids)
             loss = layers.mean(loss)
             return {"loss": loss}
         else:
diff --git a/paddlepalm/task_paradigm/match.py b/paddlepalm/task_paradigm/match.py
index ee0d175b01e09ede242aa7fe404366dc48804580..07e1e8e4a3111bfe24dc766d9e12dfd9cdc5f52a 100644
--- a/paddlepalm/task_paradigm/match.py
+++ b/paddlepalm/task_paradigm/match.py
@@ -44,7 +44,7 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"label_ids": [[-1, 1], 'int64']}
+            reader = {"label_ids": [[-1], 'int64']}
         else:
             reader = {}
         bb = {"sentence_pair_embedding": [[-1, self._hidden_size], 'float32']}
@@ -79,8 +79,9 @@ class TaskParadigm(task_paradigm):
                 initializer=fluid.initializer.Constant(0.)))

         if self._is_training:
-            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
-                logits=logits, label=labels, return_softmax=True)
+            inputs = fluid.layers.softmax(logits)
+            ce_loss = fluid.layers.cross_entropy(
+                input=inputs, label=labels)
             loss = fluid.layers.mean(x=ce_loss)
             return {'loss': loss}
         else:
diff --git a/paddlepalm/task_paradigm/mlm.py b/paddlepalm/task_paradigm/mlm.py
index ec86dd151e8b0f86c345120f4a5907f0afb91d5c..5b99ac7dbd3a5591ce871533f970cc888abb754c 100644
--- a/paddlepalm/task_paradigm/mlm.py
+++ b/paddlepalm/task_paradigm/mlm.py
@@ -33,8 +33,8 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         reader = {
-            "mask_label": [[-1, 1], 'int64'],
-            "mask_pos": [[-1, 1], 'int64']}
+            "mask_label": [[-1], 'int64'],
+            "mask_pos": [[-1], 'int64']}
         if not self._is_training:
             del reader['mask_label']
             del reader['batchsize_x_seqlen']
@@ -100,8 +100,9 @@ class TaskParadigm(task_paradigm):
             is_bias=True)

         if self._is_training:
-            mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
-                logits=fc_out, label=mask_label)
+            inputs = fluid.layers.softmax(fc_out)
+            mask_lm_loss = fluid.layers.cross_entropy(
+                input=inputs, label=mask_label)
             loss = fluid.layers.mean(mask_lm_loss)
             return {'loss': loss}
         else:
diff --git a/paddlepalm/task_paradigm/mrc.py b/paddlepalm/task_paradigm/mrc.py
index b1f0b5688d18fc9c84156a1570f24815febba17f..ae36ecac82b94b6db89636645d226975fb94d5e6 100644
--- a/paddlepalm/task_paradigm/mrc.py
+++ b/paddlepalm/task_paradigm/mrc.py
@@ -49,11 +49,11 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"start_positions": [[-1, 1], 'int64'],
-                      "end_positions": [[-1, 1], 'int64'],
+            reader = {"start_positions": [[-1], 'int64'],
+                      "end_positions": [[-1], 'int64'],
                       }
         else:
-            reader = {'unique_ids': [[-1, 1], 'int64']}
+            reader = {'unique_ids': [[-1], 'int64']}
         bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']}
         return {'reader': reader, 'backbone': bb}

@@ -70,7 +70,7 @@ class TaskParadigm(task_paradigm):
         else:
             return {'start_logits': [[-1, -1, 1], 'float32'],
                     'end_logits': [[-1, -1, 1], 'float32'],
-                    'unique_ids': [[-1, 1], 'int64']}
+                    'unique_ids': [[-1], 'int64']}

     def build(self, inputs, scope_name=""):

@@ -100,9 +100,10 @@ class TaskParadigm(task_paradigm):
         start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

         def _compute_single_loss(logits, positions):
             """Compute start/end loss for mrc model"""
-            loss = fluid.layers.softmax_with_cross_entropy(
-                logits=logits, label=positions)
+            inputs = fluid.layers.softmax(logits)
+            loss = fluid.layers.cross_entropy(
+                input=inputs, label=positions)
             loss = fluid.layers.mean(x=loss)
             return loss

@@ -120,7 +121,7 @@ class TaskParadigm(task_paradigm):
     def postprocess(self, rt_outputs):
        """this func will be called after each step(batch) of training/evaluating/predicting process."""
         if not self._is_training:
-            unique_ids = np.squeeze(rt_outputs['unique_ids'], -1)
+            unique_ids = rt_outputs['unique_ids']
             start_logits = rt_outputs['start_logits']
             end_logits = rt_outputs['end_logits']
             for idx in range(len(unique_ids)):
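
All four task heads replace the fused fluid.layers.softmax_with_cross_entropy with an explicit softmax followed by fluid.layers.cross_entropy, presumably because the latter accepts the new rank-1 labels. The two forms compute the same loss, though the fused op is the more numerically stable of the two, so this is a trade-off rather than a pure win. A sketch of the unfused form used in the patch, assuming paddle 1.6.x; shapes and values are illustrative:

    import numpy as np
    import paddle.fluid as fluid

    logits = fluid.data(name='logits', shape=[-1, 3], dtype='float32')
    labels = fluid.data(name='labels', shape=[-1], dtype='int64')

    # explicit softmax, then cross entropy over the resulting probabilities
    probs = fluid.layers.softmax(logits)
    loss = fluid.layers.mean(fluid.layers.cross_entropy(input=probs, label=labels))

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    val, = exe.run(feed={'logits': np.array([[2., 1., 0.]], dtype='float32'),
                         'labels': np.array([0], dtype='int64')},
                   fetch_list=[loss])
    print(val)  # ~0.408, i.e. -log(softmax([2, 1, 0])[0])
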
diff --git a/paddlepalm/utils/reader_helper.py b/paddlepalm/utils/reader_helper.py
index 544d4881d11d9acccbfd3a9aaa0538f6ff8c0cbc..d91517935a80a5f8669013d4a592a524c2131347 100644
--- a/paddlepalm/utils/reader_helper.py
+++ b/paddlepalm/utils/reader_helper.py
@@ -19,7 +19,6 @@ import random
 import numpy as np
 import paddle
 from paddle import fluid
-from paddle.fluid import layers

 def _check_and_adapt_shape_dtype(rt_val, attr, message=""):
@@ -65,7 +64,7 @@ def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n
     inputs = []
     ret = {}
     for name, shape, dtype in input_attrs:
-        p = layers.data(name, shape=shape, dtype=dtype)
+        p = fluid.data(name, shape=shape, dtype=dtype)
         ret[name] = p
         inputs.append(p)

@@ -219,7 +218,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc
     names = []
     start = 0
     if insert_taskid:
-        ret.append(([1,1], 'int64'))
+        ret.append(([1, 1], 'int64'))
         names.append('__task_id')
         start += 1
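
With layers.data, its last use in this module, converted to fluid.data, the from paddle.fluid import layers import becomes dead and is dropped. Note that fluid.data is not just a rename: it takes the full shape literally (no implicit batch dimension is prepended, hence the explicit -1 entries) and it verifies the dtype and shape of whatever is fed. A minimal sketch, assuming paddle 1.6.x:

    import numpy as np
    import paddle.fluid as fluid

    # fluid.layers.data would have prepended a batch dim by default;
    # fluid.data uses exactly the shape given here
    token_ids = fluid.data(name='token_ids', shape=[-1, -1], dtype='int64')

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())
    out, = exe.run(feed={'token_ids': np.zeros([2, 5], dtype='int64')},  # dtype must match
                   fetch_list=[token_ids])
    print(out.shape)  # (2, 5)
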