PaddlePaddle / PALM, commit dc1c43e8
Authored Oct 23, 2019 by xixiaoyao

fix bugs

Parent: e2368644

Showing 45 changed files with 0 additions and 6974 deletions
build/lib/paddlepalm/__init__.py                     +0 -5
build/lib/paddlepalm/backbone/__init__.py            +0 -0
build/lib/paddlepalm/backbone/bert.py                +0 -156
build/lib/paddlepalm/backbone/bow.py                 +0 -63
build/lib/paddlepalm/backbone/ernie.py               +0 -170
build/lib/paddlepalm/backbone/utils/__init__.py      +0 -0
build/lib/paddlepalm/backbone/utils/transformer.py   +0 -341
build/lib/paddlepalm/default_settings.py             +0 -42
build/lib/paddlepalm/interface.py                    +0 -173
build/lib/paddlepalm/mtl_controller.py               +0 -717
build/lib/paddlepalm/optimizer/__init__.py           +0 -0
build/lib/paddlepalm/optimizer/adam.py               +0 -108
build/lib/paddlepalm/reader/__init__.py              +0 -0
build/lib/paddlepalm/reader/cls4bert.py              +0 -0
build/lib/paddlepalm/reader/match4ernie.py           +0 -103
build/lib/paddlepalm/reader/mlm.py                   +0 -103
build/lib/paddlepalm/reader/mrc4bert.py              +0 -656
build/lib/paddlepalm/reader/mrc4ernie.py             +0 -119
build/lib/paddlepalm/reader/utils/__init__.py        +0 -0
build/lib/paddlepalm/reader/utils/batching4bert.py   +0 -184
build/lib/paddlepalm/reader/utils/batching4ernie.py  +0 -175
build/lib/paddlepalm/reader/utils/mlm_batching.py    +0 -175
build/lib/paddlepalm/reader/utils/mrqa_helper.py     +0 -84
build/lib/paddlepalm/reader/utils/reader4ernie.py    +0 -989
build/lib/paddlepalm/task_instance.py                +0 -286
build/lib/paddlepalm/task_paradigm/__init__.py       +0 -0
build/lib/paddlepalm/task_paradigm/cls.py            +0 -60
build/lib/paddlepalm/task_paradigm/match.py          +0 -70
build/lib/paddlepalm/task_paradigm/mlm.py            +0 -111
build/lib/paddlepalm/task_paradigm/mrc.py            +0 -486
build/lib/paddlepalm/tokenizer/__init__.py           +0 -0
build/lib/paddlepalm/tokenizer/bert_tokenizer.py     +0 -374
build/lib/paddlepalm/tokenizer/ernie_tokenizer.py    +0 -417
build/lib/paddlepalm/utils/__init__.py               +0 -0
build/lib/paddlepalm/utils/config_helper.py          +0 -311
build/lib/paddlepalm/utils/print_helper.py           +0 -31
build/lib/paddlepalm/utils/reader_helper.py          +0 -226
build/lib/paddlepalm/utils/saver.py                  +0 -65
build/lib/paddlepalm/utils/textprocess_helper.py     +0 -19
dist/paddle_palm-1.2-py2.7.egg                       +0 -0
paddle_palm.egg-info/PKG-INFO                        +0 -105
paddle_palm.egg-info/SOURCES.txt                     +0 -47
paddle_palm.egg-info/dependency_links.txt            +0 -1
paddle_palm.egg-info/not-zip-safe                    +0 -1
paddle_palm.egg-info/top_level.txt                   +0 -1
build/lib/paddlepalm/__init__.py  deleted  100644 → 0

import sys
from paddlepalm.mtl_controller import Controller

sys.path.append('paddlepalm')
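The package exposes Controller as its entry point. A minimal usage sketch, mirroring the __main__ block of mtl_controller.py further down in this diff; the file and directory names here are placeholders, not taken from the repo:

# Hypothetical driver script; mirrors the __main__ block of mtl_controller.py.
import paddlepalm

# 'mtl_config.yaml' is a placeholder; any yaml satisfying REQUIRED_ARGS in
# default_settings.py (task_instance, backbone, optimizer, learning_rate,
# batch_size) should work.
controller = paddlepalm.Controller('mtl_config.yaml', task_dir='.', for_train=True)
controller.load_pretrain('pretrain_model')  # path is a placeholder
controller.train()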
build/lib/paddlepalm/backbone/__init__.py  deleted  100644 → 0  (empty file)
build/lib/paddlepalm/backbone/bert.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1
BERT model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid
from paddle.fluid import layers

from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.interface import backbone


class Model(backbone):

    def __init__(self, config, phase):
        # self._is_training = phase == 'train'
        # (a backbone generally need not care about the running phase,
        #  since its outputs barely change across phases)

        self._emb_size = config["hidden_size"]
        self._n_layer = config["num_hidden_layers"]
        self._n_head = config["num_attention_heads"]
        self._voc_size = config["vocab_size"]
        self._max_position_seq_len = config["max_position_embeddings"]
        self._sent_types = config["type_vocab_size"]
        self._hidden_act = config["hidden_act"]
        self._prepostprocess_dropout = config["hidden_dropout_prob"]
        self._attention_dropout = config["attention_probs_dropout_prob"]

        # NOTE: the original line read `self.model_name = model_name`, which
        # referenced an undefined variable; reading the prefix from config
        # (defaulting to '') is an assumed fix that keeps the class usable.
        self.model_name = config.get("model_name", "")

        self._word_emb_name = self.model_name + "word_embedding"
        self._pos_emb_name = self.model_name + "pos_embedding"
        self._sent_emb_name = self.model_name + "sent_embedding"

        # Initialize all weights by truncated normal initializer, and all
        # biases will be initialized by constant zero by default.
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config["initializer_range"])

    @property
    def inputs_attr(self):
        return {"token_ids": [[-1, self._max_position_seq_len, 1], 'int64'],
                "position_ids": [[-1, self._max_position_seq_len, 1], 'int64'],
                "segment_ids": [[-1, self._max_position_seq_len, 1], 'int64'],
                "input_mask": [[-1, self._max_position_seq_len, 1], 'float32']}

    @property
    def outputs_attr(self):
        return {"word_emb": [-1, self._max_position_seq_len, self._emb_size],
                "sentence_emb": [-1, self._emb_size],
                "sentence_pair_emb": [-1, self._emb_size]}

    def build(self, inputs):
        src_ids = inputs['token_ids']
        pos_ids = inputs['position_ids']
        sent_ids = inputs['segment_ids']
        input_mask = inputs['input_mask']

        # padding id in vocabulary must be set to 0
        emb_out = layers.embedding(
            input=src_ids,
            size=[self._voc_size, self._emb_size],
            dtype="float32",
            param_attr=fluid.ParamAttr(
                name=self._word_emb_name, initializer=self._param_initializer),
            is_sparse=False)
        self.emb_out = emb_out

        position_emb_out = layers.embedding(
            input=pos_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype="float32",
            param_attr=fluid.ParamAttr(
                name=self._pos_emb_name, initializer=self._param_initializer))
        self.position_emb_out = position_emb_out

        sent_emb_out = layers.embedding(
            sent_ids,
            size=[self._sent_types, self._emb_size],
            dtype="float32",
            param_attr=fluid.ParamAttr(
                name=self._sent_emb_name, initializer=self._param_initializer))
        self.sent_emb_out = sent_emb_out

        emb_out = emb_out + position_emb_out + sent_emb_out

        emb_out = pre_process_layer(
            emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')

        self_attn_mask = layers.matmul(
            x=input_mask, y=input_mask, transpose_y=True)

        self_attn_mask = layers.scale(
            x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
        n_head_self_attn_mask = layers.stack(
            x=[self_attn_mask] * self._n_head, axis=1)
        n_head_self_attn_mask.stop_gradient = True

        enc_out = encoder(
            enc_input=emb_out,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer,
            name=self.model_name + 'encoder')

        next_sent_feat = layers.slice(
            input=enc_out, axes=[1], starts=[0], ends=[1])
        next_sent_feat = layers.fc(
            input=next_sent_feat,
            size=self._emb_size,
            act="tanh",
            param_attr=fluid.ParamAttr(
                name=self.model_name + "pooled_fc.w_0",
                initializer=self._param_initializer),
            bias_attr="pooled_fc.b_0")

        return {'word_emb': enc_out,
                'sentence_emb': next_sent_feat,
                'sentence_pair_emb': next_sent_feat}

    def postprocess(self, rt_outputs):
        pass
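For reference, Model.__init__ above reads the standard BERT config keys. A sketch of a config dict that would satisfy it; only the keys are dictated by the code, the values follow common BERT-base conventions and are assumptions, not taken from this repo:

# Illustrative BERT-base style config for backbone.bert.Model; values are
# assumptions, only the keys come from __init__ above.
bert_config = {
    "hidden_size": 768,
    "num_hidden_layers": 12,
    "num_attention_heads": 12,
    "vocab_size": 30522,
    "max_position_embeddings": 512,
    "type_vocab_size": 2,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "attention_probs_dropout_prob": 0.1,
    "initializer_range": 0.02,
}
# model = Model(bert_config, phase='train')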
build/lib/paddlepalm/backbone/bow.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import fluid
from paddle.fluid import layers

# NOTE: this import was missing in the original file, leaving the
# `backbone` base class undefined.
from paddlepalm.interface import backbone


class Model(backbone):

    def __init__(self, config, phase):
        # a backbone generally need not care about the running phase,
        # since its outputs barely change across phases
        self._emb_size = config["emb_size"]
        self._voc_size = config["vocab_size"]
        # NOTE: `_max_position_seq_len` is referenced below but was never set
        # in the original __init__; reading it from config (with -1, i.e.
        # variable length, as the fallback) is an assumed fix.
        self._max_position_seq_len = config.get("max_position_embeddings", -1)

    @property
    def inputs_attr(self):
        return {"token_ids": [[-1, self._max_position_seq_len, 1], 'int64']}

    @property
    def outputs_attr(self):
        return {"word_emb": [-1, self._max_position_seq_len, self._emb_size],
                "sentence_emb": [-1, self._emb_size * 2]}

    def build(self, inputs):
        tok_ids = inputs['token_ids']
        emb_out = layers.embedding(
            input=tok_ids,
            size=[self._voc_size, self._emb_size],
            dtype='float32',
            param_attr=fluid.ParamAttr(
                name='word_emb',
                initializer=fluid.initializer.TruncatedNormal(scale=0.1)),
            is_sparse=False)

        # bag-of-words pooling: mean and max over the time axis, concatenated
        sent_emb1 = layers.reduce_mean(emb_out, axis=1)
        sent_emb2 = layers.reduce_max(emb_out, axis=1)
        sent_emb = layers.concat([sent_emb1, sent_emb2], axis=1)

        return {'word_emb': emb_out,
                'sentence_emb': sent_emb}

    def postprocess(self, rt_outputs):
        pass
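The mean/max pooling above is why sentence_emb is declared with width 2 * emb_size. A quick numpy sketch of the same concatenation, with numpy standing in for the fluid ops purely for illustration:

import numpy as np

# [batch, seq_len, emb_size] token embeddings, illustrative values
emb_out = np.random.rand(4, 16, 128)

sent_emb1 = emb_out.mean(axis=1)   # mirrors layers.reduce_mean(..., axis=1)
sent_emb2 = emb_out.max(axis=1)    # mirrors layers.reduce_max(..., axis=1)
sent_emb = np.concatenate([sent_emb1, sent_emb2], axis=1)

assert sent_emb.shape == (4, 256)  # 2 * emb_size, matching outputs_attr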
build/lib/paddlepalm/backbone/ernie.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ernie model."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from paddle import fluid
from paddle.fluid import layers

from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.interface import backbone


class Model(backbone):

    def __init__(self, config, phase):
        # a backbone generally need not care about the running phase,
        # since its outputs barely change across phases
        self._emb_size = config['hidden_size']
        self._n_layer = config['num_hidden_layers']
        self._n_head = config['num_attention_heads']
        self._voc_size = config['vocab_size']
        self._max_position_seq_len = config['max_position_embeddings']
        if config['sent_type_vocab_size']:
            self._sent_types = config['sent_type_vocab_size']
        else:
            self._sent_types = config['type_vocab_size']
        self._task_types = config['task_type_vocab_size']
        self._hidden_act = config['hidden_act']
        self._prepostprocess_dropout = config['hidden_dropout_prob']
        self._attention_dropout = config['attention_probs_dropout_prob']

        self._word_emb_name = "word_embedding"
        self._pos_emb_name = "pos_embedding"
        self._sent_emb_name = "sent_embedding"
        self._task_emb_name = "task_embedding"
        self._emb_dtype = "float32"

        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=config['initializer_range'])

    @property
    def inputs_attr(self):
        return {"token_ids": [[-1, -1, 1], 'int64'],
                "position_ids": [[-1, -1, 1], 'int64'],
                "segment_ids": [[-1, -1, 1], 'int64'],
                "input_mask": [[-1, -1, 1], 'float32'],
                "task_ids": [[-1, -1, 1], 'int64']}

    @property
    def outputs_attr(self):
        return {"word_embedding": [[-1, -1, self._emb_size], 'float32'],
                "encoder_outputs": [[-1, -1, self._emb_size], 'float32'],
                "sentence_embedding": [[-1, self._emb_size], 'float32'],
                "sentence_pair_embedding": [[-1, self._emb_size], 'float32']}

    def build(self, inputs):
        src_ids = inputs['token_ids']
        pos_ids = inputs['position_ids']
        sent_ids = inputs['segment_ids']
        input_mask = inputs['input_mask']
        task_ids = inputs['task_ids']

        # padding id in vocabulary must be set to 0
        emb_out = fluid.layers.embedding(
            input=src_ids,
            size=[self._voc_size, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._word_emb_name, initializer=self._param_initializer),
            is_sparse=False)

        position_emb_out = fluid.layers.embedding(
            input=pos_ids,
            size=[self._max_position_seq_len, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._pos_emb_name, initializer=self._param_initializer))

        sent_emb_out = fluid.layers.embedding(
            sent_ids,
            size=[self._sent_types, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._sent_emb_name, initializer=self._param_initializer))

        emb_out = emb_out + position_emb_out
        emb_out = emb_out + sent_emb_out

        task_emb_out = fluid.layers.embedding(
            task_ids,
            size=[self._task_types, self._emb_size],
            dtype=self._emb_dtype,
            param_attr=fluid.ParamAttr(
                name=self._task_emb_name, initializer=self._param_initializer))

        emb_out = emb_out + task_emb_out

        emb_out = pre_process_layer(
            emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')

        self_attn_mask = fluid.layers.matmul(
            x=input_mask, y=input_mask, transpose_y=True)

        self_attn_mask = fluid.layers.scale(
            x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)

        n_head_self_attn_mask = fluid.layers.stack(
            x=[self_attn_mask] * self._n_head, axis=1)
        n_head_self_attn_mask.stop_gradient = True

        enc_out = encoder(
            enc_input=emb_out,
            attn_bias=n_head_self_attn_mask,
            n_layer=self._n_layer,
            n_head=self._n_head,
            d_key=self._emb_size // self._n_head,
            d_value=self._emb_size // self._n_head,
            d_model=self._emb_size,
            d_inner_hid=self._emb_size * 4,
            prepostprocess_dropout=self._prepostprocess_dropout,
            attention_dropout=self._attention_dropout,
            relu_dropout=0,
            hidden_act=self._hidden_act,
            preprocess_cmd="",
            postprocess_cmd="dan",
            param_initializer=self._param_initializer,
            name='encoder')

        next_sent_feat = fluid.layers.slice(
            input=enc_out, axes=[1], starts=[0], ends=[1])
        next_sent_feat = fluid.layers.reshape(
            next_sent_feat, [-1, next_sent_feat.shape[-1]])
        next_sent_feat = fluid.layers.fc(
            input=next_sent_feat,
            size=self._emb_size,
            act="tanh",
            param_attr=fluid.ParamAttr(
                name="pooled_fc.w_0", initializer=self._param_initializer),
            bias_attr="pooled_fc.b_0")

        return {'word_embedding': emb_out,
                'encoder_outputs': enc_out,
                'sentence_embedding': next_sent_feat,
                'sentence_pair_embedding': next_sent_feat}

    def postprocess(self, rt_outputs):
        pass
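Both backbones build their attention bias the same way: the outer product of the mask is passed through scale(x, scale=10000.0, bias=-1.0, bias_after_scale=False), which computes 10000 * (x - 1), giving 0 where both positions are real tokens and -10000 where padding is involved, so those logits vanish after softmax. A numpy sketch of that arithmetic, for illustration only:

import numpy as np

# One sequence of length 4 with one padding position (mask as a column vector).
input_mask = np.array([[1.0], [1.0], [1.0], [0.0]])  # [seq_len, 1]

self_attn_mask = input_mask @ input_mask.T           # matmul(..., transpose_y=True)
attn_bias = 10000.0 * (self_attn_mask - 1.0)         # bias_after_scale=False: scale * (x + bias)

# 0 where query and key are both real tokens, -10000 where padding is
# involved, which drives those attention weights toward zero.
print(attn_bias)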
build/lib/paddlepalm/backbone/utils/__init__.py  deleted  100644 → 0  (empty file)
build/lib/paddlepalm/backbone/utils/transformer.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transformer encoder."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from functools import partial

import paddle.fluid as fluid
import paddle.fluid.layers as layers


def multi_head_attention(queries,
                         keys,
                         values,
                         attn_bias,
                         d_key,
                         d_value,
                         d_model,
                         n_head=1,
                         dropout_rate=0.,
                         cache=None,
                         param_initializer=None,
                         name='multi_head_att'):
    """
    Multi-Head Attention. Note that attn_bias is added to the logits before
    computing the softmax activation, to mask certain selected positions so
    that they will not be considered in attention weights.
    """
    keys = queries if keys is None else keys
    values = keys if values is None else values

    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
        raise ValueError(
            "Inputs: queries, keys and values should all be 3-D tensors.")

    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
        """
        Add linear projection to queries, keys, and values.
        """
        q = layers.fc(input=queries,
                      size=d_key * n_head,
                      num_flatten_dims=2,
                      param_attr=fluid.ParamAttr(
                          name=name + '_query_fc.w_0',
                          initializer=param_initializer),
                      bias_attr=name + '_query_fc.b_0')
        k = layers.fc(input=keys,
                      size=d_key * n_head,
                      num_flatten_dims=2,
                      param_attr=fluid.ParamAttr(
                          name=name + '_key_fc.w_0',
                          initializer=param_initializer),
                      bias_attr=name + '_key_fc.b_0')
        v = layers.fc(input=values,
                      size=d_value * n_head,
                      num_flatten_dims=2,
                      param_attr=fluid.ParamAttr(
                          name=name + '_value_fc.w_0',
                          initializer=param_initializer),
                      bias_attr=name + '_value_fc.b_0')
        return q, k, v

    def __split_heads(x, n_head):
        """
        Reshape the last dimension of input tensor x so that it becomes two
        dimensions and then transpose. Specifically, input a tensor with shape
        [bs, max_sequence_length, n_head * hidden_dim] then output a tensor
        with shape [bs, n_head, max_sequence_length, hidden_dim].
        """
        hidden_size = x.shape[-1]
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        reshaped = layers.reshape(
            x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)

        # permute the dimensions into:
        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
        return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])

    def __combine_heads(x):
        """
        Transpose and then reshape the last two dimensions of input tensor x
        so that it becomes one dimension, which is reverse to __split_heads.
        """
        if len(x.shape) == 3:
            return x
        if len(x.shape) != 4:
            raise ValueError("Input(x) should be a 4-D Tensor.")

        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        return layers.reshape(
            x=trans_x,
            shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
            inplace=True)

    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
        """
        Scaled Dot-Product Attention
        """
        scaled_q = layers.scale(x=q, scale=d_key**-0.5)
        product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
        if attn_bias:
            product += attn_bias
        weights = layers.softmax(product)
        if dropout_rate:
            weights = layers.dropout(
                weights,
                dropout_prob=dropout_rate,
                dropout_implementation="upscale_in_train",
                is_test=False)
        out = layers.matmul(weights, v)
        return out

    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)

    if cache is not None:  # use cache and concat time steps
        # Since the inplace reshape in __split_heads changes the shape of k and
        # v, which is the cache input for next time step, reshape the cache
        # input from the previous time step first.
        k = cache["k"] = layers.concat(
            [layers.reshape(cache["k"], shape=[0, 0, d_model]), k], axis=1)
        v = cache["v"] = layers.concat(
            [layers.reshape(cache["v"], shape=[0, 0, d_model]), v], axis=1)

    q = __split_heads(q, n_head)
    k = __split_heads(k, n_head)
    v = __split_heads(v, n_head)

    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
                                                  dropout_rate)

    out = __combine_heads(ctx_multiheads)

    # Project back to the model size.
    proj_out = layers.fc(input=out,
                         size=d_model,
                         num_flatten_dims=2,
                         param_attr=fluid.ParamAttr(
                             name=name + '_output_fc.w_0',
                             initializer=param_initializer),
                         bias_attr=name + '_output_fc.b_0')
    return proj_out


def positionwise_feed_forward(x,
                              d_inner_hid,
                              d_hid,
                              dropout_rate,
                              hidden_act,
                              param_initializer=None,
                              name='ffn'):
    """
    Position-wise Feed-Forward Networks.
    This module consists of two linear transformations with a ReLU activation
    in between, which is applied to each position separately and identically.
    """
    hidden = layers.fc(input=x,
                       size=d_inner_hid,
                       num_flatten_dims=2,
                       act=hidden_act,
                       param_attr=fluid.ParamAttr(
                           name=name + '_fc_0.w_0',
                           initializer=param_initializer),
                       bias_attr=name + '_fc_0.b_0')
    if dropout_rate:
        hidden = layers.dropout(
            hidden,
            dropout_prob=dropout_rate,
            dropout_implementation="upscale_in_train",
            is_test=False)
    out = layers.fc(input=hidden,
                    size=d_hid,
                    num_flatten_dims=2,
                    param_attr=fluid.ParamAttr(
                        name=name + '_fc_1.w_0',
                        initializer=param_initializer),
                    bias_attr=name + '_fc_1.b_0')
    return out


def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
                           name=''):
    """
    Add residual connection, layer normalization and dropout to the out tensor
    optionally according to the value of process_cmd.
    This will be used before or after multi-head attention and position-wise
    feed-forward networks.
    """
    for cmd in process_cmd:
        if cmd == "a":  # add residual connection
            out = out + prev_out if prev_out else out
        elif cmd == "n":  # add layer normalization
            out_dtype = out.dtype
            if out_dtype == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float32")
            out = layers.layer_norm(
                out,
                begin_norm_axis=len(out.shape) - 1,
                param_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_scale',
                    initializer=fluid.initializer.Constant(1.)),
                bias_attr=fluid.ParamAttr(
                    name=name + '_layer_norm_bias',
                    initializer=fluid.initializer.Constant(0.)))
            if out_dtype == fluid.core.VarDesc.VarType.FP16:
                out = layers.cast(x=out, dtype="float16")
        elif cmd == "d":  # add dropout
            if dropout_rate:
                out = layers.dropout(
                    out,
                    dropout_prob=dropout_rate,
                    dropout_implementation="upscale_in_train",
                    is_test=False)
    return out


pre_process_layer = partial(pre_post_process_layer, None)
post_process_layer = pre_post_process_layer


def encoder_layer(enc_input,
                  attn_bias,
                  n_head,
                  d_key,
                  d_value,
                  d_model,
                  d_inner_hid,
                  prepostprocess_dropout,
                  attention_dropout,
                  relu_dropout,
                  hidden_act,
                  preprocess_cmd="n",
                  postprocess_cmd="da",
                  param_initializer=None,
                  name=''):
    """The encoder layers that can be stacked to form a deep encoder.
    This module consists of a multi-head (self) attention followed by
    position-wise feed-forward networks, both components accompanied by
    post_process_layer to add residual connection, layer normalization
    and dropout.
    """
    attn_output = multi_head_attention(
        pre_process_layer(
            enc_input,
            preprocess_cmd,
            prepostprocess_dropout,
            name=name + '_pre_att'),
        None,
        None,
        attn_bias,
        d_key,
        d_value,
        d_model,
        n_head,
        attention_dropout,
        param_initializer=param_initializer,
        name=name + '_multi_head_att')
    attn_output = post_process_layer(
        enc_input,
        attn_output,
        postprocess_cmd,
        prepostprocess_dropout,
        name=name + '_post_att')
    ffd_output = positionwise_feed_forward(
        pre_process_layer(
            attn_output,
            preprocess_cmd,
            prepostprocess_dropout,
            name=name + '_pre_ffn'),
        d_inner_hid,
        d_model,
        relu_dropout,
        hidden_act,
        param_initializer=param_initializer,
        name=name + '_ffn')
    return post_process_layer(
        attn_output,
        ffd_output,
        postprocess_cmd,
        prepostprocess_dropout,
        name=name + '_post_ffn')


def encoder(enc_input,
            attn_bias,
            n_layer,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            prepostprocess_dropout,
            attention_dropout,
            relu_dropout,
            hidden_act,
            preprocess_cmd="n",
            postprocess_cmd="da",
            param_initializer=None,
            name=''):
    """
    The encoder is composed of a stack of identical layers returned by calling
    encoder_layer.
    """
    for i in range(n_layer):
        enc_output = encoder_layer(
            enc_input,
            attn_bias,
            n_head,
            d_key,
            d_value,
            d_model,
            d_inner_hid,
            prepostprocess_dropout,
            attention_dropout,
            relu_dropout,
            hidden_act,
            preprocess_cmd,
            postprocess_cmd,
            param_initializer=param_initializer,
            name=name + '_layer_' + str(i))
        enc_input = enc_output
    enc_output = pre_process_layer(
        enc_output, preprocess_cmd, prepostprocess_dropout,
        name="post_encoder")
    return enc_output
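The preprocess_cmd/postprocess_cmd strings are interpreted character by character by pre_post_process_layer: 'a' adds the residual, 'n' applies layer norm, 'd' applies dropout. The BERT/ERNIE backbones above pass preprocess_cmd="" and postprocess_cmd="dan", i.e. dropout, then residual add, then layer norm after each sublayer (post-LN); the defaults "n"/"da" would give the pre-LN arrangement. A pure-Python paraphrase of that dispatch, with floats standing in for tensors and identity stand-ins for the norm and dropout ops, for illustration only:

# Paraphrase of pre_post_process_layer's command dispatch; not the real ops.
def process(prev_out, out, process_cmd):
    for cmd in process_cmd:
        if cmd == "a":    # residual connection
            out = out + prev_out if prev_out is not None else out
        elif cmd == "n":  # layer norm; layers.layer_norm(...) in the real code
            out = out
        elif cmd == "d":  # dropout; layers.dropout(...) in the real code
            out = out
    return out

# postprocess_cmd="dan": dropout -> add residual -> layer norm (post-LN)
print(process(prev_out=1.0, out=2.0, process_cmd="dan"))  # 3.0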
build/lib/paddlepalm/default_settings.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

BACKBONE_DIR = 'paddlepalm.backbone'
TASK_INSTANCE_DIR = 'paddlepalm.task_instance'
READER_DIR = 'paddlepalm.reader'
PARADIGM_DIR = 'paddlepalm.task_paradigm'
OPTIMIZER_DIR = 'paddlepalm.optimizer'
OPTIMIZE_METHOD = 'optimize'

REQUIRED_ARGS = {
    'task_instance': str,
    'backbone': str,
    'optimizer': str,
    'learning_rate': float,
    'batch_size': int
}

OPTIONAL_ARGS = {
    'mix_ratio': str,
    'target_tag': str,
    'reuse_rag': str  # note: possibly a typo for 'reuse_tag' in the original
}

TASK_REQUIRED_ARGS = {
    'paradigm': str,
    'reader': str,
    'train_file': str
}
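A global config therefore has to provide at least the five REQUIRED_ARGS. A minimal sketch of such a dict; the module names follow the directories declared in this file, everything else is a placeholder:

# Minimal global config satisfying REQUIRED_ARGS; values are placeholders.
mtl_conf = {
    'task_instance': 'mrqa',   # resolved to <task_dir>/mrqa.yaml by the controller
    'backbone': 'ernie',       # imported from paddlepalm.backbone (BACKBONE_DIR)
    'optimizer': 'adam',       # imported from paddlepalm.optimizer (OPTIMIZER_DIR)
    'learning_rate': 3e-5,
    'batch_size': 32,
    # OPTIONAL_ARGS are comma-separated strings parsed by _parse_list:
    'mix_ratio': '1.0',
    'target_tag': '1',
}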
build/lib/paddlepalm/interface.py  deleted  100644 → 0

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1"""


class reader(object):
    """interface of data manager."""

    def __init__(self, config):
        assert isinstance(config, dict)

    # @property
    # def inputs_attr(self):
    #     """Describes the attributes of the reader's input objects: each
    #     object's name, shape and dtype. For scalar types (str, int, float,
    #     ...) the shape is the empty list []; a dimension of variable length
    #     is written as -1.
    #     Return:
    #         dict. Attribute descriptions of the input objects. For example,
    #         a text classification task may need the input text and label id:
    #             {"text": ([], 'str'),
    #              "label": ([], 'int')}
    #         a tagging task may need the token sequence and the tag sequence:
    #             {"tokens", ([-1], 'str'),
    #              "tags", ([-1], 'str')}
    #         a machine reading comprehension task may need the context,
    #         question, answer, answer span positions, and so on:
    #             {"paragraph", ([], 'str'),
    #              "question", ([], 'str'),
    #              "start_position", ([], 'int')
    #     """
    #     raise NotImplementedError()

    @property
    def outputs_attr(self):
        """Describes the attributes of the reader's output objects (the
        objects being yielded): each object's name, shape and dtype. For
        scalar types (str, int, float, ...) the shape is the empty list [];
        a dimension of variable length is written as -1.
        Note: with mini-batch gradient descent, regular input objects should
        carry a batch_size dimension (usually -1).
        Return:
            dict. Attribute descriptions of the output objects. For example,
            for text classification and matching tasks the yielded outputs
            may contain the following objects (downstream backbones and tasks
            access them on demand):
                {"token_ids": ([-1, max_len], 'int64'),
                 "input_ids": ([-1, max_len], 'int64'),
                 "segment_ids": ([-1, max_len], 'int64'),
                 "input_mask": ([-1, max_len], 'float32'),
                 "label": ([-1], 'int')}
        """
        raise NotImplementedError()

    # def parse_line(self):
    #     """Internally the framework describes each sample with a dict whose
    #     keys come from inputs_attr and whose values conform to the described
    #     attrs. This method parses a text line into such a dict. The default
    #     parse_line reads JSON-formatted dataset files in which every line is
    #     one JSON-described sample. Users may override it to adapt to other
    #     dataset formats, e.g. csv or even tfrecord files.
    #     """
    #     raise NotImplementedError()
    #
    # def tokenize(self, line):
    #     """Built-in tokenizers such as the word piece tokenizer are
    #     provided; users may select one via the tokenizer hyperparameter, or
    #     override this method to plug in a custom tokenizer when none of the
    #     built-in ones fits.
    #     Args:
    #         - line: a unicode string.
    #     Return:
    #         a list of tokens
    #     """
    #     raise NotImplementedError()

    def iterator(self):
        """Dataset traversal interface. Note that when the iterator reaches
        the end of the dataset it should reset itself automatically, i.e.
        restart a new pass from the head of the dataset.
        Yield:
            (dict) elements that meet the requirements in the output template
        """
        raise NotImplementedError()

    @property
    def num_examples(self):
        """Number of samples in the dataset, i.e. the number of samples the
        iterator generates per epoch. Note that with strategies that may
        change the sample count (e.g. sliding windows), this should return
        the actual number at runtime."""
        raise NotImplementedError()


class backbone(object):
    """interface of backbone model."""

    def __init__(self, config, phase):
        """
        Args:
            config: dict. The hyperparameters defined in the multi-task
                config file plus the pretrained model config file.
            phase: str. The running phase; currently train and predict are
                supported.
        """
        assert isinstance(config, dict)

    @property
    def inputs_attr(self):
        """Describes the attributes of the input objects the backbone needs
        from the reader: each object's name, shape and dtype. Scalar types
        take shape []; a dimension of variable length is written as -1.
        Return:
            dict. For example, for text classification and matching tasks,
            the reader objects the bert backbone relies on mainly include
                {"token_ids": ([-1, max_len], 'int64'),
                 "input_ids": ([-1, max_len], 'int64'),
                 "segment_ids": ([-1, max_len], 'int64'),
                 "input_mask": ([-1, max_len], 'float32')}"""
        raise NotImplementedError()

    @property
    def outputs_attr(self):
        """Describes the attributes of the backbone's output objects: each
        object's name, shape and dtype. Scalar types take shape []; a
        dimension of variable length is written as -1.
        Return:
            dict. For example, for text classification and matching tasks
            the bert backbone may output
                {"word_emb": ([-1, max_seqlen, word_emb_size], 'float32'),
                 "sentence_emb": ([-1, hidden_size], 'float32'),
                 "sim_vec": ([-1, hidden_size], 'float32')}"""
        raise NotImplementedError()

    def build(self, inputs):
        """Builds the backbone's computation graph, mapping static-graph
        Variables conforming to inputs_attr into output Variables conforming
        to outputs_attr.
        Args:
            inputs: dict. Maps object names in inputs_attr to graph
                Variables; it contains at least the objects defined in
                inputs_attr.
        Return:
            the graph variables to output. They are added to the fetch_list,
            so their runtime values are computed at every train/predict step
            and handed to the postprocess method for user handling.
        """
        raise NotImplementedError()


class task_paradigm(object):

    def __init__(self, config, phase, backbone_config):
        """
        config: dict. The hyperparameters defined in the task instance config
            plus the multi-task config file.
        phase: str. The running phase; currently train and predict are
            supported.
        """

    @property
    def inputs_attrs(self):
        """Describes the attributes of the input objects the task_layer
        reads from input sources such as the reader and the backbone. The
        first-level key names the source (backbone, reader, ...; more
        flexible inputs will be supported later); the second level describes
        each object of that source: name, shape and dtype. Scalar types take
        shape []; a dimension of variable length is written as -1.
        Return:
            dict. Attribute descriptions per source and per object."""
        raise NotImplementedError()

    @property
    def outputs_attr(self):
        """Describes the attributes of the task's output objects: name,
        shape and dtype. Output objects are added to the fetch_list, so
        their runtime values are computed at every train/predict step and
        handed to the postprocess method for user handling. Scalar types
        take shape []; a dimension of variable length is written as -1.
        Return:
            dict. Note that in the train phase an output named loss is
            mandatory.
        """
        raise NotImplementedError()

    def build(self, inputs):
        """Builds the task_layer's computation graph, mapping static-graph
        Variables from the input sources (conforming to inputs_attrs) into
        output Variables conforming to outputs_attr.
        Args:
            inputs: dict. Maps object names in inputs_attrs to graph
                Variables; it contains at least the objects defined in
                inputs_attr.
        Return:
            the graph variables to output. They are added to the fetch_list,
            so their runtime values are computed at every train/predict step
            and handed to the postprocess method for user handling.
        """
        raise NotImplementedError()

    def postprocess(self, rt_outputs):
        """Post-processes the task_layer's runtime outputs for the current
        batch after each train or predict step. Note that besides the
        outputs of build, rt_outputs automatically contains the computed
        loss as well."""
        pass

    def post_postprocess(self, global_buffer):
        pass
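A minimal sketch of a concrete reader conforming to the interface above; the data and field names are invented for illustration, only the three overridden members are dictated by the interface:

# Hypothetical reader implementing the interface above; the in-memory data
# source and its field names are made up.
class ToyReader(reader):
    def __init__(self, config):
        super(ToyReader, self).__init__(config)
        self._samples = [{'token_ids': [[1, 2, 3]], 'label': [0]},
                         {'token_ids': [[4, 5]], 'label': [1]}]

    @property
    def outputs_attr(self):
        return {'token_ids': ([-1, -1], 'int64'),
                'label': ([-1], 'int64')}

    def iterator(self):
        while True:  # resets automatically, as required by the interface
            for sample in self._samples:
                yield sample

    @property
    def num_examples(self):
        return len(self._samples)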
build/lib/paddlepalm/mtl_controller.py
已删除
100644 → 0
浏览文件 @
e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
importlib
import
multiprocessing
from
paddle
import
fluid
from
paddle.fluid
import
layers
import
yaml
import
json
import
logging
import
time
import
numpy
as
np
from
paddlepalm.utils.saver
import
init_pretraining_params
,
init_checkpoint
from
paddlepalm.utils.config_helper
import
PDConfig
from
paddlepalm.utils.print_helper
import
print_dict
from
paddlepalm.utils.reader_helper
import
create_net_inputs
,
create_iterator_fn
,
create_joint_iterator_fn
,
merge_input_attrs
from
paddlepalm.default_settings
import
*
from
paddlepalm.task_instance
import
TaskInstance
,
check_instances
DEBUG
=
False
VERBOSE
=
0
def
_get_basename
(
f
):
return
os
.
path
.
splitext
(
f
)[
0
]
def
_get_suffix
(
f
):
return
os
.
path
.
splitext
(
f
)[
-
1
]
def
_parse_yaml
(
f
,
asdict
=
True
,
support_cmd_line
=
False
):
assert
os
.
path
.
exists
(
f
),
"file {} not found."
.
format
(
f
)
if
support_cmd_line
:
args
=
PDConfig
(
yaml_file
=
f
,
fuse_args
=
True
)
args
.
build
()
return
args
.
asdict
()
if
asdict
else
args
else
:
if
asdict
:
with
open
(
f
,
"r"
)
as
fin
:
yaml_config
=
yaml
.
load
(
fin
,
Loader
=
yaml
.
SafeLoader
)
return
yaml_config
else
:
raise
NotImplementedError
()
def
_parse_json
(
f
,
asdict
=
True
,
support_cmd_line
=
False
):
assert
os
.
path
.
exists
(
f
),
"file {} not found."
.
format
(
f
)
if
support_cmd_line
:
args
=
PDConfig
(
json_file
=
f
,
fuse_args
=
support_cmd_line
)
args
.
build
()
return
args
.
asdict
()
if
asdict
else
args
else
:
if
asdict
:
with
open
(
f
,
"r"
)
as
fin
:
config
=
json
.
load
(
fin
)
return
config
else
:
raise
NotImplementedError
()
def
_parse_list
(
string
,
astype
=
str
):
assert
isinstance
(
string
,
str
),
"{} is not a string."
.
format
(
string
)
if
','
not
in
string
:
return
[
astype
(
string
)]
string
=
string
.
replace
(
','
,
' '
)
return
[
astype
(
i
)
for
i
in
string
.
split
()]
def
_try_float
(
s
):
try
:
float
(
s
)
return
(
float
(
s
))
except
:
return
s
def
_check_conf
(
conf
,
checklist
=
None
):
assert
isinstance
(
conf
,
dict
),
"{} is not a dict."
.
format
(
conf
)
ret
=
{}
for
k
,
v
in
conf
.
items
():
if
isinstance
(
v
,
str
):
v
=
_try_float
(
v
)
ret
[
k
]
=
v
if
checklist
is
not
None
:
for
k
,
t
in
checklist
:
assert
k
in
ret
,
"required argument {} is NOT exist in config file."
.
format
(
k
)
assert
isintance
(
ret
[
k
],
t
),
"value type of argument {} should be {}"
.
format
(
k
,
t
)
return
ret
# TODO: 增加None机制,允许hidden size、batch size和seqlen设置为None
def
_check_io
(
in_attr
,
out_attr
,
strict
=
False
,
in_name
=
"left"
,
out_name
=
"right"
):
for
name
,
attr
in
in_attr
.
items
():
assert
name
in
out_attr
,
in_name
+
': '
+
name
+
' not found in '
+
out_name
if
attr
!=
out_attr
[
name
]:
if
strict
:
raise
ValueError
(
name
+
': shape or dtype not consistent!'
)
else
:
logging
.
warning
(
'{}: shape or dtype not consistent!
\n
{}:
\n
{}
\n
{}:
\n
{}'
.
format
(
name
,
in_name
,
attr
,
out_name
,
out_attr
[
name
]))
def
_merge_conf
(
conf1
,
conf2
,
conf1_first
=
True
,
strict
=
False
):
assert
isinstance
(
conf1
,
dict
),
"{} is not a dict."
.
format
(
conf1
)
assert
isinstance
(
conf2
,
dict
),
"{} is not a dict."
.
format
(
conf2
)
base_conf
=
conf2
if
conf1_first
else
conf1
base_conf
=
base_conf
.
copy
()
new_conf
=
conf1
if
conf1_first
else
conf2
for
k
,
v
in
new_conf
.
items
():
if
k
in
base_conf
:
if
base_conf
[
k
]
!=
v
:
raise
Warning
(
"value of argument {} has been updated to {}."
.
format
(
k
,
v
))
else
:
if
strict
:
continue
base_conf
[
k
]
=
v
return
base_conf
def
_encode_inputs
(
inputs
,
scope_name
,
sep
=
'/'
,
cand_set
=
None
):
outputs
=
{}
for
k
,
v
in
inputs
.
items
():
if
cand_set
is
not
None
:
if
k
in
cand_set
:
outputs
[
k
]
=
v
if
scope_name
+
sep
+
k
in
cand_set
:
outputs
[
scope_name
+
sep
+
k
]
=
v
else
:
outputs
[
scope_name
+
sep
+
k
]
=
v
return
outputs
def
_decode_inputs
(
inputs
,
scope_name
,
sep
=
'/'
,
keep_unk_keys
=
True
):
outputs
=
{}
for
name
,
value
in
inputs
.
items
():
# var for backbone are also available to tasks
if
keep_unk_keys
and
sep
not
in
name
:
outputs
[
name
]
=
value
# var for this inst
if
name
.
startswith
(
scope_name
+
'/'
):
outputs
[
name
[
len
(
scope_name
+
'/'
):]]
=
value
return
outputs
def
_init_env
(
use_gpu
):
if
use_gpu
:
place
=
fluid
.
CUDAPlace
(
0
)
dev_count
=
fluid
.
core
.
get_cuda_device_count
()
else
:
place
=
fluid
.
CPUPlace
()
dev_count
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
return
fluid
.
Executor
(
place
),
dev_count
def
_fit_attr
(
conf
,
fit_attr
,
strict
=
False
):
for
i
,
attr
in
fit_attr
.
items
():
if
i
not
in
conf
:
if
strict
:
raise
Exception
(
'Argument {} is required to create a controller.'
.
format
(
i
))
else
:
continue
conf
[
i
]
=
attr
(
conf
[
i
])
return
conf
class
Controller
(
object
):
def
__init__
(
self
,
config
=
None
,
task_dir
=
'.'
,
for_train
=
True
):
"""
Args:
config: (str|dict) 字符串类型时,给出yaml格式的config配置文件路径;
"""
self
.
_for_train
=
for_train
# default mtl_conf
# if config is None and config_path is None:
# raise ValueError('For config and config_path, at least one of them should be set.')
if
isinstance
(
config
,
str
):
mtl_conf
=
_parse_yaml
(
config
,
support_cmd_line
=
True
)
# if config is not None:
# mtl_conf = _merge_conf(config, mtl_conf)
else
:
mtl_conf
=
config
mtl_conf
=
_check_conf
(
mtl_conf
)
mtl_conf
=
_fit_attr
(
mtl_conf
,
REQUIRED_ARGS
,
strict
=
True
)
mtl_conf
=
_fit_attr
(
mtl_conf
,
OPTIONAL_ARGS
,
strict
=
False
)
exe
,
dev_count
=
_init_env
(
use_gpu
=
mtl_conf
.
get
(
'use_gpu'
,
True
))
self
.
exe
=
exe
self
.
dev_count
=
dev_count
print_dict
(
mtl_conf
,
title
=
'main configuration'
)
# parse task instances and target tags
instnames
=
_parse_list
(
mtl_conf
[
'task_instance'
])
assert
len
(
instnames
)
==
len
(
set
(
instnames
)),
"repeated task_instance is NOT supported."
num_instances
=
len
(
instnames
)
self
.
num_instances
=
num_instances
instname_to_conf
=
{}
instname_to_id
=
{}
for
id
,
instname
in
enumerate
(
instnames
):
instpath
=
os
.
path
.
join
(
task_dir
,
instname
+
'.yaml'
)
conf
=
_parse_yaml
(
instpath
,
support_cmd_line
=
False
)
# conf = _check_conf(conf, TASK_INSTANCE_REQUIRED_ARGS)
conf
=
_check_conf
(
conf
)
temp_conf
=
_merge_conf
(
mtl_conf
,
conf
,
strict
=
True
)
print_dict
(
temp_conf
,
title
=
'{} configuration'
.
format
(
instname
))
conf
=
_merge_conf
(
mtl_conf
,
conf
)
instname_to_conf
[
instname
]
=
conf
instname_to_id
[
instname
]
=
id
# create task instances
instances
=
[]
for
name
in
instnames
:
instances
.
append
(
TaskInstance
(
name
,
instname_to_id
[
name
],
instname_to_conf
[
name
]))
check_instances
(
instances
)
# parse target_tag
if
'target_tag'
in
mtl_conf
:
target_tag
=
str
(
mtl_conf
[
'target_tag'
])
tags
=
_parse_list
(
target_tag
,
astype
=
int
)
assert
len
(
tags
)
==
len
(
instnames
),
"number of target_tag is NOT consistent with that in task_instance."
for
tag
,
inst
in
zip
(
tags
,
instances
):
inst
.
is_target
=
tag
else
:
tags
=
[
i
.
is_target
for
i
in
instances
]
num_targets
=
sum
(
tags
)
num_auxes
=
num_instances
-
num_targets
# parse mix ratios
if
'mix_ratio'
in
mtl_conf
:
mix_ratio
=
str
(
mtl_conf
[
'mix_ratio'
])
mrs
=
_parse_list
(
mix_ratio
,
astype
=
float
)
assert
len
(
mrs
)
==
num_instances
,
"number of mix_ratios is NOT consistent with num_instances."
else
:
# TODO: 增加joint training模式,让num_epochs平等的作用于每个instance
mrs
=
[
1.0
]
*
num_instances
for
mr
,
inst
in
zip
(
mrs
,
instances
):
inst
.
mix_ratio
=
mr
# parse task layer reuse tags
instname_to_reusehost
=
{
i
:
i
for
i
in
instnames
}
if
'task_reuse_tag'
in
mtl_conf
:
tags
=
_parse_list
(
mtl_conf
[
'task_reuse_tag'
],
astype
=
int
)
assert
len
(
tags
)
==
num_targets
,
'number of reuse_tags is NOT consistent with number of instances.'
else
:
tags
=
[]
mapper
=
{}
for
inst
in
instances
:
# 有环则tag_id + 1,否则被mapper shutdown
history
=
set
()
history
.
add
(
inst
.
name
)
cur_inst
=
inst
while
True
:
# 发现有环
if
cur_inst
.
task_reuse_scope
in
history
:
mapper
[
inst
.
name
]
=
len
(
tags
)
break
# 发现在mapper中
elif
cur_inst
.
task_reuse_scope
in
mapper
:
mapper
[
inst
.
name
]
=
mapper
[
cur_inst
.
task_reuse_scope
]
break
else
:
cur_inst
=
name_to_instance
[
cur_inst
.
task_reuse_scope
]
history
.
add
(
cur_inst
.
name
)
tags
.
append
(
mapper
[
inst
.
name
])
# 注意,上面这段需要做单元测试
for
i
in
range
(
1
,
num_instances
):
for
j
in
range
(
i
):
if
tags
[
i
]
==
tags
[
j
]:
# check paradigm of reused tasks
assert
instances
[
i
].
task_paradigm
==
\
instances
[
j
].
task_paradigm
,
\
"paradigm of reuse tasks should be consistent"
instances
[
i
].
task_reuse_scope
=
instances
[
j
].
name
break
# parse Reader and Paradigm for each instance
for
inst
in
instances
:
reader_name
=
inst
.
config
[
'reader'
]
reader_mod
=
importlib
.
import_module
(
READER_DIR
+
'.'
+
reader_name
)
Reader
=
getattr
(
reader_mod
,
'Reader'
)
parad_name
=
inst
.
config
[
'paradigm'
]
parad_mod
=
importlib
.
import_module
(
PARADIGM_DIR
+
'.'
+
parad_name
)
Paradigm
=
getattr
(
parad_mod
,
'TaskParadigm'
)
inst
.
Reader
=
Reader
inst
.
Paradigm
=
Paradigm
# prepare backbone
if
'backbone_config_path'
in
mtl_conf
:
bb_conf
=
_parse_json
(
mtl_conf
[
'backbone_config_path'
])
bb_conf
=
_merge_conf
(
mtl_conf
,
bb_conf
)
else
:
bb_conf
=
mtl_conf
print_dict
(
bb_conf
,
title
=
'backbone configuration'
.
format
(
instname
))
bb_name
=
mtl_conf
[
'backbone'
]
bb_mod
=
importlib
.
import_module
(
BACKBONE_DIR
+
'.'
+
bb_name
)
Backbone
=
getattr
(
bb_mod
,
'Model'
)
self
.
instances
=
instances
self
.
mrs
=
mrs
self
.
Backbone
=
Backbone
self
.
bb_conf
=
bb_conf
self
.
bb_name
=
bb_name
self
.
has_init_train
=
False
self
.
has_init_pred
=
False
if
self
.
_for_train
:
print
(
"initialing for training..."
)
self
.
_init_train
()
self
.
has_init_train
=
True
def
_init_train
(
self
):
instances
=
self
.
instances
Backbone
=
self
.
Backbone
bb_conf
=
self
.
bb_conf
bb_name
=
self
.
bb_name
dev_count
=
self
.
dev_count
num_instances
=
len
(
instances
)
mrs
=
self
.
mrs
# set first_target/main task instance
main_inst
=
None
for
inst
in
instances
:
if
inst
.
is_target
:
main_inst
=
inst
inst
.
is_first_target
=
True
break
main_conf
=
main_inst
.
config
if
not
os
.
path
.
exists
(
main_conf
[
'save_path'
]):
os
.
makedirs
(
main_conf
[
'save_path'
])
# prepare backbone
train_backbone
=
Backbone
(
bb_conf
,
phase
=
'train'
)
pred_backbone
=
Backbone
(
bb_conf
,
phase
=
'pred'
)
# create reader, task
# then check i/o across reader, backbone and task_layer
task_attrs
=
[]
pred_task_attrs
=
[]
for
inst
in
instances
:
train_reader
=
inst
.
Reader
(
inst
.
config
,
phase
=
'train'
)
inst
.
reader
[
'train'
]
=
train_reader
train_parad
=
inst
.
Paradigm
(
inst
.
config
,
phase
=
'train'
,
backbone_config
=
bb_conf
)
inst
.
task_layer
[
'train'
]
=
train_parad
task_attr_from_reader
=
_encode_inputs
(
train_parad
.
inputs_attrs
[
'reader'
],
inst
.
name
)
task_attrs
.
append
(
task_attr_from_reader
)
_check_io
(
train_backbone
.
inputs_attr
,
train_reader
.
outputs_attr
,
in_name
=
bb_name
+
'_backbone'
,
out_name
=
'reader.train'
)
_check_io
(
train_parad
.
inputs_attrs
[
'reader'
],
train_reader
.
outputs_attr
,
in_name
=
'task_paradigm.train.reader'
,
out_name
=
'reader.train'
)
_check_io
(
train_parad
.
inputs_attrs
[
'backbone'
],
train_backbone
.
outputs_attr
,
in_name
=
'task_paradigm.train.backbone'
,
out_name
=
bb_name
+
'_backbone'
)
if
inst
.
is_target
:
if
'pred_file'
not
in
inst
.
config
:
inst
.
config
[
'pred_file'
]
=
''
pred_reader
=
inst
.
Reader
(
inst
.
config
,
phase
=
'pred'
)
pred_parad
=
inst
.
Paradigm
(
inst
.
config
,
phase
=
'pred'
,
backbone_config
=
bb_conf
)
# inst.reader['pred'] = pred_reader # 这里创建的reader是个假reader,只是为了读取output_attr而已,所以不做保存
inst
.
task_layer
[
'pred'
]
=
pred_parad
# 框架有巨坑,先这样写吧
task_attr_from_reader
=
_encode_inputs
(
pred_parad
.
inputs_attrs
[
'reader'
],
inst
.
name
)
pred_task_attrs
.
append
(
task_attr_from_reader
)
# task_attr = pred_parad.inputs_attrs['reader']
_check_io
(
pred_backbone
.
inputs_attr
,
pred_reader
.
outputs_attr
,
in_name
=
bb_name
+
'_backbone'
,
out_name
=
'reader.pred'
)
_check_io
(
pred_parad
.
inputs_attrs
[
'reader'
],
pred_reader
.
outputs_attr
,
in_name
=
'task_paradigm.pred.reader'
,
out_name
=
'reader.pred'
)
_check_io
(
pred_parad
.
inputs_attrs
[
'backbone'
],
pred_backbone
.
outputs_attr
,
in_name
=
'task_paradigm.pred.backbone'
,
out_name
=
bb_name
+
'_backbone'
)
# merge reader input attrs from backbone and task_instances
joint_input_names
,
joint_shape_and_dtypes
,
name_to_position
=
merge_input_attrs
(
train_backbone
.
inputs_attr
,
task_attrs
)
pred_joint_input_names
,
pred_joint_shape_and_dtypes
,
_
=
merge_input_attrs
(
pred_backbone
.
inputs_attr
,
pred_task_attrs
,
insert_taskid
=
False
)
# shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]
if
DEBUG
:
print
(
'----- for debug -----'
)
print
(
'joint input names:'
)
print
(
joint_input_names
)
print
(
'joint input shape and dtypes:'
)
print
(
joint_shape_and_dtypes
)
# load data
for
inst
in
instances
:
print
(
inst
.
name
+
": preparing data..."
)
inst
.
reader
[
'train'
].
load_data
()
# merge dataset iterators and create net input vars
iterators
=
[]
prefixes
=
[]
mrs
=
[]
for
inst
in
instances
:
iterators
.
append
(
inst
.
reader
[
'train'
].
iterator
())
prefixes
.
append
(
inst
.
name
)
mrs
.
append
(
inst
.
mix_ratio
)
joint_iterator_fn
=
create_joint_iterator_fn
(
iterators
,
prefixes
,
joint_shape_and_dtypes
,
mrs
,
name_to_position
,
dev_count
=
dev_count
,
verbose
=
VERBOSE
,
batch_size
=
main_conf
[
'batch_size'
])
input_attrs
=
[[
i
,
j
,
k
]
for
i
,
(
j
,
k
)
in
zip
(
joint_input_names
,
joint_shape_and_dtypes
)]
pred_input_attrs
=
[[
i
,
j
,
k
]
for
i
,
(
j
,
k
)
in
zip
(
pred_joint_input_names
,
pred_joint_shape_and_dtypes
)]
net_inputs
=
create_net_inputs
(
input_attrs
,
async
=
True
,
iterator_fn
=
joint_iterator_fn
,
dev_count
=
dev_count
,
n_prefetch
=
3
)
# build backbone and task layers
# 不指定scope名字会挂,框架有坑
with
fluid
.
unique_name
.
guard
(
"backbone-"
):
bb_output_vars
=
train_backbone
.
build
(
net_inputs
)
# bb_output_vars = train_backbone.build(net_inputs)
assert
sorted
(
bb_output_vars
.
keys
())
==
sorted
(
train_backbone
.
outputs_attr
.
keys
())
# 会挂
# 这里是否有必要新建一个program?是的,被坑死了
pred_prog
=
fluid
.
Program
()
pred_init_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
default_main_program
()
train_init_prog
=
fluid
.
default_startup_program
()
with
fluid
.
program_guard
(
main_program
=
pred_prog
,
startup_program
=
pred_init_prog
):
pred_net_inputs
=
create_net_inputs
(
pred_input_attrs
)
with
fluid
.
unique_name
.
guard
(
"backbone-"
):
pred_bb_output_vars
=
pred_backbone
.
build
(
pred_net_inputs
)
fluid
.
framework
.
switch_main_program
(
train_prog
)
fluid
.
framework
.
switch_startup_program
(
train_init_prog
)
# pred_backbone = train_backbone
# pred_bb_output_vars = bb_output_vars
task_output_vars
=
{}
for
inst
in
instances
:
task_inputs
=
{
'backbone'
:
bb_output_vars
}
task_inputs_from_reader
=
_decode_inputs
(
net_inputs
,
inst
.
name
)
task_inputs
[
'reader'
]
=
task_inputs_from_reader
scope
=
inst
.
task_reuse_scope
+
'/'
with
fluid
.
unique_name
.
guard
(
scope
):
output_vars
=
inst
.
build_task_layer
(
task_inputs
,
phase
=
'train'
)
output_vars
=
{
inst
.
name
+
'/'
+
key
:
val
for
key
,
val
in
output_vars
.
items
()}
old
=
len
(
task_output_vars
)
# for debug
task_output_vars
.
update
(
output_vars
)
assert
len
(
task_output_vars
)
-
old
==
len
(
output_vars
)
# for debug
# # prepare predict vars for saving inference model
if
inst
.
is_target
:
# task_attr = inst.task_layer['pred'].inputs_attrs['reader']
# _input_names, _shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, task_attr, insert_taskid=False)
# pred_input_attrs = [[i, j, k] for i, (j,k) in zip(_input_names, _shape_and_dtypes)]
with
fluid
.
program_guard
(
pred_prog
,
pred_init_prog
):
# pred_net_inputs = create_net_inputs(pred_input_attrs)
# 这里同时建立了pred阶段的backbone计算图,不知道是否会造成额外的显存开销(paddle不会计算运行路径)
cur_inputs
=
_decode_inputs
(
pred_net_inputs
,
inst
.
name
)
inst
.
pred_input
=
cur_inputs
pred_task_inputs
=
{
'backbone'
:
pred_bb_output_vars
,
'reader'
:
cur_inputs
}
scope
=
inst
.
task_reuse_scope
+
'/'
with
fluid
.
unique_name
.
guard
(
scope
):
inst
.
build_task_layer
(
pred_task_inputs
,
phase
=
'pred'
)
bb_fetches
=
{
k
:
v
.
name
for
k
,
v
in
bb_output_vars
.
items
()}
task_fetches
=
{
k
:
v
.
name
for
k
,
v
in
task_output_vars
.
items
()}
# fetches = bb_fetches.copy() # 注意!框架在多卡时无法fetch变长维度的tensor,这里加入bb的out后会挂
# fetches.update(task_fetches)
fetches
=
task_fetches
fetches
[
'__task_id'
]
=
net_inputs
[
'__task_id'
].
name
# compute loss
task_id_var
=
net_inputs
[
'__task_id'
]
task_id_vec
=
layers
.
one_hot
(
task_id_var
,
num_instances
)
losses
=
fluid
.
layers
.
concat
([
task_output_vars
[
inst
.
name
+
'/loss'
]
for
inst
in
instances
],
axis
=
0
)
loss
=
layers
.
reduce_sum
(
task_id_vec
*
losses
)
main_reader
=
main_inst
.
reader
[
'train'
]
num_examples
=
main_reader
.
num_examples
for
inst
in
instances
:
max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
inst
.
mix_ratio
*
num_examples
)
//
main_conf
[
'batch_size'
]
//
dev_count
if
inst
.
is_target
:
print
(
'{}: expected train steps {}.'
.
format
(
inst
.
name
,
max_train_steps
))
inst
.
steps_pur_epoch
=
inst
.
reader
[
'train'
].
num_examples
//
main_conf
[
'batch_size'
]
//
dev_count
inst
.
expected_train_steps
=
max_train_steps
global_max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
num_examples
*
sum
(
mrs
))
//
main_conf
[
'batch_size'
]
//
dev_count
print
(
'Estimated overall train steps {}.'
.
format
(
global_max_train_steps
))
if
'warmup_proportion'
in
main_conf
and
main_conf
[
'warmup_proportion'
]
>
0
:
warmup_steps
=
int
(
global_max_train_steps
*
main_conf
[
'warmup_proportion'
])
print
(
'Warmup steps: '
+
str
(
warmup_steps
))
else
:
warmup_steps
=
0
# steps_pur_epoch = num_examples // main_conf['batch_size'] // dev_count
# build optimizer
# 其实也完全可以支持每个任务用它自己的optimizer
if
'optimizer'
in
main_conf
:
optim_mod
=
importlib
.
import_module
(
OPTIMIZER_DIR
+
'.'
+
main_conf
[
'optimizer'
])
optimize
=
getattr
(
optim_mod
,
OPTIMIZE_METHOD
)
optimize
(
loss
,
main_conf
,
max_train_steps
,
warmup_steps
,
fluid
.
default_main_program
())
loss
.
persistable
=
True
if
main_conf
.
get
(
'use_ema'
,
False
):
assert
'ema_decay'
in
main_conf
,
"ema_decay should be set when use_ema is enabled."
ema
=
fluid
.
optimizer
.
ExponentialMovingAverage
(
main_conf
[
'ema_decay'
])
ema
.
update
()
# prepare for train
self
.
train_backbone
=
train_backbone
self
.
train_program
=
fluid
.
CompiledProgram
(
fluid
.
default_main_program
()).
with_data_parallel
(
loss_name
=
loss
.
name
)
self
.
saver_program
=
fluid
.
default_main_program
()
self
.
main_inst
=
main_inst
self
.
fetches
=
fetches
self
.
has_init_train
=
True
self
.
has_init_pred
=
True
# self.max_train_steps = max_train_steps
# self.steps_pur_epoch = steps_pur_epoch
self
.
exe
.
run
(
fluid
.
default_startup_program
())
print
(
"
\n
Randomly initialize parameters...
\n
"
)
def
_init_pred
(
self
,
instance
,
infer_model_path
):
inst
=
instance
pred_backbone
=
self
.
Backbone
(
self
.
bb_conf
,
phase
=
'pred'
)
pred_parad
=
inst
.
Paradigm
(
inst
.
config
,
phase
=
'pred'
,
backbone_config
=
self
.
bb_conf
)
inst
.
task_layer
[
'pred'
]
=
pred_parad
pred_joint_input_names
,
pred_joint_shape_and_dtypes
,
name_to_position
=
merge_input_attrs
(
pred_backbone
.
inputs_attr
,
inst
.
task_layer
[
'pred'
].
inputs_attrs
[
'reader'
],
insert_taskid
=
False
)
pred_prog
=
inst
.
load
(
infer_model_path
)
# pred_prog = fluid.CompiledProgram(pred_prog).with_data_parallel()
if
inst
.
reader
[
'pred'
]
is
None
:
pred_reader
=
inst
.
Reader
(
inst
.
config
,
phase
=
'pred'
)
inst
.
reader
[
'pred'
]
=
pred_reader
return
pred_prog
def
load_pretrain
(
self
,
pretrain_model_path
=
None
):
# load pretrain model (or ckpt)
if
pretrain_model_path
is
None
:
assert
'pretrain_model_path'
in
self
.
main_conf
,
"pretrain_model_path NOT set."
pretrain_model_path
=
self
.
main_conf
[
'pretrain_model_path'
]
init_pretraining_params
(
self
.
exe
,
pretrain_model_path
,
main_program
=
fluid
.
default_startup_program
())
    def train(self):
        # TODO: back up the config files here so that users can resume
        # training from a checkpoint and run prediction later
        if not self.has_init_train:
            self._init_train()
            self.has_init_train = True

        instances = self.instances
        num_instances = self.num_instances
        main_inst = self.main_inst
        main_conf = main_inst.config

        backbone = self.train_backbone
        train_program = self.train_program
        saver_program = self.saver_program
        fetches = self.fetches
        # max_train_steps = self.max_train_steps
        # steps_pur_epoch = self.steps_pur_epoch

        finish = []
        for inst in instances:
            if inst.is_target:
                finish.append(False)

        def train_finish():
            for inst in instances:
                if inst.is_target:
                    if not inst.train_finish:
                        return False
            return True

        # do training
        # loss_fetches = {inst.name+'/loss': inst.task_layer['train'].loss for inst in instances}
        # old = len(fetches) # for debug
        # fetches.update(loss_fetches)
        # assert len(fetches) == old + len(loss_fetches) # for debug and avoid user-caused bug
        # assert 'task_id' not in fetches # for debug and avoid user-caused bug
        # fetches['task_id'] = task_id_var
        fetch_names, fetch_list = zip(*fetches.items())

        main_step = 0  # only count for main task
        global_step = 0  # count for all tasks
        epoch = 0
        time_begin = time.time()
        backbone_buffer = []
        while not train_finish():
            rt_outputs = self.exe.run(train_program, fetch_list=fetch_list)
            rt_outputs = {k: v for k, v in zip(fetch_names, rt_outputs)}

            rt_task_id = np.squeeze(rt_outputs['__task_id']).tolist()
            assert (not isinstance(rt_task_id, list)) or len(set(rt_task_id)) == 1, rt_task_id
            rt_task_id = rt_task_id[0] if isinstance(rt_task_id, list) else rt_task_id
            cur_task = instances[rt_task_id]

            backbone_rt_outputs = {k: v for k, v in rt_outputs.items() if '/' not in k}
            backbone_buffer.append(backbone.postprocess(backbone_rt_outputs))

            task_rt_outputs = {k[len(cur_task.name + '/'):]: v
                               for k, v in rt_outputs.items()
                               if k.startswith(cur_task.name + '/')}
            instances[rt_task_id].task_layer['train'].postprocess(task_rt_outputs)

            global_step += 1
            # if cur_task.is_target:
            cur_task.cur_train_step += 1

            if global_step % main_conf.get('print_every_n_steps', 5) == 0:
                loss = rt_outputs[cur_task.name + '/loss']
                loss = np.mean(np.squeeze(loss)).tolist()

                time_end = time.time()
                time_cost = time_end - time_begin

                print("Global step: {}. Task: {}, step {}/{} (epoch {}), loss: {:.3f}, speed: {:.2f} steps/s".format(
                    global_step, cur_task.name, cur_task.cur_train_step,
                    cur_task.steps_pur_epoch, cur_task.cur_train_epoch, loss,
                    main_conf.get('print_every_n_steps', 5) / time_cost))
                time_begin = time.time()

            if 'save_every_n_steps' in main_conf and \
                    global_step % main_conf['save_every_n_steps'] == 0:
                save_path = os.path.join(main_conf['save_path'],
                                         "step_" + str(global_step))
                fluid.io.save_persistables(self.exe, save_path, saver_program)

        save_path = os.path.join(main_conf['save_path'],
                                 "step_" + str(global_step) + "_final")
        fluid.io.save_persistables(self.exe, save_path, saver_program)
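A note on the fetch-name convention the loop above relies on: backbone outputs are fetched under plain names, while task-layer outputs are prefixed with `<task_name>/`, so one fetch dict can be split per step. A minimal self-contained sketch of that routing (the names below are illustrative only):

    # Toy fetch dict as the loop would see it; 'mrqa' stands in for a task name.
    rt_outputs = {'__task_id': 0, 'sentence_emb': ..., 'mrqa/loss': ...}

    cur_task_name = 'mrqa'
    backbone_out = {k: v for k, v in rt_outputs.items() if '/' not in k}
    task_out = {k[len(cur_task_name + '/'):]: v
                for k, v in rt_outputs.items()
                if k.startswith(cur_task_name + '/')}
    # backbone_out -> {'__task_id': 0, 'sentence_emb': ...}
    # task_out     -> {'loss': ...}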
    def pred(self, task_instance, inference_model_dir=None):
        if self._for_train:
            raise Exception('This controller is a trainer. Please build a new controller with for_train=False for predicting.')

        assert isinstance(task_instance, str)
        if isinstance(inference_model_dir, str):
            assert os.path.exists(inference_model_dir), inference_model_dir + " not found."
        if not self.has_init_pred and inference_model_dir is None:
            raise ValueError('infer_model_path is required for prediction.')

        instance = None
        for inst in self.instances:
            if inst.name == task_instance:
                instance = inst
                break
        if instance is None:
            raise ValueError(task_instance + ' is not a valid task_instance.')

        pred_prog = self._init_pred(instance, inference_model_dir)

        inst = instance
        inst.reader['pred'].load_data()
        fetch_names, fetch_vars = inst.pred_fetch_list
        # iterator = create_iterator_fn(inst.reader['pred'].iterator, inst.name, pred_joint_shape_and_dtypes, name_to_position)

        mapper = {k: v for k, v in inst.pred_input}
        buf = []
        for feed in inst.reader['pred'].iterator():
            feed = _encode_inputs(feed, inst.name, cand_set=mapper)
            feed = {mapper[k]: v for k, v in feed.items()}
            rt_outputs = self.exe.run(pred_prog, feed, fetch_vars)
            rt_outputs = {k: v for k, v in zip(fetch_names, rt_outputs)}
            inst.postprocess(rt_outputs, phase='pred')
        reader_outputs = inst.reader['pred'].get_epoch_outputs()
        inst.epoch_postprocess({'reader': reader_outputs}, phase='pred')


if __name__ == '__main__':
    assert len(sys.argv) == 2, "Usage: python mtl_controller.py <mtl_conf_path>"
    conf_path = sys.argv[1]
    del sys.argv[1]
    controller = Controller(conf_path)
    if controller.main_conf['do_train']:
        controller.train()
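For completeness, the controller can also be driven programmatically instead of through the `__main__` entry point above; a minimal sketch (the config filename is hypothetical):

    from paddlepalm.mtl_controller import Controller

    controller = Controller('mtl_conf.yaml')  # hypothetical config path
    controller.load_pretrain()                # uses pretrain_model_path from the config
    controller.train()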
build/lib/paddlepalm/optimizer/__init__.py
deleted (100644 → 0)
build/lib/paddlepalm/optimizer/adam.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Optimization and learning rate scheduling."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import paddle.fluid as fluid


def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
    """Applies linear warmup of learning rate from 0 and decay to 0."""
    with fluid.default_main_program()._lr_schedule_guard():
        lr = fluid.layers.tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="scheduled_learning_rate")

        global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()

        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(global_step < warmup_steps):
                warmup_lr = learning_rate * (global_step / warmup_steps)
                fluid.layers.tensor.assign(warmup_lr, lr)
            with switch.default():
                decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
                    learning_rate=learning_rate,
                    decay_steps=num_train_steps,
                    end_learning_rate=0.0,
                    power=1.0,
                    cycle=False)
                fluid.layers.tensor.assign(decayed_lr, lr)

        return lr
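Read as a plain function of the step counter, the schedule above is a linear ramp followed by a linear (power-1.0 polynomial) decay to zero. A host-side mirror, useful for sanity-checking; a sketch under the assumption that `polynomial_decay` clips the step at `decay_steps` when `cycle=False`:

    def scheduled_lr_value(step, base_lr, warmup_steps, num_train_steps):
        """Host-side mirror of linear_warmup_decay (for inspection only)."""
        if step < warmup_steps:
            return base_lr * step / float(warmup_steps)
        # polynomial_decay with power=1.0, end_learning_rate=0.0, cycle=False
        frac = min(float(step), num_train_steps) / num_train_steps
        return base_lr * (1.0 - frac)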
def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=None):
    if warmup_steps > 0:
        decay_strategy = config.get('lr_scheduler', 'linear_warmup_decay')
        if decay_strategy == 'noam_decay':
            scheduled_lr = fluid.layers.learning_rate_scheduler \
                .noam_decay(1 / (warmup_steps * (config['learning_rate'] ** 2)),
                            warmup_steps)
        elif decay_strategy == 'linear_warmup_decay':
            scheduled_lr = linear_warmup_decay(config['learning_rate'],
                                               warmup_steps, max_train_steps)
        else:
            raise ValueError("Unknown lr_scheduler, should be "
                             "'noam_decay' or 'linear_warmup_decay'")
        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
    else:
        optimizer = fluid.optimizer.Adam(learning_rate=config['learning_rate'])
        scheduled_lr = config['learning_rate']

    clip_norm_thres = 1.0
    # When using mixed precision training, scale the gradient clip threshold
    # by loss_scaling
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

    def exclude_from_weight_decay(name):
        if name.find("layer_norm") > -1:
            return True
        bias_suffix = ["_bias", "_b", ".b_0"]
        for suffix in bias_suffix:
            if name.endswith(suffix):
                return True
        return False

    param_list = dict()

    for param in train_program.global_block().all_parameters():
        param_list[param.name] = param * 1.0
        param_list[param.name].stop_gradient = True

    _, param_grads = optimizer.minimize(loss)

    for block in fluid.default_main_program().blocks:
        for var_name in block.vars:
            if var_name.startswith("embedding"):
                print(block.vars[var_name])

    if config.get('weight_decay', 0) > 0:
        for param, grad in param_grads:
            if exclude_from_weight_decay(param.name):
                continue
            with param.block.program._optimized_guard(
                    [param, grad]), fluid.framework.name_scope("weight_decay"):
                updated_param = param - param_list[
                    param.name] * config['weight_decay'] * scheduled_lr
                fluid.layers.assign(output=param, input=updated_param)
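The weight-decay block above is a manual, decoupled L2 update: after `minimize`, each decayed parameter is shifted by `weight_decay * scheduled_lr` times a frozen snapshot of itself, and layer-norm and bias parameters are skipped. To illustrate which names the exclusion rule catches, here is a standalone check (parameter names are made up; the helper is nested inside `optimize`, so it is restated here for the sketch):

    def exclude_from_weight_decay(name):
        if name.find("layer_norm") > -1:
            return True
        for suffix in ["_bias", "_b", ".b_0"]:
            if name.endswith(suffix):
                return True
        return False

    for name in ["encoder_fc_0.w_0",           # decayed
                 "post_ffn_layer_norm_scale",  # skipped: layer_norm
                 "encoder_fc_0.b_0"]:          # skipped: bias suffix
        print(name, '->', 'skip' if exclude_from_weight_decay(name) else 'decay')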
build/lib/paddlepalm/reader/__init__.py
deleted (100644 → 0)
build/lib/paddlepalm/reader/cls4bert.py
deleted (100644 → 0)
build/lib/paddlepalm/reader/match4ernie.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddlepalm.interface import reader
from paddlepalm.reader.utils.reader4ernie import ClassifyReader


class Reader(reader):

    def __init__(self, config, phase='train', dev_count=1, print_prefix=''):
        """
        Args:
            phase: train, eval, pred
        """

        self._is_training = phase == 'train'

        reader = ClassifyReader(config['vocab_path'],
                                max_seq_len=config['max_seq_len'],
                                do_lower_case=config.get('do_lower_case', False),
                                for_cn=config.get('for_cn', False),
                                random_seed=config.get('seed', None))
        self._reader = reader
        self._dev_count = dev_count

        self._batch_size = config['batch_size']
        self._max_seq_len = config['max_seq_len']
        if phase == 'train':
            self._input_file = config['train_file']
            self._num_epochs = None  # to keep the iterator from terminating
            self._shuffle = config.get('shuffle', False)
            self._shuffle_buffer = config.get('shuffle_buffer', 5000)
        elif phase == 'eval':
            self._input_file = config['dev_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)
        elif phase == 'pred':
            self._input_file = config['pred_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)

        self._phase = phase
        # self._batch_size = 
        self._print_first_n = config.get('print_first_n', 1)

    @property
    def outputs_attr(self):
        if self._is_training:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32'],
                    "label_ids": [[-1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64']
                    }
        else:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32']
                    }

    def load_data(self):
        self._data_generator = self._reader.data_generator(
            self._input_file, self._batch_size, self._num_epochs,
            dev_count=self._dev_count, shuffle=self._shuffle, phase=self._phase)

    def iterator(self):

        def list_to_dict(x):
            names = ['token_ids', 'segment_ids', 'position_ids', 'task_ids',
                     'input_mask', 'label_ids', 'unique_ids']
            outputs = {n: i for n, i in zip(names, x)}
            del outputs['unique_ids']
            if not self._is_training:
                del outputs['label_ids']
            return outputs

        for batch in self._data_generator():
            yield list_to_dict(batch)

    def get_epoch_outputs(self):
        return {'examples': self._reader.get_examples(self._phase),
                'features': self._reader.get_features(self._phase)}

    @property
    def num_examples(self):
        return self._reader.get_num_examples(phase=self._phase)
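Each `outputs_attr` entry maps an output name to a `[shape, dtype]` pair, with `-1` marking dimensions resolved at runtime (batch size and sequence length). A minimal sketch of how a consumer could materialize these as input variables (illustrative only, not the framework's actual wiring):

    import paddle.fluid as fluid

    def build_inputs(outputs_attr):
        inputs = {}
        for name, (shape, dtype) in outputs_attr.items():
            # -1 dims are resolved at runtime (batch size / sequence length)
            inputs[name] = fluid.layers.data(
                name=name, shape=shape, dtype=dtype, append_batch_size=False)
        return inputs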
build/lib/paddlepalm/reader/mlm.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddlepalm.interface import reader
from paddlepalm.reader.utils.reader4ernie import BaseReader


class Reader(reader):

    def __init__(self, config, phase='train', dev_count=1, print_prefix=''):
        """
        Args:
            phase: train, eval, pred
        """

        self._is_training = phase == 'train'

        reader = BaseReader(config['vocab_path'],
                            max_seq_len=config['max_seq_len'],
                            do_lower_case=config.get('do_lower_case', False),
                            for_cn=config.get('for_cn', False),
                            random_seed=config.get('seed', None))
        self._reader = reader
        self._dev_count = dev_count

        self._batch_size = config['batch_size']
        self._max_seq_len = config['max_seq_len']
        if phase == 'train':
            self._input_file = config['train_file']
            self._num_epochs = None  # to keep the iterator from terminating
            self._shuffle = config.get('shuffle', False)
            self._shuffle_buffer = config.get('shuffle_buffer', 5000)
        elif phase == 'eval':
            self._input_file = config['dev_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)
        elif phase == 'pred':
            self._input_file = config['pred_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)

        self._phase = phase
        # self._batch_size = 
        self._print_first_n = config.get('print_first_n', 1)

    @property
    def outputs_attr(self):
        if self._is_training:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32'],
                    "label_ids": [[-1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64']
                    }
        else:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32']
                    }

    def load_data(self):
        self._data_generator = self._reader.data_generator(
            self._input_file, self._batch_size, self._num_epochs,
            dev_count=self._dev_count, shuffle=self._shuffle, phase=self._phase)

    def iterator(self):

        def list_to_dict(x):
            names = ['token_ids', 'position_ids', 'segment_ids', 'input_mask',
                     'task_ids', 'mask_label', 'mask_pos']
            outputs = {n: i for n, i in zip(names, x)}
            return outputs

        for batch in self._data_generator():
            yield list_to_dict(batch)

    def get_epoch_outputs(self):
        return {'examples': self._reader.get_examples(self._phase),
                'features': self._reader.get_features(self._phase)}

    @property
    def num_examples(self):
        return self._reader.get_num_examples(phase=self._phase)
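The `mask_label`/`mask_pos` pair yielded above follows the flattened-index convention used by the batching utilities later in this commit: a masked token at (sent_index, token_index) is stored as `sent_index * max_len + token_index`. A small self-contained illustration of recovering the masked positions from an encoder output:

    import numpy as np

    enc = np.random.rand(2, 4, 8).astype('float32')  # [batch, max_len, hidden]
    mask_pos = np.array([[1], [6]], dtype='int64')   # (0,1) and (1,2), flattened with max_len=4
    flat = enc.reshape(-1, enc.shape[-1])            # [batch * max_len, hidden]
    masked_vecs = flat[mask_pos.squeeze(-1)]         # vectors at the masked positions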
build/lib/paddlepalm/reader/mrc4bert.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import

import collections
import json
import random

import numpy as np
import six

from paddlepalm.interface import reader
from paddlepalm.utils.textprocess_helper import is_whitespace
from paddlepalm.reader.utils.mrqa_helper import MRQAExample, MRQAFeature
import paddlepalm.tokenizer.bert_tokenizer as tokenization


class Reader(reader):

    def __init__(self, config, phase='train', dev_count=1, print_prefix=''):
        """
        Args:
            phase: train, eval, pred
        """

        self._is_training = phase == 'train'
        self._tokenizer = tokenization.FullTokenizer(
            vocab_file=config['vocab_path'],
            do_lower_case=config.get('do_lower_case', False))
        self._max_seq_length = config['max_seq_len']
        self._doc_stride = config['doc_stride']
        self._max_query_length = config['max_query_len']
        self._dev_count = dev_count
        self.print_prefix = print_prefix

        if phase == 'train':
            self._input_file = config['train_file']
            self._num_epochs = config['num_epochs']
            self._shuffle = config.get('shuffle', False)
            self._shuffle_buffer = config.get('shuffle_buffer', 5000)
        if phase == 'eval':
            self._input_file = config['dev_file']
            self._num_epochs = 1
            self._shuffle = False
        elif phase == 'pred':
            self._input_file = config['predict_file']
            self._num_epochs = 1
            self._shuffle = False

        # self._batch_size = 
        self._batch_size = config['batch_size']
        self._pred_batch_size = config.get('pred_batch_size', self._batch_size)
        self._print_first_n = config.get('print_first_n', 1)
        self._with_negative = config.get('with_negative', False)
        self._sample_rate = config.get('sample_rate', 0.02)
        self._in_tokens = config.get('in_tokens', False)  # referenced by iterator(); config key assumed

        # TODO: without slide window version
        self._with_slide_window = config.get('with_slide_window', False)

        self.vocab = self._tokenizer.vocab
        self.vocab_size = len(self.vocab)
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.mask_id = self.vocab["[MASK]"]

        self.current_train_example = -1
        self.num_train_examples = -1
        self.current_train_epoch = -1

        self.n_examples = None

        print(print_prefix + 'reading raw data...')
        with open(self._input_file, "r") as f:
            self.raw_data = json.load(f)["data"]
        print(print_prefix + 'done!')
    @property
    def outputs_attr(self):
        if self._is_training:
            return {"token_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "position_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "segment_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "input_mask": [[-1, self._max_seq_length, 1], 'float32'],
                    "start_positions": [[-1, self._max_seq_length, 1], 'int64'],
                    "end_positions": [[-1, self._max_seq_length, 1], 'int64']
                    }
        else:
            return {"token_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "position_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "segment_ids": [[-1, self._max_seq_length, 1], 'int64'],
                    "input_mask": [[-1, self._max_seq_length, 1], 'float32'],
                    "unique_ids": [[-1, 1], 'int64']
                    }
    def iterator(self):
        features = []
        for i in range(self._num_epochs):
            if self._is_training:
                print(self.print_prefix + '{} epoch {} {}'.format('-' * 16, i, '-' * 16))
            example_id = 0
            feature_id = 1000000000
            for line in self.train_file:
                raw = self.parse_line(line)
                examples = _raw_to_examples(raw['context'], raw['qa_list'],
                                            is_training=self._is_training)
                for example in examples:
                    features.extend(_example_to_features(
                        example, example_id, self._tokenizer,
                        self._max_seq_length, self._doc_stride,
                        self._max_query_length,
                        id_offset=1000000000 + len(features),
                        is_training=self._is_training))
                    if len(features) >= self._batch_size * self._dev_count:
                        for batch, total_token_num in _features_to_batches(
                                features[:self._batch_size * self._dev_count],
                                self._batch_size, in_tokens=self._in_tokens):
                            temp = prepare_batch_data(
                                batch, total_token_num,
                                max_len=self._max_seq_length, voc_size=-1,
                                pad_id=self.pad_id, cls_id=self.cls_id,
                                sep_id=self.sep_id, mask_id=-1,
                                return_input_mask=True, return_max_len=False,
                                return_num_token=False)
                            if self._is_training:
                                tok_ids, pos_ids, seg_ids, input_mask, start_positions, end_positions = temp
                                yield {"token_ids": tok_ids,
                                       "position_ids": pos_ids,
                                       "segment_ids": seg_ids,
                                       "input_mask": input_mask,
                                       "start_positions": start_positions,
                                       'end_positions': end_positions}
                            else:
                                tok_ids, pos_ids, seg_ids, input_mask, unique_ids = temp
                                yield {"token_ids": tok_ids,
                                       "position_ids": pos_ids,
                                       "segment_ids": seg_ids,
                                       "input_mask": input_mask,
                                       "unique_ids": unique_ids}
                        features = features[self._batch_size * self._dev_count:]
                    example_id += 1

        # The last batch may be discarded when running with distributed prediction,
        # so we build some fake batches for the last prediction step.
        if not self._is_training and len(features) > 0:
            pred_batches = []
            for batch, total_token_num in _features_to_batches(
                    features[:self._batch_size * self._dev_count],
                    self._batch_size, in_tokens=self._in_tokens):
                pred_batches.append(prepare_batch_data(
                    batch, total_token_num, max_len=self._max_seq_length,
                    voc_size=-1, pad_id=self.pad_id, cls_id=self.cls_id,
                    sep_id=self.sep_id, mask_id=-1,
                    return_input_mask=True, return_max_len=False,
                    return_num_token=False))

            fake_batch = pred_batches[-1]
            fake_batch = fake_batch[:-1] + [np.array([-1] * len(fake_batch[0]))]
            pred_batches = pred_batches + [fake_batch] * (self._dev_count - len(pred_batches))
            for batch in pred_batches:
                yield batch
    @property
    def num_examples(self):
        if self.n_examples is None:
            self.n_examples = _estimate_runtime_examples(
                self.raw_data, self._sample_rate, self._tokenizer,
                self._max_seq_length, self._doc_stride, self._max_query_length,
                remove_impossible_questions=True, filter_invalid_spans=True)
        return self.n_examples
        # return math.ceil(n_examples * self._num_epochs / float(self._batch_size * self._dev_count))
def _raw_to_examples(context, qa_list, is_training=True,
                     remove_impossible_questions=True,
                     filter_invalid_spans=True):
    """
    Args:
        context: (str) the paragraph that provides information for QA
        qa_list: (list) nested dicts. Each element in qa_list should contain
            at least 'id' and 'question'. And the ....
    """
    examples = []
    doc_tokens = []
    char_to_word_offset = []
    prev_is_whitespace = True
    for c in context:
        if is_whitespace(c):
            prev_is_whitespace = True
        else:
            if prev_is_whitespace:
                doc_tokens.append(c)
            else:
                doc_tokens[-1] += c
            prev_is_whitespace = False
        char_to_word_offset.append(len(doc_tokens) - 1)

    for qa in qa_list:
        qas_id = qa["id"]
        question_text = qa["question"]
        start_position = None
        end_position = None
        orig_answer_text = None
        is_impossible = False

        if is_training:
            assert len(qa["answers"]) == 1, "For training, each question should have exactly 1 answer."
            if ('is_impossible' in qa) and (qa["is_impossible"]):
                if remove_impossible_questions or filter_invalid_spans:
                    continue
                else:
                    start_position = -1
                    end_position = -1
                    orig_answer_text = ""
                    is_impossible = True
            else:
                answer = qa["answers"][0]
                orig_answer_text = answer["text"]
                answer_offset = answer["answer_start"]
                answer_length = len(orig_answer_text)
                start_position = char_to_word_offset[answer_offset]
                end_position = char_to_word_offset[answer_offset + answer_length - 1]

                # remove corrupt samples
                actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
                cleaned_answer_text = " ".join(
                    tokenization.whitespace_tokenize(orig_answer_text))
                if actual_text.find(cleaned_answer_text) == -1:
                    print("Could not find answer: '%s' vs. '%s'" %
                          (actual_text, cleaned_answer_text))
                    continue

        examples.append(MRQAExample(
            qas_id=qas_id,
            question_text=question_text,
            doc_tokens=doc_tokens,
            orig_answer_text=orig_answer_text,
            start_position=start_position,
            end_position=end_position,
            is_impossible=is_impossible))

    return examples
def _example_to_features(example, example_id, tokenizer, max_seq_length,
                         doc_stride, max_query_length, id_offset, is_training):

    features = []
    query_tokens = tokenizer.tokenize(example.question_text)

    if len(query_tokens) > max_query_length:
        query_tokens = query_tokens[0:max_query_length]

    tok_to_orig_index = []
    orig_to_tok_index = []
    all_doc_tokens = []
    for (i, token) in enumerate(example.doc_tokens):
        orig_to_tok_index.append(len(all_doc_tokens))
        sub_tokens = tokenizer.tokenize(token)
        for sub_token in sub_tokens:
            tok_to_orig_index.append(i)
            all_doc_tokens.append(sub_token)

    tok_start_position = None
    tok_end_position = None
    if is_training and example.is_impossible:
        tok_start_position = -1
        tok_end_position = -1
    if is_training and not example.is_impossible:
        tok_start_position = orig_to_tok_index[example.start_position]
        if example.end_position < len(example.doc_tokens) - 1:
            tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
        else:
            tok_end_position = len(all_doc_tokens) - 1
        (tok_start_position, tok_end_position) = _improve_answer_span(
            all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
            example.orig_answer_text)

    # The -3 accounts for [CLS], [SEP] and [SEP]
    max_tokens_for_doc = max_seq_length - len(query_tokens) - 3

    # We can have documents that are longer than the maximum sequence length.
    # To deal with this we do a sliding window approach, where we take chunks
    # of the up to our max length with a stride of `doc_stride`.
    _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
        "DocSpan", ["start", "length"])
    doc_spans = []
    start_offset = 0
    while start_offset < len(all_doc_tokens):
        length = len(all_doc_tokens) - start_offset
        if length > max_tokens_for_doc:
            length = max_tokens_for_doc
        doc_spans.append(_DocSpan(start=start_offset, length=length))
        if start_offset + length == len(all_doc_tokens):
            break
        start_offset += min(length, doc_stride)

    for (doc_span_index, doc_span) in enumerate(doc_spans):
        tokens = []
        token_to_orig_map = {}
        token_is_max_context = {}
        segment_ids = []
        tokens.append("[CLS]")
        segment_ids.append(0)
        for token in query_tokens:
            tokens.append(token)
            segment_ids.append(0)
        tokens.append("[SEP]")
        segment_ids.append(0)

        for i in range(doc_span.length):
            split_token_index = doc_span.start + i
            token_to_orig_map[len(tokens)] = tok_to_orig_index[split_token_index]

            is_max_context = _check_is_max_context(doc_spans, doc_span_index,
                                                   split_token_index)
            token_is_max_context[len(tokens)] = is_max_context
            tokens.append(all_doc_tokens[split_token_index])
            segment_ids.append(1)
        tokens.append("[SEP]")
        segment_ids.append(1)

        input_ids = tokenizer.convert_tokens_to_ids(tokens)

        # The mask has 1 for real tokens and 0 for padding tokens. Only real
        # tokens are attended to.
        input_mask = [1] * len(input_ids)

        # Zero-pad up to the sequence length.
        #while len(input_ids) < max_seq_length:
        #    input_ids.append(0)
        #    input_mask.append(0)
        #    segment_ids.append(0)

        #assert len(input_ids) == max_seq_length
        #assert len(input_mask) == max_seq_length
        #assert len(segment_ids) == max_seq_length

        start_position = None
        end_position = None
        if is_training and not example.is_impossible:
            # For training, if our document chunk does not contain an annotation
            # we throw it out, since there is nothing to predict.
            doc_start = doc_span.start
            doc_end = doc_span.start + doc_span.length - 1
            out_of_span = False
            if not (tok_start_position >= doc_start and
                    tok_end_position <= doc_end):
                out_of_span = True
            if out_of_span:
                start_position = 0
                end_position = 0
                continue
            else:
                doc_offset = len(query_tokens) + 2
                start_position = tok_start_position - doc_start + doc_offset
                end_position = tok_end_position - doc_start + doc_offset

        if is_training and example.is_impossible:
            start_position = 0
            end_position = 0

        def format_print():
            print("*** Example ***")
            print("unique_id: %s" % (id_offset))
            print("example_index: %s" % (example_id))
            print("doc_span_index: %s" % (doc_span_index))
            print("tokens: %s" % " ".join(
                [tokenization.printable_text(x) for x in tokens]))
            print("token_to_orig_map: %s" % " ".join(
                ["%d:%d" % (x, y) for (x, y) in six.iteritems(token_to_orig_map)]))
            print("token_is_max_context: %s" % " ".join(
                ["%d:%s" % (x, y) for (x, y) in six.iteritems(token_is_max_context)]))
            print("input_ids: %s" % " ".join([str(x) for x in input_ids]))
            print("input_mask: %s" % " ".join([str(x) for x in input_mask]))
            print("segment_ids: %s" % " ".join([str(x) for x in segment_ids]))
            if is_training and example.is_impossible:
                print("impossible example")
            if is_training and not example.is_impossible:
                answer_text = " ".join(tokens[start_position:(end_position + 1)])
                print("start_position: %d" % (start_position))
                print("end_position: %d" % (end_position))
                print("answer: %s" % (tokenization.printable_text(answer_text)))

        if self._print_first_n > 0:
            format_print()
            self._print_first_n -= 1

        features.append(MRQAFeature(
            unique_id=id_offset,
            example_index=example_id,
            doc_span_index=doc_span_index,
            tokens=tokens,
            token_to_orig_map=token_to_orig_map,
            token_is_max_context=token_is_max_context,
            input_ids=input_ids,
            input_mask=input_mask,
            segment_ids=segment_ids,
            start_position=start_position,
            end_position=end_position,
            is_impossible=example.is_impossible))

        id_offset += 1

    return features
def _features_to_batches(features, batch_size, in_tokens):
    batch, total_token_num, max_len = [], 0, 0
    for (index, feature) in enumerate(features):
        seq_len = len(feature.input_ids)
        labels = [feature.unique_id] if feature.start_position is None else [
            feature.start_position, feature.end_position]
        example = [feature.input_ids, feature.segment_ids, range(seq_len)] + labels
        max_len = max(max_len, seq_len)
        if in_tokens:
            to_append = (len(batch) + 1) * max_len <= batch_size
        else:
            to_append = len(batch) < batch_size
        if to_append:
            batch.append(example)
            total_token_num += seq_len
        else:
            yield batch, total_token_num
            batch, total_token_num, max_len = [example], seq_len, seq_len

    if len(batch) > 0:
        yield batch, total_token_num
def _estimate_runtime_examples(data, sample_rate, tokenizer,
                               max_seq_length, doc_stride, max_query_length,
                               remove_impossible_questions=True,
                               filter_invalid_spans=True):
    """Count runtime examples which may differ from the number of raw samples
    due to the sliding window operation etc. This is useful to get correct
    warmup steps for training."""

    assert sample_rate > 0.0 and sample_rate <= 1.0, "sample_rate must be set between 0.0~1.0"

    num_raw_examples = 0
    for entry in data:
        for paragraph in entry["paragraphs"]:
            paragraph_text = paragraph["context"]
            for qa in paragraph["qas"]:
                num_raw_examples += 1
    # print("num raw examples:{}".format(num_raw_examples))

    def is_whitespace(c):
        if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
            return True
        return False

    sampled_examples = []
    first_samp = True
    for entry in data:
        for paragraph in entry["paragraphs"]:
            doc_tokens = None
            for qa in paragraph["qas"]:
                if not first_samp and random.random() > sample_rate and sample_rate < 1.0:
                    continue

                if doc_tokens is None:
                    paragraph_text = paragraph["context"]
                    doc_tokens = []
                    char_to_word_offset = []
                    prev_is_whitespace = True
                    for c in paragraph_text:
                        if is_whitespace(c):
                            prev_is_whitespace = True
                        else:
                            if prev_is_whitespace:
                                doc_tokens.append(c)
                            else:
                                doc_tokens[-1] += c
                            prev_is_whitespace = False
                        char_to_word_offset.append(len(doc_tokens) - 1)

                assert len(qa["answers"]) == 1, "For training, each question should have exactly 1 answer."

                qas_id = qa["id"]
                question_text = qa["question"]
                start_position = None
                end_position = None
                orig_answer_text = None
                is_impossible = False

                if ('is_impossible' in qa) and (qa["is_impossible"]):
                    if remove_impossible_questions or filter_invalid_spans:
                        continue
                    else:
                        start_position = -1
                        end_position = -1
                        orig_answer_text = ""
                        is_impossible = True
                else:
                    answer = qa["answers"][0]
                    orig_answer_text = answer["text"]
                    answer_offset = answer["answer_start"]
                    answer_length = len(orig_answer_text)
                    start_position = char_to_word_offset[answer_offset]
                    end_position = char_to_word_offset[answer_offset + answer_length - 1]

                    # remove corrupt samples
                    actual_text = " ".join(doc_tokens[start_position:(end_position + 1)])
                    cleaned_answer_text = " ".join(
                        tokenization.whitespace_tokenize(orig_answer_text))
                    if actual_text.find(cleaned_answer_text) == -1:
                        continue

                example = MRQAExample(
                    qas_id=qas_id,
                    question_text=question_text,
                    doc_tokens=doc_tokens,
                    orig_answer_text=orig_answer_text,
                    start_position=start_position,
                    end_position=end_position,
                    is_impossible=is_impossible)
                sampled_examples.append(example)
                first_samp = False

    runtime_sample_rate = len(sampled_examples) / float(num_raw_examples)

    runtime_samp_cnt = 0

    for example in sampled_examples:
        query_tokens = tokenizer.tokenize(example.question_text)
        if len(query_tokens) > max_query_length:
            query_tokens = query_tokens[0:max_query_length]

        tok_to_orig_index = []
        orig_to_tok_index = []
        all_doc_tokens = []
        for (i, token) in enumerate(example.doc_tokens):
            orig_to_tok_index.append(len(all_doc_tokens))
            sub_tokens = tokenizer.tokenize(token)
            for sub_token in sub_tokens:
                tok_to_orig_index.append(i)
                all_doc_tokens.append(sub_token)

        tok_start_position = None
        tok_end_position = None
        tok_start_position = orig_to_tok_index[example.start_position]
        if example.end_position < len(example.doc_tokens) - 1:
            tok_end_position = orig_to_tok_index[example.end_position + 1] - 1
        else:
            tok_end_position = len(all_doc_tokens) - 1
        (tok_start_position, tok_end_position) = _improve_answer_span(
            all_doc_tokens, tok_start_position, tok_end_position, tokenizer,
            example.orig_answer_text)

        # The -3 accounts for [CLS], [SEP] and [SEP]
        max_tokens_for_doc = max_seq_length - len(query_tokens) - 3

        _DocSpan = collections.namedtuple(  # pylint: disable=invalid-name
            "DocSpan", ["start", "length"])
        doc_spans = []
        start_offset = 0
        while start_offset < len(all_doc_tokens):
            length = len(all_doc_tokens) - start_offset
            if length > max_tokens_for_doc:
                length = max_tokens_for_doc
            doc_spans.append(_DocSpan(start=start_offset, length=length))
            if start_offset + length == len(all_doc_tokens):
                break
            start_offset += min(length, doc_stride)

        for (doc_span_index, doc_span) in enumerate(doc_spans):
            doc_start = doc_span.start
            doc_end = doc_span.start + doc_span.length - 1
            if filter_invalid_spans and not (
                    tok_start_position >= doc_start and tok_end_position <= doc_end):
                continue
            runtime_samp_cnt += 1

    return int(runtime_samp_cnt / runtime_sample_rate)
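A quick numeric check of the estimator above, with illustrative numbers: suppose the dataset holds 10,000 raw questions, sampling keeps 200 of them, and those 200 expand into 230 valid sliding-window samples after filtering.

    num_raw_examples = 10000          # illustrative numbers
    sampled = 200
    runtime_samp_cnt = 230            # after sliding-window expansion/filtering
    runtime_sample_rate = sampled / float(num_raw_examples)   # 0.02
    print(int(runtime_samp_cnt / runtime_sample_rate))        # -> 11500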
def _improve_answer_span(doc_tokens, input_start, input_end, tokenizer,
                         orig_answer_text):
    """Returns tokenized answer spans that better match the annotated answer."""

    # The MRQA annotations are character based. We first project them to
    # whitespace-tokenized words. But then after WordPiece tokenization, we can
    # often find a "better match". For example:
    #
    #   Question: What year was John Smith born?
    #   Context: The leader was John Smith (1895-1943).
    #   Answer: 1895
    #
    # The original whitespace-tokenized answer will be "(1895-1943).". However
    # after tokenization, our tokens will be "( 1895 - 1943 ) .". So we can match
    # the exact answer, 1895.
    #
    # However, this is not always possible. Consider the following:
    #
    #   Question: What country is the top exporter of electronics?
    #   Context: The Japanese electronics industry is the largest in the world.
    #   Answer: Japan
    #
    # In this case, the annotator chose "Japan" as a character sub-span of
    # the word "Japanese". Since our WordPiece tokenizer does not split
    # "Japanese", we just use "Japanese" as the annotation. This is fairly rare
    # in MRQA, but does happen.
    tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))

    for new_start in range(input_start, input_end + 1):
        for new_end in range(input_end, new_start - 1, -1):
            text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
            if text_span == tok_answer_text:
                return (new_start, new_end)

    return (input_start, input_end)
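To see the span search in action without a real WordPiece vocabulary, here is a toy run with a trivial whitespace "tokenizer" standing in for the real one (the class below is purely illustrative):

    class ToyTokenizer(object):
        """Stand-in whose tokenize() just lowercases and splits on spaces."""
        def tokenize(self, text):
            return text.lower().split()

    doc_tokens = ["(", "1895", "-", "1943", ")", "."]
    # The whitespace-level annotation covered the whole "(1895-1943)." word,
    # i.e. tokens 0..5; the true answer is just "1895".
    print(_improve_answer_span(doc_tokens, 0, 5, ToyTokenizer(), "1895"))
    # -> (1, 1)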
def _check_is_max_context(doc_spans, cur_span_index, position):
    """Check if this is the 'max context' doc span for the token."""

    # Because of the sliding window approach taken to scoring documents, a single
    # token can appear in multiple documents. E.g.
    #  Doc: the man went to the store and bought a gallon of milk
    #  Span A: the man went to the
    #  Span B: to the store and bought
    #  Span C: and bought a gallon of
    #  ...
    #
    # Now the word 'bought' will have two scores from spans B and C. We only
    # want to consider the score with "maximum context", which we define as
    # the *minimum* of its left and right context (the *sum* of left and
    # right context will always be the same, of course).
    #
    # In the example the maximum context for 'bought' would be span C since
    # it has 1 left context and 3 right context, while span B has 4 left context
    # and 0 right context.
    best_score = None
    best_span_index = None
    for (span_index, doc_span) in enumerate(doc_spans):
        end = doc_span.start + doc_span.length - 1
        if position < doc_span.start:
            continue
        if position > end:
            continue
        num_left_context = position - doc_span.start
        num_right_context = end - position
        score = min(num_left_context, num_right_context) + 0.01 * doc_span.length
        if best_score is None or score > best_score:
            best_score = score
            best_span_index = span_index

    return cur_span_index == best_span_index
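Replaying the docstring's example numerically (`DocSpan` is the same namedtuple the feature builder constructs):

    import collections

    _DocSpan = collections.namedtuple("DocSpan", ["start", "length"])
    # Doc: the(0) man(1) went(2) to(3) the(4) store(5) and(6) bought(7) a(8) gallon(9) of(10) milk(11)
    spans = [_DocSpan(0, 5),   # Span A: tokens 0-4
             _DocSpan(3, 5),   # Span B: tokens 3-7
             _DocSpan(6, 5)]   # Span C: tokens 6-10
    # 'bought' is token 7: span B gives min(4, 0) = 0, span C gives min(1, 3) = 1,
    # so span C wins.
    print(_check_is_max_context(spans, 1, 7))  # False (span B)
    print(_check_is_max_context(spans, 2, 7))  # True  (span C)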
build/lib/paddlepalm/reader/mrc4ernie.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddlepalm.interface import reader
from paddlepalm.reader.utils.reader4ernie import MRCReader


class Reader(reader):

    def __init__(self, config, phase='train', dev_count=1, print_prefix=''):
        """
        Args:
            phase: train, eval, pred
        """

        self._is_training = phase == 'train'

        reader = MRCReader(config['vocab_path'],
                           max_seq_len=config['max_seq_len'],
                           do_lower_case=config.get('do_lower_case', False),
                           tokenizer='FullTokenizer',
                           for_cn=config.get('for_cn', False),
                           doc_stride=config['doc_stride'],
                           max_query_length=config['max_query_len'],
                           random_seed=config.get('seed', None))
        self._reader = reader
        self._dev_count = dev_count

        self._batch_size = config['batch_size']
        self._max_seq_len = config['max_seq_len']
        if phase == 'train':
            self._input_file = config['train_file']
            # self._num_epochs = config['num_epochs']
            self._num_epochs = None  # to keep the iterator from terminating
            self._shuffle = config.get('shuffle', False)
            self._shuffle_buffer = config.get('shuffle_buffer', 5000)
        if phase == 'eval':
            self._input_file = config['dev_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)
        elif phase == 'pred':
            self._input_file = config['pred_file']
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)

        self._phase = phase
        # self._batch_size = 
        self._print_first_n = config.get('print_first_n', 1)

        # TODO: without slide window version
        self._with_slide_window = config.get('with_slide_window', False)

    @property
    def outputs_attr(self):
        if self._is_training:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32'],
                    "start_positions": [[-1, 1], 'int64'],
                    "end_positions": [[-1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64']
                    }
        else:
            return {"token_ids": [[-1, -1, 1], 'int64'],
                    "position_ids": [[-1, -1, 1], 'int64'],
                    "segment_ids": [[-1, -1, 1], 'int64'],
                    "task_ids": [[-1, -1, 1], 'int64'],
                    "input_mask": [[-1, -1, 1], 'float32'],
                    "unique_ids": [[-1, 1], 'int64']
                    }

    @property
    def epoch_outputs_attr(self):
        if not self._is_training:
            return {"examples": None,
                    "features": None}

    def load_data(self):
        self._data_generator = self._reader.data_generator(
            self._input_file, self._batch_size, self._num_epochs,
            dev_count=self._dev_count, shuffle=self._shuffle, phase=self._phase)

    def iterator(self):

        def list_to_dict(x):
            names = ['token_ids', 'segment_ids', 'position_ids', 'task_ids',
                     'input_mask', 'start_positions', 'end_positions', 'unique_ids']
            outputs = {n: i for n, i in zip(names, x)}
            if self._is_training:
                del outputs['unique_ids']
            else:
                del outputs['start_positions']
                del outputs['end_positions']
            return outputs

        for batch in self._data_generator():
            yield list_to_dict(batch)

    def get_epoch_outputs(self):
        return {'examples': self._reader.get_examples(self._phase),
                'features': self._reader.get_features(self._phase)}

    @property
    def num_examples(self):
        return self._reader.get_num_examples(phase=self._phase)
build/lib/paddlepalm/reader/utils/__init__.py
deleted (100644 → 0)
build/lib/paddlepalm/reader/utils/batching4bert.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
    """
    Add mask for batch_tokens, return out, mask_label, mask_pos;
    Note: mask_pos corresponds to batch_tokens after padding.
    """
    max_len = max([len(sent) for sent in batch_tokens])
    mask_label = []
    mask_pos = []
    prob_mask = np.random.rand(total_token_num)
    # Note: the first token is [CLS], so [low=1]
    replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
    pre_sent_len = 0
    prob_index = 0
    for sent_index, sent in enumerate(batch_tokens):
        mask_flag = False
        prob_index += pre_sent_len
        for token_index, token in enumerate(sent):
            prob = prob_mask[prob_index + token_index]
            if prob > 0.15:
                continue
            elif 0.03 < prob <= 0.15:
                # mask
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    sent[token_index] = MASK
                    mask_flag = True
                    mask_pos.append(sent_index * max_len + token_index)
            elif 0.015 < prob <= 0.03:
                # random replace
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    sent[token_index] = replace_ids[prob_index + token_index]
                    mask_flag = True
                    mask_pos.append(sent_index * max_len + token_index)
            else:
                # keep the original token
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    mask_pos.append(sent_index * max_len + token_index)
        pre_sent_len = len(sent)

        # ensure at least mask one word in a sentence
        while not mask_flag:
            token_index = int(np.random.randint(1, high=len(sent) - 1, size=1))
            if sent[token_index] != SEP and sent[token_index] != CLS:
                mask_label.append(sent[token_index])
                sent[token_index] = MASK
                mask_flag = True
                mask_pos.append(sent_index * max_len + token_index)
    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
    return batch_tokens, mask_label, mask_pos
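The thresholds above implement the usual BERT 15% masking budget: a uniform draw in [0, 1) selects a token with probability 0.15, and within that budget roughly 80% become [MASK] (0.03 < p <= 0.15), 10% get a random id (0.015 < p <= 0.03), and 10% stay unchanged but are still predicted (p <= 0.015). A quick empirical check of the sub-ranges:

    import numpy as np

    p = np.random.rand(1000000)
    print(((0.03 < p) & (p <= 0.15)).mean())   # ~0.12  -> [MASK]
    print(((0.015 < p) & (p <= 0.03)).mean())  # ~0.015 -> random id
    print((p <= 0.015).mean())                 # ~0.015 -> kept, still predicted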
def prepare_batch_data(insts,
                       total_token_num,
                       max_len=None,
                       voc_size=0,
                       pad_id=None,
                       cls_id=None,
                       sep_id=None,
                       mask_id=None,
                       return_input_mask=True,
                       return_max_len=True,
                       return_num_token=False):
    """
    1. generate Tensor of data
    2. generate Tensor of position
    3. generate self attention mask, [shape: batch_size * max_len * max_len]
    """

    batch_src_ids = [inst[0] for inst in insts]
    batch_sent_ids = [inst[1] for inst in insts]
    batch_pos_ids = [inst[2] for inst in insts]
    labels_list = []
    # compatible with mrqa, whose example includes start/end positions,
    # or unique id
    for i in range(3, len(insts[0]), 1):
        labels = [inst[i] for inst in insts]
        labels = np.array(labels).astype("int64").reshape([-1, 1])
        labels_list.append(labels)

    # First step: do mask without padding
    if mask_id >= 0:
        out, mask_label, mask_pos = mask(
            batch_src_ids,
            total_token_num,
            vocab_size=voc_size,
            CLS=cls_id,
            SEP=sep_id,
            MASK=mask_id)
    else:
        out = batch_src_ids
    # Second step: padding
    src_id, self_input_mask = pad_batch_data(
        out, max_len=max_len, pad_idx=pad_id, return_input_mask=True)
    pos_id = pad_batch_data(
        batch_pos_ids, max_len=max_len, pad_idx=pad_id,
        return_pos=False, return_input_mask=False)
    sent_id = pad_batch_data(
        batch_sent_ids, max_len=max_len, pad_idx=pad_id,
        return_pos=False, return_input_mask=False)

    if mask_id >= 0:
        return_list = [
            src_id, pos_id, sent_id, self_input_mask, mask_label, mask_pos
        ] + labels_list
    else:
        return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list

    return return_list if len(return_list) > 1 else return_list[0]
def pad_batch_data(insts,
                   max_len=None,
                   pad_idx=0,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and input mask.
    """
    return_list = []
    if max_len is None:
        max_len = max(len(inst) for inst in insts)
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array([
        list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
    ])
    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array(
            [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        num_token = 0
        for inst in insts:
            num_token += len(inst)
        return_list += [num_token]

    return return_list if len(return_list) > 1 else return_list[0]


if __name__ == "__main__":
    pass
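A quick usage example for `pad_batch_data` with toy token ids:

    ids, mask = pad_batch_data([[5, 6, 7], [8, 9]], pad_idx=0,
                               return_input_mask=True)
    print(ids.shape)            # (2, 3, 1): both instances padded to the batch max
    print(mask[1].squeeze(-1))  # [1. 1. 0.]: the padding position is masked out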
build/lib/paddlepalm/reader/utils/batching4ernie.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from six.moves import xrange


def mask(batch_tokens,
         seg_labels,
         mask_word_tags,
         total_token_num,
         vocab_size,
         CLS=1,
         SEP=2,
         MASK=3):
    """
    Add mask for batch_tokens, return out, mask_label, mask_pos;
    Note: mask_pos corresponds to batch_tokens after padding.
    """
    max_len = max([len(sent) for sent in batch_tokens])
    mask_label = []
    mask_pos = []
    prob_mask = np.random.rand(total_token_num)
    # Note: the first token is [CLS], so [low=1]
    replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
    pre_sent_len = 0
    prob_index = 0
    for sent_index, sent in enumerate(batch_tokens):
        mask_flag = False
        mask_word = mask_word_tags[sent_index]
        prob_index += pre_sent_len
        if mask_word:
            beg = 0
            for token_index, token in enumerate(sent):
                seg_label = seg_labels[sent_index][token_index]
                if seg_label == 1:
                    continue
                if beg == 0:
                    if seg_label != -1:
                        beg = token_index
                    continue

                prob = prob_mask[prob_index + beg]
                if prob > 0.15:
                    pass
                else:
                    for index in xrange(beg, token_index):
                        prob = prob_mask[prob_index + index]
                        base_prob = 1.0
                        if index == beg:
                            base_prob = 0.15
                        if base_prob * 0.2 < prob <= base_prob:
                            mask_label.append(sent[index])
                            sent[index] = MASK
                            mask_flag = True
                            mask_pos.append(sent_index * max_len + index)
                        elif base_prob * 0.1 < prob <= base_prob * 0.2:
                            mask_label.append(sent[index])
                            sent[index] = replace_ids[prob_index + index]
                            mask_flag = True
                            mask_pos.append(sent_index * max_len + index)
                        else:
                            mask_label.append(sent[index])
                            mask_pos.append(sent_index * max_len + index)

                if seg_label == -1:
                    beg = 0
                else:
                    beg = token_index
        else:
            for token_index, token in enumerate(sent):
                prob = prob_mask[prob_index + token_index]
                if prob > 0.15:
                    continue
                elif 0.03 < prob <= 0.15:
                    # mask
                    if token != SEP and token != CLS:
                        mask_label.append(sent[token_index])
                        sent[token_index] = MASK
                        mask_flag = True
                        mask_pos.append(sent_index * max_len + token_index)
                elif 0.015 < prob <= 0.03:
                    # random replace
                    if token != SEP and token != CLS:
                        mask_label.append(sent[token_index])
                        sent[token_index] = replace_ids[prob_index + token_index]
                        mask_flag = True
                        mask_pos.append(sent_index * max_len + token_index)
                else:
                    # keep the original token
                    if token != SEP and token != CLS:
                        mask_label.append(sent[token_index])
                        mask_pos.append(sent_index * max_len + token_index)
        pre_sent_len = len(sent)

    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
    return batch_tokens, mask_label, mask_pos
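The word-level branch above decides once per word and then masks all of its subwords together: the first subword is gated at base_prob = 0.15 and the rest at 1.0, so a selected word is replaced as a unit. The seg_labels convention this implies (inferred from how the code reads the labels, so treat it as an assumption): -1 for special tokens such as [CLS]/[SEP], 0 for a word-initial subword, 1 for a continuation subword. A toy sentence under that convention:

    # Toy ids for: [CLS] hello wor ##ld [SEP]
    sent       = [101, 7592, 2088, 22997, 102]
    seg_labels = [-1,  0,    0,    1,     -1]   # "wor" + "##ld" form one word
    mask_word_tags = [True]                     # whole-word masking for this sentence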
def pad_batch_data(insts,
                   pad_idx=0,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False,
                   return_seq_lens=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and attention bias.
    """
    return_list = []
    max_len = max(len(inst) for inst in insts)
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array(
        [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts])
    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array(
            [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        num_token = 0
        for inst in insts:
            num_token += len(inst)
        return_list += [num_token]

    if return_seq_lens:
        seq_lens = np.array([len(inst) for inst in insts])
        return_list += [seq_lens.astype("int64").reshape([-1, 1])]

    return return_list if len(return_list) > 1 else return_list[0]


if __name__ == "__main__":
    pass
build/lib/paddlepalm/reader/utils/mlm_batching.py
deleted (100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
    """
    Add mask for batch_tokens, return out, mask_label, mask_pos;
    Note: mask_pos corresponds to batch_tokens after padding.
    """
    max_len = max([len(sent) for sent in batch_tokens])
    mask_label = []
    mask_pos = []
    prob_mask = np.random.rand(total_token_num)
    # Note: the first token is [CLS], so [low=1]
    replace_ids = np.random.randint(1, high=vocab_size, size=total_token_num)
    pre_sent_len = 0
    prob_index = 0
    for sent_index, sent in enumerate(batch_tokens):
        mask_flag = False
        prob_index += pre_sent_len
        for token_index, token in enumerate(sent):
            prob = prob_mask[prob_index + token_index]
            if prob > 0.15:
                continue
            elif 0.03 < prob <= 0.15:
                # mask
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    sent[token_index] = MASK
                    mask_flag = True
                    mask_pos.append(sent_index * max_len + token_index)
            elif 0.015 < prob <= 0.03:
                # random replace
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    sent[token_index] = replace_ids[prob_index + token_index]
                    mask_flag = True
                    mask_pos.append(sent_index * max_len + token_index)
            else:
                # keep the original token
                if token != SEP and token != CLS:
                    mask_label.append(sent[token_index])
                    mask_pos.append(sent_index * max_len + token_index)
        pre_sent_len = len(sent)

        # ensure at least mask one word in a sentence
        while not mask_flag:
            token_index = int(np.random.randint(1, high=len(sent) - 1, size=1))
            if sent[token_index] != SEP and sent[token_index] != CLS:
                mask_label.append(sent[token_index])
                sent[token_index] = MASK
                mask_flag = True
                mask_pos.append(sent_index * max_len + token_index)
    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
    return batch_tokens, mask_label, mask_pos
def prepare_batch_data(insts,
                       total_token_num,
                       max_len=None,
                       voc_size=0,
                       pad_id=None,
                       cls_id=None,
                       sep_id=None,
                       mask_id=None,
                       task_id=0,
                       return_input_mask=True,
                       return_max_len=True,
                       return_num_token=False):
    """
    1. generate Tensor of data
    2. generate Tensor of position
    3. generate self attention mask, [shape: batch_size * max_len * max_len]
    """
    batch_src_ids = [inst[0] for inst in insts]
    batch_sent_ids = [inst[1] for inst in insts]
    batch_pos_ids = [inst[2] for inst in insts]

    # First step: do mask without padding
    out, mask_label, mask_pos = mask(
        batch_src_ids,
        total_token_num,
        vocab_size=voc_size,
        CLS=cls_id,
        SEP=sep_id,
        MASK=mask_id)
    # Second step: padding
    src_id, self_input_mask = pad_batch_data(
        out, max_len=max_len, pad_idx=pad_id, return_input_mask=True)
    pos_id = pad_batch_data(
        batch_pos_ids, max_len=max_len, pad_idx=pad_id,
        return_pos=False, return_input_mask=False)
    sent_id = pad_batch_data(
        batch_sent_ids, max_len=max_len, pad_idx=pad_id,
        return_pos=False, return_input_mask=False)
    task_ids = np.ones_like(src_id, dtype="int64") * task_id

    return_list = [
        src_id, pos_id, sent_id, self_input_mask, task_ids, mask_label, mask_pos
    ]

    return return_list if len(return_list) > 1 else return_list[0]
def pad_batch_data(insts,
                   max_len=None,
                   pad_idx=0,
                   return_pos=False,
                   return_input_mask=False,
                   return_max_len=False,
                   return_num_token=False):
    """
    Pad the instances to the max sequence length in batch, and generate the
    corresponding position data and input mask.
    """
    return_list = []
    if max_len is None:
        max_len = max(len(inst) for inst in insts)
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
    inst_data = np.array([
        list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
    ])
    return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]

    # position data
    if return_pos:
        inst_pos = np.array([
            list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]

    if return_input_mask:
        # This is used to avoid attention on paddings.
        input_mask_data = np.array(
            [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts])
        input_mask_data = np.expand_dims(input_mask_data, axis=-1)
        return_list += [input_mask_data.astype("float32")]

    if return_max_len:
        return_list += [max_len]

    if return_num_token:
        num_token = 0
        for inst in insts:
            num_token += len(inst)
        return_list += [num_token]

    return return_list if len(return_list) > 1 else return_list[0]


if __name__ == "__main__":
    pass
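A minimal sketch of pad_batch_data on toy instances (pad id 0 is an assumption):

insts = [[5, 6, 7], [8, 9]]
padded, input_mask = pad_batch_data(insts, pad_idx=0, return_input_mask=True)
print(padded.shape)           # (2, 3, 1): batch padded to the longest instance
print(input_mask[1].ravel())  # [1. 1. 0.]: attention is masked on the pad slot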
build/lib/paddlepalm/reader/utils/mrqa_helper.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# __repr__ below relies on the tokenizer's printable_text helper
import paddlepalm.tokenizer.ernie_tokenizer as tokenization


class MRQAExample(object):
    """A single training/test example for simple sequence classification.
    For examples without an answer, the start and end position are -1.
    """

    def __init__(self,
                 qas_id,
                 question_text,
                 doc_tokens,
                 orig_answer_text=None,
                 start_position=None,
                 end_position=None,
                 is_impossible=False):
        self.qas_id = qas_id
        self.question_text = question_text
        self.doc_tokens = doc_tokens
        self.orig_answer_text = orig_answer_text
        self.start_position = start_position
        self.end_position = end_position
        self.is_impossible = is_impossible

    def __str__(self):
        return self.__repr__()

    def __repr__(self):
        s = ""
        s += "qas_id: %s" % (tokenization.printable_text(self.qas_id))
        s += ", question_text: %s" % (
            tokenization.printable_text(self.question_text))
        s += ", doc_tokens: [%s]" % (" ".join(self.doc_tokens))
        if self.start_position:
            s += ", start_position: %d" % (self.start_position)
        if self.start_position:
            s += ", end_position: %d" % (self.end_position)
        if self.start_position:
            s += ", is_impossible: %r" % (self.is_impossible)
        return s


class MRQAFeature(object):
    """A single set of features of data."""

    def __init__(self,
                 unique_id,
                 example_index,
                 doc_span_index,
                 tokens,
                 token_to_orig_map,
                 token_is_max_context,
                 input_ids,
                 input_mask,
                 segment_ids,
                 start_position=None,
                 end_position=None,
                 is_impossible=None):
        self.unique_id = unique_id
        self.example_index = example_index
        self.doc_span_index = doc_span_index
        self.tokens = tokens
        self.token_to_orig_map = token_to_orig_map
        self.token_is_max_context = token_is_max_context
        self.input_ids = input_ids
        self.input_mask = input_mask
        self.segment_ids = segment_ids
        self.start_position = start_position
        self.end_position = end_position
        self.is_impossible = is_impossible
build/lib/paddlepalm/reader/utils/reader4ernie.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import sys
import os
import json
import random
import logging
import numpy as np
import six
from io import open
from collections import namedtuple

import paddlepalm.tokenizer.ernie_tokenizer as tokenization
from paddlepalm.reader.utils.batching4ernie import pad_batch_data
from paddlepalm.reader.utils.mlm_batching import prepare_batch_data


log = logging.getLogger(__name__)

if six.PY3:
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')


def csv_reader(fd, delimiter='\t'):
    def gen():
        for i in fd:
            slots = i.rstrip('\n').split(delimiter)
            if len(slots) == 1:
                yield slots,
            else:
                yield slots
    return gen()
class BaseReader(object):
    def __init__(self,
                 vocab_path,
                 label_map_config=None,
                 max_seq_len=512,
                 do_lower_case=True,
                 in_tokens=False,
                 is_inference=False,
                 random_seed=None,
                 tokenizer="FullTokenizer",
                 is_classify=True,
                 is_regression=False,
                 for_cn=True,
                 task_id=0):
        self.max_seq_len = max_seq_len
        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_path, do_lower_case=do_lower_case)
        self.vocab = self.tokenizer.vocab
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.in_tokens = in_tokens
        self.is_inference = is_inference
        self.for_cn = for_cn
        self.task_id = task_id

        np.random.seed(random_seed)

        self.is_classify = is_classify
        self.is_regression = is_regression
        self.current_example = 0
        self.current_epoch = 0
        self.num_examples = 0

        self.examples = {}

        if label_map_config:
            with open(label_map_config, encoding='utf8') as f:
                self.label_map = json.load(f)
        else:
            self.label_map = None

    def get_train_progress(self):
        """Gets progress for training phase."""
        return self.current_example, self.current_epoch

    def _read_tsv(self, input_file, quotechar=None):
        """Reads a tab separated value file."""
        with open(input_file, 'r', encoding='utf8') as f:
            reader = csv_reader(f)
            headers = next(reader)
            Example = namedtuple('Example', headers)

            examples = []
            for line in reader:
                example = Example(*line)
                examples.append(example)
            return examples

    def _truncate_seq_pair(self, tokens_a, tokens_b, max_length):
        """Truncates a sequence pair in place to the maximum length."""
        # This is a simple heuristic which will always truncate the longer
        # sequence one token at a time. This makes more sense than truncating
        # an equal percent of tokens from each, since if one sequence is very
        # short then each token that's truncated likely contains more
        # information than a longer sequence.
        while True:
            total_length = len(tokens_a) + len(tokens_b)
            if total_length <= max_length:
                break
            if len(tokens_a) > len(tokens_b):
                tokens_a.pop()
            else:
                tokens_b.pop()

    def _convert_example_to_record(self, example, max_seq_length, tokenizer):
        """Converts a single `Example` into a single `Record`."""
        text_a = tokenization.convert_to_unicode(example.text_a)
        tokens_a = tokenizer.tokenize(text_a)
        tokens_b = None

        has_text_b = False
        if isinstance(example, dict):
            has_text_b = "text_b" in example.keys()
        else:
            has_text_b = "text_b" in example._fields

        if has_text_b:
            text_b = tokenization.convert_to_unicode(example.text_b)
            tokens_b = tokenizer.tokenize(text_b)

        if tokens_b:
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            self._truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > max_seq_length - 2:
                tokens_a = tokens_a[0:(max_seq_length - 2)]

        # The convention in BERT/ERNIE is:
        # (a) For sequence pairs:
        #  tokens:   [CLS] is this jack ##son ##ville ? [SEP] no it is not . [SEP]
        #  type_ids: 0     0  0    0    0     0       0 0     1  1  1  1   1 1
        # (b) For single sequences:
        #  tokens:   [CLS] the dog is hairy . [SEP]
        #  type_ids: 0     0   0   0  0     0 0
        #
        # Where "type_ids" are used to indicate whether this is the first
        # sequence or the second sequence. The embedding vectors for `type=0`
        # and `type=1` were learned during pre-training and are added to the
        # wordpiece embedding vector (and position vector). This is not
        # *strictly* necessary since the [SEP] token unambiguously separates
        # the sequences, but it makes it easier for the model to learn the
        # concept of sequences.
        #
        # For classification tasks, the first vector (corresponding to [CLS])
        # is used as the "sentence vector". Note that this only makes sense
        # because the entire model is fine-tuned.
        tokens = []
        text_type_ids = []
        tokens.append("[CLS]")
        text_type_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            text_type_ids.append(0)
        tokens.append("[SEP]")
        text_type_ids.append(0)

        if tokens_b:
            for token in tokens_b:
                tokens.append(token)
                text_type_ids.append(1)
            tokens.append("[SEP]")
            text_type_ids.append(1)

        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        position_ids = list(range(len(token_ids)))

        if self.is_inference:
            Record = namedtuple('Record',
                                ['token_ids', 'text_type_ids', 'position_ids'])
            record = Record(
                token_ids=token_ids,
                text_type_ids=text_type_ids,
                position_ids=position_ids)
        else:
            if self.label_map:
                label_id = self.label_map[example.label]
            else:
                label_id = example.label

            Record = namedtuple('Record', [
                'token_ids', 'text_type_ids', 'position_ids', 'label_id', 'qid'
            ])

            qid = None
            if "qid" in example._fields:
                qid = example.qid

            record = Record(
                token_ids=token_ids,
                text_type_ids=text_type_ids,
                position_ids=position_ids,
                label_id=label_id,
                qid=qid)
        return record

    def _prepare_batch_data(self, examples, batch_size, phase=None):
        """generate batch records"""
        batch_records, max_len = [], 0
        for index, example in enumerate(examples):
            if phase == "train":
                self.current_example = index
            record = self._convert_example_to_record(
                example, self.max_seq_len, self.tokenizer)
            max_len = max(max_len, len(record.token_ids))
            if self.in_tokens:
                to_append = (len(batch_records) + 1) * max_len <= batch_size
            else:
                to_append = len(batch_records) < batch_size
            if to_append:
                batch_records.append(record)
            else:
                yield self._pad_batch_records(batch_records)
                batch_records, max_len = [record], len(record.token_ids)

        if phase == 'pred' and batch_records:
            print('the last batch yielded.')
            yield self._pad_batch_records(batch_records)

    def get_num_examples(self, input_file=None, phase=None):
        if self.examples is not None:
            if phase is None:
                phase = 'all'
            return len(self.examples[phase])
        else:
            assert input_file is not None, \
                "Argument input_file should be given or the data_generator " \
                "should be created when this func is called."
            examples = self._read_tsv(input_file)
            return len(examples)

    def data_generator(self,
                       input_file,
                       batch_size,
                       epoch,
                       dev_count=1,
                       shuffle=True,
                       phase=None):
        examples = self._read_tsv(input_file)
        if phase is None:
            phase = 'all'
        self.examples[phase] = examples

        def wrapper():
            all_dev_batches = []
            if epoch is None:
                num_epochs = 99999999
            else:
                num_epochs = epoch
            for epoch_index in range(num_epochs):
                if phase == "train":
                    self.current_example = 0
                    self.current_epoch = epoch_index
                if shuffle:
                    np.random.shuffle(examples)

                for batch_data in self._prepare_batch_data(
                        examples, batch_size, phase=phase):
                    if len(all_dev_batches) < dev_count:
                        all_dev_batches.append(batch_data)
                    if len(all_dev_batches) == dev_count:
                        for batch in all_dev_batches:
                            yield batch
                        all_dev_batches = []

        def f():
            for i in wrapper():
                yield i

        # def f():
        #     try:
        #         for i in wrapper():
        #             yield i
        #     except Exception as e:
        #         import traceback
        #         traceback.print_exc()

        return f
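The in_tokens branch of _prepare_batch_data sizes batches by a token budget rather than a fixed example count: a record is admitted only while (len(batch) + 1) * max_len stays within batch_size. A standalone sketch of that rule with made-up lengths and a 16-token budget (unlike the reader, this sketch always yields the final partial batch):

def token_budget_batches(lengths, budget):
    """Group items so that (num_items * running_max_len) <= budget."""
    batch, max_len = [], 0
    for n in lengths:
        max_len = max(max_len, n)
        if (len(batch) + 1) * max_len <= budget:
            batch.append(n)
        else:
            yield batch
            batch, max_len = [n], n
    if batch:
        yield batch

print(list(token_budget_batches([3, 5, 4, 8, 2, 7], budget=16)))
# [[3, 5, 4], [8, 2], [7]] -> each batch, padded to its longest item,
# stays within the 16-token budget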
class MaskLMReader(BaseReader):

    def _convert_example_to_record(self, example, max_seq_length, tokenizer):
        """Converts a single `Example` into a single `Record`."""
        text_a = tokenization.convert_to_unicode(example.text_a)
        tokens_a = tokenizer.tokenize(text_a)
        tokens_b = None

        has_text_b = False
        if isinstance(example, dict):
            has_text_b = "text_b" in example.keys()
        else:
            has_text_b = "text_b" in example._fields

        if has_text_b:
            text_b = tokenization.convert_to_unicode(example.text_b)
            tokens_b = tokenizer.tokenize(text_b)

        if tokens_b:
            # Modifies `tokens_a` and `tokens_b` in place so that the total
            # length is less than the specified length.
            # Account for [CLS], [SEP], [SEP] with "- 3"
            self._truncate_seq_pair(tokens_a, tokens_b, max_seq_length - 3)
        else:
            # Account for [CLS] and [SEP] with "- 2"
            if len(tokens_a) > max_seq_length - 2:
                tokens_a = tokens_a[0:(max_seq_length - 2)]

        # token/type-id conventions are the same as documented in
        # BaseReader._convert_example_to_record above
        tokens = []
        text_type_ids = []
        tokens.append("[CLS]")
        text_type_ids.append(0)
        for token in tokens_a:
            tokens.append(token)
            text_type_ids.append(0)
        tokens.append("[SEP]")
        text_type_ids.append(0)

        if tokens_b:
            for token in tokens_b:
                tokens.append(token)
                text_type_ids.append(1)
            tokens.append("[SEP]")
            text_type_ids.append(1)

        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        position_ids = list(range(len(token_ids)))

        Record = namedtuple('Record',
                            ['token_ids', 'text_type_ids', 'position_ids'])
        record = Record(
            token_ids=token_ids,
            text_type_ids=text_type_ids,
            position_ids=position_ids)

        return record

    def batch_reader(self, examples, batch_size, in_tokens, phase):
        batch, total_token_num, max_len = [], 0, 0
        for e in examples:
            parsed_line = self._convert_example_to_record(
                e, self.max_seq_len, self.tokenizer)
            token_ids, sent_ids, pos_ids = parsed_line
            max_len = max(max_len, len(token_ids))
            if in_tokens:
                to_append = (len(batch) + 1) * max_len <= batch_size
            else:
                to_append = len(batch) < batch_size
            if to_append:
                batch.append(parsed_line)
                total_token_num += len(token_ids)
            else:
                yield batch, total_token_num
                batch, total_token_num, max_len = [parsed_line], len(
                    token_ids), len(token_ids)

        if len(batch) > 0 and phase == 'pred':
            yield batch, total_token_num

    def data_generator(self,
                       input_file,
                       batch_size,
                       epoch,
                       dev_count=1,
                       shuffle=True,
                       phase=None):
        examples = self._read_tsv(input_file)
        if phase is None:
            phase = 'all'
        self.examples[phase] = examples

        def wrapper():
            all_dev_batches = []
            if epoch is None:
                num_epochs = 99999999
            else:
                num_epochs = epoch
            for epoch_index in range(num_epochs):
                if phase == "train":
                    self.current_example = 0
                    self.current_epoch = epoch_index
                if shuffle:
                    np.random.shuffle(examples)

                all_dev_batches = []
                for batch_data, total_token_num in self.batch_reader(
                        examples, batch_size, self.in_tokens, phase=phase):
                    # note: the vocab size and mask id are not set by
                    # BaseReader.__init__, so they are derived from the
                    # vocab here
                    batch_data = prepare_batch_data(
                        batch_data,
                        total_token_num,
                        voc_size=len(self.vocab),
                        pad_id=self.pad_id,
                        cls_id=self.cls_id,
                        sep_id=self.sep_id,
                        mask_id=self.vocab["[MASK]"],
                        max_len=self.max_seq_len,
                        return_input_mask=True,
                        return_max_len=False,
                        return_num_token=False)

                    if len(all_dev_batches) < dev_count:
                        all_dev_batches.append(batch_data)
                    if len(all_dev_batches) == dev_count:
                        for batch in all_dev_batches:
                            yield batch
                        all_dev_batches = []

        return wrapper
class ClassifyReader(BaseReader):
    def _read_tsv(self, input_file, quotechar=None):
        """Reads a tab separated value file."""
        with open(input_file, 'r', encoding='utf8') as f:
            reader = csv_reader(f)
            headers = next(reader)
            text_indices = [
                index for index, h in enumerate(headers) if h != "label"
            ]
            Example = namedtuple('Example', headers)

            examples = []
            for line in reader:
                for index, text in enumerate(line):
                    if index in text_indices:
                        if self.for_cn:
                            line[index] = text.replace(' ', '')
                        else:
                            line[index] = text
                example = Example(*line)
                examples.append(example)
            return examples

    def _pad_batch_records(self, batch_records):
        batch_token_ids = [record.token_ids for record in batch_records]
        batch_text_type_ids = [
            record.text_type_ids for record in batch_records
        ]
        batch_position_ids = [record.position_ids for record in batch_records]

        if not self.is_inference:
            batch_labels = [record.label_id for record in batch_records]
            if self.is_classify:
                batch_labels = np.array(batch_labels).astype("int64").reshape(
                    [-1, 1])
            elif self.is_regression:
                batch_labels = np.array(batch_labels).astype(
                    "float32").reshape([-1, 1])

            if batch_records[0].qid:
                batch_qids = [record.qid for record in batch_records]
                batch_qids = np.array(batch_qids).astype("int64").reshape(
                    [-1, 1])
            else:
                batch_qids = np.array([]).astype("int64").reshape([-1, 1])

        # padding
        padded_token_ids, input_mask = pad_batch_data(
            batch_token_ids, pad_idx=self.pad_id, return_input_mask=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids, pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids, pad_idx=self.pad_id)
        padded_task_ids = np.ones_like(
            padded_token_ids, dtype="int64") * self.task_id

        return_list = [
            padded_token_ids, padded_text_type_ids, padded_position_ids,
            padded_task_ids, input_mask
        ]
        if not self.is_inference:
            return_list += [batch_labels, batch_qids]

        return return_list
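A minimal sketch of how a ClassifyReader might be consumed (the vocab path and TSV file are hypothetical; the TSV needs a header line such as text_a<TAB>label):

reader = ClassifyReader(
    vocab_path='pretrain/ernie/vocab.txt',  # hypothetical path
    max_seq_len=128,
    do_lower_case=True)

gen = reader.data_generator(
    'data/train.tsv',  # hypothetical TSV with a "text_a\tlabel" header
    batch_size=32,
    epoch=2,
    shuffle=True,
    phase='train')

for batch in gen():
    token_ids, type_ids, pos_ids, task_ids, input_mask, labels, qids = batch
    # token_ids: int64 [batch, max_len_in_batch, 1]; input_mask: float32
    break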
class SequenceLabelReader(BaseReader):
    def _pad_batch_records(self, batch_records):
        batch_token_ids = [record.token_ids for record in batch_records]
        batch_text_type_ids = [
            record.text_type_ids for record in batch_records
        ]
        batch_position_ids = [record.position_ids for record in batch_records]
        batch_label_ids = [record.label_ids for record in batch_records]

        # padding
        padded_token_ids, input_mask, batch_seq_lens = pad_batch_data(
            batch_token_ids,
            pad_idx=self.pad_id,
            return_input_mask=True,
            return_seq_lens=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids, pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids, pad_idx=self.pad_id)
        padded_label_ids = pad_batch_data(
            batch_label_ids, pad_idx=len(self.label_map) - 1)
        padded_task_ids = np.ones_like(
            padded_token_ids, dtype="int64") * self.task_id

        return_list = [
            padded_token_ids, padded_text_type_ids, padded_position_ids,
            padded_task_ids, input_mask, padded_label_ids, batch_seq_lens
        ]
        return return_list

    def _reseg_token_label(self, tokens, labels, tokenizer):
        assert len(tokens) == len(labels)
        ret_tokens = []
        ret_labels = []
        for token, label in zip(tokens, labels):
            sub_token = tokenizer.tokenize(token)
            if len(sub_token) == 0:
                continue
            ret_tokens.extend(sub_token)
            if len(sub_token) == 1:
                ret_labels.append(label)
                continue

            if label == "O" or label.startswith("I-"):
                ret_labels.extend([label] * len(sub_token))
            elif label.startswith("B-"):
                i_label = "I-" + label[2:]
                ret_labels.extend([label] + [i_label] * (len(sub_token) - 1))
            elif label.startswith("S-"):
                b_label = "B-" + label[2:]
                e_label = "E-" + label[2:]
                i_label = "I-" + label[2:]
                ret_labels.extend([b_label] + [i_label] *
                                  (len(sub_token) - 2) + [e_label])
            elif label.startswith("E-"):
                i_label = "I-" + label[2:]
                ret_labels.extend([i_label] * (len(sub_token) - 1) + [label])

        assert len(ret_tokens) == len(ret_labels)
        return ret_tokens, ret_labels

    def _convert_example_to_record(self, example, max_seq_length, tokenizer):
        # text_a and label fields are separated by the \2 control character
        # in ERNIE-format sequence label data (the character is invisible
        # in rendered listings)
        tokens = tokenization.convert_to_unicode(example.text_a).split(u"\2")
        labels = tokenization.convert_to_unicode(example.label).split(u"\2")
        tokens, labels = self._reseg_token_label(tokens, labels, tokenizer)

        if len(tokens) > max_seq_length - 2:
            tokens = tokens[0:(max_seq_length - 2)]
            labels = labels[0:(max_seq_length - 2)]

        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        position_ids = list(range(len(token_ids)))
        text_type_ids = [0] * len(token_ids)
        no_entity_id = len(self.label_map) - 1
        label_ids = [no_entity_id] + [
            self.label_map[label] for label in labels
        ] + [no_entity_id]

        Record = namedtuple(
            'Record',
            ['token_ids', 'text_type_ids', 'position_ids', 'label_ids'])
        record = Record(
            token_ids=token_ids,
            text_type_ids=text_type_ids,
            position_ids=position_ids,
            label_ids=label_ids)
        return record
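The subword re-segmentation above can be exercised in isolation; a sketch with a stub tokenizer (the WordPiece split of 'Washington' is made up):

class StubTokenizer(object):
    """Pretends 'Washington' splits into three WordPiece subwords."""
    def tokenize(self, token):
        return {'Washington': ['Wash', '##ing', '##ton']}.get(token, [token])

reader_like = SequenceLabelReader.__new__(SequenceLabelReader)  # skip __init__
tokens, labels = reader_like._reseg_token_label(
    ['Washington', 'is', 'nice'], ['B-LOC', 'O', 'O'], StubTokenizer())
print(tokens)  # ['Wash', '##ing', '##ton', 'is', 'nice']
print(labels)  # ['B-LOC', 'I-LOC', 'I-LOC', 'O', 'O']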
class ExtractEmbeddingReader(BaseReader):
    def _pad_batch_records(self, batch_records):
        batch_token_ids = [record.token_ids for record in batch_records]
        batch_text_type_ids = [
            record.text_type_ids for record in batch_records
        ]
        batch_position_ids = [record.position_ids for record in batch_records]

        # padding
        padded_token_ids, input_mask, seq_lens = pad_batch_data(
            batch_token_ids,
            pad_idx=self.pad_id,
            return_input_mask=True,
            return_seq_lens=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids, pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids, pad_idx=self.pad_id)
        padded_task_ids = np.ones_like(
            padded_token_ids, dtype="int64") * self.task_id

        return_list = [
            padded_token_ids, padded_text_type_ids, padded_position_ids,
            padded_task_ids, input_mask, seq_lens
        ]
        return return_list
class MRCReader(BaseReader):
    def __init__(self,
                 vocab_path,
                 label_map_config=None,
                 max_seq_len=512,
                 do_lower_case=True,
                 in_tokens=False,
                 random_seed=None,
                 tokenizer="FullTokenizer",
                 is_classify=True,
                 is_regression=False,
                 for_cn=True,
                 task_id=0,
                 doc_stride=128,
                 max_query_length=64):
        self.max_seq_len = max_seq_len
        self.tokenizer = tokenization.FullTokenizer(
            vocab_file=vocab_path, do_lower_case=do_lower_case)
        self.vocab = self.tokenizer.vocab
        self.pad_id = self.vocab["[PAD]"]
        self.cls_id = self.vocab["[CLS]"]
        self.sep_id = self.vocab["[SEP]"]
        self.in_tokens = in_tokens
        self.for_cn = for_cn
        self.task_id = task_id
        self.doc_stride = doc_stride
        self.max_query_length = max_query_length
        self.examples = {}
        self.features = {}

        if random_seed is not None:
            np.random.seed(random_seed)

        self.current_example = 0
        self.current_epoch = 0
        self.num_examples = 0

        self.Example = namedtuple('Example', [
            'qas_id', 'question_text', 'doc_tokens', 'orig_answer_text',
            'start_position', 'end_position'
        ])
        self.Feature = namedtuple("Feature", [
            "unique_id", "example_index", "doc_span_index", "tokens",
            "token_to_orig_map", "token_is_max_context", "token_ids",
            "position_ids", "text_type_ids", "start_position", "end_position"
        ])
        self.DocSpan = namedtuple("DocSpan", ["start", "length"])

    def _read_json(self, input_file, is_training):
        examples = []
        with open(input_file, "r", encoding='utf8') as f:
            input_data = json.load(f)["data"]
        for entry in input_data:
            for paragraph in entry["paragraphs"]:
                paragraph_text = paragraph["context"]
                for qa in paragraph["qas"]:
                    qas_id = qa["id"]
                    question_text = qa["question"]
                    start_pos = None
                    end_pos = None
                    orig_answer_text = None

                    if is_training:
                        if len(qa["answers"]) != 1:
                            raise ValueError(
                                "For training, each question should have "
                                "exactly 1 answer.")

                        answer = qa["answers"][0]
                        orig_answer_text = answer["text"]
                        answer_offset = answer["answer_start"]
                        answer_length = len(orig_answer_text)
                        # split the context into [before, answer, after], so
                        # the answer span is always doc_tokens[1]
                        doc_tokens = [
                            paragraph_text[:answer_offset],
                            paragraph_text[answer_offset:answer_offset +
                                           answer_length],
                            paragraph_text[answer_offset + answer_length:]
                        ]

                        start_pos = 1
                        end_pos = 1

                        actual_text = " ".join(
                            doc_tokens[start_pos:(end_pos + 1)])
                        if actual_text.find(orig_answer_text) == -1:
                            log.info("Could not find answer: '%s' vs. '%s'",
                                     actual_text, orig_answer_text)
                            continue
                    else:
                        doc_tokens = tokenization.tokenize_chinese_chars(
                            paragraph_text)

                    example = self.Example(
                        qas_id=qas_id,
                        question_text=question_text,
                        doc_tokens=doc_tokens,
                        orig_answer_text=orig_answer_text,
                        start_position=start_pos,
                        end_position=end_pos)
                    examples.append(example)
        return examples

    def _improve_answer_span(self, doc_tokens, input_start, input_end,
                             tokenizer, orig_answer_text):
        # search for the smallest sub-span whose re-tokenization exactly
        # matches the tokenized answer text
        tok_answer_text = " ".join(tokenizer.tokenize(orig_answer_text))

        for new_start in range(input_start, input_end + 1):
            for new_end in range(input_end, new_start - 1, -1):
                text_span = " ".join(doc_tokens[new_start:(new_end + 1)])
                if text_span == tok_answer_text:
                    return (new_start, new_end)

        return (input_start, input_end)

    def _check_is_max_context(self, doc_spans, cur_span_index, position):
        # a token can appear in several overlapping doc spans; the span where
        # it has the most surrounding context (plus a small bonus for longer
        # spans) is treated as its canonical one
        best_score = None
        best_span_index = None
        for (span_index, doc_span) in enumerate(doc_spans):
            end = doc_span.start + doc_span.length - 1
            if position < doc_span.start:
                continue
            if position > end:
                continue
            num_left_context = position - doc_span.start
            num_right_context = end - position
            score = min(num_left_context,
                        num_right_context) + 0.01 * doc_span.length
            if best_score is None or score > best_score:
                best_score = score
                best_span_index = span_index

        return cur_span_index == best_span_index

    def _convert_example_to_feature(self, examples, max_seq_length, tokenizer,
                                    is_training):
        features = []
        unique_id = 1000000000

        for (example_index, example) in enumerate(examples):
            query_tokens = tokenizer.tokenize(example.question_text)
            if len(query_tokens) > self.max_query_length:
                query_tokens = query_tokens[0:self.max_query_length]
            tok_to_orig_index = []
            orig_to_tok_index = []
            all_doc_tokens = []
            for (i, token) in enumerate(example.doc_tokens):
                orig_to_tok_index.append(len(all_doc_tokens))
                sub_tokens = tokenizer.tokenize(token)
                for sub_token in sub_tokens:
                    tok_to_orig_index.append(i)
                    all_doc_tokens.append(sub_token)

            tok_start_position = None
            tok_end_position = None
            if is_training:
                tok_start_position = orig_to_tok_index[example.start_position]
                if example.end_position < len(example.doc_tokens) - 1:
                    tok_end_position = orig_to_tok_index[
                        example.end_position + 1] - 1
                else:
                    tok_end_position = len(all_doc_tokens) - 1
                (tok_start_position,
                 tok_end_position) = self._improve_answer_span(
                     all_doc_tokens, tok_start_position, tok_end_position,
                     tokenizer, example.orig_answer_text)

            max_tokens_for_doc = max_seq_length - len(query_tokens) - 3
            doc_spans = []
            start_offset = 0
            while start_offset < len(all_doc_tokens):
                length = len(all_doc_tokens) - start_offset
                if length > max_tokens_for_doc:
                    length = max_tokens_for_doc
                doc_spans.append(
                    self.DocSpan(start=start_offset, length=length))
                if start_offset + length == len(all_doc_tokens):
                    break
                start_offset += min(length, self.doc_stride)

            for (doc_span_index, doc_span) in enumerate(doc_spans):
                tokens = []
                token_to_orig_map = {}
                token_is_max_context = {}
                text_type_ids = []
                tokens.append("[CLS]")
                text_type_ids.append(0)
                for token in query_tokens:
                    tokens.append(token)
                    text_type_ids.append(0)
                tokens.append("[SEP]")
                text_type_ids.append(0)

                for i in range(doc_span.length):
                    split_token_index = doc_span.start + i
                    token_to_orig_map[len(tokens)] = tok_to_orig_index[
                        split_token_index]

                    is_max_context = self._check_is_max_context(
                        doc_spans, doc_span_index, split_token_index)
                    token_is_max_context[len(tokens)] = is_max_context
                    tokens.append(all_doc_tokens[split_token_index])
                    text_type_ids.append(1)
                tokens.append("[SEP]")
                text_type_ids.append(1)

                token_ids = tokenizer.convert_tokens_to_ids(tokens)
                position_ids = list(range(len(token_ids)))
                start_position = None
                end_position = None
                if is_training:
                    doc_start = doc_span.start
                    doc_end = doc_span.start + doc_span.length - 1
                    out_of_span = False
                    if not (tok_start_position >= doc_start and
                            tok_end_position <= doc_end):
                        out_of_span = True
                    if out_of_span:
                        start_position = 0
                        end_position = 0
                    else:
                        # positions are re-based onto the [CLS] query [SEP]
                        # prefix of this span
                        doc_offset = len(query_tokens) + 2
                        start_position = tok_start_position - doc_start + doc_offset
                        end_position = tok_end_position - doc_start + doc_offset

                feature = self.Feature(
                    unique_id=unique_id,
                    example_index=example_index,
                    doc_span_index=doc_span_index,
                    tokens=tokens,
                    token_to_orig_map=token_to_orig_map,
                    token_is_max_context=token_is_max_context,
                    token_ids=token_ids,
                    position_ids=position_ids,
                    text_type_ids=text_type_ids,
                    start_position=start_position,
                    end_position=end_position)
                features.append(feature)

                unique_id += 1

        return features

    def _prepare_batch_data(self, records, batch_size, phase=None):
        """generate batch records"""
        batch_records, max_len = [], 0

        for index, record in enumerate(records):
            if phase == "train":
                self.current_example = index
            max_len = max(max_len, len(record.token_ids))
            if self.in_tokens:
                to_append = (len(batch_records) + 1) * max_len <= batch_size
            else:
                to_append = len(batch_records) < batch_size
            if to_append:
                batch_records.append(record)
            else:
                yield self._pad_batch_records(batch_records, phase == "train")
                batch_records, max_len = [record], len(record.token_ids)

        if phase == 'pred' and batch_records:
            yield self._pad_batch_records(batch_records, phase == "train")

    def _pad_batch_records(self, batch_records, is_training):
        batch_token_ids = [record.token_ids for record in batch_records]
        batch_text_type_ids = [
            record.text_type_ids for record in batch_records
        ]
        batch_position_ids = [record.position_ids for record in batch_records]
        if is_training:
            batch_start_position = [
                record.start_position for record in batch_records
            ]
            batch_end_position = [
                record.end_position for record in batch_records
            ]
            batch_start_position = np.array(batch_start_position).astype(
                "int64").reshape([-1, 1])
            batch_end_position = np.array(batch_end_position).astype(
                "int64").reshape([-1, 1])
        else:
            batch_size = len(batch_token_ids)
            batch_start_position = np.zeros(
                shape=[batch_size, 1], dtype="int64")
            batch_end_position = np.zeros(
                shape=[batch_size, 1], dtype="int64")

        batch_unique_ids = [record.unique_id for record in batch_records]
        batch_unique_ids = np.array(batch_unique_ids).astype("int64").reshape(
            [-1, 1])

        # padding
        padded_token_ids, input_mask = pad_batch_data(
            batch_token_ids, pad_idx=self.pad_id, return_input_mask=True)
        padded_text_type_ids = pad_batch_data(
            batch_text_type_ids, pad_idx=self.pad_id)
        padded_position_ids = pad_batch_data(
            batch_position_ids, pad_idx=self.pad_id)
        padded_task_ids = np.ones_like(
            padded_token_ids, dtype="int64") * self.task_id

        return_list = [
            padded_token_ids, padded_text_type_ids, padded_position_ids,
            padded_task_ids, input_mask, batch_start_position,
            batch_end_position, batch_unique_ids
        ]

        return return_list

    def get_num_examples(self, phase):
        return len(self.features[phase])

    def get_features(self, phase):
        return self.features[phase]

    def get_examples(self, phase):
        return self.examples[phase]

    def data_generator(self,
                       input_file,
                       batch_size,
                       epoch,
                       dev_count=1,
                       shuffle=True,
                       phase=None):
        examples = self.examples.get(phase, None)
        features = self.features.get(phase, None)
        if not examples:
            examples = self._read_json(input_file, phase == "train")
            features = self._convert_example_to_feature(
                examples, self.max_seq_len, self.tokenizer, phase == "train")
            self.examples[phase] = examples
            self.features[phase] = features

        def wrapper():
            all_dev_batches = []
            if epoch is None:
                num_epochs = 99999999
            else:
                num_epochs = epoch
            for epoch_index in range(num_epochs):
                if phase == "train":
                    self.current_example = 0
                    self.current_epoch = epoch_index
                if phase == "train" and shuffle:
                    np.random.shuffle(features)

                for batch_data in self._prepare_batch_data(
                        features, batch_size, phase=phase):
                    if len(all_dev_batches) < dev_count:
                        all_dev_batches.append(batch_data)
                    if len(all_dev_batches) == dev_count:
                        for batch in all_dev_batches:
                            yield batch
                        all_dev_batches = []

        return wrapper


if __name__ == '__main__':
    pass
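The doc_stride sliding window in _convert_example_to_feature carves a long passage into overlapping spans; a standalone sketch with toy numbers (150 doc tokens, a 100-token budget and stride 64 are assumptions):

from collections import namedtuple

DocSpan = namedtuple("DocSpan", ["start", "length"])

def make_doc_spans(num_doc_tokens, max_tokens_for_doc, doc_stride):
    spans, start_offset = [], 0
    while start_offset < num_doc_tokens:
        length = min(num_doc_tokens - start_offset, max_tokens_for_doc)
        spans.append(DocSpan(start=start_offset, length=length))
        if start_offset + length == num_doc_tokens:
            break
        start_offset += min(length, doc_stride)
    return spans

print(make_doc_spans(150, 100, 64))
# [DocSpan(start=0, length=100), DocSpan(start=64, length=86)]
# tokens 64-99 appear in both spans; _check_is_max_context picks, for each
# token, the span where it sits with the most context on both sides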
build/lib/paddlepalm/task_instance.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlepalm.interface import reader as base_reader
from paddlepalm.interface import task_paradigm as base_paradigm
import os
import json
from paddle import fluid


class TaskInstance(object):

    def __init__(self, name, id, config={}, verbose=True):
        self._name = name
        self._config = config
        self._verbose = verbose

        self._save_infermodel_path = os.path.join(self._config['save_path'],
                                                  'infer_model')
        self._save_ckpt_path = os.path.join(self._config['save_path'], 'ckpt')

        # following flags can be fetched from the instance config file
        self._is_target = config.get('is_target', True)
        self._is_first_target = config.get('is_first_target', False)
        self._task_reuse_scope = config.get('task_reuse_scope', name)

        self._feeded_var_names = None
        self._target_vars = None

        # training process management
        self._mix_ratio = None
        self._expected_train_steps = None
        self._expected_train_epochs = None
        self._steps_pur_epoch = None
        self._cur_train_epoch = 0
        self._cur_train_step = 0
        self._train_finish = False

        # dataset readers for each running phase (train/eval/pred);
        # the key is the phase name, the value is a Reader instance
        self._reader = {'train': None, 'eval': None, 'pred': None}
        self._input_layer = None
        self._inputname_to_varname = {}
        self._task_layer = {'train': None, 'eval': None, 'pred': None}
        self._pred_input_name_list = []
        self._pred_input_varname_list = []
        self._pred_fetch_name_list = []
        self._pred_fetch_var_list = []

        self._Reader = None
        self._Paradigm = None

        self._exe = fluid.Executor(fluid.CPUPlace())

        self._save_protocol = {
            'input_names': 'self._pred_input_name_list',
            'input_varnames': 'self._pred_input_varname_list',
            'fetch_list': 'self._pred_fetch_name_list'
        }

    def build_task_layer(self, net_inputs, phase):
        output_vars = self._task_layer[phase].build(net_inputs)
        if phase == 'pred':
            self._pred_fetch_name_list, self._pred_fetch_var_list = zip(
                *output_vars.items())
        return output_vars

    def postprocess(self, rt_outputs, phase):
        return self._task_layer[phase].postprocess(rt_outputs)

    def epoch_postprocess(self, epoch_inputs, phase):
        return self._task_layer[phase].epoch_postprocess(epoch_inputs)

    def save(self, suffix=''):
        dirpath = self._save_infermodel_path + suffix
        self._pred_input_varname_list = [
            str(i) for i in self._pred_input_varname_list
        ]

        fluid.io.save_inference_model(dirpath, self._pred_input_varname_list,
                                      self._pred_fetch_var_list, self._exe)
        # fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, params_filename='__params__')
        print(self._name + ': inference model saved at ' + dirpath)

        conf = {}
        for k, strv in self._save_protocol.items():
            exec('v={}'.format(strv))
            conf[k] = v
        with open(os.path.join(dirpath, '__conf__'), 'w') as writer:
            writer.write(json.dumps(conf, indent=1))

    def load(self, infer_model_path=None):
        if infer_model_path is None:
            infer_model_path = self._save_infermodel_path
        for k, v in json.load(
                open(os.path.join(infer_model_path, '__conf__'))).items():
            strv = self._save_protocol[k]
            exec('{}=v'.format(strv))
        pred_prog, self._pred_input_varname_list, self._pred_fetch_var_list = \
            fluid.io.load_inference_model(infer_model_path, self._exe)
        # pred_prog, self._pred_input_varname_list, self._pred_fetch_var_list = \
        #     fluid.io.load_inference_model(infer_model_path, self._exe, params_filename='__params__')
        print(self._name + ': inference model loaded from ' +
              infer_model_path)
        return pred_prog

    @property
    def name(self):
        return self._name

    @property
    def Reader(self):
        return self._Reader

    @Reader.setter
    def Reader(self, cls):
        assert base_reader.__name__ == cls.__bases__[-1].__name__, \
            "expect: {}, receive: {}.".format(base_reader.__name__,
                                              cls.__bases__[-1].__name__)
        self._Reader = cls

    @property
    def Paradigm(self):
        return self._Paradigm

    @Paradigm.setter
    def Paradigm(self, cls):
        assert base_paradigm.__name__ == cls.__bases__[-1].__name__, \
            "expect: {}, receive: {}.".format(base_paradigm.__name__,
                                              cls.__bases__[-1].__name__)
        self._Paradigm = cls

    @property
    def config(self):
        return self._config

    @property
    def reader(self):
        return self._reader

    @property
    def pred_input(self):
        return zip(*[self._pred_input_name_list,
                     self._pred_input_varname_list])

    @pred_input.setter
    def pred_input(self, val):
        assert isinstance(val, dict)
        self._pred_input_name_list, self._pred_input_varname_list = \
            zip(*[[k, v.name] for k, v in val.items()])
        # print(self._pred_input_name_list)

    @property
    def pred_fetch_list(self):
        return [self._pred_fetch_name_list, self._pred_fetch_var_list]

    @property
    def task_layer(self):
        return self._task_layer

    @property
    def is_first_target(self):
        return self._is_first_target

    @is_first_target.setter
    def is_first_target(self, value):
        self._is_first_target = bool(value)
        if self._is_first_target:
            assert self._is_target, "ERROR: only target task could be set as main task."
        if self._verbose and self._is_first_target:
            print("{}: set as main task".format(self._name))

    @property
    def is_target(self):
        if self._is_target is not None:
            return self._is_target
        else:
            raise ValueError("{}: is_target is None".format(self._name))

    @is_target.setter
    def is_target(self, value):
        self._is_target = bool(value)
        if self._verbose:
            if self._is_target:
                print('{}: set as target task.'.format(self._name))
            else:
                print('{}: set as aux task.'.format(self._name))

    @property
    def mix_ratio(self):
        if self._mix_ratio is not None:
            return self._mix_ratio
        else:
            raise ValueError("{}: mix_ratio is None".format(self._name))

    @mix_ratio.setter
    def mix_ratio(self, value):
        self._mix_ratio = float(value)
        if self._verbose:
            print('{}: mix_ratio is set to {}'.format(self._name,
                                                      self._mix_ratio))

    @property
    def expected_train_steps(self):
        return self._expected_train_steps

    @expected_train_steps.setter
    def expected_train_steps(self, value):
        self._expected_train_steps = value
        self._expected_train_epochs = value / float(self._steps_pur_epoch)

    @property
    def expected_train_epochs(self):
        return self._expected_train_epochs

    @property
    def cur_train_epoch(self):
        return self._cur_train_epoch

    @cur_train_epoch.setter
    def cur_train_epoch(self, value):
        self._cur_train_epoch = value

    @property
    def cur_train_step(self):
        return self._cur_train_step

    @cur_train_step.setter
    def cur_train_step(self, value):
        self._cur_train_step = value
        if self._cur_train_step > self._steps_pur_epoch:
            self._cur_train_epoch += 1
            self._cur_train_step = 1
        if self._is_target and self._cur_train_step + \
                self._cur_train_epoch * self._steps_pur_epoch >= \
                self._expected_train_steps:
            self._train_finish = True
            print(self._name + ': train finished!')
            self.save()
            # fluid.io.save_inference_model(self._save_infermodel_path, )

    @property
    def steps_pur_epoch(self):
        return self._steps_pur_epoch

    @steps_pur_epoch.setter
    def steps_pur_epoch(self, value):
        self._steps_pur_epoch = value

    @property
    def train_finish(self):
        return self._train_finish

    @property
    def task_reuse_scope(self):
        if self._task_reuse_scope is not None:
            return self._task_reuse_scope
        else:
            raise ValueError(
                "{}: task_reuse_scope is None".format(self._name))

    @task_reuse_scope.setter
    def task_reuse_scope(self, scope_name):
        self._task_reuse_scope = str(scope_name)
        if self._verbose:
            print('{}: task_reuse_scope is set to {}'.format(
                self._name, self._task_reuse_scope))


def check_instances(insts):
    """to check ids, first_target"""
    pass


def _check_ids():
    pass


def _check_targets():
    pass


def _check_reuse_scopes():
    pass
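The step/epoch bookkeeping in the cur_train_step setter can be traced without building a TaskInstance; a sketch with made-up numbers (3 steps per epoch, 7 expected steps):

steps_per_epoch, expected_steps = 3, 7
cur_step, cur_epoch, finished = 0, 0, False
for _ in range(10):
    cur_step += 1
    if cur_step > steps_per_epoch:  # roll over into the next epoch
        cur_epoch += 1
        cur_step = 1
    if cur_step + cur_epoch * steps_per_epoch >= expected_steps:
        finished = True             # the instance would save() here
        break
print(cur_epoch, cur_step, finished)  # 2 1 True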
build/lib/paddlepalm/task_paradigm/__init__.py
deleted 100644 → 0
view file @ e2368644
build/lib/paddlepalm/task_paradigm/cls.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddlepalm.interface import task_paradigm
from paddle.fluid import layers


class TaskParadigm(task_paradigm):
    '''
    classification
    '''

    def __init__(self, config, phase):
        self._is_training = phase == 'train'
        self.sent_emb_size = config['hidden_size']
        self.num_classes = config['n_classes']

    @property
    def inputs_attrs(self):
        return {
            'backbone': {
                "sentence_emb": [[-1, self.sent_emb_size], 'float32']
            },
            'reader': {
                "label_ids": [[-1, 1], 'int64']
            }
        }

    @property
    def outputs_attrs(self):
        if self._is_training:
            return {'loss': [[1], 'float32']}
        else:
            return {'logits': [[-1, self.num_classes], 'float32']}

    def build(self, inputs):
        sent_emb = inputs['backbone']['sentence_emb']
        label_ids = inputs['reader']['label_ids']

        logits = fluid.layers.fc(
            input=sent_emb,
            size=self.num_classes,
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.1)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b",
                initializer=fluid.initializer.Constant(0.)))

        loss = fluid.layers.softmax_with_cross_entropy(
            logits=logits, label=label_ids)
        loss = layers.mean(loss)
        if self._is_training:
            return {"loss": loss}
        else:
            return {"logits": logits}
build/lib/paddlepalm/task_paradigm/match.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddlepalm.interface import task_paradigm
from paddle.fluid import layers


class TaskParadigm(task_paradigm):
    '''
    matching
    '''

    def __init__(self, config, phase, backbone_config=None):
        self._is_training = phase == 'train'
        self._hidden_size = backbone_config['hidden_size']

    @property
    def inputs_attrs(self):
        if self._is_training:
            reader = {"label_ids": [[-1, 1], 'int64']}
        else:
            reader = {}
        bb = {"sentence_pair_embedding": [[-1, self._hidden_size], 'float32']}
        return {'reader': reader, 'backbone': bb}

    @property
    def outputs_attrs(self):
        if self._is_training:
            return {"loss": [[1], 'float32']}
        else:
            return {"logits": [[-1, 1], 'float32']}

    def build(self, inputs):
        if self._is_training:
            labels = inputs["reader"]["label_ids"]
        cls_feats = inputs["backbone"]["sentence_pair_embedding"]

        cls_feats = fluid.layers.dropout(
            x=cls_feats,
            dropout_prob=0.1,
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=2,
            param_attr=fluid.ParamAttr(
                name="cls_out_w",
                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
            bias_attr=fluid.ParamAttr(
                name="cls_out_b",
                initializer=fluid.initializer.Constant(0.)))

        if self._is_training:
            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
                logits=logits, label=labels, return_softmax=True)
            loss = fluid.layers.mean(x=ce_loss)
            return {'loss': loss}
        else:
            return {'logits': logits}
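A minimal sketch of the paradigm's build() contract (the 768 hidden size and variable names are assumptions; requires PaddlePaddle 1.x):

import paddle.fluid as fluid
from paddlepalm.task_paradigm.match import TaskParadigm

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    label_ids = fluid.layers.data(name='label_ids', shape=[1], dtype='int64')
    pair_emb = fluid.layers.data(
        name='sentence_pair_embedding', shape=[768], dtype='float32')
    paradigm = TaskParadigm(
        config={}, phase='train', backbone_config={'hidden_size': 768})
    outputs = paradigm.build({'reader': {'label_ids': label_ids},
                              'backbone': {'sentence_pair_embedding': pair_emb}})
    # outputs == {'loss': <mean cross-entropy variable>}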
build/lib/paddlepalm/task_paradigm/mlm.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddlepalm.interface import task_paradigm
from paddle.fluid import layers
# pre_process_layer (the layer-norm helper used below) lives in the
# backbone utils
from paddlepalm.backbone.utils.transformer import pre_process_layer


class TaskParadigm(task_paradigm):
    '''
    masked language model
    '''

    def __init__(self, config, phase, backbone_config=None):
        self._is_training = phase == 'train'
        self._hidden_size = backbone_config['hidden_size']
        self._vocab_size = backbone_config['vocab_size']
        self._hidden_act = backbone_config['hidden_act']
        self._initializer_range = backbone_config['initializer_range']

    @property
    def inputs_attrs(self):
        # build() consumes mask_label/mask_pos from the reader and both the
        # word embedding matrix and encoder outputs from the backbone
        if self._is_training:
            reader = {
                "mask_label": [[-1, 1], 'int64'],
                "mask_pos": [[-1, 1], 'int64']
            }
        else:
            reader = {}
        bb = {
            "word_embedding": [[self._vocab_size, self._hidden_size],
                               'float32'],
            "encoder_outputs": [[-1, self._hidden_size], 'float32']
        }
        return {'reader': reader, 'backbone': bb}

    @property
    def outputs_attrs(self):
        if self._is_training:
            return {"loss": [[1], 'float32']}
        else:
            return {"logits": [[-1, 1], 'float32']}

    def build(self, inputs):
        mask_label = inputs["reader"]["mask_label"]
        mask_pos = inputs["reader"]["mask_pos"]
        word_emb = inputs["backbone"]["word_embedding"]
        enc_out = inputs["backbone"]["encoder_outputs"]

        emb_size = word_emb.shape[-1]

        _param_initializer = fluid.initializer.TruncatedNormal(
            scale=self._initializer_range)

        mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')

        reshaped_emb_out = fluid.layers.reshape(
            x=enc_out, shape=[-1, emb_size])

        # extract masked tokens' feature
        mask_feat = fluid.layers.gather(input=reshaped_emb_out, index=mask_pos)
        num_seqs = fluid.layers.fill_constant(
            shape=[1], value=512, dtype='int64')

        # transform: fc
        mask_trans_feat = fluid.layers.fc(
            input=mask_feat,
            size=emb_size,
            act=self._hidden_act,
            param_attr=fluid.ParamAttr(
                name='mask_lm_trans_fc.w_0',
                initializer=_param_initializer),
            bias_attr=fluid.ParamAttr(name='mask_lm_trans_fc.b_0'))
        # transform: layer norm
        mask_trans_feat = pre_process_layer(
            mask_trans_feat, 'n', name='mask_lm_trans')

        mask_lm_out_bias_attr = fluid.ParamAttr(
            name="mask_lm_out_fc.b_0",
            initializer=fluid.initializer.Constant(value=0.0))

        # print fluid.default_main_program().global_block()
        # fc_out = fluid.layers.matmul(
        #     x=mask_trans_feat,
        #     y=fluid.default_main_program().global_block().var(
        #         _word_emb_name),
        #     transpose_y=True)
        # tie the output projection to the input word embedding
        fc_out = fluid.layers.matmul(
            x=mask_trans_feat, y=word_emb, transpose_y=True)
        fc_out += fluid.layers.create_parameter(
            shape=[self._vocab_size],
            dtype='float32',
            attr=mask_lm_out_bias_attr,
            is_bias=True)
        mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
            logits=fc_out, label=mask_label)
        loss = fluid.layers.mean(mask_lm_loss)

        if self._is_training:
            return {'loss': loss}
        else:
            return None
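This gather is why mask_pos was computed as sent_index * max_len + token_index back in mlm_batching.py: once the encoder output is reshaped to [batch * max_len, hidden], those flat offsets select exactly the masked rows. A numpy sketch of the same indexing with toy sizes:

import numpy as np

batch, max_len, hidden = 2, 4, 3
enc_out = np.arange(batch * max_len * hidden).reshape(batch, max_len, hidden)

# say tokens (0, 2) and (1, 1) were masked
mask_pos = np.array([0 * max_len + 2, 1 * max_len + 1])

flat = enc_out.reshape(-1, hidden)  # [batch * max_len, hidden]
mask_feat = flat[mask_pos]          # rows for the masked tokens
assert (mask_feat[0] == enc_out[0, 2]).all()
assert (mask_feat[1] == enc_out[1, 1]).all()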
build/lib/paddlepalm/task_paradigm/mrc.py
deleted 100644 → 0
view file @ e2368644
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.fluid
as
fluid
from
paddlepalm.interface
import
task_paradigm
import
collections
import
numpy
as
np
import
os
import
math
import
six
import
paddlepalm.tokenizer.ernie_tokenizer
as
tokenization
import
json
RawResult
=
collections
.
namedtuple
(
"RawResult"
,
[
"unique_id"
,
"start_logits"
,
"end_logits"
])
class
TaskParadigm
(
task_paradigm
):
""""""
def
__init__
(
self
,
config
,
phase
,
backbone_config
=
None
):
self
.
_is_training
=
phase
==
'train'
self
.
_max_sequence_length
=
config
[
'max_seq_len'
]
self
.
_hidden_size
=
backbone_config
[
'hidden_size'
]
self
.
_pred_results
=
[]
if
phase
==
'pred'
:
self
.
_max_answer_length
=
config
.
get
(
'max_answer_len'
,
None
)
self
.
_null_score_diff_threshold
=
config
.
get
(
'null_score_diff_threshold'
,
0.0
)
self
.
_n_best_size
=
config
.
get
(
'n_best_size'
,
20
)
self
.
_pred_output_path
=
config
.
get
(
'pred_output_path'
,
None
)
self
.
_verbose
=
config
.
get
(
'verbose'
,
False
)
self
.
_with_negative
=
config
.
get
(
'with_negative'
,
False
)
self
.
_do_lower_case
=
config
.
get
(
'do_lower_case'
,
False
)
@
property
def
inputs_attrs
(
self
):
if
self
.
_is_training
:
reader
=
{
"start_positions"
:
[[
-
1
,
1
],
'int64'
],
"end_positions"
:
[[
-
1
,
1
],
'int64'
]}
else
:
reader
=
{
'unique_ids'
:
[[
-
1
,
1
],
'int64'
]}
bb
=
{
"encoder_outputs"
:
[[
-
1
,
-
1
,
self
.
_hidden_size
],
'float32'
]}
return
{
'reader'
:
reader
,
'backbone'
:
bb
}
@
property
def
epoch_inputs_attrs
(
self
):
if
not
self
.
_is_training
:
from_reader
=
{
'examples'
:
None
,
'features'
:
None
}
return
{
'reader'
:
from_reader
}
@
property
def
outputs_attr
(
self
):
if
self
.
_is_training
:
return
{
'loss'
:
[[
1
],
'float32'
]}
else
:
return
{
'start_logits'
:
[[
-
1
,
-
1
,
1
],
'float32'
],
'end_logits'
:
[[
-
1
,
-
1
,
1
],
'float32'
],
'unique_ids'
:
[[
-
1
,
1
],
'int64'
]}
def
build
(
self
,
inputs
):
if
self
.
_is_training
:
start_positions
=
inputs
[
'reader'
][
'start_positions'
]
end_positions
=
inputs
[
'reader'
][
'end_positions'
]
else
:
unique_id
=
inputs
[
'reader'
][
'unique_ids'
]
enc_out
=
inputs
[
'backbone'
][
'encoder_outputs'
]
logits
=
fluid
.
layers
.
fc
(
input
=
enc_out
,
size
=
2
,
num_flatten_dims
=
2
,
param_attr
=
fluid
.
ParamAttr
(
name
=
"cls_squad_out_w"
,
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
scale
=
0.02
)),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"cls_squad_out_b"
,
initializer
=
fluid
.
initializer
.
Constant
(
0.
)))
logits
=
fluid
.
layers
.
transpose
(
x
=
logits
,
perm
=
[
2
,
0
,
1
])
start_logits
,
end_logits
=
fluid
.
layers
.
unstack
(
x
=
logits
,
axis
=
0
)
def
_compute_single_loss
(
logits
,
positions
):
"""Compute start/end loss for mrc model"""
loss
=
fluid
.
layers
.
softmax_with_cross_entropy
(
logits
=
logits
,
label
=
positions
)
loss
=
fluid
.
layers
.
mean
(
x
=
loss
)
return
loss
if
self
.
_is_training
:
start_loss
=
_compute_single_loss
(
start_logits
,
start_positions
)
end_loss
=
_compute_single_loss
(
end_logits
,
end_positions
)
total_loss
=
(
start_loss
+
end_loss
)
/
2.0
return
{
'loss'
:
total_loss
}
else
:
return
{
'start_logits'
:
start_logits
,
'end_logits'
:
end_logits
,
'unique_ids'
:
unique_id
}
def
postprocess
(
self
,
rt_outputs
):
"""this func will be called after each step(batch) of training/evaluating/predicting process."""
if
not
self
.
_is_training
:
unique_ids
=
np
.
squeeze
(
rt_outputs
[
'unique_ids'
],
-
1
)
start_logits
=
rt_outputs
[
'start_logits'
]
end_logits
=
rt_outputs
[
'end_logits'
]
for
idx
in
range
(
len
(
unique_ids
)):
if
unique_ids
[
idx
]
<
0
:
continue
if
len
(
self
.
_pred_results
)
%
1000
==
0
:
print
(
"Predicting example: {}"
.
format
(
len
(
self
.
_pred_results
)))
uid
=
int
(
unique_ids
[
idx
])
s
=
[
float
(
x
)
for
x
in
start_logits
[
idx
].
flat
]
e
=
[
float
(
x
)
for
x
in
end_logits
[
idx
].
flat
]
self
.
_pred_results
.
append
(
RawResult
(
unique_id
=
uid
,
start_logits
=
s
,
end_logits
=
e
))
    def epoch_postprocess(self, post_inputs):
        """(optional interface) this func will be called after evaluation/predicting process and each epoch during training process."""
        if not self._is_training:
            if self._pred_output_path is None:
                raise ValueError('argument pred_output_path not found in config. Please add it into config dict/file.')
            examples = post_inputs['reader']['examples']
            features = post_inputs['reader']['features']
            if not os.path.exists(self._pred_output_path):
                os.makedirs(self._pred_output_path)
            output_prediction_file = os.path.join(self._pred_output_path, "predictions.json")
            output_nbest_file = os.path.join(self._pred_output_path, "nbest_predictions.json")
            output_null_log_odds_file = os.path.join(self._pred_output_path, "null_odds.json")
            _write_predictions(examples, features, self._pred_results,
                               self._n_best_size, self._max_answer_length,
                               self._do_lower_case, output_prediction_file,
                               output_nbest_file, output_null_log_odds_file,
                               self._with_negative,
                               self._null_score_diff_threshold, self._verbose)
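
The `build` method earlier in this class extracts per-position start/end scores from a single fc output of width 2 by transposing `[batch, seq_len, 2]` to `[2, batch, seq_len]` and unstacking along axis 0. A minimal numpy sketch of that reshaping (shapes here are made up for illustration):

```python
import numpy as np

# logits as produced by the fc layer: [batch, seq_len, 2]
batch, seq_len = 4, 8
logits = np.random.rand(batch, seq_len, 2).astype("float32")

# transpose to [2, batch, seq_len] and split along the first axis,
# mirroring fluid.layers.transpose(perm=[2, 0, 1]) + unstack(axis=0)
transposed = np.transpose(logits, (2, 0, 1))
start_logits, end_logits = transposed[0], transposed[1]

assert start_logits.shape == (batch, seq_len)
assert end_logits.shape == (batch, seq_len)
```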
def _write_predictions(all_examples, all_features, all_results, n_best_size,
                       max_answer_length, do_lower_case, output_prediction_file,
                       output_nbest_file, output_null_log_odds_file,
                       with_negative, null_score_diff_threshold, verbose):
    """Write final predictions to the json file and log-odds of null if needed."""
    print("Writing predictions to: %s" % (output_prediction_file))
    print("Writing nbest to: %s" % (output_nbest_file))

    example_index_to_features = collections.defaultdict(list)
    for feature in all_features:
        example_index_to_features[feature.example_index].append(feature)

    unique_id_to_result = {}
    for result in all_results:
        unique_id_to_result[result.unique_id] = result

    _PrelimPrediction = collections.namedtuple(  # pylint: disable=invalid-name
        "PrelimPrediction",
        ["feature_index", "start_index", "end_index", "start_logit", "end_logit"])

    all_predictions = collections.OrderedDict()
    all_nbest_json = collections.OrderedDict()
    scores_diff_json = collections.OrderedDict()

    for (example_index, example) in enumerate(all_examples):
        features = example_index_to_features[example_index]

        prelim_predictions = []
        # keep track of the minimum score of null start+end of position 0
        score_null = 1000000  # large and positive
        min_null_feature_index = 0  # the paragraph slice with min null score
        null_start_logit = 0  # the start logit at the slice with min null score
        null_end_logit = 0  # the end logit at the slice with min null score
        for (feature_index, feature) in enumerate(features):
            result = unique_id_to_result[feature.unique_id]
            start_indexes = _get_best_indexes(result.start_logits, n_best_size)
            end_indexes = _get_best_indexes(result.end_logits, n_best_size)
            # if we could have irrelevant answers, get the min score of irrelevant
            if with_negative:
                feature_null_score = result.start_logits[0] + result.end_logits[0]
                if feature_null_score < score_null:
                    score_null = feature_null_score
                    min_null_feature_index = feature_index
                    null_start_logit = result.start_logits[0]
                    null_end_logit = result.end_logits[0]
            for start_index in start_indexes:
                for end_index in end_indexes:
                    # We could hypothetically create invalid predictions, e.g., predict
                    # that the start of the span is in the question. We throw out all
                    # invalid predictions.
                    if start_index >= len(feature.tokens):
                        continue
                    if end_index >= len(feature.tokens):
                        continue
                    if start_index not in feature.token_to_orig_map:
                        continue
                    if end_index not in feature.token_to_orig_map:
                        continue
                    if not feature.token_is_max_context.get(start_index, False):
                        continue
                    if end_index < start_index:
                        continue
                    length = end_index - start_index + 1
                    if length > max_answer_length:
                        continue
                    prelim_predictions.append(
                        _PrelimPrediction(
                            feature_index=feature_index,
                            start_index=start_index,
                            end_index=end_index,
                            start_logit=result.start_logits[start_index],
                            end_logit=result.end_logits[end_index]))

        if with_negative:
            prelim_predictions.append(
                _PrelimPrediction(
                    feature_index=min_null_feature_index,
                    start_index=0,
                    end_index=0,
                    start_logit=null_start_logit,
                    end_logit=null_end_logit))
        prelim_predictions = sorted(
            prelim_predictions,
            key=lambda x: (x.start_logit + x.end_logit),
            reverse=True)

        _NbestPrediction = collections.namedtuple(  # pylint: disable=invalid-name
            "NbestPrediction", ["text", "start_logit", "end_logit"])

        seen_predictions = {}
        nbest = []
        for pred in prelim_predictions:
            if len(nbest) >= n_best_size:
                break
            feature = features[pred.feature_index]
            if pred.start_index > 0:  # this is a non-null prediction
                tok_tokens = feature.tokens[pred.start_index:(pred.end_index + 1)]
                orig_doc_start = feature.token_to_orig_map[pred.start_index]
                orig_doc_end = feature.token_to_orig_map[pred.end_index]
                orig_tokens = example.doc_tokens[orig_doc_start:(orig_doc_end + 1)]
                tok_text = " ".join(tok_tokens)

                # De-tokenize WordPieces that have been split off.
                tok_text = tok_text.replace(" ##", "")
                tok_text = tok_text.replace("##", "")

                # Clean whitespace
                tok_text = tok_text.strip()
                tok_text = " ".join(tok_text.split())
                orig_text = " ".join(orig_tokens)

                final_text = _get_final_text(tok_text, orig_text, do_lower_case, verbose)
                if final_text in seen_predictions:
                    continue

                seen_predictions[final_text] = True
            else:
                final_text = ""
                seen_predictions[final_text] = True

            nbest.append(
                _NbestPrediction(
                    text=final_text,
                    start_logit=pred.start_logit,
                    end_logit=pred.end_logit))

        # if we didn't include the empty option in the n-best, include it
        if with_negative:
            if "" not in seen_predictions:
                nbest.append(
                    _NbestPrediction(
                        text="",
                        start_logit=null_start_logit,
                        end_logit=null_end_logit))
        # In very rare edge cases we could have no valid predictions. So we
        # just create a nonce prediction in this case to avoid failure.
        if not nbest:
            nbest.append(
                _NbestPrediction(text="empty", start_logit=0.0, end_logit=0.0))

        assert len(nbest) >= 1

        total_scores = []
        best_non_null_entry = None
        for entry in nbest:
            total_scores.append(entry.start_logit + entry.end_logit)
            if not best_non_null_entry:
                if entry.text:
                    best_non_null_entry = entry
        # debug
        if best_non_null_entry is None:
            print("Emmm..., sth wrong")

        probs = _compute_softmax(total_scores)

        nbest_json = []
        for (i, entry) in enumerate(nbest):
            output = collections.OrderedDict()
            output["text"] = entry.text
            output["probability"] = probs[i]
            output["start_logit"] = entry.start_logit
            output["end_logit"] = entry.end_logit
            nbest_json.append(output)

        assert len(nbest_json) >= 1

        if not with_negative:
            all_predictions[example.qas_id] = nbest_json[0]["text"]
        else:
            # predict "" iff the null score - the score of best non-null > threshold
            score_diff = score_null - best_non_null_entry.start_logit - (
                best_non_null_entry.end_logit)
            scores_diff_json[example.qas_id] = score_diff
            if score_diff > null_score_diff_threshold:
                all_predictions[example.qas_id] = ""
            else:
                all_predictions[example.qas_id] = best_non_null_entry.text

        all_nbest_json[example.qas_id] = nbest_json

    with open(output_prediction_file, "w") as writer:
        writer.write(json.dumps(all_predictions, indent=4) + "\n")

    with open(output_nbest_file, "w") as writer:
        writer.write(json.dumps(all_nbest_json, indent=4) + "\n")

    if with_negative:
        with open(output_null_log_odds_file, "w") as writer:
            writer.write(json.dumps(scores_diff_json, indent=4) + "\n")
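
For data with unanswerable questions (`with_negative`), the answerability decision above reduces to a single threshold test on the gap between the null score and the best non-null span score. A small sketch with made-up numbers:

```python
# illustrative scores for one example (values are invented)
score_null = 1.5                       # start_logits[0] + end_logits[0], minimized over slices
best_start_logit, best_end_logit = 0.4, 0.3
null_score_diff_threshold = 0.0

score_diff = score_null - best_start_logit - best_end_logit  # 0.8
prediction = "" if score_diff > null_score_diff_threshold else "best non-null span"
print(prediction)  # "" -> predict unanswerable, since 0.8 > 0.0
```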
def _get_final_text(pred_text, orig_text, do_lower_case, verbose):
    """Project the tokenized prediction back to the original text."""

    # When we created the data, we kept track of the alignment between original
    # (whitespace tokenized) tokens and our WordPiece tokenized tokens. So
    # now `orig_text` contains the span of our original text corresponding to the
    # span that we predicted.
    #
    # However, `orig_text` may contain extra characters that we don't want in
    # our prediction.
    #
    # For example, let's say:
    #   pred_text = steve smith
    #   orig_text = Steve Smith's
    #
    # We don't want to return `orig_text` because it contains the extra "'s".
    #
    # We don't want to return `pred_text` because it's already been normalized
    # (the MRQA eval script also does punctuation stripping/lower casing but
    # our tokenizer does additional normalization like stripping accent
    # characters).
    #
    # What we really want to return is "Steve Smith".
    #
    # Therefore, we have to apply a semi-complicated alignment heuristic between
    # `pred_text` and `orig_text` to get a character-to-character alignment. This
    # can fail in certain cases in which case we just return `orig_text`.

    def _strip_spaces(text):
        ns_chars = []
        ns_to_s_map = collections.OrderedDict()
        for (i, c) in enumerate(text):
            if c == " ":
                continue
            ns_to_s_map[len(ns_chars)] = i
            ns_chars.append(c)
        ns_text = "".join(ns_chars)
        return (ns_text, ns_to_s_map)

    # We first tokenize `orig_text`, strip whitespace from the result
    # and `pred_text`, and check if they are the same length. If they are
    # NOT the same length, the heuristic has failed. If they are the same
    # length, we assume the characters are one-to-one aligned.
    tokenizer = tokenization.BasicTokenizer(do_lower_case=do_lower_case)

    tok_text = " ".join(tokenizer.tokenize(orig_text))

    start_position = tok_text.find(pred_text)
    if start_position == -1:
        if verbose:
            print("Unable to find text: '%s' in '%s'" % (pred_text, orig_text))
        return orig_text
    end_position = start_position + len(pred_text) - 1

    (orig_ns_text, orig_ns_to_s_map) = _strip_spaces(orig_text)
    (tok_ns_text, tok_ns_to_s_map) = _strip_spaces(tok_text)

    if len(orig_ns_text) != len(tok_ns_text):
        if verbose:
            print("Length not equal after stripping spaces: '%s' vs '%s'" %
                  (orig_ns_text, tok_ns_text))
        return orig_text

    # We then project the characters in `pred_text` back to `orig_text` using
    # the character-to-character alignment.
    tok_s_to_ns_map = {}
    for (i, tok_index) in six.iteritems(tok_ns_to_s_map):
        tok_s_to_ns_map[tok_index] = i

    orig_start_position = None
    if start_position in tok_s_to_ns_map:
        ns_start_position = tok_s_to_ns_map[start_position]
        if ns_start_position in orig_ns_to_s_map:
            orig_start_position = orig_ns_to_s_map[ns_start_position]

    if orig_start_position is None:
        if verbose:
            print("Couldn't map start position")
        return orig_text

    orig_end_position = None
    if end_position in tok_s_to_ns_map:
        ns_end_position = tok_s_to_ns_map[end_position]
        if ns_end_position in orig_ns_to_s_map:
            orig_end_position = orig_ns_to_s_map[ns_end_position]

    if orig_end_position is None:
        if verbose:
            print("Couldn't map end position")
        return orig_text

    output_text = orig_text[orig_start_position:(orig_end_position + 1)]
    return output_text
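
The projection hinges on `_strip_spaces`, which builds a map from positions in the space-stripped string back to positions in the original string. A minimal standalone sketch of that mapping, reusing the comment's own "Steve Smith's" example:

```python
import collections

def strip_spaces(text):
    # same idea as the inner _strip_spaces helper above
    ns_chars = []
    ns_to_s_map = collections.OrderedDict()
    for i, c in enumerate(text):
        if c == " ":
            continue
        ns_to_s_map[len(ns_chars)] = i
        ns_chars.append(c)
    return "".join(ns_chars), ns_to_s_map

ns_text, ns_map = strip_spaces("Steve Smith's")
print(ns_text)    # "SteveSmith's"
print(ns_map[5])  # 6: the 6th non-space char is the "S" of "Smith" at original index 6
```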
def _get_best_indexes(logits, n_best_size):
    """Get the n-best logits from a list."""
    index_and_score = sorted(enumerate(logits), key=lambda x: x[1], reverse=True)

    best_indexes = []
    for i in range(len(index_and_score)):
        if i >= n_best_size:
            break
        best_indexes.append(index_and_score[i][0])
    return best_indexes
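
`_get_best_indexes` sorts the whole list to take the top n; an equivalent formulation (cheaper for long lists) uses `heapq.nlargest`. A small sketch showing the two agree:

```python
import heapq

logits = [0.1, 2.3, -0.5, 1.7, 0.9]
n_best_size = 3

# sort-based, as in _get_best_indexes above
by_sort = [i for i, _ in sorted(enumerate(logits), key=lambda x: x[1], reverse=True)][:n_best_size]

# heap-based alternative
by_heap = [i for _, i in heapq.nlargest(n_best_size, ((s, i) for i, s in enumerate(logits)))]

print(by_sort)  # [1, 3, 4]
print(by_heap)  # [1, 3, 4]
```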
def _compute_softmax(scores):
    """Compute softmax probability over raw logits."""
    if not scores:
        return []

    max_score = None
    for score in scores:
        if max_score is None or score > max_score:
            max_score = score

    exp_scores = []
    total_sum = 0.0
    for score in scores:
        x = math.exp(score - max_score)
        exp_scores.append(x)
        total_sum += x

    probs = []
    for score in exp_scores:
        probs.append(score / total_sum)
    return probs
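
The max-subtraction in `_compute_softmax` is the standard numerical-stability trick: subtracting the maximum before exponentiating leaves the result unchanged mathematically while avoiding overflow. A quick check against large logits:

```python
import math

def compute_softmax(scores):
    # same algorithm as _compute_softmax above
    if not scores:
        return []
    max_score = max(scores)
    exp_scores = [math.exp(s - max_score) for s in scores]
    total = sum(exp_scores)
    return [e / total for e in exp_scores]

probs = compute_softmax([1000.0, 1001.0, 1002.0])  # naive exp(1000.0) would overflow
print([round(p, 4) for p in probs])                # [0.09, 0.2447, 0.6652]
```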
build/lib/paddlepalm/tokenizer/__init__.py (deleted, 100644 → 0)

build/lib/paddlepalm/tokenizer/bert_tokenizer.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import unicodedata
import six


def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text.decode("utf-8", "ignore")
        elif isinstance(text, unicode):
            return text
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python 2 or Python 3?")


def printable_text(text):
    """Returns text encoded in a way suitable for print or `tf.logging`."""
    # These functions want `str` for both Python2 and Python3, but in one case
    # it's a Unicode string and in the other it's a byte string.
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text
        elif isinstance(text, unicode):
            return text.encode("utf-8")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python 2 or Python 3?")


def load_vocab(vocab_file):
    """Loads a vocabulary file into a dictionary."""
    vocab = collections.OrderedDict()
    fin = open(vocab_file)
    for num, line in enumerate(fin):
        items = convert_to_unicode(line.strip()).split("\t")
        if len(items) > 2:
            break
        token = items[0]
        index = items[1] if len(items) == 2 else num
        token = token.strip()
        vocab[token] = int(index)
    return vocab
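
A note on the format `load_vocab` accepts: one token per line, optionally followed by a tab and an explicit id; when the id column is absent, the line number is used. A small self-checking sketch (the file path is hypothetical):

```python
# write a tiny vocab file in the implicit-id layout, then load it back
with open("toy_vocab.txt", "w") as f:
    f.write("[PAD]\n[UNK]\nthe\n")

vocab = load_vocab("toy_vocab.txt")  # load_vocab as defined above
assert vocab == {"[PAD]": 0, "[UNK]": 1, "the": 2}
```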
def convert_by_vocab(vocab, items):
    """Converts a sequence of [tokens|ids] using the vocab."""
    output = []
    for item in items:
        output.append(vocab[item])
    return output


def convert_tokens_to_ids(vocab, tokens):
    return convert_by_vocab(vocab, tokens)


def convert_ids_to_tokens(inv_vocab, ids):
    return convert_by_vocab(inv_vocab, ids)


def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    text = text.strip()
    if not text:
        return []
    tokens = text.split()
    return tokens


class FullTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in self.basic_tokenizer.tokenize(text):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class CharTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in text.lower().split(" "):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self, do_lower_case=True):
        """Constructs a BasicTokenizer.

        Args:
            do_lower_case: Whether to lower case the input.
        """
        self.do_lower_case = do_lower_case
        self._never_lowercase = ['[UNK]', '[SEP]', '[PAD]', '[CLS]', '[MASK]']

    def tokenize(self, text):
        """Tokenizes a piece of text."""
        text = convert_to_unicode(text)
        text = self._clean_text(text)

        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        text = self._tokenize_chinese_chars(text)

        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            if self.do_lower_case and token not in self._never_lowercase:
                token = token.lower()
                token = self._run_strip_accents(token)
            if token in self._never_lowercase:
                split_tokens.extend([token])
            else:
                split_tokens.extend(self._run_split_on_punc(token))

        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1

        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #     https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and are handled
        # like all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or
                (cp >= 0x3400 and cp <= 0x4DBF) or
                (cp >= 0x20000 and cp <= 0x2A6DF) or
                (cp >= 0x2A700 and cp <= 0x2B73F) or
                (cp >= 0x2B740 and cp <= 0x2B81F) or
                (cp >= 0x2B820 and cp <= 0x2CEAF) or
                (cp >= 0xF900 and cp <= 0xFAFF) or
                (cp >= 0x2F800 and cp <= 0x2FA1F)):
            return True

        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)


class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        This uses a greedy longest-match-first algorithm to perform tokenization
        using the given vocabulary.

        For example:
            input = "unaffable"
            output = ["un", "##aff", "##able"]

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.

        Returns:
            A list of wordpiece tokens.
        """
        text = convert_to_unicode(text)

        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens


def _is_whitespace(char):
    """Checks whether `char` is a whitespace character."""
    # \t, \n, and \r are technically control characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    cat = unicodedata.category(char)
    if cat == "Zs":
        return True
    return False


def _is_control(char):
    """Checks whether `char` is a control character."""
    # These are technically control characters but we count them as whitespace
    # characters.
    if char == "\t" or char == "\n" or char == "\r":
        return False
    cat = unicodedata.category(char)
    if cat.startswith("C"):
        return True
    return False


def _is_punctuation(char):
    """Checks whether `char` is a punctuation character."""
    cp = ord(char)
    # We treat all non-letter/number ASCII as punctuation.
    # Characters such as "^", "$", and "`" are not in the Unicode
    # Punctuation class but we treat them as punctuation anyways, for
    # consistency.
    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
        return True
    cat = unicodedata.category(char)
    if cat.startswith("P"):
        return True
    return False
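
End-to-end, `FullTokenizer` composes the basic tokenizer with the greedy longest-match-first WordPiece pass above. A usage sketch with a toy in-memory vocabulary (bypassing `load_vocab` so the snippet stays self-contained):

```python
# toy vocabulary; real models ship a full vocab file
toy_vocab = {"un": 0, "##aff": 1, "##able": 2, "runs": 3, "[UNK]": 4}

wp = WordpieceTokenizer(vocab=toy_vocab)
print(wp.tokenize("unaffable"))  # ['un', '##aff', '##able']
print(wp.tokenize("xyzzy"))      # ['[UNK]'] -- no greedy match survives
```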
build/lib/paddlepalm/tokenizer/ernie_tokenizer.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

from io import open

import collections
import unicodedata
import six


def convert_to_unicode(text):
    """Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text.decode("utf-8", "ignore")
        elif isinstance(text, unicode):
            return text
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python 2 or Python 3?")


def printable_text(text):
    """Returns text encoded in a way suitable for print or `tf.logging`."""
    # These functions want `str` for both Python2 and Python3, but in one case
    # it's a Unicode string and in the other it's a byte string.
    if six.PY3:
        if isinstance(text, str):
            return text
        elif isinstance(text, bytes):
            return text.decode("utf-8", "ignore")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    elif six.PY2:
        if isinstance(text, str):
            return text
        elif isinstance(text, unicode):
            return text.encode("utf-8")
        else:
            raise ValueError("Unsupported string type: %s" % (type(text)))
    else:
        raise ValueError("Not running on Python 2 or Python 3?")


def load_vocab(vocab_file):
    """Loads a vocabulary file into a dictionary."""
    vocab = collections.OrderedDict()
    with open(vocab_file, encoding='utf8') as fin:
        for num, line in enumerate(fin):
            items = convert_to_unicode(line.strip()).split("\t")
            if len(items) > 2:
                break
            token = items[0]
            index = items[1] if len(items) == 2 else num
            token = token.strip()
            vocab[token] = int(index)
    return vocab


def convert_by_vocab(vocab, items):
    """Converts a sequence of [tokens|ids] using the vocab."""
    output = []
    for item in items:
        output.append(vocab[item])
    return output


def convert_tokens_to_ids(vocab, tokens):
    return convert_by_vocab(vocab, tokens)


def convert_ids_to_tokens(inv_vocab, ids):
    return convert_by_vocab(inv_vocab, ids)


def whitespace_tokenize(text):
    """Runs basic whitespace cleaning and splitting on a piece of text."""
    text = text.strip()
    if not text:
        return []
    tokens = text.split()
    return tokens


class FullTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in self.basic_tokenizer.tokenize(text):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class CharTokenizer(object):
    """Runs end-to-end tokenization."""

    def __init__(self, vocab_file, do_lower_case=True):
        self.vocab = load_vocab(vocab_file)
        self.inv_vocab = {v: k for k, v in self.vocab.items()}
        self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)

    def tokenize(self, text):
        split_tokens = []
        for token in text.lower().split(" "):
            for sub_token in self.wordpiece_tokenizer.tokenize(token):
                split_tokens.append(sub_token)
        return split_tokens

    def convert_tokens_to_ids(self, tokens):
        return convert_by_vocab(self.vocab, tokens)

    def convert_ids_to_tokens(self, ids):
        return convert_by_vocab(self.inv_vocab, ids)


class BasicTokenizer(object):
    """Runs basic tokenization (punctuation splitting, lower casing, etc.)."""

    def __init__(self, do_lower_case=True):
        """Constructs a BasicTokenizer.

        Args:
            do_lower_case: Whether to lower case the input.
        """
        self.do_lower_case = do_lower_case

    def tokenize(self, text):
        """Tokenizes a piece of text."""
        text = convert_to_unicode(text)
        text = self._clean_text(text)

        # This was added on November 1st, 2018 for the multilingual and Chinese
        # models. This is also applied to the English models now, but it doesn't
        # matter since the English models were not trained on any Chinese data
        # and generally don't have any Chinese data in them (there are Chinese
        # characters in the vocabulary because Wikipedia does have some Chinese
        # words in the English Wikipedia.).
        text = self._tokenize_chinese_chars(text)

        orig_tokens = whitespace_tokenize(text)
        split_tokens = []
        for token in orig_tokens:
            if self.do_lower_case:
                token = token.lower()
                token = self._run_strip_accents(token)
            split_tokens.extend(self._run_split_on_punc(token))

        output_tokens = whitespace_tokenize(" ".join(split_tokens))
        return output_tokens

    def _run_strip_accents(self, text):
        """Strips accents from a piece of text."""
        text = unicodedata.normalize("NFD", text)
        output = []
        for char in text:
            cat = unicodedata.category(char)
            if cat == "Mn":
                continue
            output.append(char)
        return "".join(output)

    def _run_split_on_punc(self, text):
        """Splits punctuation on a piece of text."""
        chars = list(text)
        i = 0
        start_new_word = True
        output = []
        while i < len(chars):
            char = chars[i]
            if _is_punctuation(char):
                output.append([char])
                start_new_word = True
            else:
                if start_new_word:
                    output.append([])
                start_new_word = False
                output[-1].append(char)
            i += 1

        return ["".join(x) for x in output]

    def _tokenize_chinese_chars(self, text):
        """Adds whitespace around any CJK character."""
        output = []
        for char in text:
            cp = ord(char)
            if self._is_chinese_char(cp):
                output.append(" ")
                output.append(char)
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)

    def _is_chinese_char(self, cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #     https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and are handled
        # like all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or
                (cp >= 0x3400 and cp <= 0x4DBF) or
                (cp >= 0x20000 and cp <= 0x2A6DF) or
                (cp >= 0x2A700 and cp <= 0x2B73F) or
                (cp >= 0x2B740 and cp <= 0x2B81F) or
                (cp >= 0x2B820 and cp <= 0x2CEAF) or
                (cp >= 0xF900 and cp <= 0xFAFF) or
                (cp >= 0x2F800 and cp <= 0x2FA1F)):
            return True

        return False

    def _clean_text(self, text):
        """Performs invalid character removal and whitespace cleanup on text."""
        output = []
        for char in text:
            cp = ord(char)
            if cp == 0 or cp == 0xfffd or _is_control(char):
                continue
            if _is_whitespace(char):
                output.append(" ")
            else:
                output.append(char)
        return "".join(output)


class WordpieceTokenizer(object):
    """Runs WordPiece tokenization."""

    def __init__(self, vocab, unk_token="[UNK]", max_input_chars_per_word=100):
        self.vocab = vocab
        self.unk_token = unk_token
        self.max_input_chars_per_word = max_input_chars_per_word

    def tokenize(self, text):
        """Tokenizes a piece of text into its word pieces.

        This uses a greedy longest-match-first algorithm to perform tokenization
        using the given vocabulary.

        For example:
            input = "unaffable"
            output = ["un", "##aff", "##able"]

        Args:
            text: A single token or whitespace separated tokens. This should have
                already been passed through `BasicTokenizer`.

        Returns:
            A list of wordpiece tokens.
        """
        text = convert_to_unicode(text)

        output_tokens = []
        for token in whitespace_tokenize(text):
            chars = list(token)
            if len(chars) > self.max_input_chars_per_word:
                output_tokens.append(self.unk_token)
                continue

            is_bad = False
            start = 0
            sub_tokens = []
            while start < len(chars):
                end = len(chars)
                cur_substr = None
                while start < end:
                    substr = "".join(chars[start:end])
                    if start > 0:
                        substr = "##" + substr
                    if substr in self.vocab:
                        cur_substr = substr
                        break
                    end -= 1
                if cur_substr is None:
                    is_bad = True
                    break
                sub_tokens.append(cur_substr)
                start = end

            if is_bad:
                output_tokens.append(self.unk_token)
            else:
                output_tokens.extend(sub_tokens)
        return output_tokens


def _is_whitespace(char):
    """Checks whether `char` is a whitespace character."""
    # \t, \n, and \r are technically control characters but we treat them
    # as whitespace since they are generally considered as such.
    if char == " " or char == "\t" or char == "\n" or char == "\r":
        return True
    cat = unicodedata.category(char)
    if cat == "Zs":
        return True
    return False


def _is_control(char):
    """Checks whether `char` is a control character."""
    # These are technically control characters but we count them as whitespace
    # characters.
    if char == "\t" or char == "\n" or char == "\r":
        return False
    cat = unicodedata.category(char)
    if cat.startswith("C"):
        return True
    return False


def _is_punctuation(char):
    """Checks whether `char` is a punctuation character."""
    cp = ord(char)
    # We treat all non-letter/number ASCII as punctuation.
    # Characters such as "^", "$", and "`" are not in the Unicode
    # Punctuation class but we treat them as punctuation anyways, for
    # consistency.
    if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
            (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
        return True
    cat = unicodedata.category(char)
    if cat.startswith("P"):
        return True
    return False


def tokenize_chinese_chars(text):
    """Splits text into segments, isolating each CJK character and each
    whitespace character while buffering runs of all other characters."""

    def _is_chinese_char(cp):
        """Checks whether CP is the codepoint of a CJK character."""
        # This defines a "chinese character" as anything in the CJK Unicode block:
        #     https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
        #
        # Note that the CJK Unicode block is NOT all Japanese and Korean characters,
        # despite its name. The modern Korean Hangul alphabet is a different block,
        # as is Japanese Hiragana and Katakana. Those alphabets are used to write
        # space-separated words, so they are not treated specially and are handled
        # like all of the other languages.
        if ((cp >= 0x4E00 and cp <= 0x9FFF) or
                (cp >= 0x3400 and cp <= 0x4DBF) or
                (cp >= 0x20000 and cp <= 0x2A6DF) or
                (cp >= 0x2A700 and cp <= 0x2B73F) or
                (cp >= 0x2B740 and cp <= 0x2B81F) or
                (cp >= 0x2B820 and cp <= 0x2CEAF) or
                (cp >= 0xF900 and cp <= 0xFAFF) or
                (cp >= 0x2F800 and cp <= 0x2FA1F)):
            return True

        return False

    def _is_whitespace(c):
        if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
            return True
        return False

    output = []
    buff = ""
    for char in text:
        cp = ord(char)
        if _is_chinese_char(cp) or _is_whitespace(char):
            if buff != "":
                output.append(buff)
                buff = ""
            output.append(char)
        else:
            buff += char

    if buff != "":
        output.append(buff)

    return output
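
Unlike the method of the same name on `BasicTokenizer`, this module-level `tokenize_chinese_chars` returns a list of segments, emitting each CJK character and each whitespace character as its own element. A small illustrative call:

```python
segments = tokenize_chinese_chars(u"中文abc 测试")
print(segments)  # ['中', '文', 'abc', ' ', '测', '试']
```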
build/lib/paddlepalm/utils/__init__.py (deleted, 100644 → 0)

build/lib/paddlepalm/utils/config_helper.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
import argparse
import json
import yaml
import six
import logging

logging_only_message = "%(message)s"
logging_details = "%(asctime)s.%(msecs)03d %(levelname)s %(module)s - %(funcName)s: %(message)s"


class JsonConfig(object):
    """
    A high-level api for handling json configure file.
    """

    def __init__(self, config_path):
        self._config_dict = self._parse(config_path)

    def _parse(self, config_path):
        try:
            with open(config_path) as json_file:
                config_dict = json.load(json_file)
            assert isinstance(config_dict, dict), "Object in {} is NOT a dict.".format(config_path)
        except:
            raise IOError("Error in parsing bert model config file '%s'" % config_path)
        else:
            return config_dict

    def __getitem__(self, key):
        return self._config_dict[key]

    def asdict(self):
        return self._config_dict

    def print_config(self):
        for arg, value in sorted(six.iteritems(self._config_dict)):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')


class ArgumentGroup(object):
    def __init__(self, parser, title, des):
        self._group = parser.add_argument_group(title=title, description=des)

    def add_arg(self, name, type, default, help, **kwargs):
        type = str2bool if type == bool else type
        self._group.add_argument(
            "--" + name,
            default=default,
            type=type,
            help=help + ' Default: %(default)s.',
            **kwargs)


class ArgConfig(object):
    """
    A high-level api for handling argument configs.
    """

    def __init__(self):
        parser = argparse.ArgumentParser()

        train_g = ArgumentGroup(parser, "training", "training options.")
        train_g.add_arg("epoch", int, 3, "Number of epoches for fine-tuning.")
        train_g.add_arg("learning_rate", float, 5e-5, "Learning rate used to train with warmup.")
        train_g.add_arg("lr_scheduler", str, "linear_warmup_decay",
                        "scheduler of learning rate.",
                        choices=['linear_warmup_decay', 'noam_decay'])
        train_g.add_arg("weight_decay", float, 0.01, "Weight decay rate for L2 regularizer.")
        train_g.add_arg("warmup_proportion", float, 0.1,
                        "Proportion of training steps to perform linear learning rate warmup for.")
        train_g.add_arg("save_steps", int, 1000, "The steps interval to save checkpoints.")
        train_g.add_arg("loss_scaling", float, 1.0,
                        "Loss scaling factor for mixed precision training, only valid when use_fp16 is enabled.")
        train_g.add_arg("pred_dir", str, None, "Path to save the prediction results")

        log_g = ArgumentGroup(parser, "logging", "logging related.")
        log_g.add_arg("skip_steps", int, 10, "The steps interval to print loss.")
        log_g.add_arg("verbose", bool, False, "Whether to output verbose log.")

        run_type_g = ArgumentGroup(parser, "run_type", "running type options.")
        run_type_g.add_arg("use_cuda", bool, True, "If set, use GPU for training.")
        run_type_g.add_arg("use_fast_executor", bool, False,
                           "If set, use fast parallel executor (in experiment).")
        run_type_g.add_arg("num_iteration_per_drop_scope", int, 1,
                           "The iteration intervals to clean up temporary variables.")
        run_type_g.add_arg("do_train", bool, True, "Whether to perform training.")
        run_type_g.add_arg("do_predict", bool, True, "Whether to perform prediction.")

        custom_g = ArgumentGroup(parser, "customize", "customized options.")
        self.custom_g = custom_g

        self.parser = parser

    def add_arg(self, name, dtype, default, descrip):
        self.custom_g.add_arg(name, dtype, default, descrip)

    def build_conf(self):
        return self.parser.parse_args()


def str2bool(v):
    # because argparse does not support parsing "true, False" as python
    # booleans directly
    return v.lower() in ("true", "t", "1")


def print_arguments(args, log=None):
    if not log:
        print('----------- Configuration Arguments -----------')
        for arg, value in sorted(six.iteritems(vars(args))):
            print('%s: %s' % (arg, value))
        print('------------------------------------------------')
    else:
        log.info('----------- Configuration Arguments -----------')
        for arg, value in sorted(six.iteritems(vars(args))):
            log.info('%s: %s' % (arg, value))
        log.info('------------------------------------------------')


class PDConfig(object):
    """
    A high-level API for managing configuration files in PaddlePaddle.
    Can jointly work with command-line arguments, json files and yaml files.
    """

    def __init__(self, json_file=None, yaml_file=None, fuse_args=True):
        """
        Init function for PDConfig.

        json_file: the path to the json configure file.
        yaml_file: the path to the yaml configure file.
        fuse_args: if fuse the json/yaml configs with argparse.
        """
        if json_file is not None and yaml_file is not None:
            raise Warning(
                "json_file and yaml_file can not co-exist for now. "
                "please only use one configure file type.")
            return

        self.args = None
        self.arg_config = {}
        self.json_config = {}
        self.yaml_config = {}

        parser = argparse.ArgumentParser()

        self.yaml_g = ArgumentGroup(parser, "yaml", "options from yaml.")
        self.json_g = ArgumentGroup(parser, "json", "options from json.")
        self.com_g = ArgumentGroup(parser, "custom", "customized options.")

        self.parser = parser

        if json_file is not None:
            assert isinstance(json_file, str)
            self.load_json(json_file, fuse_args=fuse_args)

        if yaml_file is not None:
            assert isinstance(yaml_file, str) or isinstance(yaml_file, list)
            self.load_yaml(yaml_file, fuse_args=fuse_args)

    def load_json(self, file_path, fuse_args=True):
        if not os.path.exists(file_path):
            raise Warning("the json file %s does not exist." % file_path)
            return

        with open(file_path, "r") as fin:
            self.json_config = json.loads(fin.read())

        if fuse_args:
            for name in self.json_config:
                if not isinstance(self.json_config[name], int) \
                        and not isinstance(self.json_config[name], float) \
                        and not isinstance(self.json_config[name], str) \
                        and not isinstance(self.json_config[name], bool):
                    continue
                self.json_g.add_arg(name,
                                    type(self.json_config[name]),
                                    self.json_config[name],
                                    "This is from %s" % file_path)

    def load_yaml(self, file_path_list, fuse_args=True):
        if isinstance(file_path_list, str):
            file_path_list = [file_path_list]
        for file_path in file_path_list:
            if not os.path.exists(file_path):
                raise Warning("the yaml file %s does not exist." % file_path)
                return

            with open(file_path, "r") as fin:
                self.yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)

            if fuse_args:
                for name in self.yaml_config:
                    if not isinstance(self.yaml_config[name], int) \
                            and not isinstance(self.yaml_config[name], float) \
                            and not isinstance(self.yaml_config[name], str) \
                            and not isinstance(self.yaml_config[name], bool):
                        continue
                    self.yaml_g.add_arg(name,
                                        type(self.yaml_config[name]),
                                        self.yaml_config[name],
                                        "This is from %s" % file_path)

    def build(self):
        self.args = self.parser.parse_args()
        self.arg_config = vars(self.args)

    def asdict(self):
        return self.arg_config

    def __add__(self, new_arg):
        assert isinstance(new_arg, list) or isinstance(new_arg, tuple)
        assert len(new_arg) >= 3
        assert self.args is None

        name = new_arg[0]
        dtype = new_arg[1]
        dvalue = new_arg[2]
        desc = new_arg[3] if len(new_arg) == 4 else "Description is not provided."

        self.com_g.add_arg(name, dtype, dvalue, desc)

        return self

    def __getattr__(self, name):
        if name in self.arg_config:
            return self.arg_config[name]

        if name in self.json_config:
            return self.json_config[name]

        if name in self.yaml_config:
            return self.yaml_config[name]

        raise Warning("The argument %s is not defined." % name)

    def Print(self):
        print("-" * 70)
        for name in self.arg_config:
            print("{: <25}\t{}".format(str(name), str(self.arg_config[name])))

        for name in self.json_config:
            if name not in self.arg_config:
                # note: the original applied "%" to a str.format-style template
                # here, which raises TypeError; .format is the intended call
                print("{: <25}\t{}".format(str(name), str(self.json_config[name])))

        for name in self.yaml_config:
            if name not in self.arg_config:
                print("{: <25}\t{}".format(str(name), str(self.yaml_config[name])))

        print("-" * 70)


if __name__ == "__main__":
    pd_config = PDConfig(yaml_file="./test/bert_config.yaml")
    pd_config += ("my_age", int, 18, "I am forever 18.")
    pd_config.build()

    print(pd_config.do_train)
    print(pd_config.hidden_size)
    print(pd_config.my_age)
build/lib/paddlepalm/utils/print_helper.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
MAXLEN = 70


def print_dict(dic, title=""):
    if title:
        title = ' ' + title + ' '
        left_len = (MAXLEN - len(title)) // 2
        title = '-' * left_len + title
        right_len = MAXLEN - len(title)
        title = title + '-' * right_len
    else:
        title = '-' * MAXLEN

    print(title)
    for name in dic:
        print("{: <25}\t{}".format(str(name), str(dic[name])))
    print("")
    # print("-" * MAXLEN + '\n')
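
A quick look at the output format (keys left-padded to 25 columns under a centered, dash-framed 70-column title):

```python
print_dict({'learning_rate': 5e-5, 'batch_size': 32}, title='config')
# prints a 70-column dashed title line containing " config ",
# then one "key<TAB>value" row per entry, then a blank line
```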
build/lib/paddlepalm/utils/reader_helper.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import random
import numpy as np
import paddle
from paddle import fluid
from paddle.fluid import layers


def _check_and_adapt_shape_dtype(rt_val, attr):
    if not isinstance(rt_val, np.ndarray):
        rt_val = np.array(rt_val)
        assert rt_val.dtype != np.dtype('O'), "yielded data is not a valid tensor (number of elements on some dimension may differ)."
        if rt_val.dtype == np.dtype('float64'):
            rt_val = rt_val.astype('float32')

    shape, dtype = attr
    assert rt_val.dtype == np.dtype(dtype), "yielded data type not consistent with attr settings."
    assert len(shape) == rt_val.ndim, "yielded data rank (ndim) not consistent with attr settings."
    for rt, exp in zip(rt_val.shape, shape):
        if exp is None or exp < 0:
            continue
        assert rt == exp, "yielded data shape is not consistent with attr settings.\nExpected:{}\nActual:{}".format(exp, rt)
    return rt_val


def _zero_batch(attrs):
    pos_attrs = []
    for shape, dtype in attrs:
        pos_shape = [size if size and size > 0 else 1 for size in shape]
        pos_attrs.append([pos_shape, dtype])

    return [np.zeros(shape=shape, dtype=dtype) for shape, dtype in pos_attrs]


def _zero_batch_x(attrs, batch_size):
    pos_attrs = []
    for shape, dtype in attrs:
        # pos_shape = [size if size and size > 0 else 5 for size in shape]
        pos_shape = [size for size in shape]
        if pos_shape[0] == -1:
            pos_shape[0] = batch_size
        if pos_shape[1] == -1:
            pos_shape[1] = 512  # max seq len
        pos_attrs.append([pos_shape, dtype])

    return [np.zeros(shape=shape, dtype=dtype) for shape, dtype in pos_attrs]


def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n_prefetch=1):
    # note: `async` became a reserved word in Python 3.7; this module targets
    # the Python 2.7 era of the codebase
    inputs = []
    ret = {}
    for name, shape, dtype in input_attrs:
        p = layers.data(name, shape=shape, dtype=dtype)
        ret[name] = p
        inputs.append(p)

    if async:
        assert iterator_fn is not None, "iterator_fn is needed for building async input layer."
        reader = fluid.io.PyReader(inputs, capacity=dev_count * n_prefetch, iterable=False)
        reader.decorate_batch_generator(iterator_fn)
        reader.start()

    return ret


def create_iterator_fn(iterator, iterator_prefix, shape_and_dtypes, outname_to_pos, verbose=0):
    # note: the original inner function was also named `iterator`, shadowing
    # the argument and breaking `next(iterator)`; it is renamed here, and the
    # undefined names `iterator_prefixe` and `joint_shape_and_dtypes` are
    # corrected to the actual arguments
    def iterator_fn():
        v = verbose
        while True:
            results = _zero_batch(shape_and_dtypes)

            outputs = next(iterator)  # dict type
            prefix = iterator_prefix
            for outname, val in outputs.items():
                task_outname = prefix + '/' + outname

                if outname in outname_to_pos:
                    idx = outname_to_pos[outname]
                    val = _check_and_adapt_shape_dtype(val, shape_and_dtypes[idx])
                    results[idx] = val

                if task_outname in outname_to_pos:
                    idx = outname_to_pos[task_outname]
                    val = _check_and_adapt_shape_dtype(val, shape_and_dtypes[idx])
                    results[idx] = val

            yield results

    return iterator_fn


def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtypes, mrs,
                             outname_to_pos, dev_count=1, keep_one_task=True, verbose=0,
                             batch_size=None):
    """
    joint_shape_and_dtypes: essentially determined by the attr settings of the
    backbone and paradigms, with the variable (-1) dimensions filled in
    automatically from the reader's attrs; validating against the iterator
    therefore gives a runtime correctness check of each batch.
    """
    task_ids = range(len(iterators))
    weights = [mr / float(sum(mrs)) for mr in mrs]
    if not keep_one_task:
        dev_count = 1

    # build fake batch
    # Note a caveat of this approach: setting a task's mix ratio to 0 does not
    # prevent reads from that task, so deleting its dataset would crash the run.
    # Compared with the previous zero-batch approach, however, it avoids the
    # assumption that only one dimension may be size=-1 and that the -1 in
    # dim 0 must be the batch size.
    results = _zero_batch(joint_shape_and_dtypes)
    outbuf = {}
    for id in task_ids:
        outputs = next(iterators[id])  # dict type
        outbuf[id] = outputs
        prefix = iterator_prefixes[id]
        for outname, val in outputs.items():
            task_outname = prefix + '/' + outname

            if outname in outname_to_pos:
                idx = outname_to_pos[outname]
                val = _check_and_adapt_shape_dtype(val, joint_shape_and_dtypes[idx])
                results[idx] = val

            if task_outname in outname_to_pos:
                idx = outname_to_pos[task_outname]
                val = _check_and_adapt_shape_dtype(val, joint_shape_and_dtypes[idx])
                results[idx] = val

    fake_batch = results
    dev_count_bak = dev_count

    def iterator():
        v = verbose
        while True:
            id = np.random.choice(task_ids, p=weights)
            results = fake_batch
            if v > 0:
                print('----- debug joint iterator -----')
                print('sampled task id: ' + str(id))
            task_id_tensor = np.array([[id]]).astype("int64")
            results[0] = task_id_tensor

            for i in range(dev_count):
                # results = _zero_batch(joint_shape_and_dtypes, batch_size=batch_size)
                # results[0] = task_id_tensor
                if id in outbuf:
                    outputs = outbuf[id]
                    del outbuf[id]
                else:
                    outputs = next(iterators[id])  # dict type

                prefix = iterator_prefixes[id]
                for outname, val in outputs.items():
                    if v > 0:
                        print('reader generate: ' + outname)
                    task_outname = prefix + '/' + outname

                    if outname in outname_to_pos:
                        idx = outname_to_pos[outname]
                        if v > 0:
                            print(outname + ' is inserted at idx ' + str(idx))
                        val = _check_and_adapt_shape_dtype(val, joint_shape_and_dtypes[idx])
                        results[idx] = val

                    if task_outname in outname_to_pos:
                        idx = outname_to_pos[task_outname]
                        if v > 0:
                            print(task_outname + ' is inserted at idx ' + str(idx))
                        val = _check_and_adapt_shape_dtype(val, joint_shape_and_dtypes[idx])
                        results[idx] = val

                if v > 0:
                    print('yielded batch len and shapes:')
                    print(len(results))
                    for i in results:
                        print(np.shape(i))
                    print('')
                v -= 1
                yield results

    return iterator
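
Task scheduling in the joint iterator boils down to weighted sampling: each step draws a task id with probability proportional to its mix ratio. A standalone numpy sketch of that draw (ratios are made up):

```python
import numpy as np

mrs = [1.0, 0.5, 0.5]                           # per-task mix ratios
weights = [mr / float(sum(mrs)) for mr in mrs]  # [0.5, 0.25, 0.25]

counts = {0: 0, 1: 0, 2: 0}
for _ in range(10000):
    task_id = np.random.choice(len(mrs), p=weights)
    counts[task_id] += 1
print(counts)  # roughly 5000 / 2500 / 2500 draws
```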
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True):
    """
    Args:
        task_attrs(list[dict]|dict): task input attributes, key=attr_name,
            val=[shape, dtype]; supports both a single task and nested tasks
    """
    if isinstance(task_attrs, dict):
        task_attrs = [task_attrs]

    if insert_taskid:
        ret = [([1, 1], 'int64')]
        names = ['__task_id']
        start = 1
    else:
        ret = []
        names = []
        start = 0

    names += sorted(backbone_attr.keys())
    ret.extend([backbone_attr[k] for k in names[start:]])
    name_to_position = {}
    # pos=0 is for task_id, thus we start from 1
    for pos, k in enumerate(names):
        name_to_position[k] = pos
    for task_attr in task_attrs:
        task_names = sorted(task_attr.keys())
        names.extend(task_names)
        ret.extend([task_attr[k] for k in task_names])
        for pos, k in enumerate(task_names, start=len(name_to_position)):
            name_to_position[k] = pos
    return names, ret, name_to_position
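
A quick sketch of what `merge_input_attrs` produces for one backbone and one task (the attribute names and shapes below are invented for illustration):

```python
backbone_attr = {'token_ids': [[-1, -1, 1], 'int64'],
                 'position_ids': [[-1, -1, 1], 'int64']}
task_attr = {'start_positions': [[-1, 1], 'int64']}

names, attrs, name_to_pos = merge_input_attrs(backbone_attr, task_attr)
print(names)
# ['__task_id', 'position_ids', 'token_ids', 'start_positions']
print(name_to_pos['start_positions'])  # 3
```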
build/lib/paddlepalm/utils/saver.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import os
import six
import ast
import copy

import numpy as np
import paddle.fluid as fluid


def init_checkpoint(exe, init_checkpoint_path, main_program, skip_list=[]):
    assert os.path.exists(init_checkpoint_path), "[%s] cannot be found." % init_checkpoint_path

    def existed_persistables(var):
        if not fluid.io.is_persistable(var):
            return False
        if var.name in skip_list:
            return False
        return os.path.exists(os.path.join(init_checkpoint_path, var.name))

    fluid.io.load_vars(
        exe,
        init_checkpoint_path,
        main_program=main_program,
        predicate=existed_persistables)
    print("Load model from {}".format(init_checkpoint_path))


def init_pretraining_params(exe, pretraining_params_path, main_program):
    assert os.path.exists(pretraining_params_path), "[%s] cannot be found." % pretraining_params_path

    def existed_params(var):
        if not isinstance(var, fluid.framework.Parameter):
            return False
        return os.path.exists(os.path.join(pretraining_params_path, var.name))

    print("Load pretraining parameters from {}...\n".format(pretraining_params_path))
    fluid.io.load_vars(
        exe,
        pretraining_params_path,
        main_program=main_program,
        predicate=existed_params)
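
Both helpers follow the same pattern: filter variables with a predicate and hand the survivors to `fluid.io.load_vars`. A hedged usage sketch (the executor setup is standard Paddle 1.x; the checkpoint paths are assumptions):

```python
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
main_program = fluid.default_main_program()

# restore a full training checkpoint (path is hypothetical)
init_checkpoint(exe, "output_model/ckpt.step1000", main_program)

# or warm-start from pretrained parameters only
init_pretraining_params(exe, "pretrain_model/ernie/params", main_program)
```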
build/lib/paddlepalm/utils/textprocess_helper.py (deleted, 100644 → 0)
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
def is_whitespace(c):
    if c == " " or c == "\t" or c == "\r" or c == "\n" or ord(c) == 0x202F:
        return True
    return False
dist/paddle_palm-1.2-py2.7.egg (deleted, 100644 → 0): binary file deleted
paddle_palm.egg-info/PKG-INFO (deleted, 100644 → 0)
Metadata-Version: 1.1
Name: paddle-palm
Version: 1.2
Summary: A Multi-task Learning Lib for PaddlePaddle Users.
Home-page: https://github.com/PaddlePadd
Author: PaddlePaddle
Author-email: zhangyiming04@baidu.com
License: Apache 2.0
Description-Content-Type: text/markdown
Description:
# PaddlePALM, a Multi-task Learning Framework
# Installation
pip install paddlepalm
# Usage
### 1. Create task instances
Task instances are described in yaml format. The required fields of each task instance include
- train_file: path to the training set file
- reader: name of the dataset loading and preprocessing tool; the list of built-in readers is [here](https://www.baidu.com/)
- backbone: name of the backbone model; the list of built-in backbones is [here](https://www.baidu.com/)
- paradigm: name of the task paradigm (type); the list of built-in paradigms is [here](https://www.baidu.com/)
### 2. Complete the training configuration
Configure the parameters of multi-task learning in yaml format, e.g. specify the task instances together with their main/auxiliary relations, parameter reuse relations, sampling weights, etc.
### 3. Start training
```python
import paddlepalm as palm
if __name__ == '__main__':
    controller = palm.Controller('config.yaml', task_dir='task_instance')
    controller.load_pretrain('pretrain_model/ernie/params')
    controller.train()
```
### 4. Prediction
After training finishes, the pred interface can be called directly to run prediction on a target task.
Example:
```python
import paddlepalm as palm
if __name__ == '__main__':
    controller = palm.Controller(config_path='config.yaml', task_dir='task_instance')
    controller.load_pretrain('pretrain_model/ernie/params')
    controller.train()
    controller.pred('mrqa')
```
A new controller can also be created for prediction only:
```python
import paddlepalm as palm
if __name__ == '__main__':
    controller = palm.Controller(config_path='config.yaml', task_dir='task_instance')
    controller.pred('mrqa', infermodel_path='output_model/firstrun2/infer_model')
```
# How it works
### Multi-task learning mechanism
pass
### Training termination mechanism
- Default settings:
    - **Multi-task learning stops once every target task reaches its target number of training steps**
    - Tasks not marked as target tasks (i.e. auxiliary tasks) do not affect termination; they only play a "supporting" role in training
    - Note: all tasks are target tasks by default; target/auxiliary tasks can be marked via `target_tag`
- The target number of training steps of each target task is computed from num_epochs and mix_ratio
### Saving mechanism
- Default settings:
    - Models saved during training fall into two kinds, checkpoint (ckpt) and inference model (infermodel):
        - ckpt saves the full computation graph covering all tasks (i.e. the whole multi-task learning graph) and is used to resume interrupted training
        - infermodel saves the inference graph of one target task plus the configs that inference depends on
    - For each target task, an inference model is saved automatically once the expected number of steps is reached, and not afterwards. (Note: saving an inference model does not affect ckpt saving)
- User-adjustable settings
    - Use `save_ckpt_every_steps` to control how often ckpt is saved; off by default
    - Each task instance can use `save_infermodel_every_steps` to control how often it saves its infermodel; default -1, i.e. save only once upon reaching the target number of training steps
Keywords: paddlepaddle,paddle,multi-task-learning
Platform: any
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
paddle_palm.egg-info/SOURCES.txt (deleted, 100644 → 0)
README.md
setup.cfg
setup.py
./paddlepalm/__init__.py
./paddlepalm/default_settings.py
./paddlepalm/interface.py
./paddlepalm/mtl_controller.py
./paddlepalm/task_instance.py
./paddlepalm/backbone/__init__.py
./paddlepalm/backbone/bert.py
./paddlepalm/backbone/bow.py
./paddlepalm/backbone/ernie.py
./paddlepalm/backbone/utils/__init__.py
./paddlepalm/backbone/utils/transformer.py
./paddlepalm/optimizer/__init__.py
./paddlepalm/optimizer/adam.py
./paddlepalm/reader/__init__.py
./paddlepalm/reader/cls4bert.py
./paddlepalm/reader/match4ernie.py
./paddlepalm/reader/mlm.py
./paddlepalm/reader/mrc4bert.py
./paddlepalm/reader/mrc4ernie.py
./paddlepalm/reader/utils/__init__.py
./paddlepalm/reader/utils/batching4bert.py
./paddlepalm/reader/utils/batching4ernie.py
./paddlepalm/reader/utils/mlm_batching.py
./paddlepalm/reader/utils/mrqa_helper.py
./paddlepalm/reader/utils/reader4ernie.py
./paddlepalm/task_paradigm/__init__.py
./paddlepalm/task_paradigm/cls.py
./paddlepalm/task_paradigm/match.py
./paddlepalm/task_paradigm/mlm.py
./paddlepalm/task_paradigm/mrc.py
./paddlepalm/tokenizer/__init__.py
./paddlepalm/tokenizer/bert_tokenizer.py
./paddlepalm/tokenizer/ernie_tokenizer.py
./paddlepalm/utils/__init__.py
./paddlepalm/utils/config_helper.py
./paddlepalm/utils/print_helper.py
./paddlepalm/utils/reader_helper.py
./paddlepalm/utils/saver.py
./paddlepalm/utils/textprocess_helper.py
paddle_palm.egg-info/PKG-INFO
paddle_palm.egg-info/SOURCES.txt
paddle_palm.egg-info/dependency_links.txt
paddle_palm.egg-info/not-zip-safe
paddle_palm.egg-info/top_level.txt
\ No newline at end of file
paddle_palm.egg-info/dependency_links.txt (deleted, 100644 → 0)

paddle_palm.egg-info/not-zip-safe (deleted, 100644 → 0)

paddle_palm.egg-info/top_level.txt (deleted, 100644 → 0)
paddlepalm