Commit fac8802f authored by wangxiao1021

Merge branch 'r0.3-api' of https://github.com/PaddlePaddle/PALM into r0.3-api

# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1
BERT model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid import layers
from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.interface import backbone
class Model(backbone):
def __init__(self, config, phase):
        # self._is_training = phase == 'train'  # a backbone usually need not care about the running phase, since its outputs basically stay the same across phases
self._emb_size = config["hidden_size"]
self._n_layer = config["num_hidden_layers"]
self._n_head = config["num_attention_heads"]
self._voc_size = config["vocab_size"]
self._max_position_seq_len = config["max_position_embeddings"]
self._sent_types = config["type_vocab_size"]
self._hidden_act = config["hidden_act"]
self._prepostprocess_dropout = config["hidden_dropout_prob"]
self._attention_dropout = config["attention_probs_dropout_prob"]
self._word_emb_name = "word_embedding"
self._pos_emb_name = "pos_embedding"
self._sent_emb_name = "sent_embedding"
        # Initialize all weights with a truncated normal initializer; all biases
        # are initialized to zero by default.
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config["initializer_range"])
@property
def inputs_attr(self):
return {"token_ids": [[-1, -1, 1], 'int64'],
"position_ids": [[-1, -1, 1], 'int64'],
"segment_ids": [[-1, -1, 1], 'int64'],
"input_mask": [[-1, -1, 1], 'float32']}
@property
def outputs_attr(self):
return {"word_embedding": [[-1, -1, self._emb_size], 'float32'],
"embedding_table": [[-1, self._voc_size, self._emb_size], 'float32'],
"encoder_outputs": [[-1, -1, self._emb_size], 'float32'],
"sentence_embedding": [[-1, self._emb_size], 'float32'],
"sentence_pair_embedding": [[-1, self._emb_size], 'float32']}
def build(self, inputs, scope_name=""):
src_ids = inputs['token_ids']
pos_ids = inputs['position_ids']
sent_ids = inputs['segment_ids']
input_mask = inputs['input_mask']
self._emb_dtype = 'float32'
# padding id in vocabulary must be set to 0
emb_out = fluid.layers.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
position_emb_out = fluid.layers.embedding(
input=pos_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding(
sent_ids,
size=[self._sent_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._sent_emb_name, initializer=self._param_initializer))
emb_out = emb_out + position_emb_out
emb_out = emb_out + sent_emb_out
emb_out = pre_process_layer(
emb_out, 'nd', self._prepostprocess_dropout, name=scope_name+'pre_encoder')
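        # Build the additive self-attention bias from the padding mask: input_mask is
        # [batch, seq_len, 1], so matmul(mask, mask^T) gives a [batch, seq_len, seq_len]
        # matrix that is 1 where both positions are real tokens. scale(x, 10000, bias=-1,
        # bias_after_scale=False) computes (x - 1) * 10000, mapping {0, 1} to {-10000, 0},
        # i.e. a large negative bias on padded positions before the softmax.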
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True)
self_attn_mask = fluid.layers.scale(
x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
n_head_self_attn_mask = fluid.layers.stack(
x=[self_attn_mask] * self._n_head, axis=1)
n_head_self_attn_mask.stop_gradient = True
enc_out = encoder(
enc_input=emb_out,
attn_bias=n_head_self_attn_mask,
n_layer=self._n_layer,
n_head=self._n_head,
d_key=self._emb_size // self._n_head,
d_value=self._emb_size // self._n_head,
d_model=self._emb_size,
d_inner_hid=self._emb_size * 4,
prepostprocess_dropout=self._prepostprocess_dropout,
attention_dropout=self._attention_dropout,
relu_dropout=0,
hidden_act=self._hidden_act,
preprocess_cmd="",
postprocess_cmd="dan",
param_initializer=self._param_initializer,
name=scope_name+'encoder')
next_sent_feat = fluid.layers.slice(
input=enc_out, axes=[1], starts=[0], ends=[1])
next_sent_feat = fluid.layers.reshape(next_sent_feat, [-1, next_sent_feat.shape[-1]])
next_sent_feat = fluid.layers.fc(
input=next_sent_feat,
size=self._emb_size,
act="tanh",
param_attr=fluid.ParamAttr(
name=scope_name+"pooled_fc.w_0", initializer=self._param_initializer),
bias_attr=scope_name+"pooled_fc.b_0")
return {'embedding_table': embedding_table,
'word_embedding': emb_out,
'encoder_outputs': enc_out,
'sentence_embedding': next_sent_feat,
'sentence_pair_embedding': next_sent_feat}
def postprocess(self, rt_outputs):
pass
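# A minimal usage sketch of this backbone (hypothetical config values; in practice
# they come from the pretrained model's bert_config.json):
#
#     config = {"hidden_size": 768, "num_hidden_layers": 12, "num_attention_heads": 12,
#               "vocab_size": 30522, "max_position_embeddings": 512, "type_vocab_size": 2,
#               "hidden_act": "gelu", "hidden_dropout_prob": 0.1,
#               "attention_probs_dropout_prob": 0.1, "initializer_range": 0.02}
#     bert = Model(config, phase='train')
#     # `inputs` must be a dict of Variables matching bert.inputs_attr
#     outputs = bert.build(inputs)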
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Ernie model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from __future__ import absolute_import
from paddle import fluid
from paddle.fluid import layers
from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.interface import backbone
class Model(backbone):
def __init__(self,
config,
phase):
        # self._is_training = phase == 'train'  # a backbone usually need not care about the running phase, since its outputs basically stay the same across phases
self._emb_size = config['hidden_size']
self._n_layer = config['num_hidden_layers']
self._n_head = config['num_attention_heads']
self._voc_size = config['vocab_size']
self._max_position_seq_len = config['max_position_embeddings']
if config['sent_type_vocab_size']:
self._sent_types = config['sent_type_vocab_size']
else:
self._sent_types = config['type_vocab_size']
self._task_types = config['task_type_vocab_size']
self._hidden_act = config['hidden_act']
self._prepostprocess_dropout = config['hidden_dropout_prob']
self._attention_dropout = config['attention_probs_dropout_prob']
self._word_emb_name = "word_embedding"
self._pos_emb_name = "pos_embedding"
self._sent_emb_name = "sent_embedding"
self._task_emb_name = "task_embedding"
self._emb_dtype = "float32"
self._param_initializer = fluid.initializer.TruncatedNormal(
scale=config['initializer_range'])
@property
def inputs_attr(self):
return {"token_ids": [[-1, -1, 1], 'int64'],
"position_ids": [[-1, -1, 1], 'int64'],
"segment_ids": [[-1, -1, 1], 'int64'],
"input_mask": [[-1, -1, 1], 'float32'],
"task_ids": [[-1,-1, 1], 'int64']}
@property
def outputs_attr(self):
return {"word_embedding": [[-1, -1, self._emb_size], 'float32'],
"embedding_table": [[-1, self._voc_size, self._emb_size], 'float32'],
"encoder_outputs": [[-1, -1, self._emb_size], 'float32'],
"sentence_embedding": [[-1, self._emb_size], 'float32'],
"sentence_pair_embedding": [[-1, self._emb_size], 'float32']}
def build(self, inputs, scope_name=""):
src_ids = inputs['token_ids']
pos_ids = inputs['position_ids']
sent_ids = inputs['segment_ids']
input_mask = inputs['input_mask']
task_ids = inputs['task_ids']
# padding id in vocabulary must be set to 0
emb_out = fluid.layers.embedding(
input=src_ids,
size=[self._voc_size, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
position_emb_out = fluid.layers.embedding(
input=pos_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding(
sent_ids,
size=[self._sent_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._sent_emb_name, initializer=self._param_initializer))
emb_out = emb_out + position_emb_out
emb_out = emb_out + sent_emb_out
task_emb_out = fluid.layers.embedding(
task_ids,
size=[self._task_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=scope_name+self._task_emb_name,
initializer=self._param_initializer))
emb_out = emb_out + task_emb_out
emb_out = pre_process_layer(
emb_out, 'nd', self._prepostprocess_dropout, name=scope_name+'pre_encoder')
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True)
self_attn_mask = fluid.layers.scale(
x=self_attn_mask, scale=10000.0, bias=-1.0, bias_after_scale=False)
n_head_self_attn_mask = fluid.layers.stack(
x=[self_attn_mask] * self._n_head, axis=1)
n_head_self_attn_mask.stop_gradient = True
enc_out = encoder(
enc_input=emb_out,
attn_bias=n_head_self_attn_mask,
n_layer=self._n_layer,
n_head=self._n_head,
d_key=self._emb_size // self._n_head,
d_value=self._emb_size // self._n_head,
d_model=self._emb_size,
d_inner_hid=self._emb_size * 4,
prepostprocess_dropout=self._prepostprocess_dropout,
attention_dropout=self._attention_dropout,
relu_dropout=0,
hidden_act=self._hidden_act,
preprocess_cmd="",
postprocess_cmd="dan",
param_initializer=self._param_initializer,
name=scope_name+'encoder')
next_sent_feat = fluid.layers.slice(
input=enc_out, axes=[1], starts=[0], ends=[1])
next_sent_feat = fluid.layers.reshape(next_sent_feat, [-1, next_sent_feat.shape[-1]])
next_sent_feat = fluid.layers.fc(
input=next_sent_feat,
size=self._emb_size,
act="tanh",
param_attr=fluid.ParamAttr(
name=scope_name+"pooled_fc.w_0", initializer=self._param_initializer),
bias_attr=scope_name+"pooled_fc.b_0")
return {'embedding_table': embedding_table,
'word_embedding': emb_out,
'encoder_outputs': enc_out,
'sentence_embedding': next_sent_feat,
'sentence_pair_embedding': next_sent_feat}
def postprocess(self, rt_outputs):
pass
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Transformer encoder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from functools import partial
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layer_helper import LayerHelper as LayerHelper
from functools import reduce # py3
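# Manual layer normalization:
#   LN(x) = (x - mean) * rsqrt(variance + epsilon) * scale + bias
# where mean and variance are computed along `begin_norm_axis`, and scale/bias are
# learnable parameters over the normalized trailing shape.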
def layer_norm(x, begin_norm_axis=1, epsilon=1e-6, param_attr=None, bias_attr=None):
helper = LayerHelper('layer_norm', **locals())
mean = layers.reduce_mean(x, dim=begin_norm_axis, keep_dim=True)
shift_x = layers.elementwise_sub(x=x, y=mean, axis=0)
variance = layers.reduce_mean(layers.square(shift_x), dim=begin_norm_axis, keep_dim=True)
r_stdev = layers.rsqrt(variance + epsilon)
norm_x = layers.elementwise_mul(x=shift_x, y=r_stdev, axis=0)
param_shape = [reduce(lambda x, y: x * y, norm_x.shape[begin_norm_axis:])]
param_dtype = norm_x.dtype
scale = helper.create_parameter(
attr=param_attr,
shape=param_shape,
dtype=param_dtype,
default_initializer=fluid.initializer.Constant(1.))
bias = helper.create_parameter(
attr=bias_attr,
shape=param_shape,
dtype=param_dtype,
is_bias=True,
default_initializer=fluid.initializer.Constant(0.))
out = layers.elementwise_mul(x=norm_x, y=scale, axis=-1)
out = layers.elementwise_add(x=out, y=bias, axis=-1)
return out
def multi_head_attention(queries,
keys,
values,
attn_bias,
d_key,
d_value,
d_model,
n_head=1,
dropout_rate=0.,
cache=None,
param_initializer=None,
name='multi_head_att'):
"""
    Multi-Head Attention. Note that attn_bias is added to the logits before
    computing the softmax activation, to mask certain selected positions so
    that they are not considered in the attention weights.
"""
keys = queries if keys is None else keys
values = keys if values is None else values
if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
raise ValueError(
"Inputs: quries, keys and values should all be 3-D tensors.")
def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
"""
Add linear projection to queries, keys, and values.
"""
q = layers.fc(input=queries,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_query_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_query_fc.b_0')
k = layers.fc(input=keys,
size=d_key * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_key_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_key_fc.b_0')
v = layers.fc(input=values,
size=d_value * n_head,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_value_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_value_fc.b_0')
return q, k, v
def __split_heads(x, n_head):
"""
        Reshape the last dimension of input tensor x so that it becomes two
dimensions and then transpose. Specifically, input a tensor with shape
[bs, max_sequence_length, n_head * hidden_dim] then output a tensor
with shape [bs, n_head, max_sequence_length, hidden_dim].
"""
hidden_size = x.shape[-1]
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
reshaped = layers.reshape(
x=x, shape=[0, 0, n_head, hidden_size // n_head], inplace=True)
        # permute the dimensions into:
# [batch_size, n_head, max_sequence_len, hidden_size_per_head]
return layers.transpose(x=reshaped, perm=[0, 2, 1, 3])
def __combine_heads(x):
"""
        Transpose and then reshape the last two dimensions of input tensor x
so that it becomes one dimension, which is reverse to __split_heads.
"""
if len(x.shape) == 3: return x
if len(x.shape) != 4:
raise ValueError("Input(x) should be a 4-D Tensor.")
trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
# The value 0 in shape attr means copying the corresponding dimension
# size of the input as the output dimension size.
return layers.reshape(
x=trans_x,
shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
inplace=True)
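    # Computes Attention(Q, K, V) = softmax(Q K^T / sqrt(d_key) + attn_bias) V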
def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
"""
Scaled Dot-Product Attention
"""
scaled_q = layers.scale(x=q, scale=d_key**-0.5)
product = layers.matmul(x=scaled_q, y=k, transpose_y=True)
if attn_bias:
product += attn_bias
weights = layers.softmax(product)
if dropout_rate:
weights = layers.dropout(
weights,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.matmul(weights, v)
return out
q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
if cache is not None: # use cache and concat time steps
# Since the inplace reshape in __split_heads changes the shape of k and
# v, which is the cache input for next time step, reshape the cache
# input from the previous time step first.
k = cache["k"] = layers.concat(
[layers.reshape(
cache["k"], shape=[0, 0, d_model]), k], axis=1)
v = cache["v"] = layers.concat(
[layers.reshape(
cache["v"], shape=[0, 0, d_model]), v], axis=1)
q = __split_heads(q, n_head)
k = __split_heads(k, n_head)
v = __split_heads(v, n_head)
ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_key,
dropout_rate)
out = __combine_heads(ctx_multiheads)
# Project back to the model size.
proj_out = layers.fc(input=out,
size=d_model,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_output_fc.w_0',
initializer=param_initializer),
bias_attr=name + '_output_fc.b_0')
return proj_out
def positionwise_feed_forward(x,
d_inner_hid,
d_hid,
dropout_rate,
hidden_act,
param_initializer=None,
name='ffn'):
"""
Position-wise Feed-Forward Networks.
This module consists of two linear transformations with a ReLU activation
in between, which is applied to each position separately and identically.
"""
hidden = layers.fc(input=x,
size=d_inner_hid,
num_flatten_dims=2,
act=hidden_act,
param_attr=fluid.ParamAttr(
name=name + '_fc_0.w_0',
initializer=param_initializer),
bias_attr=name + '_fc_0.b_0')
if dropout_rate:
hidden = layers.dropout(
hidden,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
out = layers.fc(input=hidden,
size=d_hid,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name=name + '_fc_1.w_0', initializer=param_initializer),
bias_attr=name + '_fc_1.b_0')
return out
def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.,
name=''):
"""
    Add residual connection, layer normalization and dropout to the out tensor
optionally according to the value of process_cmd.
This will be used before or after multi-head attention and position-wise
feed-forward networks.
"""
for cmd in process_cmd:
if cmd == "a": # add residual connection
out = out + prev_out if prev_out else out
elif cmd == "n": # add layer normalization
out_dtype = out.dtype
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float32")
out = layer_norm(
out,
begin_norm_axis=len(out.shape) - 1,
param_attr=fluid.ParamAttr(
name=name + '_layer_norm_scale',
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
name=name + '_layer_norm_bias',
initializer=fluid.initializer.Constant(0.)))
if out_dtype == fluid.core.VarDesc.VarType.FP16:
out = layers.cast(x=out, dtype="float16")
elif cmd == "d": # add dropout
if dropout_rate:
out = layers.dropout(
out,
dropout_prob=dropout_rate,
dropout_implementation="upscale_in_train",
is_test=False)
return out
pre_process_layer = partial(pre_post_process_layer, None)
post_process_layer = pre_post_process_layer
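# For example, with the encoder configuration used by the BERT/ERNIE backbones above
# (preprocess_cmd="", postprocess_cmd="dan"), each sub-layer output is post-processed
# by dropout -> residual add -> layer norm, i.e. the post-LN transformer variant.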
def encoder_layer(enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""The encoder layers that can be stacked to form a deep encoder.
    This module consists of a multi-head (self) attention sub-layer followed by
    position-wise feed-forward networks, both accompanied by
    the post_process_layer to add residual connection, layer normalization
    and dropout.
"""
attn_output = multi_head_attention(
pre_process_layer(
enc_input,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_att'),
None,
None,
attn_bias,
d_key,
d_value,
d_model,
n_head,
attention_dropout,
param_initializer=param_initializer,
name=name + '_multi_head_att')
attn_output = post_process_layer(
enc_input,
attn_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_att')
ffd_output = positionwise_feed_forward(
pre_process_layer(
attn_output,
preprocess_cmd,
prepostprocess_dropout,
name=name + '_pre_ffn'),
d_inner_hid,
d_model,
relu_dropout,
hidden_act,
param_initializer=param_initializer,
name=name + '_ffn')
return post_process_layer(
attn_output,
ffd_output,
postprocess_cmd,
prepostprocess_dropout,
name=name + '_post_ffn')
def encoder(enc_input,
attn_bias,
n_layer,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd="n",
postprocess_cmd="da",
param_initializer=None,
name=''):
"""
The encoder is composed of a stack of identical layers returned by calling
encoder_layer.
"""
for i in range(n_layer):
enc_output = encoder_layer(
enc_input,
attn_bias,
n_head,
d_key,
d_value,
d_model,
d_inner_hid,
prepostprocess_dropout,
attention_dropout,
relu_dropout,
hidden_act,
preprocess_cmd,
postprocess_cmd,
param_initializer=param_initializer,
name=name + '_layer_' + str(i))
enc_input = enc_output
enc_output = pre_process_layer(
enc_output, preprocess_cmd, prepostprocess_dropout, name="post_encoder")
return enc_output
task_instance: "mrqa, mlm4mrqa, match4mrqa"
target_tag: 1, 0, 0
mix_ratio: 1.0, 0.5, 0.5
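# (multi-task section of config.yaml: task_instance lists the registered task
# instances; target_tag appears to mark target (1) vs. auxiliary (0) tasks, and
# mix_ratio sets each task's sampling weight relative to the target task)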
......
../../paddlepalm/
import paddlepalm as palm

if __name__ == '__main__':
controller = palm.Controller('config.yaml', task_dir='tasks')
max_seqlen = 512
batch_size = 32
match_reader = palm.reader.match(train_file, vocab, \
max_seqlen, file_format='csv', tokenizer='wordpiece', \
lang='en', shuffle_train=True)
mrc_reader = palm.reader.mrc(train_file, phase='train')
mlm_reader = palm.reader.mlm(train_file, phase='train')
palm.reader.
match = palm.tasktype.cls(num_classes=4)
mrc = palm.tasktype.match(learning_strategy='pairwise')
mlm = palm.tasktype.mlm()
mlm.print()
bb_flags = palm.load_json('./pretrain/ernie/ernie_config.json')
bb = palm.backbone.ernie(bb_flags['xx'], xxx)
bb.print()
match4mrqa = palm.Task('match4mrqa', match_reader, match_tt)
mrc4mrqa = palm.Task('match4mrqa', match_reader, match_tt)
# match4mrqa.reuse_with(mrc4mrqa)
controller = palm.Controller([mrqa, match4mrqa, mlm4mrqa])
loss = controller.build_forward(bb, mask_task=[])
n_steps = controller.estimate_train_steps(basetask=mrqa, num_epochs=2, batch_size=8, dev_count=4)
adam = palm.optimizer.Adam(loss)
sched = palm.schedualer.LinearWarmup(learning_rate, max_train_steps=n_steps, warmup_steps=0.1*n_steps)
controller.build_backward(optimizer=adam, schedualer=sched, weight_decay=0.001, use_ema=True, ema_decay=0.999)
controller.random_init_params()
    controller.load_pretrain('../../pretrain_model/ernie/params')
    controller.train()
    # controller = palm.Controller(config='config.yaml', task_dir='tasks', for_train=False)
    # controller.pred('mrqa', inference_model_dir='output_model/secondrun/mrqa/infer_model')
../../paddlepalm/
../../pretrain/
# coding=utf-8
import paddlepalm as palm
import json
if __name__ == '__main__':

    max_seqlen = 512
    batch_size = 4
num_epochs = 2
lr = 1e-3
vocab_path = './pretrain/ernie/vocab.txt'
train_file = './data/cls4mrqa/train.tsv'
config = json.load(open('./pretrain/ernie/ernie_config.json'))
# ernie = palm.backbone.ERNIE(...)
ernie = palm.backbone.ERNIE.from_config(config)
# pred_ernie = palm.backbone.ERNIE.from_config(config, phase='pred')
# cls_reader2 = palm.reader.cls(train_file_topic, vocab_path, batch_size, max_seqlen)
# cls_reader3 = palm.reader.cls(train_file_subj, vocab_path, batch_size, max_seqlen)
# topic_trainer = palm.Trainer('topic_cls', cls_reader2, cls)
# subj_trainer = palm.Trainer('subj_cls', cls_reader3, cls)
    # Create the reader for this classification task; its many parameters control the dataset format, the number of files, preprocessing rules, and so on
cls_reader = palm.reader.ClassifyReader(vocab_path, max_seqlen)
print(cls_reader.outputs_attr)
    # Different backbones require different features from the task reader. For a classification task the basic input features are token_ids and label_ids, but BERT additionally requires position, segment, input_mask and other features extracted from the input; after register_with, the reader automatically supplements the fields the backbone requires
cls_reader.register_with(ernie)
print(cls_reader.outputs_attr)
    # Create the task head, e.g. classification, matching, machine reading comprehension. Each head has required/optional parameters related to its task. Note that heads are decoupled from readers: as long as the dataset-side fields a head depends on can be provided by the reader, the combination is valid
cls_head = palm.head.Classify(4, 1024, 0.1)
# cls_pred_head = palm.head.Classify(4, 1024, 0.1, phase='pred')
    # Create a trainer from the reader and the task head. A trainer represents one training task: it maintains the training progress and the task's key information, performs validity checks, and controls the rules for saving and loading the task's model
trainer = palm.Trainer('senti_cls', cls_reader, cls_head)
# match4mrqa.reuse_head_with(mrc4mrqa)
# data_vars = cls_reader.build()
# output_vars = ernie.build(data_vars)
# cls_head.build({'backbone': output_vars, 'reader': data_vars})
loss_var = trainer.build_forward(ernie)
# controller.build_forward()
    # Error! A head/backbone can only be built once! Do NOT call build_forward again for any Trainer!
print(trainer.num_examples)
iterator_fn = trainer.load_data(train_file, 'csv', num_epochs=num_epochs, batch_size=batch_size)
print(trainer.num_examples)
n_steps = trainer.num_examples * num_epochs // batch_size
warmup_steps = int(0.1 * n_steps)
print(warmup_steps)
sched = palm.lr_sched.TriangularSchedualer(warmup_steps, n_steps)
adam = palm.optimizer.Adam(loss_var, lr, sched)
trainer.build_backward(optimizer=adam, weight_decay=0.001)
trainer.random_init_params()
trainer.load_pretrain('pretrain/ernie/params')
# print(trainer.train_one_step(next(iterator_fn())))
# trainer.train_one_epoch()
trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs/ckpt')
# trainer.save()
# controller = palm.Controller([mrqa, match4mrqa, mlm4mrqa])
# loss = controller.build_forward(bb, mask_task=[])
# n_steps = controller.estimate_train_steps(basetask=mrqa, num_epochs=2, batch_size=8, dev_count=4)
# adam = palm.optimizer.Adam(loss)
# sched = palm.schedualer.LinearWarmup(learning_rate, max_train_steps=n_steps, warmup_steps=0.1*n_steps)
#
# controller.build_backward(optimizer=adam, schedualer=sched, weight_decay=0.001, use_ema=True, ema_decay=0.999)
# controller.random_init_params()
# controller.load_pretrain('../../pretrain_model/ernie/params')
# controller.train()
# controller = palm.Controller(config='config.yaml', task_dir='tasks', for_train=False)
# controller.pred('mrqa', inference_model_dir='output_model/secondrun/mrqa/infer_model')
export CUDA_VISIBLE_DEVICES=3
python run.py
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1"""
class reader(object):
"""interface of data manager."""
def __init__(self, config):
assert isinstance(config, dict)
# @property
# def inputs_attr(self):
# """描述reader输入对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1.
# Return:
# dict类型。对各个输入对象的属性描述。例如,
# 对于文本分类任务,可能需要包含输入文本和所属标签的id
# {"text": ([], 'str'),
# "label": ([], 'int')}
# 对于标注任务,可能需要输入词序列和对应的标签
# {"tokens", ([-1], 'str'),
# "tags", ([-1], 'str')}
# 对于机器阅读理解任务,可能需要包含上下文、问题、回答、答案区域的起止位置等
# {"paragraph", ([], 'str'),
# "question", ([], 'str'),
# "start_position", ([], 'int')
# """
# raise NotImplementedError()
@property
def outputs_attr(self):
"""描述reader输出对象(被yield出的对象)的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
注意:当使用mini-batch梯度下降学习策略时,,应为常规的输入对象设置batch_size维度(一般为-1)
Return:
dict类型。对各个输入对象的属性描述。例如,
对于文本分类和匹配任务,yield的输出内容可能包含如下的对象(下游backbone和task可按需访问其中的对象)
{"token_ids": ([-1, max_len], 'int64'),
"input_ids": ([-1, max_len], 'int64'),
"segment_ids": ([-1, max_len], 'int64'),
"input_mask": ([-1, max_len], 'float32'),
"label": ([-1], 'int')}
"""
raise NotImplementedError()
# def parse_line(self):
# """框架内部使用字典描述每个样本,字典的key为inputs_attr,value为每个input对应的符合attr描述的值。
# 该函数负责将文本行解析成符合inputs_attr描述的字典类型的样本。默认的parse_line方法会读取json格式的数据集文件,数据集的每一行为json格式描述的样本。
# 用户可通过对该方法的继承改写来适配不同格式的数据集,例如csv格式甚至tfrecord文件。
# """
# raise NotImplementedError()
#
# def tokenize(self, line):
# """框架中内置了word piece tokenizer等分词器,用户可通过修改tokenizer超参数来制定使用的分词器,若内置的分词器均无法满足需求,用户可通过对该方法的继承改写来自定义分词器。
# Args:
# - line: a unicode string.
# Return:
# a list of tokens
# """
# raise NotImplementedError()
def iterator(self):
"""数据集遍历接口,注意,当数据集遍历到尾部时该接口应自动完成指针重置,即重新从数据集头部开始新的遍历。
Yield:
(dict) elements that meet the requirements in output_templete
"""
raise NotImplementedError()
@property
def num_examples(self):
"""数据集中的样本数量,即每个epoch中iterator所生成的样本数。注意,使用滑动窗口等可能导致数据集样本数发生变化的策略时,该接口应返回runtime阶段的实际样本数。"""
raise NotImplementedError()
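# A minimal sketch of a concrete reader satisfying this interface (a hypothetical
# pre-tokenized two-column tsv classification dataset; illustration only):
#
#     class tsv_cls_reader(reader):
#         def __init__(self, config):
#             super(tsv_cls_reader, self).__init__(config)
#             self._path = config['data_path']
#
#         @property
#         def outputs_attr(self):
#             return {"token_ids": ([-1, -1], 'int64'),
#                     "label_ids": ([-1], 'int64')}
#
#         def iterator(self):
#             while True:  # restart from the head once the dataset is exhausted
#                 for line in open(self._path):
#                     ids, label = line.rstrip('\n').split('\t')
#                     yield {"token_ids": [int(t) for t in ids.split()],
#                            "label_ids": int(label)}
#
#         @property
#         def num_examples(self):
#             return sum(1 for _ in open(self._path))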
class backbone(object):
"""interface of backbone model."""
def __init__(self, config, phase):
"""
Args:
            config: dict. The hyperparameters defined in the multi-task config file plus the pretrained model config file.
            phase: str. The running phase; currently train and predict are supported.
"""
assert isinstance(config, dict)
@property
def inputs_attr(self):
"""描述backbone从reader处需要得到的输入对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输入对象的属性描述。例如,
对于文本分类和匹配任务,bert backbone依赖的reader对象主要包含如下的对象
{"token_ids": ([-1, max_len], 'int64'),
"input_ids": ([-1, max_len], 'int64'),
"segment_ids": ([-1, max_len], 'int64'),
"input_mask": ([-1, max_len], 'float32')}"""
raise NotImplementedError()
@property
def outputs_attr(self):
"""描述backbone输出对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输出对象的属性描述。例如,
对于文本分类和匹配任务,bert backbone的输出内容可能包含如下的对象
{"word_emb": ([-1, max_seqlen, word_emb_size], 'float32'),
"sentence_emb": ([-1, hidden_size], 'float32'),
"sim_vec": ([-1, hidden_size], 'float32')}"""
raise NotImplementedError()
def build(self, inputs):
"""建立backbone的计算图。将符合inputs_attr描述的静态图Variable输入映射成符合outputs_attr描述的静态图Variable输出。
Args:
inputs: dict类型。字典中包含inputs_attr中的对象名到计算图Variable的映射,inputs中至少会包含inputs_attr中定义的对象
Return:
需要输出的计算图变量,输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
"""
raise NotImplementedError()
class task_paradigm(object):
def __init__(self, config, phase, backbone_config):
"""
        config: dict. The hyperparameters defined in the task instance config plus the multi-task config file.
        phase: str. The running phase; currently train and predict are supported.
"""
@property
def inputs_attrs(self):
"""描述task_layer需要从reader, backbone等输入对象集合所读取到的输入对象的属性,第一级key为对象集和的名字,如backbone,reader等(后续会支持更灵活的输入),第二级key为对象集和中各对象的属性,包括对象的名字,shape和dtype。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个对象集及其输入对象的属性描述。"""
raise NotImplementedError()
@property
def outputs_attr(self):
"""描述task输出对象的属性,包括对象的名字,shape和dtype。输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输入对象的属性描述。注意,训练阶段必须包含名为loss的输出对象。
"""
raise NotImplementedError()
@property
def epoch_inputs_attrs(self):
return {}
def build(self, inputs, scope_name=""):
"""建立task_layer的计算图。将符合inputs_attrs描述的来自各个对象集的静态图Variables映射成符合outputs_attr描述的静态图Variable输出。
Args:
inputs: dict类型。字典中包含inputs_attrs中的对象名到计算图Variable的映射,inputs中至少会包含inputs_attr中定义的对象
Return:
需要输出的计算图变量,输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
"""
raise NotImplementedError()
def postprocess(self, rt_outputs):
"""每个训练或推理step后针对当前batch的task_layer的runtime计算结果进行相关后处理。注意,rt_outputs除了包含build方法,还自动包含了loss的计算结果。"""
pass
def epoch_postprocess(self, post_inputs):
pass
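# A minimal sketch of a task_paradigm implementation (a hypothetical regression head
# over the backbone's sentence embedding; illustration only):
#
#     from paddle import fluid
#
#     class regression(task_paradigm):
#         @property
#         def inputs_attrs(self):
#             return {'backbone': {'sentence_embedding': [[-1, 1024], 'float32']},
#                     'reader': {'label': [[-1, 1], 'float32']}}
#
#         @property
#         def outputs_attr(self):
#             # the training phase must expose an output named `loss`
#             return {'loss': [[1], 'float32']}
#
#         def build(self, inputs, scope_name=""):
#             emb = inputs['backbone']['sentence_embedding']
#             label = inputs['reader']['label']
#             pred = fluid.layers.fc(emb, size=1)
#             return {'loss': fluid.layers.reduce_mean(fluid.layers.square(pred - label))}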
import downloader
# from mtl_controller import Controller
import controller
import optimizer
import lr_sched
import backbone
import reader
import head
import sys
from paddlepalm.mtl_controller import Controller
sys.path.append('paddlepalm')
from trainer import Trainer
del interface
del task_instance
del default_settings
del utils
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import tarfile
import shutil
try:
from urllib.request import urlopen # Python 3
except ImportError:
from urllib2 import urlopen # Python 2
import ssl
__all__ = ["download", "ls"]
# for https
ssl._create_default_https_context = ssl._create_unverified_context
_items = {
'pretrain': {'ernie-en-uncased-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz',
'bert-en-uncased-large': 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz',
'bert-en-uncased-base': 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz',
'utils': None},
'reader': {'utils': None},
'backbone': {'utils': None},
'tasktype': {'utils': None},
}
def _download(item, scope, path, silent=False):
data_url = _items[item][scope]
    if data_url is None:
        return
if not silent:
print('Downloading {}: {} from {}...'.format(item, scope, data_url))
data_dir = path + '/' + item + '/' + scope
if not os.path.exists(data_dir):
os.makedirs(os.path.join(data_dir))
data_name = data_url.split('/')[-1]
filename = data_dir + '/' + data_name
# print process
def _chunk_report(bytes_so_far, total_size):
percent = float(bytes_so_far) / float(total_size)
if percent > 1:
percent = 1
if not silent:
print('\r>> Downloading... {:.1%}'.format(percent), end = "")
# copy to local
def _chunk_read(response, url, chunk_size = 16 * 1024, report_hook = None):
        info = response.info()
        # Python 2 httplib messages expose getheader(); Python 3 uses get()
        if hasattr(info, 'getheader'):
            total_size = info.getheader('Content-Length').strip()
        else:
            total_size = info.get('Content-Length').strip()
        total_size = int(total_size)
bytes_so_far = 0
with open("%s" % filename, "wb") as f:
while 1:
chunk = response.read(chunk_size)
f.write(chunk)
f.flush()
bytes_so_far += len(chunk)
if not chunk:
break
if report_hook:
report_hook(bytes_so_far, total_size)
return bytes_so_far
response = urlopen(data_url)
_chunk_read(response, data_url, report_hook=_chunk_report)
if not silent:
print(' done!')
if item == 'pretrain':
if not silent:
print ('Extracting {}...'.format(data_name), end=" ")
if os.path.exists(filename):
tar = tarfile.open(filename, 'r')
tar.extractall(path = data_dir)
tar.close()
os.remove(filename)
if scope.startswith('bert'):
source_path = data_dir + '/' + data_name.split('.')[0]
fileList = os.listdir(source_path)
for file in fileList:
filePath = os.path.join(source_path, file)
shutil.move(filePath, data_dir)
os.removedirs(source_path)
if not silent:
print ('done!')
if not silent:
print ('Converting params...', end=" ")
_convert(data_dir, silent)
if not silent:
print ('done!')
def _convert(path, silent=False):
if os.path.isfile(path + '/params/__palminfo__'):
if not silent:
print ('already converted.')
else:
if os.path.exists(path + '/params/'):
os.rename(path + '/params/', path + '/params1/')
os.mkdir(path + '/params/')
tar_model = tarfile.open(path + '/params/' + '__palmmodel__', 'w')
tar_info = open(path + '/params/'+ '__palminfo__', 'w')
for root, dirs, files in os.walk(path + '/params1/'):
for file in files:
src_file = os.path.join(root, file)
tar_model.add(src_file, '__paddlepalm_' + file)
tar_info.write('__paddlepalm_' + file)
os.remove(src_file)
tar_model.close()
tar_info.close()
os.removedirs(path + '/params1/')
def download(item, scope='all', path='.'):
item = item.lower()
scope = scope.lower()
assert item in _items, '{} is not found. Support list: {}'.format(item, list(_items.keys()))
if _items[item]['utils'] is not None:
_download(item, 'utils', path, silent=True)
if scope != 'all':
assert scope in _items[item], '{} is not found. Support scopes: {}'.format(scope, list(_items[item].keys()))
_download(item, scope, path)
else:
for s in _items[item].keys():
_download(item, s, path)
def _ls(item, scope, l = 10):
if scope != 'all':
assert scope in _items[item], '{} is not found. Support scopes: {}'.format(scope, list(_items[item].keys()))
print ('{}'.format(scope))
else:
for s in _items[item].keys():
if s == 'utils':
continue
print (' => '+s)
def ls(item='all', scope='all'):
if scope == 'utils':
return
if item != 'all':
assert item in _items, '{} is not found. Support scopes: {}'.format(item, list(_items.keys()))
print ('Available {} items:'.format(item))
_ls(item, scope)
else:
l = max(map(len, _items.keys()))
for i in _items.keys():
print ('Available {} items: '.format(i))
_ls(i, scope, l)
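# A minimal usage sketch of this module (names taken from the _items table above);
# this lists the available pretrain models, then downloads and converts BERT base
# (uncased) into ./pretrain/bert-en-uncased-base/:
#
#     import downloader
#     downloader.ls('pretrain')
#     downloader.download('pretrain', 'bert-en-uncased-base')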
from ernie import ERNIE
from bert import BERT
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1"""
class BaseBackbone(object):
"""interface of backbone model."""
def __init__(self, config, phase):
"""
Args:
            config: dict. The hyperparameters defined in the multi-task config file plus the pretrained model config file.
            phase: str. The running phase; currently train and predict are supported.
"""
assert isinstance(config, dict)
@property
def inputs_attr(self):
"""描述backbone从reader处需要得到的输入对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输入对象的属性描述。例如,
对于文本分类和匹配任务,bert backbone依赖的reader对象主要包含如下的对象
{"token_ids": ([-1, max_len], 'int64'),
"input_ids": ([-1, max_len], 'int64'),
"segment_ids": ([-1, max_len], 'int64'),
"input_mask": ([-1, max_len], 'float32')}"""
raise NotImplementedError()
@property
def outputs_attr(self):
"""描述backbone输出对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输出对象的属性描述。例如,
对于文本分类和匹配任务,bert backbone的输出内容可能包含如下的对象
{"word_emb": ([-1, max_seqlen, word_emb_size], 'float32'),
"sentence_emb": ([-1, hidden_size], 'float32'),
"sim_vec": ([-1, hidden_size], 'float32')}"""
raise NotImplementedError()
def build(self, inputs):
"""建立backbone的计算图。将符合inputs_attr描述的静态图Variable输入映射成符合outputs_attr描述的静态图Variable输出。
Args:
inputs: dict类型。字典中包含inputs_attr中的对象名到计算图Variable的映射,inputs中至少会包含inputs_attr中定义的对象
Return:
需要输出的计算图变量,输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
"""
raise NotImplementedError()
class task_paradigm(object):
def __init__(self, config, phase, backbone_config):
"""
        config: dict. The hyperparameters defined in the task instance config plus the multi-task config file.
        phase: str. The running phase; currently train and predict are supported.
"""
@property
def inputs_attrs(self):
"""描述task_layer需要从reader, backbone等输入对象集合所读取到的输入对象的属性,第一级key为对象集和的名字,如backbone,reader等(后续会支持更灵活的输入),第二级key为对象集和中各对象的属性,包括对象的名字,shape和dtype。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个对象集及其输入对象的属性描述。"""
raise NotImplementedError()
@property
def outputs_attr(self):
"""描述task输出对象的属性,包括对象的名字,shape和dtype。输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输入对象的属性描述。注意,训练阶段必须包含名为loss的输出对象。
"""
raise NotImplementedError()
@property
def epoch_inputs_attrs(self):
return {}
def build(self, inputs, scope_name=""):
"""建立task_layer的计算图。将符合inputs_attrs描述的来自各个对象集的静态图Variables映射成符合outputs_attr描述的静态图Variable输出。
Args:
inputs: dict类型。字典中包含inputs_attrs中的对象名到计算图Variable的映射,inputs中至少会包含inputs_attr中定义的对象
Return:
需要输出的计算图变量,输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
"""
raise NotImplementedError()
def postprocess(self, rt_outputs):
"""每个训练或推理step后针对当前batch的task_layer的runtime计算结果进行相关后处理。注意,rt_outputs除了包含build方法,还自动包含了loss的计算结果。"""
pass
def epoch_postprocess(self, post_inputs):
pass
@@ -23,12 +23,44 @@ from paddle import fluid
 from paddle.fluid import layers
 from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
-from paddlepalm.interface import backbone
+from paddlepalm.backbone.base_backbone import BaseBackbone
 
-class Model(backbone):
-    def __init__(self, config, phase):
+class BERT(BaseBackbone):
+
+    def __init__(self, hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
+                 max_position_embeddings, type_vocab_size, hidden_act, hidden_dropout_prob, \
+                 attention_probs_dropout_prob, initializer_range, phase='train'):
+        config = {}
+        config['hidden_size'] = hidden_size
+        config['num_hidden_layers'] = num_hidden_layers
+        config['num_attention_heads'] = num_attention_heads
+        config['vocab_size'] = vocab_size
+        config['max_position_embeddings'] = max_position_embeddings
+        config['type_vocab_size'] = type_vocab_size
+        config['hidden_act'] = hidden_act
+        config['hidden_dropout_prob'] = hidden_dropout_prob
+        config['attention_probs_dropout_prob'] = attention_probs_dropout_prob
+        config['initializer_range'] = initializer_range
+        self.from_config(config, phase=phase)
+
+    @classmethod
+    def from_config(self, config, phase='train'):
+        assert 'hidden_size' in config, "{} is required to initialize BERT".format('hidden_size')
+        assert 'num_hidden_layers' in config, "{} is required to initialize BERT".format('num_hidden_layers')
+        assert 'num_attention_heads' in config, "{} is required to initialize BERT".format('num_attention_heads')
+        assert 'vocab_size' in config, "{} is required to initialize BERT".format('vocab_size')
+        assert 'max_position_embeddings' in config, "{} is required to initialize BERT".format('max_position_embeddings')
+        assert 'sent_type_vocab_size' in config or 'type_vocab_size' in config, \
+            "{} is required to initialize BERT".format('type_vocab_size')
+        assert 'hidden_act' in config, "{} is required to initialize BERT".format('hidden_act')
+        assert 'hidden_dropout_prob' in config, "{} is required to initialize BERT".format('hidden_dropout_prob')
+        assert 'attention_probs_dropout_prob' in config, \
+            "{} is required to initialize BERT".format('attention_probs_dropout_prob')
+        assert 'initializer_range' in config, "{} is required to initialize BERT".format('initializer_range')
         # self._is_training = phase == 'train'  # a backbone usually need not care about the phase, since its outputs basically stay the same across phases
         self._emb_size = config["hidden_size"]
@@ -52,9 +84,9 @@ class Model(backbone):
     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32']}
     @property
@@ -73,7 +105,7 @@ class Model(backbone):
         self._emb_dtype = 'float32'
         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -84,14 +116,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
@@ -153,3 +185,9 @@ class Model(backbone):
         pass
+
+
+class Model(BERT):
+    """BERT wrapper for ConfigController"""
+    def __init__(self, config, phase):
+        BERT.from_config(config, phase=phase)
@@ -24,32 +24,29 @@ from paddle import fluid
 from paddle.fluid import layers
 from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
-from paddlepalm.interface import backbone
+from paddlepalm.backbone.base_backbone import BaseBackbone
 
-class Model(backbone):
-    def __init__(self,
-                 config,
-                 phase):
+class ERNIE(BaseBackbone):
+    def __init__(self, hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
+                 max_position_embeddings, sent_type_vocab_size, task_type_vocab_size, \
+                 hidden_act, hidden_dropout_prob, attention_probs_dropout_prob, initializer_range, phase='train'):
         # self._is_training = phase == 'train'  # a backbone usually need not care about the phase, since its outputs basically stay the same across phases
-        self._emb_size = config['hidden_size']
-        self._n_layer = config['num_hidden_layers']
-        self._n_head = config['num_attention_heads']
-        self._voc_size = config['vocab_size']
-        self._max_position_seq_len = config['max_position_embeddings']
-        if config['sent_type_vocab_size']:
-            self._sent_types = config['sent_type_vocab_size']
-        else:
-            self._sent_types = config['type_vocab_size']
-        self._task_types = config['task_type_vocab_size']
-        self._hidden_act = config['hidden_act']
-        self._prepostprocess_dropout = config['hidden_dropout_prob']
-        self._attention_dropout = config['attention_probs_dropout_prob']
+        self._emb_size = hidden_size
+        self._n_layer = num_hidden_layers
+        self._n_head = num_attention_heads
+        self._voc_size = vocab_size
+        self._max_position_seq_len = max_position_embeddings
+        self._sent_types = sent_type_vocab_size
+        self._task_types = task_type_vocab_size
+        self._hidden_act = hidden_act
+        self._prepostprocess_dropout = hidden_dropout_prob
+        self._attention_dropout = attention_probs_dropout_prob
         self._word_emb_name = "word_embedding"
         self._pos_emb_name = "pos_embedding"
@@ -58,15 +55,48 @@ class Model(backbone):
         self._emb_dtype = "float32"
         self._param_initializer = fluid.initializer.TruncatedNormal(
-            scale=config['initializer_range'])
+            scale=initializer_range)
+
+    @classmethod
+    def from_config(cls, config, phase='train'):
+        assert 'hidden_size' in config, "{} is required to initialize ERNIE".format('hidden_size')
+        assert 'num_hidden_layers' in config, "{} is required to initialize ERNIE".format('num_hidden_layers')
+        assert 'num_attention_heads' in config, "{} is required to initialize ERNIE".format('num_attention_heads')
+        assert 'vocab_size' in config, "{} is required to initialize ERNIE".format('vocab_size')
+        assert 'max_position_embeddings' in config, "{} is required to initialize ERNIE".format('max_position_embeddings')
+        assert 'sent_type_vocab_size' in config or 'type_vocab_size' in config, "{} is required to initialize ERNIE".format('sent_type_vocab_size')
+        assert 'task_type_vocab_size' in config, "{} is required to initialize ERNIE".format('task_type_vocab_size')
+        assert 'hidden_act' in config, "{} is required to initialize ERNIE".format('hidden_act')
+        assert 'hidden_dropout_prob' in config, "{} is required to initialize ERNIE".format('hidden_dropout_prob')
+        assert 'attention_probs_dropout_prob' in config, "{} is required to initialize ERNIE".format('attention_probs_dropout_prob')
+        assert 'initializer_range' in config, "{} is required to initialize ERNIE".format('initializer_range')
+
+        hidden_size = config['hidden_size']
+        num_hidden_layers = config['num_hidden_layers']
+        num_attention_heads = config['num_attention_heads']
+        vocab_size = config['vocab_size']
+        max_position_embeddings = config['max_position_embeddings']
+        if 'sent_type_vocab_size' in config:
+            sent_type_vocab_size = config['sent_type_vocab_size']
+        else:
+            sent_type_vocab_size = config['type_vocab_size']
+        task_type_vocab_size = config['task_type_vocab_size']
+        hidden_act = config['hidden_act']
+        hidden_dropout_prob = config['hidden_dropout_prob']
+        attention_probs_dropout_prob = config['attention_probs_dropout_prob']
+        initializer_range = config['initializer_range']
+
+        return cls(hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
+                   max_position_embeddings, sent_type_vocab_size, task_type_vocab_size, \
+                   hidden_act, hidden_dropout_prob, attention_probs_dropout_prob, initializer_range, phase=phase)
 
     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32'],
-                "task_ids": [[-1, -1, 1], 'int64']}
+                "task_ids": [[-1, -1], 'int64']}
     @property
     def outputs_attr(self):
@@ -85,7 +115,7 @@ class Model(backbone):
         task_ids = inputs['task_ids']
         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -96,14 +126,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
@@ -113,7 +143,7 @@ class Model(backbone):
         emb_out = emb_out + position_emb_out
         emb_out = emb_out + sent_emb_out
-        task_emb_out = fluid.layers.embedding(
+        task_emb_out = fluid.embedding(
             task_ids,
             size=[self._task_types, self._emb_size],
             dtype=self._emb_dtype,
@@ -173,3 +203,12 @@ class Model(backbone):
     def postprocess(self, rt_outputs):
         pass
+
+
+class Model(ERNIE):
+
+    def __init__(self, config, phase):
+        ERNIE.from_config(config, phase=phase)
from conf_controller import ConfigController
from controller import Controller
from paddle import fluid
import os
import multiprocessing
gpu_dev_count = int(fluid.core.get_cuda_device_count())
cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
from reader import yield_pieces, data_feeder
from . import gpu_dev_count, cpu_dev_count
try:
    import queue as Queue  # Python 3
except ImportError:
    import Queue  # Python 2
from threading import Thread
dev_count = gpu_dev_count if gpu_dev_count > 0 else cpu_dev_count
def yield_pieces(data, distribute_strategy, batch_size):
"""
Args:
        distribute_strategy: supports s=split, c=copy, u=unstack
"""
    assert batch_size % dev_count == 0, "batch_size must be an integer multiple of dev_count."
print('data in yield pieces')
print(len(data))
assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)]
assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)]
if isinstance(data, dict):
keys = list(data.keys())
data_list = [data[i] for i in keys]
ds_list = [distribute_strategy[i] for i in keys]
else:
assert isinstance(data, list), "the input data must be a list or dict, and contained with multiple tensors."
data_list = data
ds_list = distribute_strategy
stride = batch_size // dev_count
p = stride
# while p < len(data_list) + stride:
while p <= batch_size:
temp = []
for d, s in zip(data_list, ds_list):
s = s.strip().lower()
if s == 's' or s == 'split':
if p - stride >= len(d):
print('WARNING: no more examples to feed empty devices')
temp = []
return
temp.append(d[p-stride:p])
elif s == 'u' or s == 'unstack':
                assert len(d) <= dev_count, 'Tensor size on dim 0 must be less than or equal to dev_count when unstack is applied.'
if p//stride > len(d):
print('WARNING: no more examples to feed empty devices')
return
temp.append(d[p//stride-1])
elif s == 'c' or s == 'copy':
temp.append(d)
else:
raise NotImplementedError()
p += stride
if type(data) == dict:
yield dict(zip(*[keys, temp]))
else:
print('yielded pieces')
print(len(temp))
yield temp
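# A minimal sketch of how yield_pieces distributes one batch (assuming dev_count == 2
# for illustration): 's' splits a tensor along dim 0, 'c' copies it to every device.
#
#     data = {'x': [0, 1, 2, 3], 'meta': 'shared'}
#     strategy = {'x': 's', 'meta': 'c'}
#     for piece in yield_pieces(data, strategy, batch_size=4):
#         print(piece)  # {'x': [0, 1], 'meta': 'shared'}, then {'x': [2, 3], 'meta': 'shared'}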
def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
if postprocess_fn is None:
def postprocess_fn(batch):
return batch
def worker(reader, dev_count, queue):
dev_batches = []
for index, data in enumerate(reader()):
if len(dev_batches) < dev_count:
dev_batches.append(data)
if len(dev_batches) == dev_count:
queue.put((dev_batches, 0))
dev_batches = []
# For the prediction of the remained batches, pad more batches to
# the number of devices and the padded samples would be removed in
# prediction outputs.
if len(dev_batches) > 0:
num_pad = dev_count - len(dev_batches)
for i in range(len(dev_batches), dev_count):
dev_batches.append(dev_batches[-1])
queue.put((dev_batches, num_pad))
queue.put(None)
queue = Queue.Queue(dev_count*prefetch_steps)
p = Thread(
target=worker, args=(reader, dev_count, queue))
p.daemon = True
p.start()
while True:
ret = queue.get()
queue.task_done()
if ret is not None:
batches, num_pad = ret
batch_buf = []
flag_buf = []
for idx, batch in enumerate(batches):
# flag = num_pad == 0
flag = idx-len(batches) < -num_pad
# if num_pad > 0:
# num_pad -= 1
batch = postprocess_fn(batch)
batch_buf.append(batch)
flag_buf.append(flag)
yield batch_buf, flag_buf
else:
break
queue.join()
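# A minimal sketch of data_feeder (hypothetical reader; batches are grouped into
# lists of dev_count, and flags mark which entries are real vs. padded):
#
#     def batch_reader():
#         for i in range(5):
#             yield {'step': i}
#
#     for dev_batches, flags in data_feeder(batch_reader):
#         # dev_batches: one batch per device; flags[i] is False for padded tail batches
#         print(len(dev_batches), flags)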
from _downloader import *
from cls import Classify
# from match import Match
# from mrc import MRC
# from mlm import MaskLM
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class BaseHead(object):
def __init__(self, config, phase, backbone_config):
"""
        config: dict. The hyperparameters defined in the task instance config plus the multi-task config file.
        phase: str. The running phase; currently train and predict are supported.
"""
self._stop_gradient = {}
self._prog = None
@property
def inputs_attrs(self):
"""描述task_layer需要从reader, backbone等输入对象集合所读取到的输入对象的属性,第一级key为对象集和的名字,如backbone,reader等(后续会支持更灵活的输入),第二级key为对象集和中各对象的属性,包括对象的名字,shape和dtype。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个对象集及其输入对象的属性描述。"""
raise NotImplementedError()
@property
def outputs_attr(self):
"""描述task输出对象的属性,包括对象的名字,shape和dtype。输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
Return:
dict类型。对各个输入对象的属性描述。注意,训练阶段必须包含名为loss的输出对象。
"""
raise NotImplementedError()
@property
def epoch_inputs_attrs(self):
return {}
# def stop_gradient(source, inputs):
# # if self._inputs is None:
# # raise Exception('You need to build this head first before stop gradient.')
# self._inputs = inputs
# for name, var in self._inputs[source].items():
# # cur_block = self._prog.current_block()
# var = fluid.layers.assign(var)
# var.stop_gradient = True
# self._inputs[name] = var
# return self._inputs
def build(self, inputs, scope_name=""):
"""建立task_layer的计算图。将符合inputs_attrs描述的来自各个对象集的静态图Variables映射成符合outputs_attr描述的静态图Variable输出。
Args:
inputs: dict类型。字典中包含inputs_attrs中的对象名到计算图Variable的映射,inputs中至少会包含inputs_attr中定义的对象
Return:
需要输出的计算图变量,输出对象会被加入到fetch_list中,从而在每个训练/推理step时得到runtime的计算结果,该计算结果会被传入postprocess方法中供用户处理。
"""
raise NotImplementedError()
def postprocess(self, rt_outputs):
"""每个训练或推理step后针对当前batch的task_layer的runtime计算结果进行相关后处理。注意,rt_outputs除了包含build方法,还自动包含了loss的计算结果。"""
pass
def epoch_postprocess(self, post_inputs):
pass
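# A minimal sketch (for illustration only) of the contract documented above,
# assuming a backbone that exposes "sentence_embedding"; the 768-dim feature
# size and the regression setup are hypothetical, not framework defaults.
import paddle.fluid as fluid

class _ExampleRegressionHead(BaseHead):

    @property
    def inputs_attrs(self):
        return {'reader': {'label_ids': [[-1], 'float32']},
                'backbone': {'sentence_embedding': [[-1, 768], 'float32']}}

    @property
    def outputs_attr(self):
        # the train phase must expose an output named 'loss'
        return {'loss': [[1], 'float32']}

    def build(self, inputs, scope_name=''):
        feats = inputs['backbone']['sentence_embedding']
        score = fluid.layers.fc(input=feats, size=1)
        label = fluid.layers.reshape(inputs['reader']['label_ids'], [-1, 1])
        loss = fluid.layers.mean(fluid.layers.square_error_cost(score, label))
        return {'loss': loss}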
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
from paddle.fluid import layers
from paddlepalm.head.base_head import BaseHead
import numpy as np
import os
class Classify(BaseHead):
"""
classification
"""
# def __init__(self, config, phase, backbone_config=None):
    def __init__(self, num_classes, input_dim, dropout_prob=0.0, \
                 param_initializer_range=0.02, phase='train'):
        self._is_training = phase == 'train'
        self._hidden_size = input_dim
        self.num_classes = num_classes
        self._dropout_prob = dropout_prob if phase == 'train' else 0.0
        self._param_initializer = fluid.initializer.TruncatedNormal(
            scale=param_initializer_range)
        self._preds = []
        # referenced by epoch_postprocess; expected to be set before prediction
        self._pred_output_path = None
@property
def inputs_attrs(self):
reader = {}
bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']}
if self._is_training:
reader["label_ids"] = [[-1], 'int64']
return {'reader': reader, 'backbone': bb}
@property
def outputs_attrs(self):
if self._is_training:
return {'loss': [[1], 'float32']}
else:
return {'logits': [[-1, self.num_classes], 'float32']}
def build(self, inputs, scope_name=''):
sent_emb = inputs['backbone']['sentence_embedding']
if self._is_training:
label_ids = inputs['reader']['label_ids']
        # dropout-regularize the sentence embedding before the classifier
        cls_feats = fluid.layers.dropout(
            x=sent_emb,
            dropout_prob=self._dropout_prob,
            dropout_implementation="upscale_in_train")
        logits = fluid.layers.fc(
            input=cls_feats,
            size=self.num_classes,
            param_attr=fluid.ParamAttr(
                name=scope_name+"cls_out_w",
                initializer=self._param_initializer),
            bias_attr=fluid.ParamAttr(
                name=scope_name+"cls_out_b", initializer=fluid.initializer.Constant(0.)))
        if self._is_training:
            probs = fluid.layers.softmax(logits)
            loss = fluid.layers.cross_entropy(
                input=probs, label=label_ids)
            loss = layers.mean(loss)
            return {"loss": loss}
        else:
            return {"logits": logits}
def batch_postprocess(self, rt_outputs):
if not self._is_training:
logits = rt_outputs['logits']
preds = np.argmax(logits, -1)
self._preds.extend(preds.tolist())
return preds
def epoch_postprocess(self, post_inputs):
        # no post_inputs are needed here (nothing is declared in epoch_inputs_attrs),
        # so post_inputs arrives empty
if not self._is_training:
if self._pred_output_path is None:
raise ValueError('argument pred_output_path not found in config. Please add it into config dict/file.')
with open(os.path.join(self._pred_output_path, 'predictions.json'), 'w') as writer:
for p in self._preds:
writer.write(str(p)+'\n')
print('Predictions saved at '+os.path.join(self._pred_output_path, 'predictions.json'))
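# Illustrative wiring for the head above (hidden size 768 matches the BERT-base
# backbone config in this repo; the 2-class setup is made up for the example):
#
#   head = Classify(num_classes=2, input_dim=768, dropout_prob=0.1, phase='train')
#   head.inputs_attrs
#   # -> {'reader': {'label_ids': [[-1], 'int64']},
#   #     'backbone': {'sentence_embedding': [[-1, 768], 'float32']}}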
from slanted_triangular_schedualer import TriangularSchedualer
from warmup_schedualer import WarmupSchedualer
# scheduled_lr = fluid.layers.learning_rate_scheduler\
# .noam_decay(1/(warmup_steps *(config['learning_rate'] ** 2)),
# warmup_steps)
class BaseSchedualer():
def __init__(self):
self._prog = None
def _set_prog(self, prog):
self._prog = prog
def build(self, learning_rate):
raise NotImplementedError()
from paddlepalm.lr_sched.schedualer import BaseSchedualer
from paddle import fluid
class TriangularSchedualer(BaseSchedualer):
""" Applies linear warmup of learning rate from 0 to learning_rate until warmup_steps, and then decay to 0 linearly until num_train_steps."""
def __init__(self, warmup_steps, num_train_steps):
BaseSchedualer.__init__(self)
assert num_train_steps > warmup_steps > 0
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
def build(self, learning_rate):
with self._prog._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="scheduled_learning_rate")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < self.warmup_steps):
warmup_lr = learning_rate * (global_step / self.warmup_steps)
fluid.layers.tensor.assign(warmup_lr, lr)
with switch.default():
decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
learning_rate=learning_rate,
decay_steps=self.num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
fluid.layers.tensor.assign(decayed_lr, lr)
return lr
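# Pure-python rehearsal of the schedule built above (illustration only, no
# fluid graph). Note that both branches run on the same global step counter,
# so the decayed value right after warmup is base_lr * (1 - warmup/total):
def _triangular_lr_value(step, base_lr, warmup_steps, num_train_steps):
    if step < warmup_steps:
        return base_lr * step / float(warmup_steps)  # linear warmup from 0
    # linear decay, reaching 0 at num_train_steps and staying there
    return base_lr * max(0.0, 1.0 - step / float(num_train_steps))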
from paddlepalm.lr_sched.schedualer import BaseSchedualer
from paddle import fluid

class WarmupSchedualer(BaseSchedualer):
    """ Applies linear warmup of learning rate from 0 to learning_rate within warmup_steps, then keeps it constant."""

    def __init__(self, warmup_steps):
        BaseSchedualer.__init__(self)
        self.warmup_steps = warmup_steps
def build(self, learning_rate):
with self._prog._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="scheduled_learning_rate")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < self.warmup_steps):
warmup_lr = learning_rate * (global_step / self.warmup_steps)
fluid.layers.tensor.assign(warmup_lr, lr)
with switch.default():
fluid.layers.tensor.assign(learning_rate, lr)
return lr
@@ -31,9 +31,11 @@
 from paddlepalm.utils.saver import init_pretraining_params, init_checkpoint
 from paddlepalm.utils.config_helper import PDConfig
 from paddlepalm.utils.print_helper import print_dict
 from paddlepalm.utils.reader_helper import create_net_inputs, create_iterator_fn, create_joint_iterator_fn, merge_input_attrs
+from paddlepalm.distribute import data_feeder
 
-from default_settings import *
-from task_instance import TaskInstance, check_instances
+from paddlepalm.default_settings import *
+from paddlepalm.task_instance import TaskInstance, check_instances
 
 DEBUG=False
 VERBOSE=0
@@ -182,6 +184,20 @@ def _fit_attr(conf, fit_attr, strict=False):
     return conf
 
+def create_feed_batch_process_fn(net_inputs):
+
+    def feed_batch_process_fn(data):
+        temp = {}
+        for q, var in net_inputs.items():
+            if isinstance(var, str) or isinstance(var, unicode):
+                temp[var] = data[q]
+            else:
+                temp[var.name] = data[q]
+        return temp
+
+    return feed_batch_process_fn
+
 class Controller(object):
 
     def __init__(self, config, task_dir='.', for_train=True):
@@ -234,7 +250,7 @@ class Controller(object):
             bb_conf = _merge_conf(mtl_conf, bb_conf)
         else:
             bb_conf = mtl_conf
-        print_dict(bb_conf, title='backbone configuration'.format(instname))
+        print_dict(bb_conf, title = 'backbone configuration'.format(instname))
 
         bb_name = mtl_conf['backbone']
         bb_mod = importlib.import_module(BACKBONE_DIR + '.' + bb_name)
@@ -338,6 +354,7 @@ class Controller(object):
         main_conf = main_inst.config
         if not os.path.exists(main_conf['save_path']):
             os.makedirs(main_conf['save_path'])
+            os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))
 
         # prepare backbone
         train_backbone = Backbone(bb_conf, phase='train')
@@ -398,11 +415,14 @@ class Controller(object):
             prefixes.append(inst.name)
             mrs.append(inst.mix_ratio)
 
-        joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE)
+        joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE, return_type='dict')
+        self._joint_iterator_fn = joint_iterator_fn
 
         input_attrs = [[i, j, k] for i, (j,k) in zip(joint_input_names, joint_shape_and_dtypes)]
         pred_input_attrs = [[i, j, k] for i, (j,k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
-        net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
+        # net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
+        net_inputs = create_net_inputs(input_attrs, async=False)
+        self._net_inputs = net_inputs
 
         # build backbone and task layers
         train_prog = fluid.default_main_program()
@@ -453,7 +473,7 @@ class Controller(object):
         # compute loss
         task_id_var = net_inputs['__task_id']
-        task_id_vec = layers.one_hot(task_id_var, num_instances)
+        task_id_vec = fluid.one_hot(task_id_var, num_instances)
         losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
         loss = layers.reduce_sum(task_id_vec * losses)
@@ -517,20 +537,21 @@ class Controller(object):
                            insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
 
         pred_prog = inst.load(infer_model_path)
+        pred_prog = fluid.CompiledProgram(pred_prog).with_data_parallel()
         if inst.reader['pred'] is None:
             pred_reader = inst.Reader(inst.config, phase='pred')
             inst.reader['pred'] = pred_reader
         return pred_prog
 
-    def load_pretrain(self, pretrain_model_path=None):
+    def load_pretrain(self, pretrain_path=None):
         # load pretrain model (or ckpt)
-        if pretrain_model_path is None:
-            assert 'pretrain_model_path' in self.main_conf, "pretrain_model_path NOT set."
-            pretrain_model_path = self.main_conf['pretrain_model_path']
+        if pretrain_path is None:
+            assert 'pretrain_path' in self.main_conf, "pretrain_path NOT set."
+            pretrain_path = self.main_conf['pretrain_path']
 
         init_pretraining_params(
             self.exe,
-            pretrain_model_path,
+            pretrain_path,
             main_program=fluid.default_startup_program())
@@ -575,8 +596,18 @@ class Controller(object):
         epoch = 0
         time_begin = time.time()
         backbone_buffer = []
 
+        feed_batch_process_fn = create_feed_batch_process_fn(self._net_inputs)
+        distribute_feeder = data_feeder(self._joint_iterator_fn, feed_batch_process_fn)
+        # palm.distribute.reader(self._joint_iterator_fn, self._net_inputs, prefetch_steps=2)
+
         while not train_finish():
-            rt_outputs = self.exe.run(train_program, fetch_list=fetch_list)
+            feed, mask = next(distribute_feeder)
+            rt_outputs = self.exe.run(train_program, feed=feed, fetch_list=fetch_list)
+            while mask.pop() == False:
+                rt_outputs.pop()
+
             rt_outputs = {k:v for k,v in zip(fetch_names, rt_outputs)}
             rt_task_id = np.squeeze(rt_outputs['__task_id']).tolist()
             rt_task_id = rt_task_id[0] if isinstance(rt_task_id, list) else rt_task_id
@@ -591,8 +622,9 @@ class Controller(object):
             global_step += 1
             cur_task.cur_train_step += 1
 
-            if cur_task.save_infermodel_every_n_steps > 0 and cur_task.cur_train_step % cur_task.save_infermodel_every_n_steps == 0:
-                cur_task.save(suffix='.step'+str(cur_task.cur_train_step))
+            cur_task_global_step = cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch
+            if cur_task.is_target and cur_task.save_infermodel_every_n_steps > 0 and cur_task_global_step % cur_task.save_infermodel_every_n_steps == 0:
+                cur_task.save(suffix='.step'+str(cur_task_global_step))
 
             if global_step % main_conf.get('print_every_n_steps', 5) == 0:
                 loss = rt_outputs[cur_task.name+'/loss']
@@ -610,10 +642,16 @@ class Controller(object):
                     print(cur_task.name+': train finished!')
                     cur_task.save()
 
-            if 'save_every_n_steps' in main_conf and global_step % main_conf['save_every_n_steps'] == 0:
-                save_path = os.path.join(main_conf['save_path'],
+            if 'save_ckpt_every_n_steps' in main_conf and global_step % main_conf['save_ckpt_every_n_steps'] == 0:
+                save_path = os.path.join(main_conf['save_path'], 'ckpt',
                                          "step_" + str(global_step))
                 fluid.io.save_persistables(self.exe, save_path, saver_program)
+                print('checkpoint has been saved at '+save_path)
+
+        save_path = os.path.join(main_conf['save_path'], 'ckpt',
+                                 "step_" + str(global_step))
+        fluid.io.save_persistables(self.exe, save_path, saver_program)
+        print('checkpoint has been saved at '+save_path)
 
         print("ALL tasks train finished, exiting...")
@@ -647,19 +685,38 @@ class Controller(object):
         fetch_names, fetch_vars = inst.pred_fetch_list
 
         print('predicting...')
-        mapper = {k:v for k,v in inst.pred_input}
-        buf = []
-        for feed in inst.reader['pred'].iterator():
-            feed = _encode_inputs(feed, inst.name, cand_set=mapper)
-            feed = {mapper[k]: v for k,v in feed.items()}
+        feed_batch_process_fn = create_feed_batch_process_fn(inst.pred_input)
+        distribute_feeder = data_feeder(inst.reader['pred'].iterator, feed_batch_process_fn, prefetch_steps=1)
 
+        buf = []
+        for feed, mask in distribute_feeder:
+            print('before run')
             rt_outputs = self.exe.run(pred_prog, feed, fetch_vars)
+            print('after run')
+            splited_rt_outputs = []
+            for item in rt_outputs:
+                splited_rt_outputs.append(np.split(item, len(mask)))
+            # assert len(rt_outputs) == len(mask), [len(rt_outputs), len(mask)]
+            print(mask)
+            while mask.pop() == False:
+                print(mask)
+                for item in splited_rt_outputs:
+                    item.pop()
+            rt_outputs = []
+            print('cancat')
+            for item in splited_rt_outputs:
+                rt_outputs.append(np.concatenate(item))
             rt_outputs = {k:v for k,v in zip(fetch_names, rt_outputs)}
             inst.postprocess(rt_outputs, phase='pred')
+        print('leave feeder')
         if inst.task_layer['pred'].epoch_inputs_attrs:
             reader_outputs = inst.reader['pred'].get_epoch_outputs()
         else:
             reader_outputs = None
+        print('epoch postprocess')
         inst.epoch_postprocess({'reader':reader_outputs}, phase='pred')
@@ -673,6 +730,7 @@ if __name__ == '__main__':
 
+__all__ = ["Controller"]
@@ -20,84 +20,36 @@ from __future__ import print_function
 
 import numpy as np
 import paddle.fluid as fluid
+from paddlepalm.optimizer.base_optimizer import BaseOptimizer
 
-def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
-    """ Applies linear warmup of learning rate from 0 and decay to 0."""
-    with fluid.default_main_program()._lr_schedule_guard():
-        lr = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="scheduled_learning_rate")
-
-        global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
-
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(global_step < warmup_steps):
-                warmup_lr = learning_rate * (global_step / warmup_steps)
-                fluid.layers.tensor.assign(warmup_lr, lr)
-            with switch.default():
-                decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
-                    learning_rate=learning_rate,
-                    decay_steps=num_train_steps,
-                    end_learning_rate=0.0,
-                    power=1.0,
-                    cycle=False)
-                fluid.layers.tensor.assign(decayed_lr, lr)
-
-        return lr
-
-def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=None):
-    if warmup_steps > 0:
-        decay_strategy = config.get('lr_scheduler', 'linear_warmup_decay')
-        if decay_strategy == 'noam_decay':
-            scheduled_lr = fluid.layers.learning_rate_scheduler\
-                .noam_decay(1/(warmup_steps *(config['learning_rate'] ** 2)),
-                            warmup_steps)
-        elif decay_strategy == 'linear_warmup_decay':
-            scheduled_lr = linear_warmup_decay(config['learning_rate'], warmup_steps,
-                                               max_train_steps)
-        else:
-            raise ValueError("Unkown lr_scheduler, should be "
-                             "'noam_decay' or 'linear_warmup_decay'")
-        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
-    else:
-        optimizer = fluid.optimizer.Adam(learning_rate=config['learning_rate'])
-        scheduled_lr = config['learning_rate']
-
-    clip_norm_thres = 1.0
-    # When using mixed precision training, scale the gradient clip threshold
-    # by loss_scaling
-    fluid.clip.set_gradient_clip(
-        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
-
-    def exclude_from_weight_decay(name):
-        if name.find("layer_norm") > -1:
-            return True
-        bias_suffix = ["_bias", "_b", ".b_0"]
-        for suffix in bias_suffix:
-            if name.endswith(suffix):
-                return True
-        return False
-
-    param_list = dict()
-
-    for param in train_program.global_block().all_parameters():
-        param_list[param.name] = param * 1.0
-        param_list[param.name].stop_gradient = True
-
-    _, param_grads = optimizer.minimize(loss)
-
-    if config.get('weight_decay', 0) > 0:
-        for param, grad in param_grads:
-            if exclude_from_weight_decay(param.name):
-                continue
-            with param.block.program._optimized_guard(
-                [param, grad]), fluid.framework.name_scope("weight_decay"):
-                updated_param = param - param_list[
-                    param.name] * config['weight_decay'] * scheduled_lr
-                fluid.layers.assign(output=param, input=updated_param)
+class Adam(BaseOptimizer):
+
+    def __init__(self, loss_var, lr, lr_schedualer=None):
+        BaseOptimizer.__init__(self, loss_var, lr, lr_schedualer=None)
+        self._loss = loss_var
+        self._lr = lr
+        self._lr_schedualer = lr_schedualer
+
+    def build(self, grad_clip=None):
+        if self._lr_schedualer is not None:
+            self._lr = self._lr_schedualer.build(self._lr)
+        optimizer = fluid.optimizer.Adam(learning_rate=self._lr)
+
+        if grad_clip is not None:
+            clip_norm_thres = grad_clip
+            # When using mixed precision training, scale the gradient clip threshold
+            # by loss_scaling
+            fluid.clip.set_gradient_clip(
+                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
+
+        _, param_grads = optimizer.minimize(self._loss)
+        return param_grads
+
+    def get_cur_learning_rate(self):
+        return self._lr
class BaseOptimizer():
def __init__(self, loss_var, lr, lr_schedualer=None):
self._prog = None
self._lr_schedualer = lr_schedualer
def build(self, grad_clip=None):
pass
def _set_prog(self, prog):
self._prog = prog
if self._lr_schedualer is not None:
self._lr_schedualer._set_prog(prog)
def get_cur_learning_rate(self):
pass
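# Hedged wiring sketch for the optimizer interface above, using the Adam
# subclass and the TriangularSchedualer from this commit. The import paths are
# assumptions inferred from the imports visible elsewhere in this commit;
# loss_var stands for a fluid loss Variable and prog for the training Program.
def _demo_build_optimizer(loss_var, prog):
    from paddlepalm.optimizer.adam import Adam  # assumed module path
    from paddlepalm.lr_sched import TriangularSchedualer
    sched = TriangularSchedualer(warmup_steps=1000, num_train_steps=10000)
    opt = Adam(loss_var, lr=5e-5, lr_schedualer=sched)
    opt._set_prog(prog)  # the trainer normally does this while building
    return opt.build(grad_clip=1.0)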
# -*- coding: UTF-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""v1.1"""
from copy import copy
class BaseReader(object):
"""interface of data manager."""
def __init__(self, phase='train'):
# assert isinstance(config, dict)
# self._config = config
self._phase = phase
self._register = set()
self._registered_backbone = None
    @classmethod
    def create_register(cls):
        return set()
def clone(self, phase='train'):
if phase == self._phase:
return copy(self)
else:
ret = copy(self)
ret._phase = phase
return ret
def require_attr(self, attr_name):
self._register.add(attr_name)
    def register_with(self, backbone):
        for attr in backbone.inputs_attr:
            self.require_attr(attr)
        self._registered_backbone = backbone
def get_registered_backbone(self):
return self._registered_backbone
def _get_registed_attrs(self, attrs):
ret = {}
for i in self._register:
if i not in attrs:
raise NotImplementedError('output attr {} is not found in this reader.'.format(i))
ret[i] = attrs[i]
return ret
    # @property
    # def inputs_attr(self):
    #     """Describes the attributes (name, shape, dtype) of the reader's input objects.
    #     For scalar objects (str, int, float, ...) the shape is the empty list [], and any
    #     dimension of variable length is set to -1.
    #     Return:
    #         dict. Attribute descriptions of each input object. For example,
    #         a text classification task may need the input text and its label id:
    #             {"text": ([], 'str'),
    #              "label": ([], 'int')}
    #         a tagging task may need the token sequence and the corresponding tags:
    #             {"tokens", ([-1], 'str'),
    #              "tags", ([-1], 'str')}
    #         a machine reading comprehension task may need the context, the question,
    #         and the start/end positions of the answer span, etc.:
    #             {"paragraph", ([], 'str'),
    #              "question", ([], 'str'),
    #              "start_position", ([], 'int')
    #     """
    #     raise NotImplementedError()
    @property
    def outputs_attr(self):
        """Describes the attributes (name, shape, dtype) of the reader's output objects
        (the objects it yields). For scalar objects (str, int, float, ...) the shape is the
        empty list [], and any dimension of variable length is set to -1.
        Note: under mini-batch gradient descent, regular output objects should carry a
        batch_size dimension (usually -1).
        Return:
            dict. Attribute descriptions of each output object. For example, for text
            classification and matching tasks the yielded output may contain the following
            objects (downstream backbones and tasks access them on demand):
            {"token_ids": ([-1, max_len], 'int64'),
             "input_ids": ([-1, max_len], 'int64'),
             "segment_ids": ([-1, max_len], 'int64'),
             "input_mask": ([-1, max_len], 'float32'),
             "label": ([-1], 'int')}
        """
    # def parse_line(self):
    #     """Internally the framework describes each sample with a dict whose keys come
    #     from inputs_attr and whose values match the corresponding attr descriptions.
    #     This method parses a text line into such a dict. The default parse_line reads
    #     json-formatted dataset files in which every line is one json-described sample.
    #     Users may override it to adapt other dataset formats such as csv or even
    #     tfrecord files.
    #     """
    #     raise NotImplementedError()
    #
    # def tokenize(self, line):
    #     """The framework ships with tokenizers such as the word piece tokenizer; users
    #     can pick one via the tokenizer hyperparameter, or override this method to plug
    #     in a custom tokenizer when none of the built-in ones fits.
    #     Args:
    #         - line: a unicode string.
    #     Return:
    #         a list of tokens
    #     """
    #     raise NotImplementedError()
    def iterator(self):
        """Dataset traversal interface. Note that when the iterator reaches the end of the
        dataset it should reset itself automatically, i.e. start a new pass from the head
        of the dataset.
        Yield:
            (dict) elements that meet the requirements in outputs_attr
        """
        raise NotImplementedError()
    @property
    def num_examples(self):
        """Number of samples in the dataset, i.e. the number of samples the iterator
        yields per epoch. Note that with strategies that may change the sample count
        (e.g. sliding window), this should return the actual runtime number of samples."""
        raise NotImplementedError()
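# A minimal sketch of the contract above (illustration only, not a framework
# class): a toy reader whose iterator yields dict batches matching
# outputs_attr; shapes and token ids are made up.
class _ToyReader(BaseReader):

    @property
    def outputs_attr(self):
        return {'token_ids': [[-1, -1], 'int64'],
                'label_ids': [[-1], 'int64']}

    @property
    def num_examples(self):
        return 1

    def iterator(self):
        while True:  # restart from the head once the data is exhausted
            yield {'token_ids': [[101, 2023, 102]], 'label_ids': [0]}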
@@ -13,87 +13,76 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from paddlepalm.interface import reader
-from paddlepalm.reader.utils.reader4ernie import ClassifyReader
+from paddlepalm.reader.base_reader import BaseReader
+from paddlepalm.reader.utils.reader4ernie import ClassifyReader as CLSReader
 
-class Reader(reader):
+class ClassifyReader(BaseReader):
 
-    def __init__(self, config, phase='train', dev_count=1, print_prefix=''):
+    def __init__(self, vocab_path, max_len, tokenizer='wordpiece', \
+                 lang='en', seed=None, do_lower_case=False, phase='train'):
+        """xxxxxx.
+        Argument:
+            - vocab_path: xxxx
+            - 
         """
-        Args:
-            phase: train, eval, pred
-        """
 
-        self._is_training = phase == 'train'
-        reader = ClassifyReader(config['vocab_path'],
-            max_seq_len=config['max_seq_len'],
-            do_lower_case=config.get('do_lower_case', False),
-            for_cn=config.get('for_cn', False),
-            random_seed=config.get('seed', None))
-        self._reader = reader
-        self._dev_count = dev_count
+        BaseReader.__init__(self, phase)
+        assert lang.lower() in ['en', 'cn', 'english', 'chinese'], "supported language: en (English), cn (Chinese)."
+        assert phase in ['train', 'pred'], "supported phase: train, pred."
 
-        self._batch_size = config['batch_size']
-        self._max_seq_len = config['max_seq_len']
-        self._num_classes = config['n_classes']
+        for_cn = lang.lower() == 'cn' or lang.lower() == 'chinese'
 
+        self._register.add('token_ids')
         if phase == 'train':
-            self._input_file = config['train_file']
-            self._num_epochs = None # to keep the iterator from terminating
-            self._shuffle = config.get('shuffle', True)
-            # self._shuffle_buffer = config.get('shuffle_buffer', 5000)
-        elif phase == 'eval':
-            self._input_file = config['dev_file']
-            self._num_epochs = 1
-            self._shuffle = False
-            self._batch_size = config.get('pred_batch_size', self._batch_size)
-        elif phase == 'pred':
-            self._input_file = config['pred_file']
-            self._num_epochs = 1
-            self._shuffle = False
-            self._batch_size = config.get('pred_batch_size', self._batch_size)
+            self._register.add('label_ids')
 
+        self._is_training = phase == 'train'
+
+        cls_reader = CLSReader(vocab_path,
+                               max_seq_len=max_len,
+                               do_lower_case=do_lower_case,
+                               for_cn=for_cn,
+                               random_seed=seed)
+        self._reader = cls_reader
         self._phase = phase
         # self._batch_size =
-        self._print_first_n = config.get('print_first_n', 0)
+        # self._print_first_n = config.get('print_first_n', 0)
 
     @property
     def outputs_attr(self):
-        if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "input_mask": [[-1, -1, 1], 'float32'],
-                    "label_ids": [[-1,1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
-                    }
-        else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
-                    "input_mask": [[-1, -1, 1], 'float32']
-                    }
+        attrs = {"token_ids": [[-1, -1], 'int64'],
+                 "position_ids": [[-1, -1], 'int64'],
+                 "segment_ids": [[-1, -1], 'int64'],
+                 "input_mask": [[-1, -1, 1], 'float32'],
+                 "label_ids": [[-1], 'int64'],
+                 "task_ids": [[-1, -1], 'int64']
+                 }
+        return self._get_registed_attrs(attrs)
 
-    def load_data(self):
-        self._data_generator = self._reader.data_generator(self._input_file, self._batch_size, self._num_epochs, dev_count=self._dev_count, shuffle=self._shuffle, phase=self._phase)
+    def _load_data(self, input_file, batch_size, num_epochs=None, \
+                   file_format='csv', shuffle_train=True):
+        self._data_generator = self._reader.data_generator(input_file, batch_size, \
+            num_epochs, shuffle=shuffle_train if self._phase == 'train' else False, \
+            phase=self._phase)
 
-    def iterator(self):
-        def list_to_dict(x):
-            names = ['token_ids', 'segment_ids', 'position_ids', 'task_ids', 'input_mask',
-                'label_ids', 'unique_ids']
-            outputs = {n: i for n,i in zip(names, x)}
-            del outputs['unique_ids']
-            if not self._is_training:
-                del outputs['label_ids']
-            return outputs
+    def _iterator(self):
+        names = ['token_ids', 'segment_ids', 'position_ids', 'task_ids', 'input_mask',
+                 'label_ids', 'unique_ids']
         for batch in self._data_generator():
-            yield list_to_dict(batch)
+            outputs = {n: i for n,i in zip(names, batch)}
+            ret = {}
+            # TODO: move runtime shape check here
+            for attr in self.outputs_attr.keys():
+                ret[attr] = outputs[attr]
+            yield ret
 
     def get_epoch_outputs(self):
         return {'examples': self._reader.get_examples(self._phase),
@@ -103,3 +92,4 @@ class Reader(reader):
     def num_examples(self):
         return self._reader.get_num_examples(phase=self._phase)
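# Hedged usage sketch for the rewritten reader on the '+' side of this diff
# ('./vocab.txt' and './train.tsv' are placeholder paths, and `bert` stands
# for an already-constructed backbone object):
#
#   reader = ClassifyReader('./vocab.txt', max_len=128, lang='en', phase='train')
#   reader.register_with(bert)                # pull in the backbone's input attrs
#   reader._load_data('./train.tsv', batch_size=32, num_epochs=2)
#   batch = next(reader._iterator())          # dict keyed by the registered attrs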
@@ -60,18 +60,18 @@ class Reader(reader):
     @property
     def outputs_attr(self):
         if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "label_ids": [[-1,1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
+                    "label_ids": [[-1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64']
                     }
         else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32']
                     }
...
@@ -60,13 +60,13 @@ class Reader(reader):
     @property
     def outputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32'],
-                "task_ids": [[-1, -1, 1], 'int64'],
-                "mask_label": [[-1, 1], 'int64'],
-                "mask_pos": [[-1, 1], 'int64'],
+                "task_ids": [[-1, -1], 'int64'],
+                "mask_label": [[-1], 'int64'],
+                "mask_pos": [[-1], 'int64'],
                 }
...
@@ -32,6 +32,7 @@ class Reader(reader):
             tokenizer='FullTokenizer',
             for_cn=config.get('for_cn', False),
             doc_stride=config['doc_stride'],
+            remove_noanswer=config.get('remove_noanswer', True),
             max_query_length=config['max_query_len'],
             random_seed=config.get('seed', None))
         self._reader = reader
@@ -67,21 +68,21 @@ class Reader(reader):
     @property
     def outputs_attr(self):
         if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "start_positions": [[-1, 1], 'int64'],
-                    "end_positions": [[-1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
+                    "start_positions": [[-1], 'int64'],
+                    "end_positions": [[-1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64']
                     }
         else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "unique_ids": [[-1, 1], 'int64']
+                    "unique_ids": [[-1], 'int64']
                     }
 
     @property
...
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos
@@ -96,7 +96,7 @@ def prepare_batch_data(insts,
     # or unique id
     for i in range(3, len(insts[0]), 1):
         labels = [inst[i] for inst in insts]
-        labels = np.array(labels).astype("int64").reshape([-1, 1])
+        labels = np.array(labels).astype("int64").reshape([-1])
         labels_list.append(labels)
     # First step: do mask without padding
     if mask_id >= 0:
@@ -154,14 +154,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]
 
     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
 
     if return_input_mask:
         # This is used to avoid attention on paddings.
         input_mask_data = np.array([[1] * len(inst) + [0] *
...
@@ -113,8 +113,8 @@ def mask(batch_tokens,
             pre_sent_len = len(sent)
 
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
 
     return batch_tokens, mask_label, mask_pos
@@ -136,7 +136,7 @@ def pad_batch_data(insts,
         inst_data = np.array(
             [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]
 
     # position data
     if return_pos:
@@ -145,7 +145,7 @@ def pad_batch_data(insts,
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
 
     if return_input_mask:
         # This is used to avoid attention on paddings.
@@ -165,7 +165,7 @@ def pad_batch_data(insts,
     if return_seq_lens:
         seq_lens = np.array([len(inst) for inst in insts])
-        return_list += [seq_lens.astype("int64").reshape([-1, 1])]
+        return_list += [seq_lens.astype("int64").reshape([-1])]
 
     return return_list if len(return_list) > 1 else return_list[0]
...
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos
@@ -147,14 +147,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]
 
     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]
 
     if return_input_mask:
         # This is used to avoid attention on paddings.
         input_mask_data = np.array([[1] * len(inst) + [0] *
...
@@ -54,14 +54,14 @@ class BaseReader(object):
                  vocab_path,
                  label_map_config=None,
                  max_seq_len=512,
-                 do_lower_case=True,
+                 do_lower_case=False,
                  in_tokens=False,
                  is_inference=False,
                  random_seed=None,
                  tokenizer="FullTokenizer",
                  is_classify=True,
                  is_regression=False,
-                 for_cn=True,
+                 for_cn=False,
                  task_id=0):
         self.max_seq_len = max_seq_len
         self.tokenizer = tokenization.FullTokenizer(
@@ -301,6 +301,70 @@ class BaseReader(object):
 
         return f
class ClassifyReader(BaseReader):
def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, 'r', encoding='utf8') as f:
reader = csv_reader(f)
headers = next(reader)
text_indices = [
index for index, h in enumerate(headers) if h != "label"
]
Example = namedtuple('Example', headers)
examples = []
for line in reader:
for index, text in enumerate(line):
if index in text_indices:
if self.for_cn:
line[index] = text.replace(' ', '')
else:
line[index] = text
example = Example(*line)
examples.append(example)
return examples
def _pad_batch_records(self, batch_records):
batch_token_ids = [record.token_ids for record in batch_records]
batch_text_type_ids = [record.text_type_ids for record in batch_records]
batch_position_ids = [record.position_ids for record in batch_records]
if not self.is_inference:
batch_labels = [record.label_id for record in batch_records]
if self.is_classify:
batch_labels = np.array(batch_labels).astype("int64").reshape(
[-1])
elif self.is_regression:
batch_labels = np.array(batch_labels).astype("float32").reshape(
[-1])
if batch_records[0].qid:
batch_qids = [record.qid for record in batch_records]
batch_qids = np.array(batch_qids).astype("int64").reshape(
[-1])
else:
batch_qids = np.array([]).astype("int64").reshape([-1])
# padding
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids, pad_idx=self.pad_id, return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids, pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
batch_position_ids, pad_idx=self.pad_id)
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_text_type_ids, padded_position_ids,
padded_task_ids, input_mask
]
if not self.is_inference:
return_list += [batch_labels, batch_qids]
return return_list
class MaskLMReader(BaseReader):

    def _convert_example_to_record(self, example, max_seq_length, tokenizer):
@@ -447,70 +511,6 @@ class MaskLMReader(BaseReader):

        return wrapper
class ClassifyReader(BaseReader):
def _read_tsv(self, input_file, quotechar=None):
"""Reads a tab separated value file."""
with open(input_file, 'r', encoding='utf8') as f:
reader = csv_reader(f)
headers = next(reader)
text_indices = [
index for index, h in enumerate(headers) if h != "label"
]
Example = namedtuple('Example', headers)
examples = []
for line in reader:
for index, text in enumerate(line):
if index in text_indices:
if self.for_cn:
line[index] = text.replace(' ', '')
else:
line[index] = text
example = Example(*line)
examples.append(example)
return examples
def _pad_batch_records(self, batch_records):
batch_token_ids = [record.token_ids for record in batch_records]
batch_text_type_ids = [record.text_type_ids for record in batch_records]
batch_position_ids = [record.position_ids for record in batch_records]
if not self.is_inference:
batch_labels = [record.label_id for record in batch_records]
if self.is_classify:
batch_labels = np.array(batch_labels).astype("int64").reshape(
[-1, 1])
elif self.is_regression:
batch_labels = np.array(batch_labels).astype("float32").reshape(
[-1, 1])
if batch_records[0].qid:
batch_qids = [record.qid for record in batch_records]
batch_qids = np.array(batch_qids).astype("int64").reshape(
[-1, 1])
else:
batch_qids = np.array([]).astype("int64").reshape([-1, 1])
# padding
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids, pad_idx=self.pad_id, return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids, pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
batch_position_ids, pad_idx=self.pad_id)
padded_task_ids = np.ones_like(
padded_token_ids, dtype="int64") * self.task_id
return_list = [
padded_token_ids, padded_text_type_ids, padded_position_ids,
padded_task_ids, input_mask
]
if not self.is_inference:
return_list += [batch_labels, batch_qids]
return return_list
class SequenceLabelReader(BaseReader):
    def _pad_batch_records(self, batch_records):
        batch_token_ids = [record.token_ids for record in batch_records]
@@ -908,19 +908,19 @@ class MRCReader(BaseReader):
                 record.end_position for record in batch_records
             ]
             batch_start_position = np.array(batch_start_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
             batch_end_position = np.array(batch_end_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
         else:
             batch_size = len(batch_token_ids)
             batch_start_position = np.zeros(
-                shape=[batch_size, 1], dtype="int64")
-            batch_end_position = np.zeros(shape=[batch_size, 1], dtype="int64")
+                shape=[batch_size], dtype="int64")
+            batch_end_position = np.zeros(shape=[batch_size], dtype="int64")
 
         batch_unique_ids = [record.unique_id for record in batch_records]
         batch_unique_ids = np.array(batch_unique_ids).astype("int64").reshape(
-            [-1, 1])
+            [-1])
 
         # padding
         padded_token_ids, input_mask = pad_batch_data(
...
import basic_helper
import config_helper
# coding=utf-8
import os
import json
import yaml
import logging
from config_helper import PDConfig
from paddle import fluid
def get_basename(f):
return os.path.splitext(f)[0]
def get_suffix(f):
return os.path.splitext(f)[-1]
def parse_yaml(f, asdict=True, support_cmd_line=False):
assert os.path.exists(f), "file {} not found.".format(f)
if support_cmd_line:
args = PDConfig(yaml_file=f, fuse_args=True)
args.build()
return args.asdict() if asdict else args
else:
if asdict:
with open(f, "r") as fin:
yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
return yaml_config
else:
raise NotImplementedError()
def parse_json(f, asdict=True, support_cmd_line=False):
assert os.path.exists(f), "file {} not found.".format(f)
if support_cmd_line:
args = PDConfig(json_file=f, fuse_args=support_cmd_line)
args.build()
return args.asdict() if asdict else args
else:
if asdict:
with open(f, "r") as fin:
config = json.load(fin)
return config
else:
raise NotImplementedError()
def parse_list(string, astype=str):
assert isinstance(string, str), "{} is not a string.".format(string)
if ',' not in string:
return [astype(string)]
string = string.replace(',', ' ')
return [astype(i) for i in string.split()]
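# For instance (behavior of the helper above):
#   parse_list('1,2,3', astype=int)  ->  [1, 2, 3]
#   parse_list('solo')               ->  ['solo']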
def try_float(s):
    try:
        return float(s)
    except (TypeError, ValueError):
        return s
# TODO: add a None mechanism that allows hidden_size, batch_size and seqlen to be set to None
def check_io(in_attr, out_attr, strict=False, in_name="left", out_name="right"):
for name, attr in in_attr.items():
assert name in out_attr, in_name+': '+name+' not found in '+out_name
if attr != out_attr[name]:
if strict:
raise ValueError(name+': shape or dtype not consistent!')
else:
logging.warning('{}: shape or dtype not consistent!\n{}:\n{}\n{}:\n{}'.format(name, in_name, attr, out_name, out_attr[name]))
def encode_inputs(inputs, scope_name, sep='.', cand_set=None):
outputs = {}
for k, v in inputs.items():
if cand_set is not None:
if k in cand_set:
outputs[k] = v
if scope_name+sep+k in cand_set:
outputs[scope_name+sep+k] = v
else:
outputs[scope_name+sep+k] = v
return outputs
def decode_inputs(inputs, scope_name, sep='.', keep_unk_keys=True):
outputs = {}
for name, value in inputs.items():
# var for backbone are also available to tasks
if keep_unk_keys and sep not in name:
outputs[name] = value
# var for this inst
if name.startswith(scope_name+'.'):
outputs[name[len(scope_name+'.'):]] = value
return outputs
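# Round-trip sketch for the two helpers above: encode_inputs prefixes keys
# with a task scope; decode_inputs strips the prefix back off, and with
# keep_unk_keys=True unscoped (e.g. backbone-level) keys survive decoding.
#   encode_inputs({'logits': x}, 'cls_task')   ->  {'cls_task.logits': x}
#   decode_inputs({'cls_task.logits': x, 'encoder_outputs': y}, 'cls_task')
#       ->  {'logits': x, 'encoder_outputs': y}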
def build_executor(on_gpu):
if on_gpu:
place = fluid.CUDAPlace(0)
# dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
# dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
# return fluid.Executor(place), dev_count
return fluid.Executor(place)
def fit_attr(conf, fit_attr, strict=False):
for i, attr in fit_attr.items():
if i not in conf:
if strict:
raise Exception('Argument {} is required to create a controller.'.format(i))
else:
continue
conf[i] = attr(conf[i])
return conf
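# Example of fit_attr coercing loosely-typed yaml/json values (hypothetical
# keys; with strict=False, attrs missing from the conf are simply skipped):
#   fit_attr({'batch_size': '32', 'lr': '5e-5'},
#            {'batch_size': int, 'lr': float, 'seed': int})
#       ->  {'batch_size': 32, 'lr': 5e-05}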
@@ -55,7 +55,7 @@
     print("Loading pretraining parameters from {}...".format(
         pretraining_params_path))
 
-    with tarfile.open(os.path.join(pretraining_params_path, '__palmmodel__'), 'r:') as f:
+    with tarfile.open(os.path.join(pretraining_params_path, '__palmmodel__'), 'r') as f:
         f.extractall(os.path.join(pretraining_params_path, '.temp'))
 
     log_path = os.path.join(pretraining_params_path, '__palmmodel__')
...