Commit b281d52a authored by wangxiao

Merge branch 'master' of https://github.com/PaddlePaddle/PALM

@@ -33,6 +33,7 @@ ssl._create_default_https_context = ssl._create_unverified_context
 _items = {
     'pretrain': {'ernie-en-uncased-large': 'https://ernie.bj.bcebos.com/ERNIE_Large_en_stable-2.0.0.tar.gz',
                  'bert-en-uncased-large': 'https://bert-models.bj.bcebos.com/uncased_L-24_H-1024_A-16.tar.gz',
+                 'bert-en-uncased-base': 'https://bert-models.bj.bcebos.com/uncased_L-12_H-768_A-12.tar.gz',
                  'utils': None},
     'reader': {'utils': None},
     'backbone': {'utils': None},
@@ -90,7 +91,7 @@ def _download(item, scope, path, silent=False):
         tar.extractall(path = data_dir)
         tar.close()
         os.remove(filename)
-        if scope == 'bert-en-uncased-large':
+        if scope.startswith('bert'):
             source_path = data_dir + '/' + data_name.split('.')[0]
             fileList = os.listdir(source_path)
             for file in fileList:
......
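Note: the hunk above registers the 12-layer uncased BERT base checkpoint alongside the large one, and widens the post-extraction directory flattening from the single hard-coded scope to every BERT variant. A trivial illustration of the generalized branch (not from the repo):

```python
# Illustrative only: the flattening branch now fires for every BERT scope,
# including the newly registered 'bert-en-uncased-base'.
for scope in ('bert-en-uncased-large', 'bert-en-uncased-base'):
    assert scope.startswith('bert')  # both take the flattening path
```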
@@ -52,9 +52,9 @@ class Model(backbone):
     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32']}

     @property
@@ -73,7 +73,7 @@ class Model(backbone):
         self._emb_dtype = 'float32'

         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -84,14 +84,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)

-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))

-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
......
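Note: this backbone hunk (and the ERNIE one below) follows the same Paddle 1.6 migration pattern: id feeds drop the trailing LoD-era `1` axis, becoming plain `[batch, seq_len]` tensors, and `fluid.layers.embedding` is swapped for `fluid.embedding`, which consumes such 2-D int64 ids directly. A minimal sketch of the new pattern, assuming Paddle 1.6.x (sizes are illustrative, not taken from the repo):

```python
import paddle.fluid as fluid

voc_size, emb_size = 30522, 768  # illustrative sizes

# New-style feed: 2-D [batch, seq_len] ids, no trailing [..., 1] axis.
src_ids = fluid.data(name='token_ids', shape=[-1, -1], dtype='int64')

# fluid.embedding (1.6+) takes the 2-D ids directly and returns a
# [batch, seq_len, emb_size] float tensor.
emb_out = fluid.embedding(
    input=src_ids,
    size=[voc_size, emb_size],
    dtype='float32')
```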
@@ -62,11 +62,11 @@ class Model(backbone):
     @property
     def inputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32'],
-                "task_ids": [[-1,-1, 1], 'int64']}
+                "task_ids": [[-1,-1], 'int64']}

     @property
     def outputs_attr(self):
@@ -85,7 +85,7 @@ class Model(backbone):
         task_ids = inputs['task_ids']

         # padding id in vocabulary must be set to 0
-        emb_out = fluid.layers.embedding(
+        emb_out = fluid.embedding(
             input=src_ids,
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
@@ -96,14 +96,14 @@ class Model(backbone):
         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
         embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)

-        position_emb_out = fluid.layers.embedding(
+        position_emb_out = fluid.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
                 name=scope_name+self._pos_emb_name, initializer=self._param_initializer))

-        sent_emb_out = fluid.layers.embedding(
+        sent_emb_out = fluid.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
@@ -113,7 +113,7 @@ class Model(backbone):
         emb_out = emb_out + position_emb_out
         emb_out = emb_out + sent_emb_out

-        task_emb_out = fluid.layers.embedding(
+        task_emb_out = fluid.embedding(
             task_ids,
             size=[self._task_types, self._emb_size],
             dtype=self._emb_dtype,
......
@@ -454,7 +454,7 @@ class Controller(object):
             # compute loss
             task_id_var = net_inputs['__task_id']
-            task_id_vec = layers.one_hot(task_id_var, num_instances)
+            task_id_vec = fluid.one_hot(task_id_var, num_instances)
             losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
             loss = layers.reduce_sum(task_id_vec * losses)
......
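Note: the task-routing trick is unchanged; only the op moves to `fluid.one_hot` (1.6+), which accepts ids without the trailing `1` axis that `fluid.layers.one_hot` required. A sketch with an assumed flat id tensor (Paddle 1.6.x assumed):

```python
import paddle.fluid as fluid

num_instances = 3  # illustrative

# fluid.one_hot appends the depth axis to the input shape, so a flat
# [-1] id vector yields a [-1, num_instances] one-hot matrix.
task_ids = fluid.data(name='task_ids', shape=[-1], dtype='int64')
task_vec = fluid.one_hot(task_ids, depth=num_instances)
```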
@@ -62,18 +62,18 @@ class Reader(reader):
     @property
     def outputs_attr(self):
         if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "label_ids": [[-1,1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
+                    "label_ids": [[-1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64']
                    }
         else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32']
                    }
......
@@ -60,18 +60,18 @@ class Reader(reader):
     @property
     def outputs_attr(self):
         if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "label_ids": [[-1,1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
+                    "label_ids": [[-1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64']
                    }
         else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32']
                    }
......
@@ -60,13 +60,13 @@ class Reader(reader):
     @property
     def outputs_attr(self):
-        return {"token_ids": [[-1, -1, 1], 'int64'],
-                "position_ids": [[-1, -1, 1], 'int64'],
-                "segment_ids": [[-1, -1, 1], 'int64'],
+        return {"token_ids": [[-1, -1], 'int64'],
+                "position_ids": [[-1, -1], 'int64'],
+                "segment_ids": [[-1, -1], 'int64'],
                 "input_mask": [[-1, -1, 1], 'float32'],
-                "task_ids": [[-1, -1, 1], 'int64'],
-                "mask_label": [[-1, 1], 'int64'],
-                "mask_pos": [[-1, 1], 'int64'],
+                "task_ids": [[-1, -1], 'int64'],
+                "mask_label": [[-1], 'int64'],
+                "mask_pos": [[-1], 'int64'],
                }
......
@@ -68,21 +68,21 @@ class Reader(reader):
     @property
     def outputs_attr(self):
         if self._is_training:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "start_positions": [[-1, 1], 'int64'],
-                    "end_positions": [[-1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64']
+                    "start_positions": [[-1], 'int64'],
+                    "end_positions": [[-1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64']
                    }
         else:
-            return {"token_ids": [[-1, -1, 1], 'int64'],
-                    "position_ids": [[-1, -1, 1], 'int64'],
-                    "segment_ids": [[-1, -1, 1], 'int64'],
-                    "task_ids": [[-1, -1, 1], 'int64'],
+            return {"token_ids": [[-1, -1], 'int64'],
+                    "position_ids": [[-1, -1], 'int64'],
+                    "segment_ids": [[-1, -1], 'int64'],
+                    "task_ids": [[-1, -1], 'int64'],
                     "input_mask": [[-1, -1, 1], 'float32'],
-                    "unique_ids": [[-1, 1], 'int64']
+                    "unique_ids": [[-1], 'int64']
                    }

     @property
......
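Note: all four readers converge on one shape convention: token-level id fields are `[-1, -1]` ([batch, seq_len]), per-example scalars (label_ids, unique_ids, start/end positions, mask_label, mask_pos) are flat `[-1]` vectors, and only `input_mask` keeps its explicit trailing axis since it is consumed as a float attention mask. A NumPy sanity check of the convention (values illustrative):

```python
import numpy as np

token_ids = np.zeros((2, 4), dtype='int64')       # [-1, -1]
label_ids = np.array([0, 1], dtype='int64')       # [-1], no trailing 1
input_mask = np.ones((2, 4, 1), dtype='float32')  # unchanged: [-1, -1, 1]

assert token_ids.ndim == 2 and label_ids.ndim == 1 and input_mask.ndim == 3
```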
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos
@@ -96,7 +96,7 @@ def prepare_batch_data(insts,
     # or unique id
     for i in range(3, len(insts[0]), 1):
         labels = [inst[i] for inst in insts]
-        labels = np.array(labels).astype("int64").reshape([-1, 1])
+        labels = np.array(labels).astype("int64").reshape([-1])
         labels_list.append(labels)
     # First step: do mask without padding
     if mask_id >= 0:
@@ -154,14 +154,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
         input_mask_data = np.array([[1] * len(inst) + [0] *
......
@@ -113,8 +113,8 @@ def mask(batch_tokens,
             pre_sent_len = len(sent)

-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos
@@ -136,7 +136,7 @@ def pad_batch_data(insts,
         inst_data = np.array(
             [inst + list([pad_idx] * (max_len - len(inst))) for inst in insts])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
@@ -145,7 +145,7 @@ def pad_batch_data(insts,
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
@@ -165,7 +165,7 @@ def pad_batch_data(insts,
     if return_seq_lens:
         seq_lens = np.array([len(inst) for inst in insts])
-        return_list += [seq_lens.astype("int64").reshape([-1, 1])]
+        return_list += [seq_lens.astype("int64").reshape([-1])]

     return return_list if len(return_list) > 1 else return_list[0]
......
@@ -67,8 +67,8 @@ def mask(batch_tokens, total_token_num, vocab_size, CLS=1, SEP=2, MASK=3):
                 sent[token_index] = MASK
                 mask_flag = True
                 mask_pos.append(sent_index * max_len + token_index)
-    mask_label = np.array(mask_label).astype("int64").reshape([-1, 1])
-    mask_pos = np.array(mask_pos).astype("int64").reshape([-1, 1])
+    mask_label = np.array(mask_label).astype("int64").reshape([-1])
+    mask_pos = np.array(mask_pos).astype("int64").reshape([-1])
     return batch_tokens, mask_label, mask_pos
@@ -147,14 +147,14 @@ def pad_batch_data(insts,
         inst_data = np.array([
             list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
         ])
-        return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_data.astype("int64").reshape([-1, max_len])]

     # position data
     if return_pos:
         inst_pos = np.array([
             list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
             for inst in insts
         ])
-        return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
+        return_list += [inst_pos.astype("int64").reshape([-1, max_len])]

     if return_input_mask:
         # This is used to avoid attention on paddings.
         input_mask_data = np.array([[1] * len(inst) + [0] *
......
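Note: the batching utilities above are updated to produce exactly the reader shapes: `pad_batch_data` now returns padded id arrays as 2-D `[batch, max_len]`, and per-example arrays (labels, seq_lens, mask labels/positions) as flat vectors. A self-contained sketch of the padding step:

```python
import numpy as np

insts = [[5, 6, 7], [8, 9]]  # two variable-length token-id lists
pad_idx = 0
max_len = max(len(inst) for inst in insts)

# Pad every instance to max_len, then reshape to 2-D [batch, max_len]
# (previously reshaped to [batch, max_len, 1]).
inst_data = np.array(
    [list(inst) + [pad_idx] * (max_len - len(inst)) for inst in insts])
padded = inst_data.astype('int64').reshape([-1, max_len])

print(padded.shape)  # (2, 3) -- no trailing singleton axis
```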
@@ -479,17 +479,17 @@ class ClassifyReader(BaseReader):
         batch_labels = [record.label_id for record in batch_records]
         if self.is_classify:
             batch_labels = np.array(batch_labels).astype("int64").reshape(
-                [-1, 1])
+                [-1])
         elif self.is_regression:
             batch_labels = np.array(batch_labels).astype("float32").reshape(
-                [-1, 1])
+                [-1])

         if batch_records[0].qid:
             batch_qids = [record.qid for record in batch_records]
             batch_qids = np.array(batch_qids).astype("int64").reshape(
-                [-1, 1])
+                [-1])
         else:
-            batch_qids = np.array([]).astype("int64").reshape([-1, 1])
+            batch_qids = np.array([]).astype("int64").reshape([-1])

         # padding
         padded_token_ids, input_mask = pad_batch_data(
@@ -908,19 +908,19 @@ class MRCReader(BaseReader):
                 record.end_position for record in batch_records
             ]
             batch_start_position = np.array(batch_start_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
             batch_end_position = np.array(batch_end_position).astype(
-                "int64").reshape([-1, 1])
+                "int64").reshape([-1])
         else:
             batch_size = len(batch_token_ids)
             batch_start_position = np.zeros(
-                shape=[batch_size, 1], dtype="int64")
-            batch_end_position = np.zeros(shape=[batch_size, 1], dtype="int64")
+                shape=[batch_size], dtype="int64")
+            batch_end_position = np.zeros(shape=[batch_size], dtype="int64")

         batch_unique_ids = [record.unique_id for record in batch_records]
         batch_unique_ids = np.array(batch_unique_ids).astype("int64").reshape(
-            [-1, 1])
+            [-1])

         # padding
         padded_token_ids, input_mask = pad_batch_data(
......
@@ -43,7 +43,7 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"label_ids": [[-1, 1], 'int64']}
+            reader = {"label_ids": [[-1], 'int64']}
         else:
             reader = {}
         bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']}
......@@ -75,8 +75,9 @@ class TaskParadigm(task_paradigm):
name=scope_name+"cls_out_b", initializer=fluid.initializer.Constant(0.)))
if self._is_training:
loss = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=label_ids)
inputs = fluid.layers.softmax(logits)
loss = fluid.layers.cross_entropy(
input=inputs, label=label_ids)
loss = layers.mean(loss)
return {"loss": loss}
else:
......
@@ -44,7 +44,7 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"label_ids": [[-1, 1], 'int64']}
+            reader = {"label_ids": [[-1], 'int64']}
         else:
             reader = {}
         bb = {"sentence_pair_embedding": [[-1, self._hidden_size], 'float32']}
@@ -79,8 +79,9 @@ class TaskParadigm(task_paradigm):
                 initializer=fluid.initializer.Constant(0.)))

         if self._is_training:
-            ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
-                logits=logits, label=labels, return_softmax=True)
+            inputs = fluid.layers.softmax(logits)
+            ce_loss = fluid.layers.cross_entropy(
+                input=inputs, label=labels)
             loss = fluid.layers.mean(x=ce_loss)
             return {'loss': loss}
         else:
......
@@ -33,8 +33,8 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         reader = {
-            "mask_label": [[-1, 1], 'int64'],
-            "mask_pos": [[-1, 1], 'int64']}
+            "mask_label": [[-1], 'int64'],
+            "mask_pos": [[-1], 'int64']}
         if not self._is_training:
             del reader['mask_label']
             del reader['batchsize_x_seqlen']
@@ -100,8 +100,9 @@ class TaskParadigm(task_paradigm):
             is_bias=True)

         if self._is_training:
-            mask_lm_loss = fluid.layers.softmax_with_cross_entropy(
-                logits=fc_out, label=mask_label)
+            inputs = fluid.layers.softmax(fc_out)
+            mask_lm_loss = fluid.layers.cross_entropy(
+                input=inputs, label=mask_label)
             loss = fluid.layers.mean(mask_lm_loss)
             return {'loss': loss}
         else:
......
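Note: the task heads above all replace the fused `softmax_with_cross_entropy` with an explicit `softmax` followed by `cross_entropy`. The decomposed form computes the same loss value, though the fused op is generally the more numerically stable choice. A side-by-side sketch, assuming Paddle 1.6.x (shapes illustrative):

```python
import paddle.fluid as fluid

logits = fluid.data(name='logits', shape=[-1, 2], dtype='float32')
labels = fluid.data(name='labels', shape=[-1, 1], dtype='int64')

# Fused form (removed by this commit):
fused_loss = fluid.layers.softmax_with_cross_entropy(
    logits=logits, label=labels)

# Decomposed form (added by this commit): same value, computed as
# cross-entropy over explicit probabilities.
probs = fluid.layers.softmax(logits)
manual_loss = fluid.layers.cross_entropy(input=probs, label=labels)
```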
@@ -49,11 +49,11 @@ class TaskParadigm(task_paradigm):
     @property
     def inputs_attrs(self):
         if self._is_training:
-            reader = {"start_positions": [[-1, 1], 'int64'],
-                      "end_positions": [[-1, 1], 'int64'],
+            reader = {"start_positions": [[-1], 'int64'],
+                      "end_positions": [[-1], 'int64'],
                      }
         else:
-            reader = {'unique_ids': [[-1, 1], 'int64']}
+            reader = {'unique_ids': [[-1], 'int64']}
         bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']}
         return {'reader': reader, 'backbone': bb}
@@ -70,7 +70,7 @@ class TaskParadigm(task_paradigm):
         else:
             return {'start_logits': [[-1, -1, 1], 'float32'],
                     'end_logits': [[-1, -1, 1], 'float32'],
-                    'unique_ids': [[-1, 1], 'int64']}
+                    'unique_ids': [[-1], 'int64']}

     def build(self, inputs, scope_name=""):
@@ -100,9 +100,11 @@ class TaskParadigm(task_paradigm):
         start_logits, end_logits = fluid.layers.unstack(x=logits, axis=0)

         def _compute_single_loss(logits, positions):
-            """Compute start/end loss for mrc model"""
-            loss = fluid.layers.softmax_with_cross_entropy(
-                logits=logits, label=positions)
+            """Compute start/en
+            d loss for mrc model"""
+            inputs = fluid.layers.softmax(logits)
+            loss = fluid.layers.cross_entropy(
+                input=inputs, label=positions)
             loss = fluid.layers.mean(x=loss)
             return loss
@@ -120,7 +122,7 @@ class TaskParadigm(task_paradigm):
     def postprocess(self, rt_outputs):
         """this func will be called after each step(batch) of training/evaluating/predicting process."""
         if not self._is_training:
-            unique_ids = np.squeeze(rt_outputs['unique_ids'], -1)
+            unique_ids = rt_outputs['unique_ids']
             start_logits = rt_outputs['start_logits']
             end_logits = rt_outputs['end_logits']
             for idx in range(len(unique_ids)):
......
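Note: with `unique_ids` now arriving as a flat `[batch]` vector, the `np.squeeze(..., -1)` that stripped the old trailing axis is redundant, so `postprocess` reads the array directly. The equivalence in NumPy terms (values illustrative):

```python
import numpy as np

old_style = np.array([[1000], [1001]], dtype='int64')  # shape [-1, 1]
new_style = np.array([1000, 1001], dtype='int64')      # shape [-1]

assert (np.squeeze(old_style, -1) == new_style).all()
```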
@@ -19,7 +19,6 @@ import random
 import numpy as np
 import paddle
 from paddle import fluid
-from paddle.fluid import layers

 def _check_and_adapt_shape_dtype(rt_val, attr, message=""):
@@ -65,7 +64,7 @@ def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n
     inputs = []
     ret = {}
     for name, shape, dtype in input_attrs:
-        p = layers.data(name, shape=shape, dtype=dtype)
+        p = fluid.data(name, shape=shape, dtype=dtype)
         ret[name] = p
         inputs.append(p)
@@ -219,7 +218,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc
     names = []
     start = 0
     if insert_taskid:
-        ret.append(([1,1], 'int64'))
+        ret.append(([1, 1], 'int64'))
         names.append('__task_id')
         start += 1
......
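Note: swapping `layers.data` for `fluid.data` is what makes the squeezed attr shapes line up end to end: `fluid.layers.data` prepends an implicit batch axis by default (`append_batch_size=True`), whereas `fluid.data` uses the given shape verbatim and validates fed data against it at run time. A sketch of the new input-creation loop, assuming Paddle 1.6.x:

```python
import paddle.fluid as fluid

# Illustrative attrs in the (name, shape, dtype) form the loop consumes.
input_attrs = [('token_ids', [-1, -1], 'int64'),
               ('input_mask', [-1, -1, 1], 'float32')]

ret = {}
for name, shape, dtype in input_attrs:
    # fluid.data takes `shape` as-is (no implicit batch axis), so the
    # reader/backbone attr shapes map one-to-one onto the variables.
    ret[name] = fluid.data(name, shape=shape, dtype=dtype)
```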