未验证 提交 39646067 编写于 作者: X Xiaoyao Xi 提交者: GitHub

Merge pull request #15 from xixiaoyao/master

fix bugs
......@@ -12,7 +12,7 @@ do_lower_case: True
max_seq_len: 512
batch_size: 4
num_epochs: 2
num_epochs: 0.5
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
......
......@@ -398,7 +398,7 @@ class Controller(object):
# merge reader input attrs from backbone and task_instances
joint_input_names, joint_shape_and_dtypes, name_to_position = merge_input_attrs(train_backbone.inputs_attr, task_attrs)
pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False)
pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
# shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]
if DEBUG:
......@@ -448,6 +448,7 @@ class Controller(object):
pred_net_inputs = create_net_inputs(pred_input_attrs)
# Do not use fluid.unique_name.guard here: it has no effect, because it cannot reach the names set inside param_attr.
# with fluid.unique_name.guard("backbone-"):
pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')
fluid.framework.switch_main_program(train_prog)
......
......@@ -87,7 +87,11 @@ class TaskInstance(object):
dirpath = self._save_infermodel_path + suffix
self._pred_input_varname_list = [str(i) for i in self._pred_input_varname_list]
fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe)
# del self._pred_input_varname_list[0]
# del self._pred_input_varname_list[0]
# del self._pred_input_varname_list[0]
fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, export_for_deployment = True)
# fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, params_filename='__params__')
print(self._name + ': inference model saved at ' + dirpath)
......
......@@ -34,7 +34,6 @@ class TaskParadigm(task_paradigm):
def inputs_attrs(self):
reader = {
"mask_label": [[-1, 1], 'int64'],
"batchsize_x_seqlen": [[1], 'int64'],
"mask_pos": [[-1, 1], 'int64']}
if not self._is_training:
del reader['mask_label']
......@@ -56,6 +55,7 @@ class TaskParadigm(task_paradigm):
mask_label = inputs["reader"]["mask_label"]
# Only needed for multi-task learning: prevents an out-of-range gather when running other tasks makes seqlen too small.
batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"]
mask_pos = inputs["reader"]["mask_pos"]
word_emb = inputs["backbone"]["embedding_table"]
enc_out = inputs["backbone"]["encoder_outputs"]
......@@ -67,7 +67,7 @@ class TaskParadigm(task_paradigm):
if self._is_training:
# Only needed for multi-task training: prevents an out-of-range gather when running other tasks makes seqlen too small.
#mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
# mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
mask_pos = fluid.layers.elementwise_min(mask_pos, batchsize_x_seqlen)
#print(fluid.default_main_program().blocks[0].vars)
......
......@@ -50,7 +50,8 @@ class TaskParadigm(task_paradigm):
def inputs_attrs(self):
if self._is_training:
reader = {"start_positions": [[-1, 1], 'int64'],
"end_positions": [[-1, 1], 'int64']}
"end_positions": [[-1, 1], 'int64'],
}
else:
reader = {'unique_ids': [[-1, 1], 'int64']}
bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']}
......@@ -76,6 +77,9 @@ class TaskParadigm(task_paradigm):
if self._is_training:
start_positions = inputs['reader']['start_positions']
end_positions = inputs['reader']['end_positions']
seqlen = inputs["reader"]["seqlen"]
start_positions = fluid.layers.elementwise_min(start_positions, seqlen)
end_positions = fluid.layers.elementwise_min(end_positions, seqlen)
else:
unique_id = inputs['reader']['unique_ids']
......
......@@ -166,21 +166,21 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
else:
outputs = next(iterators[id]) # dict type
# if 'token_ids' in outputs:
# val1 = len(outputs['token_ids'])
# val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
# results[outname_to_pos['batch_size']] = val
# val2 = len(outputs['token_ids'][0])
# val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
# results[outname_to_pos['seqlen']] = val
# val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
# results[outname_to_pos['batchsize_x_seqlen']] = val
# else:
# if not has_show_warn:
# print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
# has_show_warn = True
if 'token_ids' in outputs:
val1 = len(outputs['token_ids'])
val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
results[outname_to_pos['batch_size']] = val
val2 = len(outputs['token_ids'][0])
val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
results[outname_to_pos['seqlen']] = val
val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
results[outname_to_pos['batchsize_x_seqlen']] = val
else:
if not has_show_warn:
print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
has_show_warn = True
prefix = iterator_prefixes[id]
for outname, val in outputs.items():
......@@ -214,7 +214,7 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
return iterator
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False):
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=True, insert_seqlen=True, insert_batchsize_x_seqlen=True):
"""
Args:
task_attrs(list[dict]|dict): task input attributes, key=attr_name, val=[shape, dtype], support single task and nested tasks
......@@ -242,7 +242,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc
if insert_batchsize_x_seqlen:
ret.append(([1], 'int64'))
names.append('batchsize_x_seqlen')
names.append(u'batchsize_x_seqlen')
start += 1
names += sorted(backbone_attr.keys())
......
export CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u demo2.py
# GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册