未验证 提交 39646067 编写于 作者: X Xiaoyao Xi 提交者: GitHub

Merge pull request #15 from xixiaoyao/master

fix bugs
...@@ -12,7 +12,7 @@ do_lower_case: True ...@@ -12,7 +12,7 @@ do_lower_case: True
max_seq_len: 512 max_seq_len: 512
batch_size: 4 batch_size: 4
num_epochs: 2 num_epochs: 0.5
optimizer: "adam" optimizer: "adam"
learning_rate: 3e-5 learning_rate: 3e-5
warmup_proportion: 0.1 warmup_proportion: 0.1
......
...@@ -398,7 +398,7 @@ class Controller(object): ...@@ -398,7 +398,7 @@ class Controller(object):
# merge reader input attrs from backbone and task_instances # merge reader input attrs from backbone and task_instances
joint_input_names, joint_shape_and_dtypes, name_to_position = merge_input_attrs(train_backbone.inputs_attr, task_attrs) joint_input_names, joint_shape_and_dtypes, name_to_position = merge_input_attrs(train_backbone.inputs_attr, task_attrs)
pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False) pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
# shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN] # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]
if DEBUG: if DEBUG:
...@@ -448,6 +448,7 @@ class Controller(object): ...@@ -448,6 +448,7 @@ class Controller(object):
pred_net_inputs = create_net_inputs(pred_input_attrs) pred_net_inputs = create_net_inputs(pred_input_attrs)
# 别用unique_name.guard了,没用的,无法作用到param_attr里的name上 # 别用unique_name.guard了,没用的,无法作用到param_attr里的name上
# with fluid.unique_name.guard("backbone-"): # with fluid.unique_name.guard("backbone-"):
pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_') pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')
fluid.framework.switch_main_program(train_prog) fluid.framework.switch_main_program(train_prog)
......
...@@ -87,7 +87,11 @@ class TaskInstance(object): ...@@ -87,7 +87,11 @@ class TaskInstance(object):
dirpath = self._save_infermodel_path + suffix dirpath = self._save_infermodel_path + suffix
self._pred_input_varname_list = [str(i) for i in self._pred_input_varname_list] self._pred_input_varname_list = [str(i) for i in self._pred_input_varname_list]
fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe) # del self._pred_input_varname_list[0]
# del self._pred_input_varname_list[0]
# del self._pred_input_varname_list[0]
fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, export_for_deployment = True)
# fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, params_filename='__params__') # fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, params_filename='__params__')
print(self._name + ': inference model saved at ' + dirpath) print(self._name + ': inference model saved at ' + dirpath)
......
...@@ -34,7 +34,6 @@ class TaskParadigm(task_paradigm): ...@@ -34,7 +34,6 @@ class TaskParadigm(task_paradigm):
def inputs_attrs(self): def inputs_attrs(self):
reader = { reader = {
"mask_label": [[-1, 1], 'int64'], "mask_label": [[-1, 1], 'int64'],
"batchsize_x_seqlen": [[1], 'int64'],
"mask_pos": [[-1, 1], 'int64']} "mask_pos": [[-1, 1], 'int64']}
if not self._is_training: if not self._is_training:
del reader['mask_label'] del reader['mask_label']
...@@ -56,6 +55,7 @@ class TaskParadigm(task_paradigm): ...@@ -56,6 +55,7 @@ class TaskParadigm(task_paradigm):
mask_label = inputs["reader"]["mask_label"] mask_label = inputs["reader"]["mask_label"]
# 多任务学习时才需要引入这个,防止其他run其他任务时导致seqlen过小,gather超范围 # 多任务学习时才需要引入这个,防止其他run其他任务时导致seqlen过小,gather超范围
batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"] batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"]
mask_pos = inputs["reader"]["mask_pos"] mask_pos = inputs["reader"]["mask_pos"]
word_emb = inputs["backbone"]["embedding_table"] word_emb = inputs["backbone"]["embedding_table"]
enc_out = inputs["backbone"]["encoder_outputs"] enc_out = inputs["backbone"]["encoder_outputs"]
...@@ -67,7 +67,7 @@ class TaskParadigm(task_paradigm): ...@@ -67,7 +67,7 @@ class TaskParadigm(task_paradigm):
if self._is_training: if self._is_training:
# 多任务训练时才需要引入这个,防止其他run其他任务时导致seqlen过小,gather超范围 # 多任务训练时才需要引入这个,防止其他run其他任务时导致seqlen过小,gather超范围
#mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32') # mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
mask_pos = fluid.layers.elementwise_min(mask_pos, batchsize_x_seqlen) mask_pos = fluid.layers.elementwise_min(mask_pos, batchsize_x_seqlen)
#print(fluid.default_main_program().blocks[0].vars) #print(fluid.default_main_program().blocks[0].vars)
......
...@@ -50,7 +50,8 @@ class TaskParadigm(task_paradigm): ...@@ -50,7 +50,8 @@ class TaskParadigm(task_paradigm):
def inputs_attrs(self): def inputs_attrs(self):
if self._is_training: if self._is_training:
reader = {"start_positions": [[-1, 1], 'int64'], reader = {"start_positions": [[-1, 1], 'int64'],
"end_positions": [[-1, 1], 'int64']} "end_positions": [[-1, 1], 'int64'],
}
else: else:
reader = {'unique_ids': [[-1, 1], 'int64']} reader = {'unique_ids': [[-1, 1], 'int64']}
bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']} bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']}
...@@ -76,6 +77,9 @@ class TaskParadigm(task_paradigm): ...@@ -76,6 +77,9 @@ class TaskParadigm(task_paradigm):
if self._is_training: if self._is_training:
start_positions = inputs['reader']['start_positions'] start_positions = inputs['reader']['start_positions']
end_positions = inputs['reader']['end_positions'] end_positions = inputs['reader']['end_positions']
seqlen = inputs["reader"]["seqlen"]
start_positions = fluid.layers.elementwise_min(start_positions, seqlen)
end_positions = fluid.layers.elementwise_min(end_positions, seqlen)
else: else:
unique_id = inputs['reader']['unique_ids'] unique_id = inputs['reader']['unique_ids']
......
...@@ -166,21 +166,21 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype ...@@ -166,21 +166,21 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
else: else:
outputs = next(iterators[id]) # dict type outputs = next(iterators[id]) # dict type
# if 'token_ids' in outputs: if 'token_ids' in outputs:
# val1 = len(outputs['token_ids']) val1 = len(outputs['token_ids'])
# val = _check_and_adapt_shape_dtype([val1], [[1], 'int64']) val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
# results[outname_to_pos['batch_size']] = val results[outname_to_pos['batch_size']] = val
# val2 = len(outputs['token_ids'][0]) val2 = len(outputs['token_ids'][0])
# val = _check_and_adapt_shape_dtype([val2], [[1], 'int64']) val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
# results[outname_to_pos['seqlen']] = val results[outname_to_pos['seqlen']] = val
# val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64']) val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
# results[outname_to_pos['batchsize_x_seqlen']] = val results[outname_to_pos['batchsize_x_seqlen']] = val
# else: else:
# if not has_show_warn: if not has_show_warn:
# print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)') print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
# has_show_warn = True has_show_warn = True
prefix = iterator_prefixes[id] prefix = iterator_prefixes[id]
for outname, val in outputs.items(): for outname, val in outputs.items():
...@@ -214,7 +214,7 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype ...@@ -214,7 +214,7 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
return iterator return iterator
def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False): def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=True, insert_seqlen=True, insert_batchsize_x_seqlen=True):
""" """
Args: Args:
task_attrs(list[dict]|dict): task input attributes, key=attr_name, val=[shape, dtype], support single task and nested tasks task_attrs(list[dict]|dict): task input attributes, key=attr_name, val=[shape, dtype], support single task and nested tasks
...@@ -242,7 +242,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc ...@@ -242,7 +242,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc
if insert_batchsize_x_seqlen: if insert_batchsize_x_seqlen:
ret.append(([1], 'int64')) ret.append(([1], 'int64'))
names.append('batchsize_x_seqlen') names.append(u'batchsize_x_seqlen')
start += 1 start += 1
names += sorted(backbone_attr.keys()) names += sorted(backbone_attr.keys())
......
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u demo2.py python -u demo2.py
# GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1 # GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册