diff --git a/config_demo2.yaml b/config_demo2.yaml
index 7b55af515f4f80d2d667c6fb7e35a07877313fed..b09c9e1ec0ab95b47b1b706b7174ddb1fd532179 100644
--- a/config_demo2.yaml
+++ b/config_demo2.yaml
@@ -12,7 +12,7 @@
 do_lower_case: True
 max_seq_len: 512
 batch_size: 4
-num_epochs: 2
+num_epochs: 0.5
 optimizer: "adam"
 learning_rate: 3e-5
 warmup_proportion: 0.1
diff --git a/paddlepalm/mtl_controller.py b/paddlepalm/mtl_controller.py
index bd8456aca1aaa752a1283ec3b470cb9bd0793aa1..12fbd0bf9d6279441975b4bad37f4e4a5ad2270f 100755
--- a/paddlepalm/mtl_controller.py
+++ b/paddlepalm/mtl_controller.py
@@ -398,7 +398,7 @@ class Controller(object):
 
         # merge reader input attrs from backbone and task_instances
         joint_input_names, joint_shape_and_dtypes, name_to_position = merge_input_attrs(train_backbone.inputs_attr, task_attrs)
-        pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False)
+        pred_joint_input_names, pred_joint_shape_and_dtypes, _ = merge_input_attrs(pred_backbone.inputs_attr, pred_task_attrs, insert_taskid=False, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False)
         # shapes: [task_id, shapes_of_backbone, shapes_of_inst1, ..., shapes_of_instN]
 
         if DEBUG:
@@ -448,6 +448,7 @@ class Controller(object):
             pred_net_inputs = create_net_inputs(pred_input_attrs)
             # don't bother with unique_name.guard; it is useless here, it cannot reach the names set via param_attr
             # with fluid.unique_name.guard("backbone-"):
+
             pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')
 
         fluid.framework.switch_main_program(train_prog)
diff --git a/paddlepalm/task_instance.py b/paddlepalm/task_instance.py
index 36ad848705545bbbace417dbcec00844d2a3b4af..d6d1c8d777c74545138bc7c26bdc10fe399504a1 100644
--- a/paddlepalm/task_instance.py
+++ b/paddlepalm/task_instance.py
@@ -87,7 +87,11 @@ class TaskInstance(object):
         dirpath = self._save_infermodel_path + suffix
         self._pred_input_varname_list = [str(i) for i in self._pred_input_varname_list]
 
-        fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe)
+        # del self._pred_input_varname_list[0]
+        # del self._pred_input_varname_list[0]
+        # del self._pred_input_varname_list[0]
+
+        fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, export_for_deployment=True)
         # fluid.io.save_inference_model(dirpath, self._pred_input_varname_list, self._pred_fetch_var_list, self._exe, params_filename='__params__')
 
         print(self._name + ': inference model saved at ' + dirpath)
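
Note on the task_instance.py hunk above: in Paddle 1.x, `export_for_deployment=True` is already the documented default of `fluid.io.save_inference_model`, so passing it explicitly only makes the intent visible; the call prunes the program to the feed-to-fetch subgraph and strips training-only ops before saving. A minimal, self-contained sketch of the API under that assumption (the toy network and output dir are hypothetical, not from this repo):

    import paddle.fluid as fluid

    # Toy program: one input, one fc layer, saved as an inference model.
    main_prog, startup_prog = fluid.Program(), fluid.Program()
    with fluid.program_guard(main_prog, startup_prog):
        x = fluid.layers.data(name='x', shape=[4], dtype='float32')
        y = fluid.layers.fc(input=x, size=2)

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_prog)
    fluid.io.save_inference_model(
        'infermodel_demo',              # hypothetical output directory
        ['x'], [y], exe,
        main_program=main_prog,
        export_for_deployment=True)     # fluid default: prune to the inference-only graph
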
diff --git a/paddlepalm/task_paradigm/mlm.py b/paddlepalm/task_paradigm/mlm.py
index 08a4e42548b3c3727e8990aff82674795dddd4e3..2782236aa08d6ae045fc5d51415d948e84322ca9 100644
--- a/paddlepalm/task_paradigm/mlm.py
+++ b/paddlepalm/task_paradigm/mlm.py
@@ -34,7 +34,6 @@ class TaskParadigm(task_paradigm):
     def inputs_attrs(self):
         reader = {
             "mask_label": [[-1, 1], 'int64'],
-            "batchsize_x_seqlen": [[1], 'int64'],
             "mask_pos": [[-1, 1], 'int64']}
         if not self._is_training:
             del reader['mask_label']
@@ -56,6 +55,7 @@
         mask_label = inputs["reader"]["mask_label"]
         # only needed for multi-task learning: when a step runs another task, its seqlen can be smaller, pushing gather out of range
         batchsize_x_seqlen = inputs["reader"]["batchsize_x_seqlen"]
+
         mask_pos = inputs["reader"]["mask_pos"]
         word_emb = inputs["backbone"]["embedding_table"]
         enc_out = inputs["backbone"]["encoder_outputs"]
@@ -67,7 +67,7 @@
 
         if self._is_training:
             # only needed for multi-task training: when a step runs another task, its seqlen can be smaller, pushing gather out of range
-            #mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
+            # mask_pos = fluid.layers.cast(x=mask_pos, dtype='int32')
             mask_pos = fluid.layers.elementwise_min(mask_pos, batchsize_x_seqlen)
 
         #print(fluid.default_main_program().blocks[0].vars)
diff --git a/paddlepalm/task_paradigm/mrc.py b/paddlepalm/task_paradigm/mrc.py
index 1d3642ad87328dd3c5b874cbad4eb2662e732538..0b5142c6c0a371ef485ee98b919324b506036c8d 100644
--- a/paddlepalm/task_paradigm/mrc.py
+++ b/paddlepalm/task_paradigm/mrc.py
@@ -50,7 +50,8 @@ class TaskParadigm(task_paradigm):
     def inputs_attrs(self):
         if self._is_training:
             reader = {"start_positions": [[-1, 1], 'int64'],
-                      "end_positions": [[-1, 1], 'int64']}
+                      "end_positions": [[-1, 1], 'int64'],
+                      }
         else:
             reader = {'unique_ids': [[-1, 1], 'int64']}
         bb = {"encoder_outputs": [[-1, -1, self._hidden_size], 'float32']}
@@ -76,6 +77,9 @@ class TaskParadigm(task_paradigm):
 
         if self._is_training:
             start_positions = inputs['reader']['start_positions']
            end_positions = inputs['reader']['end_positions']
+            seqlen = inputs["reader"]["seqlen"]
+            start_positions = fluid.layers.elementwise_min(start_positions, seqlen)
+            end_positions = fluid.layers.elementwise_min(end_positions, seqlen)
         else:
             unique_id = inputs['reader']['unique_ids']
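
Note on the `elementwise_min` clamps in the mlm.py and mrc.py hunks above: under joint training every task's layers run on every batch, so a gold position produced for one task's padding length can exceed the flattened length of the current, shorter batch belonging to another task and push `gather` out of range; clamping against `batchsize_x_seqlen` (or `seqlen`) keeps the index legal. A plain numpy sketch of the idea (values hypothetical; `np.minimum` mirrors `fluid.layers.elementwise_min`):

    import numpy as np

    # Positions computed for a batch padded to seqlen=512, while the batch
    # currently flowing through the graph is padded to 128 with batch_size=4.
    mask_pos = np.array([[40], [300], [505]], dtype=np.int64)  # flattened indices
    batchsize_x_seqlen = np.int64(4 * 128)                     # bound of this batch

    safe_pos = np.minimum(mask_pos, batchsize_x_seqlen)        # clamp before gather
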
diff --git a/paddlepalm/utils/reader_helper.py b/paddlepalm/utils/reader_helper.py
index 6f66c7580f8146954f831986f161e9ecc1685cd4..ef9b674d25c866ed8ab755b702875d8351cd835b 100644
--- a/paddlepalm/utils/reader_helper.py
+++ b/paddlepalm/utils/reader_helper.py
@@ -166,21 +166,21 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
             else:
                 outputs = next(iterators[id]) # dict type
 
-            # if 'token_ids' in outputs:
-            #     val1 = len(outputs['token_ids'])
-            #     val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
-            #     results[outname_to_pos['batch_size']] = val
-
-            #     val2 = len(outputs['token_ids'][0])
-            #     val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
-            #     results[outname_to_pos['seqlen']] = val
-
-            #     val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
-            #     results[outname_to_pos['batchsize_x_seqlen']] = val
-            # else:
-            #     if not has_show_warn:
-            #         print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
-            #         has_show_warn = True
+            if 'token_ids' in outputs:
+                val1 = len(outputs['token_ids'])
+                val = _check_and_adapt_shape_dtype([val1], [[1], 'int64'])
+                results[outname_to_pos['batch_size']] = val
+
+                val2 = len(outputs['token_ids'][0])
+                val = _check_and_adapt_shape_dtype([val2], [[1], 'int64'])
+                results[outname_to_pos['seqlen']] = val
+
+                val = _check_and_adapt_shape_dtype([val1*val2], [[1], 'int64'])
+                results[outname_to_pos['batchsize_x_seqlen']] = val
+            else:
+                if not has_show_warn:
+                    print('WARNING: token_ids not found in current batch, failed to yield batch_size, seqlen and batchsize_x_seqlen. (This message would be shown only once.)')
+                    has_show_warn = True
 
             prefix = iterator_prefixes[id]
             for outname, val in outputs.items():
@@ -214,7 +214,7 @@
     return iterator
 
 
-def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=False, insert_seqlen=False, insert_batchsize_x_seqlen=False):
+def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batchsize=True, insert_seqlen=True, insert_batchsize_x_seqlen=True):
     """
     Args:
         task_attrs(list[dict]|dict): task input attributes, key=attr_name, val=[shape, dtype], support single task and nested tasks
@@ -242,7 +242,7 @@ def merge_input_attrs(backbone_attr, task_attrs, insert_taskid=True, insert_batc
 
     if insert_batchsize_x_seqlen:
         ret.append(([1], 'int64'))
-        names.append('batchsize_x_seqlen')
+        names.append(u'batchsize_x_seqlen')
         start += 1
 
     names += sorted(backbone_attr.keys())
diff --git a/run_demo2.sh b/run_demo2.sh
index 02c40baa58cb9467ab2904ad8d8f32aaeced9ea2..128910ebe9b365c4dd7f8a3712093cea4b04444a 100755
--- a/run_demo2.sh
+++ b/run_demo2.sh
@@ -1,5 +1,4 @@
-export CUDA_VISIBLE_DEVICES=0
-
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 python -u demo2.py
 # GLOG_vmodule=lookup_table_op=4 python -u demo2.py > debug2.log 2>&1
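
Note on the reader_helper.py hunks above: with `insert_batchsize`, `insert_seqlen` and `insert_batchsize_x_seqlen` now defaulting to True in `merge_input_attrs`, the joint iterator must actually yield those three scalar pseudo-inputs, which the re-enabled block derives straight from the padded `token_ids` batch; the predict-side call in mtl_controller.py opts out of all of them explicitly. Roughly, as a plain-Python sketch with a toy batch (values hypothetical):

    token_ids = [[1, 5, 9, 0], [2, 7, 0, 0]]      # hypothetical padded batch
    batch_size = len(token_ids)                   # -> 2
    seqlen = len(token_ids[0])                    # -> 4, pad length of this batch
    batchsize_x_seqlen = batch_size * seqlen      # -> 8, the gather upper bound
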