未验证 提交 457e0078 编写于 作者: X Xiaoyao Xi 提交者: GitHub

Merge pull request #10 from xixiaoyao/master

fix bugs 
...@@ -2,3 +2,5 @@ ...@@ -2,3 +2,5 @@
__pycache__ __pycache__
pretrain_model pretrain_model
output_model output_model
mrqa_output
*.log
import paddlepalm as palm import paddlepalm as palm
if __name__ == '__main__': if __name__ == '__main__':
controller = palm.Controller('config.yaml', task_dir='task_instance') controller = palm.Controller('demo1_config.yaml', task_dir='demo1_tasks')
controller.load_pretrain('pretrain_model/ernie/params') controller.load_pretrain('pretrain_model/ernie/params')
controller.train() controller.train()
controller = palm.Controller(config='config.yaml', task_dir='task_instance', for_train=False) controller = palm.Controller(config='demo1_config.yaml', task_dir='demo1_tasks', for_train=False)
controller.pred('mrqa', inference_model_dir='output_model/firstrun/infer_model') controller.pred('mrqa', inference_model_dir='output_model/firstrun/infer_model')
# Global settings for a single-task run.
# NOTE(review): this appears to be the config loaded by the first demo script
# (it saves to output_model/firstrun, which that script predicts from) - confirm.

# Task instance name; the demo script calls controller.pred('mrqa', ...).
task_instance: "mrqa"
# NOTE(review): presumably flags the task as a target task (1) - confirm against the Controller.
target_tag: 1
# NOTE(review): presumably the per-task mixing weight; the Controller multiplies
# num_epochs * mix_ratio * num_examples when computing expected train steps - confirm.
mix_ratio: 1.0

# Output directory; the demo script reads the inference model from
# output_model/firstrun/infer_model.
save_path: "output_model/firstrun"

# Backbone selection and the files it needs.
backbone: "ernie"
backbone_config_path: "pretrain_model/ernie/ernie_config.json"

# Tokenizer / input settings.
vocab_path: "pretrain_model/ernie/vocab.txt"
do_lower_case: True
max_seq_len: 512

# Training schedule; batch_size and num_epochs feed the Controller's
# expected-train-steps computation.
batch_size: 5
num_epochs: 2

# Optimizer settings.
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
weight_decay: 0.1
import paddlepalm as palm
if __name__ == '__main__':
    # Both phases use the same global config file and task directory.
    config_file = 'demo2_config.yaml'
    tasks_dir = 'demo2_tasks'

    # Training phase: build a controller, load the pretrained parameters, train.
    controller = palm.Controller(config_file, task_dir=tasks_dir)
    controller.load_pretrain('pretrain_model/ernie/params')
    controller.train()

    # Prediction phase: rebuild the controller with for_train=False and run
    # the 'mrqa' task against the inference model saved under output_model/secondrun.
    controller = palm.Controller(config=config_file, task_dir=tasks_dir, for_train=False)
    controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
...@@ -2,7 +2,7 @@ task_instance: "mrqa, match4mrqa" ...@@ -2,7 +2,7 @@ task_instance: "mrqa, match4mrqa"
target_tag: 1, 0 target_tag: 1, 0
mix_ratio: 1.0, 0.5 mix_ratio: 1.0, 0.5
save_path: "output_model/firstrun" save_path: "output_model/secondrun"
backbone: "ernie" backbone: "ernie"
backbone_config_path: "pretrain_model/ernie/ernie_config.json" backbone_config_path: "pretrain_model/ernie/ernie_config.json"
......
# Settings for the mrqa task instance.

# Input data for training and prediction.
train_file: data/mrqa/mrqa-combined.train.raw.json
pred_file: data/mrqa/mrqa-combined.dev.raw.json
# Where prediction results are written (this path is also listed in .gitignore).
pred_output_path: 'mrqa_output'

# Reader and task paradigm used by this task.
reader: mrc4ernie
paradigm: mrc

# Passed to the reader as doc_stride and max_query_length respectively.
doc_stride: 128
max_query_len: 64

# NOTE(review): the next three look like answer-decoding options used at
# prediction time - their consumer is not visible here, confirm.
max_answer_len: 30
n_best_size: 20
null_score_diff_threshold: 0.0

verbose: False
...@@ -422,7 +422,7 @@ class Controller(object): ...@@ -422,7 +422,7 @@ class Controller(object):
prefixes.append(inst.name) prefixes.append(inst.name)
mrs.append(inst.mix_ratio) mrs.append(inst.mix_ratio)
joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE) joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE, batch_size=main_conf['batch_size'])
input_attrs = [[i, j, k] for i, (j,k) in zip(joint_input_names, joint_shape_and_dtypes)] input_attrs = [[i, j, k] for i, (j,k) in zip(joint_input_names, joint_shape_and_dtypes)]
pred_input_attrs = [[i, j, k] for i, (j,k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)] pred_input_attrs = [[i, j, k] for i, (j,k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
...@@ -488,10 +488,9 @@ class Controller(object): ...@@ -488,10 +488,9 @@ class Controller(object):
bb_fetches = {k: v.name for k,v in bb_output_vars.items()} bb_fetches = {k: v.name for k,v in bb_output_vars.items()}
task_fetches = {k: v.name for k,v in task_output_vars.items()} task_fetches = {k: v.name for k,v in task_output_vars.items()}
old = len(bb_fetches)+len(task_fetches) # for debug # fetches = bb_fetches.copy() # 注意!框架在多卡时无法fetch变长维度的tensor,这里加入bb的out后会挂
fetches = bb_fetches.copy() # fetches.update(task_fetches)
fetches.update(task_fetches) fetches = task_fetches
assert len(fetches) == old # for debug
fetches['__task_id'] = net_inputs['__task_id'].name fetches['__task_id'] = net_inputs['__task_id'].name
# compute loss # compute loss
...@@ -505,7 +504,8 @@ class Controller(object): ...@@ -505,7 +504,8 @@ class Controller(object):
num_examples = main_reader.num_examples num_examples = main_reader.num_examples
for inst in instances: for inst in instances:
max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * num_examples) // main_conf['batch_size'] // dev_count max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * num_examples) // main_conf['batch_size'] // dev_count
print('{}: expected train steps {}.'.format(inst.name, max_train_steps)) if inst.is_target:
print('{}: expected train steps {}.'.format(inst.name, max_train_steps))
inst.steps_pur_epoch = inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count inst.steps_pur_epoch = inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count
inst.expected_train_steps = max_train_steps inst.expected_train_steps = max_train_steps
...@@ -622,12 +622,11 @@ class Controller(object): ...@@ -622,12 +622,11 @@ class Controller(object):
epoch = 0 epoch = 0
time_begin = time.time() time_begin = time.time()
backbone_buffer = [] backbone_buffer = []
task_buffer = [[]] * num_instances
while not train_finish(): while not train_finish():
rt_outputs = self.exe.run(train_program, fetch_list=fetch_list) rt_outputs = self.exe.run(train_program, fetch_list=fetch_list)
rt_outputs = {k:v for k,v in zip(fetch_names, rt_outputs)} rt_outputs = {k:v for k,v in zip(fetch_names, rt_outputs)}
rt_task_id = np.squeeze(rt_outputs['__task_id']).tolist() rt_task_id = np.squeeze(rt_outputs['__task_id']).tolist()
assert (not isinstance(rt_task_id, list)) or len(set(rt_task_id)) == 1 assert (not isinstance(rt_task_id, list)) or len(set(rt_task_id)) == 1, rt_task_id
rt_task_id = rt_task_id[0] if isinstance(rt_task_id, list) else rt_task_id rt_task_id = rt_task_id[0] if isinstance(rt_task_id, list) else rt_task_id
cur_task = instances[rt_task_id] cur_task = instances[rt_task_id]
...@@ -635,8 +634,7 @@ class Controller(object): ...@@ -635,8 +634,7 @@ class Controller(object):
backbone_buffer.append(backbone.postprocess(backbone_rt_outputs)) backbone_buffer.append(backbone.postprocess(backbone_rt_outputs))
task_rt_outputs = {k[len(cur_task.name+'/'):]: v for k,v in rt_outputs.items() if k.startswith(cur_task.name+'/')} task_rt_outputs = {k[len(cur_task.name+'/'):]: v for k,v in rt_outputs.items() if k.startswith(cur_task.name+'/')}
temp = instances[rt_task_id].task_layer['train'].postprocess(task_rt_outputs) instances[rt_task_id].task_layer['train'].postprocess(task_rt_outputs)
task_buffer[rt_task_id].append(temp)
global_step += 1 global_step += 1
# if cur_task.is_target: # if cur_task.is_target:
......
...@@ -30,6 +30,7 @@ class Reader(reader): ...@@ -30,6 +30,7 @@ class Reader(reader):
max_seq_len=config['max_seq_len'], max_seq_len=config['max_seq_len'],
do_lower_case=config.get('do_lower_case', False), do_lower_case=config.get('do_lower_case', False),
tokenizer='FullTokenizer', tokenizer='FullTokenizer',
for_cn=config.get('for_cn', False),
doc_stride=config['doc_stride'], doc_stride=config['doc_stride'],
max_query_length=config['max_query_len'], max_query_length=config['max_query_len'],
random_seed=config.get('seed', None)) random_seed=config.get('seed', None))
......
...@@ -42,7 +42,8 @@ class TaskParadigm(task_paradigm): ...@@ -42,7 +42,8 @@ class TaskParadigm(task_paradigm):
return {"logits": [[-1, 1], 'float32']} return {"logits": [[-1, 1], 'float32']}
def build(self, inputs): def build(self, inputs):
labels = inputs["reader"]["label_ids"] if self._is_training:
labels = inputs["reader"]["label_ids"]
cls_feats = inputs["backbone"]["sentence_pair_embedding"] cls_feats = inputs["backbone"]["sentence_pair_embedding"]
cls_feats = fluid.layers.dropout( cls_feats = fluid.layers.dropout(
...@@ -58,11 +59,11 @@ class TaskParadigm(task_paradigm): ...@@ -58,11 +59,11 @@ class TaskParadigm(task_paradigm):
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
name="cls_out_b", name="cls_out_b",
initializer=fluid.initializer.Constant(0.))) initializer=fluid.initializer.Constant(0.)))
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=labels, return_softmax=True)
loss = fluid.layers.mean(x=ce_loss)
if self._is_training: if self._is_training:
ce_loss, probs = fluid.layers.softmax_with_cross_entropy(
logits=logits, label=labels, return_softmax=True)
loss = fluid.layers.mean(x=ce_loss)
return {'loss': loss} return {'loss': loss}
else: else:
return {'logits': logits} return {'logits': logits}
......
...@@ -65,9 +65,7 @@ class TaskParadigm(task_paradigm): ...@@ -65,9 +65,7 @@ class TaskParadigm(task_paradigm):
@property @property
def outputs_attr(self): def outputs_attr(self):
if self._is_training: if self._is_training:
return {'start_logits': [[-1, -1, 1], 'float32'], return {'loss': [[1], 'float32']}
'end_logits': [[-1, -1, 1], 'float32'],
'loss': [[1], 'float32']}
else: else:
return {'start_logits': [[-1, -1, 1], 'float32'], return {'start_logits': [[-1, -1, 1], 'float32'],
'end_logits': [[-1, -1, 1], 'float32'], 'end_logits': [[-1, -1, 1], 'float32'],
...@@ -106,16 +104,14 @@ class TaskParadigm(task_paradigm): ...@@ -106,16 +104,14 @@ class TaskParadigm(task_paradigm):
start_loss = _compute_single_loss(start_logits, start_positions) start_loss = _compute_single_loss(start_logits, start_positions)
end_loss = _compute_single_loss(end_logits, end_positions) end_loss = _compute_single_loss(end_logits, end_positions)
total_loss = (start_loss + end_loss) / 2.0 total_loss = (start_loss + end_loss) / 2.0
return {'start_logits': start_logits, return {'loss': total_loss}
'end_logits': end_logits,
'loss': total_loss}
else: else:
return {'start_logits': start_logits, return {'start_logits': start_logits,
'end_logits': end_logits, 'end_logits': end_logits,
'unique_ids': unique_id} 'unique_ids': unique_id}
def postprocess(self, rt_outputs): def postprocess(self, rt_outputs):
"""this func will be called after each step(batch) of training/evaluating/predicting process.""" """this func will be called after each step(batch) of training/evaluating/predicting process."""
if not self._is_training: if not self._is_training:
unique_ids = np.squeeze(rt_outputs['unique_ids'], -1) unique_ids = np.squeeze(rt_outputs['unique_ids'], -1)
......
...@@ -48,6 +48,20 @@ def _zero_batch(attrs): ...@@ -48,6 +48,20 @@ def _zero_batch(attrs):
return [np.zeros(shape=shape, dtype=dtype) for shape, dtype in pos_attrs] return [np.zeros(shape=shape, dtype=dtype) for shape, dtype in pos_attrs]
def _zero_batch_x(attrs, batch_size, max_seq_len=512):
    """Build one all-zero numpy array per (shape, dtype) pair in `attrs`.

    Variable (-1) dimensions are made concrete before allocation: a leading
    -1 becomes `batch_size` and a -1 in the second position becomes
    `max_seq_len`. All other dimensions are copied unchanged.

    Args:
        attrs: iterable of (shape, dtype) pairs, where shape is a sequence of ints.
        batch_size: size substituted for a leading -1 dimension.
        max_seq_len: size substituted for a -1 in the second dimension
            (defaults to 512, the value that used to be hard-coded).

    Returns:
        A list of zero-filled numpy arrays, in the same order as `attrs`.
    """
    zero_arrays = []
    for shape, dtype in attrs:
        # Copy so the caller's shape list is never mutated.
        pos_shape = list(shape)
        # Check the rank before indexing: rank-0 and rank-1 shapes
        # (e.g. [1] or [-1]) have no second dimension to patch.
        if len(pos_shape) > 0 and pos_shape[0] == -1:
            pos_shape[0] = batch_size
        if len(pos_shape) > 1 and pos_shape[1] == -1:
            pos_shape[1] = max_seq_len
        zero_arrays.append(np.zeros(shape=pos_shape, dtype=dtype))
    return zero_arrays
def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n_prefetch=1): def create_net_inputs(input_attrs, async=False, iterator_fn=None, dev_count=1, n_prefetch=1):
inputs = [] inputs = []
ret = {} ret = {}
...@@ -92,10 +106,11 @@ def create_iterator_fn(iterator, iterator_prefix, shape_and_dtypes, outname_to_p ...@@ -92,10 +106,11 @@ def create_iterator_fn(iterator, iterator_prefix, shape_and_dtypes, outname_to_p
return iterator return iterator
def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtypes, mrs, outname_to_pos, dev_count=1, keep_one_task=True, verbose=0): def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtypes, mrs, outname_to_pos, dev_count=1, keep_one_task=True, verbose=0, batch_size=None):
""" """
joint_shape_and_dtypes: 本质上是根据bb和parad的attr设定的,并且由reader中的attr自动填充-1(可变)维度得到,因此通过与iterator的校验可以完成runtime的batch正确性检查 joint_shape_and_dtypes: 本质上是根据bb和parad的attr设定的,并且由reader中的attr自动填充-1(可变)维度得到,因此通过与iterator的校验可以完成runtime的batch正确性检查
""" """
task_ids = range(len(iterators)) task_ids = range(len(iterators))
weights = [mr / float(sum(mrs)) for mr in mrs] weights = [mr / float(sum(mrs)) for mr in mrs]
if not keep_one_task: if not keep_one_task:
...@@ -129,7 +144,6 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype ...@@ -129,7 +144,6 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
v = verbose v = verbose
while True: while True:
id = np.random.choice(task_ids, p=weights) id = np.random.choice(task_ids, p=weights)
# results = _zero_batch(joint_shape_and_dtypes)
results = fake_batch results = fake_batch
if v > 0: if v > 0:
print('----- debug joint iterator -----') print('----- debug joint iterator -----')
...@@ -138,6 +152,8 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype ...@@ -138,6 +152,8 @@ def create_joint_iterator_fn(iterators, iterator_prefixes, joint_shape_and_dtype
results[0] = task_id_tensor results[0] = task_id_tensor
for i in range(dev_count): for i in range(dev_count):
# results = _zero_batch(joint_shape_and_dtypes, batch_size=batch_size)
# results[0] = task_id_tensor
if id in outbuf: if id in outbuf:
outputs = outbuf[id] outputs = outbuf[id]
del outbuf[id] del outbuf[id]
......
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0,1,2,3
export FLAGS_fraction_of_gpu_memory_to_use=0.1 export FLAGS_fraction_of_gpu_memory_to_use=0.1
export FLAGS_eager_delete_tensor_gb=0 export FLAGS_eager_delete_tensor_gb=0
python demo.py python demo1.py
# Launch the second demo on eight GPUs.

# Expose GPUs 0-7 to the process.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# NOTE(review): PaddlePaddle runtime flags - presumably the initial fraction of GPU
# memory to reserve and the tensor garbage-collection threshold; confirm against the Paddle flags docs.
export FLAGS_fraction_of_gpu_memory_to_use=0.1
export FLAGS_eager_delete_tensor_gb=0

python demo2.py
# Per-task settings.
# NOTE(review): the owning file name is not visible here; the train_file path
# suggests an auxiliary task trained on data/mlm4mrqa - confirm.
train_file: "data/mlm4mrqa"
mix_ratio: 0.4
batch_size: 4
in_tokens: False
generate_neg_sample: False
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册