Commit df98c24f authored by xixiaoyao

fix pred

Parent 8a99149a
(One file's diff is collapsed and not shown.)
@@ -6,7 +6,7 @@ if __name__ == '__main__':
     max_seqlen = 512
     batch_size = 4
-    num_epochs = 2
+    num_epochs = 20
     lr = 1e-3
     vocab_path = './pretrain/ernie/vocab.txt'
@@ -67,7 +67,8 @@ if __name__ == '__main__':
     cls_pred_head = palm.head.Classify(4, 1024, phase='pred')
     trainer.build_predict_head(cls_pred_head, pred_ernie)
-    trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
+    # trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
+    trainer.train(iterator_fn, print_steps=1)
     # trainer.save()
-export CUDA_VISIBLE_DEVICES=3
+export CUDA_VISIBLE_DEVICES=4
 python run.py
@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
         input_mask = inputs['input_mask']
         task_ids = inputs['task_ids']

+        fluid.layers.Print(src_ids)
         # padding id in vocabulary must be set to 0
         emb_out = fluid.embedding(
             input=src_ids,
@@ -5,5 +5,5 @@ import multiprocessing

 gpu_dev_count = int(fluid.core.get_cuda_device_count())
 cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
-from reader import yield_pieces, data_feeder
+from reader import yield_pieces, data_feeder, decode_fake
@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
         distribute_strategy: support s=split, c=copy, u=unstack,
     """
     assert batch_size % dev_count == 0, "batch_size need to be integer times larger than dev_count."
-    print('data in yield pieces')
-    print(len(data))
+    # print('data in yield pieces')
+    # print(len(data))
     assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)]
     assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)]
@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
         if type(data) == dict:
             yield dict(zip(*[keys, temp]))
         else:
-            print('yielded pieces')
-            print(len(temp))
+            # print('yielded pieces')
+            # print(len(temp))
             yield temp

-def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
+def data_feeder(reader, postprocess_fn=None, prefetch_steps=2, phase='train'):
     if postprocess_fn is None:
         def postprocess_fn(batch):
             return batch
@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
         queue.task_done()
         if ret is not None:
             batches, num_pad = ret
+            id = batches[0]['__task_id'][0][0] if phase == 'train' else -1
             batch_buf = []
             flag_buf = []
             for idx, batch in enumerate(batches):
@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
                 flag = idx-len(batches) < -num_pad
                 # if num_pad > 0:
                 #     num_pad -= 1
-                batch = postprocess_fn(batch)
+                batch = postprocess_fn(batch, id)
                 batch_buf.append(batch)
                 flag_buf.append(flag)
-            yield batch_buf, flag_buf
+            yield batch_buf, flag_buf, id
         else:
             break
     queue.join()

+
+def decode_fake(nums, mask, bs):
+    n_t = 0
+    for flag in mask:
+        if not flag:
+            break
+        n_t = n_t + 1
+
+    n_f = len(mask) - n_t
+    p1 = nums - (n_t-1) * bs
+    each_f = p1 / (n_f+1)
+    return each_f * n_f
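Note on the new decode_fake helper above (my reading of the diff, not part of the commit): when running on multiple devices, data_feeder pads the last step by copying batches so every device gets one, and marks the copies with False flags. decode_fake appears to estimate how many of the fetched prediction rows came from those padded batches, so the caller can drop them from the tail. A minimal, self-contained sketch with assumed numbers (4 devices, per-device batch size 8, 32 rows fetched) is below.

# Standalone sketch of the decode_fake helper added in this commit.
# The scenario (4 devices, per-device batch size 8, 32 fetched rows) is
# assumed for illustration only; it is not taken from the repository.

def decode_fake(nums, mask, bs):
    # count the leading device batches that hold real data
    n_t = 0
    for flag in mask:
        if not flag:
            break
        n_t = n_t + 1

    n_f = len(mask) - n_t          # number of padded (fake) device batches
    p1 = nums - (n_t - 1) * bs     # rows left after the full real batches
    each_f = p1 / (n_f + 1)        # rows per remaining batch (last real + copies)
    return each_f * n_f            # rows to drop from the tail of the results


if __name__ == '__main__':
    mask = [True, True, True, False]          # 3 real device batches, 1 padded copy
    num_fake = decode_fake(nums=32, mask=mask, bs=8)
    print(int(num_fake))                      # 8 -> keep the first 24 prediction rows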
@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):

         if self._lr_schedualer is not None:
             self._lr = self._lr_schedualer.build(self._lr)

+        fluid.layers.Print(self._lr)
         optimizer = fluid.optimizer.Adam(learning_rate=self._lr)

         if grad_clip is not None:
@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
             fluid.clip.set_gradient_clip(
                 clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

+        print(self._loss)
         _, param_grads = optimizer.minimize(self._loss)
         return param_grads
@@ -8,8 +8,9 @@ class BaseOptimizer():
     def build(self, grad_clip=None):
         pass

-    def _set_prog(self, prog):
+    def _set_prog(self, prog, init_prog):
         self._prog = prog
+        self._init_prog = prog
         if self._lr_schedualer is not None:
             self._lr_schedualer._set_prog(prog)
@@ -21,7 +21,7 @@ import time
 import numpy as np
 import paddlepalm.utils.basic_helper as helper
 from paddlepalm.utils import reader_helper, saver
-from paddlepalm.distribute import gpu_dev_count, data_feeder
+from paddlepalm.distribute import gpu_dev_count, data_feeder, decode_fake
 # from paddlepalm.default_settings import *

 DEBUG=False
@@ -217,12 +217,16 @@ class Trainer(object):
         with fluid.program_guard(train_prog, train_init_prog):
             loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])

-        self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
+        for _id, block in enumerate(self._train_prog.blocks):
+            for var in block.vars:
+                print("[debug] : %d, %s" % (_id, var))
         return loss_var

     def build_backward(self, optimizer, weight_decay=None, use_ema=False, ema_decay=0.9999):
         # build optimizer
-        optimizer._set_prog(self._train_prog)
+        assert self._train_init_prog is not None, "train graph not foung! You should build_forward first."
+        optimizer._set_prog(self._train_prog, self._train_init_prog)
         with fluid.program_guard(self._train_prog, self._train_init_prog):
             param_grads = optimizer.build()
@@ -258,6 +262,13 @@ class Trainer(object):
             ema = fluid.optimizer.ExponentialMovingAverage(ema_decay)
             ema.update()

+        # for bid, block in enumerate(self._train_prog.blocks):
+        #     print('block id: '+str(bid))
+        #     for var in block.vars:
+        #         print("%d : %s" % (bid, var))
+        # print(self._train_prog)

     def load_data(self, input_file, file_format, batch_size, num_epochs=None, shuffle_train=True):
         # load data
         print("preparing data...", end='')
@@ -287,6 +298,7 @@ class Trainer(object):

     def random_init_params(self):
         assert self._train_init_prog is not None, "train graph not foung! You should build_forward first before you random init parameters."
+        self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
         on_gpu = gpu_dev_count > 0
         self._exe = helper.build_executor(on_gpu)
         print('random init params...')
@@ -294,7 +306,7 @@ class Trainer(object):

     def load_ckpt(self, model_path, phase='train'):
         # load pretrain model (or ckpt)
-        assert self._exe is not None, "You need to random_init_params before load pretrain models."
+        assert self._exe is not None, "You need to random_init_params before load checkpoints."
         if phase == 'train':
             assert self._train_init_prog is not None, "train graph not found! You should build_forward first before load checkpoint."
@@ -437,12 +449,12 @@ class Trainer(object):
     def predict_one_batch(self, batch):
         if gpu_dev_count > 1:
             feed, mask = batch
-            rt_outputs = self.exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
+            rt_outputs = self.exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
             while mask.pop() == False:
                 rt_outputs.pop()
         else:
             feed = self._feed_batch_process_fn(batch)
-            rt_outputs = self._exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
+            rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
         rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
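Note on the multi-GPU branch of predict_one_batch above (again a reading of the diff, not part of the commit): the mask that accompanies the feed appears to be the flag list yielded by data_feeder, where trailing False entries mark device batches that were only padded copies; their fetched outputs are popped off the end of rt_outputs. A toy, self-contained sketch of that pruning step, with fabricated values, follows.

# Toy illustration of the mask-based tail pruning used in predict_one_batch.
# rt_outputs and mask are made-up stand-ins for the per-device fetch results
# and the flags produced by data_feeder.

rt_outputs = ['dev0_logits', 'dev1_logits', 'dev2_logits', 'dev3_logits']
mask = [True, True, True, False]   # the last device batch was a padded copy

# pop fetched results from the tail while their flag says "padded"
while mask and mask.pop() == False:
    rt_outputs.pop()

print(rt_outputs)   # ['dev0_logits', 'dev1_logits', 'dev2_logits']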