Commit df98c24f authored by xixiaoyao

fix pred

Parent 8a99149a
This diff is collapsed.
......@@ -6,7 +6,7 @@ if __name__ == '__main__':
max_seqlen = 512
batch_size = 4
num_epochs = 2
num_epochs = 20
lr = 1e-3
vocab_path = './pretrain/ernie/vocab.txt'
......@@ -67,7 +67,8 @@ if __name__ == '__main__':
cls_pred_head = palm.head.Classify(4, 1024, phase='pred')
trainer.build_predict_head(cls_pred_head, pred_ernie)
trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
# trainer.train(iterator_fn, print_steps=1, save_steps=5, save_path='outputs', save_type='ckpt,predict')
trainer.train(iterator_fn, print_steps=1)
# trainer.save()
......
export CUDA_VISIBLE_DEVICES=3
export CUDA_VISIBLE_DEVICES=4
python run.py
......@@ -114,6 +114,8 @@ class ERNIE(BaseBackbone):
input_mask = inputs['input_mask']
task_ids = inputs['task_ids']
fluid.layers.Print(src_ids)
# padding id in vocabulary must be set to 0
emb_out = fluid.embedding(
input=src_ids,
......
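Note: the `fluid.layers.Print(src_ids)` line added above is a debugging aid. Unlike a Python `print`, it inserts a Print operator into the program, so the tensor's actual values are dumped every time the executor runs the graph. A minimal standalone sketch of the same idea (not part of this commit; names and values are illustrative):

import numpy as np
import paddle.fluid as fluid

x = fluid.data(name='x', shape=[None, 3], dtype='int64')
fluid.layers.Print(x, message='src_ids:')  # prints the tensor's runtime values on every run

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
exe.run(feed={'x': np.array([[1, 2, 3]], dtype='int64')})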
......@@ -5,5 +5,5 @@ import multiprocessing
gpu_dev_count = int(fluid.core.get_cuda_device_count())
cpu_dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
from reader import yield_pieces, data_feeder
from reader import yield_pieces, data_feeder, decode_fake
......@@ -11,8 +11,8 @@ def yield_pieces(data, distribute_strategy, batch_size):
distribute_strategy: support s=split, c=copy, u=unstack,
"""
assert batch_size % dev_count == 0, "batch_size need to be integer times larger than dev_count."
print('data in yield pieces')
print(len(data))
# print('data in yield pieces')
# print(len(data))
assert type(data) == type(distribute_strategy), [type(data), type(distribute_strategy)]
assert len(data) == len(distribute_strategy), [len(data), len(distribute_strategy)]
......@@ -53,12 +53,11 @@ def yield_pieces(data, distribute_strategy, batch_size):
if type(data) == dict:
yield dict(zip(*[keys, temp]))
else:
print('yielded pieces')
print(len(temp))
# print('yielded pieces')
# print(len(temp))
yield temp
def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
def data_feeder(reader, postprocess_fn=None, prefetch_steps=2, phase='train'):
if postprocess_fn is None:
def postprocess_fn(batch):
return batch
......@@ -91,6 +90,7 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
queue.task_done()
if ret is not None:
batches, num_pad = ret
id = batches[0]['__task_id'][0][0] if phase == 'train' else -1
batch_buf = []
flag_buf = []
for idx, batch in enumerate(batches):
......@@ -98,12 +98,24 @@ def data_feeder(reader, postprocess_fn=None, prefetch_steps=2):
flag = idx-len(batches) < -num_pad
# if num_pad > 0:
# num_pad -= 1
batch = postprocess_fn(batch)
batch = postprocess_fn(batch, id)
batch_buf.append(batch)
flag_buf.append(flag)
yield batch_buf, flag_buf
else:
yield batch_buf, flag_buf, id
else:
break
queue.join()
def decode_fake(nums, mask, bs):
n_t = 0
for flag in mask:
if not flag:
break
n_t = n_t + 1
n_f = len(mask) - n_t
p1 = nums - (n_t-1) * bs
each_f = p1 / (n_f+1)
return each_f * n_f
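Note on the `data_feeder` changes above: during training the task id is read from the `__task_id` field of the first sub-batch and handed to `postprocess_fn`, so the multi-task post-processing can tell which head the batch belongs to; other phases use -1 as a sentinel. Assuming `__task_id` is a [batch_size, 1] array carrying the same id for every row (an assumption, not stated in the diff), the lookup amounts to:

import numpy as np
batch = {'__task_id': np.full((4, 1), 2, dtype='int64')}  # all 4 rows carry task id 2
task_id = batch['__task_id'][0][0]                        # -> 2, as in batches[0]['__task_id'][0][0]

A hedged reading of the new `decode_fake` helper: when the last prediction step cannot fill every GPU, padded sub-batches are appended and `mask` flags them with False. The function assumes the first n_t - 1 real sub-batches are full (bs rows each), spreads the remaining nums - (n_t - 1) * bs rows evenly over the last real slot plus the n_f padded slots, and returns the number of padded rows to strip from the fetched predictions (note that `/` is true division under Python 3, so the result may be a float there; `//` would keep it integral). A worked example with made-up numbers:

mask = [True, True, False, False]   # 2 real device batches, 2 padded copies
bs = 4                              # rows per device batch
nums = 16                           # rows fetched back from all 4 devices
# n_t = 2, n_f = 2
# p1 = 16 - (2 - 1) * 4 = 12 rows outside the guaranteed-full real slots
# each_f = 12 / (2 + 1) = 4 rows per remaining slot
print(decode_fake(nums, mask, bs))  # -> 8 padded rows to drop (8.0 under Python 3)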
......@@ -37,6 +37,8 @@ class Adam(BaseOptimizer):
if self._lr_schedualer is not None:
self._lr = self._lr_schedualer.build(self._lr)
fluid.layers.Print(self._lr)
optimizer = fluid.optimizer.Adam(learning_rate=self._lr)
if grad_clip is not None:
......@@ -46,6 +48,7 @@ class Adam(BaseOptimizer):
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
print(self._loss)
_, param_grads = optimizer.minimize(self._loss)
return param_grads
......
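Note on the `Adam.build` hunks above: `fluid.layers.Print(self._lr)` and `print(self._loss)` are temporary debugging probes; the substantive pattern is building `fluid.optimizer.Adam`, optionally registering global-norm gradient clipping, and letting `minimize` append the backward and update ops. A minimal standalone sketch of that Paddle 1.x pattern (not from this repository; the toy network is illustrative):

import paddle.fluid as fluid

x = fluid.data(name='x', shape=[None, 10], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.reduce_mean(fluid.layers.square_error_cost(input=pred, label=y))

optimizer = fluid.optimizer.Adam(learning_rate=1e-3)
fluid.clip.set_gradient_clip(
    clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0))
_, param_grads = optimizer.minimize(loss)  # appends backward ops and Adam update ops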
......@@ -8,8 +8,9 @@ class BaseOptimizer():
def build(self, grad_clip=None):
pass
def _set_prog(self, prog):
def _set_prog(self, prog, init_prog):
self._prog = prog
self._init_prog = prog
if self._lr_schedualer is not None:
self._lr_schedualer._set_prog(prog)
......
......@@ -21,7 +21,7 @@ import time
import numpy as np
import paddlepalm.utils.basic_helper as helper
from paddlepalm.utils import reader_helper, saver
from paddlepalm.distribute import gpu_dev_count, data_feeder
from paddlepalm.distribute import gpu_dev_count, data_feeder, decode_fake
# from paddlepalm.default_settings import *
DEBUG=False
......@@ -217,12 +217,16 @@ class Trainer(object):
with fluid.program_guard(train_prog, train_init_prog):
loss_var = fluid.layers.reduce_sum(task_output_vars[self.name+'.loss'])
self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
for _id, block in enumerate(self._train_prog.blocks):
for var in block.vars:
print("[debug] : %d, %s" % (_id, var))
return loss_var
def build_backward(self, optimizer, weight_decay=None, use_ema=False, ema_decay=0.9999):
# build optimizer
optimizer._set_prog(self._train_prog)
assert self._train_init_prog is not None, "train graph not foung! You should build_forward first."
optimizer._set_prog(self._train_prog, self._train_init_prog)
with fluid.program_guard(self._train_prog, self._train_init_prog):
param_grads = optimizer.build()
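Note on `optimizer._set_prog(self._train_prog, self._train_init_prog)` and the `program_guard` block above: Paddle 1.x keeps computation ops in a main Program and parameter initializers in a separate startup Program, and `fluid.program_guard` selects which pair the following layer/optimizer calls write into; passing the init program presumably lets the optimizer (and its LR scheduler) also touch the startup program when it needs to. A minimal sketch of that two-program pattern (standalone, not this repository's code):

import paddle.fluid as fluid

train_prog = fluid.Program()       # main program: forward, backward, optimizer ops
train_init_prog = fluid.Program()  # startup program: parameter initializers

with fluid.program_guard(train_prog, train_init_prog):
    x = fluid.data(name='x', shape=[None, 10], dtype='float32')
    loss = fluid.layers.reduce_mean(fluid.layers.fc(input=x, size=1))
    fluid.optimizer.Adam(learning_rate=1e-3).minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(train_init_prog)  # random-init the parameters, as Trainer.random_init_params does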
......@@ -258,6 +262,13 @@ class Trainer(object):
ema = fluid.optimizer.ExponentialMovingAverage(ema_decay)
ema.update()
# for bid, block in enumerate(self._train_prog.blocks):
# print('block id: '+str(bid))
# for var in block.vars:
# print("%d : %s" % (bid, var))
# print(self._train_prog)
def load_data(self, input_file, file_format, batch_size, num_epochs=None, shuffle_train=True):
# load data
print("preparing data...", end='')
......@@ -287,6 +298,7 @@ class Trainer(object):
def random_init_params(self):
assert self._train_init_prog is not None, "train graph not foung! You should build_forward first before you random init parameters."
self._distribute_train_prog = fluid.CompiledProgram(self._train_prog).with_data_parallel(loss_name=loss_var.name)
on_gpu = gpu_dev_count > 0
self._exe = helper.build_executor(on_gpu)
print('random init params...')
......@@ -294,7 +306,7 @@ class Trainer(object):
def load_ckpt(self, model_path, phase='train'):
# load pretrain model (or ckpt)
assert self._exe is not None, "You need to random_init_params before load pretrain models."
assert self._exe is not None, "You need to random_init_params before load checkpoints."
if phase == 'train':
assert self._train_init_prog is not None, "train graph not found! You should build_forward first before load checkpoint."
......@@ -437,12 +449,12 @@ class Trainer(object):
def predict_one_batch(self, batch):
if gpu_dev_count > 1:
feed, mask = batch
rt_outputs = self.exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
rt_outputs = self.exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
while mask.pop() == False:
rt_outputs.pop()
else:
feed = self._feed_batch_process_fn(batch)
rt_outputs = self._exe.run(self._distribute_train_prog, feed=feed, fetch_list=self._fetch_list)
rt_outputs = self._exe.run(self._distribute_pred_prog, feed=feed, fetch_list=self._fetch_list)
rt_outputs = {k:v for k,v in zip(self._fetch_names, rt_outputs)}
......
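Note on `predict_one_batch` above: besides switching from the training program to `self._distribute_pred_prog`, the multi-GPU branch uses the boolean mask from the feeder to discard outputs that came from padded (copied) sub-batches. A toy illustration of that pruning loop (made-up values; the real `rt_outputs` are the fetched arrays):

rt_outputs = ['out_dev0', 'out_dev1', 'out_dev2', 'out_dev3']
mask = [True, True, False, False]   # trailing False = padded sub-batches
while mask.pop() == False:          # walk back until the first real sub-batch
    rt_outputs.pop()
print(rt_outputs)                   # -> ['out_dev0', 'out_dev1']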