Commit af31077d authored by: xixiaoyao

release 0.3

Parent: 0a317161
import downloader
# from mtl_controller import Controller
import controller
import optimizer
import lr_sched
import backbone
import reader
import head
from trainer import Trainer
del interface
del task_instance
del default_settings
del utils
del mtl_controller
\ No newline at end of file
from ernie import ERNIE
from bert import BERT
@@ -14,76 +14,8 @@
# limitations under the License.
"""v1.1"""
class reader(object):
"""interface of data manager."""
def __init__(self, config):
assert isinstance(config, dict)
# @property
# def inputs_attr(self):
# """描述reader输入对象的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1.
# Return:
# dict类型。对各个输入对象的属性描述。例如,
# 对于文本分类任务,可能需要包含输入文本和所属标签的id
# {"text": ([], 'str'),
# "label": ([], 'int')}
# 对于标注任务,可能需要输入词序列和对应的标签
# {"tokens", ([-1], 'str'),
# "tags", ([-1], 'str')}
# 对于机器阅读理解任务,可能需要包含上下文、问题、回答、答案区域的起止位置等
# {"paragraph", ([], 'str'),
# "question", ([], 'str'),
# "start_position", ([], 'int')
# """
# raise NotImplementedError()
@property
def outputs_attr(self):
"""描述reader输出对象(被yield出的对象)的属性,包含各个对象的名字、shape以及数据类型。当某个对象为标量数据类型(如str, int, float等)时,shape设置为空列表[],当某个对象的某个维度长度可变时,shape中的相应维度设置为-1。
注意:当使用mini-batch梯度下降学习策略时,,应为常规的输入对象设置batch_size维度(一般为-1)
Return:
dict类型。对各个输入对象的属性描述。例如,
对于文本分类和匹配任务,yield的输出内容可能包含如下的对象(下游backbone和task可按需访问其中的对象)
{"token_ids": ([-1, max_len], 'int64'),
"input_ids": ([-1, max_len], 'int64'),
"segment_ids": ([-1, max_len], 'int64'),
"input_mask": ([-1, max_len], 'float32'),
"label": ([-1], 'int')}
"""
raise NotImplementedError()
# def parse_line(self):
# """框架内部使用字典描述每个样本,字典的key为inputs_attr,value为每个input对应的符合attr描述的值。
# 该函数负责将文本行解析成符合inputs_attr描述的字典类型的样本。默认的parse_line方法会读取json格式的数据集文件,数据集的每一行为json格式描述的样本。
# 用户可通过对该方法的继承改写来适配不同格式的数据集,例如csv格式甚至tfrecord文件。
# """
# raise NotImplementedError()
#
# def tokenize(self, line):
# """框架中内置了word piece tokenizer等分词器,用户可通过修改tokenizer超参数来制定使用的分词器,若内置的分词器均无法满足需求,用户可通过对该方法的继承改写来自定义分词器。
# Args:
# - line: a unicode string.
# Return:
# a list of tokens
# """
# raise NotImplementedError()
def iterator(self):
"""数据集遍历接口,注意,当数据集遍历到尾部时该接口应自动完成指针重置,即重新从数据集头部开始新的遍历。
Yield:
(dict) elements that meet the requirements in output_templete
"""
raise NotImplementedError()
@property
def num_examples(self):
"""数据集中的样本数量,即每个epoch中iterator所生成的样本数。注意,使用滑动窗口等可能导致数据集样本数发生变化的策略时,该接口应返回runtime阶段的实际样本数。"""
raise NotImplementedError()
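To make the contract concrete, here is a minimal in-memory reader against this interface (a hypothetical sketch, not part of the commit; ToyReader and the samples config key are invented for illustration):

class ToyReader(reader):
    """A toy in-memory classification reader (illustrative only)."""

    def __init__(self, config):
        reader.__init__(self, config)
        # hypothetical config key: a list of dicts that match outputs_attr
        self._samples = config.get('samples', [])

    @property
    def outputs_attr(self):
        return {"token_ids": [[-1, -1], 'int64'],
                "label_ids": [[-1], 'int64']}

    def iterator(self):
        # auto-reset: restart from the head of the dataset once it is exhausted
        while True:
            for sample in self._samples:
                yield sample

    @property
    def num_examples(self):
        return len(self._samples)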
class BaseBackbone(object):
"""interface of backbone model.""" """interface of backbone model."""
def __init__(self, config, phase): def __init__(self, config, phase):
...
@@ -23,12 +23,44 @@ from paddle import fluid
from paddle.fluid import layers
from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.backbone.base_backbone import BaseBackbone
class BERT(BaseBackbone):

def __init__(self, hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
max_position_embeddings, type_vocab_size, hidden_act, hidden_dropout_prob, \
attention_probs_dropout_prob, initializer_range, phase='train'):
config = {}
config['hidden_size'] = hidden_size
config['num_hidden_layers'] = num_hidden_layers
config['num_attention_heads'] = num_attention_heads
config['vocab_size'] = vocab_size
config['max_position_embeddings'] = max_position_embeddings
config['type_vocab_size'] = type_vocab_size
config['hidden_act'] = hidden_act
config['hidden_dropout_prob'] = hidden_dropout_prob
config['attention_probs_dropout_prob'] = attention_probs_dropout_prob
config['initializer_range'] = initializer_range
self.from_config(config, phase=phase)
@classmethod
def from_config(self, config, phase='train'):
assert 'hidden_size' in config, "{} is required to initialize BERT".format('hidden_size')
assert 'num_hidden_layers' in config, "{} is required to initialize BERT".format('num_hidden_layers')
assert 'num_attention_heads' in config, "{} is required to initialize BERT".format('num_attention_heads')
assert 'vocab_size' in config, "{} is required to initialize BERT".format('vocab_size')
assert 'max_position_embeddings' in config, "{} is required to initialize BERT".format('max_position_embeddings')
assert 'sent_type_vocab_size' in config or 'type_vocab_size' in config, \
    "{} is required to initialize BERT".format('type_vocab_size')
assert 'hidden_act' in config, "{} is required to initialize BERT".format('hidden_act')
assert 'hidden_dropout_prob' in config, "{} is required to initialize BERT".format('hidden_dropout_prob')
assert 'attention_probs_dropout_prob' in config, \
    "{} is required to initialize BERT".format('attention_probs_dropout_prob')
assert 'initializer_range' in config, "{} is required to initialize BERT".format('initializer_range')
# self._is_training = phase == 'train'  # backbones generally need not care about the running phase, since outputs barely change across phases
self._emb_size = config["hidden_size"]
@@ -153,3 +185,9 @@ class Model(backbone):
pass
class Model(BERT):
"""BERT wrapper for ConfigController"""
def __init__(self, config, phase):
BERT.from_config(config, phase=phase)
@@ -24,32 +24,30 @@ from paddle import fluid
from paddle.fluid import layers
from paddlepalm.backbone.utils.transformer import pre_process_layer, encoder
from paddlepalm.backbone.base_backbone import BaseBackbone
class ERNIE(BaseBackbone):

def __init__(self, hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
max_position_embeddings, sent_type_vocab_size, task_type_vocab_size, \
hidden_act, hidden_dropout_prob, attention_probs_dropout_prob, initializer_range, phase='train'):
# self._is_training = phase == 'train'  # backbones generally need not care about the running phase, since outputs barely change across phases

self._emb_size = hidden_size
self._n_layer = num_hidden_layers
self._n_head = num_attention_heads
self._voc_size = vocab_size
self._max_position_seq_len = max_position_embeddings
self._sent_types = sent_type_vocab_size

self._task_types = task_type_vocab_size

self._hidden_act = hidden_act
self._prepostprocess_dropout = hidden_dropout_prob
self._attention_dropout = attention_probs_dropout_prob

self._word_emb_name = "word_embedding"
self._pos_emb_name = "pos_embedding"
@@ -58,7 +56,40 @@ class Model(backbone):
self._emb_dtype = "float32"
self._param_initializer = fluid.initializer.TruncatedNormal(
    scale=initializer_range)
@classmethod
def from_config(cls, config, phase='train'):
assert 'hidden_size' in config, "{} is required to initialize ERNIE".format('hidden_size')
assert 'num_hidden_layers' in config, "{} is required to initialize ERNIE".format('num_hidden_layers')
assert 'num_attention_heads' in config, "{} is required to initialize ERNIE".format('num_attention_heads')
assert 'vocab_size' in config, "{} is required to initialize ERNIE".format('vocab_size')
assert 'max_position_embeddings' in config, "{} is required to initialize ERNIE".format('max_position_embeddings')
assert 'sent_type_vocab_size' in config or 'type_vocab_size' in config, "{} is required to initialize ERNIE".format('sent_type_vocab_size')
assert 'task_type_vocab_size' in config, "{} is required to initialize ERNIE".format('task_type_vocab_size')
assert 'hidden_act' in config, "{} is required to initialize ERNIE".format('hidden_act')
assert 'hidden_dropout_prob' in config, "{} is required to initialize ERNIE".format('hidden_dropout_prob')
assert 'attention_probs_dropout_prob' in config, "{} is required to initialize ERNIE".format('attention_probs_dropout_prob')
assert 'initializer_range' in config, "{} is required to initialize ERNIE".format('initializer_range')
hidden_size = config['hidden_size']
num_hidden_layers = config['num_hidden_layers']
num_attention_heads = config['num_attention_heads']
vocab_size = config['vocab_size']
max_position_embeddings = config['max_position_embeddings']
if 'sent_type_vocab_size' in config:
sent_type_vocab_size = config['sent_type_vocab_size']
else:
sent_type_vocab_size = config['type_vocab_size']
task_type_vocab_size = config['task_type_vocab_size']
hidden_act = config['hidden_act']
hidden_dropout_prob = config['hidden_dropout_prob']
attention_probs_dropout_prob = config['attention_probs_dropout_prob']
initializer_range = config['initializer_range']
return cls(hidden_size, num_hidden_layers, num_attention_heads, vocab_size, \
max_position_embeddings, sent_type_vocab_size, task_type_vocab_size, \
hidden_act, hidden_dropout_prob, attention_probs_dropout_prob, initializer_range, phase=phase)
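Both construction paths should be interchangeable; for instance (the hyperparameter values below are illustrative, not defaults shipped with the commit):

ernie_config = {
    'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12,
    'vocab_size': 18000, 'max_position_embeddings': 512,
    'sent_type_vocab_size': 4, 'task_type_vocab_size': 3,
    'hidden_act': 'relu', 'hidden_dropout_prob': 0.1,
    'attention_probs_dropout_prob': 0.1, 'initializer_range': 0.02,
}
ernie = ERNIE.from_config(ernie_config, phase='train')  # equivalent to ERNIE(768, 12, 12, ...)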
@property
def inputs_attr(self):
@@ -173,3 +204,12 @@ class Model(backbone):
def postprocess(self, rt_outputs):
pass
class Model(ERNIE):
def __init__(self, config, phase):
ERNIE.from_config(config, phase=phase)
from conf_controller import ConfigController
from controller import Controller
@@ -35,6 +35,9 @@ from paddlepalm.utils.reader_helper import create_net_inputs, create_iterator_fn
from paddlepalm.default_settings import *
from paddlepalm.task_instance import TaskInstance, check_instances
import Queue
from threading import Thread
DEBUG=False
VERBOSE=0
@@ -182,7 +185,7 @@ def _fit_attr(conf, fit_attr, strict=False):
return conf
class ConfigController(object):
def __init__(self, config, task_dir='.', for_train=True):
"""
@@ -234,7 +237,7 @@ class ConfController(object):
bb_conf = _merge_conf(mtl_conf, bb_conf)
else:
bb_conf = mtl_conf
print_dict(bb_conf, title='backbone configuration'.format(instname))
bb_name = mtl_conf['backbone']
bb_mod = importlib.import_module(BACKBONE_DIR + '.' + bb_name)
@@ -338,6 +341,7 @@ class ConfController(object):
main_conf = main_inst.config
if not os.path.exists(main_conf['save_path']):
os.makedirs(main_conf['save_path'])
os.makedirs(os.path.join(main_conf['save_path'], 'ckpt'))
# prepare backbone
train_backbone = Backbone(bb_conf, phase='train')
@@ -398,11 +402,14 @@ class ConfController(object):
prefixes.append(inst.name)
mrs.append(inst.mix_ratio)
joint_iterator_fn = create_joint_iterator_fn(iterators, prefixes, joint_shape_and_dtypes, mrs, name_to_position, dev_count=dev_count, verbose=VERBOSE, return_type='dict')
self._joint_iterator_fn = joint_iterator_fn
input_attrs = [[i, j, k] for i, (j,k) in zip(joint_input_names, joint_shape_and_dtypes)]
pred_input_attrs = [[i, j, k] for i, (j,k) in zip(pred_joint_input_names, pred_joint_shape_and_dtypes)]
# net_inputs = create_net_inputs(input_attrs, async=True, iterator_fn=joint_iterator_fn, dev_count=dev_count, n_prefetch=3)
net_inputs = create_net_inputs(input_attrs, async=False)
self._net_inputs = net_inputs
# build backbone and task layers
train_prog = fluid.default_main_program()
@@ -453,7 +460,7 @@ class ConfController(object):
# compute loss
task_id_var = net_inputs['__task_id']
task_id_vec = fluid.one_hot(task_id_var, num_instances)
losses = fluid.layers.concat([task_output_vars[inst.name+'/loss'] for inst in instances], axis=0)
loss = layers.reduce_sum(task_id_vec * losses)
@@ -522,15 +529,15 @@ class ConfController(object):
inst.reader['pred'] = pred_reader
return pred_prog
def load_pretrain(self, pretrain_path=None):
# load pretrain model (or ckpt)
if pretrain_path is None:
assert 'pretrain_path' in self.main_conf, "pretrain_path NOT set."
pretrain_path = self.main_conf['pretrain_path']

init_pretraining_params(
self.exe,
pretrain_path,
main_program=fluid.default_startup_program())
@@ -567,6 +574,18 @@ class ConfController(object):
return False
return True
def pack_multicard_feed(iterator, net_inputs, dev_count):
ret = []
mask = []
for i in range(dev_count):
temp = {}
content, flag = next(iterator)
for q, var in net_inputs.items():
temp[var.name] = content[q]
ret.append(temp)
mask.append(1 if flag else 0)
return ret, mask
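For intuition, a sketch of what this helper produces (names illustrative; each feed dict is keyed by the corresponding feed Variable's name):

# feed, mask = pack_multicard_feed(joint_iterator, net_inputs, dev_count=2)
# feed == [{net_inputs['token_ids'].name: <ndarray>, ...},   # feed dict for device 0
#          {net_inputs['token_ids'].name: <ndarray>, ...}]   # feed dict for device 1
# mask == [1, 1]   # a 0 entry marks a padded batch whose outputs should be discarded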
# do training
fetch_names, fetch_list = zip(*fetches.items())
@@ -575,8 +594,50 @@ class ConfController(object):
epoch = 0
time_begin = time.time()
backbone_buffer = []
def multi_dev_reader(reader, dev_count):
def worker(reader, dev_count, queue):
dev_batches = []
for index, data in enumerate(reader()):
if len(dev_batches) < dev_count:
dev_batches.append(data)
if len(dev_batches) == dev_count:
queue.put((dev_batches, 0))
dev_batches = []
# For the prediction of the remained batches, pad more batches to
# the number of devices and the padded samples would be removed in
# prediction outputs.
if len(dev_batches) > 0:
num_pad = dev_count - len(dev_batches)
for i in range(len(dev_batches), dev_count):
dev_batches.append(dev_batches[-1])
queue.put((dev_batches, num_pad))
queue.put(None)
queue = Queue.Queue(dev_count*2)
p = Thread(
target=worker, args=(reader, dev_count, queue))
p.daemon = True
p.start()
while True:
ret = queue.get()
if ret is not None:
batches, num_pad = ret
queue.task_done()
for batch in batches:
flag = num_pad == 0
if num_pad > 0:
num_pad -= 1
yield batch, flag
else:
queue.task_done()  # mark the terminating None as processed so queue.join() can return
break
queue.join()
joint_iterator = multi_dev_reader(self._joint_iterator_fn, self.dev_count)
while not train_finish():
feed, mask = pack_multicard_feed(joint_iterator, self._net_inputs, self.dev_count)
rt_outputs = self.exe.run(train_program, feed=feed, fetch_list=fetch_list)
rt_outputs = {k:v for k,v in zip(fetch_names, rt_outputs)}
rt_task_id = np.squeeze(rt_outputs['__task_id']).tolist()
rt_task_id = rt_task_id[0] if isinstance(rt_task_id, list) else rt_task_id
@@ -591,8 +652,9 @@ class ConfController(object):
global_step += 1
cur_task.cur_train_step += 1
cur_task_global_step = cur_task.cur_train_step + cur_task.cur_train_epoch * cur_task.steps_pur_epoch
if cur_task.is_target and cur_task.save_infermodel_every_n_steps > 0 and cur_task_global_step % cur_task.save_infermodel_every_n_steps == 0:
cur_task.save(suffix='.step'+str(cur_task_global_step))
if global_step % main_conf.get('print_every_n_steps', 5) == 0:
loss = rt_outputs[cur_task.name+'/loss']
@@ -610,10 +672,16 @@ class ConfController(object):
print(cur_task.name+': train finished!')
cur_task.save()
if 'save_ckpt_every_n_steps' in main_conf and global_step % main_conf['save_ckpt_every_n_steps'] == 0:
save_path = os.path.join(main_conf['save_path'], 'ckpt',
    "step_" + str(global_step))
fluid.io.save_persistables(self.exe, save_path, saver_program)
print('checkpoint has been saved at '+save_path)
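For instance, with save_path: output and save_ckpt_every_n_steps: 1000 in the main configuration, persistables are written to output/ckpt/step_1000, output/ckpt/step_2000, and so on; the ckpt subdirectory itself is created during initialization, as shown further above.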
print("ALL tasks train finished, exiting...") print("ALL tasks train finished, exiting...")
...@@ -673,6 +741,7 @@ if __name__ == '__main__': ...@@ -673,6 +741,7 @@ if __name__ == '__main__':
__all__ = ["Controller"]
@@ -415,9 +415,6 @@ class Controller(object):
return loss, max_train_steps
def build_backward(self, optimizer, use_ema=False, ema_decay=0.9999):
# build optimizer
optimizer.optimize(fluid.default_main_program())
...
from cls import Classify
# from match import Match
# from mrc import MRC
# from mlm import MaskLM
@@ -14,13 +14,15 @@
# limitations under the License.
class BaseHead(object):

def __init__(self, config, phase, backbone_config):
"""
config: dict. Hyperparameters defined in the task instance config and the multi-task configuration file.
phase: str. The running phase; currently train and predict are supported.
"""
self._stop_gradient = {}
self._prog = None
@property
def inputs_attrs(self):
@@ -43,6 +45,17 @@ class task(object):
def epoch_inputs_attrs(self):
return {}
# def stop_gradient(source, inputs):
# # if self._inputs is None:
# # raise Exception('You need to build this head first before stop gradient.')
# self._inputs = inputs
# for name, var in self._inputs[source].items():
# # cur_block = self._prog.current_block()
# var = fluid.layers.assign(var)
# var.stop_gradient = True
# self._inputs[name] = var
# return self._inputs
def build(self, inputs, scope_name=""): def build(self, inputs, scope_name=""):
"""建立task_layer的计算图。将符合inputs_attrs描述的来自各个对象集的静态图Variables映射成符合outputs_attr描述的静态图Variable输出。 """建立task_layer的计算图。将符合inputs_attrs描述的来自各个对象集的静态图Variables映射成符合outputs_attr描述的静态图Variable输出。
Args: Args:
@@ -53,6 +66,7 @@ class task(object):
"""
raise NotImplementedError()
def postprocess(self, rt_outputs):
"""Post-process the runtime results of the task layer for the current batch after each training or inference step. Note that besides the outputs of build, rt_outputs automatically contains the computed loss as well."""
pass
...
@@ -15,51 +15,47 @@
import paddle.fluid as fluid
from paddle.fluid import layers
from paddlepalm.head.base_head import BaseHead
import numpy as np
import os
# def classify(num_classes, input_dim, dropout_prob, pred_output_dir=None, param_initializer_range=0.02, phase='train'):
#
#     config = {
#         'num_classes': num_classes,
#         'hidden_size': input_dim,
#         'dropout_prob': dropout_prob,
#         'pred_output_dir': pred_output_dir,
#         'initializer_range': param_initializer_range
#     }
#
#     return Task(config, phase, config)
class Classify(BaseHead):
'''
classification
'''

# def __init__(self, config, phase, backbone_config=None):
def __init__(self, num_classes, input_dim, dropout_prob=0.0, \
param_initializer_range=0.02, phase='train'):
self._is_training = phase == 'train'
self._hidden_size = input_dim

self.num_classes = num_classes

self._dropout_prob = dropout_prob if phase == 'train' else 0.0
self._param_initializer = fluid.initializer.TruncatedNormal(
    scale=param_initializer_range)

self._preds = []
@property
def inputs_attrs(self):
reader = {}
bb = {"sentence_embedding": [[-1, self._hidden_size], 'float32']}
if self._is_training:
    reader["label_ids"] = [[-1], 'int64']
return {'reader': reader, 'backbone': bb}
@property
@@ -96,11 +92,12 @@ class Task(base_task):
else:
return {"logits":logits}
def batch_postprocess(self, rt_outputs):
if not self._is_training:
logits = rt_outputs['logits']
preds = np.argmax(logits, -1)
self._preds.extend(preds.tolist())
return preds
def epoch_postprocess(self, post_inputs):
# there is no post_inputs needed and not declared in epoch_inputs_attrs, hence no elements exist in post_inputs
...
from slanted_triangular_schedualer import TriangularSchedualer
from warmup_schedualer import WarmupSchedualer
# scheduled_lr = fluid.layers.learning_rate_scheduler\
# .noam_decay(1/(warmup_steps *(config['learning_rate'] ** 2)),
# warmup_steps)
class BaseSchedualer():
def __init__(self):
self._prog = None
def _set_prog(self, prog):
self._prog = prog
def build(self, learning_rate):
raise NotImplementedError()
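A subclass only has to override build; the simplest conceivable schedule just passes the rate through (a hypothetical sketch, not part of the commit):

class ConstantSchedualer(BaseSchedualer):
    """Keeps the learning rate fixed over the whole run (illustrative only)."""

    def build(self, learning_rate):
        return learning_rate  # no warmup, no decay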
from paddlepalm.lr_sched.schedualer import BaseSchedualer
from paddle import fluid
class TriangularSchedualer(BaseSchedualer):
""" Applies linear warmup of learning rate from 0 to learning_rate until warmup_steps, and then decay to 0 linearly until num_train_steps."""
def __init__(self, warmup_steps, num_train_steps):
BaseSchedualer.__init__(self)
assert num_train_steps > warmup_steps > 0
self.warmup_steps = warmup_steps
self.num_train_steps = num_train_steps
def build(self, learning_rate):
with self._prog._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="scheduled_learning_rate")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < self.warmup_steps):
warmup_lr = learning_rate * (global_step / self.warmup_steps)
fluid.layers.tensor.assign(warmup_lr, lr)
with switch.default():
decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
learning_rate=learning_rate,
decay_steps=self.num_train_steps,
end_learning_rate=0.0,
power=1.0,
cycle=False)
fluid.layers.tensor.assign(decayed_lr, lr)
return lr
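A usage sketch (the step counts and rate are illustrative; in normal use the program is attached via the optimizer's _set_prog, see BaseOptimizer further below):

sched = TriangularSchedualer(warmup_steps=1000, num_train_steps=10000)
sched._set_prog(fluid.default_main_program())
lr_var = sched.build(learning_rate=5e-5)
# lr_var ramps from 0 to 5e-5 over the first 1000 steps, then decays linearly to 0 at step 10000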
from paddlepalm.lr_sched.schedualer import BaseSchedualer
from paddle import fluid

class WarmupSchedualer(BaseSchedualer):
""" Applies linear warmup of learning rate from 0 to learning_rate until warmup_steps, then holds it constant."""

def __init__(self, warmup_steps):
BaseSchedualer.__init__(self)
self.warmup_steps = warmup_steps
def build(self, learning_rate):
with self._prog._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="scheduled_learning_rate")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < self.warmup_steps):
warmup_lr = learning_rate * (global_step / self.warmup_steps)
fluid.layers.tensor.assign(warmup_lr, lr)
with switch.default():
fluid.layers.tensor.assign(learning_rate, lr)
return lr
This diff is collapsed.
@@ -20,102 +20,36 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
from paddlepalm.optimizer.base_optimizer import BaseOptimizer
# ---- removed in this commit ----

class schedualer(object):

    def __init__(self):
        pass

    def lr(self):
        pass


def ConstantLearning():
    def __init__(self, lr):
        self._lr = lr

    def lr(self):
        return self._lr


def LinearWarmupLearning():
    def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
        """ Applies linear warmup of learning rate from 0 and decay to 0."""
        with fluid.default_main_program()._lr_schedule_guard():
            lr = fluid.layers.tensor.create_global_var(
                shape=[1],
                value=0.0,
                dtype='float32',
                persistable=True,
                name="scheduled_learning_rate")

            global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()

            with fluid.layers.control_flow.Switch() as switch:
                with switch.case(global_step < warmup_steps):
                    warmup_lr = learning_rate * (global_step / warmup_steps)
                    fluid.layers.tensor.assign(warmup_lr, lr)
                with switch.default():
                    decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
                        learning_rate=learning_rate,
                        decay_steps=num_train_steps,
                        end_learning_rate=0.0,
                        power=1.0,
                        cycle=False)
                    fluid.layers.tensor.assign(decayed_lr, lr)

            return lr


def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=None):
    if warmup_steps > 0:
        decay_strategy = config.get('lr_scheduler', 'linear_warmup_decay')
        if decay_strategy == 'noam_decay':
            scheduled_lr = fluid.layers.learning_rate_scheduler\
                .noam_decay(1/(warmup_steps * (config['learning_rate'] ** 2)),
                            warmup_steps)
        elif decay_strategy == 'linear_warmup_decay':
            scheduled_lr = linear_warmup_decay(config['learning_rate'], warmup_steps,
                                               max_train_steps)
        else:
            raise ValueError("Unkown lr_scheduler, should be "
                             "'noam_decay' or 'linear_warmup_decay'")
        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
    else:
        optimizer = fluid.optimizer.Adam(learning_rate=config['learning_rate'])
        scheduled_lr = config['learning_rate']

    clip_norm_thres = 1.0
    # When using mixed precision training, scale the gradient clip threshold
    # by loss_scaling
    fluid.clip.set_gradient_clip(
        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

    def exclude_from_weight_decay(name):
        if name.find("layer_norm") > -1:
            return True
        bias_suffix = ["_bias", "_b", ".b_0"]
        for suffix in bias_suffix:
            if name.endswith(suffix):
                return True
        return False

    param_list = dict()
    for param in train_program.global_block().all_parameters():
        param_list[param.name] = param * 1.0
        param_list[param.name].stop_gradient = True

    _, param_grads = optimizer.minimize(loss)

    if config.get('weight_decay', 0) > 0:
        for param, grad in param_grads:
            if exclude_from_weight_decay(param.name):
                continue
            with param.block.program._optimized_guard(
                [param, grad]), fluid.framework.name_scope("weight_decay"):
                updated_param = param - param_list[
                    param.name] * config['weight_decay'] * scheduled_lr
                fluid.layers.assign(output=param, input=updated_param)

# ---- added in this commit ----

class Adam(BaseOptimizer):

    def __init__(self, loss_var, lr, lr_schedualer=None):
        BaseOptimizer.__init__(self, loss_var, lr, lr_schedualer=None)
        self._loss = loss_var
        self._lr = lr
        self._lr_schedualer = lr_schedualer

    def build(self, grad_clip=None):
        if self._lr_schedualer is not None:
            self._lr = self._lr_schedualer.build(self._lr)

        optimizer = fluid.optimizer.Adam(learning_rate=self._lr)

        if grad_clip is not None:
            clip_norm_thres = grad_clip
            # When using mixed precision training, scale the gradient clip threshold
            # by loss_scaling
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))

        _, param_grads = optimizer.minimize(self._loss)
        return param_grads

    def get_cur_learning_rate(self):
        return self._lr
class BaseOptimizer():
def __init__(self, loss_var, lr, lr_schedualer=None):
self._prog = None
self._lr_schedualer = lr_schedualer
def build(self, grad_clip=None):
pass
def _set_prog(self, prog):
self._prog = prog
if self._lr_schedualer is not None:
self._lr_schedualer._set_prog(prog)
def get_cur_learning_rate(self):
pass
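Putting the optimizer pieces together, a hedged sketch of the intended composition (assumes from paddle import fluid and a loss_var obtained from the built train program):

sched = TriangularSchedualer(warmup_steps=1000, num_train_steps=10000)
optimizer = Adam(loss_var, lr=5e-5, lr_schedualer=sched)
optimizer._set_prog(fluid.default_main_program())  # also forwards the program to the schedualer
param_grads = optimizer.build(grad_clip=1.0)       # optional global-norm gradient clipping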
@@ -14,15 +14,21 @@
# limitations under the License.
"""v1.1"""
from copy import copy
class BaseReader(object):
"""interface of data manager."""

def __init__(self, phase='train'):
# assert isinstance(config, dict)
# self._config = config
self._phase = phase
self._register = set()
self._registered_backbone = None
@classmethod
def create_register(cls):
return set()
def clone(self, phase='train'):
if phase == self._phase:
return copy(self)
else:
@@ -30,7 +36,25 @@ class reader(object):
ret._phase = phase
return ret
def require_attr(self, attr_name):
self._register.add(attr_name)
def register_with(self, backbone):
print(backbone)
for attr in backbone.inputs_attr:
self.require_attr(attr)
self._registered_backbone = backbone
def get_registered_backbone(self):
return self._registered_backbone
def _get_registed_attrs(self, attrs):
ret = {}
for i in self._register:
if i not in attrs:
raise NotImplementedError('output attr {} is not found in this reader.'.format(i))
ret[i] = attrs[i]
return ret
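The intended registration flow: a backbone declares the inputs it needs, register_with records them, and outputs_attr implementations then filter themselves down to the registered subset via _get_registed_attrs. A hedged sketch:

# backbone = ERNIE.from_config(ernie_config)
# cls_reader = ClassifyReader(vocab_path='vocab.txt', max_len=128)   # hypothetical instances
# cls_reader.register_with(backbone)   # calls require_attr() for every name in backbone.inputs_attr
# attrs = cls_reader.outputs_attr      # only the registered attrs survive the filter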
# @property
# def inputs_attr(self):
...
@@ -13,108 +13,69 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from paddlepalm.reader.base_reader import BaseReader
from paddlepalm.reader.utils.reader4ernie import ClassifyReader as CLSReader
# ---- removed in this commit ----

def classify(data_path, vocab_path, batch_size, max_len, \
        pred_batch_size=None, file_format='csv', tokenizer='wordpiece', \
        lang='en', shuffle_train=True, seed=None, do_lower_case=False, \
        phase='train'):

    assert lang.lower() in ['en', 'cn', 'english', 'chinese'], "supported language: en (English), cn (Chinese)."
    assert phase in ['train', 'pred'], "supported phase: train, pred."

    config = {
        'train_file': data_path,
        'pred_file': data_path,
        'batch_size': batch_size,
        'pred_batch_size': pred_batch_size,
        'max_len': max_len,
        'file_format': file_format,
        'tokenizer': tokenizer,
        'for_cn': lang.lower() == 'cn' or lang.lower() == 'chinese',
        'shuffle_train': shuffle_train,
        'do_lower_case': do_lower_case,
        'seed': seed
    }

    if pred_batch_size is None:
        del config['pred_batch_size']

    return Reader(config, phase=phase)


class Reader(reader):
    def __init__(self, config, phase='train', print_prefix=''):
        """
        Args:
            phase: train, eval, pred
        """
        self._is_training = phase == 'train'
        reader = ClassifyReader(config['vocab_path'],
                                max_seq_len=config['max_len'],
                                do_lower_case=config.get('do_lower_case', False),
                                for_cn=config.get('for_cn', False),
                                random_seed=config.get('seed', None))
        self._reader = reader

        self._batch_size = config['batch_size']
        self._max_seq_len = config['max_len']
        self._input_file = config['data_path']
        if phase == 'train':
            self._num_epochs = None  # keep the iterator from terminating
            self._shuffle = config.get('shuffle_train', True)
            # self._shuffle_buffer = config.get('shuffle_buffer', 5000)
        elif phase == 'eval':
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)
        elif phase == 'pred':
            self._num_epochs = 1
            self._shuffle = False
            self._batch_size = config.get('pred_batch_size', self._batch_size)

        self._phase = phase
        # self._batch_size =
        self._print_first_n = config.get('print_first_n', 0)

# ---- added in this commit ----

class ClassifyReader(BaseReader):

    def __init__(self, vocab_path, max_len, tokenizer='wordpiece', \
            lang='en', seed=None, do_lower_case=False, phase='train'):

        BaseReader.__init__(self, phase)

        assert lang.lower() in ['en', 'cn', 'english', 'chinese'], "supported language: en (English), cn (Chinese)."
        assert phase in ['train', 'pred'], "supported phase: train, pred."

        for_cn = lang.lower() == 'cn' or lang.lower() == 'chinese'

        self._register.add('token_ids')
        if phase == 'train':
            self._register.add('label_ids')

        self._is_training = phase == 'train'

        cls_reader = CLSReader(vocab_path,
                               max_seq_len=max_len,
                               do_lower_case=do_lower_case,
                               for_cn=for_cn,
                               random_seed=seed)
        self._reader = cls_reader

        self._phase = phase
        # self._batch_size =
        # self._print_first_n = config.get('print_first_n', 0)
@property
def outputs_attr(self):
attrs = {"token_ids": [[-1, -1], 'int64'],
         "position_ids": [[-1, -1], 'int64'],
         "segment_ids": [[-1, -1], 'int64'],
         "input_mask": [[-1, -1, 1], 'float32'],
         "label_ids": [[-1], 'int64'],
         "task_ids": [[-1, -1], 'int64']
         }
return self._get_registed_attrs(attrs)
def _load_data(self, input_file, batch_size, num_epochs=None, \
        file_format='csv', shuffle_train=True):
    self._data_generator = self._reader.data_generator(input_file, batch_size, \
        num_epochs, shuffle=shuffle_train if self._phase == 'train' else False, \
        phase=self._phase)
def _iterator(self):
    names = ['token_ids', 'segment_ids', 'position_ids', 'task_ids', 'input_mask',
             'label_ids', 'unique_ids']
    for batch in self._data_generator():
        outputs = {n: i for n, i in zip(names, batch)}
        ret = {}
        # TODO: move runtime shape check here
        for attr in self.outputs_attr.keys():
            ret[attr] = outputs[attr]
        yield ret
def get_epoch_outputs(self):
return {'examples': self._reader.get_examples(self._phase),
@@ -124,3 +85,4 @@ class Reader(reader):
def num_examples(self):
return self._reader.get_num_examples(phase=self._phase)
This diff is collapsed.
import basic_helper
import config_helper
# coding=utf-8

import os
import json
import yaml
import logging

from paddle import fluid

from config_helper import PDConfig
def get_basename(f):
return os.path.splitext(f)[0]
def get_suffix(f):
return os.path.splitext(f)[-1]
def parse_yaml(f, asdict=True, support_cmd_line=False):
assert os.path.exists(f), "file {} not found.".format(f)
if support_cmd_line:
args = PDConfig(yaml_file=f, fuse_args=True)
args.build()
return args.asdict() if asdict else args
else:
if asdict:
with open(f, "r") as fin:
yaml_config = yaml.load(fin, Loader=yaml.SafeLoader)
return yaml_config
else:
raise NotImplementedError()
def parse_json(f, asdict=True, support_cmd_line=False):
assert os.path.exists(f), "file {} not found.".format(f)
if support_cmd_line:
args = PDConfig(json_file=f, fuse_args=support_cmd_line)
args.build()
return args.asdict() if asdict else args
else:
if asdict:
with open(f, "r") as fin:
config = json.load(fin)
return config
else:
raise NotImplementedError()
def parse_list(string, astype=str):
assert isinstance(string, str), "{} is not a string.".format(string)
if ',' not in string:
return [astype(string)]
string = string.replace(',', ' ')
return [astype(i) for i in string.split()]
def try_float(s):
try:
float(s)
return(float(s))
except:
return s
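For example:

assert parse_list('3') == ['3']
assert parse_list('1, 2, 3', astype=int) == [1, 2, 3]
assert try_float('0.5') == 0.5
assert try_float('abc') == 'abc'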
# TODO: add a None mechanism that allows hidden size, batch size and seqlen to be set to None
def check_io(in_attr, out_attr, strict=False, in_name="left", out_name="right"):
for name, attr in in_attr.items():
assert name in out_attr, in_name+': '+name+' not found in '+out_name
if attr != out_attr[name]:
if strict:
raise ValueError(name+': shape or dtype not consistent!')
else:
logging.warning('{}: shape or dtype not consistent!\n{}:\n{}\n{}:\n{}'.format(name, in_name, attr, out_name, out_attr[name]))
def encode_inputs(inputs, scope_name, sep='/', cand_set=None):
outputs = {}
for k, v in inputs.items():
if cand_set is not None:
if k in cand_set:
outputs[k] = v
if scope_name+sep+k in cand_set:
outputs[scope_name+sep+k] = v
else:
outputs[scope_name+sep+k] = v
return outputs
def decode_inputs(inputs, scope_name, sep='/', keep_unk_keys=True):
outputs = {}
for name, value in inputs.items():
# var for backbone are also available to tasks
if keep_unk_keys and sep not in name:
outputs[name] = value
# var for this inst
if name.startswith(scope_name+'/'):
outputs[name[len(scope_name+'/'):]] = value
return outputs
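These two helpers namespace and un-namespace per-task-instance variables; for example:

inputs = {'sentence_embedding': 'v0', 'label_ids': 'v1'}
scoped = encode_inputs(inputs, 'cls_task')
# scoped == {'cls_task/sentence_embedding': 'v0', 'cls_task/label_ids': 'v1'}
assert decode_inputs(scoped, 'cls_task', keep_unk_keys=False) == inputs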
def build_executor(on_gpu):
if on_gpu:
place = fluid.CUDAPlace(0)
# dev_count = fluid.core.get_cuda_device_count()
else:
place = fluid.CPUPlace()
# dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
# return fluid.Executor(place), dev_count
return fluid.Executor(place)
def fit_attr(conf, fit_attr, strict=False):
for i, attr in fit_attr.items():
if i not in conf:
if strict:
raise Exception('Argument {} is required to create a controller.'.format(i))
else:
continue
conf[i] = attr(conf[i])
return conf
@@ -88,7 +88,7 @@ def create_iterator_fn(iterator, iterator_prefix, shape_and_dtypes, outname_to_pos
outputs = next(iterator)  # dict type
prefix = iterator_prefix
for outname, val in outputs.items():
task_outname = prefix + '.' + outname

if outname in outname_to_pos:
idx = outname_to_pos[outname]
...