未验证 提交 8b9c24a8 编写于 作者: X Xiaoyao Xi 提交者: GitHub

Merge pull request #13 from xixiaoyao/master

fix bugs
......@@ -12,9 +12,10 @@ do_lower_case: True
max_seq_len: 512
batch_size: 5
num_epochs: 2
num_epochs: 3
optimizer: "adam"
learning_rate: 3e-5
warmup_proportion: 0.1
weight_decay: 0.1
print_every_n_steps: 1
import paddlepalm as palm
if __name__ == '__main__':
controller = palm.Controller('demo1_config.yaml', task_dir='demo1_tasks')
controller = palm.Controller('config_demo1.yaml', task_dir='demo1_tasks')
controller.load_pretrain('pretrain_model/ernie/params')
controller.train()
controller = palm.Controller(config='demo1_config.yaml', task_dir='demo1_tasks', for_train=False)
controller = palm.Controller(config='config_demo1.yaml', task_dir='demo1_tasks', for_train=False)
controller.pred('mrqa', inference_model_dir='output_model/firstrun/infer_model')
......@@ -3,8 +3,8 @@ import paddlepalm as palm
if __name__ == '__main__':
controller = palm.Controller('config_demo2.yaml', task_dir='demo2_tasks')
controller.load_pretrain('pretrain_model/ernie/params')
controller.train()
# controller.train()
controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
# controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
# controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
......@@ -76,7 +76,7 @@ class Model(backbone):
"sentence_embedding": [[-1, self._emb_size], 'float32'],
"sentence_pair_embedding": [[-1, self._emb_size], 'float32']}
def build(self, inputs):
def build(self, inputs, scope_name=""):
src_ids = inputs['token_ids']
pos_ids = inputs['position_ids']
......@@ -90,25 +90,25 @@ class Model(backbone):
size=[self._voc_size, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._word_emb_name, initializer=self._param_initializer),
name=scope_name+self._word_emb_name, initializer=self._param_initializer),
is_sparse=False)
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
embedding_table = fluid.default_main_program().global_block().var(self._word_emb_name)
embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)
position_emb_out = fluid.layers.embedding(
input=pos_ids,
size=[self._max_position_seq_len, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._pos_emb_name, initializer=self._param_initializer))
name=scope_name+self._pos_emb_name, initializer=self._param_initializer))
sent_emb_out = fluid.layers.embedding(
sent_ids,
size=[self._sent_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._sent_emb_name, initializer=self._param_initializer))
name=scope_name+self._sent_emb_name, initializer=self._param_initializer))
emb_out = emb_out + position_emb_out
emb_out = emb_out + sent_emb_out
......@@ -118,13 +118,13 @@ class Model(backbone):
size=[self._task_types, self._emb_size],
dtype=self._emb_dtype,
param_attr=fluid.ParamAttr(
name=self._task_emb_name,
name=scope_name+self._task_emb_name,
initializer=self._param_initializer))
emb_out = emb_out + task_emb_out
emb_out = pre_process_layer(
emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
emb_out, 'nd', self._prepostprocess_dropout, name=scope_name+'pre_encoder')
self_attn_mask = fluid.layers.matmul(
x=input_mask, y=input_mask, transpose_y=True)
......@@ -151,7 +151,7 @@ class Model(backbone):
preprocess_cmd="",
postprocess_cmd="dan",
param_initializer=self._param_initializer,
name='encoder')
name=scope_name+'encoder')
next_sent_feat = fluid.layers.slice(
......@@ -162,8 +162,8 @@ class Model(backbone):
size=self._emb_size,
act="tanh",
param_attr=fluid.ParamAttr(
name="pooled_fc.w_0", initializer=self._param_initializer),
bias_attr="pooled_fc.b_0")
name=scope_name+"pooled_fc.w_0", initializer=self._param_initializer),
bias_attr=scope_name+"pooled_fc.b_0")
return {'embedding_table': embedding_table,
'word_embedding': emb_out,
......
......@@ -430,23 +430,25 @@ class Controller(object):
# build backbone and task layers
# Building crashes if no scope name is specified; this is a pitfall in the framework.
train_prog = fluid.default_main_program()
train_init_prog = fluid.default_startup_program()
# Do not use unique_name.guard here: it is useless, because it does not affect the names given in param_attr.
with fluid.unique_name.guard("backbone-"):
bb_output_vars = train_backbone.build(net_inputs)
# bb_output_vars = train_backbone.build(net_inputs)
bb_output_vars = train_backbone.build(net_inputs, scope_name='__paddlepalm_')
assert sorted(bb_output_vars.keys()) == sorted(train_backbone.outputs_attr.keys())
#for var in train_init_prog.blocks[0].vars:
# print(var)
# This would crash.
# Is it really necessary to create a new program here? Yes -- we were badly bitten by not doing so.
pred_prog = fluid.Program()
pred_init_prog = fluid.Program()
train_prog = fluid.default_main_program()
train_init_prog = fluid.default_startup_program()
with fluid.program_guard(main_program = pred_prog, startup_program = pred_init_prog):
pred_net_inputs = create_net_inputs(pred_input_attrs)
with fluid.unique_name.guard("backbone-"):
pred_bb_output_vars = pred_backbone.build(pred_net_inputs)
# Do not use unique_name.guard here: it is useless, because it does not affect the names given in param_attr.
# with fluid.unique_name.guard("backbone-"):
pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')
fluid.framework.switch_main_program(train_prog)
fluid.framework.switch_startup_program(train_init_prog)
......@@ -503,13 +505,13 @@ class Controller(object):
num_examples = main_reader.num_examples
for inst in instances:
max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * num_examples) // main_conf['batch_size'] // dev_count
max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * (num_examples // main_conf['batch_size'] // dev_count))
if inst.is_target:
print('{}: expected train steps {}.'.format(inst.name, max_train_steps))
inst.steps_pur_epoch = inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count
inst.expected_train_steps = max_train_steps
global_max_train_steps = int(main_conf['num_epochs'] * num_examples * sum(mrs)) // main_conf['batch_size'] // dev_count
global_max_train_steps = int(main_conf['num_epochs'] * sum(mrs) * (num_examples // main_conf['batch_size'] // dev_count))
print('Estimated overall train steps {}.'.format(global_max_train_steps))
if 'warmup_proportion' in main_conf and main_conf['warmup_proportion'] > 0:
......
......@@ -90,11 +90,6 @@ def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=N
_, param_grads = optimizer.minimize(loss)
for block in fluid.default_main_program().blocks:
for var_name in block.vars:
if var_name.startswith("embedding"):
print(block.vars[var_name])
if config.get('weight_decay', 0) > 0:
for param, grad in param_grads:
......
......@@ -19,6 +19,8 @@ import os
import six
import ast
import copy
import tarfile
import shutil
import numpy as np
import paddle.fluid as fluid
......@@ -48,18 +50,31 @@ def init_pretraining_params(exe,
assert os.path.exists(pretraining_params_path
), "[%s] cann't be found." % pretraining_params_path
assert os.path.exists(os.path.join(pretraining_params_path, '__palmmodel__')), "__palmmodel__ not found."
print("Loading pretraining parameters from {}...".format(
pretraining_params_path))
with tarfile.open(os.path.join(pretraining_params_path, '__palmmodel__'), 'r:') as f:
f.extractall(os.path.join(pretraining_params_path, '.temp'))
log_path = os.path.join(pretraining_params_path, '__palmmodel__')
pretraining_params_path = os.path.join(pretraining_params_path, '.temp')
def existed_params(var):
if not isinstance(var, fluid.framework.Parameter):
return False
if not os.path.exists(os.path.join(pretraining_params_path, var.name)):
print('Warning: {} not found in {}.'.format(var.name, log_path))
return os.path.exists(os.path.join(pretraining_params_path, var.name))
print("Load pretraining parameters from {}...\n".format(
pretraining_params_path))
fluid.io.load_vars(
exe,
pretraining_params_path,
main_program=main_program,
predicate=existed_params)
shutil.rmtree(pretraining_params_path)
print('')
export CUDA_VISIBLE_DEVICES=0
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python demo1.py
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export CUDA_VISIBLE_DEVICES=0
python demo2.py
......@@ -5,13 +5,32 @@ if [[ $# != 1 ]]; then
exit 1
fi
if [[ -f $1/__palminfo__ ]]; then
echo "already converted."
exit 0
fi
echo "converting..."
cd $1
if [[ -d $1/params ]]; then
cd $1/params
else
cd $1
fi
mkdir .palm.backup
for file in $(ls *)
do cp $file "backbone-"$file; mv $file .palm.backup
do cp $file .palm.backup; mv $file "__paddlepalm_"$file
done
tar -cf __rawmodel__ .palm.backup/*
rm .palm.backup/*
mv __rawmodel__ .palm.backup
# find . ! -name '__rawmodel__' -exec rm {} +
tar -cf __palmmodel__ __paddlepalm_*
touch __palminfo__
ls __paddlepalm_* > __palminfo__
rm __paddlepalm_*
cd - >/dev/null
echo "done!"
......
......@@ -5,7 +5,29 @@ if [[ $# != 1 ]]; then
exit 1
fi
rm $1/backbone-*
mv $1/.palm.backup/* $1
rm -rf $1/.palm.backup
if [[ ! -d $1 ]]; then
echo "$1 not found."
exit 1
fi
if [[ ! -f $1/__palmmodel__ ]]; then
echo "paddlepalm model not found."
exit 1
fi
echo "recovering..."
if [[ -d $1/params ]]; then
cd $1/params
else
cd $1
fi
rm __palm*
mv .palm.backup/__rawmodel__ .
rm -rf .palm.backup
tar -xf __rawmodel__
mv .palm.backup/* .
rm __rawmodel__
rm -rf .palm.backup
cd - >/dev/null
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册