Commit 2f2e84b6, authored by xixiaoyao

fix bugs

Parent 53c68177
@@ -12,9 +12,10 @@ do_lower_case: True
 max_seq_len: 512
 batch_size: 5
-num_epochs: 2
+num_epochs: 3
 optimizer: "adam"
 learning_rate: 3e-5
 warmup_proportion: 0.1
 weight_decay: 0.1
+print_every_n_steps: 1
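For reference, this YAML is the training config consumed by the demo script below. A minimal sketch of reading it, assuming plain YAML and PyYAML installed (the filename is taken from the demo; PaddlePALM's own config loader may behave differently):

import yaml  # assumption: PyYAML; PaddlePALM's internal loader may differ

with open('config_demo1.yaml') as f:  # assumed filename, matching the demo below
    conf = yaml.safe_load(f)

print(conf['num_epochs'])                  # 3 after this commit
print(conf.get('print_every_n_steps', 1))  # key added by this commit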
 import paddlepalm as palm
 if __name__ == '__main__':
-    controller = palm.Controller('demo1_config.yaml', task_dir='demo1_tasks')
+    controller = palm.Controller('config_demo1.yaml', task_dir='demo1_tasks')
     controller.load_pretrain('pretrain_model/ernie/params')
     controller.train()
-    controller = palm.Controller(config='demo1_config.yaml', task_dir='demo1_tasks', for_train=False)
+    controller = palm.Controller(config='config_demo1.yaml', task_dir='demo1_tasks', for_train=False)
     controller.pred('mrqa', inference_model_dir='output_model/firstrun/infer_model')
@@ -3,8 +3,8 @@ import paddlepalm as palm
 if __name__ == '__main__':
     controller = palm.Controller('config_demo2.yaml', task_dir='demo2_tasks')
     controller.load_pretrain('pretrain_model/ernie/params')
-    controller.train()
-    controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
-    controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
+    # controller.train()
+    # controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
+    # controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
@@ -76,7 +76,7 @@ class Model(backbone):
                 "sentence_embedding": [[-1, self._emb_size], 'float32'],
                 "sentence_pair_embedding": [[-1, self._emb_size], 'float32']}

-    def build(self, inputs):
+    def build(self, inputs, scope_name=""):
         src_ids = inputs['token_ids']
         pos_ids = inputs['position_ids']

@@ -90,25 +90,25 @@ class Model(backbone):
             size=[self._voc_size, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
-                name=self._word_emb_name, initializer=self._param_initializer),
+                name=scope_name+self._word_emb_name, initializer=self._param_initializer),
             is_sparse=False)

         # fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
-        embedding_table = fluid.default_main_program().global_block().var(self._word_emb_name)
+        embedding_table = fluid.default_main_program().global_block().var(scope_name+self._word_emb_name)

         position_emb_out = fluid.layers.embedding(
             input=pos_ids,
             size=[self._max_position_seq_len, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
-                name=self._pos_emb_name, initializer=self._param_initializer))
+                name=scope_name+self._pos_emb_name, initializer=self._param_initializer))

         sent_emb_out = fluid.layers.embedding(
             sent_ids,
             size=[self._sent_types, self._emb_size],
             dtype=self._emb_dtype,
             param_attr=fluid.ParamAttr(
-                name=self._sent_emb_name, initializer=self._param_initializer))
+                name=scope_name+self._sent_emb_name, initializer=self._param_initializer))

         emb_out = emb_out + position_emb_out
         emb_out = emb_out + sent_emb_out

@@ -118,13 +118,13 @@ class Model(backbone):
                 size=[self._task_types, self._emb_size],
                 dtype=self._emb_dtype,
                 param_attr=fluid.ParamAttr(
-                    name=self._task_emb_name,
+                    name=scope_name+self._task_emb_name,
                     initializer=self._param_initializer))
             emb_out = emb_out + task_emb_out

         emb_out = pre_process_layer(
-            emb_out, 'nd', self._prepostprocess_dropout, name='pre_encoder')
+            emb_out, 'nd', self._prepostprocess_dropout, name=scope_name+'pre_encoder')

         self_attn_mask = fluid.layers.matmul(
             x=input_mask, y=input_mask, transpose_y=True)

@@ -151,7 +151,7 @@ class Model(backbone):
             preprocess_cmd="",
             postprocess_cmd="dan",
             param_initializer=self._param_initializer,
-            name='encoder')
+            name=scope_name+'encoder')

         next_sent_feat = fluid.layers.slice(

@@ -162,8 +162,8 @@ class Model(backbone):
             size=self._emb_size,
             act="tanh",
             param_attr=fluid.ParamAttr(
-                name="pooled_fc.w_0", initializer=self._param_initializer),
-            bias_attr="pooled_fc.b_0")
+                name=scope_name+"pooled_fc.w_0", initializer=self._param_initializer),
+            bias_attr=scope_name+"pooled_fc.b_0")

         return {'embedding_table': embedding_table,
                 'word_embedding': emb_out,
...
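The hunks above thread a scope_name prefix into every explicitly named parameter. A minimal sketch of the pattern, assuming the Paddle 1.x fluid API this repo targets; explicit ParamAttr names bypass unique_name generation, so the prefix has to be concatenated into the name string by hand:

import paddle.fluid as fluid

# Sketch only: a ParamAttr name is used verbatim, so a scope prefix
# must be baked into the string itself.
scope_name = '__paddlepalm_'
ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
emb = fluid.layers.embedding(
    input=ids,
    size=[30000, 768],
    param_attr=fluid.ParamAttr(name=scope_name + 'word_embedding'))

# the parameter is registered under the prefixed name
table = fluid.default_main_program().global_block().var(scope_name + 'word_embedding')
print(table.name)  # __paddlepalm_word_embedding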
@@ -430,23 +430,25 @@ class Controller(object):
         # build backbone and task layers
         # crashes without an explicit scope name; a pitfall in the framework
+        train_prog = fluid.default_main_program()
+        train_init_prog = fluid.default_startup_program()
+        # don't bother with unique_name.guard: it never reaches names set through param_attr
         with fluid.unique_name.guard("backbone-"):
-            bb_output_vars = train_backbone.build(net_inputs)
+            bb_output_vars = train_backbone.build(net_inputs, scope_name='__paddlepalm_')
+            # bb_output_vars = train_backbone.build(net_inputs)
         assert sorted(bb_output_vars.keys()) == sorted(train_backbone.outputs_attr.keys())
+        #for var in train_init_prog.blocks[0].vars:
+        #    print(var)

         # would crash otherwise
         # do we really need a fresh program here? yes, got burned badly without it
         pred_prog = fluid.Program()
         pred_init_prog = fluid.Program()
-        train_prog = fluid.default_main_program()
-        train_init_prog = fluid.default_startup_program()
         with fluid.program_guard(main_program = pred_prog, startup_program = pred_init_prog):
             pred_net_inputs = create_net_inputs(pred_input_attrs)
-            with fluid.unique_name.guard("backbone-"):
-                pred_bb_output_vars = pred_backbone.build(pred_net_inputs)
+            # don't bother with unique_name.guard: it never reaches names set through param_attr
+            # with fluid.unique_name.guard("backbone-"):
+            pred_bb_output_vars = pred_backbone.build(pred_net_inputs, scope_name='__paddlepalm_')

         fluid.framework.switch_main_program(train_prog)
         fluid.framework.switch_startup_program(train_init_prog)
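Here the training graph stays in the default program while the prediction graph is built in a fresh one. A minimal sketch of that isolation, assuming Paddle 1.x:

import paddle.fluid as fluid

# Sketch: ops defined under program_guard land in pred_prog,
# leaving the default (training) program untouched.
pred_prog = fluid.Program()
pred_init_prog = fluid.Program()
with fluid.program_guard(main_program=pred_prog, startup_program=pred_init_prog):
    x = fluid.layers.data(name='x', shape=[4], dtype='float32')
    y = fluid.layers.fc(input=x, size=1)

print(len(pred_prog.global_block().ops))                    # > 0
print(len(fluid.default_main_program().global_block().ops))  # 0 in this standalone sketch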
@@ -503,13 +505,13 @@ class Controller(object):
         num_examples = main_reader.num_examples
         for inst in instances:
-            max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * num_examples) // main_conf['batch_size'] // dev_count
+            max_train_steps = int(main_conf['num_epochs']* inst.mix_ratio * (num_examples // main_conf['batch_size'] // dev_count))
             if inst.is_target:
                 print('{}: expected train steps {}.'.format(inst.name, max_train_steps))
                 inst.steps_pur_epoch = inst.reader['train'].num_examples // main_conf['batch_size'] // dev_count
                 inst.expected_train_steps = max_train_steps

-        global_max_train_steps = int(main_conf['num_epochs'] * num_examples * sum(mrs)) // main_conf['batch_size'] // dev_count
+        global_max_train_steps = int(main_conf['num_epochs'] * sum(mrs) * (num_examples // main_conf['batch_size'] // dev_count))
         print('Estimated overall train steps {}.'.format(global_max_train_steps))

         if 'warmup_proportion' in main_conf and main_conf['warmup_proportion'] > 0:
...
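The step-count fix floors to whole batches per epoch first, then scales by epochs and mix ratio, so the estimate agrees with the per-epoch accounting in steps_pur_epoch. A worked example with illustrative numbers (not taken from the repo):

# Illustrative numbers only.
num_examples, batch_size, dev_count = 39, 5, 1
num_epochs, mix_ratio = 3, 1.0

old = int(num_epochs * mix_ratio * num_examples) // batch_size // dev_count
new = int(num_epochs * mix_ratio * (num_examples // batch_size // dev_count))
steps_per_epoch = num_examples // batch_size // dev_count  # 7

print(old)  # 23: partial batches inflate the count
print(new)  # 21 == 3 epochs * 7 whole batches per epoch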
@@ -90,11 +90,6 @@ def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=None):
     _, param_grads = optimizer.minimize(loss)

-    for block in fluid.default_main_program().blocks:
-        for var_name in block.vars:
-            if var_name.startswith("embedding"):
-                print(block.vars[var_name])
-
     if config.get('weight_decay', 0) > 0:
         for param, grad in param_grads:
...
@@ -19,6 +19,8 @@ import os
 import six
 import ast
 import copy
+import tarfile
+import shutil
 import numpy as np
 import paddle.fluid as fluid

@@ -48,18 +50,30 @@ def init_pretraining_params(exe,
     assert os.path.exists(pretraining_params_path
                           ), "[%s] can't be found." % pretraining_params_path

+    assert os.path.exists(os.path.join(pretraining_params_path, '__palmmodel__')), "__palmmodel__ not found."
+    print("Loading pretraining parameters from {}...".format(
+        pretraining_params_path))
+
+    with tarfile.open(os.path.join(pretraining_params_path, '__palmmodel__'), 'r:') as f:
+        f.extractall(os.path.join(pretraining_params_path, '.temp'))
+
+    pretraining_params_path = os.path.join(pretraining_params_path, '.temp')
+
     def existed_params(var):
         if not isinstance(var, fluid.framework.Parameter):
             return False
+        if not os.path.exists(os.path.join(pretraining_params_path, var.name)):
+            print('Warning: {} not found in {}.'.format(var.name, pretraining_params_path))
         return os.path.exists(os.path.join(pretraining_params_path, var.name))

-    print("Load pretraining parameters from {}...\n".format(
-        pretraining_params_path))
     fluid.io.load_vars(
         exe,
         pretraining_params_path,
         main_program=main_program,
         predicate=existed_params)
+
+    shutil.rmtree(pretraining_params_path)
+    print('')
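Checkpoint loading now unpacks a single __palmmodel__ tar archive into a .temp directory, loads variables from there, and deletes it. A stdlib-only sketch of that round trip (pack_params is a hypothetical helper; in the repo the packing side is done by the convert shell script below):

import os
import shutil
import tarfile

def pack_params(params_dir, archive='__palmmodel__'):
    # hypothetical helper mirroring the convert script: tar all parameter
    # files, uncompressed ('w:'), into a single __palmmodel__ archive
    with tarfile.open(os.path.join(params_dir, archive), 'w:') as f:
        for name in os.listdir(params_dir):
            if name != archive:
                f.add(os.path.join(params_dir, name), arcname=name)

def unpack_params(params_dir, archive='__palmmodel__'):
    # mirrors the loading side above: extract into .temp and return its path;
    # the caller loads vars from it, then shutil.rmtree()s it
    temp_dir = os.path.join(params_dir, '.temp')
    with tarfile.open(os.path.join(params_dir, archive), 'r:') as f:
        f.extractall(temp_dir)
    return temp_dir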
-export CUDA_VISIBLE_DEVICES=0
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 python demo1.py

-export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+export CUDA_VISIBLE_DEVICES=0
 python demo2.py
@@ -5,13 +5,32 @@ if [[ $# != 1 ]]; then
     exit 1
 fi

+if [[ -f $1/__palminfo__ ]]; then
+    echo "already converted."
+    exit 0
+fi
+
 echo "converting..."
-cd $1
+if [[ -d $1/params ]]; then
+    cd $1/params
+else
+    cd $1
+fi
 mkdir .palm.backup
 for file in $(ls *)
-    do cp $file "backbone-"$file; mv $file .palm.backup
+    do cp $file .palm.backup; mv $file "__paddlepalm_"$file
 done
+tar -cf __rawmodel__ .palm.backup/*
+rm .palm.backup/*
+mv __rawmodel__ .palm.backup
+# find . ! -name '__rawmodel__' -exec rm {} +
+tar -cf __palmmodel__ __paddlepalm_*
+touch __palminfo__
+ls __paddlepalm_* > __palminfo__
+rm __paddlepalm_*
 cd - >/dev/null
 echo "done!"
...
@@ -5,7 +5,29 @@ if [[ $# != 1 ]]; then
     exit 1
 fi

-rm $1/backbone-*
-mv $1/.palm.backup/* $1
-rm -rf $1/.palm.backup
+if [[ ! -d $1 ]]; then
+    echo "$1 not found."
+    exit 1
+fi
+
+if [[ ! -f $1/__palmmodel__ ]]; then
+    echo "paddlepalm model not found."
+    exit 1
+fi
+
+echo "recovering..."
+
+if [[ -d $1/params ]]; then
+    cd $1/params
+else
+    cd $1
+fi
+rm __palm*
+mv .palm.backup/__rawmodel__ .
+rm -rf .palm.backup
+tar -xf __rawmodel__
+mv .palm.backup/* .
+rm __rawmodel__
+rm -rf .palm.backup
+cd - >/dev/null