Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PALM
提交
2f2e84b6
P
PALM
项目概览
PaddlePaddle
/
PALM
通知
8
Star
3
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
10
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PALM
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
10
Issue
10
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2f2e84b6
编写于
10月 28, 2019
作者:
X
xixiaoyao
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix bugs
上级
53c68177
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
94 addition
and
41 deletion
+94
-41
config_demo1.yaml
config_demo1.yaml
+2
-1
demo1.py
demo1.py
+2
-2
demo2.py
demo2.py
+3
-3
paddlepalm/backbone/ernie.py
paddlepalm/backbone/ernie.py
+10
-10
paddlepalm/mtl_controller.py
paddlepalm/mtl_controller.py
+12
-10
paddlepalm/optimizer/adam.py
paddlepalm/optimizer/adam.py
+0
-5
paddlepalm/utils/saver.py
paddlepalm/utils/saver.py
+17
-3
run_demo1.sh
run_demo1.sh
+1
-1
run_demo2.sh
run_demo2.sh
+1
-1
script/convert_params.sh
script/convert_params.sh
+21
-2
script/recover_params.sh
script/recover_params.sh
+25
-3
未找到文件。
demo1_config
.yaml
→
config_demo1
.yaml
浏览文件 @
2f2e84b6
...
@@ -12,9 +12,10 @@ do_lower_case: True
...
@@ -12,9 +12,10 @@ do_lower_case: True
max_seq_len
:
512
max_seq_len
:
512
batch_size
:
5
batch_size
:
5
num_epochs
:
2
num_epochs
:
3
optimizer
:
"
adam"
optimizer
:
"
adam"
learning_rate
:
3e-5
learning_rate
:
3e-5
warmup_proportion
:
0.1
warmup_proportion
:
0.1
weight_decay
:
0.1
weight_decay
:
0.1
print_every_n_steps
:
1
demo1.py
浏览文件 @
2f2e84b6
import
paddlepalm
as
palm
import
paddlepalm
as
palm
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
controller
=
palm
.
Controller
(
'
demo1_config
.yaml'
,
task_dir
=
'demo1_tasks'
)
controller
=
palm
.
Controller
(
'
config_demo1
.yaml'
,
task_dir
=
'demo1_tasks'
)
controller
.
load_pretrain
(
'pretrain_model/ernie/params'
)
controller
.
load_pretrain
(
'pretrain_model/ernie/params'
)
controller
.
train
()
controller
.
train
()
controller
=
palm
.
Controller
(
config
=
'
demo1_config
.yaml'
,
task_dir
=
'demo1_tasks'
,
for_train
=
False
)
controller
=
palm
.
Controller
(
config
=
'
config_demo1
.yaml'
,
task_dir
=
'demo1_tasks'
,
for_train
=
False
)
controller
.
pred
(
'mrqa'
,
inference_model_dir
=
'output_model/firstrun/infer_model'
)
controller
.
pred
(
'mrqa'
,
inference_model_dir
=
'output_model/firstrun/infer_model'
)
demo2.py
浏览文件 @
2f2e84b6
...
@@ -3,8 +3,8 @@ import paddlepalm as palm
...
@@ -3,8 +3,8 @@ import paddlepalm as palm
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
controller
=
palm
.
Controller
(
'config_demo2.yaml'
,
task_dir
=
'demo2_tasks'
)
controller
=
palm
.
Controller
(
'config_demo2.yaml'
,
task_dir
=
'demo2_tasks'
)
controller
.
load_pretrain
(
'pretrain_model/ernie/params'
)
controller
.
load_pretrain
(
'pretrain_model/ernie/params'
)
controller
.
train
()
#
controller.train()
controller
=
palm
.
Controller
(
config
=
'config_demo2.yaml'
,
task_dir
=
'demo2_tasks'
,
for_train
=
False
)
#
controller = palm.Controller(config='config_demo2.yaml', task_dir='demo2_tasks', for_train=False)
controller
.
pred
(
'mrqa'
,
inference_model_dir
=
'output_model/secondrun/infer_model'
)
#
controller.pred('mrqa', inference_model_dir='output_model/secondrun/infer_model')
paddlepalm/backbone/ernie.py
浏览文件 @
2f2e84b6
...
@@ -76,7 +76,7 @@ class Model(backbone):
...
@@ -76,7 +76,7 @@ class Model(backbone):
"sentence_embedding"
:
[[
-
1
,
self
.
_emb_size
],
'float32'
],
"sentence_embedding"
:
[[
-
1
,
self
.
_emb_size
],
'float32'
],
"sentence_pair_embedding"
:
[[
-
1
,
self
.
_emb_size
],
'float32'
]}
"sentence_pair_embedding"
:
[[
-
1
,
self
.
_emb_size
],
'float32'
]}
def
build
(
self
,
inputs
):
def
build
(
self
,
inputs
,
scope_name
=
""
):
src_ids
=
inputs
[
'token_ids'
]
src_ids
=
inputs
[
'token_ids'
]
pos_ids
=
inputs
[
'position_ids'
]
pos_ids
=
inputs
[
'position_ids'
]
...
@@ -90,25 +90,25 @@ class Model(backbone):
...
@@ -90,25 +90,25 @@ class Model(backbone):
size
=
[
self
.
_voc_size
,
self
.
_emb_size
],
size
=
[
self
.
_voc_size
,
self
.
_emb_size
],
dtype
=
self
.
_emb_dtype
,
dtype
=
self
.
_emb_dtype
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
self
.
_word_emb_name
,
initializer
=
self
.
_param_initializer
),
name
=
s
cope_name
+
s
elf
.
_word_emb_name
,
initializer
=
self
.
_param_initializer
),
is_sparse
=
False
)
is_sparse
=
False
)
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
# fluid.global_scope().find_var('backbone-word_embedding').get_tensor()
embedding_table
=
fluid
.
default_main_program
().
global_block
().
var
(
self
.
_word_emb_name
)
embedding_table
=
fluid
.
default_main_program
().
global_block
().
var
(
s
cope_name
+
s
elf
.
_word_emb_name
)
position_emb_out
=
fluid
.
layers
.
embedding
(
position_emb_out
=
fluid
.
layers
.
embedding
(
input
=
pos_ids
,
input
=
pos_ids
,
size
=
[
self
.
_max_position_seq_len
,
self
.
_emb_size
],
size
=
[
self
.
_max_position_seq_len
,
self
.
_emb_size
],
dtype
=
self
.
_emb_dtype
,
dtype
=
self
.
_emb_dtype
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
self
.
_pos_emb_name
,
initializer
=
self
.
_param_initializer
))
name
=
s
cope_name
+
s
elf
.
_pos_emb_name
,
initializer
=
self
.
_param_initializer
))
sent_emb_out
=
fluid
.
layers
.
embedding
(
sent_emb_out
=
fluid
.
layers
.
embedding
(
sent_ids
,
sent_ids
,
size
=
[
self
.
_sent_types
,
self
.
_emb_size
],
size
=
[
self
.
_sent_types
,
self
.
_emb_size
],
dtype
=
self
.
_emb_dtype
,
dtype
=
self
.
_emb_dtype
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
self
.
_sent_emb_name
,
initializer
=
self
.
_param_initializer
))
name
=
s
cope_name
+
s
elf
.
_sent_emb_name
,
initializer
=
self
.
_param_initializer
))
emb_out
=
emb_out
+
position_emb_out
emb_out
=
emb_out
+
position_emb_out
emb_out
=
emb_out
+
sent_emb_out
emb_out
=
emb_out
+
sent_emb_out
...
@@ -118,13 +118,13 @@ class Model(backbone):
...
@@ -118,13 +118,13 @@ class Model(backbone):
size
=
[
self
.
_task_types
,
self
.
_emb_size
],
size
=
[
self
.
_task_types
,
self
.
_emb_size
],
dtype
=
self
.
_emb_dtype
,
dtype
=
self
.
_emb_dtype
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
self
.
_task_emb_name
,
name
=
s
cope_name
+
s
elf
.
_task_emb_name
,
initializer
=
self
.
_param_initializer
))
initializer
=
self
.
_param_initializer
))
emb_out
=
emb_out
+
task_emb_out
emb_out
=
emb_out
+
task_emb_out
emb_out
=
pre_process_layer
(
emb_out
=
pre_process_layer
(
emb_out
,
'nd'
,
self
.
_prepostprocess_dropout
,
name
=
'pre_encoder'
)
emb_out
,
'nd'
,
self
.
_prepostprocess_dropout
,
name
=
scope_name
+
'pre_encoder'
)
self_attn_mask
=
fluid
.
layers
.
matmul
(
self_attn_mask
=
fluid
.
layers
.
matmul
(
x
=
input_mask
,
y
=
input_mask
,
transpose_y
=
True
)
x
=
input_mask
,
y
=
input_mask
,
transpose_y
=
True
)
...
@@ -151,7 +151,7 @@ class Model(backbone):
...
@@ -151,7 +151,7 @@ class Model(backbone):
preprocess_cmd
=
""
,
preprocess_cmd
=
""
,
postprocess_cmd
=
"dan"
,
postprocess_cmd
=
"dan"
,
param_initializer
=
self
.
_param_initializer
,
param_initializer
=
self
.
_param_initializer
,
name
=
'encoder'
)
name
=
scope_name
+
'encoder'
)
next_sent_feat
=
fluid
.
layers
.
slice
(
next_sent_feat
=
fluid
.
layers
.
slice
(
...
@@ -162,8 +162,8 @@ class Model(backbone):
...
@@ -162,8 +162,8 @@ class Model(backbone):
size
=
self
.
_emb_size
,
size
=
self
.
_emb_size
,
act
=
"tanh"
,
act
=
"tanh"
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
"pooled_fc.w_0"
,
initializer
=
self
.
_param_initializer
),
name
=
scope_name
+
"pooled_fc.w_0"
,
initializer
=
self
.
_param_initializer
),
bias_attr
=
"pooled_fc.b_0"
)
bias_attr
=
scope_name
+
"pooled_fc.b_0"
)
return
{
'embedding_table'
:
embedding_table
,
return
{
'embedding_table'
:
embedding_table
,
'word_embedding'
:
emb_out
,
'word_embedding'
:
emb_out
,
...
...
paddlepalm/mtl_controller.py
浏览文件 @
2f2e84b6
...
@@ -430,23 +430,25 @@ class Controller(object):
...
@@ -430,23 +430,25 @@ class Controller(object):
# build backbone and task layers
# build backbone and task layers
# 不指定scope名字会挂,框架有坑
# 不指定scope名字会挂,框架有坑
train_prog
=
fluid
.
default_main_program
()
train_init_prog
=
fluid
.
default_startup_program
()
# 别用unique_name.guard了,没用的,无法作用到param_attr里的name上
with
fluid
.
unique_name
.
guard
(
"backbone-"
):
with
fluid
.
unique_name
.
guard
(
"backbone-"
):
bb_output_vars
=
train_backbone
.
build
(
net_inputs
)
bb_output_vars
=
train_backbone
.
build
(
net_inputs
,
scope_name
=
'__paddlepalm_'
)
# bb_output_vars = train_backbone.build(net_inputs)
assert
sorted
(
bb_output_vars
.
keys
())
==
sorted
(
train_backbone
.
outputs_attr
.
keys
())
assert
sorted
(
bb_output_vars
.
keys
())
==
sorted
(
train_backbone
.
outputs_attr
.
keys
())
#for var in train_init_prog.blocks[0].vars:
# print(var)
# 会挂
# 会挂
# 这里是否有必要新建一个program?是的,被坑死了
# 这里是否有必要新建一个program?是的,被坑死了
pred_prog
=
fluid
.
Program
()
pred_prog
=
fluid
.
Program
()
pred_init_prog
=
fluid
.
Program
()
pred_init_prog
=
fluid
.
Program
()
train_prog
=
fluid
.
default_main_program
()
train_init_prog
=
fluid
.
default_startup_program
()
with
fluid
.
program_guard
(
main_program
=
pred_prog
,
startup_program
=
pred_init_prog
):
with
fluid
.
program_guard
(
main_program
=
pred_prog
,
startup_program
=
pred_init_prog
):
pred_net_inputs
=
create_net_inputs
(
pred_input_attrs
)
pred_net_inputs
=
create_net_inputs
(
pred_input_attrs
)
with
fluid
.
unique_name
.
guard
(
"backbone-"
):
# 别用unique_name.guard了,没用的,无法作用到param_attr里的name上
pred_bb_output_vars
=
pred_backbone
.
build
(
pred_net_inputs
)
# with fluid.unique_name.guard("backbone-"):
pred_bb_output_vars
=
pred_backbone
.
build
(
pred_net_inputs
,
scope_name
=
'__paddlepalm_'
)
fluid
.
framework
.
switch_main_program
(
train_prog
)
fluid
.
framework
.
switch_main_program
(
train_prog
)
fluid
.
framework
.
switch_startup_program
(
train_init_prog
)
fluid
.
framework
.
switch_startup_program
(
train_init_prog
)
...
@@ -503,13 +505,13 @@ class Controller(object):
...
@@ -503,13 +505,13 @@ class Controller(object):
num_examples
=
main_reader
.
num_examples
num_examples
=
main_reader
.
num_examples
for
inst
in
instances
:
for
inst
in
instances
:
max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
inst
.
mix_ratio
*
num_examples
)
//
main_conf
[
'batch_size'
]
//
dev_count
max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
inst
.
mix_ratio
*
(
num_examples
//
main_conf
[
'batch_size'
]
//
dev_count
))
if
inst
.
is_target
:
if
inst
.
is_target
:
print
(
'{}: expected train steps {}.'
.
format
(
inst
.
name
,
max_train_steps
))
print
(
'{}: expected train steps {}.'
.
format
(
inst
.
name
,
max_train_steps
))
inst
.
steps_pur_epoch
=
inst
.
reader
[
'train'
].
num_examples
//
main_conf
[
'batch_size'
]
//
dev_count
inst
.
steps_pur_epoch
=
inst
.
reader
[
'train'
].
num_examples
//
main_conf
[
'batch_size'
]
//
dev_count
inst
.
expected_train_steps
=
max_train_steps
inst
.
expected_train_steps
=
max_train_steps
global_max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
num_examples
*
sum
(
mrs
))
//
main_conf
[
'batch_size'
]
//
dev_count
global_max_train_steps
=
int
(
main_conf
[
'num_epochs'
]
*
sum
(
mrs
)
*
(
num_examples
//
main_conf
[
'batch_size'
]
//
dev_count
))
print
(
'Estimated overall train steps {}.'
.
format
(
global_max_train_steps
))
print
(
'Estimated overall train steps {}.'
.
format
(
global_max_train_steps
))
if
'warmup_proportion'
in
main_conf
and
main_conf
[
'warmup_proportion'
]
>
0
:
if
'warmup_proportion'
in
main_conf
and
main_conf
[
'warmup_proportion'
]
>
0
:
...
...
paddlepalm/optimizer/adam.py
浏览文件 @
2f2e84b6
...
@@ -90,11 +90,6 @@ def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=N
...
@@ -90,11 +90,6 @@ def optimize(loss, config, max_train_steps=None, warmup_steps=0, train_program=N
_
,
param_grads
=
optimizer
.
minimize
(
loss
)
_
,
param_grads
=
optimizer
.
minimize
(
loss
)
for
block
in
fluid
.
default_main_program
().
blocks
:
for
var_name
in
block
.
vars
:
if
var_name
.
startswith
(
"embedding"
):
print
(
block
.
vars
[
var_name
])
if
config
.
get
(
'weight_decay'
,
0
)
>
0
:
if
config
.
get
(
'weight_decay'
,
0
)
>
0
:
for
param
,
grad
in
param_grads
:
for
param
,
grad
in
param_grads
:
...
...
paddlepalm/utils/saver.py
浏览文件 @
2f2e84b6
...
@@ -19,6 +19,8 @@ import os
...
@@ -19,6 +19,8 @@ import os
import
six
import
six
import
ast
import
ast
import
copy
import
copy
import
tarfile
import
shutil
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
...
@@ -48,18 +50,30 @@ def init_pretraining_params(exe,
...
@@ -48,18 +50,30 @@ def init_pretraining_params(exe,
assert
os
.
path
.
exists
(
pretraining_params_path
assert
os
.
path
.
exists
(
pretraining_params_path
),
"[%s] cann't be found."
%
pretraining_params_path
),
"[%s] cann't be found."
%
pretraining_params_path
assert
os
.
path
.
exists
(
os
.
path
.
join
(
pretraining_params_path
,
'__palmmodel__'
)),
"__palmmodel__ not found."
print
(
"Loading pretraining parameters from {}..."
.
format
(
pretraining_params_path
))
with
tarfile
.
open
(
os
.
path
.
join
(
pretraining_params_path
,
'__palmmodel__'
),
'r:'
)
as
f
:
f
.
extractall
(
os
.
path
.
join
(
pretraining_params_path
,
'.temp'
))
pretraining_params_path
=
os
.
path
.
join
(
pretraining_params_path
,
'.temp'
)
def
existed_params
(
var
):
def
existed_params
(
var
):
if
not
isinstance
(
var
,
fluid
.
framework
.
Parameter
):
if
not
isinstance
(
var
,
fluid
.
framework
.
Parameter
):
return
False
return
False
if
not
os
.
path
.
exists
(
os
.
path
.
join
(
pretraining_params_path
,
var
.
name
)):
print
(
'Warning: {} not found in {}.'
.
format
(
var
.
name
,
pretraining_params_path
))
return
os
.
path
.
exists
(
os
.
path
.
join
(
pretraining_params_path
,
var
.
name
))
return
os
.
path
.
exists
(
os
.
path
.
join
(
pretraining_params_path
,
var
.
name
))
print
(
"Load pretraining parameters from {}...
\n
"
.
format
(
pretraining_params_path
))
fluid
.
io
.
load_vars
(
fluid
.
io
.
load_vars
(
exe
,
exe
,
pretraining_params_path
,
pretraining_params_path
,
main_program
=
main_program
,
main_program
=
main_program
,
predicate
=
existed_params
)
predicate
=
existed_params
)
shutil
.
rmtree
(
pretraining_params_path
)
print
(
''
)
run_demo1.sh
浏览文件 @
2f2e84b6
export
CUDA_VISIBLE_DEVICES
=
0
export
CUDA_VISIBLE_DEVICES
=
0
,1,2,3,4,5,6,7
python demo1.py
python demo1.py
run_demo2.sh
浏览文件 @
2f2e84b6
export
CUDA_VISIBLE_DEVICES
=
0
,1,2,3,4,5,6,7
export
CUDA_VISIBLE_DEVICES
=
0
python demo2.py
python demo2.py
script/convert_params.sh
浏览文件 @
2f2e84b6
...
@@ -5,13 +5,32 @@ if [[ $# != 1 ]]; then
...
@@ -5,13 +5,32 @@ if [[ $# != 1 ]]; then
exit
1
exit
1
fi
fi
if
[[
-f
$1
/__palminfo__
]]
;
then
echo
"already converted."
exit
0
fi
echo
"converting..."
echo
"converting..."
cd
$1
if
[[
-d
$1
/params
]]
;
then
cd
$1
/params
else
cd
$1
fi
mkdir
.palm.backup
mkdir
.palm.backup
for
file
in
$(
ls
*
)
for
file
in
$(
ls
*
)
do
cp
$file
"backbone-"
$file
;
mv
$file
.palm.backup
do
cp
$file
.palm.backup
;
mv
$file
"__paddlepalm_"
$file
done
done
tar
-cf
__rawmodel__ .palm.backup/
*
rm
.palm.backup/
*
mv
__rawmodel__ .palm.backup
# find . ! -name '__rawmodel__' -exec rm {} +
tar
-cf
__palmmodel__ __paddlepalm_
*
touch
__palminfo__
ls
__paddlepalm_
*
>
__palminfo__
rm
__paddlepalm_
*
cd
-
>
/dev/null
cd
-
>
/dev/null
echo
"done!"
echo
"done!"
...
...
script/recover_params.sh
浏览文件 @
2f2e84b6
...
@@ -5,7 +5,29 @@ if [[ $# != 1 ]]; then
...
@@ -5,7 +5,29 @@ if [[ $# != 1 ]]; then
exit
1
exit
1
fi
fi
rm
$1
/backbone-
*
if
[[
!
-d
$1
]]
;
then
mv
$1
/.palm.backup/
*
$1
echo
"
$1
not found."
rm
-rf
$1
/.palm.backup
exit
1
fi
if
[[
!
-f
$1
/__palmmodel__
]]
;
then
echo
"paddlepalm model not found."
exit
1
fi
echo
"recovering..."
if
[[
-d
$1
/params
]]
;
then
cd
$1
/params
else
cd
$1
fi
rm
__palm
*
mv
.palm.backup/__rawmodel__
.
rm
-rf
.palm.backup
tar
-xf
__rawmodel__
mv
.palm.backup/
*
.
rm
__rawmodel__
rm
-rf
.palm.backup
cd
-
>
/dev/null
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录