Commit fc72dc60 (Edaker / PaddleHub, fork of PaddlePaddle / PaddleHub)

migrate bert to latest interface

Author: Zeyu Chen
Date:   Mar 29, 2019
Parent: cf4d67dd

Showing 7 changed files with 125 additions and 81 deletions (+125, -81)
Changed files:

demo/bert-cls/finetune_with_hub.py      +9   -2
demo/bert-cls/run_fintune_with_hub.sh   +1   -1
paddle_hub/finetune/config.py           +4   -2
paddle_hub/finetune/finetune.py         +29  -37
paddle_hub/finetune/network.py          +0   -1
paddle_hub/finetune/optimization.py     +82  -35
paddle_hub/finetune/task.py             +0   -3
demo/bert-cls/finetune_with_hub.py

```diff
@@ -83,7 +83,10 @@ if __name__ == '__main__':
         batch_size=args.batch_size,
         max_seq_len=args.max_seq_len,
         weight_decay=args.weight_decay,
-        in_tokens=args.in_tokens,
+        finetune_strategy="bert_finetune",
+        with_memory_optimization=True,
+        in_tokens=True,
+        optimizer=None,
         warmup_proportion=args.warmup_proportion)
     processor = reader.ChnsenticorpProcessor(
@@ -123,4 +126,8 @@ if __name__ == '__main__':
     # Finetune and evaluate by PaddleHub's API
     # will finish training, evaluation, testing, save model automatically
-    hub.finetune_and_eval(cls_task, feed_list, processor, config)
+    hub.finetune_and_eval(
+        task=cls_task,
+        data_processor=processor,
+        feed_list=feed_list,
+        config=config)
```
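Note that the argument order changed as well: the old positional call passed feed_list before processor, while the new signature takes data_processor before feed_list, so keyword arguments are what keep the call site safe. A toy stand-in (not the real PaddleHub function) makes the hazard visible:

```python
def finetune_and_eval(task, data_processor, feed_list, config):
    """Toy stand-in with the new parameter order."""
    print(task, data_processor, feed_list, config)


# The old habit finetune_and_eval(cls_task, feed_list, processor, config)
# would now silently bind feed_list to data_processor; keywords are immune:
finetune_and_eval(
    task="cls_task",
    data_processor="processor",
    feed_list=["input_ids"],
    config={"finetune_strategy": "bert_finetune"})
```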
demo/bert-cls/run_fintune_with_hub.sh

```diff
-export CUDA_VISIBLE_DEVICES=6
+export CUDA_VISIBLE_DEVICES=2
 BERT_BASE_PATH="chinese_L-12_H-768_A-12"
 TASK_NAME='chnsenticorp'
```
paddle_hub/finetune/config.py

```diff
@@ -30,6 +30,8 @@ FinetuneConfig = collections.namedtuple(
         'weight_decay',  # for bert
         'warmup_proportion',  # for bert
         'in_tokens',  # for bert
-        'strategy',
-        'with_memory_optimization'
+        'finetune_strategy',
+        'with_memory_optimization',
+        # learning rate scheduler
+        'optimizer'
     ])
```
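For concreteness, here is a minimal sketch of a config built with the renamed fields. The field list is abridged to the ones visible in this diff; the real FinetuneConfig also carries entries such as num_epoch, batch_size and checkpoint_dir that finetune.py reads:

```python
import collections

# Abridged sketch of the updated namedtuple; field set assumed from this diff.
FinetuneConfig = collections.namedtuple('FinetuneConfig', [
    'learning_rate',
    'weight_decay',             # for bert
    'warmup_proportion',        # for bert
    'in_tokens',                # for bert
    'finetune_strategy',        # renamed from 'strategy'
    'with_memory_optimization',
    # learning rate scheduler
    'optimizer',
])

config = FinetuneConfig(
    learning_rate=5e-5,
    weight_decay=0.01,
    warmup_proportion=0.1,
    in_tokens=True,
    finetune_strategy="bert_finetune",
    with_memory_optimization=True,
    optimizer=None)  # unused once finetune_strategy selects bert_finetune

print(config.finetune_strategy)  # -> bert_finetune
```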
paddle_hub/finetune/finetune.py

```diff
@@ -23,17 +23,7 @@ import paddle
 import paddle.fluid as fluid
 from paddle_hub.tools.logger import logger
-
-
-def optimizer_config_for_strategy(strategy, parameters, data_processor,
-                                  dev_count):
-    # basic configuration
-    learning_rate = 1e-4
-    optimizer = fluid.optimizer.Adam(learning_rate)
-    regularizer = fluid.regularizer.L2DecayRegularizer(
-        regularization_coeff=1e-4)
-    return optimizer
+from paddle_hub.finetune.optimization import bert_finetune
 
 
 def _finetune_model(task,
@@ -51,12 +41,10 @@ def _finetune_model(task,
-    learning_rate = config.learning_rate
     use_cuda = config.use_cuda
     batch_size = config.batch_size
-    strategy = config.strategy
     with_memory_optimization = config.with_memory_optimization
     checkpoint_dir = config.checkpoint_dir
 
     with fluid.program_guard(main_program, startup_program):
         if use_cuda:
             place = fluid.CUDAPlace(0)
             dev_count = fluid.core.get_cuda_device_count()
@@ -65,17 +53,20 @@ def _finetune_model(task,
             dev_count = int(
                 os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
 
-        optimizer = optimizer_config_for_strategy(
-            strategy=strategy,
-            parameters=None,
-            data_processor=data_processor,
-            dev_count=dev_count)
-        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
         exe = fluid.Executor(place=place)
-        optimizer.minimize(loss)
+        data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
+
+        if config.finetune_strategy == "bert_finetune":
+            scheduled_lr = bert_finetune(task, main_program, data_processor,
+                                         config, dev_count)
+        elif config.optimizer == "adam":
+            optimizer = fluid.optimizer.Adam(learning_rate=config.learning_rate)
+            optimizer.minimize(loss)
+        # TODO: add more finetune strategy
 
         if with_memory_optimization:
-            logger.info("Memory optimize start")
+            logger.info("Memory optimization start...")
+            optimize_time_begin = time.time()
             fluid.memory_optimize(
                 input_program=fluid.default_main_program(),
                 skip_opt_set=[
@@ -83,7 +74,9 @@ def _finetune_model(task,
                     loss.name, accuracy.name
                 ])
-            logger.info("Memory optimize end")
+            time_used = time.time() - optimize_time_begin
+            logger.info("Memory optimization done! Time elapsed %f sec" %
+                        time_used)
 
         # initialize all parameters
         exe.run(fluid.default_startup_program())
@@ -91,13 +84,12 @@ def _finetune_model(task,
         logger.info("Finetune start")
         train_time_begin = time.time()
         for index in range(epoch):
-            train_reader = paddle.batch(
-                data_processor.data_generator(phase='train'),
-                batch_size=batch_size)
+            train_reader = data_processor.data_generator(
+                batch_size=batch_size, phase='train')
             size = accuracy_sum = loss_sum = 0
             for batch in train_reader():
                 loss_v, accuracy_v = exe.run(
-                    feed=data_feeder.feed(batch),
+                    feed=data_feeder.feed([batch]),
                     fetch_list=[loss.name, accuracy.name])
                 step += 1
                 size += len(batch)
@@ -106,16 +98,16 @@ def _finetune_model(task,
                 if step % config.log_interval == 0:
                     train_time_used = time.time() - train_time_begin
-                    perf = train_time_used / config.log_interval
+                    speed = config.log_interval / train_time_used
                     train_time_begin = time.time()
                     logger.info(
                         "step %d: loss=%.5f acc=%.5f [step/sec: %.2f]" %
-                        (step, loss_sum / size, accuracy_sum / size, perf))
+                        (step, loss_sum / size, accuracy_sum / size, speed))
                     size = accuracy_sum = loss_sum = 0
 
                 if step % config.save_ckpt_interval == 0:
-                    model_save_dir = os.path.join(
-                        checkpoint_dir, "model_parameters_in_step%d" % step)
+                    model_save_dir = os.path.join(checkpoint_dir,
+                                                  "step_%d" % step)
                     fluid.io.save_persistables(exe, dirname=model_save_dir)
 
                 if eval_model and step % config.eval_interval == 0:
@@ -123,7 +115,7 @@ def _finetune_model(task,
         # eval before end
         if eval_model:
             eval(task, data_processor, feed_list, config)
-    logger.info("Finetune end")
+    logger.info("Finetune finished")
 
 
 def save_model_and_checkpoint(task, save_dir):
@@ -150,22 +142,22 @@ def eval(task, data_processor, feed_list, config=None):
     accuracy = task.variable("accuracy")
     use_cuda = config.use_cuda
     batch_size = config.batch_size
     logger.info("[Evaluation] start")
     with fluid.program_guard(inference_program):
         place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
         data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
         exe = fluid.Executor(place=place)
         size = accuracy_sum = loss_sum = 0
-        test_reader = paddle.batch(
-            data_processor.data_generator(phase='test'),
-            batch_size=batch_size)
+        test_reader = data_processor.data_generator(
+            batch_size=batch_size, phase='test')
         eval_time_begin = time.time()
         for index, batch in enumerate(test_reader()):
             loss_v, accuracy_v, = exe.run(
-                feed=data_feeder.feed(batch),
+                feed=data_feeder.feed([batch]),
                 fetch_list=[loss, accuracy.name])
             size += len(batch)
             accuracy_sum += accuracy_v * len(batch)
             loss_sum += loss_v * len(batch)
         eval_time_used = time.time() - eval_time_begin
-        perf = eval_time_used / index
+        eval_speed = index / eval_time_used
         logger.info("[Evaluation] loss=%.5f acc=%.5f [step/sec: %.2f]" %
-                    (loss_sum / size, accuracy_sum / size, perf))
+                    (loss_sum / size, accuracy_sum / size, eval_speed))
```
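One easy-to-miss fix above: the old logging code divided elapsed time by the step interval, so a line labeled "[step/sec]" actually printed seconds per step; the eval path had the same inversion. A quick check with round numbers:

```python
log_interval = 10      # steps between log lines
train_time_used = 2.5  # seconds spent on those 10 steps

perf = train_time_used / log_interval   # old code: 0.25, i.e. sec/step
speed = log_interval / train_time_used  # new code: 4.00, i.e. step/sec

print("old: %.2f, new: %.2f" % (perf, speed))  # old: 0.25, new: 4.00
```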
paddle_hub/finetune/network.py

```diff
@@ -19,7 +19,6 @@ import time
 import numpy as np
 import multiprocessing
 
-from paddle_hub.finetune.optimization import bert_optimization
 from .task import Task
 
 __all__ = ['append_mlp_classifier']
```
paddle_hub/finetune/optimization.py

```diff
@@ -19,43 +19,90 @@ from __future__ import print_function
 import numpy as np
 import paddle.fluid as fluid
 """
 Finetune optimization strategy
 """
 
 
 def linear_warmup_decay(learning_rate, warmup_steps, num_train_steps):
     """ Applies linear warmup of learning rate from 0 and decay to 0."""
     with fluid.default_main_program()._lr_schedule_guard():
         lr = fluid.layers.tensor.create_global_var(
             shape=[1],
             value=0.0,
             dtype='float32',
             persistable=True,
             name="scheduled_learning_rate")
 
         global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()
 
         with fluid.layers.control_flow.Switch() as switch:
             with switch.case(global_step < warmup_steps):
                 warmup_lr = learning_rate * (global_step / warmup_steps)
                 fluid.layers.tensor.assign(warmup_lr, lr)
             with switch.default():
                 decayed_lr = fluid.layers.learning_rate_scheduler.polynomial_decay(
                     learning_rate=learning_rate,
                     decay_steps=num_train_steps,
                     end_learning_rate=0.0,
                     power=1.0,
                     cycle=False)
                 fluid.layers.tensor.assign(decayed_lr, lr)
 
         return lr
```
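Since the schedule is assembled from fluid graph ops, its shape is easier to see in plain Python. Below is a pure-Python mirror of the same piecewise rule, my paraphrase rather than PaddleHub code: a linear ramp from 0 to the base rate over warmup_steps, then polynomial_decay with power=1.0 down to 0 at num_train_steps.

```python
def scheduled_lr(step, base_lr, warmup_steps, num_train_steps):
    """Pure-Python mirror of linear_warmup_decay (assumed semantics)."""
    if step < warmup_steps:
        # switch.case(global_step < warmup_steps): ramp linearly from 0
        return base_lr * step / warmup_steps
    # switch.default(): polynomial_decay with power=1.0, end_learning_rate=0.0
    step = min(step, num_train_steps)  # cycle=False clamps at decay_steps
    return base_lr * (1.0 - step / num_train_steps)


# With base_lr=5e-5, 90 warmup steps and 900 total steps:
for s in (0, 45, 90, 495, 900):
    print(s, scheduled_lr(s, 5e-5, 90, 900))
# 0 -> 0.0, 45 -> 2.5e-05, 90 -> 4.5e-05, 495 -> 2.25e-05, 900 -> 0.0
```

The remainder of the hunk swaps bert_optimization for the new bert_finetune entry point plus two explicitly named optimizer builders: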
```diff
-def bert_optimization(loss,
-                      warmup_steps,
-                      num_train_steps,
-                      learning_rate,
-                      train_program,
-                      weight_decay,
-                      scheduler='linear_warmup_decay'):
+def bert_finetune(task, train_program, data_processor, config, dev_count):
+    # calculate warmup step
+    num_train_examples = data_processor.get_num_examples(phase='train')
+    max_train_steps = (config.num_epoch * num_train_examples //
+                       config.batch_size // dev_count)
+    warmup_steps = int(max_train_steps * config.warmup_proportion)
+
+    loss = task.variable("loss")
+    scheduled_lr = adam_weight_decay_optimizer_with_linear_warmup(
+        loss, warmup_steps, max_train_steps, config.learning_rate,
+        train_program, config.weight_decay)
+
+    return scheduled_lr
+
+
+def adam_weight_decay_optimizer_with_noam_decay(loss,
+                                                warmup_steps,
+                                                num_train_steps,
+                                                learning_rate,
+                                                train_program,
+                                                weight_decay,
+                                                scheduler='linear_warmup_decay'):
     if warmup_steps > 0:
         if scheduler == 'noam_decay':
             scheduled_lr = fluid.layers.learning_rate_scheduler\
                 .noam_decay(1 / (warmup_steps * (learning_rate**2)),
                             warmup_steps)
         elif scheduler == 'linear_warmup_decay':
             scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
                                                num_train_steps)
         else:
             raise ValueError("Unknown learning rate scheduler, should be "
                              "'noam_decay' or 'linear_warmup_decay'")
         optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
     else:
         optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
         scheduled_lr = learning_rate
 
     clip_norm_thres = 1.0
     fluid.clip.set_gradient_clip(
         clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
 
     def exclude_from_weight_decay(name):
         if name.find("layer_norm") > -1:
             return True
         bias_suffix = ["_bias", "_b", ".b_0"]
         for suffix in bias_suffix:
             if name.endswith(suffix):
                 return True
         return False
 
     param_list = dict()
     for param in train_program.global_block().all_parameters():
         param_list[param.name] = param * 1.0
         param_list[param.name].stop_gradient = True
 
     _, param_grads = optimizer.minimize(loss)
 
     if weight_decay > 0:
         for param, grad in param_grads:
             if exclude_from_weight_decay(param.name):
                 continue
             with param.block.program._optimized_guard(
                 [param, grad]), fluid.framework.name_scope("weight_decay"):
                 updated_param = param - param_list[
                     param.name] * weight_decay * scheduled_lr
                 fluid.layers.assign(output=param, input=updated_param)
 
     return scheduled_lr
 
 
+def adam_weight_decay_optimizer_with_linear_warmup(loss,
+                                                   warmup_steps,
+                                                   num_train_steps,
+                                                   learning_rate,
+                                                   train_program,
+                                                   weight_decay,
+                                                   scheduler='noam_decay'):
+    if warmup_steps > 0:
+        if scheduler == 'noam_decay':
+            scheduled_lr = fluid.layers.learning_rate_scheduler\
...
```
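Two details of bert_finetune are worth spelling out. First, the warmup arithmetic: with, say, 9,600 training examples, 3 epochs, batch size 32 and one device (invented numbers), max_train_steps = 3 * 9600 // 32 // 1 = 900, and warmup_proportion = 0.1 gives warmup_steps = 90. Second, weight decay is applied outside Adam: the code snapshots every parameter before optimizer.minimize and afterwards subtracts weight_decay * scheduled_lr * snapshot, skipping layer-norm and bias parameters. A NumPy sketch of one such update, with plain SGD standing in for Adam to keep it short:

```python
import numpy as np


def decoupled_wd_step(param, grad, lr, weight_decay):
    """One update mirroring the diff: snapshot, optimize, then decay."""
    snapshot = param.copy()           # param_list[param.name] = param * 1.0
    param = param - lr * grad         # stand-in for optimizer.minimize(loss)
    return param - snapshot * weight_decay * lr  # updated_param = param - ...


p = np.array([1.0, -2.0])
g = np.array([0.1, 0.1])
print(decoupled_wd_step(p, g, lr=0.01, weight_decay=0.01))
# [ 0.9989 -2.0008]: the decay term pulls weights toward zero
# independently of the gradient step.
```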
paddle_hub/finetune/task.py

```diff
@@ -19,9 +19,6 @@ import time
 import numpy as np
 import multiprocessing
 
-from paddle_hub.finetune.optimization import bert_optimization
-from paddle_hub.finetune.config import FinetuneConfig
-
 class Task(object):
     def __init__(self, task_type, graph_var_dict, main_program,
```