PaddlePaddle / PaddleHub · commit 7b96067a
Commit 7b96067a
Authored Apr 02, 2019 by Zeyu Chen

add Strategy

Parent: dc0b2847
Showing 6 changed files with 37 additions and 92 deletions (+37 -92)
demo/bert-cls/finetune_with_hub.py    +2  -3
paddle_hub/__init__.py                +3  -0
paddle_hub/finetune/config.py         +8  -7
paddle_hub/finetune/finetune.py       +9  -6
paddle_hub/finetune/optimization.py   +11 -72
paddle_hub/module/module.py           +4  -4
demo/bert-cls/finetune_with_hub.py

@@ -43,6 +43,7 @@ args = parser.parse_args()
 # yapf: enable.
 
 if __name__ == '__main__':
+    strategy = hub.BERTFinetuneStrategy(weight_decay=args.weight_decay)
     config = hub.FinetuneConfig(
         log_interval=10,
         eval_interval=100,
@@ -51,9 +52,7 @@ if __name__ == '__main__':
         learning_rate=args.learning_rate,
         num_epoch=args.num_epoch,
         batch_size=args.batch_size,
-        max_seq_len=args.max_seq_len,
-        weight_decay=args.weight_decay,
-        finetune_strategy="bert_finetune")
+        max_seq_len=args.max_seq_len,
+        strategy=strategy)
 
     # loading Paddlehub BERT
     module = hub.Module(module_dir=args.hub_module_dir)
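Taken together, the demo now builds the optimization strategy first and hands it to the config instead of passing weight decay and a strategy name as loose arguments. A minimal sketch of the resulting calling convention, with illustrative literal values in place of the parsed args (the reader setup and the final finetune_and_eval call are outside this diff and therefore not shown):

import paddle_hub as hub

# the strategy object now owns optimizer-related settings such as weight decay
strategy = hub.BERTFinetuneStrategy(weight_decay=0.01)

# FinetuneConfig keeps the generic run settings and simply carries the strategy
config = hub.FinetuneConfig(
    log_interval=10,
    eval_interval=100,
    learning_rate=5e-5,   # illustrative value
    num_epoch=3,          # illustrative value
    batch_size=32,        # illustrative value
    max_seq_len=128,
    strategy=strategy)

# loading Paddlehub BERT (directory path is illustrative)
module = hub.Module(module_dir="./hub_module_bert")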
paddle_hub/__init__.py

@@ -15,6 +15,7 @@ from . import module
 from . import common
 from . import io
 from . import dataset
+from . import finetune
 from .common.dir import USER_HOME
 from .common.dir import HUB_HOME
@@ -35,6 +36,8 @@ from .finetune.network import append_mlp_classifier
 from .finetune.finetune import finetune_and_eval
 from .finetune.config import FinetuneConfig
 from .finetune.task import Task
+from .finetune.strategy import BERTFinetuneStrategy
+from .finetune.strategy import DefaultStrategy
 from .reader import BERTTokenizeReader
 from .reader.cv_reader import ImageClassificationReader
paddle_hub/finetune/config.py

@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import collections
+from .strategy import DefaultStrategy
 
 
 class FinetuneConfig(object):
@@ -30,8 +30,8 @@ class FinetuneConfig(object):
                  max_seq_len=128,
                  weight_decay=None,
                  warmup_proportion=0.0,
-                 finetune_strategy=None,
                  enable_memory_optim=True,
+                 strategy=None,
                  optimizer="adam"):
         """ Construct finetune Config """
         self._log_interval = log_interval
@@ -43,9 +43,10 @@ class FinetuneConfig(object):
         self._num_epoch = num_epoch
         self._batch_size = batch_size
         self._max_seq_len = max_seq_len
-        self._weight_decay = weight_decay
-        self._warmup_proportion = warmup_proportion
-        self._finetune_strategy = finetune_strategy
+        if strategy is None:
+            self._strategy = DefaultStrategy()
+        else:
+            self._strategy = strategy
         self._enable_memory_optim = enable_memory_optim
         self._optimizer = optimizer
@@ -94,8 +95,8 @@ class FinetuneConfig(object):
         return self._warmup_proportion
 
     @property
-    def finetune_strategy(self):
-        return self._finetune_strategy
+    def strategy(self):
+        return self._strategy
 
     @property
     def enable_memory_optim(self):
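The constructor change above swaps the old finetune_strategy string for a strategy object and falls back to DefaultStrategy() when the caller passes nothing. A small self-contained sketch of that fallback pattern, with the class names reused purely for illustration (this is not the real PaddleHub implementation):

class DefaultStrategy(object):
    """Placeholder standing in for paddle_hub's default strategy."""
    pass


class FinetuneConfig(object):
    def __init__(self, strategy=None):
        # fall back to the default strategy when the caller passes nothing
        self._strategy = DefaultStrategy() if strategy is None else strategy

    @property
    def strategy(self):
        return self._strategy


config = FinetuneConfig()
print(type(config.strategy).__name__)  # DefaultStrategy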
paddle_hub/finetune/finetune.py

@@ -18,13 +18,16 @@ from __future__ import print_function
 import os
 import time
+import multiprocessing
 
 import paddle
 import paddle.fluid as fluid
+import paddle_hub as hub
 
 from visualdl import LogWriter
 from paddle_hub.common.logger import logger
 from paddle_hub.finetune.optimization import bert_finetune
+from paddle_hub.finetune.strategy import BERTFinetuneStrategy, DefaultStrategy
 from paddle_hub.finetune.checkpoint import load_checkpoint, save_checkpoint
@@ -76,12 +79,12 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
     exe = fluid.Executor(place=place)
     data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
 
-    if config.finetune_strategy == "bert_finetune":
-        scheduled_lr = bert_finetune(task, main_program, data_reader,
-                                     config, dev_count)
-    elif config.optimizer == "adam":
-        optimizer = fluid.optimizer.Adam(learning_rate=config.learning_rate)
-        optimizer.minimize(loss)
+    # select strategy
+    if isinstance(config.strategy, hub.BERTFinetuneStrategy):
+        scheduled_lr = config.strategy.execute(loss, main_program,
+                                               data_reader, config)
+    elif isinstance(config.optimizer, hub.DefaultStrategy):
+        config.strategy.execute(loss)
     #TODO: add more finetune strategy
 
     _do_memory_optimization(task, config)
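The hunk above replaces the string comparison with isinstance dispatch and delegates optimizer construction to the strategy's execute() method. The strategy classes themselves live in paddle_hub/finetune/strategy.py, which is not touched by this commit, so the following minimal DefaultStrategy is only an inference from the call sites here and from the Adam branch it replaces; the class name matches the import, but the constructor arguments and defaults are assumptions:

import paddle.fluid as fluid


class DefaultStrategy(object):
    """Hypothetical minimal strategy, inferred from the call sites above."""

    def __init__(self, learning_rate=1e-4):
        self.learning_rate = learning_rate

    def execute(self, loss):
        # mirrors the removed `elif config.optimizer == "adam"` branch:
        # build a plain Adam optimizer and let it minimize the task loss
        optimizer = fluid.optimizer.Adam(learning_rate=self.learning_rate)
        optimizer.minimize(loss)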
paddle_hub/finetune/optimization.py

@@ -19,12 +19,9 @@ from __future__ import print_function
 import numpy as np
 import paddle.fluid as fluid
 
-"""
-Finetune optimization strategy
-"""
-def bert_finetune(task, train_program, data_processor, config, dev_count):
+
+def bert_finetune(task, main_program, data_processor, config, dev_count):
     # calculate wamrup step
     num_train_examples = data_processor.get_num_examples(phase='train')
     max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
@@ -32,20 +29,19 @@ def bert_finetune(task, train_program, data_processor, config, dev_count):
     loss = task.variable("loss")
 
     scheduled_lr = adam_weight_decay_optimizer_with_linear_warmup(
         loss, warmup_steps, max_train_steps, config.learning_rate,
-        train_program, config.weight_decay)
+        main_program, config.weight_decay)
 
     return scheduled_lr
 
 
-def adam_weight_decay_optimizer_with_noam_decay(
+def adam_weight_decay_optimization(
         loss,
         warmup_steps,
         num_train_steps,
         learning_rate,
-        train_program,
+        main_program,
         weight_decay,
         scheduler='linear_warmup_decay'):
     if warmup_steps > 0:
         if scheduler == 'noam_decay':
             scheduled_lr = fluid.layers.learning_rate_scheduler\
@@ -77,64 +73,7 @@ def adam_weight_decay_optimizer_with_noam_decay(
     param_list = dict()
 
-    for param in train_program.global_block().all_parameters():
+    for param in main_program.global_block().all_parameters():
         param_list[param.name] = param * 1.0
         param_list[param.name].stop_gradient = True
-
-    _, param_grads = optimizer.minimize(loss)
-
-    if weight_decay > 0:
-        for param, grad in param_grads:
-            if exclude_from_weight_decay(param.name):
-                continue
-            with param.block.program._optimized_guard(
-                [param, grad]), fluid.framework.name_scope("weight_decay"):
-                updated_param = param - param_list[
-                    param.name] * weight_decay * scheduled_lr
-                fluid.layers.assign(output=param, input=updated_param)
-
-    return scheduled_lr
-
-
-def adam_weight_decay_optimizer_with_linear_warmup(
-        loss,
-        warmup_steps,
-        num_train_steps,
-        learning_rate,
-        train_program,
-        weight_decay,
-        scheduler='noam_decay'):
-    if warmup_steps > 0:
-        if scheduler == 'noam_decay':
-            scheduled_lr = fluid.layers.learning_rate_scheduler\
-                .noam_decay(1 / (warmup_steps * (learning_rate**2)),
-                            warmup_steps)
-        elif scheduler == 'linear_warmup_decay':
-            scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
-                                               num_train_steps)
-        else:
-            raise ValueError("Unkown learning rate scheduler, should be "
-                             "'noam_decay' or 'linear_warmup_decay'")
-        optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
-    else:
-        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
-        scheduled_lr = learning_rate
-
-    clip_norm_thres = 1.0
-    fluid.clip.set_gradient_clip(
-        clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
-
-    def exclude_from_weight_decay(name):
-        if name.find("layer_norm") > -1:
-            return True
-        bias_suffix = ["_bias", "_b", ".b_0"]
-        for suffix in bias_suffix:
-            if name.endswith(suffix):
-                return True
-        return False
-
-    param_list = dict()
-
-    for param in train_program.global_block().all_parameters():
-        param_list[param.name] = param * 1.0
-        param_list[param.name].stop_gradient = True
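For reference, the step arithmetic that survives in bert_finetune works out as follows with illustrative numbers; only the max_train_steps line appears in the hunk above, so the warmup_proportion step at the end is an assumption about how the warmup count is derived:

# illustrative numbers, not taken from the commit
num_epoch = 3
num_train_examples = 9600
batch_size = 32
dev_count = 1

max_train_steps = num_epoch * num_train_examples // batch_size // dev_count
print(max_train_steps)  # 900

# assumed: the warmup step count is a warmup_proportion slice of the total
warmup_steps = int(max_train_steps * 0.1)
print(warmup_steps)  # 90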
paddle_hub/module/module.py

@@ -191,7 +191,7 @@ class Module(object):
     def _init_with_module_file(self, module_dir):
         checker = ModuleChecker(module_dir)
         if not checker.check():
-            logger.error("module check fail")
+            logger.error("Module init failed on {}".format(module_dir))
             exit(1)
         self.helper = ModuleHelper(module_dir)
@@ -205,7 +205,7 @@ class Module(object):
         self._load_assets()
         self._recover_from_desc()
         self._generate_sign_attr()
-        self._recovery_parameter(self.program)
+        self._restore_parameter(self.program)
         self._recover_variable_info(self.program)
 
     def _init_with_signature(self, signatures):
@@ -228,7 +228,7 @@ class Module(object):
                     self.default_signature = sign
                 self.signatures[sign.name] = sign
 
-    def _recovery_parameter(self, program):
+    def _restore_parameter(self, program):
         global_block = program.global_block()
         param_attrs = self.desc.extra_info.map.data['param_attrs']
         for key, param_attr in param_attrs.map.data.items():
@@ -477,7 +477,7 @@ class Module(object):
         if regularizer != "Default":
             paddle_helper.set_parameter_regularizer(program, regularizer)
 
-        self._recovery_parameter(program)
+        self._restore_parameter(program)
         self._recover_variable_info(program)