提交 b0149dc3 编写于 作者: Z zhangxuefei

Modify the linear_warmup_decay to linear_decay

上级 7bdce56e
...@@ -69,7 +69,7 @@ if __name__ == '__main__': ...@@ -69,7 +69,7 @@ if __name__ == '__main__':
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate, learning_rate=args.learning_rate,
lr_scheduler="linear_warmup_decay", lr_scheduler="linear_decay",
) )
# Set up running config for PaddleHub Finetune API # Set up running config for PaddleHub Finetune API
......
...@@ -111,7 +111,7 @@ strategy = hub.AdamWeightDecayStrategy( ...@@ -111,7 +111,7 @@ strategy = hub.AdamWeightDecayStrategy(
learning_rate=5e-5, learning_rate=5e-5,
weight_decay=0.01, weight_decay=0.01,
warmup_proportion=0.0, warmup_proportion=0.0,
lr_scheduler="linear_warmup_decay", lr_scheduler="linear_decay",
) )
config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy) config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
...@@ -124,7 +124,7 @@ hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, co ...@@ -124,7 +124,7 @@ hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, co
`learning_rate`: Finetune过程中的最大学习率; `learning_rate`: Finetune过程中的最大学习率;
`weight_decay`: 模型的正则项参数,默认0.01,如果模型有过拟合倾向,可适当调高这一参数; `weight_decay`: 模型的正则项参数,默认0.01,如果模型有过拟合倾向,可适当调高这一参数;
`warmup_proportion`: 如果warmup_proportion>0, 例如0.1, 则学习率会在前10%的steps中线性增长至最高值learning_rate; `warmup_proportion`: 如果warmup_proportion>0, 例如0.1, 则学习率会在前10%的steps中线性增长至最高值learning_rate;
`lr_scheduler`: 有两种策略可选: (1) `linear_warmup_decay`策略学习率会在最高点后以线性方式衰减; (2) `noam_decay`策略学习率会在最高点后以多项式形式衰减; `lr_scheduler`: 有两种策略可选: (1) `linear_decay`策略学习率会在最高点后以线性方式衰减; (2) `noam_decay`策略学习率会在最高点后以多项式形式衰减;
#### 运行配置 #### 运行配置
`RunConfig` 主要控制Finetune的训练,包含以下可控制的参数: `RunConfig` 主要控制Finetune的训练,包含以下可控制的参数:
......
...@@ -78,7 +78,7 @@ if __name__ == '__main__': ...@@ -78,7 +78,7 @@ if __name__ == '__main__':
strategy = hub.AdamWeightDecayStrategy( strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay, weight_decay=args.weight_decay,
learning_rate=args.learning_rate, learning_rate=args.learning_rate,
lr_scheduler="linear_warmup_decay", lr_scheduler="linear_decay",
) )
# Set up running config for PaddleHub Finetune API # Set up running config for PaddleHub Finetune API
......
...@@ -3,13 +3,13 @@ ...@@ -3,13 +3,13 @@
---- ----
在PaddleHub中,Strategy代表了在对[Task](https://github.com/PaddlePaddle/PaddleHub/tree/develop/docs/API/Task.md)进行Finetune时,应该使用怎样的策略。这里的策略,包含了对预训练参数使用怎样的学习率,使用哪种类型的优化器,使用什么类型的正则化等 在PaddleHub中,Strategy代表了在对[Task](https://github.com/PaddlePaddle/PaddleHub/tree/develop/docs/API/Task.md)进行Finetune时,应该使用怎样的策略。这里的策略,包含了对预训练参数使用怎样的学习率,使用哪种类型的优化器,使用什么类型的正则化等
## `class paddlehub.finetune.strategy.AdamWeightDecayStrategy(learning_rate=1e-4, lr_scheduler="linear_warmup_decay", warmup_proportion=0.0, weight_decay=0.01, optimizer_name=None)` ## `class paddlehub.finetune.strategy.AdamWeightDecayStrategy(learning_rate=1e-4, lr_scheduler="linear_decay", warmup_proportion=0.0, weight_decay=0.01, optimizer_name=None)`
基于Adam优化器的学习率衰减策略 基于Adam优化器的学习率衰减策略
> ### 参数 > ### 参数
> * learning_rate: 全局学习率。默认为1e-4 > * learning_rate: 全局学习率。默认为1e-4
> >
> * lr_scheduler: 学习率调度方法。默认为"linear_warmup_decay" > * lr_scheduler: 学习率调度方法。默认为"linear_decay"
> >
> * warmup_proportion: warmup所占比重 > * warmup_proportion: warmup所占比重
> >
......
...@@ -27,18 +27,18 @@ def adam_weight_decay_optimization(loss, ...@@ -27,18 +27,18 @@ def adam_weight_decay_optimization(loss,
learning_rate, learning_rate,
main_program, main_program,
weight_decay, weight_decay,
scheduler='linear_warmup_decay'): scheduler='linear_decay'):
if warmup_steps > 0: if warmup_steps > 0:
if scheduler == 'noam_decay': if scheduler == 'noam_decay':
scheduled_lr = fluid.layers.learning_rate_scheduler\ scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)), .noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps) warmup_steps)
elif scheduler == 'linear_warmup_decay': elif scheduler == 'linear_decay':
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps, scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
num_train_steps) num_train_steps)
else: else:
raise ValueError("Unkown learning rate scheduler, should be " raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'") "'noam_decay' or 'linear_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr) optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
else: else:
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate) optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
......
...@@ -64,14 +64,14 @@ class DefaultStrategy(object): ...@@ -64,14 +64,14 @@ class DefaultStrategy(object):
class AdamWeightDecayStrategy(DefaultStrategy): class AdamWeightDecayStrategy(DefaultStrategy):
def __init__(self, def __init__(self,
learning_rate=1e-4, learning_rate=1e-4,
lr_scheduler="linear_warmup_decay", lr_scheduler="linear_decay",
warmup_proportion=0.0, warmup_proportion=0.0,
weight_decay=0.01, weight_decay=0.01,
optimizer_name=None): optimizer_name=None):
super().__init__( super().__init__(
learning_rate=learning_rate, optimizer_name=optimizer_name) learning_rate=learning_rate, optimizer_name=optimizer_name)
# check strategy correctness # check strategy correctness
if lr_scheduler not in ["linear_warmup_decay", "noam_decay"]: if lr_scheduler not in ["linear_decay", "noam_decay"]:
raise ValueError("lr_scheduler {} is not setup " raise ValueError("lr_scheduler {} is not setup "
"correctly".format(lr_scheduler)) "correctly".format(lr_scheduler))
self._lr_scheduler = lr_scheduler self._lr_scheduler = lr_scheduler
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册