提交 b32619c0 编写于 作者: Z Zeyu Chen

update warmup_strategy to lr_scheduler

上级 a745a3ce
......@@ -111,7 +111,7 @@ strategy = hub.AdamWeightDecayStrategy(
learning_rate=5e-5,
weight_decay=0.01,
warmup_proportion=0.0,
warmup_strategy="linear_warmup_decay",
lr_scheduler="linear_warmup_decay",
)
config = hub.RunConfig(use_cuda=True, num_epoch=3, batch_size=32, strategy=strategy)
......@@ -124,7 +124,7 @@ hub.finetune_and_eval(task=cls_task, data_reader=reader, feed_list=feed_list, co
`learning_rate`: Finetune过程中的最大学习率;
`weight_decay`: 模型的正则项参数,默认0.01,如果模型有过拟合倾向,可适当调高这一参数;
`warmup_proportion`: 如果warmup_proportion>0, 例如0.1, 则学习率会在前10%的steps中线性增长至最高值learning_rate;
`warmup_strategy`: 有两种策略可选(1) `linear_warmup_decay`策略学习率会在最高点后以线性方式衰减; `noam_decay`策略学习率会在最高点以多项式形式衰减;
`lr_scheduler`: 有两种策略可选: (1) `linear_warmup_decay`策略学习率会在最高点后以线性方式衰减; (2) `noam_decay`策略学习率会在最高点后以多项式形式衰减;
#### 运行配置
`RunConfig` 主要控制Finetune的训练,包含以下可控制的参数:
......
......@@ -79,7 +79,7 @@ if __name__ == '__main__':
strategy = hub.AdamWeightDecayStrategy(
weight_decay=args.weight_decay,
learning_rate=args.learning_rate,
warmup_strategy="linear_warmup_decay",
lr_scheduler="linear_warmup_decay",
)
# Setup running config for PaddleHub Finetune API
......
......@@ -64,23 +64,23 @@ class DefaultStrategy(object):
class AdamWeightDecayStrategy(DefaultStrategy):
def __init__(self,
learning_rate=1e-4,
warmup_strategy="linear_warmup_decay",
lr_scheduler="linear_warmup_decay",
warmup_proportion=0.0,
weight_decay=0.01,
optimizer_name=None):
super().__init__(
learning_rate=learning_rate, optimizer_name=optimizer_name)
# check strategy correctness
if warmup_strategy not in ["linear_warmup_decay", "noam_decay"]:
raise ValueError("warmup strategy {} is not setup "
"correctly".format(warmup_strategy))
self._warmup_strategy = warmup_strategy
if lr_scheduler not in ["linear_warmup_decay", "noam_decay"]:
raise ValueError("lr_scheduler {} is not setup "
"correctly".format(lr_scheduler))
self._lr_scheduler = lr_scheduler
self._warmup_proportion = warmup_proportion
self._weight_decay = weight_decay
@property
def warmup_strategy(self):
return self._warmup_strategy
def lr_scheduler(self):
return self._lr_scheduler
@property
def warmup_proportion(self):
......@@ -99,7 +99,7 @@ class AdamWeightDecayStrategy(DefaultStrategy):
scheduled_lr = adam_weight_decay_optimization(
loss, warmup_steps, max_train_steps, self.learning_rate,
main_program, self.weight_decay, self.warmup_strategy)
main_program, self.weight_decay, self.lr_scheduler)
return scheduled_lr
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册