Unverified commit d0f8cbcc, authored by Zeyu Chen and committed by GitHub

Merge pull request #36 from Steffy-zxf/add-linear-warmup-decay-function

Fix the bug that the definition of the linear_warmup_decay function was missing
@@ -19,6 +19,8 @@ from __future__ import print_function

 import numpy as np
 import paddle.fluid as fluid
+import paddle.fluid.layers.learning_rate_scheduler as lr_scheduler
+from paddle.fluid.layers import control_flow


 def adam_weight_decay_optimization(loss,
@@ -35,7 +37,7 @@ def adam_weight_decay_optimization(loss,
                                              warmup_steps)
     elif scheduler == 'linear_decay':
         scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
-                                           num_train_steps)
+                                           main_program)
     else:
         raise ValueError("Unknown learning rate scheduler, should be "
                          "'noam_decay' or 'linear_decay'")
@@ -76,3 +78,26 @@ def adam_weight_decay_optimization(loss,
             fluid.layers.assign(output=param, input=updated_param)

     return scheduled_lr
+
+
+def linear_warmup_decay(init_lr, num_warmup_steps, main_program):
+    with main_program._lr_schedule_guard():
+        global_step = lr_scheduler._decay_step_counter()
+
+        lr = fluid.layers.create_global_var(
+            shape=[1],
+            value=0.0,
+            dtype='float32',
+            persistable=True,
+            name="learning_rate")
+
+        with control_flow.Switch() as switch:
+            with switch.case(global_step < num_warmup_steps):
+                decayed_lr = init_lr * global_step * 1.0 / num_warmup_steps
+                fluid.layers.assign(decayed_lr, lr)
+            with switch.default():
+                last_value_var = fluid.layers.fill_constant(
+                    shape=[1], dtype='float32', value=float(init_lr))
+                fluid.layers.assign(last_value_var, lr)
+
+        return lr
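For reference, the helper added above warms the learning rate up linearly from 0 to init_lr over num_warmup_steps and then, despite the "decay" in its name, holds it constant at init_lr (the switch.default() branch). A minimal pure-Python sketch of the same piecewise schedule, independent of the fluid control-flow machinery; linear_warmup_value is a hypothetical name for illustration:

```python
def linear_warmup_value(step, init_lr, num_warmup_steps):
    # Warmup phase: scale the LR linearly with the global step,
    # mirroring the switch.case(global_step < num_warmup_steps) branch.
    if step < num_warmup_steps:
        return init_lr * step / num_warmup_steps
    # After warmup: hold at init_lr, mirroring the switch.default() branch.
    return init_lr

# e.g. with init_lr=1e-4 and num_warmup_steps=100:
#   step 0 -> 0.0, step 50 -> 5e-5, step >= 100 -> 1e-4
```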
@@ -89,7 +89,7 @@ class AdamWeightDecayStrategy(DefaultStrategy):
     def __init__(self,
                  learning_rate=1e-4,
                  lr_scheduler="linear_decay",
-                 warmup_proportion=0.0,
+                 warmup_proportion=0.1,
                  weight_decay=0.01,
                  optimizer_name="adam"):
         super(AdamWeightDecayStrategy, self).__init__(
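A minimal usage sketch of the strategy with the new default; this assumes the class is exposed at the PaddleHub package level as hub.AdamWeightDecayStrategy, and simply spells out the defaults from the signature above:

```python
import paddlehub as hub

# All keyword values are the defaults from the __init__ signature above;
# warmup_proportion defaults to 0.1 after this change (it was 0.0).
strategy = hub.AdamWeightDecayStrategy(
    learning_rate=1e-4,
    lr_scheduler="linear_decay",
    warmup_proportion=0.1,
    weight_decay=0.01,
    optimizer_name="adam")
```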
@@ -117,6 +117,12 @@ class AdamWeightDecayStrategy(DefaultStrategy):
     def execute(self, loss, main_program, data_reader, config):
         # calculate warmup steps
         dev_count = self._get_dev_count(config)
+        data_reader.data_generator(
+            batch_size=config.batch_size, phase='train', shuffle=True)
+        data_reader.data_generator(
+            batch_size=config.batch_size, phase='val', shuffle=False)
+        data_reader.data_generator(
+            batch_size=config.batch_size, phase='dev', shuffle=False)
         num_train_examples = data_reader.get_num_examples(phase='train')
         max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
         warmup_steps = int(max_train_steps * self.warmup_proportion)
......
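To make the warmup arithmetic from execute() concrete, a worked example with assumed values (10,000 training examples, 3 epochs, batch size 32, a single device, and the new warmup_proportion default of 0.1); all numbers below are hypothetical:

```python
num_epoch = 3              # hypothetical config.num_epoch
num_train_examples = 10000 # hypothetical dataset size
batch_size = 32            # hypothetical config.batch_size
dev_count = 1              # single device
warmup_proportion = 0.1    # new default from this change

max_train_steps = num_epoch * num_train_examples // batch_size // dev_count
warmup_steps = int(max_train_steps * warmup_proportion)
print(max_train_steps, warmup_steps)  # 937 93
```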