PaddlePaddle / PaddleHub
Commit 72a0fe4e
Authored on Apr 02, 2019 by Zeyu Chen
add strategy

Parent: 7b96067a

Showing 2 changed files with 90 additions and 14 deletions (+90 -14)
paddle_hub/finetune/optimization.py    +0  -14
paddle_hub/finetune/strategy.py        +90 -0
paddle_hub/finetune/optimization.py
...
@@ -21,20 +21,6 @@ import numpy as np
 import paddle.fluid as fluid
 
-def bert_finetune(task, main_program, data_processor, config, dev_count):
-    # calculate wamrup step
-    num_train_examples = data_processor.get_num_examples(phase='train')
-    max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
-    warmup_steps = int(max_train_steps * config.warmup_proportion)
-
-    loss = task.variable("loss")
-    scheduled_lr = adam_weight_decay_optimizer_with_linear_warmup(
-        loss, warmup_steps, max_train_steps, config.learning_rate,
-        main_program, config.weight_decay)
-
-    return scheduled_lr
-
 def adam_weight_decay_optimization(loss, warmup_steps, num_train_steps,
...
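For orientation, the step arithmetic that bert_finetune performed (and that BERTFinetuneStrategy repeats below) works out as in this small sketch; the values are hypothetical and used purely for illustration, not taken from the commit.

# Hypothetical values, for illustration only:
num_train_examples = 10000    # data_processor.get_num_examples(phase='train')
num_epoch = 3                 # config.num_epoch
batch_size = 32               # config.batch_size
dev_count = 1                 # devices used for training
warmup_proportion = 0.1       # config.warmup_proportion

max_train_steps = num_epoch * num_train_examples // batch_size // dev_count
warmup_steps = int(max_train_steps * warmup_proportion)
print(max_train_steps, warmup_steps)  # 937 93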
paddle_hub/finetune/strategy.py    0 → 100644 (new file)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import multiprocessing

import paddle.fluid as fluid

from .optimization import adam_weight_decay_optimization


class DefaultStrategy(object):
    def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
        self.learning_rate = learning_rate
        self._optimizer_name = optimizer_name
        self.optimizer = None

    def execute(self, loss):
        # Build the optimizer named in __init__ and apply it to the loss.
        if self._optimizer_name.lower() == "adam":
            self.optimizer = fluid.optimizer.Adam(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "sgd":
            self.optimizer = fluid.optimizer.SGD(
                learning_rate=self.learning_rate)

        if self.optimizer is not None:
            self.optimizer.minimize(loss)
        else:
            raise ValueError("DefaultStrategy's optimizer is None")


class BERTFinetuneStrategy(DefaultStrategy):
    def __init__(self,
                 learning_rate=1e-4,
                 warmup_strategy="linear_warmup_decay",
                 warmup_proportion=0.0,
                 weight_decay=0.01,
                 optimizer_name=None):
        super().__init__(
            learning_rate=learning_rate, optimizer_name=optimizer_name)
        # check strategy correctness
        if warmup_strategy not in ["linear_warmup_decay", "noam_decay"]:
            raise ValueError("warmup strategy {} is not setup "
                             "correctly".format(warmup_strategy))
        self._warmup_strategy = warmup_strategy
        self._warmup_proportion = warmup_proportion
        self._weight_decay = weight_decay

    @property
    def warmup_strategy(self):
        return self._warmup_strategy

    @property
    def warmup_proportion(self):
        return self._warmup_proportion

    @property
    def weight_decay(self):
        return self._weight_decay

    def execute(self, loss, main_program, data_reader, config):
        # calculate warmup step
        dev_count = self._get_dev_count(config)
        num_train_examples = data_reader.get_num_examples(phase='train')
        max_train_steps = (config.num_epoch * num_train_examples //
                           config.batch_size // dev_count)
        warmup_steps = int(max_train_steps * self.warmup_proportion)

        scheduled_lr = adam_weight_decay_optimization(
            loss, warmup_steps, max_train_steps, self.learning_rate,
            main_program, self.weight_decay, self.warmup_strategy)

        return scheduled_lr

    def _get_dev_count(self, config):
        # Device count: CUDA devices on GPU, CPU_NUM (or all cores) on CPU.
        if config.use_cuda:
            dev_count = fluid.core.get_cuda_device_count()
        else:
            dev_count = int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
        return dev_count
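Finally, a minimal usage sketch, not part of the commit, showing how the new classes might be exercised with paddle.fluid 1.x. The one-layer regression network and the hyperparameter values are hypothetical and serve only to give DefaultStrategy.execute a loss to minimize.

import paddle.fluid as fluid

from paddle_hub.finetune.strategy import BERTFinetuneStrategy, DefaultStrategy

# Assemble a tiny fluid program so there is a loss variable to optimize.
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    x = fluid.layers.data(name='x', shape=[1], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(pred, y))

    # DefaultStrategy builds the named optimizer and applies it to the loss.
    strategy = DefaultStrategy(learning_rate=1e-4, optimizer_name="adam")
    strategy.execute(loss)

    # BERTFinetuneStrategy adds warmup and weight decay; its execute() also
    # needs main_program, a data reader with get_num_examples(phase='train'),
    # and a config exposing num_epoch, batch_size, and use_cuda, so it is
    # only constructed here.
    bert_strategy = BERTFinetuneStrategy(
        learning_rate=5e-5,
        warmup_strategy="linear_warmup_decay",
        warmup_proportion=0.1,
        weight_decay=0.01)

Note that constructing BERTFinetuneStrategy validates warmup_strategy against linear_warmup_decay and noam_decay, so an unsupported name fails fast with a ValueError.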