Commit 72a0fe4e authored by Zeyu Chen

add strategy

Parent 7b96067a
@@ -21,20 +21,6 @@ import numpy as np
import paddle.fluid as fluid
def bert_finetune(task, main_program, data_processor, config, dev_count):
    # calculate warmup steps
    num_train_examples = data_processor.get_num_examples(phase='train')
    max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
    warmup_steps = int(max_train_steps * config.warmup_proportion)

    loss = task.variable("loss")
    scheduled_lr = adam_weight_decay_optimizer_with_linear_warmup(
        loss, warmup_steps, max_train_steps, config.learning_rate, main_program,
        config.weight_decay)
    return scheduled_lr
def adam_weight_decay_optimization(loss,
                                   warmup_steps,
                                   num_train_steps,
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import multiprocessing
import paddle.fluid as fluid
from .optimization import adam_weight_decay_optimization
class DefaultStrategy(object):
    def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
        self.learning_rate = learning_rate
        self._optimizer_name = optimizer_name
        self.optimizer = None

    def execute(self, loss):
        # select the optimizer by its configured name, then apply it to the loss
        if self._optimizer_name.lower() == "adam":
            self.optimizer = fluid.optimizer.Adam(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "sgd":
            self.optimizer = fluid.optimizer.SGD(
                learning_rate=self.learning_rate)

        if self.optimizer is not None:
            self.optimizer.minimize(loss)
        else:
            raise ValueError("DefaultStrategy's optimizer is None")
class BERTFinetuneStrategy(DefaultStrategy):
    def __init__(self,
                 learning_rate=1e-4,
                 warmup_strategy="linear_warmup_decay",
                 warmup_proportion=0.0,
                 weight_decay=0.01,
                 optimizer_name=None):
        super().__init__(
            learning_rate=learning_rate, optimizer_name=optimizer_name)
        # check strategy correctness
        if warmup_strategy not in ["linear_warmup_decay", "noam_decay"]:
            raise ValueError("warmup strategy {} is not set up "
                             "correctly".format(warmup_strategy))
        self._warmup_strategy = warmup_strategy
        self._warmup_proportion = warmup_proportion
        self._weight_decay = weight_decay
    @property
    def warmup_strategy(self):
        return self._warmup_strategy

    @property
    def warmup_proportion(self):
        return self._warmup_proportion

    @property
    def weight_decay(self):
        return self._weight_decay
    def execute(self, loss, main_program, data_reader, config):
        # calculate warmup steps from the size of the training set
        dev_count = self._get_dev_count(config)
        num_train_examples = data_reader.get_num_examples(phase='train')
        max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
        warmup_steps = int(max_train_steps * self.warmup_proportion)

        scheduled_lr = adam_weight_decay_optimization(
            loss, warmup_steps, max_train_steps, self.learning_rate,
            main_program, self.weight_decay, self.warmup_strategy)
        return scheduled_lr
    def _get_dev_count(self, config):
        # number of devices training runs on: GPUs when CUDA is enabled,
        # otherwise the CPU count taken from the CPU_NUM environment variable
        if config.use_cuda:
            dev_count = fluid.core.get_cuda_device_count()
        else:
            dev_count = int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
        return dev_count
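For reference, a minimal worked example of the warmup arithmetic that BERTFinetuneStrategy.execute() performs; every number and stand-in name below is an illustrative assumption, not a value from this commit. With 9,600 training examples, 3 epochs, batch size 32, and one device, the schedule warms up for 90 of 900 total steps at a warmup_proportion of 0.1.

# Hypothetical configuration; all numbers are assumptions.
num_train_examples = 9600    # would come from data_reader.get_num_examples(phase='train')
num_epoch, batch_size, dev_count = 3, 32, 1
max_train_steps = num_epoch * num_train_examples // batch_size // dev_count
warmup_steps = int(max_train_steps * 0.1)    # warmup_proportion = 0.1
print(max_train_steps, warmup_steps)         # -> 900 90

# Wiring it up (sketch): `loss`, `main_program`, `data_reader`, and
# `config` would come from the surrounding fine-tuning task.
strategy = BERTFinetuneStrategy(
    learning_rate=5e-5,
    warmup_strategy="linear_warmup_decay",
    warmup_proportion=0.1,
    weight_decay=0.01)
# scheduled_lr = strategy.execute(loss, main_program, data_reader, config)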