Commit ae82728c authored by W wuzewu

Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleHub into develop

......@@ -3,3 +3,18 @@
[![Build Status](https://travis-ci.org/PaddlePaddle/PaddleHub.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/PaddleHub)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleHub.svg)](https://github.com/PaddlePaddle/PaddleHub/releases)
## Installation
```
pip install paddlehub
```
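A minimal usage sketch after installation (the module directory below is a placeholder path; `hub.Module` is the loader exercised elsewhere in this commit):
```
# Hedged quick-start sketch; "./bert_module" stands in for a module
# directory you have already downloaded locally.
import paddle_hub as hub

module = hub.Module(module_dir="./bert_module")
```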
## Feedback
You are welcome to submit questions and bug reports as [Github Issues](https://github.com/PaddlePaddle/PaddleHub/issues).
## Copyright and License
PaddleHub is provided under the [Apache-2.0 license](LICENSE).
......@@ -43,6 +43,7 @@ args = parser.parse_args()
# yapf: enable.
if __name__ == '__main__':
strategy = hub.BERTFinetuneStrategy(weight_decay=args.weight_decay)
config = hub.FinetuneConfig(
log_interval=10,
eval_interval=100,
......@@ -51,9 +52,7 @@ if __name__ == '__main__':
learning_rate=args.learning_rate,
num_epoch=args.num_epoch,
batch_size=args.batch_size,
max_seq_len=args.max_seq_len,
weight_decay=args.weight_decay,
finetune_strategy="bert_finetune")
strategy=strategy)
# loading Paddlehub BERT
module = hub.Module(module_dir=args.hub_module_dir)
......
......@@ -15,6 +15,7 @@ from . import module
from . import common
from . import io
from . import dataset
from . import finetune
from .common.dir import USER_HOME
from .common.dir import HUB_HOME
......@@ -35,6 +36,8 @@ from .finetune.network import append_mlp_classifier
from .finetune.finetune import finetune_and_eval
from .finetune.config import FinetuneConfig
from .finetune.task import Task
from .finetune.strategy import BERTFinetuneStrategy
from .finetune.strategy import DefaultStrategy
from .reader import BERTTokenizeReader
from .reader.cv_reader import ImageClassificationReader
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
from .strategy import DefaultStrategy
class FinetuneConfig(object):
......@@ -30,8 +30,8 @@ class FinetuneConfig(object):
max_seq_len=128,
weight_decay=None,
warmup_proportion=0.0,
finetune_strategy=None,
enable_memory_optim=True,
strategy=None,
optimizer="adam"):
""" Construct finetune Config """
self._log_interval = log_interval
......@@ -43,9 +43,10 @@ class FinetuneConfig(object):
self._num_epoch = num_epoch
self._batch_size = batch_size
self._max_seq_len = max_seq_len
self._weight_decay = weight_decay
self._warmup_proportion = warmup_proportion
self._finetune_strategy = finetune_strategy
if strategy is None:
self._strategy = DefaultStrategy()
else:
self._strategy = strategy
self._enable_memory_optim = enable_memory_optim
self._optimizer = optimizer
......@@ -94,8 +95,8 @@ class FinetuneConfig(object):
return self._warmup_proportion
@property
def finetune_strategy(self):
return self._finetune_strategy
def strategy(self):
return self._strategy
@property
def enable_memory_optim(self):
......
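The net effect of this config.py change is that callers now pass a strategy object instead of the old `finetune_strategy` string. A hedged sketch of both call patterns (hyper-parameter values are illustrative; the keyword names are the ones appearing in this diff):
```
# Hedged sketch of the new FinetuneConfig usage; values are illustrative.
import paddle_hub as hub

# 1) Omit `strategy`: the constructor now falls back to DefaultStrategy
#    (plain Adam with its default learning rate).
config_default = hub.FinetuneConfig(num_epoch=3, batch_size=32)

# 2) Pass a strategy object, replacing the old
#    finetune_strategy="bert_finetune" string plus per-config weight_decay.
config_bert = hub.FinetuneConfig(
    num_epoch=3,
    batch_size=32,
    learning_rate=5e-5,
    strategy=hub.BERTFinetuneStrategy(weight_decay=0.01))
```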
......@@ -18,13 +18,16 @@ from __future__ import print_function
import os
import time
import multiprocessing
import paddle
import paddle.fluid as fluid
import paddle_hub as hub
from visualdl import LogWriter
from paddle_hub.common.logger import logger
from paddle_hub.finetune.optimization import bert_finetune
from paddle_hub.finetune.strategy import BERTFinetuneStrategy, DefaultStrategy
from paddle_hub.finetune.checkpoint import load_checkpoint, save_checkpoint
......@@ -76,12 +79,12 @@ def _finetune_model(task, data_reader, feed_list, config=None, do_eval=False):
exe = fluid.Executor(place=place)
data_feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
if config.finetune_strategy == "bert_finetune":
scheduled_lr = bert_finetune(task, main_program, data_reader,
config, dev_count)
elif config.optimizer == "adam":
optimizer = fluid.optimizer.Adam(learning_rate=config.learning_rate)
optimizer.minimize(loss)
# select strategy
if isinstance(config.strategy, hub.BERTFinetuneStrategy):
scheduled_lr = config.strategy.execute(loss, main_program,
data_reader, config)
elif isinstance(config.strategy, hub.DefaultStrategy):
config.strategy.execute(loss)
#TODO: add more finetune strategy
_do_memory_optimization(task, config)
......
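Strategy selection in `_finetune_model` is now isinstance-based rather than string-based, so anything derived from `DefaultStrategy` can plug in. A hypothetical custom strategy, not part of this commit, sketched under that assumption:
```
# Hypothetical example (not part of this commit): a custom strategy only
# needs to subclass DefaultStrategy and override execute(loss).
import paddle.fluid as fluid
import paddle_hub as hub

class SGDStrategy(hub.DefaultStrategy):
    def __init__(self, learning_rate=1e-3):
        super().__init__(learning_rate=learning_rate, optimizer_name="sgd")

    def execute(self, loss):
        # plain SGD without warmup or weight decay
        optimizer = fluid.optimizer.SGD(learning_rate=self.learning_rate)
        optimizer.minimize(loss)

# isinstance(config.strategy, hub.DefaultStrategy) is True, so the dispatch
# above routes to SGDStrategy.execute(loss).
config = hub.FinetuneConfig(strategy=SGDStrategy())
```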
......@@ -19,90 +19,15 @@ from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
"""
Finetune optimization strategy
"""
def bert_finetune(task, train_program, data_processor, config, dev_count):
# calculate warmup steps
num_train_examples = data_processor.get_num_examples(phase='train')
max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
warmup_steps = int(max_train_steps * config.warmup_proportion)
loss = task.variable("loss")
scheduled_lr = adam_weight_decay_optimizer_with_linear_warmup(
loss, warmup_steps, max_train_steps, config.learning_rate,
train_program, config.weight_decay)
return scheduled_lr
def adam_weight_decay_optimizer_with_noam_decay(
loss,
warmup_steps,
num_train_steps,
learning_rate,
train_program,
weight_decay,
scheduler='linear_warmup_decay'):
if warmup_steps > 0:
if scheduler == 'noam_decay':
scheduled_lr = fluid.layers.learning_rate_scheduler\
.noam_decay(1/(warmup_steps *(learning_rate ** 2)),
warmup_steps)
elif scheduler == 'linear_warmup_decay':
scheduled_lr = linear_warmup_decay(learning_rate, warmup_steps,
num_train_steps)
else:
raise ValueError("Unkown learning rate scheduler, should be "
"'noam_decay' or 'linear_warmup_decay'")
optimizer = fluid.optimizer.Adam(learning_rate=scheduled_lr)
else:
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
scheduled_lr = learning_rate
clip_norm_thres = 1.0
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=clip_norm_thres))
def exclude_from_weight_decay(name):
if name.find("layer_norm") > -1:
return True
bias_suffix = ["_bias", "_b", ".b_0"]
for suffix in bias_suffix:
if name.endswith(suffix):
return True
return False
param_list = dict()
for param in train_program.global_block().all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
_, param_grads = optimizer.minimize(loss)
if weight_decay > 0:
for param, grad in param_grads:
if exclude_from_weight_decay(param.name):
continue
with param.block.program._optimized_guard(
[param, grad]), fluid.framework.name_scope("weight_decay"):
updated_param = param - param_list[
param.name] * weight_decay * scheduled_lr
fluid.layers.assign(output=param, input=updated_param)
return scheduled_lr
def adam_weight_decay_optimizer_with_linear_warmup(loss,
warmup_steps,
num_train_steps,
learning_rate,
train_program,
weight_decay,
scheduler='noam_decay'):
def adam_weight_decay_optimization(loss,
warmup_steps,
num_train_steps,
learning_rate,
main_program,
weight_decay,
scheduler='linear_warmup_decay'):
if warmup_steps > 0:
if scheduler == 'noam_decay':
scheduled_lr = fluid.layers.learning_rate_scheduler\
......@@ -134,7 +59,7 @@ def adam_weight_decay_optimizer_with_linear_warmup(loss,
param_list = dict()
for param in train_program.global_block().all_parameters():
for param in main_program.global_block().all_parameters():
param_list[param.name] = param * 1.0
param_list[param.name].stop_gradient = True
......
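The removed helpers are consolidated into `adam_weight_decay_optimization`, which applies weight decay manually after the Adam step: every parameter that is not a bias or layer_norm weight is shrunk by its pre-step value times `weight_decay * scheduled_lr`. A small numpy sketch of that arithmetic (numbers are made up):
```
import numpy as np

def decayed(param_after_adam, param_before_step, weight_decay, scheduled_lr):
    """Conceptual sketch of the fluid.layers.assign update performed above."""
    return param_after_adam - param_before_step * weight_decay * scheduled_lr

w = np.array([0.20, -0.50])
# With weight_decay=0.01 and scheduled_lr=5e-5, the extra shrinkage per step
# is about 5e-7 of each pre-step weight value.
print(decayed(w, w, weight_decay=0.01, scheduled_lr=5e-5))
```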
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import multiprocessing
import paddle.fluid as fluid
from .optimization import adam_weight_decay_optimization
class DefaultStrategy(object):
def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
self.learning_rate = learning_rate
self._optimizer_name = optimizer_name
def execute(self, loss):
if self._optimizer_name.lower() == "adam":
self.optimizer = fluid.optimizer.Adam(
learning_rate=self.learning_rate)
elif self._optimizer_name.lower() == "sgd":
self.optimizer = fluid.optimizer.SGD(
learning_rate=self.learning_rate)
if self.optimizer is not None:
self.optimizer.minimize(loss)
else:
raise ValueError("DefaultStrategy's optimizer is None")
class BERTFinetuneStrategy(DefaultStrategy):
def __init__(self,
learning_rate=1e-4,
warmup_strategy="linear_warmup_decay",
warmup_proportion=0.0,
weight_decay=0.01,
optimizer_name=None):
super().__init__(
learning_rate=learning_rate, optimizer_name=optimizer_name)
# check strategy correctness
if warmup_strategy not in ["linear_warmup_decay", "noam_decay"]:
raise ValueError("warmup strategy {} is not setup "
"correctly".format(warmup_strategy))
self._warmup_strategy = warmup_strategy
self._warmup_proportion = warmup_proportion
self._weight_decay = weight_decay
@property
def warmup_strategy(self):
return self._warmup_strategy
@property
def warmup_proportion(self):
return self._warmup_proportion
@property
def weight_decay(self):
return self._weight_decay
def execute(self, loss, main_program, data_reader, config):
# calculate warmup steps
dev_count = self._get_dev_count(config)
num_train_examples = data_reader.get_num_examples(phase='train')
max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
warmup_steps = int(max_train_steps * self.warmup_proportion)
scheduled_lr = adam_weight_decay_optimization(
loss, warmup_steps, max_train_steps, self.learning_rate,
main_program, self.weight_decay, self.warmup_strategy)
return scheduled_lr
def _get_dev_count(self, config):
if config.use_cuda:
dev_count = fluid.core.get_cuda_device_count()
else:
dev_count = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
return dev_count
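For a concrete sense of the warmup computed in `BERTFinetuneStrategy.execute`, a worked example with invented numbers:
```
# Illustrative arithmetic only; the numbers are invented.
num_train_examples = 9600      # data_reader.get_num_examples(phase='train')
num_epoch, batch_size, dev_count = 3, 32, 1
warmup_proportion = 0.1

max_train_steps = num_epoch * num_train_examples // batch_size // dev_count
warmup_steps = int(max_train_steps * warmup_proportion)
print(max_train_steps, warmup_steps)   # 900 steps total, 90 of them warmup
```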
......@@ -191,7 +191,7 @@ class Module(object):
def _init_with_module_file(self, module_dir):
checker = ModuleChecker(module_dir)
if not checker.check():
logger.error("module check fail")
logger.error("Module init failed on {}".format(module_dir))
exit(1)
self.helper = ModuleHelper(module_dir)
......@@ -205,7 +205,7 @@ class Module(object):
self._load_assets()
self._recover_from_desc()
self._generate_sign_attr()
self._recovery_parameter(self.program)
self._restore_parameter(self.program)
self._recover_variable_info(self.program)
def _init_with_signature(self, signatures):
......@@ -228,7 +228,7 @@ class Module(object):
self.default_signature = sign
self.signatures[sign.name] = sign
def _recovery_parameter(self, program):
def _restore_parameter(self, program):
global_block = program.global_block()
param_attrs = self.desc.extra_info.map.data['param_attrs']
for key, param_attr in param_attrs.map.data.items():
......@@ -477,7 +477,7 @@ class Module(object):
if regularizer != "Default":
paddle_helper.set_parameter_regularizer(program, regularizer)
self._recovery_parameter(program)
self._restore_parameter(program)
self._recover_variable_info(program)
......