#coding:utf-8
#   Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import multiprocessing

import paddle.fluid as fluid

from paddlehub.finetune.optimization import adam_weight_decay_optimization
from paddlehub.finetune.regularizer import L2SPDecayRegularizer


def get_pretrained_parameter(main_program, start_program):
    """Return the parameters referenced by main_program that are not created
    in start_program, i.e. the parameters that come from the pre-trained
    module rather than the newly added task layers."""
    pretrained_parameters = []
    global_block = main_program.global_block()
    for op in global_block.ops[::-1]:
        for input_arg in op.input_arg_names:
            var = global_block.var(input_arg)
            if isinstance(
                    var, fluid.framework.Parameter
            ) and input_arg not in start_program.global_block().vars:
                pretrained_parameters.append(var)

    return pretrained_parameters
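
# A minimal usage sketch (hedged; assumes a fine-tune program has already been
# built): the helper returns the fluid Parameters that come from the pre-trained
# module, so a strategy can attach per-parameter regularizers to them.
#
#     params = get_pretrained_parameter(
#         fluid.default_main_program(), fluid.default_startup_program())
#     print([p.name for p in params])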


class DefaultStrategy(object):
    def __init__(self, learning_rate=1e-4, optimizer_name="adam"):
        self.learning_rate = learning_rate
        self._optimizer_name = optimizer_name
        if self._optimizer_name.lower() == "sgd":
            self.optimizer = fluid.optimizer.SGD(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "adagrad":
            self.optimizer = fluid.optimizer.Adagrad(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "adamax":
            self.optimizer = fluid.optimizer.Adamax(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "decayedadagrad":
            self.optimizer = fluid.optimizer.DecayedAdagrad(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "ftrl":
            self.optimizer = fluid.optimizer.Ftrl(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "larsmomentum":
            self.optimizer = fluid.optimizer.LarsMomentum(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "momentum":
            self.optimizer = fluid.optimizer.Momentum(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "decayedadagrad":
            self.optimizer = fluid.optimizer.DecayedAdagrad(
                learning_rate=self.learning_rate)
        elif self._optimizer_name.lower() == "rmsprop":
            self.optimizer = fluid.optimizer.RMSPropOptimizer(
                learning_rate=self.learning_rate)
        else:
            # unrecognized optimizer names fall back to Adam
            self.optimizer = fluid.optimizer.Adam(
                learning_rate=self.learning_rate)

    def execute(self, loss, data_reader, config):
        if self.optimizer is not None:
            self.optimizer.minimize(loss)
        else:
            raise ValueError("DefaultStrategy's optimizer is None")

    # TODO complete __str__()
    def __str__(self):
        return "DefaultStrategy"


class AdamWeightDecayStrategy(DefaultStrategy):
    def __init__(self,
                 learning_rate=1e-4,
                 lr_scheduler="linear_decay",
                 warmup_proportion=0.1,
                 weight_decay=0.01,
                 optimizer_name="adam"):
        super(AdamWeightDecayStrategy, self).__init__(
            learning_rate=learning_rate, optimizer_name=optimizer_name)
        # check strategy correctness
        if lr_scheduler not in ["linear_decay", "noam_decay"]:
            raise ValueError("lr_scheduler {} is not setup "
                             "correctly".format(lr_scheduler))
        self._lr_scheduler = lr_scheduler
        self._warmup_proportion = warmup_proportion
        self._weight_decay = weight_decay

    @property
    def lr_scheduler(self):
        return self._lr_scheduler

    @property
    def warmup_proportion(self):
        return self._warmup_proportion

    @property
    def weight_decay(self):
        return self._weight_decay

    def execute(self, loss, data_reader, config):
        main_program = loss.block.program
        # calculate warmup steps
        dev_count = self._get_dev_count(config)
        data_reader.data_generator(
            batch_size=config.batch_size, phase='train', shuffle=True)
        data_reader.data_generator(
            batch_size=config.batch_size, phase='val', shuffle=False)
        data_reader.data_generator(
            batch_size=config.batch_size, phase='dev', shuffle=False)
        num_train_examples = data_reader.get_num_examples(phase='train')
        max_train_steps = config.num_epoch * num_train_examples // config.batch_size // dev_count
        warmup_steps = int(max_train_steps * self.warmup_proportion)
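        # e.g. with 10000 training examples, batch_size=32, num_epoch=3 and one
        # device: max_train_steps = 3 * 10000 // 32 // 1 = 937, and with
        # warmup_proportion=0.1, warmup_steps = int(937 * 0.1) = 93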

        scheduled_lr = adam_weight_decay_optimization(
            loss, warmup_steps, max_train_steps, self.learning_rate,
            main_program, self.weight_decay, self.lr_scheduler)

        return scheduled_lr

    def _get_dev_count(self, config):
        if config.use_cuda:
            dev_count = fluid.core.get_cuda_device_count()
        else:
            dev_count = int(
                os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

        return dev_count

    # TODO complete __str__()
    def __str__(self):
        return "AdamWeightDecayStrategy"


class DefaultFinetuneStrategy(DefaultStrategy):
    def __init__(self,
                 learning_rate=1e-4,
                 optimizer_name="adam",
                 regularization_coeff=1e-3):
        super(DefaultFinetuneStrategy, self).__init__(
            learning_rate=learning_rate, optimizer_name=optimizer_name)
        self.learning_rate = learning_rate
        self._optimizer_name = optimizer_name
        self.regularization_coeff = regularization_coeff

    def execute(self, loss, data_reader, config):
        # get pretrained parameters
        program = loss.block.program
        global_block = program.global_block()
        pretrained_params = get_pretrained_parameter(
            program, fluid.default_startup_program())

        # set parameter attrs
        for index, param in enumerate(pretrained_params):
            param.regularizer = fluid.regularizer.L2Decay(
                regularization_coeff=self.regularization_coeff)

        if self.optimizer is not None:
            self.optimizer.minimize(loss)
        else:
            raise ValueError("DefaultFinetuneStrategy's optimizer is None")


class L2SPFinetuneStrategy(DefaultStrategy):
    def __init__(self,
                 learning_rate=1e-4,
                 optimizer_name="adam",
                 regularization_coeff=1e-3):
        super(L2SPFinetuneStrategy, self).__init__(
            learning_rate=learning_rate, optimizer_name=optimizer_name)
        self.learning_rate = learning_rate
        self._optimizer_name = optimizer_name
        self.regularization_coeff = regularization_coeff

    def execute(self, loss, data_reader, config):
        # get pretrained parameters
        program = loss.block.program
        global_block = program.global_block()
        pretrained_params = get_pretrained_parameter(
            program, fluid.default_startup_program())

        # set parameter attrs
        for index, param in enumerate(pretrained_params):
            param.regularizer = L2SPDecayRegularizer(
                regularization_coeff=self.regularization_coeff)

        if self.optimizer is not None:
            self.optimizer.minimize(loss)
        else:
            raise ValueError("DefaultFinetuneStrategy's optimizer is None")