Commit b8a7d186 authored by littletomatodonkey

fix optimizer and regularizer

Parent 94a8f50a
@@ -19,36 +19,15 @@ from __future__ import print_function
 import sys
 import math

-import paddle.fluid as fluid
-import paddle.fluid.layers.ops as ops
-from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+from paddle.optimizer.lr_scheduler import LinearLrWarmup
+from paddle.optimizer.lr_scheduler import PiecewiseLR
+from paddle.optimizer.lr_scheduler import CosineAnnealingLR
+from paddle.optimizer.lr_scheduler import ExponentialLR

 __all__ = ['LearningRateBuilder']


-class Linear(object):
-    """
-    Linear learning rate decay
-    Args:
-        lr(float): initial learning rate
-        steps(int): total decay steps
-        end_lr(float): end learning rate, default: 0.0.
-    """
-
-    def __init__(self, lr, steps, end_lr=0.0, **kwargs):
-        super(Linear, self).__init__()
-        self.lr = lr
-        self.steps = steps
-        self.end_lr = end_lr
-
-    def __call__(self):
-        learning_rate = fluid.layers.polynomial_decay(
-            self.lr, self.steps, self.end_lr, power=1)
-        return learning_rate
-
-
-class Cosine(object):
+class Cosine(CosineAnnealingLR):
     """
     Cosine learning rate decay
     lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
@@ -60,20 +39,14 @@ class Cosine(object):
     """

     def __init__(self, lr, step_each_epoch, epochs, **kwargs):
-        super(Cosine, self).__init__()
-        self.lr = lr
-        self.step_each_epoch = step_each_epoch
-        self.epochs = epochs
-
-    def __call__(self):
-        learning_rate = fluid.layers.cosine_decay(
-            learning_rate=self.lr,
-            step_each_epoch=self.step_each_epoch,
-            epochs=self.epochs)
-        return learning_rate
+        super(Cosine, self).__init__(
+            learning_rate=lr,
+            T_max=step_each_epoch * epochs, )
+
+        self.update_specified = False


-class Piecewise(object):
+class Piecewise(PiecewiseLR):
     """
     Piecewise learning rate decay
@@ -85,16 +58,15 @@ class Piecewise(object):
     """

     def __init__(self, lr, step_each_epoch, decay_epochs, gamma=0.1, **kwargs):
-        super(Piecewise, self).__init__()
-        self.bd = [step_each_epoch * e for e in decay_epochs]
-        self.lr = [lr * (gamma**i) for i in range(len(self.bd) + 1)]
-
-    def __call__(self):
-        learning_rate = fluid.layers.piecewise_decay(self.bd, self.lr)
-        return learning_rate
+        boundaries = [step_each_epoch * e for e in decay_epochs]
+        lr_values = [lr * (gamma**i) for i in range(len(boundaries) + 1)]
+        super(Piecewise, self).__init__(
+            boundaries=boundaries, values=lr_values)
+
+        self.update_specified = False


-class CosineWarmup(object):
+class CosineWarmup(LinearLrWarmup):
     """
     Cosine learning rate decay with warmup
     [0, warmup_epoch): linear warmup
@@ -108,28 +80,23 @@ class CosineWarmup(object):
     """

     def __init__(self, lr, step_each_epoch, epochs, warmup_epoch=5, **kwargs):
-        super(CosineWarmup, self).__init__()
-        self.lr = lr
-        self.step_each_epoch = step_each_epoch
-        self.epochs = epochs
-        self.warmup_epoch = warmup_epoch
-
-    def __call__(self):
-        learning_rate = fluid.layers.cosine_decay(
-            learning_rate=self.lr,
-            step_each_epoch=self.step_each_epoch,
-            epochs=self.epochs)
+        assert epochs > warmup_epoch, "total epoch({}) should be larger than warmup_epoch({}) in CosineWarmup.".format(
+            epochs, warmup_epoch)
+        warmup_step = warmup_epoch * step_each_epoch
+        start_lr = 0.0
+        end_lr = lr
+        lr_sch = Cosine(lr, step_each_epoch, epochs - warmup_epoch)

-        learning_rate = fluid.layers.linear_lr_warmup(
-            learning_rate,
-            warmup_steps=self.warmup_epoch * self.step_each_epoch,
-            start_lr=0.0,
-            end_lr=self.lr)
-        return learning_rate
+        super(CosineWarmup, self).__init__(
+            learning_rate=lr_sch,
+            warmup_steps=warmup_step,
+            start_lr=start_lr,
+            end_lr=end_lr)
+
+        self.update_specified = False


-class ExponentialWarmup(object):
+class ExponentialWarmup(LinearLrWarmup):
     """
     Exponential learning rate decay with warmup
     [0, warmup_epoch): linear warmup
@@ -150,27 +117,22 @@ class ExponentialWarmup(object):
                  decay_rate=0.97,
                  warmup_epoch=5,
                  **kwargs):
-        super(ExponentialWarmup, self).__init__()
-        self.lr = lr
+        warmup_step = warmup_epoch * step_each_epoch
+        start_lr = 0.0
+        end_lr = lr
+        lr_sch = ExponentialLR(lr, decay_rate)
+
+        super(ExponentialWarmup, self).__init__(
+            learning_rate=lr_sch,
+            warmup_steps=warmup_step,
+            start_lr=start_lr,
+            end_lr=end_lr)
+
+        # NOTE: hack method to update exponential lr scheduler
+        self.update_specified = True
+        self.update_start_step = warmup_step
+        self.update_step_interval = int(decay_epochs * step_each_epoch)
         self.step_each_epoch = step_each_epoch
-        self.decay_epochs = decay_epochs
-        self.decay_rate = decay_rate
-        self.warmup_epoch = warmup_epoch
-
-    def __call__(self):
-        learning_rate = fluid.layers.exponential_decay(
-            learning_rate=self.lr,
-            decay_steps=self.decay_epochs * self.step_each_epoch,
-            decay_rate=self.decay_rate,
-            staircase=False)
-
-        learning_rate = fluid.layers.linear_lr_warmup(
-            learning_rate,
-            warmup_steps=self.warmup_epoch * self.step_each_epoch,
-            start_lr=0.0,
-            end_lr=self.lr)
-        return learning_rate


 class LearningRateBuilder():
@@ -193,5 +155,5 @@ class LearningRateBuilder():
     def __call__(self):
         mod = sys.modules[__name__]
-        lr = getattr(mod, self.function)(**self.params)()
+        lr = getattr(mod, self.function)(**self.params)
         return lr
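With this change the scheduler classes above no longer emit static-graph LR tensors; each instance is itself a paddle LR scheduler object that the caller keeps a handle to and advances with `step()`. A minimal usage sketch (the module path, toy model, and numbers are illustrative assumptions, not part of the commit; it assumes the transitional Paddle build where `paddle.optimizer.lr_scheduler` exists):

```python
import paddle
from ppcls.optimizer.learning_rate import Cosine  # assumed module path

net = paddle.nn.Linear(8, 2)  # toy model for illustration
# 500 iterations per epoch, 120 epochs -> T_max = 60000 steps.
sched = Cosine(lr=0.1, step_each_epoch=500, epochs=120)

# The scheduler object is passed as learning_rate, mirroring the Momentum
# wrapper updated in this commit, and stepped explicitly per iteration.
opt = paddle.optimizer.Momentum(
    learning_rate=sched, momentum=0.9, parameters=net.parameters())

for _ in range(10):  # stand-in for the real training loop
    loss = net(paddle.rand([4, 8])).mean()
    loss.backward()
    opt.step()
    opt.clear_grad()
    sched.step()  # advance the schedule manually
```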
@@ -18,7 +18,7 @@ from __future__ import print_function
 import sys

-import paddle.fluid as fluid
+import paddle

 __all__ = ['OptimizerBuilder']
@@ -33,11 +33,10 @@ class L1Decay(object):
     def __init__(self, factor=0.0):
         super(L1Decay, self).__init__()
-        self.regularization_coeff = factor
+        self.factor = factor

     def __call__(self):
-        reg = fluid.regularizer.L1Decay(
-            regularization_coeff=self.regularization_coeff)
+        reg = paddle.regularizer.L1Decay(self.factor)
         return reg
@@ -51,11 +50,10 @@ class L2Decay(object):
     def __init__(self, factor=0.0):
         super(L2Decay, self).__init__()
-        self.regularization_coeff = factor
+        self.factor = factor

     def __call__(self):
-        reg = fluid.regularizer.L2Decay(
-            regularization_coeff=self.regularization_coeff)
+        reg = paddle.regularizer.L2Decay(self.factor)
         return reg
@@ -83,11 +81,11 @@ class Momentum(object):
         self.regularization = regularization

     def __call__(self):
-        opt = fluid.optimizer.Momentum(
+        opt = paddle.optimizer.Momentum(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
-            parameter_list=self.parameter_list,
-            regularization=self.regularization)
+            parameters=self.parameter_list,
+            weight_decay=self.regularization)
         return opt
@@ -121,13 +119,13 @@ class RMSProp(object):
         self.regularization = regularization

     def __call__(self):
-        opt = fluid.optimizer.RMSProp(
+        opt = paddle.optimizer.RMSProp(
             learning_rate=self.learning_rate,
             momentum=self.momentum,
             rho=self.rho,
             epsilon=self.epsilon,
-            parameter_list=self.parameter_list,
-            regularization=self.regularization)
+            parameters=self.parameter_list,
+            weight_decay=self.regularization)
         return opt
...
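The substance of these hunks is the argument rename when moving from `fluid.optimizer` to `paddle.optimizer`: `parameter_list` becomes `parameters`, the regularizer is now passed as `weight_decay`, and the regularizer coefficient becomes a positional argument. A short sketch of the resulting calls outside the wrapper classes (toy model and values, assuming a Paddle 2.0-style dygraph install):

```python
import paddle

net = paddle.nn.Linear(8, 2)  # toy model for illustration

# paddle.regularizer.L2Decay replaces fluid.regularizer.L2Decay; the
# coefficient is passed positionally instead of as regularization_coeff=.
reg = paddle.regularizer.L2Decay(1e-4)

# fluid.optimizer.Momentum(parameter_list=..., regularization=...) becomes
# paddle.optimizer.Momentum(parameters=..., weight_decay=...).
opt = paddle.optimizer.Momentum(
    learning_rate=0.1,
    momentum=0.9,
    parameters=net.parameters(),
    weight_decay=reg)
```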
@@ -19,7 +19,9 @@ from __future__ import print_function
 import os
 import sys

-import paddle.fluid as fluid
+import paddle
+# TODO: need to be fixed in the future.
+from paddle.fluid import is_compiled_with_cuda

 from ppcls.modeling import get_architectures
 from ppcls.modeling import similar_architectures
@@ -33,10 +35,9 @@ def check_version():
     """
     err = "PaddlePaddle version 1.8.0 or higher is required, " \
           "or a suitable develop version is satisfied as well. \n" \
-          "Please make sure the version is good with your code." \
+          "Please make sure the version is good with your code."
     try:
-        fluid.require_version('1.8.0')
+        paddle.utils.require_version('0.0.0')
     except Exception:
         logger.error(err)
         sys.exit(1)
@@ -50,7 +51,7 @@ def check_gpu():
           "install paddlepaddle-gpu to run model on GPU."
     try:
-        assert fluid.is_compiled_with_cuda()
+        assert is_compiled_with_cuda()
     except AssertionError:
         logger.error(err)
         sys.exit(1)
...
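For context, `require_version('0.0.0')` relaxes the check so that any installed build passes, including develop builds that report version 0.0.0, while `is_compiled_with_cuda` is temporarily still imported from `paddle.fluid` (hence the TODO). A tiny standalone sketch of the same checks, assumed to run on a Paddle 2.0-style install:

```python
import paddle
from paddle.fluid import is_compiled_with_cuda  # transitional import, see TODO above

try:
    # Accepts release builds as well as develop builds reporting 0.0.0.
    paddle.utils.require_version('0.0.0')
except Exception as e:
    print("unsupported PaddlePaddle version:", e)

print("compiled with CUDA:", is_compiled_with_cuda())
```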
@@ -22,7 +22,8 @@ import re
 import shutil
 import tempfile

-import paddle.fluid as fluid
+import paddle
+from paddle.io import load_program_state

 from ppcls.utils import logger
@@ -50,7 +51,7 @@ def load_dygraph_pretrain(model, path=None, load_static_weights=False):
         raise ValueError("Model pretrain path {} does not "
                          "exists.".format(path))
     if load_static_weights:
-        pre_state_dict = fluid.load_program_state(path)
+        pre_state_dict = load_program_state(path)
         param_state_dict = {}
         model_dict = model.state_dict()
         for key in model_dict.keys():
@@ -64,7 +65,7 @@ def load_dygraph_pretrain(model, path=None, load_static_weights=False):
         model.set_dict(param_state_dict)
         return

-    param_state_dict, optim_state_dict = fluid.load_dygraph(path)
+    param_state_dict, optim_state_dict = paddle.load(path)
     model.set_dict(param_state_dict)
     return
@@ -105,7 +106,7 @@ def init_model(config, net, optimizer=None):
             "Given dir {}.pdparams not exist.".format(checkpoints)
         assert os.path.exists(checkpoints + ".pdopt"), \
             "Given dir {}.pdopt not exist.".format(checkpoints)
-        para_dict, opti_dict = fluid.dygraph.load_dygraph(checkpoints)
+        para_dict, opti_dict = paddle.load(checkpoints)
         net.set_dict(para_dict)
         optimizer.set_dict(opti_dict)
         logger.info(
@@ -141,8 +142,8 @@ def save_model(net, optimizer, model_path, epoch_id, prefix='ppcls'):
     _mkdir_if_not_exist(model_path)
     model_prefix = os.path.join(model_path, prefix)

-    fluid.dygraph.save_dygraph(net.state_dict(), model_prefix)
-    fluid.dygraph.save_dygraph(optimizer.state_dict(), model_prefix)
+    paddle.save(net.state_dict(), model_prefix)
+    paddle.save(optimizer.state_dict(), model_prefix)
     logger.info(
         logger.coloring("Already save model in {}".format(model_path),
                         "HEADER"))
@@ -69,8 +69,6 @@ def create_model(architecture, classes_num):
     """
     name = architecture["name"]
     params = architecture.get("params", {})
-    print(name)
-    print(params)
     return architectures.__dict__[name](class_dim=classes_num, **params)
@@ -237,7 +235,7 @@ def create_optimizer(config, parameter_list=None):
     # create optimizer instance
     opt_config = config['OPTIMIZER']
     opt = OptimizerBuilder(**opt_config)
-    return opt(lr, parameter_list)
+    return opt(lr, parameter_list), lr


 def create_feeds(batch, use_mix):
@@ -253,7 +251,13 @@ def create_feeds(batch, use_mix):
     return feeds


-def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
+def run(dataloader,
+        config,
+        net,
+        optimizer=None,
+        lr_scheduler=None,
+        epoch=0,
+        mode='train'):
     """
     Feed data to the model and fetch the measures and loss
@@ -302,6 +306,17 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'):
             metric_list['lr'].update(
                 optimizer._global_learning_rate().numpy()[0], batch_size)

+            if lr_scheduler is not None:
+                if lr_scheduler.update_specified:
+                    curr_global_counter = lr_scheduler.step_each_epoch * epoch + idx
+                    update = max(
+                        0, curr_global_counter - lr_scheduler.update_start_step
+                    ) % lr_scheduler.update_step_interval == 0
+                    if update:
+                        lr_scheduler.step()
+                else:
+                    lr_scheduler.step()
+
         for name, fetch in fetchs.items():
             metric_list[name].update(fetch.numpy()[0], batch_size)
         metric_list['batch_time'].update(time.time() - tic)
...
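The new block in `run()` steps the scheduler once per iteration, except for schedulers flagged with `update_specified` (currently only `ExponentialWarmup`), which step every iteration during warmup (the counter difference is clamped to 0) and then only every `update_step_interval` iterations afterwards. A self-contained, pure-Python sketch of that counter arithmetic with made-up values:

```python
# Illustration of the update_specified stepping rule added to run();
# step_each_epoch, warmup_epoch and decay_epochs are made-up examples.
step_each_epoch = 100
warmup_epoch = 5
decay_epochs = 2.4

update_start_step = warmup_epoch * step_each_epoch          # 500
update_step_interval = int(decay_epochs * step_each_epoch)  # 240

stepped_at = []
for epoch in range(10):
    for idx in range(step_each_epoch):
        curr_global_counter = step_each_epoch * epoch + idx
        update = max(
            0, curr_global_counter - update_start_step
        ) % update_step_interval == 0
        if update:
            stepped_at.append(curr_global_counter)

# During warmup (counter <= 500) every iteration triggers a step; afterwards
# the scheduler is stepped every 240 iterations: 500, 740, 980, ...
print(stepped_at[:3], stepped_at[500:503])
```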
@@ -23,15 +23,16 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

-import program
-from ppcls.utils import logger
-from ppcls.utils.save_load import init_model, save_model
-from ppcls.utils.config import get_config
-from ppcls.data import Reader
 import paddle
 from paddle.distributed import ParallelEnv
+
+from ppcls.data import Reader
+from ppcls.utils.config import get_config
+from ppcls.utils.save_load import init_model, save_model
+from ppcls.utils import logger
+import program


 def parse_args():
     parser = argparse.ArgumentParser("PaddleClas train script")
     parser.add_argument(
@@ -67,7 +68,7 @@ def main(args):
     net = program.create_model(config.ARCHITECTURE, config.classes_num)

-    optimizer = program.create_optimizer(
+    optimizer, lr_scheduler = program.create_optimizer(
         config, parameter_list=net.parameters())

     if config["use_data_parallel"]:
@@ -90,8 +91,8 @@ def main(args):
     for epoch_id in range(config.epochs):
         net.train()
         # 1. train with train dataset
-        program.run(train_dataloader, config, net, optimizer, epoch_id,
-                    'train')
+        program.run(train_dataloader, config, net, optimizer, lr_scheduler,
+                    epoch_id, 'train')

         if not config["use_data_parallel"] or ParallelEnv().local_rank == 0:
             # 2. validate with validate dataset
...