Commit 5203c85f, authored by shippingwang

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleClas into dygraph

@@ -2,6 +2,7 @@ mode: 'train'
 ARCHITECTURE:
     name: 'MobileNetV3_large_x1_0'
 pretrained_model: "./pretrained/MobileNetV3_large_x1_0_pretrained"
+load_static_weights: True
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 1020
...
@@ -5,6 +5,9 @@ ARCHITECTURE:
 pretrained_model:
     - "./pretrained/flowers102_R50_vd_final/ppcls"
     - "./pretrained/MobileNetV3_large_x1_0_pretrained/"
+load_static_weights:
+    - False
+    - True
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 7169
...
 mode: 'train'
 ARCHITECTURE:
     name: 'ResNet50_vd'
+checkpoints: ""
 pretrained_model: ""
+load_static_weights: True
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 1020
...
@@ -2,6 +2,7 @@ mode: 'train'
 ARCHITECTURE:
     name: 'ResNet50_vd'
 pretrained_model: "./pretrained/ResNet50_vd_pretrained"
+load_static_weights: true
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 1020
...
@@ -4,6 +4,7 @@ ARCHITECTURE:
     params:
         lr_mult_list: [0.1, 0.1, 0.2, 0.2, 0.3]
 pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained"
+load_static_weights: True
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 1020
...
@@ -4,6 +4,7 @@ ARCHITECTURE:
     params:
         lr_mult_list: [0.1, 0.1, 0.2, 0.2, 0.3]
 pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained"
+load_static_weights: True
 model_save_dir: "./output/"
 classes_num: 102
 total_images: 1020
...
@@ -28,3 +28,5 @@ from .mobilenet_v1 import MobileNetV1_x0_25, MobileNetV1_x0_5, MobileNetV1_x0_75
 from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x0_75, MobileNetV2, MobileNetV2_x1_5, MobileNetV2_x2_0
 from .mobilenet_v3 import MobileNetV3_small_x0_35, MobileNetV3_small_x0_5, MobileNetV3_small_x0_75, MobileNetV3_small_x1_0, MobileNetV3_small_x1_25, MobileNetV3_large_x0_35, MobileNetV3_large_x0_5, MobileNetV3_large_x0_75, MobileNetV3_large_x1_0, MobileNetV3_large_x1_25
 from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2_swish
+from .distillation_models import ResNet50_vd_distill_MobileNetV3_large_x1_0
\ No newline at end of file
-#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
 #
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

 from __future__ import absolute_import
 from __future__ import division
@@ -32,27 +32,35 @@ __all__ = [
 ]

-class ResNet50_vd_distill_MobileNetV3_large_x1_0():
-    def net(self, input, class_dim=1000):
-        # student
-        student = MobileNetV3_large_x1_0()
-        out_student = student.net(input, class_dim=class_dim)
-        # teacher
-        teacher = ResNet50_vd()
-        out_teacher = teacher.net(input, class_dim=class_dim)
-        out_teacher.stop_gradient = True
-        return out_teacher, out_student
+class ResNet50_vd_distill_MobileNetV3_large_x1_0(fluid.dygraph.Layer):
+    def __init__(self, class_dim=1000, **args):
+        super(ResNet50_vd_distill_MobileNetV3_large_x1_0, self).__init__()
+        self.teacher = ResNet50_vd(class_dim=class_dim, **args)
+        self.student = MobileNetV3_large_x1_0(class_dim=class_dim, **args)
+
+    def forward(self, input):
+        teacher_label = self.teacher(input)
+        teacher_label.stop_gradient = True
+        student_label = self.student(input)
+        return teacher_label, student_label

-class ResNeXt101_32x16d_wsl_distill_ResNet50_vd():
-    def net(self, input, class_dim=1000):
-        # student
-        student = ResNet50_vd()
-        out_student = student.net(input, class_dim=class_dim)
-        # teacher
-        teacher = ResNeXt101_32x16d_wsl()
-        out_teacher = teacher.net(input, class_dim=class_dim)
-        out_teacher.stop_gradient = True
-        return out_teacher, out_student
+class ResNeXt101_32x16d_wsl_distill_ResNet50_vd(fluid.dygraph.Layer):
+    def __init__(self, class_dim=1000, **args):
+        super(ResNeXt101_32x16d_wsl_distill_ResNet50_vd, self).__init__()
+        self.teacher = ResNeXt101_32x16d_wsl(class_dim=class_dim, **args)
+        self.student = ResNet50_vd(class_dim=class_dim, **args)
+
+    def forward(self, input):
+        teacher_label = self.teacher(input)
+        teacher_label.stop_gradient = True
+        student_label = self.student(input)
+        return teacher_label, student_label
\ No newline at end of file
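
For orientation, a minimal usage sketch of the refactored wrapper under dygraph. This is a hypothetical smoke test, not part of the commit; it assumes the import added to the architectures __init__.py above.

import numpy as np
import paddle.fluid as fluid
from ppcls.modeling import architectures

with fluid.dygraph.guard(fluid.CPUPlace()):
    net = architectures.ResNet50_vd_distill_MobileNetV3_large_x1_0(
        class_dim=102)
    x = fluid.dygraph.to_variable(
        np.random.rand(1, 3, 224, 224).astype("float32"))
    teacher_out, student_out = net(x)
    # The teacher branch is detached, so a distillation loss computed on
    # (teacher_out, student_out) only trains the student.
    assert teacher_out.stop_gradient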
@@ -112,35 +112,19 @@ class CosineWarmup(object):
         self.lr = lr
         self.step_each_epoch = step_each_epoch
         self.epochs = epochs
-        self.warmup_epoch = fluid.layers.fill_constant(
-            shape=[1],
-            value=float(warmup_epoch),
-            dtype='float32',
-            force_cpu=True)
+        self.warmup_epoch = warmup_epoch

     def __call__(self):
-        global_step = _decay_step_counter()
-        learning_rate = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="learning_rate")
-        epoch = ops.floor(global_step / self.step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < self.warmup_epoch):
-                decayed_lr = self.lr * \
-                    (global_step / (self.step_each_epoch * self.warmup_epoch))
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
-            with switch.default():
-                current_step = global_step - self.warmup_epoch * self.step_each_epoch
-                total_step = (
-                    self.epochs - self.warmup_epoch) * self.step_each_epoch
-                decayed_lr = self.lr * \
-                    (ops.cos(current_step * math.pi / total_step) + 1) / 2
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
+        learning_rate = fluid.layers.cosine_decay(
+            learning_rate=self.lr,
+            step_each_epoch=self.step_each_epoch,
+            epochs=self.epochs)
+        learning_rate = fluid.layers.linear_lr_warmup(
+            learning_rate,
+            warmup_steps=self.warmup_epoch * self.step_each_epoch,
+            start_lr=0.0,
+            end_lr=self.lr)
         return learning_rate
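
The rewrite above trades the hand-built Switch/assign graph for two stock schedulers chained together. As a sanity check, here is a plain-Python sketch of the curve the chained ops should produce, based on my reading of fluid's documented formulas rather than the fluid code itself:

import math

def cosine_warmup_lr(step, lr, step_each_epoch, epochs, warmup_epoch):
    # linear_lr_warmup: ramp linearly from start_lr=0.0 up to end_lr=lr.
    warmup_steps = warmup_epoch * step_each_epoch
    if step < warmup_steps:
        return lr * step / warmup_steps
    # cosine_decay: epoch-granular half-cosine from lr down toward 0.
    epoch = step // step_each_epoch
    return lr * (math.cos(epoch * math.pi / epochs) + 1) / 2

One visible behavior change: the old code ran the cosine over the post-warmup epochs only, while cosine_decay counts all epochs including warmup.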
@@ -169,37 +153,22 @@ class ExponentialWarmup(object):
         super(ExponentialWarmup, self).__init__()
         self.lr = lr
         self.step_each_epoch = step_each_epoch
-        self.decay_epochs = decay_epochs * self.step_each_epoch
+        self.decay_epochs = decay_epochs
         self.decay_rate = decay_rate
-        self.warmup_epoch = fluid.layers.fill_constant(
-            shape=[1],
-            value=float(warmup_epoch),
-            dtype='float32',
-            force_cpu=True)
+        self.warmup_epoch = warmup_epoch

     def __call__(self):
-        global_step = _decay_step_counter()
-        learning_rate = fluid.layers.tensor.create_global_var(
-            shape=[1],
-            value=0.0,
-            dtype='float32',
-            persistable=True,
-            name="learning_rate")
-        epoch = ops.floor(global_step / self.step_each_epoch)
-        with fluid.layers.control_flow.Switch() as switch:
-            with switch.case(epoch < self.warmup_epoch):
-                decayed_lr = self.lr * \
-                    (global_step / (self.step_each_epoch * self.warmup_epoch))
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
-            with switch.default():
-                rest_step = global_step - self.warmup_epoch * self.step_each_epoch
-                div_res = ops.floor(rest_step / self.decay_epochs)
-                decayed_lr = self.lr * (self.decay_rate**div_res)
-                fluid.layers.tensor.assign(
-                    input=decayed_lr, output=learning_rate)
+        learning_rate = fluid.layers.exponential_decay(
+            learning_rate=self.lr,
+            decay_steps=self.decay_epochs * self.step_each_epoch,
+            decay_rate=self.decay_rate,
+            staircase=False)
+        learning_rate = fluid.layers.linear_lr_warmup(
+            learning_rate,
+            warmup_steps=self.warmup_epoch * self.step_each_epoch,
+            start_lr=0.0,
+            end_lr=self.lr)
         return learning_rate
......
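The same composition pattern applies here; an analogous plain-Python sketch, again from the documented formulas rather than the fluid source:

def exponential_warmup_lr(step, lr, step_each_epoch, decay_epochs,
                          decay_rate, warmup_epoch):
    warmup_steps = warmup_epoch * step_each_epoch
    if step < warmup_steps:
        return lr * step / warmup_steps
    # exponential_decay with staircase=False decays smoothly every step.
    return lr * decay_rate ** (step / (decay_epochs * step_each_epoch))

Note that the old code floored the exponent (a per-interval staircase, offset by the warmup), whereas staircase=False decays continuously from step 0.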
@@ -31,12 +31,12 @@ def check_version():
     Log error and exit when the installed version of paddlepaddle is
     not satisfied.
     """
-    err = "PaddlePaddle version 1.7 or higher is required, " \
+    err = "PaddlePaddle version 2.0.0 or higher is required, " \
           "or a suitable develop version is satisfied as well. \n" \
           "Please make sure the version is good with your code." \

     try:
-        fluid.require_version('1.7.0')
+        fluid.require_version('2.0.0')
     except Exception:
         logger.error(err)
         sys.exit(1)
...
@@ -16,9 +16,6 @@ import logging
 import os
 import datetime

-from imp import reload
-reload(logging)
-
 logging.basicConfig(
     level=logging.INFO,
     format="%(asctime)s %(levelname)s: %(message)s",
...
@@ -26,7 +26,7 @@ import paddle.fluid as fluid
 from ppcls.utils import logger

-__all__ = ['init_model', 'save_model']
+__all__ = ['init_model', 'save_model', 'load_dygraph_pretrain']

 def _mkdir_if_not_exist(path):
@@ -45,71 +45,34 @@ def _mkdir_if_not_exist(path):
             raise OSError('Failed to mkdir {}'.format(path))

-def _load_state(path):
-    if os.path.exists(path + '.pdopt'):
-        # XXX another hack to ignore the optimizer state
-        tmp = tempfile.mkdtemp()
-        dst = os.path.join(tmp, os.path.basename(os.path.normpath(path)))
-        shutil.copy(path + '.pdparams', dst + '.pdparams')
-        state = fluid.io.load_program_state(dst)
-        shutil.rmtree(tmp)
-    else:
-        state = fluid.io.load_program_state(path)
-    return state
-
-def load_params(exe, prog, path, ignore_params=None):
-    """
-    Load model from the given path.
-    Args:
-        exe (fluid.Executor): The fluid.Executor object.
-        prog (fluid.Program): load weight to which Program object.
-        path (string): URL string or loca model path.
-        ignore_params (list): ignore variable to load when finetuning.
-            It can be specified by finetune_exclude_pretrained_params
-            and the usage can refer to the document
-            docs/advanced_tutorials/TRANSFER_LEARNING.md
-    """
+def load_dygraph_pretrain(
+        model,
+        path=None,
+        load_static_weights=False, ):
     if not (os.path.isdir(path) or os.path.exists(path + '.pdparams')):
         raise ValueError("Model pretrain path {} does not "
                          "exists.".format(path))
+    if load_static_weights:
+        pre_state_dict = fluid.load_program_state(path)
+        param_state_dict = {}
+        model_dict = model.state_dict()
+        for key in model_dict.keys():
+            weight_name = model_dict[key].name
+            if weight_name in pre_state_dict.keys():
+                print('Load weight: {}, shape: {}'.format(
+                    weight_name, pre_state_dict[weight_name].shape))
+                param_state_dict[key] = pre_state_dict[weight_name]
+            else:
+                param_state_dict[key] = model_dict[key]
+        model.set_dict(param_state_dict)
+        return

-    logger.info(
-        logger.coloring('Loading parameters from {}...'.format(path),
-                        'HEADER'))
-    ignore_set = set()
-    state = _load_state(path)
-
-    # ignore the parameter which mismatch the shape
-    # between the model and pretrain weight.
-    all_var_shape = {}
-    for block in prog.blocks:
-        for param in block.all_parameters():
-            all_var_shape[param.name] = param.shape
-    ignore_set.update([
-        name for name, shape in all_var_shape.items()
-        if name in state and shape != state[name].shape
-    ])
-
-    if ignore_params:
-        all_var_names = [var.name for var in prog.list_vars()]
-        ignore_list = filter(
-            lambda var: any([re.match(name, var) for name in ignore_params]),
-            all_var_names)
-        ignore_set.update(list(ignore_list))
-
-    if len(ignore_set) > 0:
-        for k in ignore_set:
-            if k in state:
-                logger.warning(
-                    'variable {} is already excluded automatically'.format(k))
-                del state[k]
-
-    fluid.io.set_program_state(prog, state)
+    param_state_dict, optim_state_dict = fluid.load_dygraph(path)
+    model.set_dict(param_state_dict)
+    return

-def init_model(config, net, optimizer):
+def init_model(config, net, optimizer=None):
     """
     load model from checkpoint or pretrained_model
     """
@@ -128,16 +91,24 @@ def init_model(config, net, optimizer):
         return

     pretrained_model = config.get('pretrained_model')
+    load_static_weights = config.get('load_static_weights', False)
+    use_distillation = config.get('use_distillation', False)
     if pretrained_model:
         if not isinstance(pretrained_model, list):
             pretrained_model = [pretrained_model]
-        # TODO: load pretrained_model
-        raise NotImplementedError
-        for pretrain in pretrained_model:
-            load_params(exe, program, pretrain)
-        logger.info(
-            logger.coloring("Finish initing model from {}".format(
-                pretrained_model), "HEADER"))
+        if not isinstance(load_static_weights, list):
+            load_static_weights = [load_static_weights] * len(pretrained_model)
+        for idx, pretrained in enumerate(pretrained_model):
+            load_static = load_static_weights[idx]
+            model = net
+            if use_distillation and not load_static:
+                model = net.teacher
+            load_dygraph_pretrain(
+                model, path=pretrained, load_static_weights=load_static)
+        logger.info(
+            logger.coloring("Finish initing model from {}".format(
+                pretrained_model), "HEADER"))

 def save_model(net, optimizer, model_path, epoch_id, prefix='ppcls'):
...
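Tying the config and loader changes together, a hedged sketch of how the distillation finetune config above drives the new loader. The plain-dict config is for illustration only (the real trainer passes its parsed config object), and the weight paths are the ones from the distillation YAML, which must exist locally:

import paddle.fluid as fluid
from ppcls.modeling import architectures
from ppcls.utils.save_load import init_model

config = {
    # One entry per sub-model, paired index-by-index with a
    # load_static_weights flag, as in the flowers102 distillation YAML.
    'pretrained_model': [
        "./pretrained/flowers102_R50_vd_final/ppcls",
        "./pretrained/MobileNetV3_large_x1_0_pretrained/",
    ],
    'load_static_weights': [False, True],
    'use_distillation': True,
}

with fluid.dygraph.guard():
    net = architectures.ResNet50_vd_distill_MobileNetV3_large_x1_0(
        class_dim=102)
    # A scalar load_static_weights would be broadcast to every entry;
    # dygraph teacher checkpoints are routed into net.teacher, static
    # ones into the full net.
    init_model(config, net)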
@@ -18,6 +18,8 @@ import numpy as np
 import paddle.fluid as fluid

 from ppcls.modeling import architectures
+from ppcls.utils.save_load import load_dygraph_pretrain

 def parse_args():
     def str2bool(v):
@@ -28,9 +30,11 @@ def parse_args():
     parser.add_argument("-m", "--model", type=str)
     parser.add_argument("-p", "--pretrained_model", type=str)
     parser.add_argument("--use_gpu", type=str2bool, default=True)
+    parser.add_argument("--load_static_weights", type=str2bool, default=True)
     return parser.parse_args()

 def create_operators():
     size = 224
     img_mean = [0.485, 0.456, 0.406]
@@ -66,32 +70,32 @@ def main():
     args = parse_args()
     operators = create_operators()
     # assign the place
-    gpu_id = fluid.dygraph.parallel.Env().dev_id
-    place = fluid.CUDAPlace(gpu_id)
-
-    pre_weights_dict = fluid.load_program_state(args.pretrained_model)
+    if args.use_gpu:
+        gpu_id = fluid.dygraph.parallel.Env().dev_id
+        place = fluid.CUDAPlace(gpu_id)
+    else:
+        place = fluid.CPUPlace()

     with fluid.dygraph.guard(place):
         net = architectures.__dict__[args.model]()
         data = preprocess(args.image_file, operators)
         data = np.expand_dims(data, axis=0)
         data = fluid.dygraph.to_variable(data)
-        dy_weights_dict = net.state_dict()
-        pre_weights_dict_new = {}
-        for key in dy_weights_dict:
-            weights_name = dy_weights_dict[key].name
-            pre_weights_dict_new[key] = pre_weights_dict[weights_name]
-        net.set_dict(pre_weights_dict_new)
+        load_dygraph_pretrain(net, args.pretrained_model,
+                              args.load_static_weights)
         net.eval()
         outputs = net(data)
         outputs = fluid.layers.softmax(outputs)
         outputs = outputs.numpy()
         probs = postprocess(outputs)
         rank = 1
         for idx, prob in probs:
-            print("top{:d}, class id: {:d}, probability: {:.4f}".format(
-                rank, idx, prob))
+            print("top{:d}, class id: {:d}, probability: {:.4f}".format(rank, idx,
+                                                                        prob))
             rank += 1
+    return

 if __name__ == "__main__":
     main()
@@ -21,6 +21,7 @@ import time
 from collections import OrderedDict

+import paddle
 import paddle.fluid as fluid

 from ppcls.optimizer import LearningRateBuilder
@@ -71,6 +72,8 @@ def create_model(architecture, classes_num):
     """
     name = architecture["name"]
     params = architecture.get("params", {})
+    print(name)
+    print(params)
     return architectures.__dict__[name](class_dim=classes_num, **params)
@@ -278,7 +281,7 @@ def mixed_precision_optimizer(config, optimizer):
 def create_feeds(batch, use_mix):
-    image = to_variable(batch[0].numpy().astype("float32"))
+    image = batch[0]
     if use_mix:
         y_a = to_variable(batch[1].numpy().astype("int64").reshape(-1, 1))
         y_b = to_variable(batch[2].numpy().astype("int64").reshape(-1, 1))
...
@@ -57,13 +57,14 @@ def main(args):
     with fluid.dygraph.guard(place):
         net = program.create_model(config.ARCHITECTURE, config.classes_num)
-        if config["use_data_parallel"]:
-            strategy = fluid.dygraph.parallel.prepare_context()
-            net = fluid.dygraph.parallel.DataParallel(net, strategy)

         optimizer = program.create_optimizer(
             config, parameter_list=net.parameters())

+        if config["use_data_parallel"]:
+            strategy = fluid.dygraph.parallel.prepare_context()
+            net = fluid.dygraph.parallel.DataParallel(net, strategy)
+
         # load model from checkpoint or pretrained model
         init_model(config, net, optimizer)
@@ -102,7 +103,7 @@ def main(args):
                                       config.model_save_dir,
                                       config.ARCHITECTURE["name"])
             save_model(net, optimizer, model_path,
-                       "best_model_in_epoch_" + str(epoch_id))
+                       "best_model")
         # 3. save the persistable model
         if epoch_id % config.save_interval == 0:
...
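Condensing the tools/train.py change: the optimizer is now built from the un-wrapped net's parameter list, and only afterwards is the net wrapped for data parallelism, with weight restoration last. A sketch of the resulting setup order, assuming tools/ is on sys.path as in the repo's entry points and with the training loop elided:

import paddle.fluid as fluid
import program  # tools/program.py
from ppcls.utils.save_load import init_model

def setup(config):
    place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
    with fluid.dygraph.guard(place):
        net = program.create_model(config.ARCHITECTURE, config.classes_num)
        # 1. Optimizer first, bound to the raw layer's parameters.
        optimizer = program.create_optimizer(
            config, parameter_list=net.parameters())
        # 2. Then wrap for multi-card gradient synchronization.
        if config["use_data_parallel"]:
            strategy = fluid.dygraph.parallel.prepare_context()
            net = fluid.dygraph.parallel.DataParallel(net, strategy)
        # 3. Finally restore checkpoint / pretrained weights.
        init_model(config, net, optimizer)
        # ... training loop elided ...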