diff --git a/ppcls/modeling/architectures/__init__.py b/ppcls/modeling/architectures/__init__.py index db5df4e44920c7ae3f3d39b3e60e4c669773c680..5942b925e1e06ca561ec13a8f522b9fef6eb541e 100644 --- a/ppcls/modeling/architectures/__init__.py +++ b/ppcls/modeling/architectures/__init__.py @@ -13,3 +13,6 @@ # limitations under the License. from .resnet_name import * +from .dpn import DPN68 +from .densenet import DenseNet121 +from .hrnet import HRNet_W18_C \ No newline at end of file diff --git a/ppcls/modeling/architectures/densenet.py b/ppcls/modeling/architectures/densenet.py index 70fd058f024f0517560bb6ccb1a234a40da3d52c..b0950064153bfd08fa68f2132746ae4f95c83f56 100644 --- a/ppcls/modeling/architectures/densenet.py +++ b/ppcls/modeling/architectures/densenet.py @@ -1,3 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import numpy as np import paddle import paddle.fluid as fluid @@ -268,26 +286,26 @@ class DenseNet(fluid.dygraph.Layer): return y -def DenseNet121(): - model = DenseNet(layers=121) +def DenseNet121(**args): + model = DenseNet(layers=121, **args) return model -def DenseNet161(): - model = DenseNet(layers=161) +def DenseNet161(**args): + model = DenseNet(layers=161, **args) return model -def DenseNet169(): - model = DenseNet(layers=169) +def DenseNet169(**args): + model = DenseNet(layers=169, **args) return model -def DenseNet201(): - model = DenseNet(layers=201) +def DenseNet201(**args): + model = DenseNet(layers=201, **args) return model -def DenseNet264(): - model = DenseNet(layers=264) +def DenseNet264(**args): + model = DenseNet(layers=264, **args) return model diff --git a/ppcls/modeling/architectures/dpn.py b/ppcls/modeling/architectures/dpn.py index d4271061de32db04410cddac8e83b738cc8dbd10..44fc4b5aceb149472f28b9e29a6758e304a2a65f 100644 --- a/ppcls/modeling/architectures/dpn.py +++ b/ppcls/modeling/architectures/dpn.py @@ -1,3 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import numpy as np import sys import paddle @@ -386,26 +404,26 @@ class DPN(fluid.dygraph.Layer): return net_arg -def DPN68(): - model = DPN(layers=68) +def DPN68(**args): + model = DPN(layers=68, **args) return model -def DPN92(): - model = DPN(layers=92) +def DPN92(**args): + model = DPN(layers=92, **args) return model -def DPN98(): - model = DPN(layers=98) +def DPN98(**args): + model = DPN(layers=98, **args) return model -def DPN107(): - model = DPN(layers=107) +def DPN107(**args): + model = DPN(layers=107, **args) return model -def DPN131(): - model = DPN(layers=131) +def DPN131(**args): + model = DPN(layers=131, **args) return model diff --git a/ppcls/modeling/architectures/hrnet.py b/ppcls/modeling/architectures/hrnet.py index 467567e1926d448ae5f202935300ebb794ae60fb..64d1f14869b1e991c64502ee987a9611c7febd11 100644 --- a/ppcls/modeling/architectures/hrnet.py +++ b/ppcls/modeling/architectures/hrnet.py @@ -1,3 +1,21 @@ +# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import numpy as np import paddle import paddle.fluid as fluid @@ -647,81 +665,81 @@ class HRNet(fluid.dygraph.Layer): return y -def HRNet_W18_C(): - model = HRNet(width=18) +def HRNet_W18_C(**args): + model = HRNet(width=18, **args) return model -def HRNet_W30_C(): - model = HRNet(width=30) +def HRNet_W30_C(**args): + model = HRNet(width=30, **args) return model -def HRNet_W32_C(): - model = HRNet(width=32) +def HRNet_W32_C(**args): + model = HRNet(width=32, **args) return model -def HRNet_W40_C(): - model = HRNet(width=40) +def HRNet_W40_C(**args): + model = HRNet(width=40, **args) return model -def HRNet_W44_C(): - model = HRNet(width=44) +def HRNet_W44_C(**args): + model = HRNet(width=44, **args) return model -def HRNet_W48_C(): - model = HRNet(width=48) +def HRNet_W48_C(**args): + model = HRNet(width=48, **args) return model -def HRNet_W60_C(): - model = HRNet(width=60) +def HRNet_W60_C(**args): + model = HRNet(width=60, **args) return model -def HRNet_W64_C(): - model = HRNet(width=64) +def HRNet_W64_C(**args): + model = HRNet(width=64, **args) return model -def SE_HRNet_W18_C(): - model = HRNet(width=18, has_se=True) +def SE_HRNet_W18_C(**args): + model = HRNet(width=18, has_se=True, **args) return model -def SE_HRNet_W30_C(): - model = HRNet(width=30, has_se=True) +def SE_HRNet_W30_C(**args): + model = HRNet(width=30, has_se=True, **args) return model -def SE_HRNet_W32_C(): - model = HRNet(width=32, has_se=True) +def SE_HRNet_W32_C(**args): + model = HRNet(width=32, has_se=True, **args) return model -def SE_HRNet_W40_C(): - model = HRNet(width=40, has_se=True) +def SE_HRNet_W40_C(**args): + model = HRNet(width=40, has_se=True, **args) return model -def SE_HRNet_W44_C(): - model = HRNet(width=44, has_se=True) +def SE_HRNet_W44_C(**args): + model = HRNet(width=44, has_se=True, **args) return model -def SE_HRNet_W48_C(): - model = HRNet(width=48, has_se=True) +def SE_HRNet_W48_C(**args): + model = HRNet(width=48, has_se=True, **args) return model -def SE_HRNet_W60_C(): - model = HRNet(width=60, has_se=True) +def SE_HRNet_W60_C(**args): + model = HRNet(width=60, has_se=True, **args) return model -def SE_HRNet_W64_C(): - model = HRNet(width=64, has_se=True) +def SE_HRNet_W64_C(**args): + model = HRNet(width=64, has_se=True, **args) return model diff --git a/tools/program.py b/tools/program.py index 0a28dbe6bf29587e73918d0d494504b3683366ad..34541043623ea9c4f78387488203f57e7fa8a0c7 100644 --- a/tools/program.py +++ b/tools/program.py @@ -49,11 +49,9 @@ def create_dataloader(): dataloader(fluid dataloader): """ trainer_num = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - capacity = 64 if trainer_num <= 1 else 8 + capacity = 64 if trainer_num == 1 else 8 dataloader = fluid.io.DataLoader.from_generator( - capacity=capacity, - use_double_buffer=True, - iterable=True) + capacity=capacity, use_double_buffer=True, iterable=True) return dataloader @@ -76,8 +74,8 @@ def create_model(architecture, classes_num): return architectures.__dict__[name](class_dim=classes_num, **params) -def create_loss(out, - label, +def create_loss(feeds, + out, architecture, classes_num=1000, epsilon=None, @@ -106,7 +104,7 @@ def create_loss(out, if architecture["name"] == "GoogLeNet": assert len(out) == 3, "GoogLeNet should have 3 outputs" loss = GoogLeNetLoss(class_dim=classes_num, epsilon=epsilon) - return loss(out[0], out[1], out[2], label) + return loss(out[0], out[1], out[2], feeds["label"]) if use_distillation: assert len(out) == 2, ("distillation output length must be 2, " @@ -116,14 +114,13 @@ def create_loss(out, if use_mix: loss = MixCELoss(class_dim=classes_num, epsilon=epsilon) - raise NotImplementedError - #feed_y_a = feeds['feed_y_a'] - #feed_y_b = feeds['feed_y_b'] - #feed_lam = feeds['feed_lam'] - #return loss(out, feed_y_a, feed_y_b, feed_lam) + feed_y_a = feeds['y_a'] + feed_y_b = feeds['y_b'] + feed_lam = feeds['lam'] + return loss(out, feed_y_a, feed_y_b, feed_lam) else: loss = CELoss(class_dim=classes_num, epsilon=epsilon) - return loss(out, label) + return loss(out, feeds["label"]) def create_metric(out, @@ -166,14 +163,7 @@ def create_metric(out, return fetchs -def create_fetchs(out, - label, - architecture, - topk=5, - classes_num=1000, - epsilon=None, - use_mix=False, - use_distillation=False): +def create_fetchs(feeds, net, config, mode="train"): """ Create fetchs as model outputs(included loss and measures), will call create_loss and create_metric(if use_mix). @@ -192,12 +182,21 @@ def create_fetchs(out, Returns: fetchs(dict): dict of model outputs(included loss and measures) """ + architecture = config.ARCHITECTURE + topk = config.topk + classes_num = config.classes_num + epsilon = config.get('ls_epsilon') + use_mix = config.get('use_mix') and mode == 'train' + use_distillation = config.get('use_distillation') + + out = net(feeds["image"]) + fetchs = OrderedDict() - fetchs['loss'] = create_loss(out, label, architecture, classes_num, epsilon, use_mix, - use_distillation) + fetchs['loss'] = create_loss(feeds, out, architecture, classes_num, + epsilon, use_mix, use_distillation) if not use_mix: - metric = create_metric(out, label, architecture, topk, classes_num, - use_distillation) + metric = create_metric(out, feeds["label"], architecture, topk, + classes_num, use_distillation) fetchs.update(metric) return fetchs @@ -278,36 +277,17 @@ def mixed_precision_optimizer(config, optimizer): return optimizer -def compute(config, out, label, mode='train'): - """ - Build a program using a model and an optimizer - 1. create feeds - 2. create a dataloader - 3. create a model - 4. create fetchs - 5. create an optimizer - - Args: - config(dict): config - main_prog(): main program - startup_prog(): startup program - is_train(bool): train or valid - - Returns: - dataloader(): a bridge between the model and the data - fetchs(dict): dict of model outputs(included loss and measures) - """ - fetchs = create_fetchs( - out, - label, - config.ARCHITECTURE, - config.topk, - config.classes_num, - epsilon=config.get('ls_epsilon'), - use_mix=config.get('use_mix') and mode == 'train', - use_distillation=config.get('use_distillation')) - - return fetchs +def create_feeds(batch, use_mix): + image = to_variable(batch[0].numpy().astype("float32")) + if use_mix: + y_a = to_variable(batch[1].numpy().astype("int64").reshape(-1, 1)) + y_b = to_variable(batch[2].numpy().astype("int64").reshape(-1, 1)) + lam = to_variable(batch[3].numpy().astype("float32").reshape(-1, 1)) + feeds = {"image": image, "y_a": y_a, "y_b": y_b, "lam": lam} + else: + label = to_variable(batch[1].numpy().astype('int64').reshape(-1, 1)) + feeds = {"image": image, "label": label} + return feeds def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): @@ -324,19 +304,30 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): Returns: """ - topk_name = 'top{}'.format(config.topk) - metric_list = OrderedDict([ - ("loss", AverageMeter('loss', '7.4f')), - ("top1", AverageMeter('top1', '.4f')), - (topk_name, AverageMeter(topk_name, '.4f')), - ("lr", AverageMeter('lr', 'f', need_avg=False)), - ("batch_time", AverageMeter('elapse', '.3f')), - ]) + use_mix = config.get("use_mix", False) and mode == "train" + if use_mix: + metric_list = OrderedDict([ + ("loss", AverageMeter('loss', '7.4f')), + ("lr", AverageMeter( + 'lr', 'f', need_avg=False)), + ("batch_time", AverageMeter('elapse', '.3f')), + ]) + else: + topk_name = 'top{}'.format(config.topk) + metric_list = OrderedDict([ + ("loss", AverageMeter('loss', '7.4f')), + ("top1", AverageMeter('top1', '.4f')), + (topk_name, AverageMeter(topk_name, '.4f')), + ("lr", AverageMeter( + 'lr', 'f', need_avg=False)), + ("batch_time", AverageMeter('elapse', '.3f')), + ]) tic = time.time() - for idx, (img, label) in enumerate(dataloader()): - label = to_variable(label.numpy().astype('int64').reshape(-1, 1)) - fetchs = compute(config, net(img), label, mode) + for idx, batch in enumerate(dataloader()): + batch_size = len(batch[0]) + feeds = create_feeds(batch, use_mix) + fetchs = create_fetchs(feeds, net, config, mode) if mode == 'train': avg_loss = net.scale_loss(fetchs['loss']) avg_loss.backward() @@ -345,10 +336,10 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): optimizer.minimize(avg_loss) net.clear_gradients() metric_list['lr'].update( - optimizer._global_learning_rate().numpy()[0], len(img)) + optimizer._global_learning_rate().numpy()[0], batch_size) for name, fetch in fetchs.items(): - metric_list[name].update(fetch.numpy()[0], len(img)) + metric_list[name].update(fetch.numpy()[0], batch_size) metric_list['batch_time'].update(time.time() - tic) tic = time.time() @@ -365,7 +356,8 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): logger.coloring(step_str, "PURPLE"), logger.coloring(fetchs_str, 'OKGREEN'))) - end_str = ' '.join([str(m.mean) for m in metric_list.values()] + [metric_list['batch_time'].total]) + end_str = ' '.join([str(m.mean) for m in metric_list.values()] + + [metric_list['batch_time'].total]) if mode == 'eval': logger.info("END {:s} {:s}s".format(mode, end_str)) else: @@ -378,4 +370,4 @@ def run(dataloader, config, net, optimizer=None, epoch=0, mode='train'): # return top1_acc in order to save the best model if mode == 'valid': - return metric_list['top1'].avg \ No newline at end of file + return metric_list['top1'].avg