diff --git a/demo/DML/README.md b/demo/DML/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..fab0faafca2bfcca2931882a7e9a75a3370e82e5
--- /dev/null
+++ b/demo/DML/README.md
@@ -0,0 +1,30 @@
+# Deep Mutual Learning (DML)
+This demo shows how to train models with PaddleSlim's Deep Mutual Learning (DML) method. For the algorithm details, please refer to the paper [Deep Mutual Learning](https://arxiv.org/abs/1706.00384).
+
+## Dataset
+The demo trains on the CIFAR-100 dataset. You can either let the training script download it automatically at startup,
+or download the [dataset](https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz) yourself and place it under `./dataset/cifar100` in the current directory.
+
+## Launch Commands
+
+Single-GPU training, using GPU 0 as an example:
+```bash
+CUDA_VISIBLE_DEVICES=0 python dml_train.py
+```
+
+Multi-GPU training, using GPUs 0-3 as an example:
+```bash
+python -m paddle.distributed.launch --selected_gpus=0,1,2,3 --log_dir ./mylog dml_train.py --use_parallel=True
+```
+
+## Results
+
+The results below can be reproduced with the default training configuration (learning rate, optimizer, etc.); only the combination of models jointly trained with DML was varied.
+
+To improve the results further, you can try [more optimization tricks](https://arxiv.org/abs/1812.01187) or increase the number of models jointly trained in one DML run.
+
+| Dataset | Models | Accuracy (independent training) | Accuracy (DML) |
+| ------ | ------ | ------ | ------ |
+| CIFAR100 | MobileNet X 2 | 73.65% | 76.34% (+2.69%) |
+| CIFAR100 | MobileNet X 4 | 73.65% | 76.56% (+2.91%) |
+| CIFAR100 | MobileNet + ResNet50 | 73.65%/76.52% | 76.00%/77.80% (+2.35%/+1.28%) |
diff --git a/demo/DML/cifar100_reader.py b/demo/DML/cifar100_reader.py
new file mode 100755
index 0000000000000000000000000000000000000000..325ed9f0724ba3ef2ccf39e29665f8008861165f
--- /dev/null
+++ b/demo/DML/cifar100_reader.py
@@ -0,0 +1,125 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
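+
+# Usage sketch (illustrative only, not used by the training script): the
+# `train_valid` factory defined below returns a generator that yields
+# [images, labels] batches, e.g. with an arbitrary batch size of 64:
+#
+#   import cifar100_reader as reader
+#   train_reader = reader.train_valid(batch_size=64, is_train=True, is_shuffle=True)
+#   images, labels = next(train_reader())
+#   # images: float32 ndarray, shape [64, 3, 32, 32], normalized by CIFAR_MEAN/STD
+#   # labels: int64 ndarray, shape [64, 1]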
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import random
+import tarfile
+
+import numpy as np
+from PIL import Image
+from PIL import ImageOps
+import paddle
+# for python2/python3 compatibility
+try:
+    import cPickle
+except ImportError:
+    import _pickle as cPickle
+
+IMAGE_SIZE = 32
+IMAGE_DEPTH = 3
+CIFAR_MEAN = [0.5070751592371323, 0.48654887331495095, 0.4409178433670343]
+CIFAR_STD = [0.2673342858792401, 0.2564384629170883, 0.27615047132568404]
+
+URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
+CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
+CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
+paddle.dataset.common.DATA_HOME = "dataset/"
+
+
+def preprocess(sample, is_training):
+    image_array = sample.reshape(IMAGE_DEPTH, IMAGE_SIZE, IMAGE_SIZE)
+    rgb_array = np.transpose(image_array, (1, 2, 0))
+    img = Image.fromarray(rgb_array, 'RGB')
+
+    if is_training:
+        # pad, random crop, random horizontal flip, random rotation
+        img = ImageOps.expand(img, (4, 4, 4, 4), fill=0)
+        # crop offsets range over [0, 8] so every 32x32 window of the
+        # padded 40x40 image can be selected
+        left_top = np.random.randint(9, size=2)
+        img = img.crop((left_top[1], left_top[0], left_top[1] + IMAGE_SIZE,
+                        left_top[0] + IMAGE_SIZE))
+        if np.random.randint(2):
+            img = img.transpose(Image.FLIP_LEFT_RIGHT)
+        # rotation angle in [-15, 15] degrees
+        random_angle = np.random.randint(-15, 16)
+        img = img.rotate(random_angle, Image.NEAREST)
+    img = np.array(img).astype(np.float32)
+
+    img_float = img / 255.0
+    img = (img_float - CIFAR_MEAN) / CIFAR_STD
+
+    img = np.transpose(img, (2, 0, 1))
+    return img
+
+
+def reader_generator(datasets, batch_size, is_training, is_shuffle):
+    def read_batch(datasets):
+        if is_shuffle:
+            random.shuffle(datasets)
+        for im, label in datasets:
+            im = preprocess(im, is_training)
+            yield im, [int(label)]
+
+    def reader():
+        batch_data = []
+        batch_label = []
+        for data in read_batch(datasets):
+            batch_data.append(data[0])
+            batch_label.append(data[1])
+            if len(batch_data) == batch_size:
+                batch_data = np.array(batch_data, dtype='float32')
+                batch_label = np.array(batch_label, dtype='int64')
+                batch_out = [batch_data, batch_label]
+                yield batch_out
+                batch_data = []
+                batch_label = []
+
+    return reader
+
+
+def cifar100_reader(file_name, data_name, is_shuffle):
+    with tarfile.open(file_name, mode='r') as f:
+        names = [
+            each_item.name for each_item in f if data_name in each_item.name
+        ]
+        names.sort()
+        datasets = []
+        for name in names:
+            print("Reading file " + name)
+            try:
+                batch = cPickle.load(
+                    f.extractfile(name), encoding='iso-8859-1')
+            except TypeError:
+                # Python 2's cPickle.load does not accept an encoding argument
+                batch = cPickle.load(f.extractfile(name))
+            data = batch['data']
+            labels = batch.get('labels', batch.get('fine_labels', None))
+            assert labels is not None
+            dataset = zip(data, labels)
+            datasets.extend(dataset)
+        if is_shuffle:
+            random.shuffle(datasets)
+    return datasets
+
+
+def train_valid(batch_size, is_train, is_shuffle):
+    name = 'train' if is_train else 'test'
+    datasets = cifar100_reader(
+        paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
+        name, is_shuffle)
+    reader = reader_generator(datasets, batch_size, is_train, is_shuffle)
+    return reader
diff --git a/demo/DML/dml_train.py b/demo/DML/dml_train.py
new file mode 100755
index 0000000000000000000000000000000000000000..cbe7ff42a5e53c61d37ac102b0a646e7825c9bda
--- /dev/null
+++ b/demo/DML/dml_train.py
@@ -0,0 +1,207 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import argparse
+import functools
+import logging
+import paddle.fluid as fluid
+from paddle.fluid.dygraph.base import to_variable
+from paddleslim.common import AvgrageMeter, get_logger
+from paddleslim.dist import DML
+from paddleslim.models.dygraph import MobileNetV1
+import cifar100_reader as reader
+# make ../utility.py importable regardless of the working directory
+sys.path[0] = os.path.join(os.path.dirname(__file__), os.path.pardir)
+from utility import add_arguments, print_arguments
+
+logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+
+# yapf: disable
+add_arg('log_freq', int, 100, "Log frequency.")
+add_arg('batch_size', int, 256, "Minibatch size.")
+add_arg('init_lr', float, 0.1, "The initial learning rate.")
+add_arg('use_gpu', bool, True, "Whether to use GPU.")
+add_arg('epochs', int, 200, "Epoch number.")
+add_arg('class_num', int, 100, "Class number of dataset.")
+add_arg('trainset_num', int, 50000, "Number of images in the training set.")
+add_arg('model_save_dir', str, 'saved_models', "The path to save model.")
+add_arg('use_multiprocess', bool, True, "Whether to use a multiprocess reader.")
+add_arg('use_parallel', bool, False, "Whether to use data parallel mode to train the model.")
+# yapf: enable
+
+
+def create_optimizer(models, args):
+    device_num = fluid.dygraph.parallel.Env().nranks
+    step = int(args.trainset_num / (args.batch_size * device_num))
+    epochs = [60, 120, 180]
+    bd = [step * e for e in epochs]
+    lr = [args.init_lr * (0.1**i) for i in range(len(bd) + 1)]
+
+    optimizers = []
+    for cur_model in models:
+        learning_rate = fluid.dygraph.PiecewiseDecay(bd, lr, 0)
+        opt = fluid.optimizer.MomentumOptimizer(
+            learning_rate,
+            0.9,
+            parameter_list=cur_model.parameters(),
+            use_nesterov=True,
+            regularization=fluid.regularizer.L2DecayRegularizer(5e-4))
+        optimizers.append(opt)
+    return optimizers
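+
+# With the default arguments on a single device, the schedule above works out
+# to (a worked example, not extra configuration):
+#   step = int(50000 / 256) = 195 iterations per epoch
+#   bd   = [195 * 60, 195 * 120, 195 * 180] = [11700, 23400, 35100]
+#   lr   = [0.1, 0.01, 0.001, 0.0001]
+# i.e. the learning rate is divided by 10 at epochs 60, 120 and 180.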
+
+
+def create_reader(place, args):
+    train_reader = reader.train_valid(
+        batch_size=args.batch_size, is_train=True, is_shuffle=True)
+    valid_reader = reader.train_valid(
+        batch_size=args.batch_size, is_train=False, is_shuffle=False)
+    if args.use_parallel:
+        train_reader = fluid.contrib.reader.distributed_batch_reader(
+            train_reader)
+    train_loader = fluid.io.DataLoader.from_generator(
+        capacity=1024,
+        return_list=True,
+        use_multiprocess=args.use_multiprocess)
+    valid_loader = fluid.io.DataLoader.from_generator(
+        capacity=1024,
+        return_list=True,
+        use_multiprocess=args.use_multiprocess)
+    train_loader.set_batch_generator(train_reader, places=place)
+    valid_loader.set_batch_generator(valid_reader, places=place)
+    return train_loader, valid_loader
+
+
+def train(train_loader, dml_model, dml_optimizer, args):
+    dml_model.train()
+    costs = [AvgrageMeter() for i in range(dml_model.model_num)]
+    accs = [AvgrageMeter() for i in range(dml_model.model_num)]
+    model_names = dml_model.full_name()
+    for step_id, (images, labels) in enumerate(train_loader):
+        images, labels = to_variable(images), to_variable(labels)
+        batch_size = images.shape[0]
+
+        logits = dml_model.forward(images)
+        precs = [
+            fluid.layers.accuracy(
+                input=l, label=labels, k=1) for l in logits
+        ]
+        losses = dml_model.loss(logits, labels)
+        dml_optimizer.minimize(losses)
+
+        for i in range(dml_model.model_num):
+            accs[i].update(precs[i].numpy(), batch_size)
+            costs[i].update(losses[i].numpy(), batch_size)
+        if step_id % args.log_freq == 0:
+            log_msg = "Train Step {}".format(step_id)
+            for model_id, (cost, acc) in enumerate(zip(costs, accs)):
+                log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
+                    model_names[model_id], cost.avg[0], acc.avg[0])
+            logger.info(log_msg)
+    return costs, accs
+
+
+def valid(valid_loader, dml_model, args):
+    dml_model.eval()
+    costs = [AvgrageMeter() for i in range(dml_model.model_num)]
+    accs = [AvgrageMeter() for i in range(dml_model.model_num)]
+    model_names = dml_model.full_name()
+    for step_id, (images, labels) in enumerate(valid_loader):
+        images, labels = to_variable(images), to_variable(labels)
+        batch_size = images.shape[0]
+
+        logits = dml_model.forward(images)
+        precs = [
+            fluid.layers.accuracy(
+                input=l, label=labels, k=1) for l in logits
+        ]
+        losses = dml_model.loss(logits, labels)
+
+        for i in range(dml_model.model_num):
+            accs[i].update(precs[i].numpy(), batch_size)
+            costs[i].update(losses[i].numpy(), batch_size)
+        if step_id % args.log_freq == 0:
+            log_msg = "Valid Step {}".format(step_id)
+            for model_id, (cost, acc) in enumerate(zip(costs, accs)):
+                log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
+                    model_names[model_id], cost.avg[0], acc.avg[0])
+            logger.info(log_msg)
+    return costs, accs
+
+
+def main(args):
+    if not args.use_gpu:
+        place = fluid.CPUPlace()
+    elif not args.use_parallel:
+        place = fluid.CUDAPlace(0)
+    else:
+        place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
+
+    with fluid.dygraph.guard(place):
+        # 1. Define data reader
+        train_loader, valid_loader = create_reader(place, args)
+
+        # 2. Define neural network
+        models = [
+            MobileNetV1(class_dim=args.class_num),
+            MobileNetV1(class_dim=args.class_num)
+        ]
+        optimizers = create_optimizer(models, args)
+
+        # 3. Use PaddleSlim DML strategy
+        dml_model = DML(models, args.use_parallel)
+        dml_optimizer = dml_model.opt(optimizers)
+
+        # 4. Train your network
+        save_parameters = (not args.use_parallel) or (
+            args.use_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
+        best_valid_acc = [0] * dml_model.model_num
+        for epoch_id in range(args.epochs):
+            current_step_lr = dml_optimizer.get_lr()
+            lr_msg = "Epoch {}".format(epoch_id)
+            for model_id, lr in enumerate(current_step_lr):
+                lr_msg += ", {} lr: {:.6f}".format(
+                    dml_model.full_name()[model_id], lr)
+            logger.info(lr_msg)
+            train_losses, train_accs = train(train_loader, dml_model,
+                                             dml_optimizer, args)
+            valid_losses, valid_accs = valid(valid_loader, dml_model, args)
+            for i in range(dml_model.model_num):
+                if valid_accs[i].avg[0] > best_valid_acc[i]:
+                    best_valid_acc[i] = valid_accs[i].avg[0]
+                    if save_parameters:
+                        fluid.save_dygraph(
+                            models[i].state_dict(),
+                            os.path.join(args.model_save_dir,
+                                         dml_model.full_name()[i],
+                                         "best_model"))
+                summary_msg = "Epoch {} {}: valid_loss {:.6f}, valid_acc {:.6f}, best_valid_acc {:.6f}"
+                logger.info(
+                    summary_msg.format(epoch_id,
+                                       dml_model.full_name()[i], valid_losses[
+                                           i].avg[0], valid_accs[i].avg[0],
+                                       best_valid_acc[i]))
+
+
+if __name__ == '__main__':
+    args = parser.parse_args()
+    print_arguments(args)
+    main(args)
diff --git a/paddleslim/dist/__init__.py b/paddleslim/dist/__init__.py
old mode 100644
new mode 100755
index bdc208fa3a7e930d76fa27d8c09e2ed343e978ba..04b8ef113d70f54b664ed518e71cf0667d599831
--- a/paddleslim/dist/__init__.py
+++ b/paddleslim/dist/__init__.py
@@ -13,3 +13,4 @@
 # limitations under the License.
 
 from .single_distiller import merge, fsp_loss, l2_loss, soft_label_loss, loss
+from .dml import DML
diff --git a/paddleslim/dist/dml.py b/paddleslim/dist/dml.py
new file mode 100755
index 0000000000000000000000000000000000000000..34ae1deb2b5b4060e6ce19e63c5d26a417170375
--- /dev/null
+++ b/paddleslim/dist/dml.py
@@ -0,0 +1,126 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
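+
+# Minimal usage sketch of the DML wrapper defined below, mirroring
+# demo/DML/dml_train.py (the model/optimizer names here are placeholders):
+#
+#   dml_model = DML([model_a, model_b], use_parallel=False)
+#   dml_optimizer = dml_model.opt([opt_a, opt_b])
+#   logits = dml_model.forward(images)       # one logits tensor per model
+#   losses = dml_model.loss(logits, labels)  # CE loss + averaged KL to peers
+#   dml_optimizer.minimize(losses)           # backward + step + clear per model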
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle.fluid as fluid
+import paddle.nn.functional as F
+
+
+class DML(fluid.dygraph.Layer):
+    def __init__(self, model, use_parallel):
+        super(DML, self).__init__()
+        self.model = model
+        self.use_parallel = use_parallel
+        self.model_num = len(self.model)
+        if self.use_parallel:
+            strategy = fluid.dygraph.parallel.prepare_context()
+            self.model = [
+                fluid.dygraph.parallel.DataParallel(m, strategy)
+                for m in self.model
+            ]
+
+    def full_name(self):
+        return [m.full_name() for m in self.model]
+
+    def forward(self, input):
+        return [m(input) for m in self.model]
+
+    def opt(self, optimizer):
+        assert len(
+            optimizer
+        ) == self.model_num, "The number of optimizers must match the number of models"
+        optimizer = DMLOptimizers(self.model, optimizer, self.use_parallel)
+        return optimizer
+
+    def ce_loss(self, logits, labels):
+        assert len(
+            logits
+        ) == self.model_num, "The number of logits must match the number of models"
+        ce_losses = []
+        for i in range(self.model_num):
+            ce_losses.append(
+                fluid.layers.mean(
+                    fluid.layers.softmax_with_cross_entropy(logits[i],
+                                                            labels)))
+        return ce_losses
+
+    def kl_loss(self, logits):
+        assert len(
+            logits
+        ) == self.model_num, "The number of logits must match the number of models"
+        if self.model_num == 1:
+            return []
+        kl_losses = []
+        for i in range(self.model_num):
+            cur_kl_loss = 0
+            for j in range(self.model_num):
+                if i != j:
+                    # kldiv_loss(x, y) expects x as log-probabilities and y as
+                    # probabilities, so this term is
+                    # KL(softmax(logits[j]) || softmax(logits[i])):
+                    # model i learns to mimic its peer j
+                    x = F.log_softmax(logits[i], axis=1)
+                    y = fluid.layers.softmax(logits[j], axis=1)
+                    cur_kl_loss += fluid.layers.kldiv_loss(
+                        x, y, reduction='batchmean')
+            # average the KL terms over the (model_num - 1) peers
+            kl_losses.append(cur_kl_loss / (self.model_num - 1))
+        return kl_losses
+
+    def loss(self, logits, labels):
+        gt_losses = self.ce_loss(logits, labels)
+        kl_losses = self.kl_loss(logits)
+        if self.model_num > 1:
+            return [a + b for a, b in zip(gt_losses, kl_losses)]
+        else:
+            return gt_losses
+
+    def acc(self, logits, labels, k):
+        accs = [
+            fluid.layers.accuracy(
+                input=l, label=labels, k=k) for l in logits
+        ]
+        return accs
+
+    def train(self):
+        for m in self.model:
+            m.train()
+
+    def eval(self):
+        for m in self.model:
+            m.eval()
+
+
+class DMLOptimizers(object):
+    def __init__(self, model, optimizer, use_parallel):
+        self.model = model
+        self.optimizer = optimizer
+        self.use_parallel = use_parallel
+
+    def minimize(self, losses):
+        assert len(losses) == len(
+            self.optimizer
+        ), "The number of losses must match the number of optimizers"
+        for i in range(len(losses)):
+            if self.use_parallel:
+                losses[i] = self.model[i].scale_loss(losses[i])
+                losses[i].backward()
+                self.model[i].apply_collective_grads()
+            else:
+                losses[i].backward()
+            self.optimizer[i].minimize(losses[i])
+            self.model[i].clear_gradients()
+
+    def get_lr(self):
+        current_step_lr = [opt.current_step_lr() for opt in self.optimizer]
+        return current_step_lr
diff --git a/paddleslim/models/dygraph/__init__.py b/paddleslim/models/dygraph/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..d618ee708b3c9d594dd5e6b02d9ee75504452c38
--- /dev/null
+++ b/paddleslim/models/dygraph/__init__.py
@@ -0,0 +1,19 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from .mobilenet import MobileNetV1
+from .resnet import ResNet
+
+__all__ = ["MobileNetV1", "ResNet"]
diff --git a/paddleslim/models/dygraph/mobilenet.py b/paddleslim/models/dygraph/mobilenet.py
new file mode 100755
index 0000000000000000000000000000000000000000..16f0aef39ef59697f71b50bde15eb5eb3778e522
--- /dev/null
+++ b/paddleslim/models/dygraph/mobilenet.py
@@ -0,0 +1,238 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 filter_size,
+                 num_filters,
+                 stride,
+                 padding,
+                 channels=None,
+                 num_groups=1,
+                 act='relu',
+                 use_cudnn=True,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=padding,
+            groups=num_groups,
+            act=None,
+            use_cudnn=use_cudnn,
+            param_attr=ParamAttr(
+                initializer=MSRA(), name=self.full_name() + "_weights"),
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(
+            num_filters,
+            act=act,
+            param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
+            bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
+            moving_mean_name=self.full_name() + "_bn" + '_mean',
+            moving_variance_name=self.full_name() + "_bn" + '_variance')
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+        return y
+
+
+class DepthwiseSeparable(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters1,
+                 num_filters2,
+                 num_groups,
+                 stride,
+                 scale,
+                 name=None):
+        super(DepthwiseSeparable, self).__init__()
+
+        self._depthwise_conv = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=int(num_filters1 * scale),
+            filter_size=3,
+            stride=stride,
+            padding=1,
+            num_groups=int(num_groups * scale),
+            use_cudnn=False)
+
+        self._pointwise_conv = ConvBNLayer(
+            num_channels=int(num_filters1 * scale),
+            filter_size=1,
+            num_filters=int(num_filters2 * scale),
+            stride=1,
+            padding=0)
+
+    def forward(self, inputs):
+        y = self._depthwise_conv(inputs)
+        y = self._pointwise_conv(y)
+        return y
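+
+# Why depthwise separable convolutions: a dense 3x3 convolution with
+# C_in = C_out = 256 costs 256 * 256 * 9 = 589,824 weights, while the
+# depthwise 3x3 (256 * 9 = 2,304) plus pointwise 1x1 (256 * 256 = 65,536)
+# pair above costs 67,840 weights, roughly an 8.7x reduction (a worked
+# example for one configuration, ignoring the BatchNorm parameters).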
+
+
+class MobileNetV1(fluid.dygraph.Layer):
+    def __init__(self, scale=1.0, class_dim=100):
+        super(MobileNetV1, self).__init__()
+        self.scale = scale
+        self.dwsl = []
+
+        self.conv1 = ConvBNLayer(
+            num_channels=3,
+            filter_size=3,
+            channels=3,
+            num_filters=int(32 * scale),
+            stride=1,
+            padding=1)
+
+        # (num_channels, num_filters1, num_filters2, num_groups, stride, name)
+        # for each depthwise-separable block, in execution order
+        dws_cfg = [
+            (32, 32, 64, 32, 1, "conv2_1"),
+            (64, 64, 128, 64, 1, "conv2_2"),
+            (128, 128, 128, 128, 1, "conv3_1"),
+            (128, 128, 256, 128, 2, "conv3_2"),
+            (256, 256, 256, 256, 1, "conv4_1"),
+            (256, 256, 512, 256, 2, "conv4_2"),
+        ]
+        dws_cfg += [(512, 512, 512, 512, 1, "conv5_" + str(i + 1))
+                    for i in range(5)]
+        dws_cfg += [
+            (512, 512, 1024, 512, 2, "conv5_6"),
+            (1024, 1024, 1024, 1024, 1, "conv6"),
+        ]
+        for num_channels, num_filters1, num_filters2, num_groups, stride, name in dws_cfg:
+            dws = self.add_sublayer(
+                sublayer=DepthwiseSeparable(
+                    num_channels=int(num_channels * scale),
+                    num_filters1=num_filters1,
+                    num_filters2=num_filters2,
+                    num_groups=num_groups,
+                    stride=stride,
+                    scale=scale),
+                name=name)
+            self.dwsl.append(dws)
+
+        self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
+
+        self.out = Linear(
+            int(1024 * scale),
+            class_dim,
+            param_attr=ParamAttr(
+                initializer=MSRA(), name=self.full_name() + "fc7_weights"),
+            bias_attr=ParamAttr(name=self.full_name() + "fc7_offset"))
+
+    def forward(self, inputs):
+        y = self.conv1(inputs)
+        for dws in self.dwsl:
+            y = dws(y)
+
+        y = self.pool2d_avg(y)
+        # flatten to [N, int(1024 * scale)] to match the Linear layer above
+        y = fluid.layers.reshape(y, shape=[-1, int(1024 * self.scale)])
+        y = self.out(y)
+
+        return y
diff --git a/paddleslim/models/dygraph/resnet.py b/paddleslim/models/dygraph/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..a33f6f56decfdba28f93282eb62adbdb185ede4a
--- /dev/null
+++ b/paddleslim/models/dygraph/resnet.py
@@ -0,0 +1,161 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+
+import paddle.fluid as fluid
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
+
+
+class ConvBNLayer(fluid.dygraph.Layer):
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 stride=1,
+                 groups=1,
+                 act=None):
+        super(ConvBNLayer, self).__init__()
+
+        self._conv = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            act=None,
+            bias_attr=False)
+
+        self._batch_norm = BatchNorm(num_filters, act=act)
+
+    def forward(self, inputs):
+        y = self._conv(inputs)
+        y = self._batch_norm(y)
+
+        return y
+
+
+class BottleneckBlock(fluid.dygraph.Layer):
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
+        super(BottleneckBlock, self).__init__()
+
+        self.conv0 = ConvBNLayer(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=1,
+            act='relu')
+        self.conv1 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters,
+            filter_size=3,
+            stride=stride,
+            act='relu')
+        self.conv2 = ConvBNLayer(
+            num_channels=num_filters,
+            num_filters=num_filters * 4,
+            filter_size=1,
+            act=None)
+
+        if not shortcut:
+            self.short = ConvBNLayer(
+                num_channels=num_channels,
+                num_filters=num_filters * 4,
+                filter_size=1,
+                stride=stride)
+
+        self.shortcut = shortcut
+
+        self._num_channels_out = num_filters * 4
+
+    def forward(self, inputs):
+        y = self.conv0(inputs)
+        conv1 = self.conv1(y)
+        conv2 = self.conv2(conv1)
+
+        if self.shortcut:
+            short = inputs
+        else:
+            short = self.short(inputs)
+
+        y = fluid.layers.elementwise_add(x=short, y=conv2)
+
+        layer_helper = LayerHelper(self.full_name(), act='relu')
+        return layer_helper.append_activation(y)
+
+
+class ResNet(fluid.dygraph.Layer):
+    def __init__(self, layers=50, class_dim=100):
+        super(ResNet, self).__init__()
+
+        self.layers = layers
+        # only the bottleneck-based depths below are implemented
+        supported_layers = [50, 101, 152]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+        if layers == 50:
+            depth = [3, 4, 6, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
+        num_filters = [64, 128, 256, 512]
+
+        self.conv = ConvBNLayer(
+            num_channels=3,
+            num_filters=64,
+            filter_size=7,
+            stride=1,
+            act='relu')
+        self.pool2d_max = Pool2D(
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
+
+        self.bottleneck_block_list = []
+        for block in range(len(depth)):
+            shortcut = False
+            for i in range(depth[block]):
+                bottleneck_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    BottleneckBlock(
+                        num_channels=num_channels[block]
+                        if i == 0 else num_filters[block] * 4,
+                        num_filters=num_filters[block],
+                        stride=2 if i == 0 and block != 0 else 1,
+                        shortcut=shortcut))
+                self.bottleneck_block_list.append(bottleneck_block)
+                shortcut = True
+
+        self.pool2d_avg = Pool2D(
+            pool_size=7, pool_type='avg', global_pooling=True)
+
+        self.pool2d_avg_output = num_filters[-1] * 4
+
+        # initialize the classifier with a 1/sqrt(fan_in) uniform range
+        stdv = 1.0 / math.sqrt(self.pool2d_avg_output * 1.0)
+
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))
+
+    def forward(self, inputs):
+        # note: self.pool2d_max is defined but not applied here, keeping the
+        # early feature maps at full resolution for CIFAR-sized inputs
+        y = self.conv(inputs)
+        for bottleneck_block in self.bottleneck_block_list:
+            y = bottleneck_block(y)
+        y = self.pool2d_avg(y)
+        y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
+        y = self.out(y)
+        return y
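+
+# Construction sketch (illustrative): the DML demo pairs this backbone with
+# MobileNetV1 on CIFAR-100, e.g.
+#
+#   import numpy as np
+#   import paddle.fluid as fluid
+#   from paddleslim.models.dygraph import ResNet
+#
+#   with fluid.dygraph.guard():
+#       model = ResNet(layers=50, class_dim=100)
+#       x = fluid.dygraph.to_variable(
+#           np.random.randn(2, 3, 32, 32).astype('float32'))
+#       logits = model(x)  # shape [2, 100]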