diff --git a/demo/auto_prune/train.py b/demo/auto_prune/train.py new file mode 100644 index 0000000000000000000000000000000000000000..70930774dc1c4306d12e63fbd1766a67ec2a5c3c --- /dev/null +++ b/demo/auto_prune/train.py @@ -0,0 +1,221 @@ +import os +import sys +import logging +import paddle +import argparse +import functools +import math +import time +import numpy as np +import paddle.fluid as fluid +from paddleslim.prune import AutoPruner +from paddleslim.common import get_logger +from paddleslim.analysis import flops +sys.path.append(sys.path[0] + "/../") +import models +from utility import add_arguments, print_arguments + +_logger = get_logger(__name__, level=logging.INFO) + +parser = argparse.ArgumentParser(description=__doc__) +add_arg = functools.partial(add_arguments, argparser=parser) +# yapf: disable +add_arg('batch_size', int, 64 * 4, "Minibatch size.") +add_arg('use_gpu', bool, True, "Whether to use GPU or not.") +add_arg('model', str, "MobileNet", "The target model.") +add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "Whether to use pretrained model.") +add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.") +add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.") +add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.") +add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.") +add_arg('num_epochs', int, 120, "The number of total epochs.") +add_arg('total_images', int, 1281167, "The number of total training images.") +parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step") +add_arg('config_file', str, None, "The config file for compression with yaml format.") +add_arg('data', str, "mnist", "Which data to use. 
def create_optimizer(args):
    """Build the optimizer selected by ``args.lr_strategy``.

    Args:
        args: Parsed command-line namespace. ``lr_strategy`` picks the
            learning-rate schedule; the namespace is handed unchanged to
            ``piecewise_decay`` or ``cosine_decay``.

    Returns:
        The optimizer returned by the matching schedule builder.

    Raises:
        ValueError: If ``args.lr_strategy`` is neither ``"piecewise_decay"``
            nor ``"cosine_decay"``. Previously this case fell through and
            returned ``None``, which only surfaced later as an
            ``AttributeError`` when the caller invoked ``opt.minimize``.
    """
    if args.lr_strategy == "piecewise_decay":
        return piecewise_decay(args)
    if args.lr_strategy == "cosine_decay":
        return cosine_decay(args)
    raise ValueError(
        "Unsupported lr_strategy {!r}; expected 'piecewise_decay' or "
        "'cosine_decay'.".format(args.lr_strategy))
shape=[1], dtype='int64') + # model definition + model = models.__dict__[args.model]() + out = model.net(input=image, class_dim=class_dim) + cost = fluid.layers.cross_entropy(input=out, label=label) + avg_cost = fluid.layers.mean(x=cost) + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5) + val_program = fluid.default_main_program().clone(for_test=True) + opt = create_optimizer(args) + opt.minimize(avg_cost) + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + if args.pretrained_model: + + def if_exist(var): + return os.path.exists( + os.path.join(args.pretrained_model, var.name)) + + fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) + + val_reader = paddle.batch(val_reader, batch_size=args.batch_size) + train_reader = paddle.batch( + train_reader, batch_size=args.batch_size, drop_last=True) + + train_feeder = feeder = fluid.DataFeeder([image, label], place) + val_feeder = feeder = fluid.DataFeeder( + [image, label], place, program=val_program) + + def test(epoch, program): + batch_id = 0 + acc_top1_ns = [] + acc_top5_ns = [] + for data in val_reader(): + start_time = time.time() + acc_top1_n, acc_top5_n = exe.run( + program, + feed=train_feeder.feed(data), + fetch_list=[acc_top1.name, acc_top5.name]) + end_time = time.time() + if batch_id % args.log_period == 0: + _logger.info( + "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}". + format(epoch, batch_id, + np.mean(acc_top1_n), + np.mean(acc_top5_n), end_time - start_time)) + acc_top1_ns.append(np.mean(acc_top1_n)) + acc_top5_ns.append(np.mean(acc_top5_n)) + batch_id += 1 + + _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}". 
def main():
    """Parse the command-line flags, print them, and run ``compress``."""
    cli_args = parser.parse_args()
    print_arguments(cli_args)
    compress(cli_args)
def crop_image(img, target_size, center):
    """Crop a square ``target_size`` x ``target_size`` patch out of ``img``.

    Args:
        img: Image-like object exposing ``size`` as ``(width, height)`` and a
            ``crop((left, upper, right, lower))`` method.
        target_size: Edge length of the square patch, in pixels.
        center: If truthy, take the centred patch; otherwise choose the
            top-left corner uniformly at random with ``np.random.randint``.

    Returns:
        Whatever ``img.crop`` returns for the selected box.
    """
    width, height = img.size
    size = target_size
    if center:
        # Floor division keeps the box on whole-pixel coordinates. True
        # division yielded ``x.5`` offsets whenever the margin was odd.
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)
    w_end = w_start + size
    h_end = h_start + size
    return img.crop((w_start, h_start, w_end, h_end))
def distort_color(img):
    """Apply brightness, contrast and colour jitter in a random order.

    The three ``ImageEnhance`` enhancers are shuffled with
    ``np.random.shuffle``; each one is then applied with a factor drawn from
    ``np.random.uniform(0.5, 1.5)`` immediately before it is used.

    Args:
        img: Image handed to the ``ImageEnhance`` enhancers.

    Returns:
        The image after all three enhancements.
    """
    lower, upper = 0.5, 1.5
    enhancers = [
        ImageEnhance.Brightness,
        ImageEnhance.Contrast,
        ImageEnhance.Color,
    ]
    np.random.shuffle(enhancers)

    for enhancer in enhancers:
        factor = np.random.uniform(lower, upper)
        img = enhancer(img).enhance(factor)
    return img
def _reader_creator(file_list,
                    mode,
                    shuffle=False,
                    color_jitter=False,
                    rotate=False,
                    data_dir=DATA_DIR,
                    batch_size=1):
    """Build a threaded reader that yields preprocessed samples.

    The lines of ``file_list`` are read (and optionally shuffled), converted
    to raw samples, and mapped through ``process_image`` by
    ``paddle.reader.xmap_readers`` with ``THREAD`` workers and a buffer of
    ``BUF_SIZE``.

    Args:
        file_list: Text file with one sample per line. In 'train' and 'val'
            mode a line splits into a relative image path and an integer
            label; in 'test' mode the whole line is the path.
        mode: 'train', 'val' or 'test'. Any other value yields no samples.
        shuffle: Shuffle the lines with ``np.random.shuffle`` on each pass.
        color_jitter: Forwarded to ``process_image``.
        rotate: Forwarded to ``process_image``.
        data_dir: Directory the image paths are joined onto.
        batch_size: Accepted but not used by this function.

    Returns:
        The reader produced by ``paddle.reader.xmap_readers``.
    """
    has_label = mode in ('train', 'val')

    def reader():
        try:
            with open(file_list) as list_file:
                all_entries = [raw.strip() for raw in list_file]
                if shuffle:
                    np.random.shuffle(all_entries)

                sharded = mode == 'train' and os.getenv('PADDLE_TRAINING_ROLE')
                if not sharded:
                    entries = all_entries
                else:
                    # PADDLE_TRAINING_ROLE is set: each trainer reads only
                    # its own contiguous, equally sized slice of the list.
                    rank = int(os.getenv("PADDLE_TRAINER_ID", "0"))
                    world = int(os.getenv("PADDLE_TRAINERS", "1"))
                    shard_len = len(all_entries) // world
                    begin = rank * shard_len
                    entries = all_entries[begin:begin + shard_len]
                    print(
                        "read images from %d, length: %d, lines length: %d, total: %d"
                        % (begin, shard_len, len(entries), len(all_entries)))

                for entry in entries:
                    if has_label:
                        rel_path, label = entry.split()
                        yield os.path.join(data_dir + "/" + mode,
                                           rel_path), int(label)
                    elif mode == 'test':
                        yield [os.path.join(data_dir, entry)]
        except Exception as e:
            # Any failure terminates the whole process immediately.
            # NOTE(review): presumably because this generator is consumed by
            # xmap_readers worker threads - confirm before softening.
            print("Reader failed!\n{}".format(str(e)))
            os._exit(1)

    mapper = functools.partial(
        process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)

    return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
class MobileNet():
    """MobileNet image classifier assembled from ``paddle.fluid`` layers.

    The graph is one strided 3x3 conv+BN layer, thirteen depthwise-separable
    blocks, global average pooling and a softmax fully-connected head. Every
    parameter is given an explicit name derived from its layer name.
    """

    def __init__(self):
        self.params = train_parameters

    def net(self, input, class_dim=1000, scale=1.0):
        """Build the classification graph.

        Args:
            input: Image batch variable fed to the first convolution.
            class_dim: Output width of the final fully-connected layer.
            scale: Multiplier applied to every filter and group count.

        Returns:
            The softmax output of the fully-connected head.
        """
        # One row per depthwise-separable block, in execution order:
        # (num_filters1, num_filters2, num_groups, stride, name).
        separable_blocks = [
            (32, 64, 32, 1, "conv2_1"),
            (64, 128, 64, 2, "conv2_2"),
            (128, 128, 128, 1, "conv3_1"),
            (128, 256, 128, 2, "conv3_2"),
            (256, 256, 256, 1, "conv4_1"),
            (256, 512, 256, 2, "conv4_2"),
        ]
        separable_blocks.extend(
            (512, 512, 512, 1, "conv5" + "_" + str(idx))
            for idx in range(1, 6))
        separable_blocks.extend([
            (512, 1024, 512, 2, "conv5_6"),
            (1024, 1024, 1024, 1, "conv6"),
        ])

        feature = self.conv_bn_layer(
            input,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1,
            name="conv1")

        for dw_filters, pw_filters, groups, stride, block_name in \
                separable_blocks:
            feature = self.depthwise_separable(
                feature,
                num_filters1=dw_filters,
                num_filters2=pw_filters,
                num_groups=groups,
                stride=stride,
                scale=scale,
                name=block_name)

        pooled = fluid.layers.pool2d(
            input=feature,
            pool_size=0,
            pool_stride=1,
            pool_type='avg',
            global_pooling=True)

        return fluid.layers.fc(
            input=pooled,
            size=class_dim,
            act='softmax',
            param_attr=ParamAttr(
                initializer=MSRA(), name="fc7_weights"),
            bias_attr=ParamAttr(name="fc7_offset"))

    def conv_bn_layer(self,
                      input,
                      filter_size,
                      num_filters,
                      stride,
                      padding,
                      channels=None,
                      num_groups=1,
                      act='relu',
                      use_cudnn=True,
                      name=None):
        """Bias-free convolution followed by batch normalisation.

        The convolution weights are named ``<name>_weights``; the batch-norm
        parameters and statistics are named ``<name>_bn_scale``,
        ``<name>_bn_offset``, ``<name>_bn_mean`` and ``<name>_bn_variance``.
        ``act`` is applied by the batch-norm layer. ``channels`` is accepted
        but not used.
        """
        conv_weights = ParamAttr(initializer=MSRA(), name=name + "_weights")
        conv_out = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            act=None,
            use_cudnn=use_cudnn,
            param_attr=conv_weights,
            bias_attr=False)

        bn_prefix = name + "_bn"
        return fluid.layers.batch_norm(
            input=conv_out,
            act=act,
            param_attr=ParamAttr(name=bn_prefix + "_scale"),
            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
            moving_mean_name=bn_prefix + '_mean',
            moving_variance_name=bn_prefix + '_variance')

    def depthwise_separable(self,
                            input,
                            num_filters1,
                            num_filters2,
                            num_groups,
                            stride,
                            scale,
                            name=None):
        """Grouped 3x3 conv+BN (``<name>_dw``) then 1x1 conv+BN (``<name>_sep``).

        ``num_filters1``, ``num_filters2`` and ``num_groups`` are each
        multiplied by ``scale`` and truncated with ``int``.
        """
        return self.conv_bn_layer(
            input=self.conv_bn_layer(
                input=input,
                filter_size=3,
                num_filters=int(num_filters1 * scale),
                stride=stride,
                padding=1,
                num_groups=int(num_groups * scale),
                use_cudnn=False,
                name=name + "_dw"),
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0,
            name=name + "_sep")
# Public names exported by ``from <module> import *``.
# Each entry must be its own string literal followed by a comma. The previous
# list wrote 'MobileNetV2_x0_25, ' directly before 'MobileNetV2_x0_5', which
# Python concatenates into the single bogus entry
# 'MobileNetV2_x0_25, MobileNetV2_x0_5'; a star-import then fails because no
# attribute of that name exists.
__all__ = [
    'MobileNetV2',
    'MobileNetV2_x0_25',
    'MobileNetV2_x0_5',
    'MobileNetV2_x1_0',
    'MobileNetV2_x1_5',
    'MobileNetV2_x2_0',
    'MobileNetV2_scale',
]
+ bottleneck_params_list = [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 3, 2), + (6, 64, 4, 2), + (6, 96, 3, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] if change_depth == False else [ + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 5, 2), + (6, 64, 7, 2), + (6, 96, 5, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), + ] + + #conv1 + input = self.conv_bn_layer( + input, + num_filters=int(32 * scale), + filter_size=3, + stride=2, + padding=1, + if_act=True, + name='conv1_1') + + # bottleneck sequences + i = 1 + in_c = int(32 * scale) + for layer_setting in bottleneck_params_list: + t, c, n, s = layer_setting + i += 1 + input = self.invresi_blocks( + input=input, + in_c=in_c, + t=t, + c=int(c * scale), + n=n, + s=s, + name='conv' + str(i)) + in_c = int(c * scale) + #last_conv + input = self.conv_bn_layer( + input=input, + num_filters=int(1280 * scale) if scale > 1.0 else 1280, + filter_size=1, + stride=1, + padding=0, + if_act=True, + name='conv9') + + input = fluid.layers.pool2d( + input=input, + pool_size=7, + pool_stride=1, + pool_type='avg', + global_pooling=True) + + output = fluid.layers.fc(input=input, + size=class_dim, + act='softmax', + param_attr=ParamAttr(name='fc10_weights'), + bias_attr=ParamAttr(name='fc10_offset')) + return output + + def conv_bn_layer(self, + input, + filter_size, + num_filters, + stride, + padding, + channels=None, + num_groups=1, + if_act=True, + name=None, + use_cudnn=True): + conv = fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + bn = fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + if if_act: + return fluid.layers.relu6(bn) + else: + return bn + + 
def shortcut(self, input, data_residual): + return fluid.layers.elementwise_add(input, data_residual) + + def inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + padding, + expansion_factor, + name=None): + num_expfilter = int(round(num_in_filter * expansion_factor)) + + channel_expand = self.conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=True, + name=name + '_expand') + + bottleneck_conv = self.conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding=padding, + num_groups=num_expfilter, + if_act=True, + name=name + '_dwise', + use_cudnn=False) + + linear_out = self.conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding=0, + num_groups=1, + if_act=False, + name=name + '_linear') + if ifshortcut: + out = self.shortcut(input=input, data_residual=linear_out) + return out + else: + return linear_out + + def invresi_blocks(self, input, in_c, t, c, n, s, name=None): + first_block = self.inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block = self.inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=3, + padding=1, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block + + +def MobileNetV2_x0_25(): + model = MobileNetV2(scale=0.25) + return model + + +def MobileNetV2_x0_5(): + model = MobileNetV2(scale=0.5) + return model + + +def MobileNetV2_x1_0(): + model = MobileNetV2(scale=1.0) + return model + + +def MobileNetV2_x1_5(): + model = MobileNetV2(scale=1.5) + return model + + +def 
MobileNetV2_x2_0(): + model = MobileNetV2(scale=2.0) + return model + + +def MobileNetV2_scale(): + model = MobileNetV2(scale=1.2, change_depth=True) + return model diff --git a/demo/models/resnet.py b/demo/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..4ceaef41ecc87d7388ae05d7fcb199de1841ebc2 --- /dev/null +++ b/demo/models/resnet.py @@ -0,0 +1,229 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle +import paddle.fluid as fluid +import math +from paddle.fluid.param_attr import ParamAttr + +__all__ = ["ResNet", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] + +train_parameters = { + "input_size": [3, 224, 224], + "input_mean": [0.485, 0.456, 0.406], + "input_std": [0.229, 0.224, 0.225], + "learning_strategy": { + "name": "piecewise_decay", + "batch_size": 256, + "epochs": [10, 16, 30], + "steps": [0.1, 0.01, 0.001, 0.0001] + } +} + + +class ResNet(): + def __init__(self, layers=50, prefix_name=''): + self.params = train_parameters + self.layers = layers + self.prefix_name = prefix_name + + def net(self, input, class_dim=1000, conv1_name='conv1', fc_name=None): + layers = self.layers + prefix_name = self.prefix_name if self.prefix_name is '' else self.prefix_name + '_' + supported_layers = [34, 50, 101, 152] + assert layers in supported_layers, \ + "supported layers are {} but input layer is {}".format(supported_layers, layers) + + if layers == 34 or layers == 50: + depth = [3, 4, 6, 3] + elif layers == 101: + depth = [3, 4, 23, 3] + elif layers == 152: + depth = [3, 8, 36, 3] + num_filters = [64, 128, 256, 512] + + # TODO(wanghaoshuang@baidu.com): + # fix name("conv1") conflict between student and teacher in distillation. 
+ conv = self.conv_bn_layer( + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name=prefix_name + conv1_name) + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') + + if layers >= 50: + for block in range(len(depth)): + for i in range(depth[block]): + if layers in [101, 152] and block == 2: + if i == 0: + conv_name = "res" + str(block + 2) + "a" + else: + conv_name = "res" + str(block + 2) + "b" + str(i) + else: + conv_name = "res" + str(block + 2) + chr(97 + i) + conv_name = prefix_name + conv_name + conv = self.bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_size=7, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + fc_name = fc_name if fc_name is None else prefix_name + fc_name + out = fluid.layers.fc(input=pool, + size=class_dim, + act='softmax', + name=fc_name, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform( + -stdv, stdv))) + else: + for block in range(len(depth)): + for i in range(depth[block]): + conv_name = "res" + str(block + 2) + chr(97 + i) + conv_name = prefix_name + conv_name + conv = self.basic_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + is_first=block == i == 0, + name=conv_name) + + pool = fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) + fc_name = fc_name if fc_name is None else prefix_name + fc_name + out = fluid.layers.fc( + input=pool, + size=class_dim, + act='softmax', + name=fc_name, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) + + return out + + def conv_bn_layer(self, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + conv = 
fluid.layers.conv2d( + input=input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=(filter_size - 1) // 2, + groups=groups, + act=None, + param_attr=ParamAttr(name=name + "_weights"), + bias_attr=False, + name=name + '.conv2d.output.1') + if self.prefix_name == '': + if name == "conv1": + bn_name = "bn_" + name + else: + bn_name = "bn" + name[3:] + else: + if name.split("_")[1] == "conv1": + bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_", + 1)[1] + else: + bn_name = name.split("_", 1)[0] + "_bn" + name.split("_", + 1)[1][3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) + + def shortcut(self, input, ch_out, stride, is_first, name): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1 or is_first == True: + return self.conv_bn_layer(input, ch_out, 1, stride, name=name) + else: + return input + + def bottleneck_block(self, input, num_filters, stride, name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + "_branch2b") + conv2 = self.conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters * 4, + stride, + is_first=False, + name=name + "_branch1") + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") + + def basic_block(self, input, num_filters, stride, is_first, name): + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name + "_branch2a") + conv1 = 
def ResNet101(prefix_name=''):
    """Create a 101-layer ``ResNet``.

    Args:
        prefix_name: Forwarded to ``ResNet``. Defaults to ``''``, so existing
            zero-argument calls behave exactly as before. Accepted for parity
            with ``ResNet34`` and ``ResNet50``.
    """
    model = ResNet(layers=101, prefix_name=prefix_name)
    return model


def ResNet152(prefix_name=''):
    """Create a 152-layer ``ResNet``.

    Args:
        prefix_name: Forwarded to ``ResNet``. Defaults to ``''``, so existing
            zero-argument calls behave exactly as before. Accepted for parity
            with ``ResNet34`` and ``ResNet50``.
    """
    model = ResNet(layers=152, prefix_name=prefix_name)
    return model
def search_mobilenetv2_cifar10(config, args):
    """Run the SA-NAS search loop for MobileNetV2 on CIFAR-10.

    For every search step the next candidate architecture is requested from
    the controller, trained for 10 epochs on the CIFAR-10 training set and
    evaluated on the CIFAR-10 test set. The evaluation result is reported
    back through ``sa_nas.reward``.

    Args:
        config(list): Search-space configuration, forwarded to
            ``init_sa_nas``.
        args(argparse.Namespace): Parsed command-line options; only
            ``use_gpu`` is read here.

    Raises:
        RuntimeError: If the test loader yields no batch, so no reward can be
            computed.
    """
    sa_nas, search_steps = init_sa_nas(config)
    for step in range(search_steps):
        print('search step: ', step)
        archs = sa_nas.next_archs()[0]

        train_program = fluid.Program()
        startup_program = fluid.Program()
        with fluid.program_guard(train_program, startup_program):
            train_loader, data, label = create_data_loader()
            output = archs(data)
            cost = fluid.layers.mean(
                fluid.layers.softmax_with_cross_entropy(
                    logits=output, label=label))[0]
            # Clone before the optimizer is attached so that the test program
            # contains the forward pass only.
            test_program = train_program.clone(for_test=True)

            optimizer = fluid.optimizer.Momentum(
                learning_rate=0.1,
                momentum=0.9,
                regularization=fluid.regularizer.L2Decay(1e-4))
            optimizer.minimize(cost)

        place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
        places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
        exe = fluid.Executor(place)
        exe.run(startup_program)

        train_reader = paddle.reader.shuffle(
            paddle.dataset.cifar.train10(cycle=False), buf_size=1024)
        train_loader.set_sample_generator(
            train_reader, batch_size=512, places=places)

        # NOTE(review): this loader is created outside the program_guard, so
        # its feed variables are presumably declared in the default main
        # program; it is only used to batch samples that are then fed to
        # test_program by name -- confirm this is intended.
        test_loader, _, _ = create_data_loader()
        test_reader = paddle.dataset.cifar.test10(cycle=False)
        test_loader.set_sample_generator(
            test_reader, batch_size=256, drop_last=False, places=places)

        for epoch_id in range(10):
            # `batch` (not `data`) so the graph input variable bound above is
            # not shadowed by the feed dictionaries.
            for batch_id, batch in enumerate(train_loader()):
                loss = exe.run(train_program,
                               feed=batch,
                               fetch_list=[cost.name])[0]
                if batch_id % 5 == 0:
                    print('epoch: {}, batch: {}, loss: {}'.format(
                        epoch_id, batch_id, loss[0]))

        # NOTE(review): the flattened source does not show whether evaluation
        # ran once per epoch or once after training. The value handed to the
        # controller is the post-training one under both readings, so the
        # evaluation is done once here.
        test_costs = []
        for batch in test_loader():
            batch_cost = exe.run(test_program,
                                 feed=batch,
                                 fetch_list=[cost.name])[0]
            test_costs.append(np.mean(batch_cost))
        if not test_costs:
            raise RuntimeError(
                'The test loader produced no batch; cannot compute a reward.')

        # Average over every test batch instead of keeping only the last one.
        # This is an unweighted mean of per-batch means; the final batch may
        # be smaller because drop_last=False.
        # NOTE(review): the reported value is a cross-entropy loss (lower is
        # better); confirm how SANAS interprets the sign of the reward.
        reward = float(np.mean(test_costs))
        print('reward:', reward)
        sa_nas.reward(reward)
def compress(args):
    """Prune a classification model and fine-tune the pruned network.

    Builds the model selected by ``args.model`` on MNIST or ImageNet,
    optionally loads pretrained weights, prunes every parameter whose name
    contains ``_sep_weights`` with a ratio of 0.33 and then trains the pruned
    program for ``args.num_epochs`` epochs, evaluating every
    ``args.test_period`` epochs.

    Args:
        args(argparse.Namespace): Parsed command-line options. Reads
            ``data``, ``model``, ``use_gpu``, ``pretrained_model``,
            ``batch_size``, ``log_period``, ``test_period`` and
            ``num_epochs``, plus whatever ``create_optimizer`` reads.

    Raises:
        ValueError: If ``args.data`` is neither ``"mnist"`` nor
            ``"imagenet"``.
        AssertionError: If ``args.model`` is not listed in ``model_list``.
    """
    if args.data == "mnist":
        import paddle.dataset.mnist as reader
        train_reader = reader.train()
        val_reader = reader.test()
        class_dim = 10
        image_shape = "1,28,28"
    elif args.data == "imagenet":
        import imagenet_reader as reader
        train_reader = reader.train()
        val_reader = reader.val()
        class_dim = 1000
        image_shape = "3,224,224"
    else:
        raise ValueError("{} is not supported.".format(args.data))
    image_shape = [int(m) for m in image_shape.split(",")]
    assert args.model in model_list, "{} is not in lists: {}".format(
        args.model, model_list)
    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # model definition
    model = models.__dict__[args.model]()
    out = model.net(input=image, class_dim=class_dim)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    avg_cost = fluid.layers.mean(x=cost)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    # Clone for evaluation before the optimizer ops are appended.
    val_program = fluid.default_main_program().clone(for_test=True)
    opt = create_optimizer(args)
    opt.minimize(avg_cost)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if args.pretrained_model:

        def if_exist(var):
            # Only load variables that have a file in the pretrained folder.
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)

    val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
    train_reader = paddle.batch(
        train_reader, batch_size=args.batch_size, drop_last=True)

    train_feeder = fluid.DataFeeder([image, label], place)
    val_feeder = fluid.DataFeeder([image, label], place, program=val_program)

    def test(epoch, program):
        """Evaluate ``program`` on the validation reader and log accuracy."""
        batch_id = 0
        acc_top1_ns = []
        acc_top5_ns = []
        for data in val_reader():
            start_time = time.time()
            # Evaluation batches go through the feeder built for validation.
            acc_top1_n, acc_top5_n = exe.run(
                program,
                feed=val_feeder.feed(data),
                fetch_list=[acc_top1.name, acc_top5.name])
            end_time = time.time()
            if batch_id % args.log_period == 0:
                _logger.info(
                    "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id,
                           np.mean(acc_top1_n),
                           np.mean(acc_top5_n), end_time - start_time))
            acc_top1_ns.append(np.mean(acc_top1_n))
            acc_top5_ns.append(np.mean(acc_top5_n))
            batch_id += 1

        _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".
                     format(epoch,
                            np.mean(np.array(acc_top1_ns)),
                            np.mean(np.array(acc_top5_ns))))

    def train(epoch, program):
        """Train ``program`` for one pass over the training reader."""
        build_strategy = fluid.BuildStrategy()
        exec_strategy = fluid.ExecutionStrategy()
        train_program = fluid.compiler.CompiledProgram(
            program).with_data_parallel(
                loss_name=avg_cost.name,
                build_strategy=build_strategy,
                exec_strategy=exec_strategy)

        batch_id = 0
        for data in train_reader():
            start_time = time.time()
            loss_n, acc_top1_n, acc_top5_n = exe.run(
                train_program,
                feed=train_feeder.feed(data),
                fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
            end_time = time.time()
            loss_n = np.mean(loss_n)
            acc_top1_n = np.mean(acc_top1_n)
            acc_top5_n = np.mean(acc_top5_n)
            if batch_id % args.log_period == 0:
                _logger.info(
                    "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
                    format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
                           end_time - start_time))
            batch_id += 1

    # Select the parameters to prune by name.
    params = []
    for param in fluid.default_main_program().global_block().all_parameters():
        if "_sep_weights" in param.name:
            params.append(param.name)
    _logger.info("fops before pruning: {}".format(
        flops(fluid.default_main_program())))
    pruner = Pruner()
    # The evaluation program is pruned with only_graph=True; the training
    # program below is pruned with the default setting.
    pruned_val_program = pruner.prune(
        val_program,
        fluid.global_scope(),
        params=params,
        ratios=[0.33] * len(params),
        place=place,
        only_graph=True)

    pruned_program = pruner.prune(
        fluid.default_main_program(),
        fluid.global_scope(),
        params=params,
        ratios=[0.33] * len(params),
        place=place)

    _logger.info("fops after pruning: {}".format(flops(pruned_program)))

    for i in range(args.num_epochs):
        train(i, pruned_program)
        if i % args.test_period == 0:
            test(i, pruned_val_program)
def _strtobool(value):
    """Convert a command-line truth string to ``bool``.

    Accepts the spellings ``distutils.util.strtobool`` accepted
    (case-insensitive): y/yes/t/true/on/1 and n/no/f/false/off/0.

    Raises:
        ValueError: If ``value`` is none of the accepted spellings; argparse
            reports this as a usage error.
    """
    text = value.lower()
    if text in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if text in ('n', 'no', 'f', 'false', 'off', '0'):
        return False
    raise ValueError("invalid truth value %r" % (value, ))


def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Add argparse's argument.

    Registers the option ``--<argname>`` on ``argparser``. When ``type`` is
    ``bool`` the option value is parsed from a truth string such as
    ``True``/``false``/``1``/``no`` and stored as a real ``bool``.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_argument("name", str, "Jonh", "User name.", parser)
        args = parser.parse_args()

    Args:
        argname(str): Option name without the leading ``--``.
        type(callable): Converter applied to the option string.
        default: Value used when the option is not given.
        help(str): Help text; the default value is appended to it.
        argparser(argparse.ArgumentParser): Parser to extend.
        **kwargs: Passed through to ``argparser.add_argument``.
    """
    # bool("False") is True, so bool flags need a real string parser. A local
    # one is used because distutils is no longer shipped with Python 3.12+.
    converter = _strtobool if type == bool else type
    argparser.add_argument(
        "--" + argname,
        default=default,
        type=converter,
        help=help + ' Default: %(default)s.',
        **kwargs)
def load_persistable_nodes(executor, dirname, graph):
    """
    Load persistable node values from the given directory by the executor.

    Only variables that have a file named after them inside ``dirname`` are
    loaded; the others are reported through the module logger and skipped.

    Args:
        executor(Executor): The executor to run for loading node values.
        dirname(str): The directory path.
        graph(IrGraph): All the required persistable nodes in the graph will be loaded.
    """
    # Keep the first node seen for every distinct name.
    persistable_node_names = set()
    persistable_nodes = []
    for node in graph.all_persistable_nodes():
        name = cpt.to_text(node.name())
        if name not in persistable_node_names:
            persistable_node_names.add(name)
            persistable_nodes.append(node)

    # Scratch program that only hosts the variable descriptions handed to
    # fluid.io.load_vars.
    program = Program()
    var_list = []
    for node in persistable_nodes:
        var_desc = node.var()
        # RAW and READER variables are skipped.
        if var_desc.type() == core.VarDesc.VarType.RAW or \
                        var_desc.type() == core.VarDesc.VarType.READER:
            continue
        var = program.global_block().create_var(
            name=var_desc.name(),
            shape=var_desc.shape(),
            dtype=var_desc.dtype(),
            type=var_desc.type(),
            lod_level=var_desc.lod_level(),
            persistable=var_desc.persistable())
        if os.path.exists(os.path.join(dirname, var.name)):
            var_list.append(var)
        else:
            _logger.info("Cannot find the var %s!!!" % (node.name()))
    fluid.io.load_vars(executor=executor, dirname=dirname, vars=var_list)
def sensitivity(program,
                scope,
                place,
                param_names,
                eval_func,
                sensitivities_file=None,
                step_size=0.2):
    """Measure how the evaluation metric reacts to pruning each parameter.

    For every parameter the pruning ratio runs from ``step_size`` upwards in
    increments of ``step_size`` while it stays below 1. For each ratio that
    has no stored result yet, the parameter is pruned, ``eval_func`` is run on
    the pruned program, the relative drop against the unpruned baseline is
    recorded and saved, and the pruned tensors are restored.

    Args:
        program: Program to analyse; wrapped in a ``GraphWrapper``.
        scope: Scope holding the parameter tensors.
        place: Device on which tensors are pruned and restored.
        param_names(list): Names of the parameters to analyse.
        eval_func(callable): Called as ``eval_func(program, scope)`` and
            expected to return the metric.
        sensitivities_file(str): File used to load earlier results from and
            to save new ones to.
        step_size(float): Increment of the pruning ratio.

    Returns:
        dict: Maps parameter name to a dict with the keys
        ``'pruned_percent'``, ``'loss'`` and ``'size'``.
    """
    graph = GraphWrapper(program)
    sensitivities = _load_sensitivities(sensitivities_file)

    # Create an empty record for each requested parameter that has none yet.
    for param_name in param_names:
        if param_name in sensitivities:
            continue
        first_dim = graph.var(param_name).shape()[0]
        sensitivities[param_name] = {
            'pruned_percent': [],
            'loss': [],
            'size': first_dim
        }

    # The baseline metric is evaluated lazily, only if something is missing.
    baseline = None
    for param_name in sensitivities:
        record = sensitivities[param_name]
        ratio = step_size
        while ratio < 1:
            ratio = round(ratio, 2)
            if ratio in record['pruned_percent']:
                _logger.debug('{}, {} has computed.'.format(param_name,
                                                            ratio))
            else:
                if baseline is None:
                    baseline = eval_func(graph.program, scope)

                # Filled by the pruner with the original tensor values.
                param_backup = {}
                pruned_program = Pruner().prune(
                    program=graph.program,
                    scope=scope,
                    params=[param_name],
                    ratios=[ratio],
                    place=place,
                    lazy=True,
                    only_graph=False,
                    param_backup=param_backup)
                pruned_metric = eval_func(pruned_program, scope)
                loss = (baseline - pruned_metric) / baseline
                _logger.info("pruned param: {}; {}; loss={}".format(
                    param_name, ratio, loss))
                record['pruned_percent'].append(ratio)
                record['loss'].append(loss)
                _save_sensitivities(sensitivities, sensitivities_file)

                # restore pruned parameters
                for backup_name, backup_value in param_backup.items():
                    tensor = scope.find_var(backup_name).get_tensor()
                    tensor.set(backup_value, place)
            ratio += step_size
    return sensitivities
def _save_sensitivities(sensitivities, sensitivities_file):
    """
    Save sensitivities into file.

    Args:
        sensitivities(dict): The results to pickle.
        sensitivities_file(str): Destination path. When it is ``None`` or
            empty nothing is written, mirroring ``_load_sensitivities``,
            which treats a missing path as "no stored results".
    """
    if not sensitivities_file:
        # No path given (the default of `sensitivity`): nothing to persist.
        return
    with open(sensitivities_file, 'wb') as f:
        pickle.dump(sensitivities, f)
_logger.debug("send message to {}: [{}]".format(addr, tokens)) conn.close() diff --git a/paddleslim/common/log_helper.py b/paddleslim/common/log_helper.py index 1088761e0284181bc485f5ee1824e1cbd9c7eb81..18000ce4ec6c472914de49a053e960c02cfd8e32 100644 --- a/paddleslim/common/log_helper.py +++ b/paddleslim/common/log_helper.py @@ -19,7 +19,7 @@ import logging __all__ = ['get_logger'] -def get_logger(name, level, fmt=None): +def get_logger(name, level, fmt='%(asctime)s-%(levelname)s: %(message)s'): """ Get logger from logging with given name, level and format without setting logging basicConfig. For setting basicConfig in paddle @@ -39,10 +39,10 @@ def get_logger(name, level, fmt=None): logger = logging.getLogger(name) logger.setLevel(level) handler = logging.StreamHandler() - if fmt: formatter = logging.Formatter(fmt=fmt) handler.setFormatter(formatter) logger.addHandler(handler) + logger.propagate = 0 return logger diff --git a/paddleslim/nas/sa_nas.py b/paddleslim/nas/sa_nas.py index ae0714137a684483cf00601adbc3156e2766eda3..6d84df919881fceb8d2a26c0e03c3cbe8a0536aa 100644 --- a/paddleslim/nas/sa_nas.py +++ b/paddleslim/nas/sa_nas.py @@ -111,6 +111,8 @@ class SANAS(object): Return reward of current searched network. Args: score(float): The score of current searched network. + Returns: + bool: True means updating successfully while false means failure. 
""" - self._controller_client.update(self._current_tokens, score) self._iter += 1 + return self._controller_client.update(self._current_tokens, score) diff --git a/paddleslim/nas/search_space/__init__.py b/paddleslim/nas/search_space/__init__.py index c8bef8db17e4a4cea110a3ef3fd4f3d7edceeedc..51b433d452b8cd8c3eb32582d9caa43634b700d0 100644 --- a/paddleslim/nas/search_space/__init__.py +++ b/paddleslim/nas/search_space/__init__.py @@ -14,6 +14,8 @@ import mobilenetv2 from .mobilenetv2 import * +import mobilenetv1 +from .mobilenetv1 import * import resnet from .resnet import * import search_space_registry @@ -28,4 +30,3 @@ __all__ += mobilenetv2.__all__ __all__ += search_space_registry.__all__ __all__ += search_space_factory.__all__ __all__ += search_space_base.__all__ - diff --git a/paddleslim/nas/search_space/base_layer.py b/paddleslim/nas/search_space/base_layer.py index 2e769ec6339b639732995849e9f819a08b749c92..b497c92a2ca57b4acab0c39c5dbd69d30083e295 100644 --- a/paddleslim/nas/search_space/base_layer.py +++ b/paddleslim/nas/search_space/base_layer.py @@ -20,7 +20,7 @@ def conv_bn_layer(input, filter_size, num_filters, stride, - padding, + padding='SAME', num_groups=1, act=None, name=None, @@ -51,15 +51,10 @@ def conv_bn_layer(input, param_attr=ParamAttr(name=name + '_weights'), bias_attr=False) bn_name = name + '_bn' - bn = fluid.layers.batch_norm( - input=conv, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(name=bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - if act == 'relu6': - return fluid.layers.relu6(bn) - elif act == 'sigmoid': - return fluid.layers.sigmoid(bn) - else: - return bn + return fluid.layers.batch_norm( + input=conv, + act = act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(name=bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') diff --git 
a/paddleslim/nas/search_space/combine_search_space.py b/paddleslim/nas/search_space/combine_search_space.py index 371bcf5347ebbc21e0688d1611ed3b298b940eb1..667720a9110aa92e096a4f8fa30bb3e4b3e3cecb 100644 --- a/paddleslim/nas/search_space/combine_search_space.py +++ b/paddleslim/nas/search_space/combine_search_space.py @@ -25,12 +25,14 @@ from .base_layer import conv_bn_layer __all__ = ["CombineSearchSpace"] + class CombineSearchSpace(object): """ Combine Search Space. Args: configs(list): multi config. """ + def __init__(self, config_lists): self.lens = len(config_lists) self.spaces = [] @@ -50,11 +52,10 @@ class CombineSearchSpace(object): """ cls = SEARCHSPACE.get(key) space = cls(config['input_size'], config['output_size'], - config['block_num']) + config['block_num'], config['block_mask']) return space - def init_tokens(self): """ Combine init tokens. @@ -96,4 +97,3 @@ class CombineSearchSpace(object): model_archs.append(space.token2arch(token)) return model_archs - diff --git a/paddleslim/nas/search_space/mobilenetv1.py b/paddleslim/nas/search_space/mobilenetv1.py new file mode 100644 index 0000000000000000000000000000000000000000..8b3277d2cb1b472ccd5e27407e3099b28e64f42b --- /dev/null +++ b/paddleslim/nas/search_space/mobilenetv1.py @@ -0,0 +1,224 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@SEARCHSPACE.register
class MobileNetV1Space(SearchSpaceBase):
    """Search space built around the MobileNetV1 layout.

    A token list selects the channel count of the first convolution and, for
    every depthwise-separable layer, the two channel counts and the kernel
    size. From block 4 on one extra token selects how often the repeated
    layer of block 4 is stacked.

    Args:
        input_size(int): Forwarded to ``SearchSpaceBase``.
        output_size(int): Forwarded to ``SearchSpaceBase``; when it is 1 a
            fully connected layer is appended to the network.
        block_num(int): Number of blocks to build; must be less than 6.
        block_mask: Must be ``None``. It sits in the fourth position so the
            constructor can be called the same way as the other search
            spaces, which take ``block_mask`` after ``block_num``.
        scale(float): Multiplier applied to every channel count.
        class_dim(int): Output size of the final fully connected layer.
    """

    def __init__(self,
                 input_size,
                 output_size,
                 block_num,
                 block_mask=None,
                 scale=1.0,
                 class_dim=1000):
        super(MobileNetV1Space, self).__init__(input_size, output_size,
                                               block_num, block_mask)
        assert block_mask is None, 'MobileNetV1Space uses the original MobileNetV1 as search space, so use input_size, output_size and block_num to search'
        self.scale = scale
        self.class_dim = class_dim
        # self.head_num means the channel of first convolution
        self.head_num = np.array([3, 4, 8, 12, 16, 24, 32])  # 7
        # self.filter_num1 ~ self.filter_num9 means channel of the following convolution
        self.filter_num1 = np.array([3, 4, 8, 12, 16, 24, 32, 48])  # 8
        self.filter_num2 = np.array([8, 12, 16, 24, 32, 48, 64, 80])  # 8
        self.filter_num3 = np.array(
            [16, 24, 32, 48, 64, 80, 96, 128, 144, 160])  #10
        self.filter_num4 = np.array(
            [24, 32, 48, 64, 80, 96, 128, 144, 160, 192])  #10
        self.filter_num5 = np.array(
            [32, 48, 64, 80, 96, 128, 144, 160, 192, 224, 256, 320])  #12
        self.filter_num6 = np.array(
            [64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384])  #11
        self.filter_num7 = np.array([
            64, 80, 96, 128, 144, 160, 192, 224, 256, 320, 384, 512, 1024, 1048
        ])  #14
        self.filter_num8 = np.array(
            [128, 144, 160, 192, 224, 256, 320, 384, 512, 576, 640, 704,
             768])  #13
        self.filter_num9 = np.array(
            [160, 192, 224, 256, 320, 384, 512, 640, 768, 832, 1024,
             1048])  #12
        # self.k_size means kernel size
        self.k_size = np.array([3, 5])  #2
        # self.repeat means repeat_num in forth downsample
        self.repeat = np.array([1, 2, 3, 4, 5, 6])  #6

        assert self.block_num < 6, 'MobileNetV1: block number must less than 6, but receive block number is {}'.format(
            self.block_num)

        # Set here as well so range_table() works before init_tokens().
        self.token_len = self._compute_token_len()

    def _compute_token_len(self):
        """Return how many tokens ``token2arch`` reads for ``block_num``."""
        # One head token, three tokens per separable layer, and the depth
        # token that token2arch reads at index 19 once block 4 is built.
        depth_token = 1 if self.block_num >= 4 else 0
        return 1 + (self.block_num * 2 - 1) * 3 + depth_token

    def init_tokens(self):
        """
        The initial token.
        The first one is the index of the first layers' channel in self.head_num,
        each line in the following represent the index of the [filter_num1, filter_num2, kernel_size]
        and depth means repeat times for forth downsample
        """
        # yapf: disable
        base_init_tokens = [6,  # 32
                            6, 6, 0,  # 32, 64, 3
                            6, 7, 0,  # 64, 128, 3
                            7, 6, 0,  # 128, 128, 3
                            6, 10, 0,  # 128, 256, 3
                            10, 8, 0,  # 256, 256, 3
                            8, 11, 0,  # 256, 512, 3
                            4,  # depth 5
                            11, 8, 0,  # 512, 512, 3
                            8, 10, 0,  # 512, 1024, 3
                            10, 10, 0]  # 1024, 1024, 3
        # yapf: enable
        self.token_len = self._compute_token_len()
        return base_init_tokens[:self.token_len]

    def range_table(self):
        """
        Get range table of current search space, constrains the range of tokens.
        """
        # yapf: disable
        base_range_table = [len(self.head_num),
                            len(self.filter_num1), len(self.filter_num2), len(self.k_size),
                            len(self.filter_num2), len(self.filter_num3), len(self.k_size),
                            len(self.filter_num3), len(self.filter_num4), len(self.k_size),
                            len(self.filter_num4), len(self.filter_num5), len(self.k_size),
                            len(self.filter_num5), len(self.filter_num6), len(self.k_size),
                            len(self.filter_num6), len(self.filter_num7), len(self.k_size),
                            len(self.repeat),
                            len(self.filter_num7), len(self.filter_num8), len(self.k_size),
                            len(self.filter_num8), len(self.filter_num9), len(self.k_size),
                            len(self.filter_num9), len(self.filter_num9), len(self.k_size)]
        # yapf: enable
        return base_range_table[:self.token_len]

    def token2arch(self, tokens=None):
        """
        return net_arch function

        Args:
            tokens(list): Token list to decode; the initial tokens are used
                when it is ``None``.

        Returns:
            callable: ``net_arch(input)``, which builds the network on
            ``input`` and returns the last layer's output.
        """
        if tokens is None:
            tokens = self.init_tokens()

        # Each entry: (depthwise channels, pointwise channels, stride, kernel)
        bottleneck_param_list = []

        if self.block_num >= 1:
            # tokens[0] = 32
            # 32, 64
            bottleneck_param_list.append(
                (self.filter_num1[tokens[1]], self.filter_num2[tokens[2]], 1,
                 self.k_size[tokens[3]]))
        if self.block_num >= 2:
            # 64 128 128 128
            bottleneck_param_list.append(
                (self.filter_num2[tokens[4]], self.filter_num3[tokens[5]], 2,
                 self.k_size[tokens[6]]))
            bottleneck_param_list.append(
                (self.filter_num3[tokens[7]], self.filter_num4[tokens[8]], 1,
                 self.k_size[tokens[9]]))
        if self.block_num >= 3:
            # 128 256 256 256
            bottleneck_param_list.append(
                (self.filter_num4[tokens[10]], self.filter_num5[tokens[11]], 2,
                 self.k_size[tokens[12]]))
            bottleneck_param_list.append(
                (self.filter_num5[tokens[13]], self.filter_num6[tokens[14]], 1,
                 self.k_size[tokens[15]]))
        if self.block_num >= 4:
            # 256 512 (512 512) * 5
            bottleneck_param_list.append(
                (self.filter_num6[tokens[16]], self.filter_num7[tokens[17]], 2,
                 self.k_size[tokens[18]]))
            # tokens[19] is the depth token: the same layer setting is
            # appended that many times.
            for i in range(self.repeat[tokens[19]]):
                bottleneck_param_list.append(
                    (self.filter_num7[tokens[20]],
                     self.filter_num8[tokens[21]], 1, self.k_size[tokens[22]]))
        if self.block_num >= 5:
            # 512 1024 1024 1024
            bottleneck_param_list.append(
                (self.filter_num8[tokens[23]], self.filter_num9[tokens[24]], 2,
                 self.k_size[tokens[25]]))
            bottleneck_param_list.append(
                (self.filter_num9[tokens[26]], self.filter_num9[tokens[27]], 1,
                 self.k_size[tokens[28]]))

        def net_arch(input):
            input = conv_bn_layer(
                input=input,
                filter_size=3,
                num_filters=self.head_num[tokens[0]],
                stride=2,
                name='mobilenetv1')

            for i, layer_setting in enumerate(bottleneck_param_list):
                filter_num1, filter_num2, stride, kernel_size = layer_setting
                input = self._depthwise_separable(
                    input=input,
                    num_filters1=filter_num1,
                    num_filters2=filter_num2,
                    num_groups=filter_num1,
                    stride=stride,
                    scale=self.scale,
                    kernel_size=kernel_size,
                    name='mobilenetv1_{}'.format(str(i + 1)))

            if self.output_size == 1:
                print('NOTE: if output_size is 1, add fc layer in the end!!!')
                # The fc parameters carry this network's own prefix so they
                # cannot collide with a MobileNetV2 network in one program.
                input = fluid.layers.fc(
                    input=input,
                    size=self.class_dim,
                    param_attr=ParamAttr(name='mobilenetv1_fc_weights'),
                    bias_attr=ParamAttr(name='mobilenetv1_fc_offset'))
            else:
                assert self.output_size == input.shape[2], \
                          ("output_size must EQUAL to input_size / (2^block_num)."
                          "But receive input_size={}, output_size={}, block_num={}".format(
                          self.input_size, self.output_size, self.block_num))

            return input

        return net_arch

    def _depthwise_separable(self,
                             input,
                             num_filters1,
                             num_filters2,
                             num_groups,
                             stride,
                             scale,
                             kernel_size,
                             name=None):
        """Append a grouped convolution followed by a 1x1 convolution.

        Args:
            input: Input variable.
            num_filters1(int): Channel count of the first (``_dw``) layer,
                before scaling.
            num_filters2(int): Channel count of the second (``_sep``) layer,
                before scaling.
            num_groups(int): Group count of the first layer, before scaling.
            stride(int): Stride of the first layer.
            scale(float): Multiplier applied to the channel and group counts.
            kernel_size(int): Filter size of the first layer.
            name(str): Prefix for the layer names.

        Returns:
            The output of the second (``_sep``) layer.
        """
        depthwise_conv = conv_bn_layer(
            input=input,
            filter_size=kernel_size,
            num_filters=int(num_filters1 * scale),
            stride=stride,
            num_groups=int(num_groups * scale),
            use_cudnn=False,
            name=name + '_dw')
        pointwise_conv = conv_bn_layer(
            input=depthwise_conv,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            name=name + '_sep')

        return pointwise_conv
192, 224, 256, 320, 384, 512]) #12 + # self.k_size means kernel size self.k_size = np.array([3, 5]) #2 + # self.multiply means expansion_factor of each _inverted_residual_unit self.multiply = np.array([1, 2, 3, 4, 6]) #5 + # self.repeat means repeat_num _inverted_residual_unit in each _invresi_blocks self.repeat = np.array([1, 2, 3, 4, 5, 6]) #6 self.scale = scale self.class_dim = class_dim + assert self.block_num < 7, 'MobileNetV2: block number must less than 7, but receive block number is {}'.format( + self.block_num) + def init_tokens(self): """ - The initial token send to controller. + The initial token. The first one is the index of the first layers' channel in self.head_num, each line in the following represent the index of the [expansion_factor, filter_num, repeat_num, kernel_size] """ @@ -80,18 +90,18 @@ class MobileNetV2Space(SearchSpaceBase): def range_table(self): """ - get range table of current search space + Get range table of current search space, constrains the range of tokens. 
""" # head_num + 7 * [multiple(expansion_factor), filter_num, repeat, kernel_size] # yapf: disable - range_table_base = [7, - 5, 8, 6, 2, - 5, 8, 6, 2, - 5, 8, 6, 2, - 5, 8, 6, 2, - 5, 10, 6, 2, - 5, 10, 6, 2, - 5, 12, 6, 2] + range_table_base = [len(self.head_num), + len(self.multiply), len(self.filter_num1), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num1), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num2), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num3), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num4), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num5), len(self.repeat), len(self.k_size), + len(self.multiply), len(self.filter_num6), len(self.repeat), len(self.k_size)] range_table_base = list(np.array(range_table_base) - 1) # yapf: enable return range_table_base[:self.token_len] @@ -101,11 +111,9 @@ class MobileNetV2Space(SearchSpaceBase): return net_arch function """ - assert self.block_num < 7, 'block number must less than 7, but receive block number is {}'.format( - self.block_num) - if tokens is None: tokens = self.init_tokens() + print(tokens) bottleneck_params_list = [] if self.block_num >= 1: @@ -128,7 +136,7 @@ class MobileNetV2Space(SearchSpaceBase): (self.multiply[tokens[13]], self.filter_num3[tokens[14]], self.repeat[tokens[15]], 2, self.k_size[tokens[16]])) bottleneck_params_list.append( - (self.multiply[tokens[17]], self.filter_num3[tokens[18]], + (self.multiply[tokens[17]], self.filter_num4[tokens[18]], self.repeat[tokens[19]], 1, self.k_size[tokens[20]])) if self.block_num >= 6: bottleneck_params_list.append( diff --git a/paddleslim/nas/search_space/resnet.py b/paddleslim/nas/search_space/resnet.py index 7ed404e5e145c9f173aee95823c8d6ac6a47dfdb..fd761d417575988e8ba8bd99da25372613c5912f 100644 --- a/paddleslim/nas/search_space/resnet.py +++ b/paddleslim/nas/search_space/resnet.py @@ -32,31 +32,144 
@@ class ResNetSpace(SearchSpaceBase): input_size, output_size, block_num, - scale=1.0, + block_mask=None, + extract_feature=False, class_dim=1000): - super(ResNetSpace, self).__init__(input_size, output_size, block_num) - pass + super(ResNetSpace, self).__init__(input_size, output_size, block_num, + block_mask) + assert self.block_mask == None, 'ResNetSpace will use origin ResNet as seach space, so use input_size, output_size and block_num to search' + # self.filter_num1 ~ self.filter_num4 means convolution channel + self.filter_num1 = np.array([48, 64, 96, 128, 160, 192, 224]) #7 + self.filter_num2 = np.array([64, 96, 128, 160, 192, 256, 320]) #7 + self.filter_num3 = np.array([128, 160, 192, 256, 320, 384]) #6 + self.filter_num4 = np.array([192, 256, 384, 512, 640]) #5 + # self.repeat1 ~ self.repeat4 means depth of network + self.repeat1 = [2, 3, 4, 5, 6] #5 + self.repeat2 = [2, 3, 4, 5, 6, 7] #6 + self.repeat3 = [2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24] #13 + self.repeat4 = [2, 3, 4, 5, 6, 7] #6 + self.class_dim = class_dim + self.extract_feature = extract_feature + assert self.block_num < 5, 'ResNet: block number must less than 5, but receive block number is {}'.format( + self.block_num) def init_tokens(self): - return [0, 0, 0, 0, 0, 0] + """ + The initial token. + return 2 * self.block_num, 2 means depth and num_filter + """ + init_token_base = [0, 0, 0, 0, 0, 0, 0, 0] + self.token_len = self.block_num * 2 + return init_token_base[:self.token_len] def range_table(self): - return [2, 2, 2, 2, 2, 2] + """ + Get range table of current search space, constrains the range of tokens. 
+ """ + #2 * self.block_num, 2 means depth and num_filter + range_table_base = [ + len(self.filter_num1), len(self.repeat1), len(self.filter_num2), + len(self.repeat2), len(self.filter_num3), len(self.repeat3), + len(self.filter_num4), len(self.repeat4) + ] + return range_table_base[:self.token_len] def token2arch(self, tokens=None): + """ + return net_arch function + """ if tokens is None: - self.init_tokens() + tokens = self.init_tokens() + + depth = [] + num_filters = [] + if self.block_num >= 1: + filter1 = self.filter_num1[tokens[0]] + repeat1 = self.repeat1[tokens[1]] + num_filters.append(filter1) + depth.append(repeat1) + if self.block_num >= 2: + filter2 = self.filter_num2[tokens[2]] + repeat2 = self.repeat2[tokens[3]] + num_filters.append(filter2) + depth.append(repeat2) + if self.block_num >= 3: + filter3 = self.filter_num3[tokens[4]] + repeat3 = self.repeat3[tokens[5]] + num_filters.append(filter3) + depth.append(repeat3) + if self.block_num >= 4: + filter4 = self.filter_num4[tokens[6]] + repeat4 = self.repeat4[tokens[7]] + num_filters.append(filter4) + depth.append(repeat4) def net_arch(input): - input = conv_bn_layer( - input, - num_filters=32, - filter_size=3, + conv = conv_bn_layer( + input=input, + filter_size=5, + num_filters=filter1, stride=2, - padding='SAME', - act='sigmoid', - name='resnet_conv1_1') + act='relu', + name='resnet_conv0') + for block in range(len(depth)): + for i in range(depth[block]): + conv = self._bottleneck_block( + input=conv, + num_filters=num_filters[block], + stride=2 if i == 0 and block != 0 else 1, + name='resnet_depth{}_block{}'.format(i, block)) - return input + if self.output_size == 1: + conv = fluid.layers.fc( + input=conv, + size=self.class_dim, + act=None, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.NormalInitializer(0.0, + 0.01)), + bias_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.ConstantInitializer(0))) + + return conv return net_arch + + def _shortcut(self, 
input, ch_out, stride, name=None): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return conv_bn_layer( + input=input, + filter_size=1, + num_filters=ch_out, + stride=stride, + name=name + '_conv') + else: + return input + + def _bottleneck_block(self, input, num_filters, stride, name=None): + conv0 = conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + '_bottleneck_conv0') + conv1 = conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + stride=stride, + act='relu', + name=name + '_bottleneck_conv1') + conv2 = conv_bn_layer( + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + '_bottleneck_conv2') + + short = self._shortcut( + input, num_filters * 4, stride, name=name + '_shortcut') + + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + '_bottleneck_add') diff --git a/paddleslim/nas/search_space/search_space_base.py b/paddleslim/nas/search_space/search_space_base.py index bb1ce0f8a4bbd0b18d36fa9199a6ff814ab13236..6a83f86005a5fb2408f7f85f40dff8a9e5cba819 100644 --- a/paddleslim/nas/search_space/search_space_base.py +++ b/paddleslim/nas/search_space/search_space_base.py @@ -19,10 +19,11 @@ class SearchSpaceBase(object): """Controller for Neural Architecture Search. """ - def __init__(self, input_size, output_size, block_num, *argss): + def __init__(self, input_size, output_size, block_num, block_mask, *argss): self.input_size = input_size self.output_size = output_size self.block_num = block_num + self.block_mask = block_mask def init_tokens(self): """Get init tokens in search space. 
diff --git a/paddleslim/prune/auto_pruner.py b/paddleslim/prune/auto_pruner.py index 5dbdb6d4aa064fc6d5534f0ea02fefe19e580899..fba8c11170f3fbf2eddbe15942dc642ad448658b 100644 --- a/paddleslim/prune/auto_pruner.py +++ b/paddleslim/prune/auto_pruner.py @@ -96,8 +96,10 @@ class AutoPruner(object): self._pruner = Pruner() if self._pruned_flops: self._base_flops = flops(program) - _logger.info("AutoPruner - base flops: {};".format( - self._base_flops)) + self._max_flops = self._base_flops * (1 - self._pruned_flops) + _logger.info( + "AutoPruner - base flops: {}; pruned_flops: {}; max_flops: {}". + format(self._base_flops, self._pruned_flops, self._max_flops)) if self._pruned_latency: self._base_latency = latency(program) @@ -106,7 +108,7 @@ class AutoPruner(object): self, _program, self._params, self._pruned_flops, self._pruned_latency) init_tokens = self._ratios2tokens(self._init_ratios) - + _logger.info("range table: {}".format(self._range_table)) controller = SAController(self._range_table, self._reduce_rate, self._init_temperature, self._max_try_number, init_tokens, self._constrain_func) @@ -143,10 +145,10 @@ class AutoPruner(object): def _get_range_table(self, min_ratios, max_ratios): assert isinstance(min_ratios, list) or isinstance(min_ratios, float) assert isinstance(max_ratios, list) or isinstance(max_ratios, float) - min_ratios = min_ratios if isinstance(min_ratios, - list) else [min_ratios] - max_ratios = max_ratios if isinstance(max_ratios, - list) else [max_ratios] + min_ratios = min_ratios if isinstance( + min_ratios, list) else [min_ratios] * len(self._params) + max_ratios = max_ratios if isinstance( + max_ratios, list) else [max_ratios] * len(self._params) min_tokens = self._ratios2tokens(min_ratios) max_tokens = self._ratios2tokens(max_ratios) return (min_tokens, max_tokens) @@ -160,10 +162,17 @@ class AutoPruner(object): ratios, place=self._place, only_graph=True) - return flops(pruned_program) < self._base_flops * ( - 1 - self._pruned_flops) - - def 
prune(self, program): + current_flops = flops(pruned_program) + result = current_flops < self._max_flops + if not result: + _logger.info("Failed try ratios: {}; flops: {}; max_flops: {}". + format(ratios, current_flops, self._max_flops)) + else: + _logger.info("Success try ratios: {}; flops: {}; max_flops: {}". + format(ratios, current_flops, self._max_flops)) + return result + + def prune(self, program, eval_program=None): """ Prune program with latest tokens generated by controller. Args: @@ -178,10 +187,21 @@ class AutoPruner(object): self._params, self._current_ratios, place=self._place, + only_graph=False, param_backup=self._param_backup) + pruned_val_program = None + if eval_program is not None: + pruned_val_program = self._pruner.prune( + program, + self._scope, + self._params, + self._current_ratios, + place=self._place, + only_graph=True) + _logger.info("AutoPruner - pruned ratios: {}".format( self._current_ratios)) - return pruned_program + return pruned_program, pruned_val_program def reward(self, score): """ diff --git a/paddleslim/prune/pruner.py b/paddleslim/prune/pruner.py index cd79f5b286bbb34d1d688ce515691fdfc7e8f730..0fdde525a793b90df63f3245ac5215365dd7ccf4 100644 --- a/paddleslim/prune/pruner.py +++ b/paddleslim/prune/pruner.py @@ -12,13 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import logging import numpy as np import paddle.fluid as fluid import copy from ..core import VarWrapper, OpWrapper, GraphWrapper +from ..common import get_logger __all__ = ["Pruner"] +_logger = get_logger(__name__, level=logging.INFO) + class Pruner(): def __init__(self, criterion="l1_norm"): @@ -69,6 +73,10 @@ class Pruner(): only_graph=only_graph, param_backup=param_backup, param_shape_backup=param_shape_backup) + for op in graph.ops(): + if op.type() == 'depthwise_conv2d' or op.type( + ) == 'depthwise_conv2d_grad': + op.set_attr('groups', op.inputs('Filter')[0].shape()[0]) return graph.program def _prune_filters_by_ratio(self, @@ -94,27 +102,49 @@ class Pruner(): """ if params[0].name() in self.pruned_list[0]: return - param_t = scope.find_var(params[0].name()).get_tensor() - pruned_idx = self._cal_pruned_idx( - params[0].name(), np.array(param_t), ratio, axis=0) - for param in params: - assert isinstance(param, VarWrapper) - param_t = scope.find_var(param.name()).get_tensor() - if param_backup is not None and (param.name() not in param_backup): - param_backup[param.name()] = copy.deepcopy(np.array(param_t)) - pruned_param = self._prune_tensor( - np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy) - if not only_graph: + + if only_graph: + pruned_num = int(round(params[0].shape()[0] * ratio)) + for param in params: + ori_shape = param.shape() + if param_backup is not None and ( + param.name() not in param_backup): + param_backup[param.name()] = copy.deepcopy(ori_shape) + new_shape = list(ori_shape) + new_shape[0] -= pruned_num + param.set_shape(new_shape) + _logger.debug("prune [{}] from {} to {}".format(param.name( + ), ori_shape, new_shape)) + self.pruned_list[0].append(param.name()) + return range(pruned_num) + + else: + + param_t = scope.find_var(params[0].name()).get_tensor() + pruned_idx = self._cal_pruned_idx( + params[0].name(), np.array(param_t), ratio, axis=0) + for param in params: + assert isinstance(param, VarWrapper) + param_t = 
scope.find_var(param.name()).get_tensor() + if param_backup is not None and ( + param.name() not in param_backup): + param_backup[param.name()] = copy.deepcopy( + np.array(param_t)) + pruned_param = self._prune_tensor( + np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy) param_t.set(pruned_param, place) - ori_shape = param.shape() - if param_shape_backup is not None and ( - param.name() not in param_shape_backup): - param_shape_backup[param.name()] = copy.deepcopy(param.shape()) - new_shape = list(param.shape()) - new_shape[0] = pruned_param.shape[0] - param.set_shape(new_shape) - self.pruned_list[0].append(param.name()) - return pruned_idx + ori_shape = param.shape() + if param_shape_backup is not None and ( + param.name() not in param_shape_backup): + param_shape_backup[param.name()] = copy.deepcopy( + param.shape()) + new_shape = list(param.shape()) + new_shape[0] = pruned_param.shape[0] + param.set_shape(new_shape) + _logger.debug("prune [{}] from {} to {}".format(param.name( + ), ori_shape, new_shape)) + self.pruned_list[0].append(param.name()) + return pruned_idx def _prune_parameter_by_idx(self, scope, @@ -141,24 +171,44 @@ class Pruner(): """ if params[0].name() in self.pruned_list[pruned_axis]: return - for param in params: - assert isinstance(param, VarWrapper) - param_t = scope.find_var(param.name()).get_tensor() - if param_backup is not None and (param.name() not in param_backup): - param_backup[param.name()] = copy.deepcopy(np.array(param_t)) - pruned_param = self._prune_tensor( - np.array(param_t), pruned_idx, pruned_axis, lazy=lazy) - if not only_graph: + + if only_graph: + pruned_num = len(pruned_idx) + for param in params: + ori_shape = param.shape() + if param_backup is not None and ( + param.name() not in param_backup): + param_backup[param.name()] = copy.deepcopy(ori_shape) + new_shape = list(ori_shape) + new_shape[pruned_axis] -= pruned_num + param.set_shape(new_shape) + _logger.debug("prune [{}] from {} to {}".format(param.name( + ), 
ori_shape, new_shape)) + self.pruned_list[pruned_axis].append(param.name()) + + else: + for param in params: + assert isinstance(param, VarWrapper) + param_t = scope.find_var(param.name()).get_tensor() + if param_backup is not None and ( + param.name() not in param_backup): + param_backup[param.name()] = copy.deepcopy( + np.array(param_t)) + pruned_param = self._prune_tensor( + np.array(param_t), pruned_idx, pruned_axis, lazy=lazy) param_t.set(pruned_param, place) - ori_shape = param.shape() + ori_shape = param.shape() - if param_shape_backup is not None and ( - param.name() not in param_shape_backup): - param_shape_backup[param.name()] = copy.deepcopy(param.shape()) - new_shape = list(param.shape()) - new_shape[pruned_axis] = pruned_param.shape[pruned_axis] - param.set_shape(new_shape) - self.pruned_list[pruned_axis].append(param.name()) + if param_shape_backup is not None and ( + param.name() not in param_shape_backup): + param_shape_backup[param.name()] = copy.deepcopy( + param.shape()) + new_shape = list(param.shape()) + new_shape[pruned_axis] = pruned_param.shape[pruned_axis] + param.set_shape(new_shape) + _logger.debug("prune [{}] from {} to {}".format(param.name( + ), ori_shape, new_shape)) + self.pruned_list[pruned_axis].append(param.name()) def _forward_search_related_op(self, graph, param): """ @@ -488,14 +538,16 @@ class Pruner(): visited.append(op.idx()) while len(stack) > 0: top_op = stack.pop() - for parent in graph.pre_ops(top_op): - if parent.idx() not in visited and (not parent.is_bwd_op()): - if ((parent.type() == 'conv2d') or - (parent.type() == 'fc')): - brothers.append(parent) - else: - stack.append(parent) - visited.append(parent.idx()) + if top_op.type().startswith("elementwise_"): + for parent in graph.pre_ops(top_op): + if parent.idx() not in visited and ( + not parent.is_bwd_op()): + if ((parent.type() == 'conv2d') or + (parent.type() == 'fc')): + brothers.append(parent) + else: + stack.append(parent) + visited.append(parent.idx()) for 
child in graph.next_ops(top_op): if (child.type() != 'conv2d') and (child.type() != 'fc') and ( diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py old mode 100644 new mode 100755 index 0db22772d712951ed895f2d2e897142d6ce3c377..8ea9fbe32ee3f8617d9f00a1ce097b715957163e --- a/paddleslim/quant/quanter.py +++ b/paddleslim/quant/quanter.py @@ -20,11 +20,19 @@ from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass from paddle.fluid.contrib.slim.quantization import TransformForMobilePass +from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass from paddle.fluid import core -WEIGHT_QUANTIZATION_TYPES=['abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max'] -ACTIVATION_QUANTIZATION_TYPES=['abs_max','range_abs_max', 'moving_average_abs_max'] +WEIGHT_QUANTIZATION_TYPES = [ + 'abs_max', 'channel_wise_abs_max', 'range_abs_max', + 'moving_average_abs_max' +] +ACTIVATION_QUANTIZATION_TYPES = [ + 'abs_max', 'range_abs_max', 'moving_average_abs_max' +] VALID_DTYPES = ['int8'] +TRANSFORM_PASS_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul'] +QUANT_DEQUANT_PASS_OP_TYPES = ['elementwise_add', 'pool2d'] _quant_config_default = { # weight quantize type, default is 'abs_max' @@ -38,7 +46,8 @@ _quant_config_default = { # ops of name_scope in not_quant_pattern list, will not be quantized 'not_quant_pattern': ['skip_quant'], # ops of type in quantize_op_types, will be quantized - 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'], + 'quantize_op_types': + ['conv2d', 'depthwise_conv2d', 'mul', 'elementwise_add', 'pool2d'], # data type after quantization, such as 'uint8', 'int8', etc. default is 'int8' 'dtype': 'int8', # window size for 'range_abs_max' quantization. 
defaulf is 10000 @@ -88,6 +97,12 @@ def _parse_configs(user_config): assert isinstance(configs['quantize_op_types'], list), \ "quantize_op_types must be a list" + for op_type in configs['quantize_op_types']: + assert (op_type in QUANT_DEQUANT_PASS_OP_TYPES) or ( + op_type in TRANSFORM_PASS_OP_TYPES), "{} is not support, \ + now support op types are {}".format( + op_type, TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES) + assert isinstance(configs['dtype'], str), \ "dtype must be a str." @@ -132,19 +147,37 @@ def quant_aware(program, place, config, scope=None, for_test=False): config = _parse_configs(config) main_graph = IrGraph(core.Graph(program.desc), for_test=for_test) - transform_pass = QuantizationTransformPass( - scope=scope, - place=place, - weight_bits=config['weight_bits'], - activation_bits=config['activation_bits'], - activation_quantize_type=config['activation_quantize_type'], - weight_quantize_type=config['weight_quantize_type'], - window_size=config['window_size'], - moving_rate=config['moving_rate'], - quantizable_op_type=config['quantize_op_types'], - skip_pattern=config['not_quant_pattern']) - - transform_pass.apply(main_graph) + transform_pass_ops = [] + quant_dequant_ops = [] + for op_type in config['quantize_op_types']: + if op_type in TRANSFORM_PASS_OP_TYPES: + transform_pass_ops.append(op_type) + elif op_type in QUANT_DEQUANT_PASS_OP_TYPES: + quant_dequant_ops.append(op_type) + if len(transform_pass_ops) > 0: + transform_pass = QuantizationTransformPass( + scope=scope, + place=place, + weight_bits=config['weight_bits'], + activation_bits=config['activation_bits'], + activation_quantize_type=config['activation_quantize_type'], + weight_quantize_type=config['weight_quantize_type'], + window_size=config['window_size'], + moving_rate=config['moving_rate'], + quantizable_op_type=transform_pass_ops, + skip_pattern=config['not_quant_pattern']) + + transform_pass.apply(main_graph) + + if len(quant_dequant_ops) > 0: + quant_dequant_pass = 
AddQuantDequantPass( + scope=scope, + place=place, + moving_rate=config['moving_rate'], + quant_bits=config['activation_bits'], + skip_pattern=config['not_quant_pattern'], + quantizable_op_type=quant_dequant_ops) + quant_dequant_pass.apply(main_graph) if for_test: quant_program = main_graph.to_program() @@ -168,7 +201,7 @@ def quant_post(program, place, config, scope=None): pass -def convert(program, scope, place, config, save_int8=False): +def convert(program, place, config, scope=None, save_int8=False): """ add quantization ops in program. the program returned is not trainable. Args: @@ -183,7 +216,7 @@ def convert(program, scope, place, config, save_int8=False): fluid.Program: freezed int8 program which can be used for inference. if save_int8 is False, this value is None. """ - + scope = fluid.global_scope() if not scope else scope test_graph = IrGraph(core.Graph(program.desc), for_test=True) # Freeze the graph after training by adjusting the quantize diff --git a/paddleslim/search/__init__.py b/paddleslim/search/__init__.py deleted file mode 100644 index 4f3182c3058cb33e46777ab1424242b42406a603..0000000000000000000000000000000000000000 --- a/paddleslim/search/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License" -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Controllers and controller server""" diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py new file mode 100644 index 0000000000000000000000000000000000000000..e2cfa01d889db2891fd7507b2d4d9aec018a1163 --- /dev/null +++ b/tests/test_sensitivity.py @@ -0,0 +1,69 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import sys +sys.path.append("../") +import unittest +import numpy +import paddle +import paddle.fluid as fluid +from paddleslim.analysis import sensitivity +from layers import conv_bn_layer + + +class TestSensitivity(unittest.TestCase): + def test_sensitivity(self): + main_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(main_program, startup_program): + input = fluid.data(name="image", shape=[None, 1, 28, 28]) + label = fluid.data(name="label", shape=[None, 1], dtype="int64") + conv1 = conv_bn_layer(input, 8, 3, "conv1") + conv2 = conv_bn_layer(conv1, 8, 3, "conv2") + sum1 = conv1 + conv2 + conv3 = conv_bn_layer(sum1, 8, 3, "conv3") + conv4 = conv_bn_layer(conv3, 8, 3, "conv4") + sum2 = conv4 + sum1 + conv5 = conv_bn_layer(sum2, 8, 3, "conv5") + conv6 = conv_bn_layer(conv5, 8, 3, "conv6") + out = fluid.layers.fc(conv6, size=10, act='softmax') + acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1) + eval_program = main_program.clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(startup_program) + 
+ val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128) + + def eval_func(program, scope): + feeder = fluid.DataFeeder( + feed_list=['image', 'label'], place=place, program=program) + acc_set = [] + for data in val_reader(): + acc_np = exe.run(program=program, + scope=scope, + feed=feeder.feed(data), + fetch_list=[acc_top1]) + acc_set.append(float(acc_np[0])) + acc_val_mean = numpy.array(acc_set).mean() + print("acc_val_mean: {}".format(acc_val_mean)) + return acc_val_mean + + sensitivity(eval_program, + fluid.global_scope(), place, ["conv4_weights"], eval_func, + "./sensitivities_file") + + +if __name__ == '__main__': + unittest.main()