diff --git a/demo/nas/block_sa_nas_mobilenetv2.py b/demo/nas/block_sa_nas_mobilenetv2.py new file mode 100644 index 0000000000000000000000000000000000000000..27fb1f4cf9076dfdf6500fc79d4df5697042cb09 --- /dev/null +++ b/demo/nas/block_sa_nas_mobilenetv2.py @@ -0,0 +1,263 @@ +import sys +sys.path.append('..') +import numpy as np +import argparse +import ast +import logging +import time +import paddle +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from paddleslim.analysis import flops +from paddleslim.nas import SANAS +from paddleslim.common import get_logger +from optimizer import create_optimizer +import imagenet_reader + +_logger = get_logger(__name__, level=logging.INFO) + +reduce_rate = 0.85 +init_temperature = 10.24 +max_flops = 321208544 +server_address = "" +port = 8979 +retain_epoch = 5 + + +def create_data_loader(image_shape): + data_shape = [None] + image_shape + data = fluid.data(name='data', shape=data_shape, dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + data_loader = fluid.io.DataLoader.from_generator( + feed_list=[data, label], + capacity=1024, + use_double_buffer=True, + iterable=True) + return data_loader, data, label + + +def conv_bn_layer(input, + filter_size, + num_filters, + stride, + padding='SAME', + num_groups=1, + act=None, + name=None, + use_cudnn=True): + conv = fluid.layers.conv2d( + input, + num_filters=num_filters, + filter_size=filter_size, + stride=stride, + padding=padding, + groups=num_groups, + act=None, + use_cudnn=use_cudnn, + param_attr=ParamAttr(name=name + '_weights'), + bias_attr=False) + bn_name = name + '_bn' + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(name=bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + + +def search_mobilenetv2_block(config, args, image_size): + image_shape = [3, image_size, image_size] + if 
args.is_server: + sa_nas = SANAS( + config, + server_addr=("", port), + init_temperature=init_temperature, + reduce_rate=reduce_rate, + search_steps=args.search_steps, + is_server=True) + else: + sa_nas = SANAS( + config, + server_addr=(server_address, port), + init_temperature=init_temperature, + reduce_rate=reduce_rate, + search_steps=args.search_steps, + is_server=False) + + for step in range(args.search_steps): + archs = sa_nas.next_archs()[0] + + train_program = fluid.Program() + test_program = fluid.Program() + startup_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + train_loader, data, label = create_data_loader(image_shape) + data = conv_bn_layer( + input=data, + num_filters=32, + filter_size=3, + stride=2, + padding='SAME', + act='relu6', + name='mobilenetv2_conv1') + data = archs(data)[0] + data = conv_bn_layer( + input=data, + num_filters=1280, + filter_size=1, + stride=1, + padding='SAME', + act='relu6', + name='mobilenetv2_last_conv') + data = fluid.layers.pool2d( + input=data, + pool_size=7, + pool_stride=1, + pool_type='avg', + global_pooling=True, + name='mobilenetv2_last_pool') + output = fluid.layers.fc( + input=data, + size=args.class_dim, + param_attr=ParamAttr(name='mobilenetv2_fc_weights'), + bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) + + softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) + cost = fluid.layers.cross_entropy(input=softmax_out, label=label) + avg_cost = fluid.layers.mean(cost) + acc_top1 = fluid.layers.accuracy( + input=softmax_out, label=label, k=1) + acc_top5 = fluid.layers.accuracy( + input=softmax_out, label=label, k=5) + test_program = train_program.clone(for_test=True) + + optimizer = fluid.optimizer.Momentum( + learning_rate=0.1, + momentum=0.9, + regularization=fluid.regularizer.L2Decay(1e-4)) + optimizer.minimize(avg_cost) + + current_flops = flops(train_program) + print('step: {}, current_flops: {}'.format(step, current_flops)) + if current_flops > 
max_flops: + continue + + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + if args.data == 'cifar10': + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(cycle=False), buf_size=1024), + batch_size=args.batch_size, + drop_last=True) + + test_reader = paddle.batch( + paddle.dataset.cifar.test10(cycle=False), + batch_size=args.batch_size, + drop_last=False) + elif args.data == 'imagenet': + train_reader = paddle.batch( + imagenet_reader.train(), + batch_size=args.batch_size, + drop_last=True) + test_reader = paddle.batch( + imagenet_reader.val(), + batch_size=args.batch_size, + drop_last=False) + + test_loader, _, _ = create_data_loader(image_shape) + train_loader.set_sample_list_generator( + train_reader, + places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) + test_loader.set_sample_list_generator(test_reader, places=place) + + build_strategy = fluid.BuildStrategy() + train_compiled_program = fluid.CompiledProgram( + train_program).with_data_parallel( + loss_name=avg_cost.name, build_strategy=build_strategy) + for epoch_id in range(retain_epoch): + for batch_id, data in enumerate(train_loader()): + fetches = [avg_cost.name] + s_time = time.time() + outs = exe.run(train_compiled_program, + feed=data, + fetch_list=fetches)[0] + batch_time = time.time() - s_time + if batch_id % 10 == 0: + _logger.info( + 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'. + format(step, epoch_id, batch_id, outs[0], batch_time)) + + reward = [] + for batch_id, data in enumerate(test_loader()): + test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name] + batch_reward = exe.run(test_program, + feed=data, + fetch_list=test_fetches) + reward_avg = np.mean(np.array(batch_reward), axis=1) + reward.append(reward_avg) + + _logger.info( + 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'. 
+ format(step, batch_id, batch_reward[0], batch_reward[1], + batch_reward[2])) + + finally_reward = np.mean(np.array(reward), axis=0) + _logger.info( + 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( + finally_reward[0], finally_reward[1], finally_reward[2])) + + sa_nas.reward(float(finally_reward[1])) + + +if __name__ == '__main__': + + parser = argparse.ArgumentParser( + description='SA NAS MobileNetV2 cifar10 argparase') + parser.add_argument( + '--use_gpu', + type=ast.literal_eval, + default=True, + help='Whether to use GPU in train/test model.') + parser.add_argument( + '--class_dim', type=int, default=1000, help='classify number.') + parser.add_argument( + '--batch_size', type=int, default=256, help='batch size.') + parser.add_argument( + '--data', + type=str, + default='cifar10', + choices=['cifar10', 'imagenet'], + help='dataset name.') + parser.add_argument( + '--is_server', + type=ast.literal_eval, + default=True, + help='Whether to start a server.') + # nas args + parser.add_argument( + '--search_steps', + type=int, + default=100, + help='controller server number.') + parser.add_argument('--lr', type=float, default=0.1, help='learning rate.') + args = parser.parse_args() + print(args) + + if args.data == 'cifar10': + image_size = 32 + elif args.data == 'imagenet': + image_size = 224 + else: + raise NotImplementedError( + 'data must in [cifar10, imagenet], but received: {}'.format( + args.data)) + + # block mask means block number, 1 mean downsample, 0 means the size of feature map don't change after this block + config_info = {'block_mask': [0, 1, 1, 1, 1, 0, 1, 0]} + config = [('MobileNetV2BlockSpace', config_info)] + + search_mobilenetv2_block(config, args, image_size) diff --git a/demo/nas/sa_nas_mobilenetv2.py b/demo/nas/sa_nas_mobilenetv2.py index 142c2c08f09e7888ab255b1d6ce762a50c8e1966..e6abe115d566f0779cbd8806f702a18b832233f5 100644 --- a/demo/nas/sa_nas_mobilenetv2.py +++ b/demo/nas/sa_nas_mobilenetv2.py @@ -9,7 +9,7 @@ import ast 
import logging import paddle import paddle.fluid as fluid -from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory +from paddle.fluid.param_attr import ParamAttr from paddleslim.analysis import flops from paddleslim.nas import SANAS from paddleslim.common import get_logger @@ -18,11 +18,18 @@ import imagenet_reader _logger = get_logger(__name__, level=logging.INFO) +reduce_rate = 0.85 +init_temperature = 10.24 +max_flops = 321208544 +server_address = "" +port = 8989 +retain_epoch = 5 + def create_data_loader(image_shape): - data_shape = [-1] + image_shape + data_shape = [None] + image_shape data = fluid.data(name='data', shape=data_shape, dtype='float32') - label = fluid.data(name='label', shape=[-1, 1], dtype='int64') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') data_loader = fluid.io.DataLoader.from_generator( feed_list=[data, label], capacity=1024, @@ -40,6 +47,7 @@ def build_program(main_program, with fluid.program_guard(main_program, startup_program): data_loader, data, label = create_data_loader(image_shape) output = archs(data) + output = fluid.layers.fc(input=output, size=args.class_dim) softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) cost = fluid.layers.cross_entropy(input=softmax_out, label=label) @@ -54,24 +62,22 @@ def build_program(main_program, def search_mobilenetv2(config, args, image_size, is_server=True): - factory = SearchSpaceFactory() - space = factory.get_search_space(config) if is_server: ### start a server and a client sa_nas = SANAS( config, - server_addr=("", 8883), - init_temperature=args.init_temperature, - reduce_rate=args.reduce_rate, + server_addr=("", port), + init_temperature=init_temperature, + reduce_rate=reduce_rate, search_steps=args.search_steps, is_server=True) else: ### start a client sa_nas = SANAS( config, - server_addr=("10.255.125.38", 8883), - init_temperature=args.init_temperature, - reduce_rate=args.reduce_rate, + server_addr=(server_address, port), + 
init_temperature=init_temperature, + reduce_rate=reduce_rate, search_steps=args.search_steps, is_server=False) @@ -87,7 +93,7 @@ def search_mobilenetv2(config, args, image_size, is_server=True): current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) - if current_flops > args.max_flops: + if current_flops > max_flops: continue test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( @@ -124,7 +130,6 @@ def search_mobilenetv2(config, args, image_size, is_server=True): batch_size=args.batch_size, drop_last=False) - #test_loader, _, _ = create_data_loader(image_shape) train_loader.set_sample_list_generator( train_reader, places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) @@ -134,7 +139,7 @@ def search_mobilenetv2(config, args, image_size, is_server=True): train_compiled_program = fluid.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) - for epoch_id in range(args.retain_epoch): + for epoch_id in range(retain_epoch): for batch_id, data in enumerate(train_loader()): fetches = [avg_cost.name] s_time = time.time() @@ -171,6 +176,99 @@ def search_mobilenetv2(config, args, image_size, is_server=True): sa_nas.reward(float(finally_reward[1])) +def test_search_result(tokens, image_size, args, config): + sa_nas = SANAS( + config, + server_addr=("", 8887), + init_temperature=args.init_temperature, + reduce_rate=args.reduce_rate, + search_steps=args.search_steps, + is_server=True) + + image_shape = [3, image_size, image_size] + + archs = sa_nas.tokens2arch(tokens) + + train_program = fluid.Program() + test_program = fluid.Program() + startup_program = fluid.Program() + train_loader, avg_cost, acc_top1, acc_top5 = build_program( + train_program, startup_program, image_shape, archs, args) + + current_flops = flops(train_program) + print('current_flops: {}'.format(current_flops)) + test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = 
build_program( + test_program, startup_program, image_shape, archs, args, is_test=True) + + test_program = test_program.clone(for_test=True) + + place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + if args.data == 'cifar10': + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(cycle=False), buf_size=1024), + batch_size=args.batch_size, + drop_last=True) + + test_reader = paddle.batch( + paddle.dataset.cifar.test10(cycle=False), + batch_size=args.batch_size, + drop_last=False) + elif args.data == 'imagenet': + train_reader = paddle.batch( + imagenet_reader.train(), + batch_size=args.batch_size, + drop_last=True) + test_reader = paddle.batch( + imagenet_reader.val(), batch_size=args.batch_size, drop_last=False) + + train_loader.set_sample_list_generator( + train_reader, + places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) + test_loader.set_sample_list_generator(test_reader, places=place) + + build_strategy = fluid.BuildStrategy() + train_compiled_program = fluid.CompiledProgram( + train_program).with_data_parallel( + loss_name=avg_cost.name, build_strategy=build_strategy) + for epoch_id in range(retain_epoch): + for batch_id, data in enumerate(train_loader()): + fetches = [avg_cost.name] + s_time = time.time() + outs = exe.run(train_compiled_program, + feed=data, + fetch_list=fetches)[0] + batch_time = time.time() - s_time + if batch_id % 10 == 0: + _logger.info( + 'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'. 
+ format(epoch_id, batch_id, outs[0], batch_time)) + + reward = [] + for batch_id, data in enumerate(test_loader()): + test_fetches = [ + test_avg_cost.name, test_acc_top1.name, test_acc_top5.name + ] + batch_reward = exe.run(test_program, + feed=data, + fetch_list=test_fetches) + reward_avg = np.mean(np.array(batch_reward), axis=1) + reward.append(reward_avg) + + _logger.info( + 'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'. + format(batch_id, batch_reward[0], batch_reward[1], + batch_reward[2])) + + finally_reward = np.mean(np.array(reward), axis=0) + _logger.info( + 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( + finally_reward[0], finally_reward[1], finally_reward[2])) + + if __name__ == '__main__': parser = argparse.ArgumentParser( @@ -182,75 +280,25 @@ if __name__ == '__main__': help='Whether to use GPU in train/test model.') parser.add_argument( '--batch_size', type=int, default=256, help='batch size.') + parser.add_argument( + '--class_dim', type=int, default=1000, help='classify number.') parser.add_argument( '--data', type=str, default='cifar10', choices=['cifar10', 'imagenet'], help='server address.') - # controller - parser.add_argument( - '--reduce_rate', type=float, default=0.85, help='reduce rate.') - parser.add_argument( - '--init_temperature', - type=float, - default=10.24, - help='init temperature.') parser.add_argument( '--is_server', type=ast.literal_eval, default=True, help='Whether to start a server.') - # nas args - parser.add_argument( - '--max_flops', type=int, default=592948064, help='reduce rate.') - parser.add_argument( - '--retain_epoch', type=int, default=5, help='train epoch before val.') - parser.add_argument( - '--end_epoch', type=int, default=500, help='end epoch present client.') parser.add_argument( '--search_steps', type=int, default=100, help='controller server number.') - parser.add_argument( - '--server_address', type=str, default=None, help='server address.') - # optimizer args - 
parser.add_argument( - '--lr_strategy', - type=str, - default='piecewise_decay', - help='learning rate decay strategy.') parser.add_argument('--lr', type=float, default=0.1, help='learning rate.') - parser.add_argument( - '--l2_decay', type=float, default=1e-4, help='learning rate decay.') - parser.add_argument( - '--step_epochs', - nargs='+', - type=int, - default=[30, 60, 90], - help="piecewise decay step") - parser.add_argument( - '--momentum_rate', - type=float, - default=0.9, - help='learning rate decay.') - parser.add_argument( - '--warm_up_epochs', - type=float, - default=5.0, - help='learning rate decay.') - parser.add_argument( - '--num_epochs', type=int, default=120, help='learning rate decay.') - parser.add_argument( - '--decay_epochs', type=float, default=2.4, help='learning rate decay.') - parser.add_argument( - '--decay_rate', type=float, default=0.97, help='learning rate decay.') - parser.add_argument( - '--total_images', - type=int, - default=1281167, - help='learning rate decay.') args = parser.parse_args() print(args) @@ -261,16 +309,10 @@ if __name__ == '__main__': image_size = 224 block_num = 6 else: - raise NotImplemented( + raise NotImplementedError( 'data must in [cifar10, imagenet], but received: {}'.format( args.data)) - config_info = { - 'input_size': image_size, - 'output_size': 1, - 'block_num': block_num, - 'block_mask': None - } - config = [('MobileNetV2Space', config_info)] + config = [('MobileNetV2Space')] search_mobilenetv2(config, args, image_size, is_server=args.is_server) diff --git a/demo/optimizer.py b/demo/optimizer.py index 73f441f897d22c10d2d6e05afaa7491b227b27d4..0f0c57985f839097e9e1ae4643ba2e5a2fb64698 100644 --- a/demo/optimizer.py +++ b/demo/optimizer.py @@ -23,6 +23,16 @@ import paddle.fluid.layers.ops as ops from paddle.fluid.initializer import init_on_cpu from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter +lr_strategy = 'cosine_decay' +l2_decay = 1e-4 +step_epochs = [30, 60, 90] +momentum_rate 
= 0.9 +warm_up_epochs = 5.0 +num_epochs = 120 +decay_epochs = 2.4 +decay_rate = 0.97 +total_images = 1281167 + def cosine_decay(learning_rate, step_each_epoch, epochs=120): """Applies cosine decay to the learning rate. @@ -152,15 +162,15 @@ class Optimizer(object): def __init__(self, args): self.batch_size = args.batch_size self.lr = args.lr - self.lr_strategy = args.lr_strategy - self.l2_decay = args.l2_decay - self.momentum_rate = args.momentum_rate - self.step_epochs = args.step_epochs - self.num_epochs = args.num_epochs - self.warm_up_epochs = args.warm_up_epochs - self.decay_epochs = args.decay_epochs - self.decay_rate = args.decay_rate - self.total_images = args.total_images + self.lr_strategy = lr_strategy + self.l2_decay = l2_decay + self.momentum_rate = momentum_rate + self.step_epochs = step_epochs + self.num_epochs = num_epochs + self.warm_up_epochs = warm_up_epochs + self.decay_epochs = decay_epochs + self.decay_rate = decay_rate + self.total_images = total_images self.step = int(math.ceil(float(self.total_images) / self.batch_size)) @@ -295,6 +305,6 @@ class Optimizer(object): def create_optimizer(args): Opt = Optimizer(args) - optimizer = getattr(Opt, args.lr_strategy)() + optimizer = getattr(Opt, lr_strategy)() return optimizer diff --git a/paddleslim/nas/nas_api.md b/paddleslim/nas/nas_api.md new file mode 100644 index 0000000000000000000000000000000000000000..68b6fef39fc58ff4ae340b95c1279fabdf2c54f7 --- /dev/null +++ b/paddleslim/nas/nas_api.md @@ -0,0 +1,182 @@ +# paddleslim.nas API文档 + +## SANAS API文档 + +## class SANAS +SANAS(Simulated Annealing Neural Architecture Search)是基于模拟退火算法进行模型结构搜索的算法,一般用于离散搜索任务。 + +--- + +>paddleslim.nas.SANAS(configs, server_addr, init_temperature, reduce_rate, search_steps, save_checkpoint, load_checkpoint, is_server) + +**参数:** +- **configs(list):** 搜索空间配置列表,格式是`[(key, {input_size, output_size, block_num, block_mask})]`或者`[(key)]`(MobileNetV2、MobilenetV1和ResNet的搜索空间使用和原本网络结构相同的搜索空间,所以仅需指定`key`即可), `input_size` 
和`output_size`表示输入和输出的特征图的大小,`block_num`是指搜索网络中的block数量,`block_mask`是一组由0和1组成的列表,0代表不进行下采样的block,1代表下采样的block。 更多paddleslim提供的搜索空间配置可以参考。 +- **server_addr(tuple):** SANAS的地址,包括server的ip地址和端口号,如果ip地址为None或者为""的话则默认使用本机ip。默认:("", 8881)。 +- **init_temperature(float):** 基于模拟退火进行搜索的初始温度。默认:100。 +- **reduce_rate(float):** 基于模拟退火进行搜索的衰减率。默认:0.85。 +- **search_steps(int):** 搜索过程迭代的次数。默认:300。 +- **save_checkpoint(str|None):** 保存checkpoint的文件目录,如果设置为None的话则不保存checkpoint。默认:`./nas_checkpoint`。 +- **load_checkpoint(str|None):** 加载checkpoint的文件目录,如果设置为None的话则不加载checkpoint。默认:None。 +- **is_server(bool):** 当前实例是否要启动一个server。默认:True。 + +**返回:** +一个SANAS类的实例 + +**示例代码:** +``` +from paddleslim.nas import SANAS +config = [('MobileNetV2Space')] +sanas = SANAS(config=config) +``` + +--- + +>tokens2arch(tokens) +通过一组token得到实际的模型结构,一般用来把搜索到最优的token转换为模型结构用来做最后的训练。 + +**参数:** +- **tokens(list):** 一组token。 + +**返回** +返回一个模型结构实例。 + +**示例代码:** +``` +import paddle.fluid as fluid +input = fluid.data(name='input', shape=[None, 3, 32, 32], dtype='float32') +archs = sanas.token2arch(tokens) +for arch in archs: + output = arch(input) + input = output +``` +--- + +>next_archs(): +获取下一组模型结构。 + +**返回** +返回模型结构实例的列表,形式为list。 + +**示例代码:** +``` +import paddle.fluid as fluid +input = fluid.data(name='input', shape=[None, 3, 32, 32], dtype='float32') +archs = sanas.next_archs() +for arch in archs: + output = arch(input) + input = output +``` + +--- + +>reward(score): +把当前模型结构的得分情况回传。 + +**参数:** +**score:** 当前模型的得分,分数越大越好。 + +**返回** +模型结构更新成功或者失败,成功则返回`True`,失败则返回`False`。 + + +**代码示例** +```python +import numpy as np +import paddle +import paddle.fluid as fluid +from paddleslim.nas import SANAS +from paddleslim.analysis import flops + +max_flops = 321208544 +batch_size = 256 + +# 搜索空间配置 +config=[('MobileNetV2Space')] + +# 实例化SANAS +sa_nas = SANAS(config, server_addr=("", 8887), init_temperature=10.24, reduce_rate=0.85, search_steps=100, is_server=True) + +for step in range(100): + archs = sa_nas.next_archs() 
+ train_program = fluid.Program() + test_program = fluid.Program() + startup_program = fluid.Program() + ### 构造训练program + with fluid.program_guard(train_program, startup_program): + image = fluid.data(name='image', shape=[None, 3, 32, 32], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + + for arch in archs: + output = arch(image) + out = fluid.layers.fc(output, size=10, act="softmax") + softmax_out = fluid.layers.softmax(input=out, use_cudnn=False) + cost = fluid.layers.cross_entropy(input=softmax_out, label=label) + avg_cost = fluid.layers.mean(cost) + acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1) + + ### 构造测试program + test_program = train_program.clone(for_test=True) + ### 定义优化器 + sgd = fluid.optimizer.SGD(learning_rate=1e-3) + sgd.minimize(avg_cost) + + + ### 增加限制条件,如果没有则进行无限制搜索 + if flops(train_program) > max_flops: + continue + + ### 定义代码是在cpu上运行 + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_program) + + ### 定义训练输入数据 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.cifar.train10(cycle=False), buf_size=1024), + batch_size=batch_size, + drop_last=True) + + ### 定义预测输入数据 + test_reader = paddle.batch( + paddle.dataset.cifar.test10(cycle=False), + batch_size=batch_size, + drop_last=False) + train_feeder = fluid.DataFeeder([image, label], place, program=train_program) + test_feeder = fluid.DataFeeder([image, label], place, program=test_program) + + + ### 开始训练,每个搜索结果训练5个epoch + for epoch_id in range(5): + for batch_id, data in enumerate(train_reader()): + fetches = [avg_cost.name] + outs = exe.run(train_program, + feed=train_feeder.feed(data), + fetch_list=fetches)[0] + if batch_id % 10 == 0: + print('TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}'.format(step, epoch_id, batch_id, outs[0])) + + ### 开始预测,得到最终的测试结果作为score回传给sa_nas + reward = [] + for batch_id, data in enumerate(test_reader()): + test_fetches = [ + avg_cost.name, acc_top1.name + ] + 
batch_reward = exe.run(test_program, + feed=test_feeder.feed(data), + fetch_list=test_fetches) + reward_avg = np.mean(np.array(batch_reward), axis=1) + reward.append(reward_avg) + + print('TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}'. + format(step, batch_id, batch_reward[0],batch_reward[1])) + + finally_reward = np.mean(np.array(reward), axis=0) + print( + 'FINAL TEST: avg_cost: {}, acc_top1: {}'.format( + finally_reward[0], finally_reward[1])) + + ### 回传score + sa_nas.reward(float(finally_reward[1])) + +``` diff --git a/paddleslim/nas/sa_nas.py b/paddleslim/nas/sa_nas.py index 7eced9c546a57ad49625a707b06e422426c19e92..34ec47b8ae7a08d9242b66ef4210db294017bc0e 100644 --- a/paddleslim/nas/sa_nas.py +++ b/paddleslim/nas/sa_nas.py @@ -40,21 +40,19 @@ class SANAS(object): init_temperature=100, reduce_rate=0.85, search_steps=300, - key="sa_nas", save_checkpoint='nas_checkpoint', load_checkpoint=None, is_server=False): """ Search a group of ratios used to prune program. Args: - configs(list): A list of search space configuration with format (key, input_size, output_size, block_num). + configs(list): A list of search space configuration with format [(key, {input_size, output_size, block_num, block_mask})]. `key` is the name of search space with data type str. `input_size` and `output_size` are - input size and output size of searched sub-network. `block_num` is the number of blocks in searched network. + input size and output size of searched sub-network. `block_num` is the number of blocks in searched network, `block_mask` is a list consists by 0 and 1, 0 means normal block, 1 means reduction block. server_addr(tuple): A tuple of server ip and server port for controller server. init_temperature(float): The init temperature used in simulated annealing search strategy. reduce_rate(float): The decay rate used in simulated annealing search strategy. search_steps(int): The steps of searching. 
- key(str): Identity used in communication between controller server and clients. save_checkpoint(string|None): The directory of checkpoint to save, if set to None, not save checkpoint. Default: 'nas_checkpoint'. load_checkpoint(string|None): The directory of checkpoint to load, if set to None, not load checkpoint. Default: None. is_server(bool): Whether current host is controller server. Default: True. diff --git a/paddleslim/nas/search_space/__init__.py b/paddleslim/nas/search_space/__init__.py index 166406d89070e484a11331c712097c44ea9838e0..9556c61917406ab461fb7a0dbb071b864c5ab357 100644 --- a/paddleslim/nas/search_space/__init__.py +++ b/paddleslim/nas/search_space/__init__.py @@ -15,11 +15,16 @@ from .mobilenetv2 import MobileNetV2Space from .mobilenetv1 import MobileNetV1Space from .resnet import ResNetSpace +from .mobilenet_block import MobileNetV1BlockSpace, MobileNetV2BlockSpace +from .resnet_block import ResNetBlockSpace +from .inception_block import InceptionABlockSpace, InceptionCBlockSpace from .search_space_registry import SEARCHSPACE from .search_space_factory import SearchSpaceFactory from .search_space_base import SearchSpaceBase __all__ = [ - 'MobileNetV1Space', 'MobileNetV2Space', 'ResNetSpace', 'SearchSpaceBase', + 'MobileNetV1Space', 'MobileNetV2Space', 'ResNetSpace', + 'MobileNetV1BlockSpace', 'MobileNetV2BlockSpace', 'ResNetBlockSpace', + 'InceptionABlockSpace', 'InceptionCBlockSpace', 'SearchSpaceBase', 'SearchSpaceFactory', 'SEARCHSPACE' ] diff --git a/paddleslim/nas/search_space/combine_search_space.py b/paddleslim/nas/search_space/combine_search_space.py index 17ebbd3939798ad0e2a7d3fd763bb9427f6e13f0..7bb66c00c663cfba75dcc429e5ca53270e58bed7 100644 --- a/paddleslim/nas/search_space/combine_search_space.py +++ b/paddleslim/nas/search_space/combine_search_space.py @@ -19,12 +19,16 @@ from __future__ import print_function import numpy as np import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr +import logging +from 
...common import get_logger from .search_space_base import SearchSpaceBase from .search_space_registry import SEARCHSPACE from .base_layer import conv_bn_layer __all__ = ["CombineSearchSpace"] +_logger = get_logger(__name__, level=logging.INFO) + class CombineSearchSpace(object): """ @@ -37,7 +41,15 @@ class CombineSearchSpace(object): self.lens = len(config_lists) self.spaces = [] for config_list in config_lists: - key, config = config_list + if isinstance(config_list, tuple): + key, config = config_list + elif isinstance(config_list, str): + key = config_list + config = None + else: + raise NotImplementedError( + 'the type of config is Error!!! Please check the config information. Receive the type of config is {}'. + format(type(config_list))) self.spaces.append(self._get_single_search_space(key, config)) self.init_tokens() @@ -52,11 +64,37 @@ class CombineSearchSpace(object): model space(class) """ cls = SEARCHSPACE.get(key) - block_mask = config['block_mask'] if 'block_mask' in config else None - space = cls(config['input_size'], - config['output_size'], - config['block_num'], - block_mask=block_mask) + assert cls != None, '{} is NOT a correct space, the space we support is {}'.format( + key, SEARCHSPACE) + + if config is None: + block_mask = None + input_size = None + output_size = None + block_num = None + else: + if 'Block' not in cls.__name__: + _logger.warn( + 'if space is not a Block space, config is useless, current space is {}'. 
+ format(cls.__name__)) + + block_mask = config[ + 'block_mask'] if 'block_mask' in config else None + input_size = config[ + 'input_size'] if 'input_size' in config else None + output_size = config[ + 'output_size'] if 'output_size' in config else None + block_num = config['block_num'] if 'block_num' in config else None + + if 'Block' in cls.__name__: + if block_mask == None and (block_num == None or + input_size == None or + output_size == None): + raise NotImplementedError( + "block_mask or (block num and input_size and output_size) can NOT be None at the same time in Block SPACE!" + ) + + space = cls(input_size, output_size, block_num, block_mask=block_mask) return space def init_tokens(self): diff --git a/paddleslim/nas/search_space/inception_block.py b/paddleslim/nas/search_space/inception_block.py new file mode 100644 index 0000000000000000000000000000000000000000..5b022d85eb9b4d80c3377bf074d645553bf67f80 --- /dev/null +++ b/paddleslim/nas/search_space/inception_block.py @@ -0,0 +1,519 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from .search_space_base import SearchSpaceBase +from .base_layer import conv_bn_layer +from .search_space_registry import SEARCHSPACE +from .utils import compute_downsample_num, check_points + +__all__ = ["InceptionABlockSpace", "InceptionCBlockSpace"] +### TODO add asymmetric kernel of conv when paddle-lite support +### inceptionB is same as inceptionA if asymmetric kernel is not support + + +@SEARCHSPACE.register +class InceptionABlockSpace(SearchSpaceBase): + def __init__(self, input_size, output_size, block_num, block_mask): + super(InceptionABlockSpace, self).__init__(input_size, output_size, + block_num, block_mask) + if self.block_mask == None: + # use input_size and output_size to compute self.downsample_num + self.downsample_num = compute_downsample_num(self.input_size, + self.output_size) + if self.block_num != None: + assert self.downsample_num <= self.block_num, 'downsample numeber must be LESS THAN OR EQUAL TO block_num, but NOW: downsample numeber is {}, block_num is {}'.format( + self.downsample_num, self.block_num) + + ### self.filter_num means filter nums + self.filter_num = np.array([ + 3, 4, 8, 12, 16, 24, 32, 48, 64, 80, 96, 128, 144, 160, 192, 224, + 256, 320, 384, 448, 480, 512, 1024 + ]) + ### self.k_size means kernel_size + self.k_size = np.array([3, 5]) + ### self.pool_type means pool type, 0 means avg, 1 means max + self.pool_type = np.array([0, 1]) + ### self.repeat means repeat of 1x1 conv in branch of inception + ### self.repeat = np.array([0,1]) + + def init_tokens(self): + """ + The initial token. + """ + if self.block_mask != None: + return [0] * (len(self.block_mask) * 9) + else: + return [0] * (self.block_num * 9) + + def range_table(self): + """ + Get range table of current search space, constrains the range of tokens. 
+ """ + range_table_base = [] + if self.block_mask != None: + range_table_length = len(self.block_mask) + else: + range_table_length = self.block_num + + for i in range(range_table_length): + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.k_size)) + range_table_base.append(len(self.pool_type)) + + return range_table_base + + def token2arch(self, tokens=None): + """ + return net_arch function + """ + #assert self.block_num + if tokens is None: + tokens = self.init_tokens() + + self.bottleneck_params_list = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 9]], + self.filter_num[tokens[i * 9 + 1]], + self.filter_num[tokens[i * 9 + 2]], + self.filter_num[tokens[i * 9 + 3]], + self.filter_num[tokens[i * 9 + 4]], + self.filter_num[tokens[i * 9 + 5]], + self.filter_num[tokens[i * 9 + 6]], + self.k_size[tokens[i * 9 + 7]], 2 if self.block_mask == 1 + else 1, self.pool_type[tokens[i * 9 + 8]])) + else: + repeat_num = int(self.block_num / self.downsample_num) + num_minus = self.block_num % self.downsample_num + ### if block_num > downsample_num, add stride=1 block at last (block_num-downsample_num) layers + for i in range(self.downsample_num): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 9]], + self.filter_num[tokens[i * 9 + 1]], + self.filter_num[tokens[i * 9 + 2]], + self.filter_num[tokens[i * 9 + 3]], + self.filter_num[tokens[i * 9 + 4]], + self.filter_num[tokens[i * 9 + 5]], + self.filter_num[tokens[i * 9 + 6]], + self.k_size[tokens[i * 9 + 7]], 2, + self.pool_type[tokens[i * 9 + 8]])) + ### if block_num / downsample_num > 1, add (block_num / 
downsample_num) times stride=1 block + for k in range(repeat_num - 1): + kk = k * self.downsample_num + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[kk * 9]], + self.filter_num[tokens[kk * 9 + 1]], + self.filter_num[tokens[kk * 9 + 2]], + self.filter_num[tokens[kk * 9 + 3]], + self.filter_num[tokens[kk * 9 + 4]], + self.filter_num[tokens[kk * 9 + 5]], + self.filter_num[tokens[kk * 9 + 6]], + self.k_size[tokens[kk * 9 + 7]], 1, + self.pool_type[tokens[kk * 9 + 8]])) + + if self.downsample_num - i <= num_minus: + j = self.downsample_num * (repeat_num - 1) + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[j * 9]], + self.filter_num[tokens[j * 9 + 1]], + self.filter_num[tokens[j * 9 + 2]], + self.filter_num[tokens[j * 9 + 3]], + self.filter_num[tokens[j * 9 + 4]], + self.filter_num[tokens[j * 9 + 5]], + self.filter_num[tokens[j * 9 + 6]], + self.k_size[tokens[j * 9 + 7]], 1, + self.pool_type[tokens[j * 9 + 8]])) + + if self.downsample_num == 0 and self.block_num != 0: + for i in range(len(self.block_num)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 9]], + self.filter_num[tokens[i * 9 + 1]], + self.filter_num[tokens[i * 9 + 2]], + self.filter_num[tokens[i * 9 + 3]], + self.filter_num[tokens[i * 9 + 4]], + self.filter_num[tokens[i * 9 + 5]], + self.filter_num[tokens[i * 9 + 6]], + self.k_size[tokens[i * 9 + 7]], 1, + self.pool_type[tokens[i * 9 + 8]])) + + def net_arch(input, return_mid_layer=False, return_block=None): + layer_count = 0 + mid_layer = dict() + for i, layer_setting in enumerate(self.bottleneck_params_list): + filter_nums = layer_setting[0:7] + filter_size = layer_setting[7] + stride = layer_setting[8] + pool_type = 'avg' if layer_setting[9] == 0 else 'max' + if stride == 2: + layer_count += 1 + if check_points((layer_count - 1), return_block): + mid_layer[layer_count - 1] = input + + input = self._inceptionA( + input, + A_tokens=filter_nums, + filter_size=filter_size, + stride=stride, + 
pool_type=pool_type, + name='inceptionA_{}'.format(i + 1)) + + if return_mid_layer: + return input, mid_layer + else: + return input, + + return net_arch + + def _inceptionA(self, + data, + A_tokens, + filter_size, + stride, + pool_type, + name=None): + pool1 = fluid.layers.pool2d( + input=data, + pool_size=filter_size, + pool_padding='SAME', + pool_type=pool_type, + name=name + '_pool2d') + conv1 = conv_bn_layer( + input=pool1, + filter_size=1, + num_filters=A_tokens[0], + stride=stride, + act='relu', + name=name + '_conv1') + + conv2 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=A_tokens[1], + stride=stride, + act='relu', + name=name + '_conv2') + + conv3 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=A_tokens[2], + stride=1, + act='relu', + name=name + '_conv3_1') + conv3 = conv_bn_layer( + input=conv3, + filter_size=filter_size, + num_filters=A_tokens[3], + stride=stride, + act='relu', + name=name + '_conv3_2') + + conv4 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=A_tokens[4], + stride=1, + act='relu', + name=name + '_conv4_1') + conv4 = conv_bn_layer( + input=conv4, + filter_size=filter_size, + num_filters=A_tokens[5], + stride=1, + act='relu', + name=name + '_conv4_2') + conv4 = conv_bn_layer( + input=conv4, + filter_size=filter_size, + num_filters=A_tokens[6], + stride=stride, + act='relu', + name=name + '_conv4_3') + + concat = fluid.layers.concat( + [conv1, conv2, conv3, conv4], axis=1, name=name + '_concat') + return concat + + +@SEARCHSPACE.register +class InceptionCBlockSpace(SearchSpaceBase): + def __init__(self, input_size, output_size, block_num, block_mask): + super(InceptionCBlockSpace, self).__init__(input_size, output_size, + block_num, block_mask) + if self.block_mask == None: + # use input_size and output_size to compute self.downsample_num + self.downsample_num = compute_downsample_num(self.input_size, + self.output_size) + if self.block_num != None: + assert self.downsample_num <= 
self.block_num, 'downsample numeber must be LESS THAN OR EQUAL TO block_num, but NOW: downsample numeber is {}, block_num is {}'.format( + self.downsample_num, self.block_num) + + ### self.filter_num means filter nums + self.filter_num = np.array([ + 3, 4, 8, 12, 16, 24, 32, 48, 64, 80, 96, 128, 144, 160, 192, 224, + 256, 320, 384, 448, 480, 512, 1024 + ]) + ### self.k_size means kernel_size + self.k_size = np.array([3, 5]) + ### self.pool_type means pool type, 0 means avg, 1 means max + self.pool_type = np.array([0, 1]) + ### self.repeat means repeat of 1x1 conv in branch of inception + ### self.repeat = np.array([0,1]) + + def init_tokens(self): + """ + The initial token. + """ + if self.block_mask != None: + return [0] * (len(self.block_mask) * 11) + else: + return [0] * (self.block_num * 11) + + def range_table(self): + """ + Get range table of current search space, constrains the range of tokens. + """ + range_table_base = [] + if self.block_mask != None: + range_table_length = len(self.block_mask) + else: + range_table_length = self.block_num + + for i in range(range_table_length): + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.k_size)) + range_table_base.append(len(self.pool_type)) + + return range_table_base + + def token2arch(self, tokens=None): + """ + return net_arch function + """ + #assert self.block_num + if tokens is None: + tokens = self.init_tokens() + + self.bottleneck_params_list = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 11]], + self.filter_num[tokens[i * 11 + 1]], + self.filter_num[tokens[i * 11 + 2]], + self.filter_num[tokens[i 
* 11 + 3]], + self.filter_num[tokens[i * 11 + 4]], + self.filter_num[tokens[i * 11 + 5]], + self.filter_num[tokens[i * 11 + 6]], + self.filter_num[tokens[i * 11 + 7]], + self.filter_num[tokens[i * 11 + 8]], + self.k_size[tokens[i * 11 + 9]], 2 if self.block_mask == 1 + else 1, self.pool_type[tokens[i * 11 + 10]])) + else: + repeat_num = int(self.block_num / self.downsample_num) + num_minus = self.block_num % self.downsample_num + ### if block_num > downsample_num, add stride=1 block at last (block_num-downsample_num) layers + for i in range(self.downsample_num): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 11]], + self.filter_num[tokens[i * 11 + 1]], + self.filter_num[tokens[i * 11 + 2]], + self.filter_num[tokens[i * 11 + 3]], + self.filter_num[tokens[i * 11 + 4]], + self.filter_num[tokens[i * 11 + 5]], + self.filter_num[tokens[i * 11 + 6]], + self.filter_num[tokens[i * 11 + 7]], + self.filter_num[tokens[i * 11 + 8]], + self.k_size[tokens[i * 11 + 9]], 2, + self.pool_type[tokens[i * 11 + 10]])) + ### if block_num / downsample_num > 1, add (block_num / downsample_num) times stride=1 block + for k in range(repeat_num - 1): + kk = k * self.downsample_num + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[kk * 11]], + self.filter_num[tokens[kk * 11 + 1]], + self.filter_num[tokens[kk * 11 + 2]], + self.filter_num[tokens[kk * 11 + 3]], + self.filter_num[tokens[kk * 11 + 4]], + self.filter_num[tokens[kk * 11 + 5]], + self.filter_num[tokens[kk * 11 + 6]], + self.filter_num[tokens[kk * 11 + 7]], + self.filter_num[tokens[kk * 11 + 8]], + self.k_size[tokens[kk * 11 + 9]], 1, + self.pool_type[tokens[kk * 11 + 10]])) + + if self.downsample_num - i <= num_minus: + j = self.downsample_num * (repeat_num - 1) + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[j * 11]], + self.filter_num[tokens[j * 11 + 1]], + self.filter_num[tokens[j * 11 + 2]], + self.filter_num[tokens[j * 11 + 3]], + self.filter_num[tokens[j * 11 + 4]], + 
self.filter_num[tokens[j * 11 + 5]], + self.filter_num[tokens[j * 11 + 6]], + self.filter_num[tokens[j * 11 + 7]], + self.filter_num[tokens[j * 11 + 8]], + self.k_size[tokens[j * 11 + 9]], 1, + self.pool_type[tokens[j * 11 + 10]])) + + if self.downsample_num == 0 and self.block_num != 0: + for i in range(len(self.block_num)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 11]], + self.filter_num[tokens[i * 11 + 1]], + self.filter_num[tokens[i * 11 + 2]], + self.filter_num[tokens[i * 11 + 3]], + self.filter_num[tokens[i * 11 + 4]], + self.filter_num[tokens[i * 11 + 5]], + self.filter_num[tokens[i * 11 + 6]], + self.filter_num[tokens[i * 11 + 7]], + self.filter_num[tokens[i * 11 + 8]], + self.k_size[tokens[i * 11 + 9]], 1, + self.pool_type[tokens[i * 11 + 10]])) + + def net_arch(input, return_mid_layer=False, return_block=None): + layer_count = 0 + mid_layer = dict() + for i, layer_setting in enumerate(self.bottleneck_params_list): + filter_nums = layer_setting[0:9] + filter_size = layer_setting[9] + stride = layer_setting[10] + pool_type = 'avg' if layer_setting[11] == 0 else 'max' + if stride == 2: + layer_count += 1 + if check_points((layer_count - 1) in return_block): + mid_layer[layer_count - 1] = input + + input = self._inceptionC( + input, + C_tokens=filter_nums, + filter_size=filter_size, + stride=stride, + pool_type=pool_type, + name='inceptionC_{}'.format(i + 1)) + + if return_mid_layer: + return input, mid_layer + else: + return input, + + return net_arch + + def _inceptionC(self, + data, + C_tokens, + filter_size, + stride, + pool_type, + name=None): + pool1 = fluid.layers.pool2d( + input=data, + pool_size=filter_size, + pool_padding='SAME', + pool_type=pool_type, + name=name + '_pool2d') + conv1 = conv_bn_layer( + input=pool1, + filter_size=1, + num_filters=C_tokens[0], + stride=stride, + act='relu', + name=name + '_conv1') + + conv2 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=C_tokens[1], + stride=stride, + 
act='relu', + name=name + '_conv2') + + conv3 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=C_tokens[2], + stride=1, + act='relu', + name=name + '_conv3_1') + conv3_1 = conv_bn_layer( + input=conv3, + filter_size=filter_size, + num_filters=C_tokens[3], + stride=stride, + act='relu', + name=name + '_conv3_2_1') + conv3_2 = conv_bn_layer( + input=conv3, + filter_size=filter_size, + num_filters=C_tokens[4], + stride=stride, + act='relu', + name=name + '_conv3_2_2') + + conv4 = conv_bn_layer( + input=data, + filter_size=1, + num_filters=C_tokens[5], + stride=1, + act='relu', + name=name + '_conv4_1') + conv4 = conv_bn_layer( + input=conv4, + filter_size=filter_size, + num_filters=C_tokens[6], + stride=1, + act='relu', + name=name + '_conv4_2') + conv4_1 = conv_bn_layer( + input=conv4, + filter_size=filter_size, + num_filters=C_tokens[7], + stride=stride, + act='relu', + name=name + '_conv4_3_1') + conv4_2 = conv_bn_layer( + input=conv4, + filter_size=filter_size, + num_filters=C_tokens[8], + stride=stride, + act='relu', + name=name + '_conv4_3_2') + + concat = fluid.layers.concat( + [conv1, conv2, conv3_1, conv3_2, conv4_1, conv4_2], + axis=1, + name=name + '_concat') + return concat diff --git a/paddleslim/nas/search_space/mobilenet_block.py b/paddleslim/nas/search_space/mobilenet_block.py new file mode 100644 index 0000000000000000000000000000000000000000..76597e3cdc0f2d613f39a51ed4dae81719c3ae78 --- /dev/null +++ b/paddleslim/nas/search_space/mobilenet_block.py @@ -0,0 +1,433 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from .search_space_base import SearchSpaceBase +from .base_layer import conv_bn_layer +from .search_space_registry import SEARCHSPACE +from .utils import compute_downsample_num, check_points + +__all__ = ["MobileNetV1BlockSpace", "MobileNetV2BlockSpace"] + + +@SEARCHSPACE.register +class MobileNetV2BlockSpace(SearchSpaceBase): + def __init__(self, + input_size, + output_size, + block_num, + block_mask=None, + scale=1.0): + super(MobileNetV2BlockSpace, self).__init__(input_size, output_size, + block_num, block_mask) + + if self.block_mask == None: + # use input_size and output_size to compute self.downsample_num + self.downsample_num = compute_downsample_num(self.input_size, + self.output_size) + if self.block_num != None: + assert self.downsample_num <= self.block_num, 'downsample numeber must be LESS THAN OR EQUAL TO block_num, but NOW: downsample numeber is {}, block_num is {}'.format( + self.downsample_num, self.block_num) + + # self.filter_num means channel number + self.filter_num = np.array([ + 3, 4, 8, 12, 16, 24, 32, 48, 64, 80, 96, 128, 144, 160, 192, 224, + 256, 320, 384, 512 + ]) # 20 + # self.k_size means kernel size + self.k_size = np.array([3, 5]) #2 + # self.multiply means expansion_factor of each _inverted_residual_unit + self.multiply = np.array([1, 2, 3, 4, 5, 6]) #6 + # self.repeat means repeat_num _inverted_residual_unit in 
each _invresi_blocks + self.repeat = np.array([1, 2, 3, 4, 5, 6]) #6 + self.scale = scale + + def init_tokens(self): + if self.block_mask != None: + return [0] * (len(self.block_mask) * 4) + else: + return [0] * (self.block_num * 4) + + def range_table(self): + range_table_base = [] + if self.block_mask != None: + range_table_length = len(self.block_mask) + else: + range_table_length = self.block_num + + for i in range(range_table_length): + range_table_base.append(len(self.multiply)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.repeat)) + range_table_base.append(len(self.k_size)) + + return range_table_base + + def token2arch(self, tokens=None): + """ + return mobilenetv2 net_arch function + """ + + if tokens == None: + tokens = self.init_tokens() + + self.bottleneck_params_list = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + self.bottleneck_params_list.append( + (self.multiply[tokens[i * 4]], + self.filter_num[tokens[i * 4 + 1]], + self.repeat[tokens[i * 4 + 2]], 2 + if self.block_mask[i] == 1 else 1, + self.k_size[tokens[i * 4 + 3]])) + else: + repeat_num = int(self.block_num / self.downsample_num) + num_minus = self.block_num % self.downsample_num + ### if block_num > downsample_num, add stride=1 block at last (block_num-downsample_num) layers + for i in range(self.downsample_num): + self.bottleneck_params_list.append( + (self.multiply[tokens[i * 4]], + self.filter_num[tokens[i * 4 + 1]], + self.repeat[tokens[i * 4 + 2]], 2, + self.k_size[tokens[i * 4 + 3]])) + + ### if block_num / downsample_num > 1, add (block_num / downsample_num) times stride=1 block + for k in range(repeat_num - 1): + kk = k * self.downsample_num + i + self.bottleneck_params_list.append( + (self.multiply[tokens[kk * 4]], + self.filter_num[tokens[kk * 4 + 1]], + self.repeat[tokens[kk * 4 + 2]], 1, + self.k_size[tokens[kk * 4 + 3]])) + + if self.downsample_num - i <= num_minus: + j = self.downsample_num * (repeat_num - 1) + i 
+ self.bottleneck_params_list.append( + (self.multiply[tokens[j * 4]], + self.filter_num[tokens[j * 4 + 1]], + self.repeat[tokens[j * 4 + 2]], 1, + self.k_size[tokens[j * 4 + 3]])) + + if self.downsample_num == 0 and self.block_num != 0: + for i in range(len(self.block_num)): + self.bottleneck_params_list.append( + (self.multiply[tokens[i * 4]], + self.filter_num[tokens[i * 4 + 1]], + self.repeat[tokens[i * 4 + 2]], 1, + self.k_size[tokens[i * 4 + 3]])) + + def net_arch(input, return_mid_layer=False, return_block=None): + # all padding is 'SAME' in the conv2d, can compute the actual padding automatic. + # bottleneck sequences + in_c = int(32 * self.scale) + mid_layer = dict() + layer_count = 0 + depthwise_conv = None + + for i, layer_setting in enumerate(self.bottleneck_params_list): + t, c, n, s, k = layer_setting + + if s == 2: + layer_count += 1 + if check_points((layer_count - 1), return_block): + mid_layer[layer_count - 1] = depthwise_conv + + input, depthwise_conv = self._invresi_blocks( + input=input, + in_c=in_c, + t=t, + c=int(c * self.scale), + n=n, + s=s, + k=k, + name='mobilenetv2_' + str(i + 1)) + in_c = int(c * self.scale) + + if check_points(layer_count, return_block): + mid_layer[layer_count] = depthwise_conv + + if return_mid_layer: + return input, mid_layer + else: + return input, + + return net_arch + + def _shortcut(self, input, data_residual): + """Build shortcut layer. + Args: + input(Variable): input. + data_residual(Variable): residual layer. + Returns: + Variable, layer output. + """ + return fluid.layers.elementwise_add(input, data_residual) + + def _inverted_residual_unit(self, + input, + num_in_filter, + num_filters, + ifshortcut, + stride, + filter_size, + expansion_factor, + reduction_ratio=4, + name=None): + """Build inverted residual unit. + Args: + input(Variable), input. + num_in_filter(int), number of in filters. + num_filters(int), number of filters. + ifshortcut(bool), whether using shortcut. + stride(int), stride. 
+ filter_size(int), filter size. + padding(str|int|list), padding. + expansion_factor(float), expansion factor. + name(str), name. + Returns: + Variable, layers output. + """ + num_expfilter = int(round(num_in_filter * expansion_factor)) + channel_expand = conv_bn_layer( + input=input, + num_filters=num_expfilter, + filter_size=1, + stride=1, + padding='SAME', + num_groups=1, + act='relu6', + name=name + '_expand') + + bottleneck_conv = conv_bn_layer( + input=channel_expand, + num_filters=num_expfilter, + filter_size=filter_size, + stride=stride, + padding='SAME', + num_groups=num_expfilter, + act='relu6', + name=name + '_dwise', + use_cudnn=False) + + depthwise_output = bottleneck_conv + + linear_out = conv_bn_layer( + input=bottleneck_conv, + num_filters=num_filters, + filter_size=1, + stride=1, + padding='SAME', + num_groups=1, + act=None, + name=name + '_linear') + out = linear_out + if ifshortcut: + out = self._shortcut(input=input, data_residual=out) + return out, depthwise_output + + def _invresi_blocks(self, input, in_c, t, c, n, s, k, name=None): + """Build inverted residual blocks. + Args: + input: Variable, input. + in_c: int, number of in filters. + t: float, expansion factor. + c: int, number of filters. + n: int, number of layers. + s: int, stride. + k: int, filter size. + name: str, name. + Returns: + Variable, layers output. 
+ """ + first_block, depthwise_output = self._inverted_residual_unit( + input=input, + num_in_filter=in_c, + num_filters=c, + ifshortcut=False, + stride=s, + filter_size=k, + expansion_factor=t, + name=name + '_1') + + last_residual_block = first_block + last_c = c + + for i in range(1, n): + last_residual_block, depthwise_output = self._inverted_residual_unit( + input=last_residual_block, + num_in_filter=last_c, + num_filters=c, + ifshortcut=True, + stride=1, + filter_size=k, + expansion_factor=t, + name=name + '_' + str(i + 1)) + return last_residual_block, depthwise_output + + +@SEARCHSPACE.register +class MobileNetV1BlockSpace(SearchSpaceBase): + def __init__(self, + input_size, + output_size, + block_num, + block_mask=None, + scale=1.0): + super(MobileNetV1BlockSpace, self).__init__(input_size, output_size, + block_num, block_mask) + # use input_size and output_size to compute self.downsample_num + self.downsample_num = compute_downsample_num(self.input_size, + self.output_size) + if self.block_num != None: + assert self.downsample_num <= self.block_num, 'downsample numeber must be LESS THAN OR EQUAL TO block_num, but NOW: downsample numeber is {}, block_num is {}'.format( + self.downsample_num, self.block_num) + + # self.filter_num means channel number + self.filter_num = np.array([ + 3, 4, 8, 12, 16, 24, 32, 48, 64, 80, 96, 128, 144, 160, 192, 224, + 256, 320, 384, 512, 576, 640, 768, 1024, 1048 + ]) + self.k_size = np.array([3, 5]) + self.scale = scale + + def init_tokens(self): + if self.block_mask != None: + return [0] * (len(self.block_mask) * 3) + else: + return [0] * (self.block_num * 3) + + def range_table(self): + range_table_base = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.k_size)) + else: + for i in range(self.block_num): + range_table_base.append(len(self.filter_num)) + 
range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.k_size)) + + return range_table_base + + def token2arch(self, tokens=None): + if tokens == None: + tokens = self.init_tokens() + + self.bottleneck_params_list = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 3]], + self.filter_num[tokens[i * 3 + 1]], 2 + if self.block_mask[i] == 1 else 1, + self.k_size[tokens[i * 3 + 2]])) + else: + repeat_num = int(self.block_num / self.downsample_num) + num_minus = self.block_num % self.downsample_num + for i in range(self.downsample_num): + ### if block_num > downsample_num, add stride=1 block at last (block_num-downsample_num) layers + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 3]], + self.filter_num[tokens[i * 3 + 1]], 2, + self.k_size[tokens[i * 3 + 2]])) + + ### if block_num / downsample_num > 1, add (block_num / downsample_num) times stride=1 block + for k in range(repeat_num - 1): + kk = k * self.downsample_num + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[kk * 3]], + self.filter_num[tokens[kk * 3 + 1]], 1, + self.k_size[tokens[kk * 3 + 2]])) + + if self.downsample_num - i <= num_minus: + j = self.downsample_num * (repeat_num - 1) + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[j * 3]], + self.filter_num[tokens[j * 3 + 1]], 1, + self.k_size[tokens[j * 3 + 2]])) + + if self.downsample_num == 0 and self.block_num != 0: + for i in range(len(self.block_num)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 3]], + self.filter_num[tokens[i * 3 + 1]], 1, + self.k_size[tokens[i * 3 + 2]])) + + def net_arch(input, return_mid_layer=False, return_block=None): + mid_layer = dict() + layer_count = 0 + + for i, layer_setting in enumerate(self.bottleneck_params_list): + filter_num1, filter_num2, stride, kernel_size = layer_setting + if stride == 2: + layer_count += 1 + if 
check_points((layer_count - 1), return_block): + mid_layer[layer_count - 1] = input + + input = self._depthwise_separable( + input=input, + num_filters1=filter_num1, + num_filters2=filter_num2, + stride=stride, + scale=self.scale, + kernel_size=kernel_size, + name='mobilenetv1_{}'.format(str(i + 1))) + + if return_mid_layer: + return input, mid_layer + else: + return input, + + return net_arch + + def _depthwise_separable(self, + input, + num_filters1, + num_filters2, + stride, + scale, + kernel_size, + name=None): + num_groups = input.shape[1] + + s_oc = int(num_filters1 * scale) + if s_oc > num_groups: + output_channel = s_oc - (s_oc % num_groups) + else: + output_channel = num_groups + + depthwise_conv = conv_bn_layer( + input=input, + filter_size=kernel_size, + num_filters=output_channel, + stride=stride, + num_groups=num_groups, + use_cudnn=False, + name=name + '_dw') + pointwise_conv = conv_bn_layer( + input=depthwise_conv, + filter_size=1, + num_filters=int(num_filters2 * scale), + stride=1, + name=name + '_sep') + + return pointwise_conv diff --git a/paddleslim/nas/search_space/mobilenetv1.py b/paddleslim/nas/search_space/mobilenetv1.py index 3976d21df1e3ad2c5ac344dab59ad32adeaedb79..4a931c6f0013c8c3dd0ef4a7bc47fd2d6da718ad 100644 --- a/paddleslim/nas/search_space/mobilenetv1.py +++ b/paddleslim/nas/search_space/mobilenetv1.py @@ -22,24 +22,16 @@ from paddle.fluid.param_attr import ParamAttr from .search_space_base import SearchSpaceBase from .base_layer import conv_bn_layer from .search_space_registry import SEARCHSPACE +from .utils import check_points __all__ = ["MobileNetV1Space"] @SEARCHSPACE.register class MobileNetV1Space(SearchSpaceBase): - def __init__(self, - input_size, - output_size, - block_num, - block_mask, - scale=1.0, - class_dim=1000): + def __init__(self, input_size, output_size, block_num, block_mask): super(MobileNetV1Space, self).__init__(input_size, output_size, block_num, block_mask) - assert self.block_mask == None, 'MobileNetV1Space 
will use origin MobileNetV1 as seach space, so use input_size, output_size and block_num to search' - self.scale = scale - self.class_dim = class_dim # self.head_num means the channel of first convolution self.head_num = np.array([3, 4, 8, 12, 16, 24, 32]) # 7 # self.filter_num1 ~ self.filtet_num9 means channel of the following convolution @@ -67,9 +59,6 @@ class MobileNetV1Space(SearchSpaceBase): # self.repeat means repeat_num in forth downsample self.repeat = np.array([1, 2, 3, 4, 5, 6]) #6 - assert self.block_num < 6, 'MobileNetV1: block number must less than 6, but receive block number is {}'.format( - self.block_num) - def init_tokens(self): """ The initial token. @@ -90,11 +79,7 @@ class MobileNetV1Space(SearchSpaceBase): 8, 10, 0, # 512, 1024, 3 10, 10, 0] # 1024, 1024, 3 # yapf: enable - if self.block_num < 5: - self.token_len = 1 + (self.block_num * 2 - 1) * 3 - else: - self.token_len = 2 + (self.block_num * 2 - 1) * 3 - return base_init_tokens[:self.token_len] + return base_init_tokens def range_table(self): """ @@ -113,65 +98,92 @@ class MobileNetV1Space(SearchSpaceBase): len(self.filter_num8), len(self.filter_num9), len(self.k_size), len(self.filter_num9), len(self.filter_num9), len(self.k_size)] # yapf: enable - return base_range_table[:self.token_len] + return base_range_table def token2arch(self, tokens=None): if tokens is None: tokens = self.tokens() - bottleneck_param_list = [] - - if self.block_num >= 1: - # tokens[0] = 32 - # 32, 64 - bottleneck_param_list.append( - (self.filter_num1[tokens[1]], self.filter_num2[tokens[2]], 1, - self.k_size[tokens[3]])) - if self.block_num >= 2: - # 64 128 128 128 - bottleneck_param_list.append( - (self.filter_num2[tokens[4]], self.filter_num3[tokens[5]], 2, - self.k_size[tokens[6]])) - bottleneck_param_list.append( - (self.filter_num3[tokens[7]], self.filter_num4[tokens[8]], 1, - self.k_size[tokens[9]])) - if self.block_num >= 3: - # 128 256 256 256 - bottleneck_param_list.append( - 
(self.filter_num4[tokens[10]], self.filter_num5[tokens[11]], 2, - self.k_size[tokens[12]])) - bottleneck_param_list.append( - (self.filter_num5[tokens[13]], self.filter_num6[tokens[14]], 1, - self.k_size[tokens[15]])) - if self.block_num >= 4: - # 256 512 (512 512) * 5 - bottleneck_param_list.append( - (self.filter_num6[tokens[16]], self.filter_num7[tokens[17]], 2, - self.k_size[tokens[18]])) - for i in range(self.repeat[tokens[19]]): - bottleneck_param_list.append( - (self.filter_num7[tokens[20]], - self.filter_num8[tokens[21]], 1, self.k_size[tokens[22]])) - if self.block_num >= 5: - # 512 1024 1024 1024 - bottleneck_param_list.append( - (self.filter_num8[tokens[23]], self.filter_num9[tokens[24]], 2, - self.k_size[tokens[25]])) - bottleneck_param_list.append( - (self.filter_num9[tokens[26]], self.filter_num9[tokens[27]], 1, - self.k_size[tokens[28]])) - - def net_arch(input): + self.bottleneck_param_list = [] + + # tokens[0] = 32 + # 32, 64 + self.bottleneck_param_list.append( + (self.filter_num1[tokens[1]], self.filter_num2[tokens[2]], 1, + self.k_size[tokens[3]])) + # 64 128 128 128 + self.bottleneck_param_list.append( + (self.filter_num2[tokens[4]], self.filter_num3[tokens[5]], 2, + self.k_size[tokens[6]])) + self.bottleneck_param_list.append( + (self.filter_num3[tokens[7]], self.filter_num4[tokens[8]], 1, + self.k_size[tokens[9]])) + # 128 256 256 256 + self.bottleneck_param_list.append( + (self.filter_num4[tokens[10]], self.filter_num5[tokens[11]], 2, + self.k_size[tokens[12]])) + self.bottleneck_param_list.append( + (self.filter_num5[tokens[13]], self.filter_num6[tokens[14]], 1, + self.k_size[tokens[15]])) + # 256 512 (512 512) * 5 + self.bottleneck_param_list.append( + (self.filter_num6[tokens[16]], self.filter_num7[tokens[17]], 2, + self.k_size[tokens[18]])) + for i in range(self.repeat[tokens[19]]): + self.bottleneck_param_list.append( + (self.filter_num7[tokens[20]], self.filter_num8[tokens[21]], 1, + self.k_size[tokens[22]])) + # 512 1024 1024 1024 + 
self.bottleneck_param_list.append( + (self.filter_num8[tokens[23]], self.filter_num9[tokens[24]], 2, + self.k_size[tokens[25]])) + self.bottleneck_param_list.append( + (self.filter_num9[tokens[26]], self.filter_num9[tokens[27]], 1, + self.k_size[tokens[28]])) + + def _modify_bottle_params(output_stride=None): + if output_stride is not None and output_stride % 2 != 0: + raise Exception("output stride must to be even number") + if output_stride is None: + return + else: + stride = 2 + for i, layer_setting in enumerate(self.bottleneck_params_list): + f1, f2, s, ks = layer_setting + stride = stride * s + if stride > output_stride: + s = 1 + self.bottleneck_params_list[i] = (f1, f2, s, ks) + + def net_arch(input, + scale=1.0, + return_block=None, + end_points=None, + output_stride=None): + self.scale = scale + _modify_bottle_params(output_stride) + + decode_ends = dict() + input = conv_bn_layer( input=input, filter_size=3, num_filters=self.head_num[tokens[0]], stride=2, - name='mobilenetv1') + name='mobilenetv1_conv1') - for i, layer_setting in enumerate(bottleneck_param_list): + layer_count = 1 + for i, layer_setting in enumerate(self.bottleneck_param_list): filter_num1, filter_num2, stride, kernel_size = layer_setting + if stride == 2: + layer_count += 1 + ### return_block and end_points means block num + if check_points((layer_count - 1), return_block): + decode_ends[layer_count - 1] = input + + if check_points((layer_count - 1), end_points): + return input, decode_ends input = self._depthwise_separable( input=input, num_filters1=filter_num1, @@ -182,18 +194,15 @@ class MobileNetV1Space(SearchSpaceBase): kernel_size=kernel_size, name='mobilenetv1_{}'.format(str(i + 1))) - if self.output_size == 1: - print('NOTE: if output_size is 1, add fc layer in the end!!!') - input = fluid.layers.fc( - input=input, - size=self.class_dim, - param_attr=ParamAttr(name='mobilenetv2_fc_weights'), - bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) - else: - assert self.output_size == 
input.shape[2], \ - ("output_size must EQUAL to input_size / (2^block_num)." - "But receive input_size={}, output_size={}, block_num={}".format( - self.input_size, self.output_size, self.block_num)) + ### return_block and end_points means block num + if check_points(layer_count, end_points): + return input, decode_ends + + input = fluid.layers.pool2d( + input=input, + pool_type='avg', + global_pooling=True, + name='mobilenetv1_last_pool') return input @@ -208,12 +217,20 @@ class MobileNetV1Space(SearchSpaceBase): scale, kernel_size, name=None): + num_groups = input.shape[1] + + s_oc = int(num_filters1 * scale) + if s_oc > num_groups: + output_channel = s_oc - (s_oc % num_groups) + else: + output_channel = num_groups + depthwise_conv = conv_bn_layer( input=input, filter_size=kernel_size, - num_filters=int(num_filters1 * scale), + num_filters=output_channel, stride=stride, - num_groups=int(num_groups * scale), + num_groups=num_groups, use_cudnn=False, name=name + '_dw') pointwise_conv = conv_bn_layer( diff --git a/paddleslim/nas/search_space/mobilenetv2.py b/paddleslim/nas/search_space/mobilenetv2.py index 36231912715a29808d55158881ab3e918260f8b5..37099ae6dcfa0ae4a362f249c04e271404fccd34 100644 --- a/paddleslim/nas/search_space/mobilenetv2.py +++ b/paddleslim/nas/search_space/mobilenetv2.py @@ -22,22 +22,16 @@ from paddle.fluid.param_attr import ParamAttr from .search_space_base import SearchSpaceBase from .base_layer import conv_bn_layer from .search_space_registry import SEARCHSPACE +from .utils import check_points __all__ = ["MobileNetV2Space"] @SEARCHSPACE.register class MobileNetV2Space(SearchSpaceBase): - def __init__(self, - input_size, - output_size, - block_num, - block_mask=None, - scale=1.0, - class_dim=1000): + def __init__(self, input_size, output_size, block_num, block_mask=None): super(MobileNetV2Space, self).__init__(input_size, output_size, block_num, block_mask) - assert self.block_mask == None, 'MobileNetV2Space will use origin MobileNetV2 as seach 
space, so use input_size, output_size and block_num to search' # self.head_num means the first convolution channel self.head_num = np.array([3, 4, 8, 12, 16, 24, 32]) #7 # self.filter_num1 ~ self.filter_num6 means following convlution channel @@ -56,11 +50,6 @@ class MobileNetV2Space(SearchSpaceBase): self.multiply = np.array([1, 2, 3, 4, 6]) #5 # self.repeat means repeat_num _inverted_residual_unit in each _invresi_blocks self.repeat = np.array([1, 2, 3, 4, 5, 6]) #6 - self.scale = scale - self.class_dim = class_dim - - assert self.block_num < 7, 'MobileNetV2: block number must less than 7, but receive block number is {}'.format( - self.block_num) def init_tokens(self): """ @@ -80,13 +69,7 @@ class MobileNetV2Space(SearchSpaceBase): 4, 9, 0, 0] # 6, 320, 1 # yapf: enable - if self.block_num < 5: - self.token_len = 1 + (self.block_num - 1) * 4 - else: - self.token_len = 1 + (self.block_num + 2 * - (self.block_num - 5)) * 4 - - return init_token_base[:self.token_len] + return init_token_base def range_table(self): """ @@ -102,9 +85,8 @@ class MobileNetV2Space(SearchSpaceBase): len(self.multiply), len(self.filter_num4), len(self.repeat), len(self.k_size), len(self.multiply), len(self.filter_num5), len(self.repeat), len(self.k_size), len(self.multiply), len(self.filter_num6), len(self.repeat), len(self.k_size)] - range_table_base = list(np.array(range_table_base) - 1) # yapf: enable - return range_table_base[:self.token_len] + return range_table_base def token2arch(self, tokens=None): """ @@ -115,35 +97,29 @@ class MobileNetV2Space(SearchSpaceBase): tokens = self.init_tokens() self.bottleneck_params_list = [] - if self.block_num >= 1: - self.bottleneck_params_list.append( - (1, self.head_num[tokens[0]], 1, 1, 3)) - if self.block_num >= 2: - self.bottleneck_params_list.append( - (self.multiply[tokens[1]], self.filter_num1[tokens[2]], - self.repeat[tokens[3]], 2, self.k_size[tokens[4]])) - if self.block_num >= 3: - self.bottleneck_params_list.append( - 
(self.multiply[tokens[5]], self.filter_num1[tokens[6]], - self.repeat[tokens[7]], 2, self.k_size[tokens[8]])) - if self.block_num >= 4: - self.bottleneck_params_list.append( - (self.multiply[tokens[9]], self.filter_num2[tokens[10]], - self.repeat[tokens[11]], 2, self.k_size[tokens[12]])) - if self.block_num >= 5: - self.bottleneck_params_list.append( - (self.multiply[tokens[13]], self.filter_num3[tokens[14]], - self.repeat[tokens[15]], 2, self.k_size[tokens[16]])) - self.bottleneck_params_list.append( - (self.multiply[tokens[17]], self.filter_num4[tokens[18]], - self.repeat[tokens[19]], 1, self.k_size[tokens[20]])) - if self.block_num >= 6: - self.bottleneck_params_list.append( - (self.multiply[tokens[21]], self.filter_num5[tokens[22]], - self.repeat[tokens[23]], 2, self.k_size[tokens[24]])) - self.bottleneck_params_list.append( - (self.multiply[tokens[25]], self.filter_num6[tokens[26]], - self.repeat[tokens[27]], 1, self.k_size[tokens[28]])) + self.bottleneck_params_list.append( + (1, self.head_num[tokens[0]], 1, 1, 3)) + self.bottleneck_params_list.append( + (self.multiply[tokens[1]], self.filter_num1[tokens[2]], + self.repeat[tokens[3]], 2, self.k_size[tokens[4]])) + self.bottleneck_params_list.append( + (self.multiply[tokens[5]], self.filter_num1[tokens[6]], + self.repeat[tokens[7]], 2, self.k_size[tokens[8]])) + self.bottleneck_params_list.append( + (self.multiply[tokens[9]], self.filter_num2[tokens[10]], + self.repeat[tokens[11]], 2, self.k_size[tokens[12]])) + self.bottleneck_params_list.append( + (self.multiply[tokens[13]], self.filter_num3[tokens[14]], + self.repeat[tokens[15]], 2, self.k_size[tokens[16]])) + self.bottleneck_params_list.append( + (self.multiply[tokens[17]], self.filter_num4[tokens[18]], + self.repeat[tokens[19]], 1, self.k_size[tokens[20]])) + self.bottleneck_params_list.append( + (self.multiply[tokens[21]], self.filter_num5[tokens[22]], + self.repeat[tokens[23]], 2, self.k_size[tokens[24]])) + self.bottleneck_params_list.append( + 
(self.multiply[tokens[25]], self.filter_num6[tokens[26]], + self.repeat[tokens[27]], 1, self.k_size[tokens[28]])) def _modify_bottle_params(output_stride=None): if output_stride is not None and output_stride % 2 != 0: @@ -160,9 +136,11 @@ class MobileNetV2Space(SearchSpaceBase): self.bottleneck_params_list[i] = (t, c, n, s, ks) def net_arch(input, + scale=1.0, + return_block=None, end_points=None, - decode_points=None, output_stride=None): + self.scale = scale _modify_bottle_params(output_stride) decode_ends = dict() @@ -185,21 +163,23 @@ class MobileNetV2Space(SearchSpaceBase): stride=2, padding='SAME', act='relu6', - name='mobilenetv2_conv1_1') + name='mobilenetv2_conv1') layer_count = 1 - if check_points(layer_count, decode_points): - decode_ends[layer_count] = input - - if check_points(layer_count, end_points): - return input, decode_ends + depthwise_output = None # bottleneck sequences - i = 1 in_c = int(32 * self.scale) - for layer_setting in self.bottleneck_params_list: + for i, layer_setting in enumerate(self.bottleneck_params_list): t, c, n, s, k = layer_setting - i += 1 - #print(input) + if s == 2: + layer_count += 1 + ### return_block and end_points means block num + if check_points((layer_count - 1), return_block): + decode_ends[layer_count - 1] = depthwise_output + + if check_points((layer_count - 1), end_points): + return input, decode_ends + input, depthwise_output = self._invresi_blocks( input=input, in_c=in_c, @@ -210,14 +190,13 @@ class MobileNetV2Space(SearchSpaceBase): k=k, name='mobilenetv2_conv' + str(i)) in_c = int(c * self.scale) - layer_count += 1 - ### decode_points and end_points means block num - if check_points(layer_count, decode_points): - decode_ends[layer_count] = depthwise_output + ### return_block and end_points means block num + if check_points(layer_count, return_block): + decode_ends[layer_count] = depthwise_output - if check_points(layer_count, end_points): - return input, decode_ends + if check_points(layer_count, 
end_points): + return input, decode_ends # last conv input = conv_bn_layer( @@ -232,25 +211,10 @@ class MobileNetV2Space(SearchSpaceBase): input = fluid.layers.pool2d( input=input, - pool_size=7, - pool_stride=1, pool_type='avg', global_pooling=True, name='mobilenetv2_last_pool') - # if output_size is 1, add fc layer in the end - if self.output_size == 1: - input = fluid.layers.fc( - input=input, - size=self.class_dim, - param_attr=ParamAttr(name='mobilenetv2_fc_weights'), - bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) - else: - assert self.output_size == input.shape[2], \ - ("output_size must EQUAL to input_size / (2^block_num)." - "But receive input_size={}, output_size={}, block_num={}".format( - self.input_size, self.output_size, self.block_num)) - return input return net_arch diff --git a/paddleslim/nas/search_space/resnet.py b/paddleslim/nas/search_space/resnet.py index fd761d417575988e8ba8bd99da25372613c5912f..97cf1ffbe2f759bfbb65b8197df49e1d9698e8e7 100644 --- a/paddleslim/nas/search_space/resnet.py +++ b/paddleslim/nas/search_space/resnet.py @@ -22,22 +22,16 @@ from paddle.fluid.param_attr import ParamAttr from .search_space_base import SearchSpaceBase from .base_layer import conv_bn_layer from .search_space_registry import SEARCHSPACE +from .utils import check_points __all__ = ["ResNetSpace"] @SEARCHSPACE.register class ResNetSpace(SearchSpaceBase): - def __init__(self, - input_size, - output_size, - block_num, - block_mask=None, - extract_feature=False, - class_dim=1000): + def __init__(self, input_size, output_size, block_num, block_mask=None): super(ResNetSpace, self).__init__(input_size, output_size, block_num, block_mask) - assert self.block_mask == None, 'ResNetSpace will use origin ResNet as seach space, so use input_size, output_size and block_num to search' # self.filter_num1 ~ self.filter_num4 means convolution channel self.filter_num1 = np.array([48, 64, 96, 128, 160, 192, 224]) #7 self.filter_num2 = np.array([64, 96, 128, 160, 192, 256, 
320]) #7 @@ -48,31 +42,24 @@ class ResNetSpace(SearchSpaceBase): self.repeat2 = [2, 3, 4, 5, 6, 7] #6 self.repeat3 = [2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24] #13 self.repeat4 = [2, 3, 4, 5, 6, 7] #6 - self.class_dim = class_dim - self.extract_feature = extract_feature - assert self.block_num < 5, 'ResNet: block number must less than 5, but receive block number is {}'.format( - self.block_num) def init_tokens(self): """ The initial token. - return 2 * self.block_num, 2 means depth and num_filter """ init_token_base = [0, 0, 0, 0, 0, 0, 0, 0] - self.token_len = self.block_num * 2 - return init_token_base[:self.token_len] + return init_token_base def range_table(self): """ Get range table of current search space, constrains the range of tokens. """ - #2 * self.block_num, 2 means depth and num_filter range_table_base = [ len(self.filter_num1), len(self.repeat1), len(self.filter_num2), len(self.repeat2), len(self.filter_num3), len(self.repeat3), len(self.filter_num4), len(self.repeat4) ] - return range_table_base[:self.token_len] + return range_table_base def token2arch(self, tokens=None): """ @@ -83,28 +70,27 @@ class ResNetSpace(SearchSpaceBase): depth = [] num_filters = [] - if self.block_num >= 1: - filter1 = self.filter_num1[tokens[0]] - repeat1 = self.repeat1[tokens[1]] - num_filters.append(filter1) - depth.append(repeat1) - if self.block_num >= 2: - filter2 = self.filter_num2[tokens[2]] - repeat2 = self.repeat2[tokens[3]] - num_filters.append(filter2) - depth.append(repeat2) - if self.block_num >= 3: - filter3 = self.filter_num3[tokens[4]] - repeat3 = self.repeat3[tokens[5]] - num_filters.append(filter3) - depth.append(repeat3) - if self.block_num >= 4: - filter4 = self.filter_num4[tokens[6]] - repeat4 = self.repeat4[tokens[7]] - num_filters.append(filter4) - depth.append(repeat4) - - def net_arch(input): + + filter1 = self.filter_num1[tokens[0]] + repeat1 = self.repeat1[tokens[1]] + num_filters.append(filter1) + depth.append(repeat1) + filter2 = 
self.filter_num2[tokens[2]] + repeat2 = self.repeat2[tokens[3]] + num_filters.append(filter2) + depth.append(repeat2) + filter3 = self.filter_num3[tokens[4]] + repeat3 = self.repeat3[tokens[5]] + num_filters.append(filter3) + depth.append(repeat3) + filter4 = self.filter_num4[tokens[6]] + repeat4 = self.repeat4[tokens[7]] + num_filters.append(filter4) + depth.append(repeat4) + + def net_arch(input, return_block=None, end_points=None): + decode_ends = dict() + conv = conv_bn_layer( input=input, filter_size=5, @@ -112,24 +98,26 @@ class ResNetSpace(SearchSpaceBase): stride=2, act='relu', name='resnet_conv0') + layer_count = 1 for block in range(len(depth)): for i in range(depth[block]): + stride = 2 if i == 0 and block != 0 else 1 + if stride == 2: + layer_count += 1 + if check_points((layer_count - 1), return_block): + decode_ends[layer_count - 1] = conv + + if check_points((layer_count - 1), end_points): + return conv, decode_ends + conv = self._bottleneck_block( input=conv, num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, + stride=stride, name='resnet_depth{}_block{}'.format(i, block)) - if self.output_size == 1: - conv = fluid.layers.fc( - input=conv, - size=self.class_dim, - act=None, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer(0.0, - 0.01)), - bias_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.ConstantInitializer(0))) + if check_points(layer_count, end_points): + return conv, decode_ends return conv diff --git a/paddleslim/nas/search_space/resnet_block.py b/paddleslim/nas/search_space/resnet_block.py new file mode 100644 index 0000000000000000000000000000000000000000..64646a3863af86afd8ca3578dfb07fb59f26e4db --- /dev/null +++ b/paddleslim/nas/search_space/resnet_block.py @@ -0,0 +1,202 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import paddle.fluid as fluid +from paddle.fluid.param_attr import ParamAttr +from .search_space_base import SearchSpaceBase +from .base_layer import conv_bn_layer +from .search_space_registry import SEARCHSPACE +from .utils import compute_downsample_num, check_points + +__all__ = ["ResNetBlockSpace"] + + +@SEARCHSPACE.register +class ResNetBlockSpace(SearchSpaceBase): + def __init__(self, input_size, output_size, block_num, block_mask=None): + super(ResNetBlockSpace, self).__init__(input_size, output_size, + block_num, block_mask) + # use input_size and output_size to compute self.downsample_num + self.downsample_num = compute_downsample_num(self.input_size, + self.output_size) + if self.block_num != None: + assert self.downsample_num <= self.block_num, 'downsample number must be LESS THAN OR EQUAL TO block_num, but NOW: downsample number is {}, block_num is {}'.format( + self.downsample_num, self.block_num) + self.filter_num = np.array( + [48, 64, 96, 128, 160, 192, 224, 256, 320, 384, 512, 640]) + self.repeat = np.array([0, 1, 2]) + self.k_size = np.array([3, 5]) + + def init_tokens(self): + if self.block_mask != None: + return [0] * (len(self.block_mask) * 6) + else: + return [0] * (self.block_num * 6) + + def range_table(self): + range_table_base = [] + if self.block_mask != None: + 
range_table_length = len(self.block_mask) + else: + range_table_length = self.block_num + + for i in range(range_table_length): + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.filter_num)) + range_table_base.append(len(self.k_size)) + range_table_base.append(len(self.repeat)) + range_table_base.append(len(self.repeat)) + + return range_table_base + + def token2arch(self, tokens=None): + if tokens == None: + tokens = self.init_tokens() + + self.bottleneck_params_list = [] + if self.block_mask != None: + for i in range(len(self.block_mask)): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 6]], + self.filter_num[tokens[i * 6 + 1]], + self.filter_num[tokens[i * 6 + 2]], + self.k_size[tokens[i * 6 + 3]], + self.repeat[tokens[i * 6 + 4]], + self.repeat[tokens[i * 6 + 5]], 2 + if self.block_mask[i] == 1 else 1)) + else: + repeat_num = int(self.block_num / self.downsample_num) + num_minus = self.block_num % self.downsample_num + for i in range(self.downsample_num): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 6]], + self.filter_num[tokens[i * 6 + 1]], + self.filter_num[tokens[i * 6 + 2]], + self.k_size[tokens[i * 6 + 3]], + self.repeat[tokens[i * 6 + 4]], + self.repeat[tokens[i * 6 + 5]], 2)) + for k in range(repeat_num - 1): + kk = k * self.downsample_num + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[kk * 6]], + self.filter_num[tokens[kk * 6 + 1]], + self.filter_num[tokens[kk * 6 + 2]], + self.k_size[tokens[kk * 6 + 3]], + self.repeat[tokens[kk * 6 + 4]], + self.repeat[tokens[kk * 6 + 5]], 1)) + if self.downsample_num - i <= num_minus: + j = self.downsample_num * (repeat_num - 1) + i + self.bottleneck_params_list.append( + (self.filter_num[tokens[j * 6]], + self.filter_num[tokens[j * 6 + 1]], + self.filter_num[tokens[j * 6 + 2]], + self.k_size[tokens[j * 6 + 3]], + self.repeat[tokens[j * 6 + 4]], + self.repeat[tokens[j * 6 + 
5]], 1)) + + if self.downsample_num == 0 and self.block_num != 0: + for i in range(self.block_num): + self.bottleneck_params_list.append( + (self.filter_num[tokens[i * 6]], + self.filter_num[tokens[i * 6 + 1]], + self.filter_num[tokens[i * 6 + 2]], + self.k_size[tokens[i * 6 + 3]], + self.repeat[tokens[i * 6 + 4]], + self.repeat[tokens[i * 6 + 5]], 1)) + + def net_arch(input, return_mid_layer=False, return_block=None): + layer_count = 0 + mid_layer = dict() + for i, layer_setting in enumerate(self.bottleneck_params_list): + filter_num1, filter_num2, filter_num3, k_size, repeat1, repeat2, stride = layer_setting + if stride == 2: + layer_count += 1 + if check_points((layer_count - 1), return_block): + mid_layer[layer_count - 1] = input + + input = self._bottleneck_block( + input=input, + num_filters1=filter_num1, + num_filters2=filter_num2, + num_filters3=filter_num3, + kernel_size=k_size, + repeat1=repeat1, + repeat2=repeat2, + stride=stride, + name='resnet' + str(i + 1)) + + if return_mid_layer: + return input, mid_layer + else: + return input, + + return net_arch + + def _shortcut(self, input, ch_out, stride, name=None): + ch_in = input.shape[1] + if ch_in != ch_out or stride != 1: + return conv_bn_layer( + input=input, + filter_size=1, + num_filters=ch_out, + stride=stride, + name=name + '_shortcut') + else: + return input + + def _bottleneck_block(self, + input, + num_filters1, + num_filters2, + num_filters3, + kernel_size, + repeat1, + repeat2, + stride, + name=None): + short = self._shortcut(input, num_filters3, stride, name=name) + + for i in range(repeat1): + input = conv_bn_layer( + input=input, + num_filters=num_filters1, + filter_size=1, + stride=1, + act='relu', + name=name + '_bottleneck_conv0_{}'.format(str(i))) + + input = conv_bn_layer( + input=input, + num_filters=num_filters2, + filter_size=kernel_size, + stride=stride, + act='relu', + name=name + '_bottleneck_conv1') + for i in range(repeat2): + input = conv_bn_layer( + input=input, + 
num_filters=num_filters3, + filter_size=1, + stride=1, + act=None, + name=name + '_bottleneck_conv2_{}'.format(str(i))) + + return fluid.layers.elementwise_add( + x=short, y=input, act='relu', name=name + '_bottleneck_add') diff --git a/paddleslim/nas/search_space/search_space_base.py b/paddleslim/nas/search_space/search_space_base.py index b8f5d9b89bd2a64e566a5b20280dd27048a4028b..9dee1431d34afb2411747affc542e82ca099d4d7 100644 --- a/paddleslim/nas/search_space/search_space_base.py +++ b/paddleslim/nas/search_space/search_space_base.py @@ -12,8 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging +from ...common import get_logger + __all__ = ['SearchSpaceBase'] +_logger = get_logger(__name__, level=logging.INFO) class SearchSpaceBase(object): """Controller for Neural Architecture Search. @@ -26,12 +30,13 @@ class SearchSpaceBase(object): self.output_size = output_size self.block_num = block_num self.block_mask = block_mask - if self.block_mask is not None: + if self.block_mask != None: assert isinstance(self.block_mask, list), 'Block_mask must be a list.' - print( + _logger.warn( "If block_mask is NOT None, we will use block_mask as major configs!" ) + self.block_num = None def init_tokens(self): """Get init tokens in search space. diff --git a/paddleslim/nas/search_space/utils.py b/paddleslim/nas/search_space/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c76a48cc5d3cbbc2858dd91479f7913c49d4081a --- /dev/null +++ b/paddleslim/nas/search_space/utils.py @@ -0,0 +1,38 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math + + +def compute_downsample_num(input_size, output_size): + downsample_num = 0 + while input_size > output_size: + input_size = math.ceil(float(input_size) / 2.0) + downsample_num += 1 + + if input_size != output_size: + raise NotImplementedError( + 'output_size must be reachable from input_size by repeated halving!') + + return downsample_num + + +def check_points(count, points): + if points is None: + return False + else: + if isinstance(points, list): + return (True if count in points else False) + else: + return (True if count == points else False)