Commit 4b8befd0 authored by itminner

Merge remote-tracking branch 'upstream/develop' into develop

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import math
import logging
import paddle
import argparse
import functools
import numpy as np
import paddle.fluid as fluid
sys.path.append(sys.path[0] + "/../")
import models
import imagenet_reader as reader
from utility import add_arguments, print_arguments
from paddleslim.dist import merge, l2_loss, soft_label_loss, fsp_loss
logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
_logger = logging.getLogger(__name__)
_logger.setLevel(logging.INFO)
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 64*4, "Minibatch size.")
add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
add_arg('total_images', int, 1281167, "Training image number.")
add_arg('image_shape', str, "3,224,224", "Input image size")
add_arg('lr', float, 0.1, "The initial learning rate.")
add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
add_arg('num_epochs', int, 120, "The number of total epochs.")
add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
add_arg('log_period', int, 20, "Log period in batches.")
add_arg('model', str, "MobileNet", "Set the network to use.")
add_arg('pretrained_model', str, None, "Path of the pretrained student model (optional).")
add_arg('teacher_model', str, "ResNet50", "Set the teacher network to use.")
add_arg('teacher_pretrained_model', str, "../pretrain/ResNet50_pretrained", "Path of the teacher's pretrained model.")
parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
# yapf: enable
model_list = [m for m in dir(models) if "__" not in m]
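# Two LR schedules are defined below: piecewise_decay divides the LR by 10 at
# each epoch listed in --step_epochs, while cosine_decay anneals it smoothly
# to zero over --num_epochs.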
def piecewise_decay(args):
step = int(math.ceil(float(args.total_images) / args.batch_size))
bd = [step * e for e in args.step_epochs]
lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=args.momentum_rate,
regularization=fluid.regularizer.L2Decay(args.l2_decay))
return optimizer
def cosine_decay(args):
step = int(math.ceil(float(args.total_images) / args.batch_size))
learning_rate = fluid.layers.cosine_decay(
learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=args.momentum_rate,
regularization=fluid.regularizer.L2Decay(args.l2_decay))
return optimizer
def create_optimizer(args):
if args.lr_strategy == "piecewise_decay":
return piecewise_decay(args)
elif args.lr_strategy == "cosine_decay":
return cosine_decay(args)
def compress(args):
if args.data == "mnist":
import paddle.dataset.mnist as reader
train_reader = reader.train()
val_reader = reader.test()
class_dim = 10
image_shape = "1,28,28"
elif args.data == "imagenet":
import imagenet_reader as reader
train_reader = reader.train()
val_reader = reader.val()
class_dim = 1000
image_shape = "3,224,224"
else:
raise ValueError("{} is not supported.".format(args.data))
image_shape = [int(m) for m in image_shape.split(",")]
assert args.model in model_list, "{} is not in lists: {}".format(
args.model, model_list)
student_program = fluid.Program()
s_startup = fluid.Program()
with fluid.program_guard(student_program, s_startup):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
train_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label],
capacity=64,
use_double_buffer=True,
iterable=True)
valid_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label],
capacity=64,
use_double_buffer=True,
iterable=True)
# model definition
model = models.__dict__[args.model]()
out = model.net(input=image, class_dim=class_dim)
cost = fluid.layers.cross_entropy(input=out, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
#print("="*50+"student_model_params"+"="*50)
#for v in student_program.list_vars():
# print(v.name, v.shape)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
train_reader = paddle.batch(
train_reader, batch_size=args.batch_size, drop_last=True)
val_reader = paddle.batch(
val_reader, batch_size=args.batch_size, drop_last=True)
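    # Clone the student program for evaluation before distillation ops are added.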
val_program = student_program.clone(for_test=True)
    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
train_loader.set_sample_list_generator(train_reader, places)
valid_loader.set_sample_list_generator(val_reader, place)
teacher_model = models.__dict__[args.teacher_model]()
# define teacher program
teacher_program = fluid.Program()
t_startup = fluid.Program()
teacher_scope = fluid.Scope()
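    # Build the teacher in its own scope and program so its variable names stay
    # isolated from the student's until merge() renames them.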
with fluid.scope_guard(teacher_scope):
with fluid.program_guard(teacher_program, t_startup):
with fluid.unique_name.guard():
image = fluid.layers.data(
name='image', shape=image_shape, dtype='float32')
predict = teacher_model.net(image, class_dim=class_dim)
#print("="*50+"teacher_model_params"+"="*50)
#for v in teacher_program.list_vars():
# print(v.name, v.shape)
exe.run(t_startup)
assert args.teacher_pretrained_model and os.path.exists(
args.teacher_pretrained_model
), "teacher_pretrained_model should be set when teacher_model is not None."
def if_exist(var):
return os.path.exists(
os.path.join(args.teacher_pretrained_model, var.name)
) and var.name != 'conv1_weights' and var.name != 'fc_0.w_0' and var.name != 'fc_0.b_0'
fluid.io.load_vars(
exe,
args.teacher_pretrained_model,
main_program=teacher_program,
predicate=if_exist)
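    # merge() copies the teacher program into the student program, prefixing
    # every teacher variable with "teacher_"; data_name_map maps the teacher's
    # input variable to the student's so both read the same feed.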
data_name_map = {'image': 'image'}
main = merge(
teacher_program,
student_program,
data_name_map,
place,
teacher_scope=teacher_scope)
#print("="*50+"teacher_vars"+"="*50)
#for v in teacher_program.list_vars():
# if '_generated_var' not in v.name and 'fetch' not in v.name and 'feed' not in v.name:
# print(v.name, v.shape)
#return
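    # Distillation losses: l2_loss pulls the student's fc logits toward the
    # teacher's, and fsp_loss matches FSP matrices computed between two pairs
    # of teacher/student feature maps. Both are added to the student's
    # classification loss.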
with fluid.program_guard(main, s_startup):
l2_loss_v = l2_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0", main)
fsp_loss_v = fsp_loss("teacher_res2a_branch2a.conv2d.output.1.tmp_0",
"teacher_res3a_branch2a.conv2d.output.1.tmp_0",
"depthwise_conv2d_1.tmp_0", "conv2d_3.tmp_0",
main)
loss = avg_cost + l2_loss_v + fsp_loss_v
opt = create_optimizer(args)
opt.minimize(loss)
exe.run(s_startup)
build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_reduce_ops = False
parallel_main = fluid.CompiledProgram(main).with_data_parallel(
loss_name=loss.name, build_strategy=build_strategy)
for epoch_id in range(args.num_epochs):
for step_id, data in enumerate(train_loader):
loss_1, loss_2, loss_3, loss_4 = exe.run(
parallel_main,
feed=data,
fetch_list=[
loss.name, avg_cost.name, l2_loss_v.name, fsp_loss_v.name
])
if step_id % args.log_period == 0:
_logger.info(
"train_epoch {} step {} loss {:.6f}, class loss {:.6f}, l2 loss {:.6f}, fsp loss {:.6f}".
format(epoch_id, step_id, loss_1[0], loss_2[0], loss_3[0],
loss_4[0]))
val_acc1s = []
val_acc5s = []
for step_id, data in enumerate(valid_loader):
val_loss, val_acc1, val_acc5 = exe.run(
val_program,
                feed=data,
fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
val_acc1s.append(val_acc1)
val_acc5s.append(val_acc5)
if step_id % args.log_period == 0:
_logger.info(
"valid_epoch {} step {} loss {:.6f}, top1 {:.6f}, top5 {:.6f}".
format(epoch_id, step_id, val_loss[0], val_acc1[0],
val_acc5[0]))
_logger.info("epoch {} top1 {:.6f}, top5 {:.6f}".format(
epoch_id, np.mean(val_acc1s), np.mean(val_acc5s)))
def main():
args = parser.parse_args()
print_arguments(args)
compress(args)
if __name__ == '__main__':
main()
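# Example invocation (a sketch; the script name and flag values are
# illustrative, the flags themselves are defined above):
#   python distill.py --model MobileNet --teacher_model ResNet50 \
#       --teacher_pretrained_model ../pretrain/ResNet50_pretrained \
#       --data imagenet --batch_size 256 --use_gpu True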
import sys
sys.path.append('..')
import numpy as np
import argparse
import ast
import time
import logging
import paddle
import paddle.fluid as fluid
from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
from paddleslim.analysis import flops
from paddleslim.nas import SANAS
from paddleslim.common import get_logger
from optimizer import create_optimizer
import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
data_shape = [-1] + image_shape
data = fluid.data(name='data', shape=data_shape, dtype='float32')
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
data_loader = fluid.io.DataLoader.from_generator(
feed_list=[data, label],
capacity=1024,
use_double_buffer=True,
iterable=True)
return data_loader, data, label
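# build_program wires a sampled architecture into a fresh Program: a data
# loader, a softmax classifier head, loss/accuracy metrics and, for training
# programs, the optimizer.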
def build_program(main_program,
startup_program,
image_shape,
archs,
args,
is_test=False):
with fluid.program_guard(main_program, startup_program):
data_loader, data, label = create_data_loader(image_shape)
output = archs(data)
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)
        if not is_test:
optimizer = create_optimizer(args)
optimizer.minimize(avg_cost)
return data_loader, avg_cost, acc_top1, acc_top5
def search_mobilenetv2(config, args, image_size, is_server=True):
factory = SearchSpaceFactory()
space = factory.get_search_space(config)
if is_server:
### start a server and a client
sa_nas = SANAS(
config,
server_addr=("", 8883),
init_temperature=args.init_temperature,
reduce_rate=args.reduce_rate,
search_steps=args.search_steps,
is_server=True)
else:
### start a client
sa_nas = SANAS(
config,
server_addr=("10.255.125.38", 8883),
init_temperature=args.init_temperature,
reduce_rate=args.reduce_rate,
search_steps=args.search_steps,
is_server=False)
image_shape = [3, image_size, image_size]
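    # Search loop: sample an architecture from the SA controller, discard it if
    # it exceeds the FLOPs budget, train it for --retain_epoch epochs, then
    # report its mean test top-1 accuracy back to the controller as the reward.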
for step in range(args.search_steps):
archs = sa_nas.next_archs()[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
current_flops = flops(train_program)
print('step: {}, current_flops: {}'.format(step, current_flops))
if current_flops > args.max_flops:
continue
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
test_program,
startup_program,
image_shape,
archs,
args,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
#test_loader, _, _ = create_data_loader(image_shape)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
for batch_id, data in enumerate(train_loader()):
fetches = [avg_cost.name]
s_time = time.time()
outs = exe.run(train_compiled_program,
feed=data,
fetch_list=fetches)[0]
batch_time = time.time() - s_time
if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {:.3f}s'.
                        format(step, epoch_id, batch_id, outs[0], batch_time))
reward = []
for batch_id, data in enumerate(test_loader()):
test_fetches = [
test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
]
batch_reward = exe.run(test_program,
feed=data,
fetch_list=test_fetches)
reward_avg = np.mean(np.array(batch_reward), axis=1)
reward.append(reward_avg)
_logger.info(
'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
format(step, batch_id, batch_reward[0], batch_reward[1],
batch_reward[2]))
finally_reward = np.mean(np.array(reward), axis=0)
_logger.info(
'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
finally_reward[0], finally_reward[1], finally_reward[2]))
sa_nas.reward(float(finally_reward[1]))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='SA NAS MobileNetV2 cifar10 argparse')
parser.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=True,
help='Whether to use GPU in train/test model.')
parser.add_argument(
'--batch_size', type=int, default=256, help='batch size.')
    parser.add_argument(
        '--data',
        type=str,
        default='cifar10',
        choices=['cifar10', 'imagenet'],
        help="Which dataset to use: 'cifar10' or 'imagenet'.")
# controller
parser.add_argument(
'--reduce_rate', type=float, default=0.85, help='reduce rate.')
parser.add_argument(
'--init_temperature',
type=float,
default=10.24,
help='init temperature.')
parser.add_argument(
'--is_server',
type=ast.literal_eval,
default=True,
help='Whether to start a server.')
# nas args
    parser.add_argument(
        '--max_flops',
        type=int,
        default=592948064,
        help='maximum FLOPs allowed for a sampled architecture.')
    parser.add_argument(
        '--retain_epoch', type=int, default=5, help='training epochs before evaluation.')
    parser.add_argument(
        '--end_epoch', type=int, default=500, help='end epoch for the current client.')
    parser.add_argument(
        '--search_steps',
        type=int,
        default=100,
        help='number of search steps.')
parser.add_argument(
'--server_address', type=str, default=None, help='server address.')
# optimizer args
parser.add_argument(
'--lr_strategy',
type=str,
default='piecewise_decay',
help='learning rate decay strategy.')
parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
    parser.add_argument(
        '--l2_decay', type=float, default=1e-4, help='L2 weight decay.')
parser.add_argument(
'--step_epochs',
nargs='+',
type=int,
default=[30, 60, 90],
help="piecewise decay step")
    parser.add_argument(
        '--momentum_rate',
        type=float,
        default=0.9,
        help='momentum rate.')
    parser.add_argument(
        '--warm_up_epochs',
        type=float,
        default=5.0,
        help='number of warm-up epochs.')
    parser.add_argument(
        '--num_epochs', type=int, default=120, help='total number of epochs.')
    parser.add_argument(
        '--decay_epochs', type=float, default=2.4, help='epochs between exponential LR decays.')
    parser.add_argument(
        '--decay_rate', type=float, default=0.97, help='exponential LR decay rate.')
    parser.add_argument(
        '--total_images',
        type=int,
        default=1281167,
        help='total number of training images.')
args = parser.parse_args()
print(args)
if args.data == 'cifar10':
image_size = 32
block_num = 3
elif args.data == 'imagenet':
image_size = 224
block_num = 6
else:
        raise NotImplementedError(
            'data must be in [cifar10, imagenet], but received: {}'.format(
                args.data))
config_info = {
'input_size': image_size,
'output_size': 1,
'block_num': block_num,
'block_mask': None
}
config = [('MobileNetV2Space', config_info)]
search_mobilenetv2(config, args, image_size, is_server=args.is_server)
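# Example invocation (a sketch; the script name and flag values are illustrative):
#   python sa_nas_mobilenetv2.py --data cifar10 --is_server True \
#       --search_steps 100 --batch_size 256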
import sys
sys.path.append('..')
import numpy as np
import argparse
import ast
import paddle
import paddle.fluid as fluid
from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
from paddleslim.analysis import flops
from paddleslim.nas import SANAS
def create_data_loader():
data = fluid.data(name='data', shape=[-1, 3, 32, 32], dtype='float32')
label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
data_loader = fluid.io.DataLoader.from_generator(
feed_list=[data, label],
capacity=1024,
use_double_buffer=True,
iterable=True)
return data_loader, data, label
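# init_sa_nas builds the search space's default architecture once so its FLOPs
# can serve as a baseline, then starts a SANAS controller in server mode.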
def init_sa_nas(config):
factory = SearchSpaceFactory()
space = factory.get_search_space(config)
model_arch = space.token2arch()[0]
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
data_loader, data, label = create_data_loader()
output = model_arch(data)
cost = fluid.layers.mean(
fluid.layers.softmax_with_cross_entropy(
logits=output, label=label))
base_flops = flops(main_program)
search_steps = 10000000
### start a server and a client
sa_nas = SANAS(config, search_steps=search_steps, is_server=True)
### start a client, server_addr is server address
#sa_nas = SANAS(config, max_flops = base_flops, server_addr=("10.255.125.38", 18607), search_steps = search_steps, is_server=False)
return sa_nas, search_steps
def search_mobilenetv2_cifar10(config, args):
sa_nas, search_steps = init_sa_nas(config)
for i in range(search_steps):
print('search step: ', i)
archs = sa_nas.next_archs()[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
train_loader, data, label = create_data_loader()
output = archs(data)
cost = fluid.layers.mean(
fluid.layers.softmax_with_cross_entropy(
logits=output, label=label))[0]
test_program = train_program.clone(for_test=True)
optimizer = fluid.optimizer.Momentum(
learning_rate=0.1,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
optimizer.minimize(cost)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
train_reader = paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024)
train_loader.set_sample_generator(
train_reader,
batch_size=512,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader, _, _ = create_data_loader()
test_reader = paddle.dataset.cifar.test10(cycle=False)
test_loader.set_sample_generator(
test_reader,
batch_size=256,
drop_last=False,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
for epoch_id in range(10):
for batch_id, data in enumerate(train_loader()):
loss = exe.run(train_program,
feed=data,
fetch_list=[cost.name])[0]
if batch_id % 5 == 0:
print('epoch: {}, batch: {}, loss: {}'.format(
epoch_id, batch_id, loss[0]))
for data in test_loader():
reward = exe.run(test_program, feed=data,
fetch_list=[cost.name])[0]
print('reward:', reward)
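        # The reward reported to SA-NAS is the cost of the last test batch only.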
sa_nas.reward(float(reward))
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='SA NAS MobileNetV2 cifar10 argparse')
parser.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=True,
help='Whether to use GPU in train/test model.')
args = parser.parse_args()
print(args)
config_info = {'input_size': 32, 'output_size': 1, 'block_num': 5}
config = [('MobileNetV2Space', config_info)]
search_mobilenetv2_cifar10(config, args)
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.initializer import init_on_cpu
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
"""Applies cosine decay to the learning rate.
lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
"""
global_step = _decay_step_counter()
with init_on_cpu():
epoch = ops.floor(global_step / step_each_epoch)
decayed_lr = learning_rate * \
(ops.cos(epoch * (math.pi / epochs)) + 1)/2
return decayed_lr
def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
"""Applies cosine decay to the learning rate.
lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
decrease lr for every mini-batch and start with warmup.
"""
global_step = _decay_step_counter()
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate")
warmup_epoch = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(5), force_cpu=True)
with init_on_cpu():
epoch = ops.floor(global_step / step_each_epoch)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(epoch < warmup_epoch):
decayed_lr = learning_rate * (global_step /
(step_each_epoch * warmup_epoch))
fluid.layers.tensor.assign(input=decayed_lr, output=lr)
with switch.default():
decayed_lr = learning_rate * \
(ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
fluid.layers.tensor.assign(input=decayed_lr, output=lr)
return lr
def exponential_decay_with_warmup(learning_rate,
step_each_epoch,
decay_epochs,
decay_rate=0.97,
warm_up_epoch=5.0):
"""Applies exponential decay to the learning rate.
"""
global_step = _decay_step_counter()
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate")
warmup_epoch = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
with init_on_cpu():
epoch = ops.floor(global_step / step_each_epoch)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(epoch < warmup_epoch):
decayed_lr = learning_rate * (global_step /
(step_each_epoch * warmup_epoch))
fluid.layers.assign(input=decayed_lr, output=lr)
with switch.default():
div_res = (global_step - warmup_epoch * step_each_epoch
) / decay_epochs
div_res = ops.floor(div_res)
decayed_lr = learning_rate * (decay_rate**div_res)
fluid.layers.assign(input=decayed_lr, output=lr)
return lr
def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
""" Applies linear learning rate warmup for distributed training
Argument learning_rate can be float or a Variable
lr = lr + (warmup_rate * step / warmup_steps)
"""
assert (isinstance(end_lr, float))
assert (isinstance(start_lr, float))
linear_step = end_lr - start_lr
with fluid.default_main_program()._lr_schedule_guard():
lr = fluid.layers.tensor.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate_warmup")
global_step = fluid.layers.learning_rate_scheduler._decay_step_counter(
)
with fluid.layers.control_flow.Switch() as switch:
with switch.case(global_step < warmup_steps):
decayed_lr = start_lr + linear_step * (global_step /
warmup_steps)
fluid.layers.tensor.assign(decayed_lr, lr)
with switch.default():
fluid.layers.tensor.assign(learning_rate, lr)
return lr
class Optimizer(object):
"""A class used to represent several optimizer methods
Attributes:
batch_size: batch size on all devices.
lr: learning rate.
lr_strategy: learning rate decay strategy.
l2_decay: l2_decay parameter.
momentum_rate: momentum rate when using Momentum optimizer.
step_epochs: piecewise decay steps.
num_epochs: number of total epochs.
total_images: total images.
        step: total steps in an epoch.
"""
def __init__(self, args):
self.batch_size = args.batch_size
self.lr = args.lr
self.lr_strategy = args.lr_strategy
self.l2_decay = args.l2_decay
self.momentum_rate = args.momentum_rate
self.step_epochs = args.step_epochs
self.num_epochs = args.num_epochs
self.warm_up_epochs = args.warm_up_epochs
self.decay_epochs = args.decay_epochs
self.decay_rate = args.decay_rate
self.total_images = args.total_images
self.step = int(math.ceil(float(self.total_images) / self.batch_size))
def piecewise_decay(self):
"""piecewise decay with Momentum optimizer
Returns:
a piecewise_decay optimizer
"""
bd = [self.step * e for e in self.step_epochs]
lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def cosine_decay(self):
"""cosine decay with Momentum optimizer
Returns:
a cosine_decay optimizer
"""
learning_rate = fluid.layers.cosine_decay(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def cosine_decay_warmup(self):
"""cosine decay with warmup
Returns:
a cosine_decay_with_warmup optimizer
"""
learning_rate = cosine_decay_with_warmup(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def exponential_decay_warmup(self):
"""exponential decay with warmup
Returns:
        an exponential_decay_with_warmup optimizer
"""
learning_rate = exponential_decay_with_warmup(
learning_rate=self.lr,
step_each_epoch=self.step,
decay_epochs=self.step * self.decay_epochs,
decay_rate=self.decay_rate,
warm_up_epoch=self.warm_up_epochs)
optimizer = fluid.optimizer.RMSProp(
learning_rate=learning_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay),
momentum=self.momentum_rate,
rho=0.9,
epsilon=0.001)
return optimizer
def linear_decay(self):
"""linear decay with Momentum optimizer
Returns:
a linear_decay optimizer
"""
end_lr = 0
learning_rate = fluid.layers.polynomial_decay(
self.lr, self.step, end_lr, power=1)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def adam_decay(self):
"""Adam optimizer
Returns:
an adam_decay optimizer
"""
return fluid.optimizer.Adam(learning_rate=self.lr)
def cosine_decay_RMSProp(self):
"""cosine decay with RMSProp optimizer
Returns:
        a cosine_decay_RMSProp optimizer
"""
learning_rate = fluid.layers.cosine_decay(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.RMSProp(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay),
# Apply epsilon=1 on ImageNet dataset.
epsilon=1)
return optimizer
def default_decay(self):
"""default decay
Returns:
default decay optimizer
"""
optimizer = fluid.optimizer.Momentum(
learning_rate=self.lr,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def create_optimizer(args):
Opt = Optimizer(args)
optimizer = getattr(Opt, args.lr_strategy)()
return optimizer
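# Example usage (a sketch; `args` is an argparse.Namespace carrying the fields
# read in Optimizer.__init__, with lr_strategy naming one of the methods above):
#   opt = create_optimizer(args)  # e.g. args.lr_strategy == 'cosine_decay'
#   opt.minimize(avg_cost)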
@@ -107,6 +107,8 @@ class ControllerServer(object):
                 _logger.debug("send message to {}: [{}]".format(addr,
                                                                 tokens))
                 conn.close()
+        except Exception as err:
+            _logger.error(err)
         finally:
             self._socket_server.close()
             self.close()
@@ -75,7 +75,7 @@ class SAController(EvolutionaryController):
         iter = int(iter)
         if iter > self._iter:
             self._iter = iter
         temperature = self._init_temperature * self._reduce_rate**self._iter
         if (reward > self._reward) or (np.random.random() <= math.exp(
                 (reward - self._reward) / temperature)):
             self._reward = reward
@@ -98,7 +98,7 @@ class SAController(EvolutionaryController):
         new_tokens = tokens[:]
         index = int(len(self._range_table[0]) * np.random.random())
         new_tokens[index] = np.random.randint(self._range_table[0][index],
-                                              self._range_table[1][index] + 1)
+                                              self._range_table[1][index])
         _logger.debug("change index[{}] from {} to {}".format(index, tokens[
             index], new_tokens[index]))
         if self._constrain_func is None or self._max_try_times is None:
...
@@ -54,6 +54,9 @@ class VarWrapper(object):
         """
         return self._var.name
 
+    def __repr__(self):
+        return self._var.name
+
     def shape(self):
         """
         Get the shape of the variable.
@@ -131,6 +134,11 @@ class OpWrapper(object):
         """
         return self._op.type
 
+    def __repr__(self):
+        return "op[id: {}, type: {}; inputs: {}]".format(self.idx(),
+                                                         self.type(),
+                                                         self.all_inputs())
+
     def is_bwd_op(self):
         """
         Whether this operator is a backward op.
...
@@ -60,16 +60,17 @@ class SANAS(object):
         self._init_temperature = init_temperature
         self._is_server = is_server
         self._configs = configs
-        self._keys = hashlib.md5(str(self._configs)).hexdigest()
+        self._key = hashlib.md5(str(self._configs)).hexdigest()
         server_ip, server_port = server_addr
         if server_ip == None or server_ip == "":
             server_ip = self._get_host_ip()
 
+        factory = SearchSpaceFactory()
+        self._search_space = factory.get_search_space(configs)
+
         # create controller server
         if self._is_server:
-            factory = SearchSpaceFactory()
-            self._search_space = factory.get_search_space(configs)
             init_tokens = self._search_space.init_tokens()
             range_table = self._search_space.range_table()
             range_table = (len(range_table) * [0], range_table)
@@ -90,6 +91,7 @@ class SANAS(object):
                 search_steps=search_steps,
                 key=self._key)
             self._controller_server.start()
+            server_port = self._controller_server.port()
 
         self._controller_client = ControllerClient(
             server_ip, server_port, key=self._key)
@@ -99,6 +101,9 @@ class SANAS(object):
     def _get_host_ip(self):
         return socket.gethostbyname(socket.gethostname())
 
+    def tokens2arch(self, tokens):
+        return self._search_space.token2arch(tokens)
+
     def next_archs(self):
         """
         Get next network architectures.
...
@@ -39,6 +39,7 @@ class CombineSearchSpace(object):
         for config_list in config_lists:
             key, config = config_list
             self.spaces.append(self._get_single_search_space(key, config))
+        self.init_tokens()
 
     def _get_single_search_space(self, key, config):
         """
@@ -51,9 +52,11 @@ class CombineSearchSpace(object):
             model space(class)
         """
         cls = SEARCHSPACE.get(key)
-        space = cls(config['input_size'], config['output_size'],
-                    config['block_num'], config['block_mask'])
+        block_mask = config['block_mask'] if 'block_mask' in config else None
+        space = cls(config['input_size'],
+                    config['output_size'],
+                    config['block_num'],
+                    block_mask=block_mask)
         return space
 
     def init_tokens(self):
...
@@ -32,10 +32,12 @@ class MobileNetV1Space(SearchSpaceBase):
                  input_size,
                  output_size,
                  block_num,
+                 block_mask,
                  scale=1.0,
                  class_dim=1000):
         super(MobileNetV1Space, self).__init__(input_size, output_size,
-                                               block_num)
+                                               block_num, block_mask)
+        assert self.block_mask == None, 'MobileNetV1Space uses the original MobileNetV1 as the search space, so use input_size, output_size and block_num to search'
         self.scale = scale
         self.class_dim = class_dim
         # self.head_num means the channel of first convolution
...
@@ -113,40 +113,69 @@ class MobileNetV2Space(SearchSpaceBase):
         if tokens is None:
             tokens = self.init_tokens()
+        print(tokens)
 
-        bottleneck_params_list = []
+        self.bottleneck_params_list = []
         if self.block_num >= 1:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (1, self.head_num[tokens[0]], 1, 1, 3))
         if self.block_num >= 2:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[1]], self.filter_num1[tokens[2]],
                  self.repeat[tokens[3]], 2, self.k_size[tokens[4]]))
         if self.block_num >= 3:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[5]], self.filter_num1[tokens[6]],
                  self.repeat[tokens[7]], 2, self.k_size[tokens[8]]))
         if self.block_num >= 4:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[9]], self.filter_num2[tokens[10]],
                  self.repeat[tokens[11]], 2, self.k_size[tokens[12]]))
         if self.block_num >= 5:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[13]], self.filter_num3[tokens[14]],
                  self.repeat[tokens[15]], 2, self.k_size[tokens[16]]))
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[17]], self.filter_num4[tokens[18]],
                  self.repeat[tokens[19]], 1, self.k_size[tokens[20]]))
         if self.block_num >= 6:
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[21]], self.filter_num5[tokens[22]],
                  self.repeat[tokens[23]], 2, self.k_size[tokens[24]]))
-            bottleneck_params_list.append(
+            self.bottleneck_params_list.append(
                 (self.multiply[tokens[25]], self.filter_num6[tokens[26]],
                  self.repeat[tokens[27]], 1, self.k_size[tokens[28]]))
 
-        def net_arch(input):
+        def _modify_bottle_params(output_stride=None):
+            if output_stride is not None and output_stride % 2 != 0:
+                raise Exception("output stride must be an even number")
+            if output_stride is None:
+                return
+            else:
+                stride = 2
+                for i, layer_setting in enumerate(self.bottleneck_params_list):
+                    t, c, n, s, ks = layer_setting
+                    stride = stride * s
+                    if stride > output_stride:
+                        s = 1
+                    self.bottleneck_params_list[i] = (t, c, n, s, ks)
+
+        def net_arch(input,
+                     end_points=None,
+                     decode_points=None,
+                     output_stride=None):
+            _modify_bottle_params(output_stride)
+            decode_ends = dict()
+
+            def check_points(count, points):
+                if points is None:
+                    return False
+                else:
+                    if isinstance(points, list):
+                        return (True if count in points else False)
+                    else:
+                        return (True if count == points else False)
+
             #conv1
             # all padding is 'SAME' in the conv2d, can compute the actual padding automatic.
             input = conv_bn_layer(
@@ -157,14 +186,21 @@ class MobileNetV2Space(SearchSpaceBase):
                 padding='SAME',
                 act='relu6',
                 name='mobilenetv2_conv1_1')
+            layer_count = 1
+            if check_points(layer_count, decode_points):
+                decode_ends[layer_count] = input
+
+            if check_points(layer_count, end_points):
+                return input, decode_ends
 
             # bottleneck sequences
             i = 1
             in_c = int(32 * self.scale)
-            for layer_setting in bottleneck_params_list:
+            for layer_setting in self.bottleneck_params_list:
                 t, c, n, s, k = layer_setting
                 i += 1
-                input = self._invresi_blocks(
+                #print(input)
+                input, depthwise_output = self._invresi_blocks(
                     input=input,
                     in_c=in_c,
                     t=t,
@@ -174,6 +210,33 @@ class MobileNetV2Space(SearchSpaceBase):
                     k=k,
                     name='mobilenetv2_conv' + str(i))
                 in_c = int(c * self.scale)
+                layer_count += 1
+                ### decode_points and end_points refer to block numbers
+                if check_points(layer_count, decode_points):
+                    decode_ends[layer_count] = depthwise_output
+
+                if check_points(layer_count, end_points):
+                    return input, decode_ends
+
+            # last conv
+            input = conv_bn_layer(
+                input=input,
+                num_filters=int(1280 * self.scale)
+                if self.scale > 1.0 else 1280,
+                filter_size=1,
+                stride=1,
+                padding='SAME',
+                act='relu6',
+                name='mobilenetv2_conv' + str(i + 1))
+
+            input = fluid.layers.pool2d(
+                input=input,
+                pool_size=7,
+                pool_stride=1,
+                pool_type='avg',
+                global_pooling=True,
+                name='mobilenetv2_last_pool')
 
             # if output_size is 1, add fc layer in the end
             if self.output_size == 1:
@@ -248,6 +311,8 @@ class MobileNetV2Space(SearchSpaceBase):
             name=name + '_dwise',
             use_cudnn=False)
 
+        depthwise_output = bottleneck_conv
+
         linear_out = conv_bn_layer(
             input=bottleneck_conv,
             num_filters=num_filters,
@@ -260,7 +325,7 @@ class MobileNetV2Space(SearchSpaceBase):
         out = linear_out
         if ifshortcut:
             out = self._shortcut(input=input, data_residual=out)
-        return out
+        return out, depthwise_output
 
     def _invresi_blocks(self, input, in_c, t, c, n, s, k, name=None):
         """Build inverted residual blocks.
@@ -276,7 +341,7 @@ class MobileNetV2Space(SearchSpaceBase):
         Returns:
             Variable, layers output.
         """
-        first_block = self._inverted_residual_unit(
+        first_block, depthwise_output = self._inverted_residual_unit(
             input=input,
             num_in_filter=in_c,
             num_filters=c,
@@ -290,7 +355,7 @@ class MobileNetV2Space(SearchSpaceBase):
         last_c = c
         for i in range(1, n):
-            last_residual_block = self._inverted_residual_unit(
+            last_residual_block, depthwise_output = self._inverted_residual_unit(
                 input=last_residual_block,
                 num_in_filter=last_c,
                 num_filters=c,
@@ -299,4 +364,4 @@ class MobileNetV2Space(SearchSpaceBase):
             filter_size=k,
             expansion_factor=t,
             name=name + '_' + str(i + 1))
-        return last_residual_block
+        return last_residual_block, depthwise_output
@@ -19,11 +19,19 @@ class SearchSpaceBase(object):
     """Controller for Neural Architecture Search.
     """
 
-    def __init__(self, input_size, output_size, block_num, block_mask, *argss):
+    def __init__(self, input_size, output_size, block_num, block_mask, *args):
+        """init model config
+        """
         self.input_size = input_size
         self.output_size = output_size
         self.block_num = block_num
         self.block_mask = block_mask
+        if self.block_mask is not None:
+            assert isinstance(self.block_mask,
+                              list), 'Block_mask must be a list.'
+            print(
+                "If block_mask is NOT None, we will use block_mask as major configs!"
+            )
 
     def init_tokens(self):
         """Get init tokens in search space.
...
@@ -528,33 +528,41 @@ class Pruner():
         Returns:
             list<VarWrapper>: A list of operators.
         """
+        _logger.debug("######################search: {}######################".
+                      format(op_node))
         visited = [op_node.idx()]
         stack = []
         brothers = []
         for op in graph.next_ops(op_node):
-            if (op.type() != 'conv2d') and (op.type() != 'fc') and (
-                    not op.is_bwd_op()):
+            if ("conv2d" not in op.type()) and (op.type() != 'fc') and (
+                    not op.is_bwd_op()) and (not op.is_opt_op()):
                 stack.append(op)
                 visited.append(op.idx())
         while len(stack) > 0:
             top_op = stack.pop()
-            if top_op.type().startswith("elementwise_"):
-                for parent in graph.pre_ops(top_op):
-                    if parent.idx() not in visited and (
-                            not parent.is_bwd_op()):
-                        if ((parent.type() == 'conv2d') or
-                            (parent.type() == 'fc')):
-                            brothers.append(parent)
-                        else:
-                            stack.append(parent)
-                        visited.append(parent.idx())
+            for parent in graph.pre_ops(top_op):
+                if parent.idx() not in visited and (
+                        not parent.is_bwd_op()) and (not parent.is_opt_op()):
+                    _logger.debug("----------go back from {} to {}----------".
+                                  format(top_op, parent))
+                    if (('conv2d' in parent.type()) or
+                        (parent.type() == 'fc')):
+                        brothers.append(parent)
+                    else:
+                        stack.append(parent)
+                    visited.append(parent.idx())
 
             for child in graph.next_ops(top_op):
-                if (child.type() != 'conv2d') and (child.type() != 'fc') and (
+                if ('conv2d' not in child.type()
+                    ) and (child.type() != 'fc') and (
                         child.idx() not in visited) and (
-                        not child.is_bwd_op()):
+                        not child.is_bwd_op()) and (not child.is_opt_op()):
                     stack.append(child)
                     visited.append(child.idx())
+        _logger.debug("brothers: {}".format(brothers))
+        _logger.debug(
+            "######################Finish search######################".format(
+                op_node))
         return brothers
 
     def _cal_pruned_idx(self, name, param, ratio, axis):
...
@@ -15,7 +15,7 @@ import sys
 sys.path.append("../")
 import unittest
 import paddle.fluid as fluid
-from prune import Pruner
+from paddleslim.prune import Pruner
 from layers import conv_bn_layer
...
@@ -40,7 +40,11 @@ class TestSANAS(unittest.TestCase):
         base_flops = flops(main_program)
         search_steps = 3
-        sa_nas = SANAS(configs, search_steps=search_steps, is_server=True)
+        sa_nas = SANAS(
+            configs,
+            search_steps=search_steps,
+            server_addr=("", 0),
+            is_server=True)
 
         for i in range(search_steps):
             archs = sa_nas.next_archs()
...