diff --git a/.gitignore b/.gitignore
index c59240cd16aeeb80cbe8501f65dba682edddc463..5647eb30d72f245f8c8f206fe5125e2b04985f41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,8 @@ build/
./dist/
*.pyc
dist/
+*.data
+*.log
+*.tar
+*.tar.gz
+*.zip
diff --git a/demo/auto_prune/train.py b/demo/auto_prune/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..d65dd875a8d650b57bbe429514e99dc6fa46e630
--- /dev/null
+++ b/demo/auto_prune/train.py
@@ -0,0 +1,222 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+from paddleslim.prune import AutoPruner
+from paddleslim.common import get_logger
+from paddleslim.analysis import flops
+sys.path.append(sys.path[0] + "/../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "The path of the pretrained model.")
+add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
+add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
+add_arg('num_epochs', int, 120, "The number of total epochs.")
+add_arg('total_images', int, 1281167, "The number of total training images.")
+parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
+add_arg('config_file', str, None, "The config file for compression with yaml format.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('log_period', int, 10, "Log period in batches.")
+add_arg('test_period', int, 10, "Test period in epochs.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def piecewise_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ bd = [step * e for e in args.step_epochs]
+ lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def cosine_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ if args.lr_strategy == "piecewise_decay":
+ return piecewise_decay(args)
+ elif args.lr_strategy == "cosine_decay":
+ return cosine_decay(args)
+
+
+def compress(args):
+
+    train_reader = None
+    val_reader = None
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+
+ image_shape = [int(m) for m in image_shape.split(",")]
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ cost = fluid.layers.cross_entropy(input=out, label=label)
+ avg_cost = fluid.layers.mean(x=cost)
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+ val_program = fluid.default_main_program().clone(for_test=True)
+ opt = create_optimizer(args)
+ opt.minimize(avg_cost)
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ if args.pretrained_model:
+
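+        # Load only the variables that actually exist in the pretrained
+        # model directory.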
+ def if_exist(var):
+ return os.path.exists(
+ os.path.join(args.pretrained_model, var.name))
+
+ fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+
+ val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
+ train_reader = paddle.batch(
+ train_reader, batch_size=args.batch_size, drop_last=True)
+
+    train_feeder = fluid.DataFeeder([image, label], place)
+    val_feeder = fluid.DataFeeder(
+        [image, label], place, program=val_program)
+
+ def test(epoch, program):
+ batch_id = 0
+ acc_top1_ns = []
+ acc_top5_ns = []
+ for data in val_reader():
+ start_time = time.time()
+ acc_top1_n, acc_top5_n = exe.run(
+ program,
+                feed=val_feeder.feed(data),
+ fetch_list=[acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
+ format(epoch, batch_id,
+ np.mean(acc_top1_n),
+ np.mean(acc_top5_n), end_time - start_time))
+ acc_top1_ns.append(np.mean(acc_top1_n))
+ acc_top5_ns.append(np.mean(acc_top5_n))
+ batch_id += 1
+
+ _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".
+ format(epoch,
+ np.mean(np.array(acc_top1_ns)),
+ np.mean(np.array(acc_top5_ns))))
+ return np.mean(np.array(acc_top1_ns))
+
+ def train(epoch, program):
+
+ build_strategy = fluid.BuildStrategy()
+ exec_strategy = fluid.ExecutionStrategy()
+ train_program = fluid.compiler.CompiledProgram(
+ program).with_data_parallel(
+ loss_name=avg_cost.name,
+ build_strategy=build_strategy,
+ exec_strategy=exec_strategy)
+
+ batch_id = 0
+ for data in train_reader():
+ start_time = time.time()
+ loss_n, acc_top1_n, acc_top5_n = exe.run(
+ train_program,
+ feed=train_feeder.feed(data),
+ fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ loss_n = np.mean(loss_n)
+ acc_top1_n = np.mean(acc_top1_n)
+ acc_top5_n = np.mean(acc_top5_n)
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
+ format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
+ end_time - start_time))
+ batch_id += 1
+
+ params = []
+ for param in fluid.default_main_program().global_block().all_parameters():
+ if "_sep_weights" in param.name:
+ params.append(param.name)
+
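+    # AutoPruner searches for per-layer pruning ratios with a simulated-
+    # annealing style controller (init_temperature/reduce_rate), keeping
+    # the pruned network within the `pruned_flops` FLOPs-reduction target.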
+ pruner = AutoPruner(
+ val_program,
+ fluid.global_scope(),
+ place,
+ params=params,
+ init_ratios=[0.33] * len(params),
+ pruned_flops=0.5,
+ pruned_latency=None,
+ server_addr=("", 0),
+ init_temperature=100,
+ reduce_rate=0.85,
+ max_try_times=300,
+ max_client_num=10,
+ search_steps=100,
+ max_ratios=0.9,
+ min_ratios=0.,
+ is_server=True,
+ key="auto_pruner")
+
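+    # Search loop: each round the controller proposes per-layer ratios, the
+    # pruned model is fine-tuned briefly and evaluated, and its top-1
+    # accuracy is fed back to the controller as the reward.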
+ while True:
+ pruned_program, pruned_val_program = pruner.prune(
+ fluid.default_main_program(), val_program)
+        for i in range(1):  # fine-tune the pruned model for one epoch
+ train(i, pruned_program)
+ score = test(0, pruned_val_program)
+ pruner.reward(score)
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ compress(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/distillation/train.py b/demo/distillation/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f389168440a59f0872d44ab6e62f262e373f6f0
--- /dev/null
+++ b/demo/distillation/train.py
@@ -0,0 +1,238 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+import math
+import logging
+import paddle
+import argparse
+import functools
+import numpy as np
+import paddle.fluid as fluid
+sys.path.append(sys.path[0] + "/../")
+import models
+import imagenet_reader as reader
+from utility import add_arguments, print_arguments
+from paddleslim.dist import merge, l2_loss, soft_label_loss, fsp_loss
+
+logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 64*4, "Minibatch size.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('total_images', int, 1281167, "Training image number.")
+add_arg('image_shape', str, "3,224,224", "Input image size")
+add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
+add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
+add_arg('num_epochs', int, 120, "The number of total epochs.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('log_period', int, 20, "Log period in batches.")
+add_arg('model', str, "MobileNet", "Set the network to use.")
+add_arg('pretrained_model', str, None, "The path of the pretrained student model.")
+add_arg('teacher_model', str, "ResNet50", "Set the teacher network to use.")
+add_arg('teacher_pretrained_model', str, "../pretrain/ResNet50_pretrained", "The path of the pretrained teacher model.")
+parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def piecewise_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ bd = [step * e for e in args.step_epochs]
+ lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def cosine_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ if args.lr_strategy == "piecewise_decay":
+ return piecewise_decay(args)
+ elif args.lr_strategy == "cosine_decay":
+ return cosine_decay(args)
+
+
+def compress(args):
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+ image_shape = [int(m) for m in image_shape.split(",")]
+
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ student_program = fluid.Program()
+ s_startup = fluid.Program()
+
+ with fluid.program_guard(student_program, s_startup):
+ with fluid.unique_name.guard():
+ image = fluid.layers.data(
+ name='image', shape=image_shape, dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ train_loader = fluid.io.DataLoader.from_generator(
+ feed_list=[image, label],
+ capacity=64,
+ use_double_buffer=True,
+ iterable=True)
+ valid_loader = fluid.io.DataLoader.from_generator(
+ feed_list=[image, label],
+ capacity=64,
+ use_double_buffer=True,
+ iterable=True)
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ cost = fluid.layers.cross_entropy(input=out, label=label)
+ avg_cost = fluid.layers.mean(x=cost)
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+ #print("="*50+"student_model_params"+"="*50)
+ #for v in student_program.list_vars():
+ # print(v.name, v.shape)
+
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+
+ train_reader = paddle.batch(
+ train_reader, batch_size=args.batch_size, drop_last=True)
+ val_reader = paddle.batch(
+ val_reader, batch_size=args.batch_size, drop_last=True)
+ val_program = student_program.clone(for_test=True)
+
+    places = fluid.cuda_places() if args.use_gpu else fluid.cpu_places()
+    train_loader.set_sample_list_generator(train_reader, places)
+    valid_loader.set_sample_list_generator(val_reader, place)
+
+ teacher_model = models.__dict__[args.teacher_model]()
+ # define teacher program
+ teacher_program = fluid.Program()
+ t_startup = fluid.Program()
+ teacher_scope = fluid.Scope()
+ with fluid.scope_guard(teacher_scope):
+ with fluid.program_guard(teacher_program, t_startup):
+ with fluid.unique_name.guard():
+ image = fluid.layers.data(
+ name='image', shape=image_shape, dtype='float32')
+ predict = teacher_model.net(image, class_dim=class_dim)
+
+ #print("="*50+"teacher_model_params"+"="*50)
+ #for v in teacher_program.list_vars():
+ # print(v.name, v.shape)
+
+ exe.run(t_startup)
+ assert args.teacher_pretrained_model and os.path.exists(
+ args.teacher_pretrained_model
+ ), "teacher_pretrained_model should be set when teacher_model is not None."
+
+    def if_exist(var):
+        # Load only variables present in the checkpoint directory, skipping
+        # the conv1 and fc parameters.
+        return os.path.exists(
+            os.path.join(args.teacher_pretrained_model, var.name)
+        ) and var.name not in ['conv1_weights', 'fc_0.w_0', 'fc_0.b_0']
+
+ fluid.io.load_vars(
+ exe,
+ args.teacher_pretrained_model,
+ main_program=teacher_program,
+ predicate=if_exist)
+
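+    # merge() imports the teacher program into the student's program so both
+    # networks run together; teacher variables are renamed with a "teacher_"
+    # prefix, hence the "teacher_*" tensor names used in the losses below.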
+ data_name_map = {'image': 'image'}
+ main = merge(
+ teacher_program,
+ student_program,
+ data_name_map,
+ place,
+ teacher_scope=teacher_scope)
+
+ #print("="*50+"teacher_vars"+"="*50)
+ #for v in teacher_program.list_vars():
+ # if '_generated_var' not in v.name and 'fetch' not in v.name and 'feed' not in v.name:
+ # print(v.name, v.shape)
+ #return
+
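+    # Distillation losses: an L2 loss between the teacher's and student's fc
+    # outputs, plus an FSP loss between two pairs of feature maps. The tensor
+    # names below are specific to this ResNet50/MobileNet teacher-student pair.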
+ with fluid.program_guard(main, s_startup):
+ l2_loss_v = l2_loss("teacher_fc_0.tmp_0", "fc_0.tmp_0", main)
+ fsp_loss_v = fsp_loss("teacher_res2a_branch2a.conv2d.output.1.tmp_0",
+ "teacher_res3a_branch2a.conv2d.output.1.tmp_0",
+ "depthwise_conv2d_1.tmp_0", "conv2d_3.tmp_0",
+ main)
+ loss = avg_cost + l2_loss_v + fsp_loss_v
+ opt = create_optimizer(args)
+ opt.minimize(loss)
+ exe.run(s_startup)
+ build_strategy = fluid.BuildStrategy()
+ build_strategy.fuse_all_reduce_ops = False
+ parallel_main = fluid.CompiledProgram(main).with_data_parallel(
+ loss_name=loss.name, build_strategy=build_strategy)
+
+ for epoch_id in range(args.num_epochs):
+ for step_id, data in enumerate(train_loader):
+ loss_1, loss_2, loss_3, loss_4 = exe.run(
+ parallel_main,
+ feed=data,
+ fetch_list=[
+ loss.name, avg_cost.name, l2_loss_v.name, fsp_loss_v.name
+ ])
+ if step_id % args.log_period == 0:
+ _logger.info(
+ "train_epoch {} step {} loss {:.6f}, class loss {:.6f}, l2 loss {:.6f}, fsp loss {:.6f}".
+ format(epoch_id, step_id, loss_1[0], loss_2[0], loss_3[0],
+ loss_4[0]))
+ val_acc1s = []
+ val_acc5s = []
+ for step_id, data in enumerate(valid_loader):
+ val_loss, val_acc1, val_acc5 = exe.run(
+ val_program,
+                feed=data,
+ fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
+ val_acc1s.append(val_acc1)
+ val_acc5s.append(val_acc5)
+ if step_id % args.log_period == 0:
+ _logger.info(
+ "valid_epoch {} step {} loss {:.6f}, top1 {:.6f}, top5 {:.6f}".
+ format(epoch_id, step_id, val_loss[0], val_acc1[0],
+ val_acc5[0]))
+ _logger.info("epoch {} top1 {:.6f}, top5 {:.6f}".format(
+ epoch_id, np.mean(val_acc1s), np.mean(val_acc5s)))
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ compress(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/imagenet_reader.py b/demo/imagenet_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..25bc756e93db829f3566754e079ba7711074e577
--- /dev/null
+++ b/demo/imagenet_reader.py
@@ -0,0 +1,194 @@
+import os
+import math
+import random
+import functools
+import numpy as np
+import paddle
+from PIL import Image, ImageEnhance
+
+random.seed(0)
+np.random.seed(0)
+
+DATA_DIM = 224
+
+THREAD = 16
+BUF_SIZE = 10240
+
+#DATA_DIR = './data/ILSVRC2012/'
+DATA_DIR = './data/'
+DATA_DIR = os.path.join(os.path.split(os.path.realpath(__file__))[0], DATA_DIR)
+
+img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
+img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))
+
+
+def resize_short(img, target_size):
+ percent = float(target_size) / min(img.size[0], img.size[1])
+ resized_width = int(round(img.size[0] * percent))
+ resized_height = int(round(img.size[1] * percent))
+ img = img.resize((resized_width, resized_height), Image.LANCZOS)
+ return img
+
+
+def crop_image(img, target_size, center):
+ width, height = img.size
+ size = target_size
+    if center:
+        w_start = (width - size) // 2
+        h_start = (height - size) // 2
+ else:
+ w_start = np.random.randint(0, width - size + 1)
+ h_start = np.random.randint(0, height - size + 1)
+ w_end = w_start + size
+ h_end = h_start + size
+ img = img.crop((w_start, h_start, w_end, h_end))
+ return img
+
+
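+# Inception-style random resized crop: sample an aspect ratio and a target
+# area (bounded so the crop fits inside the image), crop, then resize to
+# `size` x `size`.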
+def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
+ aspect_ratio = math.sqrt(np.random.uniform(*ratio))
+ w = 1. * aspect_ratio
+ h = 1. / aspect_ratio
+
+ bound = min((float(img.size[0]) / img.size[1]) / (w**2),
+ (float(img.size[1]) / img.size[0]) / (h**2))
+ scale_max = min(scale[1], bound)
+ scale_min = min(scale[0], bound)
+
+ target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
+ scale_max)
+ target_size = math.sqrt(target_area)
+ w = int(target_size * w)
+ h = int(target_size * h)
+
+ i = np.random.randint(0, img.size[0] - w + 1)
+ j = np.random.randint(0, img.size[1] - h + 1)
+
+ img = img.crop((i, j, i + w, j + h))
+ img = img.resize((size, size), Image.LANCZOS)
+ return img
+
+
+def rotate_image(img):
+ angle = np.random.randint(-10, 11)
+ img = img.rotate(angle)
+ return img
+
+
+def distort_color(img):
+ def random_brightness(img, lower=0.5, upper=1.5):
+ e = np.random.uniform(lower, upper)
+ return ImageEnhance.Brightness(img).enhance(e)
+
+ def random_contrast(img, lower=0.5, upper=1.5):
+ e = np.random.uniform(lower, upper)
+ return ImageEnhance.Contrast(img).enhance(e)
+
+ def random_color(img, lower=0.5, upper=1.5):
+ e = np.random.uniform(lower, upper)
+ return ImageEnhance.Color(img).enhance(e)
+
+ ops = [random_brightness, random_contrast, random_color]
+ np.random.shuffle(ops)
+
+ img = ops[0](img)
+ img = ops[1](img)
+ img = ops[2](img)
+
+ return img
+
+
+def process_image(sample, mode, color_jitter, rotate):
+ img_path = sample[0]
+
+ img = Image.open(img_path)
+ if mode == 'train':
+ if rotate: img = rotate_image(img)
+ img = random_crop(img, DATA_DIM)
+ else:
+ img = resize_short(img, target_size=256)
+ img = crop_image(img, target_size=DATA_DIM, center=True)
+ if mode == 'train':
+ if color_jitter:
+ img = distort_color(img)
+ if np.random.randint(0, 2) == 1:
+ img = img.transpose(Image.FLIP_LEFT_RIGHT)
+
+ if img.mode != 'RGB':
+ img = img.convert('RGB')
+
+ img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
+ img -= img_mean
+ img /= img_std
+
+ if mode == 'train' or mode == 'val':
+ return img, sample[1]
+ elif mode == 'test':
+ return [img]
+
+
+def _reader_creator(file_list,
+ mode,
+ shuffle=False,
+ color_jitter=False,
+ rotate=False,
+ data_dir=DATA_DIR,
+ batch_size=1):
+ def reader():
+ try:
+ with open(file_list) as flist:
+ full_lines = [line.strip() for line in flist]
+ if shuffle:
+ np.random.shuffle(full_lines)
+ if mode == 'train' and os.getenv('PADDLE_TRAINING_ROLE'):
+            # distributed mode if the env var `PADDLE_TRAINING_ROLE` exists
+ trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
+ trainer_count = int(os.getenv("PADDLE_TRAINERS", "1"))
+ per_node_lines = len(full_lines) // trainer_count
+ lines = full_lines[trainer_id * per_node_lines:(
+ trainer_id + 1) * per_node_lines]
+ print(
+ "read images from %d, length: %d, lines length: %d, total: %d"
+ % (trainer_id * per_node_lines, per_node_lines,
+ len(lines), len(full_lines)))
+ else:
+ lines = full_lines
+
+ for line in lines:
+ if mode == 'train' or mode == 'val':
+ img_path, label = line.split()
+                    img_path = os.path.join(data_dir, mode, img_path)
+                    yield img_path, int(label)
+ elif mode == 'test':
+ img_path = os.path.join(data_dir, line)
+ yield [img_path]
+ except Exception as e:
+ print("Reader failed!\n{}".format(str(e)))
+ os._exit(1)
+
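+    # xmap_readers maps `process_image` over the sample paths in THREAD
+    # worker threads, buffering up to BUF_SIZE samples.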
+ mapper = functools.partial(
+ process_image, mode=mode, color_jitter=color_jitter, rotate=rotate)
+
+ return paddle.reader.xmap_readers(mapper, reader, THREAD, BUF_SIZE)
+
+
+def train(data_dir=DATA_DIR):
+ file_list = os.path.join(data_dir, 'train_list.txt')
+ return _reader_creator(
+ file_list,
+ 'train',
+ shuffle=True,
+ color_jitter=False,
+ rotate=False,
+ data_dir=data_dir)
+
+
+def val(data_dir=DATA_DIR):
+ file_list = os.path.join(data_dir, 'val_list.txt')
+ return _reader_creator(file_list, 'val', shuffle=False, data_dir=data_dir)
+
+
+def test(data_dir=DATA_DIR):
+ file_list = os.path.join(data_dir, 'test_list.txt')
+ return _reader_creator(file_list, 'test', shuffle=False, data_dir=data_dir)
diff --git a/demo/models/__init__.py b/demo/models/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e843697407850c049a5427d2b6533c417e59c228
--- /dev/null
+++ b/demo/models/__init__.py
@@ -0,0 +1,5 @@
+from .mobilenet import MobileNet
+from .resnet import ResNet34, ResNet50
+from .mobilenet_v2 import MobileNetV2
+
+__all__ = ['MobileNet', 'ResNet34', 'ResNet50', 'MobileNetV2']
diff --git a/demo/models/mobilenet.py b/demo/models/mobilenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..921d6226ca2a65d5c9b57e27bf6607c7376c51f6
--- /dev/null
+++ b/demo/models/mobilenet.py
@@ -0,0 +1,197 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ['MobileNet']
+
+train_parameters = {
+ "input_size": [3, 224, 224],
+ "input_mean": [0.485, 0.456, 0.406],
+ "input_std": [0.229, 0.224, 0.225],
+ "learning_strategy": {
+ "name": "piecewise_decay",
+ "batch_size": 256,
+ "epochs": [10, 16, 30],
+ "steps": [0.1, 0.01, 0.001, 0.0001]
+ }
+}
+
+
+class MobileNet():
+ def __init__(self):
+ self.params = train_parameters
+
+ def net(self, input, class_dim=1000, scale=1.0):
+ # conv1: 112x112
+ input = self.conv_bn_layer(
+ input,
+ filter_size=3,
+ channels=3,
+ num_filters=int(32 * scale),
+ stride=2,
+ padding=1,
+ name="conv1")
+
+ # 56x56
+ input = self.depthwise_separable(
+ input,
+ num_filters1=32,
+ num_filters2=64,
+ num_groups=32,
+ stride=1,
+ scale=scale,
+ name="conv2_1")
+
+ input = self.depthwise_separable(
+ input,
+ num_filters1=64,
+ num_filters2=128,
+ num_groups=64,
+ stride=2,
+ scale=scale,
+ name="conv2_2")
+
+ # 28x28
+ input = self.depthwise_separable(
+ input,
+ num_filters1=128,
+ num_filters2=128,
+ num_groups=128,
+ stride=1,
+ scale=scale,
+ name="conv3_1")
+
+ input = self.depthwise_separable(
+ input,
+ num_filters1=128,
+ num_filters2=256,
+ num_groups=128,
+ stride=2,
+ scale=scale,
+ name="conv3_2")
+
+ # 14x14
+ input = self.depthwise_separable(
+ input,
+ num_filters1=256,
+ num_filters2=256,
+ num_groups=256,
+ stride=1,
+ scale=scale,
+ name="conv4_1")
+
+ input = self.depthwise_separable(
+ input,
+ num_filters1=256,
+ num_filters2=512,
+ num_groups=256,
+ stride=2,
+ scale=scale,
+ name="conv4_2")
+
+ # 14x14
+ for i in range(5):
+ input = self.depthwise_separable(
+ input,
+ num_filters1=512,
+ num_filters2=512,
+ num_groups=512,
+ stride=1,
+ scale=scale,
+ name="conv5" + "_" + str(i + 1))
+ # 7x7
+ input = self.depthwise_separable(
+ input,
+ num_filters1=512,
+ num_filters2=1024,
+ num_groups=512,
+ stride=2,
+ scale=scale,
+ name="conv5_6")
+
+ input = self.depthwise_separable(
+ input,
+ num_filters1=1024,
+ num_filters2=1024,
+ num_groups=1024,
+ stride=1,
+ scale=scale,
+ name="conv6")
+
+ input = fluid.layers.pool2d(
+ input=input,
+ pool_size=0,
+ pool_stride=1,
+ pool_type='avg',
+ global_pooling=True)
+
+ output = fluid.layers.fc(input=input,
+ size=class_dim,
+ act='softmax',
+ param_attr=ParamAttr(
+ initializer=MSRA(), name="fc7_weights"),
+ bias_attr=ParamAttr(name="fc7_offset"))
+
+ return output
+
+ def conv_bn_layer(self,
+ input,
+ filter_size,
+ num_filters,
+ stride,
+ padding,
+ channels=None,
+ num_groups=1,
+ act='relu',
+ use_cudnn=True,
+ name=None):
+ conv = fluid.layers.conv2d(
+ input=input,
+ num_filters=num_filters,
+ filter_size=filter_size,
+ stride=stride,
+ padding=padding,
+ groups=num_groups,
+ act=None,
+ use_cudnn=use_cudnn,
+ param_attr=ParamAttr(
+ initializer=MSRA(), name=name + "_weights"),
+ bias_attr=False)
+ bn_name = name + "_bn"
+ return fluid.layers.batch_norm(
+ input=conv,
+ act=act,
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + '_mean',
+ moving_variance_name=bn_name + '_variance')
+
+ def depthwise_separable(self,
+ input,
+ num_filters1,
+ num_filters2,
+ num_groups,
+ stride,
+ scale,
+ name=None):
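+        # Depthwise conv: groups equals the channel count, so each filter
+        # sees a single input channel; use_cudnn is disabled since depthwise
+        # convolution typically ran faster without cuDNN in older Paddle
+        # releases.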
+ depthwise_conv = self.conv_bn_layer(
+ input=input,
+ filter_size=3,
+ num_filters=int(num_filters1 * scale),
+ stride=stride,
+ padding=1,
+ num_groups=int(num_groups * scale),
+ use_cudnn=False,
+ name=name + "_dw")
+
+ pointwise_conv = self.conv_bn_layer(
+ input=depthwise_conv,
+ filter_size=1,
+ num_filters=int(num_filters2 * scale),
+ stride=1,
+ padding=0,
+ name=name + "_sep")
+ return pointwise_conv
diff --git a/demo/models/mobilenet_v2.py b/demo/models/mobilenet_v2.py
new file mode 100644
index 0000000000000000000000000000000000000000..ccfb250b79a5365d28470886624287fbc87be50c
--- /dev/null
+++ b/demo/models/mobilenet_v2.py
@@ -0,0 +1,259 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+from paddle.fluid.initializer import MSRA
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = [
+    'MobileNetV2', 'MobileNetV2_x0_25', 'MobileNetV2_x0_5',
+    'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0',
+    'MobileNetV2_scale'
+]
+
+train_parameters = {
+ "input_size": [3, 224, 224],
+ "input_mean": [0.485, 0.456, 0.406],
+ "input_std": [0.229, 0.224, 0.225],
+ "learning_strategy": {
+ "name": "piecewise_decay",
+ "batch_size": 256,
+ "epochs": [30, 60, 90],
+ "steps": [0.1, 0.01, 0.001, 0.0001]
+ }
+}
+
+
+class MobileNetV2():
+ def __init__(self, scale=1.0, change_depth=False):
+ self.params = train_parameters
+ self.scale = scale
+ self.change_depth = change_depth
+
+ def net(self, input, class_dim=1000):
+ scale = self.scale
+ change_depth = self.change_depth
+        # if change_depth is True, the network is roughly 1.4x as deep as the default.
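+        # each tuple is (t, c, n, s): expansion factor, output channels,
+        # number of repeats, stride of the first block in the group.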
+ bottleneck_params_list = [
+ (1, 16, 1, 1),
+ (6, 24, 2, 2),
+ (6, 32, 3, 2),
+ (6, 64, 4, 2),
+ (6, 96, 3, 1),
+ (6, 160, 3, 2),
+ (6, 320, 1, 1),
+        ] if not change_depth else [
+ (1, 16, 1, 1),
+ (6, 24, 2, 2),
+ (6, 32, 5, 2),
+ (6, 64, 7, 2),
+ (6, 96, 5, 1),
+ (6, 160, 3, 2),
+ (6, 320, 1, 1),
+ ]
+
+ #conv1
+ input = self.conv_bn_layer(
+ input,
+ num_filters=int(32 * scale),
+ filter_size=3,
+ stride=2,
+ padding=1,
+ if_act=True,
+ name='conv1_1')
+
+ # bottleneck sequences
+ i = 1
+ in_c = int(32 * scale)
+ for layer_setting in bottleneck_params_list:
+ t, c, n, s = layer_setting
+ i += 1
+ input = self.invresi_blocks(
+ input=input,
+ in_c=in_c,
+ t=t,
+ c=int(c * scale),
+ n=n,
+ s=s,
+ name='conv' + str(i))
+ in_c = int(c * scale)
+ #last_conv
+ input = self.conv_bn_layer(
+ input=input,
+ num_filters=int(1280 * scale) if scale > 1.0 else 1280,
+ filter_size=1,
+ stride=1,
+ padding=0,
+ if_act=True,
+ name='conv9')
+
+ input = fluid.layers.pool2d(
+ input=input,
+ pool_size=7,
+ pool_stride=1,
+ pool_type='avg',
+ global_pooling=True)
+
+ output = fluid.layers.fc(input=input,
+ size=class_dim,
+ act='softmax',
+ param_attr=ParamAttr(name='fc10_weights'),
+ bias_attr=ParamAttr(name='fc10_offset'))
+ return output
+
+ def conv_bn_layer(self,
+ input,
+ filter_size,
+ num_filters,
+ stride,
+ padding,
+ channels=None,
+ num_groups=1,
+ if_act=True,
+ name=None,
+ use_cudnn=True):
+ conv = fluid.layers.conv2d(
+ input=input,
+ num_filters=num_filters,
+ filter_size=filter_size,
+ stride=stride,
+ padding=padding,
+ groups=num_groups,
+ act=None,
+ use_cudnn=use_cudnn,
+ param_attr=ParamAttr(name=name + '_weights'),
+ bias_attr=False)
+ bn_name = name + '_bn'
+ bn = fluid.layers.batch_norm(
+ input=conv,
+ param_attr=ParamAttr(name=bn_name + "_scale"),
+ bias_attr=ParamAttr(name=bn_name + "_offset"),
+ moving_mean_name=bn_name + '_mean',
+ moving_variance_name=bn_name + '_variance')
+ if if_act:
+ return fluid.layers.relu6(bn)
+ else:
+ return bn
+
+ def shortcut(self, input, data_residual):
+ return fluid.layers.elementwise_add(input, data_residual)
+
+ def inverted_residual_unit(self,
+ input,
+ num_in_filter,
+ num_filters,
+ ifshortcut,
+ stride,
+ filter_size,
+ padding,
+ expansion_factor,
+ name=None):
+ num_expfilter = int(round(num_in_filter * expansion_factor))
+
+ channel_expand = self.conv_bn_layer(
+ input=input,
+ num_filters=num_expfilter,
+ filter_size=1,
+ stride=1,
+ padding=0,
+ num_groups=1,
+ if_act=True,
+ name=name + '_expand')
+
+ bottleneck_conv = self.conv_bn_layer(
+ input=channel_expand,
+ num_filters=num_expfilter,
+ filter_size=filter_size,
+ stride=stride,
+ padding=padding,
+ num_groups=num_expfilter,
+ if_act=True,
+ name=name + '_dwise',
+ use_cudnn=False)
+
+ linear_out = self.conv_bn_layer(
+ input=bottleneck_conv,
+ num_filters=num_filters,
+ filter_size=1,
+ stride=1,
+ padding=0,
+ num_groups=1,
+ if_act=False,
+ name=name + '_linear')
+ if ifshortcut:
+ out = self.shortcut(input=input, data_residual=linear_out)
+ return out
+ else:
+ return linear_out
+
+ def invresi_blocks(self, input, in_c, t, c, n, s, name=None):
+ first_block = self.inverted_residual_unit(
+ input=input,
+ num_in_filter=in_c,
+ num_filters=c,
+ ifshortcut=False,
+ stride=s,
+ filter_size=3,
+ padding=1,
+ expansion_factor=t,
+ name=name + '_1')
+
+ last_residual_block = first_block
+ last_c = c
+
+ for i in range(1, n):
+ last_residual_block = self.inverted_residual_unit(
+ input=last_residual_block,
+ num_in_filter=last_c,
+ num_filters=c,
+ ifshortcut=True,
+ stride=1,
+ filter_size=3,
+ padding=1,
+ expansion_factor=t,
+ name=name + '_' + str(i + 1))
+ return last_residual_block
+
+
+def MobileNetV2_x0_25():
+ model = MobileNetV2(scale=0.25)
+ return model
+
+
+def MobileNetV2_x0_5():
+ model = MobileNetV2(scale=0.5)
+ return model
+
+
+def MobileNetV2_x1_0():
+ model = MobileNetV2(scale=1.0)
+ return model
+
+
+def MobileNetV2_x1_5():
+ model = MobileNetV2(scale=1.5)
+ return model
+
+
+def MobileNetV2_x2_0():
+ model = MobileNetV2(scale=2.0)
+ return model
+
+
+def MobileNetV2_scale():
+ model = MobileNetV2(scale=1.2, change_depth=True)
+ return model
diff --git a/demo/models/resnet.py b/demo/models/resnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..4ceaef41ecc87d7388ae05d7fcb199de1841ebc2
--- /dev/null
+++ b/demo/models/resnet.py
@@ -0,0 +1,229 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle
+import paddle.fluid as fluid
+import math
+from paddle.fluid.param_attr import ParamAttr
+
+__all__ = ["ResNet", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]
+
+train_parameters = {
+ "input_size": [3, 224, 224],
+ "input_mean": [0.485, 0.456, 0.406],
+ "input_std": [0.229, 0.224, 0.225],
+ "learning_strategy": {
+ "name": "piecewise_decay",
+ "batch_size": 256,
+ "epochs": [10, 16, 30],
+ "steps": [0.1, 0.01, 0.001, 0.0001]
+ }
+}
+
+
+class ResNet():
+ def __init__(self, layers=50, prefix_name=''):
+ self.params = train_parameters
+ self.layers = layers
+ self.prefix_name = prefix_name
+
+ def net(self, input, class_dim=1000, conv1_name='conv1', fc_name=None):
+ layers = self.layers
+        prefix_name = self.prefix_name if self.prefix_name == '' else self.prefix_name + '_'
+ supported_layers = [34, 50, 101, 152]
+ assert layers in supported_layers, \
+ "supported layers are {} but input layer is {}".format(supported_layers, layers)
+
+ if layers == 34 or layers == 50:
+ depth = [3, 4, 6, 3]
+ elif layers == 101:
+ depth = [3, 4, 23, 3]
+ elif layers == 152:
+ depth = [3, 8, 36, 3]
+ num_filters = [64, 128, 256, 512]
+
+ # TODO(wanghaoshuang@baidu.com):
+ # fix name("conv1") conflict between student and teacher in distillation.
+ conv = self.conv_bn_layer(
+ input=input,
+ num_filters=64,
+ filter_size=7,
+ stride=2,
+ act='relu',
+ name=prefix_name + conv1_name)
+ conv = fluid.layers.pool2d(
+ input=conv,
+ pool_size=3,
+ pool_stride=2,
+ pool_padding=1,
+ pool_type='max')
+
+ if layers >= 50:
+ for block in range(len(depth)):
+ for i in range(depth[block]):
+ if layers in [101, 152] and block == 2:
+ if i == 0:
+ conv_name = "res" + str(block + 2) + "a"
+ else:
+ conv_name = "res" + str(block + 2) + "b" + str(i)
+ else:
+ conv_name = "res" + str(block + 2) + chr(97 + i)
+ conv_name = prefix_name + conv_name
+ conv = self.bottleneck_block(
+ input=conv,
+ num_filters=num_filters[block],
+ stride=2 if i == 0 and block != 0 else 1,
+ name=conv_name)
+
+ pool = fluid.layers.pool2d(
+ input=conv, pool_size=7, pool_type='avg', global_pooling=True)
+ stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+ fc_name = fc_name if fc_name is None else prefix_name + fc_name
+ out = fluid.layers.fc(input=pool,
+ size=class_dim,
+ act='softmax',
+ name=fc_name,
+ param_attr=fluid.param_attr.ParamAttr(
+ initializer=fluid.initializer.Uniform(
+ -stdv, stdv)))
+ else:
+ for block in range(len(depth)):
+ for i in range(depth[block]):
+ conv_name = "res" + str(block + 2) + chr(97 + i)
+ conv_name = prefix_name + conv_name
+ conv = self.basic_block(
+ input=conv,
+ num_filters=num_filters[block],
+ stride=2 if i == 0 and block != 0 else 1,
+ is_first=block == i == 0,
+ name=conv_name)
+
+ pool = fluid.layers.pool2d(
+ input=conv, pool_type='avg', global_pooling=True)
+ stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
+ fc_name = fc_name if fc_name is None else prefix_name + fc_name
+ out = fluid.layers.fc(
+ input=pool,
+ size=class_dim,
+ act='softmax',
+ name=fc_name,
+ param_attr=fluid.param_attr.ParamAttr(
+ initializer=fluid.initializer.Uniform(-stdv, stdv)))
+
+ return out
+
+ def conv_bn_layer(self,
+ input,
+ num_filters,
+ filter_size,
+ stride=1,
+ groups=1,
+ act=None,
+ name=None):
+ conv = fluid.layers.conv2d(
+ input=input,
+ num_filters=num_filters,
+ filter_size=filter_size,
+ stride=stride,
+ padding=(filter_size - 1) // 2,
+ groups=groups,
+ act=None,
+ param_attr=ParamAttr(name=name + "_weights"),
+ bias_attr=False,
+ name=name + '.conv2d.output.1')
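+        # Derive batch-norm parameter names from the conv name so pretrained
+        # ResNet checkpoints load correctly, e.g. "conv1" -> "bn_conv1" and
+        # "res2a_branch2a" -> "bn2a_branch2a".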
+ if self.prefix_name == '':
+ if name == "conv1":
+ bn_name = "bn_" + name
+ else:
+ bn_name = "bn" + name[3:]
+ else:
+ if name.split("_")[1] == "conv1":
+ bn_name = name.split("_", 1)[0] + "_bn_" + name.split("_",
+ 1)[1]
+ else:
+ bn_name = name.split("_", 1)[0] + "_bn" + name.split("_",
+ 1)[1][3:]
+ return fluid.layers.batch_norm(
+ input=conv,
+ act=act,
+ name=bn_name + '.output.1',
+ param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(name=bn_name + '_offset'),
+ moving_mean_name=bn_name + '_mean',
+ moving_variance_name=bn_name + '_variance', )
+
+ def shortcut(self, input, ch_out, stride, is_first, name):
+ ch_in = input.shape[1]
+        if ch_in != ch_out or stride != 1 or is_first:
+ return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
+ else:
+ return input
+
+ def bottleneck_block(self, input, num_filters, stride, name):
+ conv0 = self.conv_bn_layer(
+ input=input,
+ num_filters=num_filters,
+ filter_size=1,
+ act='relu',
+ name=name + "_branch2a")
+ conv1 = self.conv_bn_layer(
+ input=conv0,
+ num_filters=num_filters,
+ filter_size=3,
+ stride=stride,
+ act='relu',
+ name=name + "_branch2b")
+ conv2 = self.conv_bn_layer(
+ input=conv1,
+ num_filters=num_filters * 4,
+ filter_size=1,
+ act=None,
+ name=name + "_branch2c")
+
+ short = self.shortcut(
+ input,
+ num_filters * 4,
+ stride,
+ is_first=False,
+ name=name + "_branch1")
+
+ return fluid.layers.elementwise_add(
+ x=short, y=conv2, act='relu', name=name + ".add.output.5")
+
+ def basic_block(self, input, num_filters, stride, is_first, name):
+ conv0 = self.conv_bn_layer(
+ input=input,
+ num_filters=num_filters,
+ filter_size=3,
+ act='relu',
+ stride=stride,
+ name=name + "_branch2a")
+ conv1 = self.conv_bn_layer(
+ input=conv0,
+ num_filters=num_filters,
+ filter_size=3,
+ act=None,
+ name=name + "_branch2b")
+ short = self.shortcut(
+ input, num_filters, stride, is_first, name=name + "_branch1")
+ return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
+
+
+def ResNet34(prefix_name=''):
+ model = ResNet(layers=34, prefix_name=prefix_name)
+ return model
+
+
+def ResNet50(prefix_name=''):
+ model = ResNet(layers=50, prefix_name=prefix_name)
+ return model
+
+
+def ResNet101():
+ model = ResNet(layers=101)
+ return model
+
+
+def ResNet152():
+ model = ResNet(layers=152)
+ return model
diff --git a/demo/block_sa_nas_mobilenetv2_cifar10.py b/demo/nas/block_sa_nas_mobilenetv2_cifar10.py
similarity index 100%
rename from demo/block_sa_nas_mobilenetv2_cifar10.py
rename to demo/nas/block_sa_nas_mobilenetv2_cifar10.py
diff --git a/demo/nas/sa_nas_mobilenetv2.py b/demo/nas/sa_nas_mobilenetv2.py
new file mode 100644
index 0000000000000000000000000000000000000000..142c2c08f09e7888ab255b1d6ce762a50c8e1966
--- /dev/null
+++ b/demo/nas/sa_nas_mobilenetv2.py
@@ -0,0 +1,276 @@
+import sys
+sys.path.append('..')
+import numpy as np
+import argparse
+import ast
+import time
+import logging
+import paddle
+import paddle.fluid as fluid
+from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
+from paddleslim.analysis import flops
+from paddleslim.nas import SANAS
+from paddleslim.common import get_logger
+from optimizer import create_optimizer
+import imagenet_reader
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+
+def create_data_loader(image_shape):
+ data_shape = [-1] + image_shape
+ data = fluid.data(name='data', shape=data_shape, dtype='float32')
+ label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
+ data_loader = fluid.io.DataLoader.from_generator(
+ feed_list=[data, label],
+ capacity=1024,
+ use_double_buffer=True,
+ iterable=True)
+ return data_loader, data, label
+
+
+def build_program(main_program,
+ startup_program,
+ image_shape,
+ archs,
+ args,
+ is_test=False):
+ with fluid.program_guard(main_program, startup_program):
+ data_loader, data, label = create_data_loader(image_shape)
+ output = archs(data)
+
+ softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
+ cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
+ avg_cost = fluid.layers.mean(cost)
+ acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)
+
+        if not is_test:
+ optimizer = create_optimizer(args)
+ optimizer.minimize(avg_cost)
+ return data_loader, avg_cost, acc_top1, acc_top5
+
+
+def search_mobilenetv2(config, args, image_size, is_server=True):
+ factory = SearchSpaceFactory()
+ space = factory.get_search_space(config)
+ if is_server:
+ ### start a server and a client
+ sa_nas = SANAS(
+ config,
+ server_addr=("", 8883),
+ init_temperature=args.init_temperature,
+ reduce_rate=args.reduce_rate,
+ search_steps=args.search_steps,
+ is_server=True)
+ else:
+ ### start a client
+ sa_nas = SANAS(
+ config,
+ server_addr=("10.255.125.38", 8883),
+ init_temperature=args.init_temperature,
+ reduce_rate=args.reduce_rate,
+ search_steps=args.search_steps,
+ is_server=False)
+
+ image_shape = [3, image_size, image_size]
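+    # Search loop: SANAS proposes an architecture, which is trained for
+    # `retain_epoch` epochs and evaluated; the eval top-1 accuracy is then
+    # reported back to the controller as the reward.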
+ for step in range(args.search_steps):
+ archs = sa_nas.next_archs()[0]
+
+ train_program = fluid.Program()
+ test_program = fluid.Program()
+ startup_program = fluid.Program()
+ train_loader, avg_cost, acc_top1, acc_top5 = build_program(
+ train_program, startup_program, image_shape, archs, args)
+
+ current_flops = flops(train_program)
+ print('step: {}, current_flops: {}'.format(step, current_flops))
+ if current_flops > args.max_flops:
+ continue
+
+ test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
+ test_program,
+ startup_program,
+ image_shape,
+ archs,
+ args,
+ is_test=True)
+ test_program = test_program.clone(for_test=True)
+
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(startup_program)
+
+ if args.data == 'cifar10':
+ train_reader = paddle.batch(
+ paddle.reader.shuffle(
+ paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
+ batch_size=args.batch_size,
+ drop_last=True)
+
+ test_reader = paddle.batch(
+ paddle.dataset.cifar.test10(cycle=False),
+ batch_size=args.batch_size,
+ drop_last=False)
+ elif args.data == 'imagenet':
+ train_reader = paddle.batch(
+ imagenet_reader.train(),
+ batch_size=args.batch_size,
+ drop_last=True)
+ test_reader = paddle.batch(
+ imagenet_reader.val(),
+ batch_size=args.batch_size,
+ drop_last=False)
+
+ #test_loader, _, _ = create_data_loader(image_shape)
+ train_loader.set_sample_list_generator(
+ train_reader,
+ places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
+ test_loader.set_sample_list_generator(test_reader, places=place)
+
+ build_strategy = fluid.BuildStrategy()
+ train_compiled_program = fluid.CompiledProgram(
+ train_program).with_data_parallel(
+ loss_name=avg_cost.name, build_strategy=build_strategy)
+ for epoch_id in range(args.retain_epoch):
+ for batch_id, data in enumerate(train_loader()):
+ fetches = [avg_cost.name]
+ s_time = time.time()
+ outs = exe.run(train_compiled_program,
+ feed=data,
+ fetch_list=fetches)[0]
+ batch_time = time.time() - s_time
+ if batch_id % 10 == 0:
+ _logger.info(
+ 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
+ format(step, epoch_id, batch_id, outs[0], batch_time))
+
+ reward = []
+ for batch_id, data in enumerate(test_loader()):
+ test_fetches = [
+ test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
+ ]
+ batch_reward = exe.run(test_program,
+ feed=data,
+ fetch_list=test_fetches)
+ reward_avg = np.mean(np.array(batch_reward), axis=1)
+ reward.append(reward_avg)
+
+ _logger.info(
+ 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
+ format(step, batch_id, batch_reward[0], batch_reward[1],
+ batch_reward[2]))
+
+ finally_reward = np.mean(np.array(reward), axis=0)
+ _logger.info(
+ 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
+ finally_reward[0], finally_reward[1], finally_reward[2]))
+
+ sa_nas.reward(float(finally_reward[1]))
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(
+        description='SA-NAS MobileNetV2 argparse')
+ parser.add_argument(
+ '--use_gpu',
+ type=ast.literal_eval,
+ default=True,
+ help='Whether to use GPU in train/test model.')
+ parser.add_argument(
+ '--batch_size', type=int, default=256, help='batch size.')
+ parser.add_argument(
+ '--data',
+ type=str,
+ default='cifar10',
+ choices=['cifar10', 'imagenet'],
+        help='dataset to use: cifar10 or imagenet.')
+ # controller
+ parser.add_argument(
+ '--reduce_rate', type=float, default=0.85, help='reduce rate.')
+ parser.add_argument(
+ '--init_temperature',
+ type=float,
+ default=10.24,
+ help='init temperature.')
+ parser.add_argument(
+ '--is_server',
+ type=ast.literal_eval,
+ default=True,
+ help='Whether to start a server.')
+ # nas args
+ parser.add_argument(
+        '--max_flops', type=int, default=592948064, help='maximum FLOPs of a candidate architecture.')
+ parser.add_argument(
+ '--retain_epoch', type=int, default=5, help='train epoch before val.')
+ parser.add_argument(
+        '--end_epoch', type=int, default=500, help='end epoch for the current client.')
+ parser.add_argument(
+ '--search_steps',
+ type=int,
+ default=100,
+        help='number of search steps.')
+ parser.add_argument(
+ '--server_address', type=str, default=None, help='server address.')
+ # optimizer args
+ parser.add_argument(
+ '--lr_strategy',
+ type=str,
+ default='piecewise_decay',
+ help='learning rate decay strategy.')
+ parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
+ parser.add_argument(
+        '--l2_decay', type=float, default=1e-4, help='L2 weight decay.')
+ parser.add_argument(
+ '--step_epochs',
+ nargs='+',
+ type=int,
+ default=[30, 60, 90],
+ help="piecewise decay step")
+    parser.add_argument(
+        '--momentum_rate',
+        type=float,
+        default=0.9,
+        help='momentum rate.')
+    parser.add_argument(
+        '--warm_up_epochs',
+        type=float,
+        default=5.0,
+        help='number of warm-up epochs.')
+    parser.add_argument(
+        '--num_epochs', type=int, default=120, help='number of training epochs.')
+    parser.add_argument(
+        '--decay_epochs', type=float, default=2.4, help='epochs between exponential decay steps.')
+    parser.add_argument(
+        '--decay_rate', type=float, default=0.97, help='exponential decay rate.')
+    parser.add_argument(
+        '--total_images',
+        type=int,
+        default=1281167,
+        help='total number of training images.')
+ args = parser.parse_args()
+ print(args)
+
+ if args.data == 'cifar10':
+ image_size = 32
+ block_num = 3
+ elif args.data == 'imagenet':
+ image_size = 224
+ block_num = 6
+ else:
+        raise NotImplementedError(
+ 'data must in [cifar10, imagenet], but received: {}'.format(
+ args.data))
+
+ config_info = {
+ 'input_size': image_size,
+ 'output_size': 1,
+ 'block_num': block_num,
+ 'block_mask': None
+ }
+ config = [('MobileNetV2Space', config_info)]
+
+ search_mobilenetv2(config, args, image_size, is_server=args.is_server)
diff --git a/demo/optimizer.py b/demo/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..73f441f897d22c10d2d6e05afaa7491b227b27d4
--- /dev/null
+++ b/demo/optimizer.py
@@ -0,0 +1,300 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+
+import paddle.fluid as fluid
+import paddle.fluid.layers.ops as ops
+from paddle.fluid.initializer import init_on_cpu
+from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
+
+
+def cosine_decay(learning_rate, step_each_epoch, epochs=120):
+ """Applies cosine decay to the learning rate.
+ lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
+ """
+ global_step = _decay_step_counter()
+
+ with init_on_cpu():
+ epoch = ops.floor(global_step / step_each_epoch)
+ decayed_lr = learning_rate * \
+ (ops.cos(epoch * (math.pi / epochs)) + 1)/2
+ return decayed_lr
+
+
+def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
+ """Applies cosine decay to the learning rate.
+ lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
+ decrease lr for every mini-batch and start with warmup.
+ """
+ global_step = _decay_step_counter()
+ lr = fluid.layers.tensor.create_global_var(
+ shape=[1],
+ value=0.0,
+ dtype='float32',
+ persistable=True,
+ name="learning_rate")
+
+ warmup_epoch = fluid.layers.fill_constant(
+ shape=[1], dtype='float32', value=float(5), force_cpu=True)
+
+ with init_on_cpu():
+ epoch = ops.floor(global_step / step_each_epoch)
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(epoch < warmup_epoch):
+ decayed_lr = learning_rate * (global_step /
+ (step_each_epoch * warmup_epoch))
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ with switch.default():
+ decayed_lr = learning_rate * \
+ (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
+ fluid.layers.tensor.assign(input=decayed_lr, output=lr)
+ return lr
+
+
+def exponential_decay_with_warmup(learning_rate,
+ step_each_epoch,
+ decay_epochs,
+ decay_rate=0.97,
+ warm_up_epoch=5.0):
+ """Applies exponential decay to the learning rate.
+ """
+ global_step = _decay_step_counter()
+ lr = fluid.layers.tensor.create_global_var(
+ shape=[1],
+ value=0.0,
+ dtype='float32',
+ persistable=True,
+ name="learning_rate")
+
+ warmup_epoch = fluid.layers.fill_constant(
+ shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)
+
+ with init_on_cpu():
+ epoch = ops.floor(global_step / step_each_epoch)
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(epoch < warmup_epoch):
+ decayed_lr = learning_rate * (global_step /
+ (step_each_epoch * warmup_epoch))
+ fluid.layers.assign(input=decayed_lr, output=lr)
+ with switch.default():
+ div_res = (global_step - warmup_epoch * step_each_epoch
+ ) / decay_epochs
+ div_res = ops.floor(div_res)
+ decayed_lr = learning_rate * (decay_rate**div_res)
+ fluid.layers.assign(input=decayed_lr, output=lr)
+
+ return lr
+
+
+def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
+ """ Applies linear learning rate warmup for distributed training
+ Argument learning_rate can be float or a Variable
+ lr = lr + (warmup_rate * step / warmup_steps)
+ """
+ assert (isinstance(end_lr, float))
+ assert (isinstance(start_lr, float))
+ linear_step = end_lr - start_lr
+ with fluid.default_main_program()._lr_schedule_guard():
+ lr = fluid.layers.tensor.create_global_var(
+ shape=[1],
+ value=0.0,
+ dtype='float32',
+ persistable=True,
+ name="learning_rate_warmup")
+
+ global_step = fluid.layers.learning_rate_scheduler._decay_step_counter(
+ )
+
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(global_step < warmup_steps):
+ decayed_lr = start_lr + linear_step * (global_step /
+ warmup_steps)
+ fluid.layers.tensor.assign(decayed_lr, lr)
+ with switch.default():
+ fluid.layers.tensor.assign(learning_rate, lr)
+
+ return lr
+
+
+class Optimizer(object):
+ """A class used to represent several optimizer methods
+
+ Attributes:
+ batch_size: batch size on all devices.
+ lr: learning rate.
+ lr_strategy: learning rate decay strategy.
+ l2_decay: l2_decay parameter.
+ momentum_rate: momentum rate when using Momentum optimizer.
+ step_epochs: piecewise decay steps.
+ num_epochs: number of total epochs.
+
+ total_images: total images.
+        step: total steps in an epoch.
+
+ """
+
+ def __init__(self, args):
+ self.batch_size = args.batch_size
+ self.lr = args.lr
+ self.lr_strategy = args.lr_strategy
+ self.l2_decay = args.l2_decay
+ self.momentum_rate = args.momentum_rate
+ self.step_epochs = args.step_epochs
+ self.num_epochs = args.num_epochs
+ self.warm_up_epochs = args.warm_up_epochs
+ self.decay_epochs = args.decay_epochs
+ self.decay_rate = args.decay_rate
+ self.total_images = args.total_images
+
+ self.step = int(math.ceil(float(self.total_images) / self.batch_size))
+
+ def piecewise_decay(self):
+ """piecewise decay with Momentum optimizer
+
+ Returns:
+ a piecewise_decay optimizer
+ """
+ bd = [self.step * e for e in self.step_epochs]
+ lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay))
+ return optimizer
+
+ def cosine_decay(self):
+ """cosine decay with Momentum optimizer
+
+ Returns:
+ a cosine_decay optimizer
+ """
+
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=self.lr,
+ step_each_epoch=self.step,
+ epochs=self.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay))
+ return optimizer
+
+ def cosine_decay_warmup(self):
+ """cosine decay with warmup
+
+ Returns:
+ a cosine_decay_with_warmup optimizer
+ """
+
+ learning_rate = cosine_decay_with_warmup(
+ learning_rate=self.lr,
+ step_each_epoch=self.step,
+ epochs=self.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay))
+ return optimizer
+
+ def exponential_decay_warmup(self):
+ """exponential decay with warmup
+
+ Returns:
+            an exponential_decay_with_warmup optimizer
+ """
+
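+        # decay_epochs is given in epochs; multiply by steps-per-epoch so the
+        # decay interval matches the global step counter.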
+ learning_rate = exponential_decay_with_warmup(
+ learning_rate=self.lr,
+ step_each_epoch=self.step,
+ decay_epochs=self.step * self.decay_epochs,
+ decay_rate=self.decay_rate,
+ warm_up_epoch=self.warm_up_epochs)
+ optimizer = fluid.optimizer.RMSProp(
+ learning_rate=learning_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay),
+ momentum=self.momentum_rate,
+ rho=0.9,
+ epsilon=0.001)
+ return optimizer
+
+ def linear_decay(self):
+ """linear decay with Momentum optimizer
+
+ Returns:
+ a linear_decay optimizer
+ """
+
+ end_lr = 0
+ learning_rate = fluid.layers.polynomial_decay(
+ self.lr, self.step, end_lr, power=1)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay))
+
+ return optimizer
+
+ def adam_decay(self):
+ """Adam optimizer
+
+ Returns:
+ an adam_decay optimizer
+ """
+
+ return fluid.optimizer.Adam(learning_rate=self.lr)
+
+ def cosine_decay_RMSProp(self):
+ """cosine decay with RMSProp optimizer
+
+ Returns:
+            a cosine_decay_RMSProp optimizer
+ """
+
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=self.lr,
+ step_each_epoch=self.step,
+ epochs=self.num_epochs)
+ optimizer = fluid.optimizer.RMSProp(
+ learning_rate=learning_rate,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay),
+ # Apply epsilon=1 on ImageNet dataset.
+ epsilon=1)
+ return optimizer
+
+ def default_decay(self):
+ """default decay
+
+ Returns:
+ default decay optimizer
+ """
+
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=self.lr,
+ momentum=self.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(self.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ Opt = Optimizer(args)
+ optimizer = getattr(Opt, args.lr_strategy)()
+
+ return optimizer
diff --git a/demo/prune/train.py b/demo/prune/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7d7f5cd854848e097c625b37d9c73f79d2aa662
--- /dev/null
+++ b/demo/prune/train.py
@@ -0,0 +1,216 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+from paddleslim.prune import Pruner
+from paddleslim.common import get_logger
+from paddleslim.analysis import flops
+sys.path.append(sys.path[0] + "/../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "Whether to use pretrained model.")
+add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
+add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
+add_arg('num_epochs', int, 120, "The number of total epochs.")
+add_arg('total_images', int, 1281167, "The number of total training images.")
+parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
+add_arg('config_file', str, None, "The config file for compression with yaml format.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('log_period', int, 10, "Log period in batches.")
+add_arg('test_period', int, 10, "Test period in epochs.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def piecewise_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ bd = [step * e for e in args.step_epochs]
+ lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def cosine_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ if args.lr_strategy == "piecewise_decay":
+ return piecewise_decay(args)
+ elif args.lr_strategy == "cosine_decay":
+ return cosine_decay(args)
+
+
+def compress(args):
+ train_reader = None
+ test_reader = None
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+ image_shape = [int(m) for m in image_shape.split(",")]
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ cost = fluid.layers.cross_entropy(input=out, label=label)
+ avg_cost = fluid.layers.mean(x=cost)
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+ val_program = fluid.default_main_program().clone(for_test=True)
+ opt = create_optimizer(args)
+ opt.minimize(avg_cost)
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ if args.pretrained_model:
+
+ def if_exist(var):
+ return os.path.exists(
+ os.path.join(args.pretrained_model, var.name))
+
+ fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+
+ val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
+ train_reader = paddle.batch(
+ train_reader, batch_size=args.batch_size, drop_last=True)
+
+ train_feeder = fluid.DataFeeder([image, label], place)
+ val_feeder = fluid.DataFeeder(
+ [image, label], place, program=val_program)
+
+ def test(epoch, program):
+ batch_id = 0
+ acc_top1_ns = []
+ acc_top5_ns = []
+ for data in val_reader():
+ start_time = time.time()
+ acc_top1_n, acc_top5_n = exe.run(
+ program,
+ feed=train_feeder.feed(data),
+ fetch_list=[acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "Eval epoch[{}] batch[{}] - acc_top1: {}; acc_top5: {}; time: {}".
+ format(epoch, batch_id,
+ np.mean(acc_top1_n),
+ np.mean(acc_top5_n), end_time - start_time))
+ acc_top1_ns.append(np.mean(acc_top1_n))
+ acc_top5_ns.append(np.mean(acc_top5_n))
+ batch_id += 1
+
+ _logger.info("Final eval epoch[{}] - acc_top1: {}; acc_top5: {}".
+ format(epoch,
+ np.mean(np.array(acc_top1_ns)),
+ np.mean(np.array(acc_top5_ns))))
+
+ def train(epoch, program):
+
+ build_strategy = fluid.BuildStrategy()
+ exec_strategy = fluid.ExecutionStrategy()
+ train_program = fluid.compiler.CompiledProgram(
+ program).with_data_parallel(
+ loss_name=avg_cost.name,
+ build_strategy=build_strategy,
+ exec_strategy=exec_strategy)
+
+ batch_id = 0
+ for data in train_reader():
+ start_time = time.time()
+ loss_n, acc_top1_n, acc_top5_n = exe.run(
+ train_program,
+ feed=train_feeder.feed(data),
+ fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ loss_n = np.mean(loss_n)
+ acc_top1_n = np.mean(acc_top1_n)
+ acc_top5_n = np.mean(acc_top5_n)
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "epoch[{}]-batch[{}] - loss: {}; acc_top1: {}; acc_top5: {}; time: {}".
+ format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
+ end_time - start_time))
+ batch_id += 1
+
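+ # prune only the parameters whose names contain "_sep_weights"
+ # (the separable-convolution weights in MobileNet)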
+ params = []
+ for param in fluid.default_main_program().global_block().all_parameters():
+ if "_sep_weights" in param.name:
+ params.append(param.name)
+ _logger.info("fops before pruning: {}".format(
+ flops(fluid.default_main_program())))
+ pruner = Pruner()
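+ # prune the eval program's graph only (only_graph=True); the underlying
+ # weights in the scope are actually cropped when the train program is
+ # pruned below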
+ pruned_val_program = pruner.prune(
+ val_program,
+ fluid.global_scope(),
+ params=params,
+ ratios=[0.33] * len(params),
+ place=place,
+ only_graph=True)
+
+ pruned_program = pruner.prune(
+ fluid.default_main_program(),
+ fluid.global_scope(),
+ params=params,
+ ratios=[0.33] * len(params),
+ place=place)
+
+ _logger.info("fops after pruning: {}".format(flops(pruned_program)))
+
+ for i in range(args.num_epochs):
+ train(i, pruned_program)
+ if i % args.test_period == 0:
+ test(i, pruned_val_program)
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ compress(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/quant/quant_embedding/README.md b/demo/quant/quant_embedding/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..5667b19a7f27062dc508a68569ae9fb86d178b45
--- /dev/null
+++ b/demo/quant/quant_embedding/README.md
@@ -0,0 +1,240 @@
+# Embedding Quantization Demo
+
+This demo shows how to use the embedding quantization API [paddleslim.quant.quant_embedding](). The ``quant_embedding`` API quantizes the embedding parameters of a network from ``float32`` to ``8-bit`` integers, reducing model storage and GPU memory usage with almost no loss of accuracy.
+
+The API is as follows:
+```
+quant_embedding(program, place, config, scope=None)
+```
+
+Parameters:
+
+- program(fluid.Program): the program to be quantized.
+- scope(fluid.Scope, optional): the scope used to read and write ``Variable``s; if set to ``None``, ``fluid.global_scope()`` is used.
+- place(fluid.CPUPlace or fluid.CUDAPlace): the device the program runs on.
+- config(dict): the quantization configuration. Supported keys:
+  - ``'params_name'`` (str, required): the name of the parameter to quantize. This key must be set.
+  - ``'quantize_type'`` (str, optional): the quantization type. Currently only ``'abs_max'`` is supported; ``'log'`` and ``'product_quantization'`` are planned. Defaults to ``'abs_max'``.
+  - ``'quantize_bits'`` (int, optional): the number of quantization bits. Currently only 8 is supported. Defaults to 8.
+  - ``'dtype'`` (str, optional): the data type after quantization. Currently only ``'int8'`` is supported. Defaults to ``'int8'``.
+  - ``'threshold'`` (float, optional): the parameter is ``clip``ped to this threshold before quantization. If unset, the ``clip`` step is skipped and the parameter is quantized directly.
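+
+A minimal usage sketch: it assumes ``infer_program`` is an inference program that has already been loaded and contains an embedding parameter named ``'emb'`` (the names are illustrative and match this demo):
+
+```python
+import paddle.fluid as fluid
+from paddleslim.quant import quant_embedding
+
+place = fluid.CPUPlace()
+config = {'params_name': 'emb', 'quantize_type': 'abs_max'}
+# returns a new program in which the 'emb' parameter is stored as int8
+quant_program = quant_embedding(infer_program, place, config)
+```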
+
+Changes made to the program by this API:
+
+Before quantization:
+
+<img src="./image/before.png" alt="before quantization">
+
+Figure 1: model structure before quantization
+
+After quantization:
+
+<img src="./image/after.png" alt="after quantization">
+
+Figure 2: model structure after quantization
+
+The ``skip-gram word2vec model`` is used below as an example of how to use the ``quant_embedding`` API. We first describe the normal training and evaluation flow of that model.
+
+## Skip-gram word2vec model
+
+A brief layout of this demo's files:
+
+```text
+.
+├── cluster_train.py # distributed training
+├── cluster_train.sh # script that simulates multi-machine training locally
+├── train.py # training script
+├── infer.py # inference script
+├── net.py # network definition
+├── preprocess.py # preprocessing script: builds the dictionary and preprocesses the text
+├── reader.py # text reader used during training
+└── utils.py # common utilities
+
+```
+
+### Introduction
+This demo implements the skip-gram variant of the word2vec model.
+
+We also recommend the [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/124377) for reference.
+
+### Downloading the data
+The full dataset is the [1 Billion Word Language Model Benchmark](http://www.statmt.org/lm-benchmark) dataset.
+
+```bash
+mkdir data
+wget http://www.statmt.org/lm-benchmark/1-billion-word-language-modeling-benchmark-r13output.tar.gz
+tar xzvf 1-billion-word-language-modeling-benchmark-r13output.tar.gz
+mv 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/ data/
+```
+
+A mirror of the dataset can be downloaded with:
+
+```bash
+mkdir data
+wget https://paddlerec.bj.bcebos.com/word2vec/1-billion-word-language-modeling-benchmark-r13output.tar
+tar xvf 1-billion-word-language-modeling-benchmark-r13output.tar
+mv 1-billion-word-language-modeling-benchmark-r13output/training-monolingual.tokenized.shuffled/ data/
+```
+
+For quick verification, we also provide the classic text8 sample dataset, which contains about 17 million words. Download it with:
+
+```bash
+mkdir data
+wget https://paddlerec.bj.bcebos.com/word2vec/text.tar
+tar xvf text.tar
+mv text data/
+```
+
+
+### Data preprocessing
+The sample dataset is used as the example below. For the full dataset, note that after extraction the preprocessing directory is training-monolingual.tokenized.shuffled, which sits next to the sample dataset's text directory.
+
+Dictionary format: word<space>count, one entry per line. Note that low-frequency words are represented by '<UNK>'.
+
+You can also build your own dictionary in this format; if you do, skip step one.
+```
+the 1061396
+of 593677
+and 416629
+one 411764
+in 372201
+a 325873
+<UNK> 324608
+to 316376
+zero 264975
+nine 250430
+```
+
+Step one: build the dictionary from the English corpus. For a Chinese corpus, you can customize the processing by modifying the text_strip method.
+
+```bash
+python preprocess.py --build_dict --build_dict_corpus_dir data/text/ --dict_path data/test_build_dict
+```
+
+Step two: convert the text to ids with the dictionary and downsample it, filtering frequent words by probability; this also generates the word-to-id mapping file, named after the dictionary with the suffix "_word_to_id_".
+
+```bash
+python preprocess.py --filter_corpus --dict_path data/test_build_dict --input_corpus_dir data/text --output_corpus_dir data/convert_text8 --min_count 5 --downsample 0.001
+```
+
+### Training
+To see all configurable parameters, run
+
+
+```bash
+python train.py -h
+```
+
+Single-machine multi-threaded training:
+```bash
+OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 --dict_path data/test_build_dict --num_passes 10 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir --base_lr 1.0 --print_batch 1000 --with_speed --is_sparse
+```
+
+Simulating multi-machine training locally on a single machine:
+
+```bash
+sh cluster_train.sh
+```
+
+This demo trains with the single-machine multi-threaded command above. After training finishes, the models are saved under ``v1_cpu5_b100_lr1dir``; running ``ls v1_cpu5_b100_lr1dir`` shows the model files saved for the 10 training epochs:
+```
+pass-0 pass-1 pass-2 pass-3 pass-4 pass-5 pass-6 pass-7 pass-8 pass-9
+```
+
+### Inference
+Download the test sets with:
+
+```bash
+# test set for the full dataset
+wget https://paddlerec.bj.bcebos.com/word2vec/test_dir.tar
+# test set for the sample dataset
+wget https://paddlerec.bj.bcebos.com/word2vec/test_mid_dir.tar
+```
+
+Inference command. Note that the dictionary name needs the suffix "_word_to_id_"; this file is generated during preprocessing.
+```bash
+python infer.py --infer_epoch --test_dir data/test_mid_dir --dict_path data/test_build_dict_word_to_id_ --batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 --last_index 9
+```
+Running the inference command produces output like:
+```
+('start index: ', 0, ' last_index:', 9)
+('vocab_size:', 63642)
+step:1 249
+epoch:0 acc:0.014
+step:1 590
+epoch:1 acc:0.033
+step:1 982
+epoch:2 acc:0.055
+step:1 1338
+epoch:3 acc:0.075
+step:1 1653
+epoch:4 acc:0.093
+step:1 1914
+epoch:5 acc:0.107
+step:1 2204
+epoch:6 acc:0.124
+step:1 2416
+epoch:7 acc:0.136
+step:1 2606
+epoch:8 acc:0.146
+step:1 2722
+epoch:9 acc:0.153
+```
+
+## Quantizing the ``skip-gram word2vec model``
+
+The quantization configuration is:
+```
+config = {
+ 'params_name': 'emb',
+ 'quantize_type': 'abs_max'
+ }
+```
+
+Run:
+
+```bash
+python infer.py --infer_epoch --test_dir data/test_mid_dir --dict_path data/test_build_dict_word_to_id_ --batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 --last_index 9 --emb_quant True
+```
+
+The output is:
+
+```
+('start index: ', 0, ' last_index:', 9)
+('vocab_size:', 63642)
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 253
+epoch:0 acc:0.014
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 586
+epoch:1 acc:0.033
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 970
+epoch:2 acc:0.054
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 1364
+epoch:3 acc:0.077
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 1642
+epoch:4 acc:0.092
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 1936
+epoch:5 acc:0.109
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 2216
+epoch:6 acc:0.124
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 2419
+epoch:7 acc:0.136
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 2603
+epoch:8 acc:0.146
+quant_embedding config {'quantize_type': 'abs_max', 'params_name': 'emb', 'quantize_bits': 8, 'dtype': 'int8'}
+step:1 2719
+epoch:9 acc:0.153
+```
+
+The quantized model is saved under ``./output_quant``, where the quantized parameter ``'emb.int8'`` is 3.9 MB; the original ``'emb'`` parameter under ``./v1_cpu5_b100_lr1dir`` is 16 MB. This is roughly the 4x reduction expected when storing ``float32`` values as ``int8``.
diff --git a/demo/quant/quant_embedding/cluster_train.py b/demo/quant/quant_embedding/cluster_train.py
new file mode 100755
index 0000000000000000000000000000000000000000..9b5bc2f620958fe4387c70022e2dbaee9042f625
--- /dev/null
+++ b/demo/quant/quant_embedding/cluster_train.py
@@ -0,0 +1,250 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+import time
+import math
+import random
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import six
+import reader
+from net import skip_gram_word2vec
+
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="PaddlePaddle Word2vec example")
+ parser.add_argument(
+ '--train_data_dir',
+ type=str,
+ default='./data/text',
+ help="The path of taining dataset")
+ parser.add_argument(
+ '--base_lr',
+ type=float,
+ default=0.01,
+ help="The number of learing rate (default: 0.01)")
+ parser.add_argument(
+ '--save_step',
+ type=int,
+ default=500000,
+ help="The number of step to save (default: 500000)")
+ parser.add_argument(
+ '--print_batch',
+ type=int,
+ default=100,
+ help="The number of print_batch (default: 10)")
+ parser.add_argument(
+ '--dict_path',
+ type=str,
+ default='./data/1-billion_dict',
+ help="The path of data dict")
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=500,
+ help="The size of mini-batch (default:500)")
+ parser.add_argument(
+ '--num_passes',
+ type=int,
+ default=10,
+ help="The number of passes to train (default: 10)")
+ parser.add_argument(
+ '--model_output_dir',
+ type=str,
+ default='models',
+ help='The path for model to store (default: models)')
+ parser.add_argument('--nce_num', type=int, default=5, help='nce_num')
+ parser.add_argument(
+ '--embedding_size',
+ type=int,
+ default=64,
+ help='The dimension of the embedding vectors (default: 64)')
+ parser.add_argument(
+ '--is_sparse',
+ action='store_true',
+ required=False,
+ default=False,
+ help='embedding and nce will use sparse or not, (default: False)')
+ parser.add_argument(
+ '--with_speed',
+ action='store_true',
+ required=False,
+ default=False,
+ help='print speed or not (default: False)')
+ parser.add_argument(
+ '--role', type=str, default='pserver', help='trainer or pserver')
+ parser.add_argument(
+ '--endpoints',
+ type=str,
+ default='127.0.0.1:6000',
+ help='The pserver endpoints, like: 127.0.0.1:6000, 127.0.0.1:6001')
+ parser.add_argument(
+ '--current_endpoint',
+ type=str,
+ default='127.0.0.1:6000',
+ help='The current_endpoint')
+ parser.add_argument(
+ '--trainer_id',
+ type=int,
+ default=0,
+ help='trainer id; only trainer_id=0 saves the model')
+ parser.add_argument(
+ '--trainers',
+ type=int,
+ default=1,
+ help='The number of trainers (default: 1)')
+ return parser.parse_args()
+
+
+def convert_python_to_tensor(weight, batch_size, sample_reader):
+ def __reader__():
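+ # `weight` holds the (0.75-power smoothed) unigram frequencies; the
+ # cumulative sum computed below enables inverse-transform sampling of
+ # negative words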
+ cs = np.array(weight).cumsum()
+ result = [[], []]
+ for sample in sample_reader():
+ for i, fea in enumerate(sample):
+ result[i].append(fea)
+ if len(result[0]) == batch_size:
+ tensor_result = []
+ for tensor in result:
+ t = fluid.Tensor()
+ dat = np.array(tensor, dtype='int64')
+ if len(dat.shape) > 2:
+ dat = dat.reshape((dat.shape[0], dat.shape[2]))
+ elif len(dat.shape) == 1:
+ dat = dat.reshape((-1, 1))
+ t.set(dat, fluid.CPUPlace())
+ tensor_result.append(t)
+ tt = fluid.Tensor()
+ neg_array = cs.searchsorted(np.random.sample(args.nce_num))
+ neg_array = np.tile(neg_array, batch_size)
+ tt.set(
+ neg_array.reshape((batch_size, args.nce_num)),
+ fluid.CPUPlace())
+ tensor_result.append(tt)
+ yield tensor_result
+ result = [[], []]
+
+ return __reader__
+
+
+def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
+ weight):
+
+ py_reader.decorate_tensor_provider(
+ convert_python_to_tensor(weight, args.batch_size, reader.train()))
+
+ place = fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
+
+ train_exe = exe
+
+ for pass_id in range(args.num_passes):
+ py_reader.start()
+ time.sleep(10)
+ epoch_start = time.time()
+ batch_id = 0
+ start = time.time()
+ try:
+ while True:
+
+ loss_val = train_exe.run(fetch_list=[loss.name])
+ loss_val = np.mean(loss_val)
+
+ if batch_id % args.print_batch == 0:
+ logger.info(
+ "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
+ format(pass_id, batch_id,
+ loss_val.mean(), py_reader.queue.size()))
+ if args.with_speed:
+ if batch_id % 500 == 0 and batch_id != 0:
+ elapsed = (time.time() - start)
+ start = time.time()
+ samples = 1001 * args.batch_size * int(
+ os.getenv("CPU_NUM"))
+ logger.info("Time used: {}, Samples/Sec: {}".format(
+ elapsed, samples / elapsed))
+
+ if batch_id % args.save_step == 0 and batch_id != 0:
+ model_dir = args.model_output_dir + '/pass-' + str(
+ pass_id) + ('/batch-' + str(batch_id))
+ if trainer_id == 0:
+ fluid.io.save_params(executor=exe, dirname=model_dir)
+ print("model saved in %s" % model_dir)
+ batch_id += 1
+
+ except fluid.core.EOFException:
+ py_reader.reset()
+ epoch_end = time.time()
+ logger.info("Epoch: {0}, Train total expend: {1} ".format(
+ pass_id, epoch_end - epoch_start))
+ model_dir = args.model_output_dir + '/pass-' + str(pass_id)
+ if trainer_id == 0:
+ fluid.io.save_params(executor=exe, dirname=model_dir)
+ print("model saved in %s" % model_dir)
+
+
+def GetFileList(data_path):
+ return os.listdir(data_path)
+
+
+def train(args):
+
+ if not os.path.isdir(args.model_output_dir) and args.trainer_id == 0:
+ os.mkdir(args.model_output_dir)
+
+ filelist = GetFileList(args.train_data_dir)
+ word2vec_reader = reader.Word2VecReader(
+ args.dict_path, args.train_data_dir, filelist, 0, 1)
+
+ logger.info("dict_size: {}".format(word2vec_reader.dict_size))
+ np_power = np.power(np.array(word2vec_reader.id_frequencys), 0.75)
+ id_frequencys_pow = np_power / np_power.sum()
+
+ loss, py_reader = skip_gram_word2vec(
+ word2vec_reader.dict_size,
+ args.embedding_size,
+ is_sparse=args.is_sparse,
+ neg_num=args.nce_num)
+
+ optimizer = fluid.optimizer.SGD(
+ learning_rate=fluid.layers.exponential_decay(
+ learning_rate=args.base_lr,
+ decay_steps=100000,
+ decay_rate=0.999,
+ staircase=True))
+
+ optimizer.minimize(loss)
+
+ logger.info("run dist training")
+
+ t = fluid.DistributeTranspiler()
+ t.transpile(
+ args.trainer_id, pservers=args.endpoints, trainers=args.trainers)
+ if args.role == "pserver":
+ print("run psever")
+ pserver_prog = t.get_pserver_program(args.current_endpoint)
+ pserver_startup = t.get_startup_program(args.current_endpoint,
+ pserver_prog)
+ exe = fluid.Executor(fluid.CPUPlace())
+ exe.run(pserver_startup)
+ exe.run(pserver_prog)
+ elif args.role == "trainer":
+ print("run trainer")
+ train_loop(args,
+ t.get_trainer_program(), word2vec_reader, py_reader, loss,
+ args.trainer_id, id_frequencys_pow)
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ train(args)
diff --git a/demo/quant/quant_embedding/cluster_train.sh b/demo/quant/quant_embedding/cluster_train.sh
new file mode 100755
index 0000000000000000000000000000000000000000..756196fd41eeb52d5f43553664c824748ac83e4e
--- /dev/null
+++ b/demo/quant/quant_embedding/cluster_train.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+#export GLOG_v=30
+#export GLOG_logtostderr=1
+
+# start pserver0
+export CPU_NUM=5
+export FLAGS_rpc_deadline=3000000
+python cluster_train.py \
+ --train_data_dir data/convert_text8 \
+ --dict_path data/test_build_dict \
+ --batch_size 100 \
+ --model_output_dir dis_model \
+ --base_lr 1.0 \
+ --print_batch 1 \
+ --is_sparse \
+ --with_speed \
+ --role pserver \
+ --endpoints 127.0.0.1:6000,127.0.0.1:6001 \
+ --current_endpoint 127.0.0.1:6000 \
+ --trainers 2 \
+ > pserver0.log 2>&1 &
+
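+# start pserver1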
+python cluster_train.py \
+ --train_data_dir data/convert_text8 \
+ --dict_path data/test_build_dict \
+ --batch_size 100 \
+ --model_output_dir dis_model \
+ --base_lr 1.0 \
+ --print_batch 1 \
+ --is_sparse \
+ --with_speed \
+ --role pserver \
+ --endpoints 127.0.0.1:6000,127.0.0.1:6001 \
+ --current_endpoint 127.0.0.1:6001 \
+ --trainers 2 \
+ > pserver1.log 2>&1 &
+
+# start trainer0
+python cluster_train.py \
+ --train_data_dir data/convert_text8 \
+ --dict_path data/test_build_dict \
+ --batch_size 100 \
+ --model_output_dir dis_model \
+ --base_lr 1.0 \
+ --print_batch 1000 \
+ --is_sparse \
+ --with_speed \
+ --role trainer \
+ --endpoints 127.0.0.1:6000,127.0.0.1:6001 \
+ --trainers 2 \
+ --trainer_id 0 \
+ > trainer0.log 2>&1 &
+# start trainer1
+python cluster_train.py \
+ --train_data_dir data/convert_text8 \
+ --dict_path data/test_build_dict \
+ --batch_size 100 \
+ --model_output_dir dis_model \
+ --base_lr 1.0 \
+ --print_batch 1000 \
+ --is_sparse \
+ --with_speed \
+ --role trainer \
+ --endpoints 127.0.0.1:6000,127.0.0.1:6001 \
+ --trainers 2 \
+ --trainer_id 1 \
+ > trainer1.log 2>&1 &
diff --git a/demo/quant/quant_embedding/image/after.png b/demo/quant/quant_embedding/image/after.png
new file mode 100644
index 0000000000000000000000000000000000000000..50a3f172fb5b34c853f27f4406458e5bd6399f06
Binary files /dev/null and b/demo/quant/quant_embedding/image/after.png differ
diff --git a/demo/quant/quant_embedding/image/before.png b/demo/quant/quant_embedding/image/before.png
new file mode 100644
index 0000000000000000000000000000000000000000..5b77f0b05a5e3bb56a80aeaf29b6624798c2eb56
Binary files /dev/null and b/demo/quant/quant_embedding/image/before.png differ
diff --git a/demo/quant/quant_embedding/infer.py b/demo/quant/quant_embedding/infer.py
new file mode 100755
index 0000000000000000000000000000000000000000..40ae2ee8c639754d24a5474c5e58d7e062a1d4d0
--- /dev/null
+++ b/demo/quant/quant_embedding/infer.py
@@ -0,0 +1,231 @@
+import argparse
+import sys
+import time
+import math
+import unittest
+import contextlib
+import numpy as np
+import six
+import paddle.fluid as fluid
+import paddle
+import net
+import utils
+sys.path.append(sys.path[0] + "/../../../")
+from paddleslim.quant import quant_embedding
+
+
+def parse_args():
+ parser = argparse.ArgumentParser("PaddlePaddle Word2vec infer example")
+ parser.add_argument(
+ '--dict_path',
+ type=str,
+ default='./data/data_c/1-billion_dict_word_to_id_',
+ help="The path of dic")
+ parser.add_argument(
+ '--infer_epoch',
+ action='store_true',
+ required=False,
+ default=False,
+ help='infer by epoch')
+ parser.add_argument(
+ '--infer_step',
+ action='store_true',
+ required=False,
+ default=False,
+ help='infer by step')
+ parser.add_argument(
+ '--test_dir', type=str, default='test_data', help='test file address')
+ parser.add_argument(
+ '--print_step', type=int, default='500000', help='print step')
+ parser.add_argument(
+ '--start_index', type=int, default='0', help='start index')
+ parser.add_argument(
+ '--start_batch', type=int, default='1', help='start batch')
+ parser.add_argument(
+ '--end_batch', type=int, default='13', help='end batch')
+ parser.add_argument(
+ '--last_index', type=int, default='100', help='last index')
+ parser.add_argument(
+ '--model_dir', type=str, default='model', help='model dir')
+ parser.add_argument(
+ '--use_cuda', type=int, default='0', help='whether use cuda')
+ parser.add_argument(
+ '--batch_size', type=int, default='5', help='batch_size')
+ parser.add_argument(
+ '--emb_size', type=int, default='64', help='embedding size')
+ parser.add_argument(
+ '--emb_quant',
+ type=bool,
+ default=False,
+ help='whether to quant embedding parameter')
+ args = parser.parse_args()
+ return args
+
+
+def infer_epoch(args, vocab_size, test_reader, use_cuda, i2w):
+ """ inference function """
+ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ emb_size = args.emb_size
+ batch_size = args.batch_size
+ with fluid.scope_guard(fluid.Scope()):
+ main_program = fluid.Program()
+ with fluid.program_guard(main_program):
+ values, pred = net.infer_network(vocab_size, emb_size)
+ for epoch in range(start_index, last_index + 1):
+ copy_program = main_program.clone()
+ model_path = model_dir + "/pass-" + str(epoch)
+ fluid.io.load_params(
+ executor=exe,
+ dirname=model_path,
+ main_program=copy_program)
+ if args.emb_quant:
+ config = {'params_name': 'emb', 'quantize_type': 'abs_max'}
+ copy_program = quant_embedding(copy_program, place, config)
+ fluid.io.save_persistables(
+ exe,
+ './output_quant/pass-' + str(epoch),
+ main_program=copy_program)
+
+ accum_num = 0
+ accum_num_sum = 0.0
+ t0 = time.time()
+ step_id = 0
+ for data in test_reader():
+ step_id += 1
+ b_size = len([dat[0] for dat in data])
+ wa = np.array(
+ [dat[0] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+ wb = np.array(
+ [dat[1] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+ wc = np.array(
+ [dat[2] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+
+ label = [dat[3] for dat in data]
+ input_word = [dat[4] for dat in data]
+ para = exe.run(copy_program,
+ feed={
+ "analogy_a": wa,
+ "analogy_b": wb,
+ "analogy_c": wc,
+ "all_label":
+ np.arange(vocab_size).reshape(
+ vocab_size, 1).astype("int64"),
+ },
+ fetch_list=[pred.name, values],
+ return_numpy=False)
+ pre = np.array(para[0])
+ val = np.array(para[1])
+ for ii in range(len(label)):
+ top4 = pre[ii]
+ accum_num_sum += 1
+ for idx in top4:
+ if int(idx) in input_word[ii]:
+ continue
+ if int(idx) == int(label[ii][0]):
+ accum_num += 1
+ break
+ if step_id % 1 == 0:
+ print("step:%d %d " % (step_id, accum_num))
+
+ print("epoch:%d \t acc:%.3f " %
+ (epoch, 1.0 * accum_num / accum_num_sum))
+
+
+def infer_step(args, vocab_size, test_reader, use_cuda, i2w):
+ """ inference function """
+ place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ emb_size = args.emb_size
+ batch_size = args.batch_size
+ with fluid.scope_guard(fluid.Scope()):
+ main_program = fluid.Program()
+ with fluid.program_guard(main_program):
+ values, pred = net.infer_network(vocab_size, emb_size)
+ for epoch in range(start_index, last_index + 1):
+ for batchid in range(args.start_batch, args.end_batch):
+ copy_program = main_program.clone()
+ model_path = model_dir + "/pass-" + str(epoch) + (
+ '/batch-' + str(batchid * args.print_step))
+ fluid.io.load_params(
+ executor=exe,
+ dirname=model_path,
+ main_program=copy_program)
+ accum_num = 0
+ accum_num_sum = 0.0
+ t0 = time.time()
+ step_id = 0
+ for data in test_reader():
+ step_id += 1
+ b_size = len([dat[0] for dat in data])
+ wa = np.array(
+ [dat[0] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+ wb = np.array(
+ [dat[1] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+ wc = np.array(
+ [dat[2] for dat in data]).astype("int64").reshape(
+ b_size, 1)
+
+ label = [dat[3] for dat in data]
+ input_word = [dat[4] for dat in data]
+ para = exe.run(
+ copy_program,
+ feed={
+ "analogy_a": wa,
+ "analogy_b": wb,
+ "analogy_c": wc,
+ "all_label":
+ np.arange(vocab_size).reshape(vocab_size, 1),
+ },
+ fetch_list=[pred.name, values],
+ return_numpy=False)
+ pre = np.array(para[0])
+ val = np.array(para[1])
+ for ii in range(len(label)):
+ top4 = pre[ii]
+ accum_num_sum += 1
+ for idx in top4:
+ if int(idx) in input_word[ii]:
+ continue
+ if int(idx) == int(label[ii][0]):
+ accum_num += 1
+ break
+ if step_id % 1 == 0:
+ print("step:%d %d " % (step_id, accum_num))
+ print("epoch:%d \t acc:%.3f " %
+ (epoch, 1.0 * accum_num / accum_num_sum))
+ t1 = time.time()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ start_index = args.start_index
+ last_index = args.last_index
+ test_dir = args.test_dir
+ model_dir = args.model_dir
+ batch_size = args.batch_size
+ dict_path = args.dict_path
+ use_cuda = True if args.use_cuda else False
+ print("start index: ", start_index, " last_index:", last_index)
+ vocab_size, test_reader, id2word = utils.prepare_data(
+ test_dir, dict_path, batch_size=batch_size)
+ print("vocab_size:", vocab_size)
+ if args.infer_step:
+ infer_step(
+ args,
+ vocab_size,
+ test_reader=test_reader,
+ use_cuda=use_cuda,
+ i2w=id2word)
+ else:
+ infer_epoch(
+ args,
+ vocab_size,
+ test_reader=test_reader,
+ use_cuda=use_cuda,
+ i2w=id2word)
diff --git a/demo/quant/quant_embedding/net.py b/demo/quant/quant_embedding/net.py
new file mode 100755
index 0000000000000000000000000000000000000000..ab2abbc76bde8e03c9a6e1e0abb062aa467d2c91
--- /dev/null
+++ b/demo/quant/quant_embedding/net.py
@@ -0,0 +1,136 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+neural network for word2vec
+"""
+from __future__ import print_function
+import math
+import numpy as np
+import paddle.fluid as fluid
+
+
+def skip_gram_word2vec(dict_size, embedding_size, is_sparse=False, neg_num=5):
+
+ datas = []
+ input_word = fluid.layers.data(name="input_word", shape=[1], dtype='int64')
+ true_word = fluid.layers.data(name='true_label', shape=[1], dtype='int64')
+ neg_word = fluid.layers.data(
+ name="neg_label", shape=[neg_num], dtype='int64')
+
+ datas.append(input_word)
+ datas.append(true_word)
+ datas.append(neg_word)
+
+ py_reader = fluid.layers.create_py_reader_by_data(
+ capacity=64, feed_list=datas, name='py_reader', use_double_buffer=True)
+
+ words = fluid.layers.read_file(py_reader)
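+ # words[0]: center word, words[1]: true context word, words[2]: sampled
+ # negative words; the order matches the feed_list above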
+ init_width = 0.5 / embedding_size
+ input_emb = fluid.layers.embedding(
+ input=words[0],
+ is_sparse=is_sparse,
+ size=[dict_size, embedding_size],
+ param_attr=fluid.ParamAttr(
+ name='emb',
+ initializer=fluid.initializer.Uniform(-init_width, init_width)))
+
+ true_emb_w = fluid.layers.embedding(
+ input=words[1],
+ is_sparse=is_sparse,
+ size=[dict_size, embedding_size],
+ param_attr=fluid.ParamAttr(
+ name='emb_w', initializer=fluid.initializer.Constant(value=0.0)))
+
+ true_emb_b = fluid.layers.embedding(
+ input=words[1],
+ is_sparse=is_sparse,
+ size=[dict_size, 1],
+ param_attr=fluid.ParamAttr(
+ name='emb_b', initializer=fluid.initializer.Constant(value=0.0)))
+ neg_word_reshape = fluid.layers.reshape(words[2], shape=[-1, 1])
+ neg_word_reshape.stop_gradient = True
+
+ neg_emb_w = fluid.layers.embedding(
+ input=neg_word_reshape,
+ is_sparse=is_sparse,
+ size=[dict_size, embedding_size],
+ param_attr=fluid.ParamAttr(
+ name='emb_w', learning_rate=1.0))
+
+ neg_emb_w_re = fluid.layers.reshape(
+ neg_emb_w, shape=[-1, neg_num, embedding_size])
+ neg_emb_b = fluid.layers.embedding(
+ input=neg_word_reshape,
+ is_sparse=is_sparse,
+ size=[dict_size, 1],
+ param_attr=fluid.ParamAttr(
+ name='emb_b', learning_rate=1.0))
+
+ neg_emb_b_vec = fluid.layers.reshape(neg_emb_b, shape=[-1, neg_num])
+ true_logits = fluid.layers.elementwise_add(
+ fluid.layers.reduce_sum(
+ fluid.layers.elementwise_mul(input_emb, true_emb_w),
+ dim=1,
+ keep_dim=True),
+ true_emb_b)
+ input_emb_re = fluid.layers.reshape(
+ input_emb, shape=[-1, 1, embedding_size])
+ neg_matmul = fluid.layers.matmul(
+ input_emb_re, neg_emb_w_re, transpose_y=True)
+ neg_matmul_re = fluid.layers.reshape(neg_matmul, shape=[-1, neg_num])
+ neg_logits = fluid.layers.elementwise_add(neg_matmul_re, neg_emb_b_vec)
+ # NCE loss: the true context word is a positive example (label 1) and the
+ # sampled negative words are negatives (label 0); sigmoid cross-entropy is
+ # applied to both sets of logits and summed
+
+ label_ones = fluid.layers.fill_constant_batch_size_like(
+ true_logits, shape=[-1, 1], value=1.0, dtype='float32')
+ label_zeros = fluid.layers.fill_constant_batch_size_like(
+ true_logits, shape=[-1, neg_num], value=0.0, dtype='float32')
+
+ true_xent = fluid.layers.sigmoid_cross_entropy_with_logits(true_logits,
+ label_ones)
+ neg_xent = fluid.layers.sigmoid_cross_entropy_with_logits(neg_logits,
+ label_zeros)
+ cost = fluid.layers.elementwise_add(
+ fluid.layers.reduce_sum(
+ true_xent, dim=1),
+ fluid.layers.reduce_sum(
+ neg_xent, dim=1))
+ avg_cost = fluid.layers.reduce_mean(cost)
+ return avg_cost, py_reader
+
+
+def infer_network(vocab_size, emb_size):
+ analogy_a = fluid.layers.data(name="analogy_a", shape=[1], dtype='int64')
+ analogy_b = fluid.layers.data(name="analogy_b", shape=[1], dtype='int64')
+ analogy_c = fluid.layers.data(name="analogy_c", shape=[1], dtype='int64')
+ all_label = fluid.layers.data(
+ name="all_label",
+ shape=[vocab_size, 1],
+ dtype='int64',
+ append_batch_size=False)
+ emb_all_label = fluid.layers.embedding(
+ input=all_label, size=[vocab_size, emb_size], param_attr="emb")
+
+ emb_a = fluid.layers.embedding(
+ input=analogy_a, size=[vocab_size, emb_size], param_attr="emb")
+ emb_b = fluid.layers.embedding(
+ input=analogy_b, size=[vocab_size, emb_size], param_attr="emb")
+ emb_c = fluid.layers.embedding(
+ input=analogy_c, size=[vocab_size, emb_size], param_attr="emb")
+ target = fluid.layers.elementwise_add(
+ fluid.layers.elementwise_sub(emb_b, emb_a), emb_c)
+ emb_all_label_l2 = fluid.layers.l2_normalize(x=emb_all_label, axis=1)
+ dist = fluid.layers.matmul(x=target, y=emb_all_label_l2, transpose_y=True)
+ values, pred_idx = fluid.layers.topk(input=dist, k=4)
+ return values, pred_idx
diff --git a/demo/quant/quant_embedding/preprocess.py b/demo/quant/quant_embedding/preprocess.py
new file mode 100755
index 0000000000000000000000000000000000000000..db1e0994f9cb0ec5945cf018dea3c5f49309d47e
--- /dev/null
+++ b/demo/quant/quant_embedding/preprocess.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*
+import os
+import random
+import re
+import six
+import argparse
+import io
+import math
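+# matches any character that is not a lowercase letter or a space; used by
+# text_strip() to remove punctuation and digits from lowercased text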
+prog = re.compile("[^a-z ]", flags=0)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="Paddle Fluid word2 vector preprocess")
+ parser.add_argument(
+ '--build_dict_corpus_dir', type=str, help="The dir of corpus")
+ parser.add_argument(
+ '--input_corpus_dir', type=str, help="The dir of input corpus")
+ parser.add_argument(
+ '--output_corpus_dir', type=str, help="The dir of output corpus")
+ parser.add_argument(
+ '--dict_path',
+ type=str,
+ default='./dict',
+ help="The path of dictionary ")
+ parser.add_argument(
+ '--min_count',
+ type=int,
+ default=5,
+ help="If the word count is less then min_count, it will be removed from dict"
+ )
+ parser.add_argument(
+ '--downsample',
+ type=float,
+ default=0.001,
+ help="filter word by downsample")
+ parser.add_argument(
+ '--filter_corpus',
+ action='store_true',
+ default=False,
+ help='Filter corpus')
+ parser.add_argument(
+ '--build_dict',
+ action='store_true',
+ default=False,
+ help='Build dict from corpus')
+ return parser.parse_args()
+
+
+def text_strip(text):
+ #English Preprocess Rule
+ return prog.sub("", text.lower())
+
+
+# Shameless copy from Tensorflow https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/data_generators/text_encoder.py
+# Unicode utility functions that work with Python 2 and 3
+def native_to_unicode(s):
+ if _is_unicode(s):
+ return s
+ try:
+ return _to_unicode(s)
+ except UnicodeDecodeError:
+ res = _to_unicode(s, ignore_errors=True)
+ return res
+
+
+def _is_unicode(s):
+ if six.PY2:
+ if isinstance(s, unicode):
+ return True
+ else:
+ if isinstance(s, str):
+ return True
+ return False
+
+
+def _to_unicode(s, ignore_errors=False):
+ if _is_unicode(s):
+ return s
+ error_mode = "ignore" if ignore_errors else "strict"
+ return s.decode("utf-8", errors=error_mode)
+
+
+def filter_corpus(args):
+ """
+ filter corpus and convert id.
+ """
+ word_count = dict()
+ word_to_id_ = dict()
+ word_all_count = 0
+ id_counts = []
+ word_id = 0
+ #read dict
+ with io.open(args.dict_path, 'r', encoding='utf-8') as f:
+ for line in f:
+ word, count = line.split()[0], int(line.split()[1])
+ word_count[word] = count
+ word_to_id_[word] = word_id
+ word_id += 1
+ id_counts.append(count)
+ word_all_count += count
+
+ #write word2id file
+ print("write word2id file to : " + args.dict_path + "_word_to_id_")
+ with io.open(
+ args.dict_path + "_word_to_id_", 'w+', encoding='utf-8') as fid:
+ for k, v in word_to_id_.items():
+ fid.write(k + " " + str(v) + '\n')
+ #filter corpus and convert id
+ if not os.path.exists(args.output_corpus_dir):
+ os.makedirs(args.output_corpus_dir)
+ for file in os.listdir(args.input_corpus_dir):
+ with io.open(args.output_corpus_dir + '/convert_' + file, "w") as wf:
+ with io.open(
+ args.input_corpus_dir + '/' + file,
+ encoding='utf-8') as rf:
+ print(args.input_corpus_dir + '/' + file)
+ for line in rf:
+ signal = False
+ line = text_strip(line)
+ words = line.split()
+ for item in words:
+ if item in word_count:
+ idx = word_to_id_[item]
+ else:
+ idx = word_to_id_[native_to_unicode('<UNK>')]
+ count_w = id_counts[idx]
+ corpus_size = word_all_count
+ keep_prob = (
+ math.sqrt(count_w /
+ (args.downsample * corpus_size)) + 1
+ ) * (args.downsample * corpus_size) / count_w
+ r_value = random.random()
+ if r_value > keep_prob:
+ continue
+ wf.write(_to_unicode(str(idx) + " "))
+ signal = True
+ if signal:
+ wf.write(_to_unicode("\n"))
+
+
+def build_dict(args):
+ """
+ preprocess the data, generate the dictionary and save it into dict_path.
+ :param corpus_dir: the input data dir.
+ :param dict_path: the generated dict path. the data in dict is "word count"
+ :param min_count:
+ :return:
+ """
+ # word to count
+
+ word_count = dict()
+
+ for file in os.listdir(args.build_dict_corpus_dir):
+ with io.open(
+ args.build_dict_corpus_dir + "/" + file,
+ encoding='utf-8') as f:
+ print("build dict : ", args.build_dict_corpus_dir + "/" + file)
+ for line in f:
+ line = text_strip(line)
+ words = line.split()
+ for item in words:
+ if item in word_count:
+ word_count[item] = word_count[item] + 1
+ else:
+ word_count[item] = 1
+
+ item_to_remove = []
+ for item in word_count:
+ if word_count[item] <= args.min_count:
+ item_to_remove.append(item)
+
+ unk_sum = 0
+ for item in item_to_remove:
+ unk_sum += word_count[item]
+ del word_count[item]
+ #sort by count
+ word_count[native_to_unicode('<UNK>')] = unk_sum
+ word_count = sorted(
+ word_count.items(), key=lambda word_count: -word_count[1])
+
+ with io.open(args.dict_path, 'w+', encoding='utf-8') as f:
+ for k, v in word_count:
+ f.write(k + " " + str(v) + '\n')
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ if args.build_dict:
+ build_dict(args)
+ elif args.filter_corpus:
+ filter_corpus(args)
+ else:
+ print(
+ "error command line, please choose --build_dict or --filter_corpus")
diff --git a/demo/quant/quant_embedding/reader.py b/demo/quant/quant_embedding/reader.py
new file mode 100755
index 0000000000000000000000000000000000000000..7400e493fc78db389506f938ea510f461fdc2154
--- /dev/null
+++ b/demo/quant/quant_embedding/reader.py
@@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*
+
+import numpy as np
+import preprocess
+import logging
+import math
+import random
+import io
+
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
+
+class NumpyRandomInt(object):
+ def __init__(self, a, b, buf_size=1000):
+ self.idx = 0
+ self.buffer = np.random.random_integers(a, b, buf_size)
+ self.a = a
+ self.b = b
+
+ def __call__(self):
+ if self.idx == len(self.buffer):
+ self.buffer = np.random.random_integers(self.a, self.b,
+ len(self.buffer))
+ self.idx = 0
+
+ result = self.buffer[self.idx]
+ self.idx += 1
+ return result
+
+
+class Word2VecReader(object):
+ def __init__(self,
+ dict_path,
+ data_path,
+ filelist,
+ trainer_id,
+ trainer_num,
+ window_size=5):
+ self.window_size_ = window_size
+ self.data_path_ = data_path
+ self.filelist = filelist
+ self.trainer_id = trainer_id
+ self.trainer_num = trainer_num
+
+ word_all_count = 0
+ id_counts = []
+ word_id = 0
+
+ with io.open(dict_path, 'r', encoding='utf-8') as f:
+ for line in f:
+ word, count = line.split()[0], int(line.split()[1])
+ word_id += 1
+ id_counts.append(count)
+ word_all_count += count
+
+ self.word_all_count = word_all_count
+ self.corpus_size_ = word_all_count
+ self.dict_size = len(id_counts)
+ self.id_counts_ = id_counts
+
+ print("corpus_size:", self.corpus_size_)
+ self.id_frequencys = [
+ float(count) / word_all_count for count in self.id_counts_
+ ]
+ print("dict_size = " + str(self.dict_size) + " word_all_count = " +
+ str(word_all_count))
+
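+ # draws a random context-window size for each target word
+ # (dynamic window, as in the original word2vec)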
+ self.random_generator = NumpyRandomInt(1, self.window_size_ + 1)
+
+ def get_context_words(self, words, idx):
+ """
+ Get the context word list of target word.
+ words: the words of the current line
+ idx: input word index
+ window_size: window size
+ """
+ target_window = self.random_generator()
+ start_point = idx - target_window # if (idx - target_window) > 0 else 0
+ if start_point < 0:
+ start_point = 0
+ end_point = idx + target_window
+ targets = words[start_point:idx] + words[idx + 1:end_point + 1]
+ return targets
+
+ def train(self):
+ def nce_reader():
+ for file in self.filelist:
+ with io.open(
+ self.data_path_ + "/" + file, 'r',
+ encoding='utf-8') as f:
+ logger.info("running data in {}".format(self.data_path_ +
+ "/" + file))
+ count = 1
+ for line in f:
+ if self.trainer_id == count % self.trainer_num:
+ word_ids = [int(w) for w in line.split()]
+ for idx, target_id in enumerate(word_ids):
+ context_word_ids = self.get_context_words(
+ word_ids, idx)
+ for context_id in context_word_ids:
+ yield [target_id], [context_id]
+ count += 1
+
+ return nce_reader
diff --git a/demo/quant/quant_embedding/train.py b/demo/quant/quant_embedding/train.py
new file mode 100755
index 0000000000000000000000000000000000000000..72af09744564f0fcc83d1f43a8bd1a04d8a8fa38
--- /dev/null
+++ b/demo/quant/quant_embedding/train.py
@@ -0,0 +1,228 @@
+from __future__ import print_function
+import argparse
+import logging
+import os
+import time
+import math
+import random
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import six
+import reader
+from net import skip_gram_word2vec
+
+logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger("fluid")
+logger.setLevel(logging.INFO)
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description="PaddlePaddle Word2vec example")
+ parser.add_argument(
+ '--train_data_dir',
+ type=str,
+ default='./data/text',
+ help="The path of taining dataset")
+ parser.add_argument(
+ '--base_lr',
+ type=float,
+ default=0.01,
+ help="The number of learing rate (default: 0.01)")
+ parser.add_argument(
+ '--save_step',
+ type=int,
+ default=500000,
+ help="The number of step to save (default: 500000)")
+ parser.add_argument(
+ '--print_batch',
+ type=int,
+ default=10,
+ help="The number of print_batch (default: 10)")
+ parser.add_argument(
+ '--dict_path',
+ type=str,
+ default='./data/1-billion_dict',
+ help="The path of data dict")
+ parser.add_argument(
+ '--batch_size',
+ type=int,
+ default=500,
+ help="The size of mini-batch (default:500)")
+ parser.add_argument(
+ '--num_passes',
+ type=int,
+ default=10,
+ help="The number of passes to train (default: 10)")
+ parser.add_argument(
+ '--model_output_dir',
+ type=str,
+ default='models',
+ help='The path for model to store (default: models)')
+ parser.add_argument('--nce_num', type=int, default=5, help='nce_num')
+ parser.add_argument(
+ '--embedding_size',
+ type=int,
+ default=64,
+ help='The dimension of the embedding vectors (default: 64)')
+ parser.add_argument(
+ '--is_sparse',
+ action='store_true',
+ required=False,
+ default=False,
+ help='embedding and nce will use sparse or not, (default: False)')
+ parser.add_argument(
+ '--with_speed',
+ action='store_true',
+ required=False,
+ default=False,
+ help='print speed or not (default: False)')
+ return parser.parse_args()
+
+
+def convert_python_to_tensor(weight, batch_size, sample_reader):
+ def __reader__():
+ cs = np.array(weight).cumsum()
+ result = [[], []]
+ for sample in sample_reader():
+ for i, fea in enumerate(sample):
+ result[i].append(fea)
+ if len(result[0]) == batch_size:
+ tensor_result = []
+ for tensor in result:
+ t = fluid.Tensor()
+ dat = np.array(tensor, dtype='int64')
+ if len(dat.shape) > 2:
+ dat = dat.reshape((dat.shape[0], dat.shape[2]))
+ elif len(dat.shape) == 1:
+ dat = dat.reshape((-1, 1))
+ t.set(dat, fluid.CPUPlace())
+ tensor_result.append(t)
+ tt = fluid.Tensor()
+ neg_array = cs.searchsorted(np.random.sample(args.nce_num))
+ neg_array = np.tile(neg_array, batch_size)
+ tt.set(
+ neg_array.reshape((batch_size, args.nce_num)),
+ fluid.CPUPlace())
+ tensor_result.append(tt)
+ yield tensor_result
+ result = [[], []]
+
+ return __reader__
+
+
+def train_loop(args, train_program, reader, py_reader, loss, trainer_id,
+ weight):
+
+ py_reader.decorate_tensor_provider(
+ convert_python_to_tensor(weight, args.batch_size, reader.train()))
+
+ place = fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ exec_strategy = fluid.ExecutionStrategy()
+ exec_strategy.use_experimental_executor = True
+
+ print("CPU_NUM:" + str(os.getenv("CPU_NUM")))
+ exec_strategy.num_threads = int(os.getenv("CPU_NUM"))
+
+ build_strategy = fluid.BuildStrategy()
+ if int(os.getenv("CPU_NUM")) > 1:
+ build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce
+
+ train_exe = fluid.ParallelExecutor(
+ use_cuda=False,
+ loss_name=loss.name,
+ main_program=train_program,
+ build_strategy=build_strategy,
+ exec_strategy=exec_strategy)
+
+ for pass_id in range(args.num_passes):
+ py_reader.start()
+ time.sleep(10)
+ epoch_start = time.time()
+ batch_id = 0
+ start = time.time()
+ try:
+ while True:
+
+ loss_val = train_exe.run(fetch_list=[loss.name])
+ loss_val = np.mean(loss_val)
+
+ if batch_id % args.print_batch == 0:
+ logger.info(
+ "TRAIN --> pass: {} batch: {} loss: {} reader queue:{}".
+ format(pass_id, batch_id,
+ loss_val.mean(), py_reader.queue.size()))
+ if args.with_speed:
+ if batch_id % 500 == 0 and batch_id != 0:
+ elapsed = (time.time() - start)
+ start = time.time()
+ samples = 1001 * args.batch_size * int(
+ os.getenv("CPU_NUM"))
+ logger.info("Time used: {}, Samples/Sec: {}".format(
+ elapsed, samples / elapsed))
+
+ if batch_id % args.save_step == 0 and batch_id != 0:
+ model_dir = args.model_output_dir + '/pass-' + str(
+ pass_id) + ('/batch-' + str(batch_id))
+ if trainer_id == 0:
+ fluid.io.save_params(executor=exe, dirname=model_dir)
+ print("model saved in %s" % model_dir)
+ batch_id += 1
+
+ except fluid.core.EOFException:
+ py_reader.reset()
+ epoch_end = time.time()
+ logger.info("Epoch: {0}, Train total expend: {1} ".format(
+ pass_id, epoch_end - epoch_start))
+ model_dir = args.model_output_dir + '/pass-' + str(pass_id)
+ if trainer_id == 0:
+ fluid.io.save_params(executor=exe, dirname=model_dir)
+ print("model saved in %s" % model_dir)
+
+
+def GetFileList(data_path):
+ return os.listdir(data_path)
+
+
+def train(args):
+
+ if not os.path.isdir(args.model_output_dir):
+ os.mkdir(args.model_output_dir)
+
+ filelist = GetFileList(args.train_data_dir)
+ word2vec_reader = reader.Word2VecReader(
+ args.dict_path, args.train_data_dir, filelist, 0, 1)
+
+ logger.info("dict_size: {}".format(word2vec_reader.dict_size))
+ np_power = np.power(np.array(word2vec_reader.id_frequencys), 0.75)
+ id_frequencys_pow = np_power / np_power.sum()
+
+ loss, py_reader = skip_gram_word2vec(
+ word2vec_reader.dict_size,
+ args.embedding_size,
+ is_sparse=args.is_sparse,
+ neg_num=args.nce_num)
+
+ optimizer = fluid.optimizer.SGD(
+ learning_rate=fluid.layers.exponential_decay(
+ learning_rate=args.base_lr,
+ decay_steps=100000,
+ decay_rate=0.999,
+ staircase=True))
+
+ optimizer.minimize(loss)
+
+ # do local training
+ logger.info("run local training")
+ main_program = fluid.default_main_program()
+ train_loop(args, main_program, word2vec_reader, py_reader, loss, 0,
+ id_frequencys_pow)
+
+
+if __name__ == '__main__':
+ args = parse_args()
+ train(args)
diff --git a/demo/quant/quant_embedding/utils.py b/demo/quant/quant_embedding/utils.py
new file mode 100755
index 0000000000000000000000000000000000000000..01cd04e493b09e880303d7b0c87f5ed71cf86357
--- /dev/null
+++ b/demo/quant/quant_embedding/utils.py
@@ -0,0 +1,96 @@
+import sys
+import collections
+import six
+import time
+import numpy as np
+import paddle.fluid as fluid
+import paddle
+import os
+import preprocess
+
+
+def BuildWord_IdMap(dict_path):
+ word_to_id = dict()
+ id_to_word = dict()
+ with open(dict_path, 'r') as f:
+ for line in f:
+ word_to_id[line.split(' ')[0]] = int(line.split(' ')[1])
+ id_to_word[int(line.split(' ')[1])] = line.split(' ')[0]
+ return word_to_id, id_to_word
+
+
+def prepare_data(file_dir, dict_path, batch_size):
+ w2i, i2w = BuildWord_IdMap(dict_path)
+ vocab_size = len(i2w)
+ reader = paddle.batch(test(file_dir, w2i), batch_size)
+ return vocab_size, reader, i2w
+
+
+def native_to_unicode(s):
+ if _is_unicode(s):
+ return s
+ try:
+ return _to_unicode(s)
+ except UnicodeDecodeError:
+ res = _to_unicode(s, ignore_errors=True)
+ return res
+
+
+def _is_unicode(s):
+ if six.PY2:
+ if isinstance(s, unicode):
+ return True
+ else:
+ if isinstance(s, str):
+ return True
+ return False
+
+
+def _to_unicode(s, ignore_errors=False):
+ if _is_unicode(s):
+ return s
+ error_mode = "ignore" if ignore_errors else "strict"
+ return s.decode("utf-8", errors=error_mode)
+
+
+def strip_lines(line, vocab):
+ return _replace_oov(vocab, native_to_unicode(line))
+
+
+def _replace_oov(original_vocab, line):
+ """Replace out-of-vocab words with "".
+ This maintains compatibility with published results.
+ Args:
+ original_vocab: a set of strings (The standard vocabulary for the dataset)
+ line: a unicode string - a space-delimited sequence of words.
+ Returns:
+ a unicode string - a space-delimited sequence of words.
+ """
+ return u" ".join([
+ word if word in original_vocab else u"" for word in line.split()
+ ])
+
+
+def reader_creator(file_dir, word_to_id):
+ def reader():
+ files = os.listdir(file_dir)
+ for fi in files:
+ with open(file_dir + '/' + fi, "r") as f:
+ for line in f:
+ if ':' in line:
+ pass
+ else:
+ line = strip_lines(line.lower(), word_to_id)
+ line = line.split()
+ yield [word_to_id[line[0]]], [word_to_id[line[1]]], [
+ word_to_id[line[2]]
+ ], [word_to_id[line[3]]], [
+ word_to_id[line[0]], word_to_id[line[1]],
+ word_to_id[line[2]]
+ ]
+
+ return reader
+
+
+def test(test_dir, w2i):
+ return reader_creator(test_dir, w2i)
diff --git a/demo/quant/quant_post/README.md b/demo/quant/quant_post/README.md
new file mode 100755
index 0000000000000000000000000000000000000000..0bab00e7b885a807d7e1d19df8b0053f574e420a
--- /dev/null
+++ b/demo/quant/quant_post/README.md
@@ -0,0 +1,96 @@
+# Post-training Quantization Demo
+
+This demo shows how to use the post-training quantization API ``paddleslim.quant.quant_post`` to quantize a trained classification model offline. The API produces a quantized model without any retraining, reducing model storage and GPU memory usage.
+
+## API
+```
+quant_post(executor,
+ model_dir,
+ quantize_model_path,
+ sample_generator,
+ model_filename=None,
+ params_filename=None,
+ batch_size=16,
+ batch_nums=None,
+ scope=None,
+ algo='KL',
+ quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"])
+```
+
+Parameters:
+- executor (fluid.Executor): the executor that runs the model, on CPU or GPU.
+- model_dir(str): the directory containing the model to be quantized.
+- quantize_model_path(str): the path where the quantized model is saved.
+- sample_generator(python generator): reads data samples, one sample at a time.
+- model_filename(str, optional): the model filename. If the model's topology is stored in a single file, set ``model_filename`` to that file's name; otherwise set it to ``None``. Defaults to ``None``.
+- params_filename(str): the parameter filename. If the model's parameters are stored in a single file, set ``params_filename`` to that file's name; otherwise set it to ``None``. Defaults to ``None``.
+- batch_size(int): the number of images per batch. Defaults to 16.
+- batch_nums(int, optional): the number of iterations. If set to ``None``, quantization runs until ``sample_generator`` is exhausted; otherwise it runs ``batch_nums`` iterations, so the number of samples used to calibrate the ``Scale`` values is ``'batch_nums' * 'batch_size'``.
+- scope(fluid.Scope, optional): the scope used to read and write ``Variable``s; if set to ``None``, ``fluid.global_scope()`` is used. Defaults to ``None``.
+- algo(str): the algorithm used for quantization, either ``'KL'`` or ``'direct'``. This option only affects activation quantization, since weights are quantized with ``'channel_wise_abs_max'``. With ``'direct'``, ``Scale`` values are computed using ``'abs_max'``; with ``'KL'``, they are computed using KL divergence. Defaults to ``'KL'``.
+- quantizable_op_type(list[str]): the list of ``op`` types to quantize. Defaults to ``["conv2d", "depthwise_conv2d", "mul"]``.
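+
+A minimal usage sketch matching the commands later in this demo (the paths and the reader module are this demo's; ``batch_nums=10`` with the default ``batch_size=16`` gives the 160 calibration images mentioned below):
+
+```python
+import paddle.fluid as fluid
+import imagenet_reader as reader  # this demo's ImageNet reader
+from paddleslim.quant import quant_post
+
+place = fluid.CUDAPlace(0)
+exe = fluid.Executor(place)
+# calibrate activation scales with the default 'KL' algorithm and save the
+# quantized model under quantize_model_path
+quant_post(
+    executor=exe,
+    model_dir='./inference_model/MobileNet',
+    quantize_model_path='./quant_model_train/MobileNet',
+    sample_generator=reader.train(),
+    model_filename='model',
+    params_filename='weights',
+    batch_nums=10)
+```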
+
+## Post-training quantization of a classification model
+
+### Preparing the data
+
+Create a ``data`` folder in the current directory and extract the ``imagenet`` dataset into it. After extraction, the ``data`` folder should contain:
+- a ``'train'`` folder with the training images
+- a ``'train_list.txt'`` file
+- a ``'val'`` folder with the validation images
+- a ``'val_list.txt'`` file
+
+### Preparing the model to be quantized
+The post-training quantization API can only load models saved with ``fluid.io.save_inference_model``, so a model saved through any other API must be converted first. This demo uses a classification model as the example.
+
+First, download the trained ``mobilenetv1`` model from [imagenet classification models](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/image_classification#%E5%B7%B2%E5%8F%91%E5%B8%83%E6%A8%A1%E5%9E%8B%E5%8F%8A%E5%85%B6%E6%80%A7%E8%83%BD).
+
+Create a ``'pretrain'`` folder in the current directory and extract the ``mobilenetv1`` model into it; the extracted directory is ``pretrain/MobileNetV1_pretrained``.
+
+### Exporting the model
+Run the following command to convert the model into the format the post-training quantization API expects:
+```
+python export_model.py --model "MobileNet" --pretrained_model ./pretrain/MobileNetV1_pretrained --data imagenet
+```
+The converted model is stored under ``inference_model/MobileNet/``, which contains two files, ``'model'`` and ``'weights'``.
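+
+Under the hood, the export step builds the eval network and saves it with ``fluid.io.save_inference_model``. A sketch of that call (the ``image``, ``out``, ``val_program`` and ``exe`` variables are assumptions based on this demo's training scripts):
+
+```python
+import paddle.fluid as fluid
+
+# save the inference graph and its weights as two files, 'model' and 'weights'
+fluid.io.save_inference_model(
+    dirname='./inference_model/MobileNet',
+    feeded_var_names=[image.name],
+    target_vars=[out],
+    executor=exe,
+    main_program=val_program,
+    model_filename='model',
+    params_filename='weights')
+```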
+
+### Post-training quantization
+Next, quantize the exported model files offline. The quantization script is [quant_post.py](./quant_post.py), which uses ``paddleslim.quant.quant_post`` to quantize the model. Run:
+```
+python quant_post.py --model_path ./inference_model/MobileNet --save_path ./quant_model_train/MobileNet --model_filename model --params_filename weights
+```
+
+- ``model_path``: the directory containing the model to be quantized.
+- ``save_path``: the path where the quantized model is saved.
+- ``model_filename``: if the model's topology is stored in a single file, set this to that file's name; if it is stored in multiple files, leave it unset.
+- ``params_filename``: if the model's parameters are stored in a single file, set this to that file's name; if they are stored in multiple files, leave it unset.
+
+After running the command, the quantized model and parameter files appear under ``${save_path}``.
+
+> The quantization algorithm is ``'KL'``, calibrated with 160 images from the training set.
+
+
+### Evaluating accuracy
+
+Use the [eval.py](./eval.py) script to evaluate the model before and after quantization and compare classification accuracy.
+
+First evaluate the accuracy of the model before quantization:
+```
+python eval.py --model_path ./inference_model/MobileNet --model_name model --params_name weights
+```
+The accuracy output is:
+```
+top1_acc/top5_acc= [0.70913923 0.89548034]
+```
+
+Evaluate the accuracy of the post-training-quantized model with:
+
+```
+python eval.py --model_path ./quant_model_train/MobileNet
+```
+
+The accuracy output is:
+```
+top1_acc/top5_acc= [0.70141864 0.89086477]
+```
+The comparison shows that post-training quantization of the ``mobilenet`` classification model on ``imagenet`` costs ``0.77%`` of ``top1`` accuracy and ``0.46%`` of ``top5`` accuracy.
diff --git a/demo/quant/quant_post/eval.py b/demo/quant/quant_post/eval.py
new file mode 100755
index 0000000000000000000000000000000000000000..8d5cfa003d8b7077224ae2f54194069aadc3dc90
--- /dev/null
+++ b/demo/quant/quant_post/eval.py
@@ -0,0 +1,95 @@
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+import os
+import sys
+import numpy as np
+import argparse
+import functools
+
+import paddle
+import paddle.fluid as fluid
+sys.path.append('../../')
+import imagenet_reader as reader
+from utility import add_arguments, print_arguments
+
+parser = argparse.ArgumentParser(description=__doc__)
+# yapf: disable
+add_arg = functools.partial(add_arguments, argparser=parser)
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model_path', str, "./pruning/checkpoints/resnet50/2/eval_model/", "The path of the model to be evaluated.")
+add_arg('model_name', str, None, "model filename for inference model")
+add_arg('params_name', str, None, "params filename for inference model")
+# yapf: enable
+
+
+def eval(args):
+ # parameters from arguments
+
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+
+ val_program, feed_target_names, fetch_targets = fluid.io.load_inference_model(
+ args.model_path,
+ exe,
+ model_filename=args.model_name,
+ params_filename=args.params_name)
+ val_reader = paddle.batch(reader.val(), batch_size=128)
+ feeder = fluid.DataFeeder(
+ place=place, feed_list=feed_target_names, program=val_program)
+
+ results = []
+ for batch_id, data in enumerate(val_reader()):
+
+ # top1_acc, top5_acc
+ if len(feed_target_names) == 1:
+ # eval "infer model", which input is image, output is classification probability
+ image = [[d[0]] for d in data]
+ label = [[d[1]] for d in data]
+ feed_data = feeder.feed(image)
+ pred = exe.run(val_program,
+ feed=feed_data,
+ fetch_list=fetch_targets)
+ pred = np.array(pred[0])
+ label = np.array(label)
+ sort_array = pred.argsort(axis=1)
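+            # np.argsort is ascending, so the last k columns hold the top-k class ids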
+ top_1_pred = sort_array[:, -1:][:, ::-1]
+ top_1 = np.mean(label == top_1_pred)
+ top_5_pred = sort_array[:, -5:][:, ::-1]
+ acc_num = 0
+ for i in range(len(label)):
+ if label[i][0] in top_5_pred[i]:
+ acc_num += 1
+ top_5 = float(acc_num) / len(label)
+ results.append([top_1, top_5])
+ else:
+ # eval "eval model", which inputs are image and label, output is top1 and top5 accuracy
+ result = exe.run(val_program,
+ feed=feeder.feed(data),
+ fetch_list=fetch_targets)
+ result = [np.mean(r) for r in result]
+ results.append(result)
+ result = np.mean(np.array(results), axis=0)
+ print("top1_acc/top5_acc= {}".format(result))
+ sys.stdout.flush()
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ eval(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/quant/quant_post/export_model.py b/demo/quant/quant_post/export_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..dbfeb2b042139ec85b390ccd6f242c0aa93e8835
--- /dev/null
+++ b/demo/quant/quant_post/export_model.py
@@ -0,0 +1,88 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+sys.path.append(sys.path[0] + "/../../../")
+from paddleslim.common import get_logger
+sys.path.append(sys.path[0] + "/../../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "The path of the pretrained model.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('test_period', int, 10, "Test period in epoches.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def export_model(args):
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+
+ image_shape = [int(m) for m in image_shape.split(",")]
+ image = fluid.data(
+ name='image', shape=[None] + image_shape, dtype='float32')
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ val_program = fluid.default_main_program().clone(for_test=True)
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ if args.pretrained_model:
+
+ def if_exist(var):
+ return os.path.exists(
+ os.path.join(args.pretrained_model, var.name))
+
+ fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+ else:
+        assert False, "args.pretrained_model must be set"
+
+ fluid.io.save_inference_model(
+ './inference_model/' + args.model,
+ feeded_var_names=[image.name],
+ target_vars=[out],
+ executor=exe,
+ main_program=val_program,
+ model_filename='model',
+ params_filename='weights')
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ export_model(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/quant/quant_post/quant_post.py b/demo/quant/quant_post/quant_post.py
new file mode 100755
index 0000000000000000000000000000000000000000..5a2c1c834c82e125adad7a597f0d8667d8b19bfc
--- /dev/null
+++ b/demo/quant/quant_post/quant_post.py
@@ -0,0 +1,61 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+
+import reader
+sys.path.append(sys.path[0] + "/../../../")
+from paddleslim.common import get_logger
+from paddleslim.quant import quant_post
+sys.path.append(sys.path[0] + "/../../")
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 16, "Minibatch size.")
+add_arg('batch_num', int, 10, "Batch number")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model_path', str, "./inference_model/MobileNet/", "model dir")
+add_arg('save_path', str, "./quant_model/MobileNet/", "model dir to save quanted model")
+add_arg('model_filename', str, None, "model file name")
+add_arg('params_filename', str, None, "params file name")
+# yapf: enable
+
+
+def quantize(args):
+ val_reader = reader.train()
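+    # Calibration samples are drawn from the training set; batch_size * batch_num
+    # images in total are used to estimate the quantization scales.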
+
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+
+ assert os.path.exists(args.model_path), "args.model_path doesn't exist"
+ assert os.path.isdir(args.model_path), "args.model_path must be a dir"
+
+ exe = fluid.Executor(place)
+ quant_post(
+ executor=exe,
+ model_dir=args.model_path,
+ quantize_model_path=args.save_path,
+ sample_generator=val_reader,
+ model_filename=args.model_filename,
+ params_filename=args.params_filename,
+ batch_size=args.batch_size,
+ batch_nums=args.batch_num)
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ quantize(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/sa_nas_mobilenetv2_cifar10.py b/demo/sa_nas_mobilenetv2_cifar10.py
deleted file mode 100644
index 9095521c7f60b004911f9b27a766c040cffc6474..0000000000000000000000000000000000000000
--- a/demo/sa_nas_mobilenetv2_cifar10.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import sys
-sys.path.append('..')
-import numpy as np
-import argparse
-import ast
-import paddle
-import paddle.fluid as fluid
-from paddleslim.nas.search_space.search_space_factory import SearchSpaceFactory
-from paddleslim.analysis import flops
-from paddleslim.nas import SANAS
-
-
-def create_data_loader():
- data = fluid.data(name='data', shape=[-1, 3, 32, 32], dtype='float32')
- label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
- data_loader = fluid.io.DataLoader.from_generator(
- feed_list=[data, label],
- capacity=1024,
- use_double_buffer=True,
- iterable=True)
- return data_loader, data, label
-
-
-def init_sa_nas(config):
- factory = SearchSpaceFactory()
- space = factory.get_search_space(config)
- model_arch = space.token2arch()[0]
- main_program = fluid.Program()
- startup_program = fluid.Program()
-
- with fluid.program_guard(main_program, startup_program):
- data_loader, data, label = create_data_loader()
- output = model_arch(data)
- cost = fluid.layers.mean(
- fluid.layers.softmax_with_cross_entropy(
- logits=output, label=label))
-
- base_flops = flops(main_program)
- search_steps = 10000000
-
- ### start a server and a client
- sa_nas = SANAS(config, max_flops=base_flops, search_steps=search_steps)
-
- ### start a client, server_addr is server address
- #sa_nas = SANAS(config, max_flops = base_flops, server_addr=("10.255.125.38", 18607), search_steps = search_steps, is_server=False)
-
- return sa_nas, search_steps
-
-
-def search_mobilenetv2_cifar10(config, args):
- sa_nas, search_steps = init_sa_nas(config)
- for i in range(search_steps):
- print('search step: ', i)
- archs = sa_nas.next_archs()[0]
-
- train_program = fluid.Program()
- test_program = fluid.Program()
- startup_program = fluid.Program()
- with fluid.program_guard(train_program, startup_program):
- train_loader, data, label = create_data_loader()
- output = archs(data)
- cost = fluid.layers.mean(
- fluid.layers.softmax_with_cross_entropy(
- logits=output, label=label))[0]
- test_program = train_program.clone(for_test=True)
-
- optimizer = fluid.optimizer.Momentum(
- learning_rate=0.1,
- momentum=0.9,
- regularization=fluid.regularizer.L2Decay(1e-4))
- optimizer.minimize(cost)
-
- place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
- exe = fluid.Executor(place)
- exe.run(startup_program)
-
- train_reader = paddle.reader.shuffle(
- paddle.dataset.cifar.train10(cycle=False), buf_size=1024)
- train_loader.set_sample_generator(
- train_reader,
- batch_size=512,
- places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-
- test_loader, _, _ = create_data_loader()
- test_reader = paddle.dataset.cifar.test10(cycle=False)
- test_loader.set_sample_generator(
- test_reader,
- batch_size=256,
- drop_last=False,
- places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
-
- for epoch_id in range(10):
- for batch_id, data in enumerate(train_loader()):
- loss = exe.run(train_program,
- feed=data,
- fetch_list=[cost.name])[0]
- if batch_id % 5 == 0:
- print('epoch: {}, batch: {}, loss: {}'.format(
- epoch_id, batch_id, loss[0]))
-
- for data in test_loader():
- reward = exe.run(test_program, feed=data,
- fetch_list=[cost.name])[0]
-
- print('reward:', reward)
- sa_nas.reward(float(reward))
-
-
-if __name__ == '__main__':
-
- parser = argparse.ArgumentParser(
- description='SA NAS MobileNetV2 cifar10 argparase')
- parser.add_argument(
- '--use_gpu',
- type=ast.literal_eval,
- default=True,
- help='Whether to use GPU in train/test model.')
- args = parser.parse_args()
- print(args)
-
- config_info = {
- 'input_size': 32,
- 'output_size': 1,
- 'block_num': 5,
- 'block_mask': None
- }
- config = [('MobileNetV2Space', config_info)]
-
- search_mobilenetv2_cifar10(config, args)
diff --git a/demo/sensitive_prune/greedy_prune.py b/demo/sensitive_prune/greedy_prune.py
new file mode 100644
index 0000000000000000000000000000000000000000..e3f8bb57eb3eb0e5c515376970d9484eeca78764
--- /dev/null
+++ b/demo/sensitive_prune/greedy_prune.py
@@ -0,0 +1,231 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+from paddleslim.prune import SensitivePruner
+from paddleslim.common import get_logger
+from paddleslim.analysis import flops
+sys.path.append(sys.path[0] + "/../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "The path of the pretrained model.")
+add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
+add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
+add_arg('num_epochs', int, 120, "The number of total epochs.")
+add_arg('total_images', int, 1281167, "The number of total training images.")
+parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
+add_arg('config_file', str, None, "The config file for compression with yaml format.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('log_period', int, 10, "Log period in batches.")
+add_arg('test_period', int, 10, "Test period in epoches.")
+add_arg('checkpoints', str, "./checkpoints", "Checkpoints path.")
+add_arg('prune_steps', int, 1000, "prune steps.")
+add_arg('retrain_epoch', int, 5, "Retrain epoch.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def piecewise_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ bd = [step * e for e in args.step_epochs]
+ lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def cosine_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ if args.lr_strategy == "piecewise_decay":
+ return piecewise_decay(args)
+ elif args.lr_strategy == "cosine_decay":
+ return cosine_decay(args)
+
+
+def compress(args):
+
+ train_reader = None
+ test_reader = None
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+
+ image_shape = [int(m) for m in image_shape.split(",")]
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ cost = fluid.layers.cross_entropy(input=out, label=label)
+ avg_cost = fluid.layers.mean(x=cost)
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+ val_program = fluid.default_main_program().clone(for_test=True)
+ opt = create_optimizer(args)
+ opt.minimize(avg_cost)
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ if args.pretrained_model:
+
+ def if_exist(var):
+ return os.path.exists(
+ os.path.join(args.pretrained_model, var.name))
+
+ fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+
+ val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
+ train_reader = paddle.batch(
+ train_reader, batch_size=args.batch_size, drop_last=True)
+
+    train_feeder = fluid.DataFeeder([image, label], place)
+    val_feeder = fluid.DataFeeder(
+        [image, label], place, program=val_program)
+
+ def test(epoch, program):
+ batch_id = 0
+ acc_top1_ns = []
+ acc_top5_ns = []
+ for data in val_reader():
+ start_time = time.time()
+ acc_top1_n, acc_top5_n = exe.run(
+ program,
+ feed=train_feeder.feed(data),
+ fetch_list=[acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}".
+ format(epoch, batch_id,
+ np.mean(acc_top1_n),
+ np.mean(acc_top5_n), end_time - start_time))
+ acc_top1_ns.append(np.mean(acc_top1_n))
+ acc_top5_ns.append(np.mean(acc_top5_n))
+ batch_id += 1
+
+ _logger.info(
+ "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format(
+ epoch,
+ np.mean(np.array(acc_top1_ns)), np.mean(
+ np.array(acc_top5_ns))))
+ return np.mean(np.array(acc_top1_ns))
+
+ def train(epoch, program):
+
+ build_strategy = fluid.BuildStrategy()
+ exec_strategy = fluid.ExecutionStrategy()
+ train_program = fluid.compiler.CompiledProgram(
+ program).with_data_parallel(
+ loss_name=avg_cost.name,
+ build_strategy=build_strategy,
+ exec_strategy=exec_strategy)
+
+ batch_id = 0
+ for data in train_reader():
+ start_time = time.time()
+ loss_n, acc_top1_n, acc_top5_n = exe.run(
+ train_program,
+ feed=train_feeder.feed(data),
+ fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ loss_n = np.mean(loss_n)
+ acc_top1_n = np.mean(acc_top1_n)
+ acc_top5_n = np.mean(acc_top5_n)
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}".
+ format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
+ end_time - start_time))
+ batch_id += 1
+
+ params = []
+ for param in fluid.default_main_program().global_block().all_parameters():
+ if "_sep_weights" in param.name:
+ params.append(param.name)
+
+ def eval_func(program):
+ return test(0, program)
+
+ if args.data == "mnist":
+ train(0, fluid.default_main_program())
+
+ pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints)
+ pruned_program, pruned_val_program, iter = pruner.restore()
+
+ if pruned_program is None:
+ pruned_program = fluid.default_main_program()
+ if pruned_val_program is None:
+ pruned_val_program = val_program
+
+ base_flops = flops(val_program)
+
+ start = iter
+ end = args.prune_steps
+ for iter in range(start, end):
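+        # Each step lets SensitivePruner pick the parameter to which accuracy is
+        # least sensitive (topk=1) and prune it by a further ratio of 0.03.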
+ pruned_program, pruned_val_program = pruner.greedy_prune(
+ pruned_program, pruned_val_program, params, 0.03, topk=1)
+ current_flops = flops(pruned_val_program)
+ print("iter:{}; pruned FLOPS: {}".format(
+ iter, float(base_flops - current_flops) / base_flops))
+ acc = None
+ for i in range(args.retrain_epoch):
+ train(i, pruned_program)
+ acc = test(i, pruned_val_program)
+ print("iter:{}; pruned FLOPS: {}; acc: {}".format(
+ iter, float(base_flops - current_flops) / base_flops, acc))
+ pruner.save_checkpoint(pruned_program, pruned_val_program)
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ compress(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/sensitive_prune/prune.py b/demo/sensitive_prune/prune.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6c1ba7ccd09f41c8d0652075036a1c279251517
--- /dev/null
+++ b/demo/sensitive_prune/prune.py
@@ -0,0 +1,223 @@
+import os
+import sys
+import logging
+import paddle
+import argparse
+import functools
+import math
+import time
+import numpy as np
+import paddle.fluid as fluid
+from paddleslim.prune import SensitivePruner
+from paddleslim.common import get_logger
+from paddleslim.analysis import flops
+sys.path.append(sys.path[0] + "/../")
+import models
+from utility import add_arguments, print_arguments
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+parser = argparse.ArgumentParser(description=__doc__)
+add_arg = functools.partial(add_arguments, argparser=parser)
+# yapf: disable
+add_arg('batch_size', int, 64 * 4, "Minibatch size.")
+add_arg('use_gpu', bool, True, "Whether to use GPU or not.")
+add_arg('model', str, "MobileNet", "The target model.")
+add_arg('pretrained_model', str, "../pretrained_model/MobileNetV1_pretained", "The path of the pretrained model.")
+add_arg('lr', float, 0.1, "The learning rate used to fine-tune pruned model.")
+add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.")
+add_arg('l2_decay', float, 3e-5, "The l2_decay parameter.")
+add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.")
+add_arg('num_epochs', int, 120, "The number of total epochs.")
+add_arg('total_images', int, 1281167, "The number of total training images.")
+parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step")
+add_arg('config_file', str, None, "The config file for compression with yaml format.")
+add_arg('data', str, "mnist", "Which data to use. 'mnist' or 'imagenet'")
+add_arg('log_period', int, 10, "Log period in batches.")
+add_arg('test_period', int, 10, "Test period in epoches.")
+add_arg('checkpoints', str, "./checkpoints", "Checkpoints path.")
+# yapf: enable
+
+model_list = [m for m in dir(models) if "__" not in m]
+
+
+def piecewise_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ bd = [step * e for e in args.step_epochs]
+ lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
+ learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def cosine_decay(args):
+ step = int(math.ceil(float(args.total_images) / args.batch_size))
+ learning_rate = fluid.layers.cosine_decay(
+ learning_rate=args.lr, step_each_epoch=step, epochs=args.num_epochs)
+ optimizer = fluid.optimizer.Momentum(
+ learning_rate=learning_rate,
+ momentum=args.momentum_rate,
+ regularization=fluid.regularizer.L2Decay(args.l2_decay))
+ return optimizer
+
+
+def create_optimizer(args):
+ if args.lr_strategy == "piecewise_decay":
+ return piecewise_decay(args)
+ elif args.lr_strategy == "cosine_decay":
+ return cosine_decay(args)
+
+
+def compress(args):
+
+ train_reader = None
+ test_reader = None
+ if args.data == "mnist":
+ import paddle.dataset.mnist as reader
+ train_reader = reader.train()
+ val_reader = reader.test()
+ class_dim = 10
+ image_shape = "1,28,28"
+ elif args.data == "imagenet":
+ import imagenet_reader as reader
+ train_reader = reader.train()
+ val_reader = reader.val()
+ class_dim = 1000
+ image_shape = "3,224,224"
+ else:
+ raise ValueError("{} is not supported.".format(args.data))
+
+ image_shape = [int(m) for m in image_shape.split(",")]
+ assert args.model in model_list, "{} is not in lists: {}".format(
+ args.model, model_list)
+ image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ # model definition
+ model = models.__dict__[args.model]()
+ out = model.net(input=image, class_dim=class_dim)
+ cost = fluid.layers.cross_entropy(input=out, label=label)
+ avg_cost = fluid.layers.mean(x=cost)
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
+ val_program = fluid.default_main_program().clone(for_test=True)
+ opt = create_optimizer(args)
+ opt.minimize(avg_cost)
+ place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+
+ if args.pretrained_model:
+
+ def if_exist(var):
+ return os.path.exists(
+ os.path.join(args.pretrained_model, var.name))
+
+ fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist)
+
+ val_reader = paddle.batch(val_reader, batch_size=args.batch_size)
+ train_reader = paddle.batch(
+ train_reader, batch_size=args.batch_size, drop_last=True)
+
+    train_feeder = fluid.DataFeeder([image, label], place)
+    val_feeder = fluid.DataFeeder(
+        [image, label], place, program=val_program)
+
+ def test(epoch, program):
+ batch_id = 0
+ acc_top1_ns = []
+ acc_top5_ns = []
+ for data in val_reader():
+ start_time = time.time()
+ acc_top1_n, acc_top5_n = exe.run(
+ program,
+ feed=train_feeder.feed(data),
+ fetch_list=[acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "Eval epoch[{}] batch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}".
+ format(epoch, batch_id,
+ np.mean(acc_top1_n),
+ np.mean(acc_top5_n), end_time - start_time))
+ acc_top1_ns.append(np.mean(acc_top1_n))
+ acc_top5_ns.append(np.mean(acc_top5_n))
+ batch_id += 1
+
+ _logger.info(
+ "Final eval epoch[{}] - acc_top1: {:.3f}; acc_top5: {:.3f}".format(
+ epoch,
+ np.mean(np.array(acc_top1_ns)), np.mean(
+ np.array(acc_top5_ns))))
+ return np.mean(np.array(acc_top1_ns))
+
+ def train(epoch, program):
+
+ build_strategy = fluid.BuildStrategy()
+ exec_strategy = fluid.ExecutionStrategy()
+ train_program = fluid.compiler.CompiledProgram(
+ program).with_data_parallel(
+ loss_name=avg_cost.name,
+ build_strategy=build_strategy,
+ exec_strategy=exec_strategy)
+
+ batch_id = 0
+ for data in train_reader():
+ start_time = time.time()
+ loss_n, acc_top1_n, acc_top5_n = exe.run(
+ train_program,
+ feed=train_feeder.feed(data),
+ fetch_list=[avg_cost.name, acc_top1.name, acc_top5.name])
+ end_time = time.time()
+ loss_n = np.mean(loss_n)
+ acc_top1_n = np.mean(acc_top1_n)
+ acc_top5_n = np.mean(acc_top5_n)
+ if batch_id % args.log_period == 0:
+ _logger.info(
+ "epoch[{}]-batch[{}] - loss: {:.3f}; acc_top1: {:.3f}; acc_top5: {:.3f}; time: {:.3f}".
+ format(epoch, batch_id, loss_n, acc_top1_n, acc_top5_n,
+ end_time - start_time))
+ batch_id += 1
+
+ params = []
+ for param in fluid.default_main_program().global_block().all_parameters():
+ if "_sep_weights" in param.name:
+ params.append(param.name)
+
+ def eval_func(program):
+ return test(0, program)
+
+ if args.data == "mnist":
+ train(0, fluid.default_main_program())
+
+ pruner = SensitivePruner(place, eval_func, checkpoints=args.checkpoints)
+ pruned_program, pruned_val_program, iter = pruner.restore()
+
+ if pruned_program is None:
+ pruned_program = fluid.default_main_program()
+ if pruned_val_program is None:
+ pruned_val_program = val_program
+
+ start = iter
+ end = 6
+ for iter in range(start, end):
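+        # Prune step by step: 0.1 is the pruning target passed to SensitivePruner
+        # for each iteration, and eval_func guides the sensitivity analysis.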
+ pruned_program, pruned_val_program = pruner.prune(
+ pruned_program, pruned_val_program, params, 0.1)
+ train(iter, pruned_program)
+ test(iter, pruned_val_program)
+ pruner.save_checkpoint(pruned_program, pruned_val_program)
+
+ print("before flops: {}".format(flops(fluid.default_main_program())))
+ print("after flops: {}".format(flops(pruned_val_program)))
+
+
+def main():
+ args = parser.parse_args()
+ print_arguments(args)
+ compress(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/utility.py b/demo/utility.py
new file mode 100644
index 0000000000000000000000000000000000000000..dd52f69457c9f8d94920b85dc09b58ff8e605a64
--- /dev/null
+++ b/demo/utility.py
@@ -0,0 +1,156 @@
+"""Contains common utility functions."""
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+#Licensed under the Apache License, Version 2.0 (the "License");
+#you may not use this file except in compliance with the License.
+#You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+#Unless required by applicable law or agreed to in writing, software
+#distributed under the License is distributed on an "AS IS" BASIS,
+#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#See the License for the specific language governing permissions and
+#limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import distutils.util
+import os
+import numpy as np
+import six
+import logging
+import paddle.fluid as fluid
+import paddle.compat as cpt
+from paddle.fluid import core
+from paddle.fluid.framework import Program
+
+logging.basicConfig(format='%(asctime)s-%(levelname)s: %(message)s')
+_logger = logging.getLogger(__name__)
+_logger.setLevel(logging.INFO)
+
+
+def print_arguments(args):
+ """Print argparse's arguments.
+
+ Usage:
+
+ .. code-block:: python
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument("name", default="Jonh", type=str, help="User name.")
+ args = parser.parse_args()
+ print_arguments(args)
+
+ :param args: Input argparse.Namespace for printing.
+ :type args: argparse.Namespace
+ """
+ print("----------- Configuration Arguments -----------")
+ for arg, value in sorted(six.iteritems(vars(args))):
+ print("%s: %s" % (arg, value))
+ print("------------------------------------------------")
+
+
+def add_arguments(argname, type, default, help, argparser, **kwargs):
+ """Add argparse's argument.
+
+ Usage:
+
+ .. code-block:: python
+
+ parser = argparse.ArgumentParser()
+ add_argument("name", str, "Jonh", "User name.", parser)
+ args = parser.parse_args()
+ """
+ type = distutils.util.strtobool if type == bool else type
+ argparser.add_argument(
+ "--" + argname,
+ default=default,
+ type=type,
+ help=help + ' Default: %(default)s.',
+ **kwargs)
+
+
+def save_persistable_nodes(executor, dirname, graph):
+ """
+ Save persistable nodes to the given directory by the executor.
+
+ Args:
+ executor(Executor): The executor to run for saving node values.
+ dirname(str): The directory path.
+ graph(IrGraph): All the required persistable nodes in the graph will be saved.
+ """
+ persistable_node_names = set()
+ persistable_nodes = []
+ all_persistable_nodes = graph.all_persistable_nodes()
+ for node in all_persistable_nodes:
+ name = cpt.to_text(node.name())
+ if name not in persistable_node_names:
+ persistable_node_names.add(name)
+ persistable_nodes.append(node)
+ program = Program()
+ var_list = []
+ for node in persistable_nodes:
+ var_desc = node.var()
+ if var_desc.type() == core.VarDesc.VarType.RAW or \
+ var_desc.type() == core.VarDesc.VarType.READER:
+ continue
+ var = program.global_block().create_var(
+ name=var_desc.name(),
+ shape=var_desc.shape(),
+ dtype=var_desc.dtype(),
+ type=var_desc.type(),
+ lod_level=var_desc.lod_level(),
+ persistable=var_desc.persistable())
+ var_list.append(var)
+ fluid.io.save_vars(executor=executor, dirname=dirname, vars=var_list)
+
+
+def load_persistable_nodes(executor, dirname, graph):
+ """
+ Load persistable node values from the given directory by the executor.
+
+ Args:
+ executor(Executor): The executor to run for loading node values.
+ dirname(str): The directory path.
+ graph(IrGraph): All the required persistable nodes in the graph will be loaded.
+ """
+ persistable_node_names = set()
+ persistable_nodes = []
+ all_persistable_nodes = graph.all_persistable_nodes()
+ for node in all_persistable_nodes:
+ name = cpt.to_text(node.name())
+ if name not in persistable_node_names:
+ persistable_node_names.add(name)
+ persistable_nodes.append(node)
+ program = Program()
+ var_list = []
+
+ def _exist(var):
+ return os.path.exists(os.path.join(dirname, var.name))
+
+ def _load_var(name, scope):
+ return np.array(scope.find_var(name).get_tensor())
+
+ def _store_var(name, array, scope, place):
+ tensor = scope.find_var(name).get_tensor()
+ tensor.set(array, place)
+
+ for node in persistable_nodes:
+ var_desc = node.var()
+ if var_desc.type() == core.VarDesc.VarType.RAW or \
+ var_desc.type() == core.VarDesc.VarType.READER:
+ continue
+ var = program.global_block().create_var(
+ name=var_desc.name(),
+ shape=var_desc.shape(),
+ dtype=var_desc.dtype(),
+ type=var_desc.type(),
+ lod_level=var_desc.lod_level(),
+ persistable=var_desc.persistable())
+ if _exist(var):
+ var_list.append(var)
+ else:
+ _logger.info("Cannot find the var %s!!!" % (node.name()))
+ fluid.io.load_vars(executor=executor, dirname=dirname, vars=var_list)
diff --git a/paddleslim/common/__init__.py b/paddleslim/common/__init__.py
index 98b314ab6d144924bff6b68e3fb176ce73583f5c..2794cd4d86c0996155fd8d6e9dd830cdc8775e09 100644
--- a/paddleslim/common/__init__.py
+++ b/paddleslim/common/__init__.py
@@ -23,6 +23,8 @@ import controller_client
from controller_client import *
import lock_utils
from lock_utils import *
+import cached_reader as cached_reader_module
+from cached_reader import *
__all__ = []
__all__ += controller.__all__
@@ -30,3 +32,4 @@ __all__ += sa_controller.__all__
__all__ += controller_server.__all__
__all__ += controller_client.__all__
__all__ += lock_utils.__all__
+__all__ += cached_reader_module.__all__
diff --git a/paddleslim/common/cached_reader.py b/paddleslim/common/cached_reader.py
new file mode 100644
index 0000000000000000000000000000000000000000..55f27054efe55d9df90352b3e707fe51c8996023
--- /dev/null
+++ b/paddleslim/common/cached_reader.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import logging
+import numpy as np
+from .log_helper import get_logger
+
+__all__ = ['cached_reader']
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+
+def cached_reader(reader, sampled_rate, cache_path, cached_id):
+ """
+ Sample partial data from reader and cache them into local file system.
+ Args:
+ reader: Iterative data source.
+        sampled_rate(float): The rate at which batches are sampled from ``reader``; the first batch is always kept.
+        cache_path(str): The path to cache the sampled data.
+        cached_id(int): The id of the sampled dataset. Evaluations with the same cached_id reuse the same sampled data.
+ """
+ np.random.seed(cached_id)
+ cache_path = os.path.join(cache_path, str(cached_id))
+ _logger.debug('read data from: {}'.format(cache_path))
+
+ def s_reader():
+ if os.path.isdir(cache_path):
+ for file_name in open(os.path.join(cache_path, "list")):
+ yield np.load(
+ os.path.join(cache_path, file_name.strip()),
+ allow_pickle=True)
+ else:
+            os.makedirs(cache_path)
+            list_file = open(os.path.join(cache_path, "list"), 'w')
+            batch = 0
+            for data in reader():
+                # Keep the first batch unconditionally; sample the rest.
+                if batch == 0 or (np.random.uniform() < sampled_rate):
+                    np.save(
+                        os.path.join(cache_path, 'batch' + str(batch)), data)
+                    list_file.write('batch' + str(batch) + '.npy\n')
+                    batch += 1
+                    yield data
+            list_file.close()
+
+ return s_reader
diff --git a/paddleslim/common/controller_client.py b/paddleslim/common/controller_client.py
index 5dcbd7bb64bf4460371d523a0f745e2490a7b3a0..8a8ebbde3d738438d3cca484ca9c824d853837b2 100644
--- a/paddleslim/common/controller_client.py
+++ b/paddleslim/common/controller_client.py
@@ -38,7 +38,7 @@ class ControllerClient(object):
self.socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._key = key
- def update(self, tokens, reward):
+ def update(self, tokens, reward, iter):
"""
Update the controller according to latest tokens and reward.
Args:
@@ -48,11 +48,13 @@ class ControllerClient(object):
socket_client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
socket_client.connect((self.server_ip, self.server_port))
tokens = ",".join([str(token) for token in tokens])
- socket_client.send("{}\t{}\t{}".format(self._key, tokens, reward)
- .encode())
- tokens = socket_client.recv(1024).decode()
- tokens = [int(token) for token in tokens.strip("\n").split(",")]
- return tokens
+ socket_client.send("{}\t{}\t{}\t{}".format(self._key, tokens, reward,
+ iter).encode())
+ response = socket_client.recv(1024).decode()
+        # The server replies with a bare "ok" once the update has been accepted.
+        return response.strip('\n') == "ok"
def next_tokens(self):
"""
diff --git a/paddleslim/common/controller_server.py b/paddleslim/common/controller_server.py
index 74b954db3bb1c4520551e82b5e8ba3b9514c549c..eb37fe914e99ced520b3ab8faf4227466e83ae3c 100644
--- a/paddleslim/common/controller_server.py
+++ b/paddleslim/common/controller_server.py
@@ -51,23 +51,8 @@ class ControllerServer(object):
self._port = address[1]
self._ip = address[0]
self._key = key
- self._socket_file = "./controller_server.socket"
def start(self):
- open(self._socket_file, 'a').close()
- socket_file = open(self._socket_file, 'r+')
- lock(socket_file)
- tid = socket_file.readline()
- if tid == '':
- _logger.info("start controller server...")
- tid = self._start()
- socket_file.write("tid: {}\nip: {}\nport: {}\n".format(
- tid, self._ip, self._port))
- _logger.info("started controller server...")
- unlock(socket_file)
- socket_file.close()
-
- def _start(self):
self._socket_server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self._socket_server.bind(self._address)
self._socket_server.listen(self._max_client_num)
@@ -82,7 +67,6 @@ class ControllerServer(object):
def close(self):
"""Close the server."""
self._closed = True
- os.remove(self._socket_file)
_logger.info("server closed!")
def port(self):
@@ -109,20 +93,22 @@ class ControllerServer(object):
_logger.debug("recv message from {}: [{}]".format(addr,
message))
messages = message.strip('\n').split("\t")
- if (len(messages) < 3) or (messages[0] != self._key):
+ if (len(messages) < 4) or (messages[0] != self._key):
_logger.debug("recv noise from {}: [{}]".format(
addr, message))
continue
tokens = messages[1]
reward = messages[2]
+ iter = messages[3]
tokens = [int(token) for token in tokens.split(",")]
- self._controller.update(tokens, float(reward))
- tokens = self._controller.next_tokens()
- tokens = ",".join([str(token) for token in tokens])
- conn.send(tokens.encode())
+ self._controller.update(tokens, float(reward), int(iter))
+ response = "ok"
+ conn.send(response.encode())
_logger.debug("send message to {}: [{}]".format(addr,
tokens))
conn.close()
+        except Exception as err:
+ _logger.error(err)
finally:
self._socket_server.close()
self.close()
diff --git a/paddleslim/common/log_helper.py b/paddleslim/common/log_helper.py
index 1088761e0284181bc485f5ee1824e1cbd9c7eb81..18000ce4ec6c472914de49a053e960c02cfd8e32 100644
--- a/paddleslim/common/log_helper.py
+++ b/paddleslim/common/log_helper.py
@@ -19,7 +19,7 @@ import logging
__all__ = ['get_logger']
-def get_logger(name, level, fmt=None):
+def get_logger(name, level, fmt='%(asctime)s-%(levelname)s: %(message)s'):
"""
Get logger from logging with given name, level and format without
setting logging basicConfig. For setting basicConfig in paddle
@@ -39,10 +39,10 @@ def get_logger(name, level, fmt=None):
logger = logging.getLogger(name)
logger.setLevel(level)
handler = logging.StreamHandler()
-
if fmt:
formatter = logging.Formatter(fmt=fmt)
handler.setFormatter(formatter)
logger.addHandler(handler)
+ logger.propagate = 0
return logger
diff --git a/paddleslim/common/sa_controller.py b/paddleslim/common/sa_controller.py
index b619b818a3208d740c1ddb6753cf5931f3d058f5..f3dfa4b9c0101f068be7c925cdb0f738f7b00d4c 100644
--- a/paddleslim/common/sa_controller.py
+++ b/paddleslim/common/sa_controller.py
@@ -32,7 +32,7 @@ class SAController(EvolutionaryController):
range_table=None,
reduce_rate=0.85,
init_temperature=1024,
- max_iter_number=300,
+ max_try_times=None,
init_tokens=None,
constrain_func=None):
"""Initialize.
@@ -40,7 +40,7 @@ class SAController(EvolutionaryController):
range_table(list): Range table.
reduce_rate(float): The decay rate of temperature.
init_temperature(float): Init temperature.
- max_iter_number(int): max iteration number.
+ max_try_times(int): max try times before get legal tokens.
init_tokens(list): The initial tokens.
constrain_func(function): The callback function used to check whether the tokens meet constraint. None means there is no constraint. Default: None.
"""
@@ -50,7 +50,7 @@ class SAController(EvolutionaryController):
len(self._range_table) == 2)
self._reduce_rate = reduce_rate
self._init_temperature = init_temperature
- self._max_iter_number = max_iter_number
+ self._max_try_times = max_try_times
self._reward = -1
self._tokens = init_tokens
self._constrain_func = constrain_func
@@ -65,14 +65,16 @@ class SAController(EvolutionaryController):
d[key] = self.__dict__[key]
return d
- def update(self, tokens, reward):
+ def update(self, tokens, reward, iter):
"""
Update the controller according to latest tokens and reward.
Args:
tokens(list): The tokens generated in last step.
reward(float): The reward of tokens.
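+            iter(int): The iteration number reported by the client; the controller syncs to the largest value seen.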
"""
- self._iter += 1
+ iter = int(iter)
+ if iter > self._iter:
+ self._iter = iter
temperature = self._init_temperature * self._reduce_rate**self._iter
if (reward > self._reward) or (np.random.random() <= math.exp(
(reward - self._reward) / temperature)):
@@ -96,12 +98,12 @@ class SAController(EvolutionaryController):
new_tokens = tokens[:]
index = int(len(self._range_table[0]) * np.random.random())
new_tokens[index] = np.random.randint(self._range_table[0][index],
- self._range_table[1][index] + 1)
+ self._range_table[1][index])
_logger.debug("change index[{}] from {} to {}".format(index, tokens[
index], new_tokens[index]))
- if self._constrain_func is None:
+ if self._constrain_func is None or self._max_try_times is None:
return new_tokens
- for _ in range(self._max_iter_number):
+ for _ in range(self._max_try_times):
if not self._constrain_func(new_tokens):
index = int(len(self._range_table[0]) * np.random.random())
new_tokens = tokens[:]
diff --git a/paddleslim/core/graph_wrapper.py b/paddleslim/core/graph_wrapper.py
index 72de894a2e4345c32e7a4eee2f35249b77c2f467..dc01846a10feb8bf212f9e35b9cd585df47ba739 100644
--- a/paddleslim/core/graph_wrapper.py
+++ b/paddleslim/core/graph_wrapper.py
@@ -54,6 +54,9 @@ class VarWrapper(object):
"""
return self._var.name
+ def __repr__(self):
+ return self._var.name
+
def shape(self):
"""
Get the shape of the varibale.
@@ -131,6 +134,11 @@ class OpWrapper(object):
"""
return self._op.type
+ def __repr__(self):
+ return "op[id: {}, type: {}; inputs: {}]".format(self.idx(),
+ self.type(),
+ self.all_inputs())
+
def is_bwd_op(self):
"""
Whether this operator is backward op.
diff --git a/paddleslim/dist/single_distiller.py b/paddleslim/dist/single_distiller.py
new file mode 100644
index 0000000000000000000000000000000000000000..70b843c90fec6bdf906045dbac3097f8dfba3ff1
--- /dev/null
+++ b/paddleslim/dist/single_distiller.py
@@ -0,0 +1,189 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+import paddle.fluid as fluid
+
+
+def merge(teacher_program,
+ student_program,
+ data_name_map,
+ place,
+ teacher_scope=fluid.global_scope(),
+ student_scope=fluid.global_scope(),
+ name_prefix='teacher_'):
+ """
+ Merge teacher program into student program and add a uniform prefix to the
+ names of all vars in teacher program
+ Args:
+ teacher_program(Program): The input teacher model paddle program
+ student_program(Program): The input student model paddle program
+        data_name_map(dict): Describe the mapping between the teacher var name
+ and the student var name
+ place(fluid.CPUPlace()|fluid.CUDAPlace(N)): This parameter represents
+ paddle run on which device.
+ student_scope(Scope): The input student scope
+ teacher_scope(Scope): The input teacher scope
+ name_prefix(str): Name prefix added for all vars of the teacher program.
+ Return(Program): Merged program.
+ """
+ teacher_program = teacher_program.clone(for_test=True)
+ for teacher_var in teacher_program.list_vars():
+ skip_rename = False
+ if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
+ if teacher_var.name in data_name_map.keys():
+ new_name = data_name_map[teacher_var.name]
+ if new_name == teacher_var.name:
+ skip_rename = True
+ else:
+ new_name = name_prefix + teacher_var.name
+ if not skip_rename:
+ # scope var rename
+ scope_var = teacher_scope.var(teacher_var.name).get_tensor()
+ renamed_scope_var = teacher_scope.var(new_name).get_tensor()
+ renamed_scope_var.set(np.array(scope_var), place)
+
+ # program var rename
+ renamed_var = teacher_program.global_block()._rename_var(
+ teacher_var.name, new_name)
+
+ for teacher_var in teacher_program.list_vars():
+ if teacher_var.name != 'fetch' and teacher_var.name != 'feed':
+ # student scope add var
+ student_scope_var = student_scope.var(teacher_var.name).get_tensor()
+ teacher_scope_var = teacher_scope.var(teacher_var.name).get_tensor()
+ student_scope_var.set(np.array(teacher_scope_var), place)
+
+ # student program add var
+ new_var = student_program.global_block()._clone_variable(
+ teacher_var, force_persistable=False)
+ new_var.stop_gradient = True
+
+ for block in teacher_program.blocks:
+ for op in block.ops:
+ if op.type != 'feed' and op.type != 'fetch':
+ inputs = {}
+ outputs = {}
+ attrs = {}
+ for input_name in op.input_names:
+ inputs[input_name] = [
+ block.var(in_var_name)
+ for in_var_name in op.input(input_name)
+ ]
+
+ for output_name in op.output_names:
+ outputs[output_name] = [
+ block.var(out_var_name)
+ for out_var_name in op.output(output_name)
+ ]
+ for attr_name in op.attr_names:
+ attrs[attr_name] = op.attr(attr_name)
+ student_program.global_block().append_op(
+ type=op.type, inputs=inputs, outputs=outputs, attrs=attrs)
+ return student_program
+
+
+def fsp_loss(teacher_var1_name, teacher_var2_name, student_var1_name,
+ student_var2_name, program=fluid.default_main_program()):
+ """
+ Combine variables from student model and teacher model by fsp-loss.
+ Args:
+ teacher_var1_name(str): The name of teacher_var1.
+ teacher_var2_name(str): The name of teacher_var2. Except for the
+ second dimension, all other dimensions should
+ be consistent with teacher_var1.
+ student_var1_name(str): The name of student_var1.
+ student_var2_name(str): The name of student_var2. Except for the
+ second dimension, all other dimensions should
+ be consistent with student_var1.
+ program(Program): The input distiller program.
+ default: fluid.default_main_program()
+ Return(Variable): fsp distiller loss.
+ """
+ teacher_var1 = program.global_block().var(teacher_var1_name)
+ teacher_var2 = program.global_block().var(teacher_var2_name)
+ student_var1 = program.global_block().var(student_var1_name)
+ student_var2 = program.global_block().var(student_var2_name)
+ teacher_fsp_matrix = fluid.layers.fsp_matrix(teacher_var1, teacher_var2)
+ student_fsp_matrix = fluid.layers.fsp_matrix(student_var1, student_var2)
+ fsp_loss = fluid.layers.reduce_mean(
+ fluid.layers.square(student_fsp_matrix - teacher_fsp_matrix))
+ return fsp_loss
+
+
+def l2_loss(teacher_var_name, student_var_name,
+ program=fluid.default_main_program()):
+ """
+ Combine variables from student model and teacher model by l2-loss.
+ Args:
+ teacher_var_name(str): The name of teacher_var.
+ student_var_name(str): The name of student_var.
+ program(Program): The input distiller program.
+ default: fluid.default_main_program()
+ Return(Variable): l2 distiller loss.
+ """
+ student_var = program.global_block().var(student_var_name)
+ teacher_var = program.global_block().var(teacher_var_name)
+ l2_loss = fluid.layers.reduce_mean(
+ fluid.layers.square(student_var - teacher_var))
+ return l2_loss
+
+
+def soft_label_loss(teacher_var_name,
+ student_var_name,
+ program=fluid.default_main_program(),
+ teacher_temperature=1.,
+ student_temperature=1.):
+ """
+ Combine variables from student model and teacher model by soft-label-loss.
+ Args:
+ teacher_var_name(str): The name of teacher_var.
+ student_var_name(str): The name of student_var.
+ program(Program): The input distiller program.
+ default: fluid.default_main_program()
+ teacher_temperature(float): Temperature used to divide
+ teacher_feature_map before softmax. default: 1.0
+ student_temperature(float): Temperature used to divide
+ student_feature_map before softmax. default: 1.0
+    Return(Variable): soft label distiller loss.
+ """
+ student_var = program.global_block().var(student_var_name)
+ teacher_var = program.global_block().var(teacher_var_name)
+ student_var = fluid.layers.softmax(student_var / student_temperature)
+ teacher_var = fluid.layers.softmax(teacher_var / teacher_temperature)
+ teacher_var.stop_gradient = True
+ soft_label_loss = fluid.layers.reduce_mean(
+ fluid.layers.cross_entropy(
+ student_var, teacher_var, soft_label=True))
+ return soft_label_loss
+
+
+def loss(loss_func, program=fluid.default_main_program(), **kwargs):
+ """
+ Combine variables from student model and teacher model by self defined loss.
+ Args:
+ program(Program): The input distiller program.
+ default: fluid.default_main_program()
+        loss_func(function): The user-defined loss function.
+ Return(Variable): self defined distiller loss.
+ """
+ func_parameters = {}
+ for item in kwargs.items():
+ if isinstance(item[1], str):
+ func_parameters.setdefault(item[0],
+ program.global_block().var(item[1]))
+ else:
+ func_parameters.setdefault(item[0], item[1])
+ loss = loss_func(**func_parameters)
+ return loss
diff --git a/paddleslim/nas/sa_nas.py b/paddleslim/nas/sa_nas.py
index cfc747b0ec9f977dc4e41d2fb128b29823cfd3a3..00decbfd1ae38dfa3fedf3234665ca740674d603 100644
--- a/paddleslim/nas/sa_nas.py
+++ b/paddleslim/nas/sa_nas.py
@@ -15,6 +15,7 @@
import socket
import logging
import numpy as np
+import hashlib
import paddle.fluid as fluid
from ..core import VarWrapper, OpWrapper, GraphWrapper
from ..common import SAController
@@ -33,97 +34,75 @@ _logger = get_logger(__name__, level=logging.INFO)
class SANAS(object):
def __init__(self,
configs,
- max_flops=None,
- max_latency=None,
- server_addr=("", 0),
+ server_addr=("", 8881),
init_temperature=100,
reduce_rate=0.85,
- max_try_number=300,
- max_client_num=10,
search_steps=300,
key="sa_nas",
- is_server=True):
+ is_server=False):
"""
Search a group of ratios used to prune program.
Args:
configs(list): A list of search space configuration with format (key, input_size, output_size, block_num).
`key` is the name of search space with data type str. `input_size` and `output_size` are
input size and output size of searched sub-network. `block_num` is the number of blocks in searched network.
- max_flops(int): The max flops of searched network. None means no constrains. Default: None.
- max_latency(float): The max latency of searched network. None means no constrains. Default: None.
server_addr(tuple): A tuple of server ip and server port for controller server.
init_temperature(float): The init temperature used in simulated annealing search strategy.
reduce_rate(float): The decay rate used in simulated annealing search strategy.
- max_try_number(int): The max number of trying to generate legal tokens.
- max_client_num(int): The max number of connections of controller server.
search_steps(int): The steps of searching.
key(str): Identity used in communication between controller server and clients.
-        is_server(bool): Whether current host is controller server. Default: True.
+        is_server(bool): Whether current host is controller server. Default: False.
"""
-
+ if not is_server:
+ assert server_addr[
+ 0] != "", "You should set the IP and port of server when is_server is False."
self._reduce_rate = reduce_rate
self._init_temperature = init_temperature
- self._max_try_number = max_try_number
self._is_server = is_server
- self._max_flops = max_flops
- self._max_latency = max_latency
-
self._configs = configs
-
- factory = SearchSpaceFactory()
- self._search_space = factory.get_search_space(configs)
- init_tokens = self._search_space.init_tokens()
- range_table = self._search_space.range_table()
- range_table = (len(range_table) * [0], range_table)
-
- print range_table
-
- controller = SAController(range_table, self._reduce_rate,
- self._init_temperature, self._max_try_number,
- init_tokens, self._constrain_func)
+ self._key = hashlib.md5(str(self._configs)).hexdigest()
server_ip, server_port = server_addr
if server_ip == None or server_ip == "":
server_ip = self._get_host_ip()
- self._controller_server = ControllerServer(
- controller=controller,
- address=(server_ip, server_port),
- max_client_num=max_client_num,
- search_steps=search_steps,
- key=key)
+ factory = SearchSpaceFactory()
+ self._search_space = factory.get_search_space(configs)
# create controller server
if self._is_server:
+ init_tokens = self._search_space.init_tokens()
+ range_table = self._search_space.range_table()
+ range_table = (len(range_table) * [0], range_table)
+ _logger.info("range table: {}".format(range_table))
+ controller = SAController(
+ range_table,
+ self._reduce_rate,
+ self._init_temperature,
+ max_try_times=None,
+ init_tokens=init_tokens,
+ constrain_func=None)
+
+ max_client_num = 100
+ self._controller_server = ControllerServer(
+ controller=controller,
+ address=(server_ip, server_port),
+ max_client_num=max_client_num,
+ search_steps=search_steps,
+ key=self._key)
self._controller_server.start()
+ server_port = self._controller_server.port()
self._controller_client = ControllerClient(
- self._controller_server.ip(),
- self._controller_server.port(),
- key=key)
+ server_ip, server_port, key=self._key)
self._iter = 0
def _get_host_ip(self):
return socket.gethostbyname(socket.gethostname())
- def _constrain_func(self, tokens):
- if (self._max_flops is None) and (self._max_latency is None):
- return True
- archs = self._search_space.token2arch(tokens)
- main_program = fluid.Program()
- startup_program = fluid.Program()
- with fluid.program_guard(main_program, startup_program):
- i = 0
- for config, arch in zip(self._configs, archs):
- input_size = config[1]["input_size"]
- input = fluid.data(
- name="data_{}".format(i),
- shape=[None, 3, input_size, input_size],
- dtype="float32")
- output = arch(input)
- i += 1
- return flops(main_program) < self._max_flops
+    def tokens2arch(self, tokens):
+        return self._search_space.token2arch(tokens)
def next_archs(self):
"""
@@ -140,6 +119,9 @@ class SANAS(object):
Return reward of current searched network.
Args:
score(float): The score of current searched network.
+ Returns:
+ bool: True means updating successfully while false means failure.
"""
- self._controller_client.update(self._current_tokens, score)
self._iter += 1
+ return self._controller_client.update(self._current_tokens, score,
+ self._iter)
diff --git a/paddleslim/nas/search_space/combine_search_space.py b/paddleslim/nas/search_space/combine_search_space.py
index 667720a9110aa92e096a4f8fa30bb3e4b3e3cecb..17ebbd3939798ad0e2a7d3fd763bb9427f6e13f0 100644
--- a/paddleslim/nas/search_space/combine_search_space.py
+++ b/paddleslim/nas/search_space/combine_search_space.py
@@ -39,6 +39,7 @@ class CombineSearchSpace(object):
for config_list in config_lists:
key, config = config_list
self.spaces.append(self._get_single_search_space(key, config))
+ self.init_tokens()
def _get_single_search_space(self, key, config):
"""
@@ -51,9 +52,11 @@ class CombineSearchSpace(object):
model space(class)
"""
cls = SEARCHSPACE.get(key)
- space = cls(config['input_size'], config['output_size'],
- config['block_num'], config['block_mask'])
-
+ block_mask = config['block_mask'] if 'block_mask' in config else None
+ space = cls(config['input_size'],
+ config['output_size'],
+ config['block_num'],
+ block_mask=block_mask)
return space
def init_tokens(self):
diff --git a/paddleslim/nas/search_space/mobilenetv1.py b/paddleslim/nas/search_space/mobilenetv1.py
index 8b3277d2cb1b472ccd5e27407e3099b28e64f42b..3976d21df1e3ad2c5ac344dab59ad32adeaedb79 100644
--- a/paddleslim/nas/search_space/mobilenetv1.py
+++ b/paddleslim/nas/search_space/mobilenetv1.py
@@ -32,10 +32,12 @@ class MobileNetV1Space(SearchSpaceBase):
input_size,
output_size,
block_num,
+ block_mask,
scale=1.0,
class_dim=1000):
super(MobileNetV1Space, self).__init__(input_size, output_size,
- block_num)
+ block_num, block_mask)
+        assert self.block_mask is None, 'MobileNetV1Space uses the original MobileNetV1 as the search space, so search with input_size, output_size and block_num'
self.scale = scale
self.class_dim = class_dim
# self.head_num means the channel of first convolution
diff --git a/paddleslim/nas/search_space/mobilenetv2.py b/paddleslim/nas/search_space/mobilenetv2.py
index e974a676a70546e19aa4649679393031634e7822..36231912715a29808d55158881ab3e918260f8b5 100644
--- a/paddleslim/nas/search_space/mobilenetv2.py
+++ b/paddleslim/nas/search_space/mobilenetv2.py
@@ -113,40 +113,69 @@ class MobileNetV2Space(SearchSpaceBase):
if tokens is None:
tokens = self.init_tokens()
- print(tokens)
- bottleneck_params_list = []
+ self.bottleneck_params_list = []
if self.block_num >= 1:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(1, self.head_num[tokens[0]], 1, 1, 3))
if self.block_num >= 2:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[1]], self.filter_num1[tokens[2]],
self.repeat[tokens[3]], 2, self.k_size[tokens[4]]))
if self.block_num >= 3:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[5]], self.filter_num1[tokens[6]],
self.repeat[tokens[7]], 2, self.k_size[tokens[8]]))
if self.block_num >= 4:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[9]], self.filter_num2[tokens[10]],
self.repeat[tokens[11]], 2, self.k_size[tokens[12]]))
if self.block_num >= 5:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[13]], self.filter_num3[tokens[14]],
self.repeat[tokens[15]], 2, self.k_size[tokens[16]]))
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[17]], self.filter_num4[tokens[18]],
self.repeat[tokens[19]], 1, self.k_size[tokens[20]]))
if self.block_num >= 6:
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[21]], self.filter_num5[tokens[22]],
self.repeat[tokens[23]], 2, self.k_size[tokens[24]]))
- bottleneck_params_list.append(
+ self.bottleneck_params_list.append(
(self.multiply[tokens[25]], self.filter_num6[tokens[26]],
self.repeat[tokens[27]], 1, self.k_size[tokens[28]]))
- def net_arch(input):
+ def _modify_bottle_params(output_stride=None):
+ if output_stride is not None and output_stride % 2 != 0:
+ raise Exception("output stride must be an even number")
+ if output_stride is None:
+ return
+ else:
+ stride = 2
+ for i, layer_setting in enumerate(self.bottleneck_params_list):
+ t, c, n, s, ks = layer_setting
+ stride = stride * s
+ if stride > output_stride:
+ s = 1
+ self.bottleneck_params_list[i] = (t, c, n, s, ks)
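+ # e.g. with output_stride=16, once the cumulative stride of the stem
+ # conv and the preceding blocks exceeds 16, every later block's stride
+ # is forced to 1 so the feature map is not downsampled any further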
+
+ def net_arch(input,
+ end_points=None,
+ decode_points=None,
+ output_stride=None):
+ _modify_bottle_params(output_stride)
+
+ decode_ends = dict()
+
+ def check_points(count, points):
+ if points is None:
+ return False
+ else:
+ if isinstance(points, list):
+ return count in points
+ else:
+ return count == points
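+
+ # e.g. decode_points=[2, 4] stores the intermediate outputs of layers 2
+ # and 4 in decode_ends, while end_points=6 makes the network return
+ # early at layer 6 with (output, decode_ends)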
+
#conv1
# all padding is 'SAME' in the conv2d, can compute the actual padding automatic.
input = conv_bn_layer(
@@ -157,14 +186,21 @@ class MobileNetV2Space(SearchSpaceBase):
padding='SAME',
act='relu6',
name='mobilenetv2_conv1_1')
+ layer_count = 1
+ if check_points(layer_count, decode_points):
+ decode_ends[layer_count] = input
+
+ if check_points(layer_count, end_points):
+ return input, decode_ends
# bottleneck sequences
i = 1
in_c = int(32 * self.scale)
- for layer_setting in bottleneck_params_list:
+ for layer_setting in self.bottleneck_params_list:
t, c, n, s, k = layer_setting
i += 1
- input = self._invresi_blocks(
+ input, depthwise_output = self._invresi_blocks(
input=input,
in_c=in_c,
t=t,
@@ -174,6 +210,33 @@ class MobileNetV2Space(SearchSpaceBase):
k=k,
name='mobilenetv2_conv' + str(i))
in_c = int(c * self.scale)
+ layer_count += 1
+
+ ### decode_points and end_points refer to block numbers
+ if check_points(layer_count, decode_points):
+ decode_ends[layer_count] = depthwise_output
+
+ if check_points(layer_count, end_points):
+ return input, decode_ends
+
+ # last conv
+ input = conv_bn_layer(
+ input=input,
+ num_filters=int(1280 * self.scale)
+ if self.scale > 1.0 else 1280,
+ filter_size=1,
+ stride=1,
+ padding='SAME',
+ act='relu6',
+ name='mobilenetv2_conv' + str(i + 1))
+
+ input = fluid.layers.pool2d(
+ input=input,
+ pool_size=7,
+ pool_stride=1,
+ pool_type='avg',
+ global_pooling=True,
+ name='mobilenetv2_last_pool')
# if output_size is 1, add fc layer in the end
if self.output_size == 1:
@@ -248,6 +311,8 @@ class MobileNetV2Space(SearchSpaceBase):
name=name + '_dwise',
use_cudnn=False)
+ depthwise_output = bottleneck_conv
+
linear_out = conv_bn_layer(
input=bottleneck_conv,
num_filters=num_filters,
@@ -260,7 +325,7 @@ class MobileNetV2Space(SearchSpaceBase):
out = linear_out
if ifshortcut:
out = self._shortcut(input=input, data_residual=out)
- return out
+ return out, depthwise_output
def _invresi_blocks(self, input, in_c, t, c, n, s, k, name=None):
"""Build inverted residual blocks.
@@ -276,7 +341,7 @@ class MobileNetV2Space(SearchSpaceBase):
Returns:
Variable, layers output.
"""
- first_block = self._inverted_residual_unit(
+ first_block, depthwise_output = self._inverted_residual_unit(
input=input,
num_in_filter=in_c,
num_filters=c,
@@ -290,7 +355,7 @@ class MobileNetV2Space(SearchSpaceBase):
last_c = c
for i in range(1, n):
- last_residual_block = self._inverted_residual_unit(
+ last_residual_block, depthwise_output = self._inverted_residual_unit(
input=last_residual_block,
num_in_filter=last_c,
num_filters=c,
@@ -299,4 +364,4 @@ class MobileNetV2Space(SearchSpaceBase):
filter_size=k,
expansion_factor=t,
name=name + '_' + str(i + 1))
- return last_residual_block
+ return last_residual_block, depthwise_output
diff --git a/paddleslim/nas/search_space/search_space_base.py b/paddleslim/nas/search_space/search_space_base.py
index 4f6e89bbae8c2c7974c5cc560742f4e074ad5756..537991542b57ff036948dc62a39a456a04e370ef 100644
--- a/paddleslim/nas/search_space/search_space_base.py
+++ b/paddleslim/nas/search_space/search_space_base.py
@@ -19,7 +19,9 @@ class SearchSpaceBase(object):
"""Controller for Neural Architecture Search.
"""
- def __init__(self, input_size, output_size, block_num, block_mask, *argss):
+ def __init__(self, input_size, output_size, block_num, block_mask, *args):
+ """init model config
+ """
self.input_size = input_size
self.output_size = output_size
self.block_num = block_num
diff --git a/paddleslim/prune/__init__.py b/paddleslim/prune/__init__.py
index bb615b9dfca03ed2b289f902f6d75c73543f6fb2..b012254170d4d63bf24fcccaf8fa5f3eaeccac11 100644
--- a/paddleslim/prune/__init__.py
+++ b/paddleslim/prune/__init__.py
@@ -19,9 +19,15 @@ import controller_server
from controller_server import *
import controller_client
from controller_client import *
+import sensitive_pruner
+from sensitive_pruner import *
+import sensitive
+from sensitive import *
__all__ = []
__all__ += pruner.__all__
__all__ += auto_pruner.__all__
__all__ += controller_server.__all__
__all__ += controller_client.__all__
+__all__ += sensitive_pruner.__all__
+__all__ += sensitive.__all__
diff --git a/paddleslim/prune/auto_pruner.py b/paddleslim/prune/auto_pruner.py
index 5dbdb6d4aa064fc6d5534f0ea02fefe19e580899..8420d0c1b5d6ca1d0401ba249ebfa980037907d0 100644
--- a/paddleslim/prune/auto_pruner.py
+++ b/paddleslim/prune/auto_pruner.py
@@ -42,7 +42,7 @@ class AutoPruner(object):
server_addr=("", 0),
init_temperature=100,
reduce_rate=0.85,
- max_try_number=300,
+ max_try_times=300,
max_client_num=10,
search_steps=300,
max_ratios=[0.9],
@@ -66,7 +66,7 @@ class AutoPruner(object):
server_addr(tuple): A tuple of server ip and server port for controller server.
init_temperature(float): The init temperature used in simulated annealing search strategy.
reduce_rate(float): The decay rate used in simulated annealing search strategy.
- max_try_number(int): The max number of trying to generate legal tokens.
+ max_try_times(int): The max number of trying to generate legal tokens.
max_client_num(int): The max number of connections of controller server.
search_steps(int): The steps of searching.
max_ratios(float|list): Max ratios used to pruned parameters in `params`. List means max ratios for each parameter in `params`.
@@ -88,7 +88,7 @@ class AutoPruner(object):
self._pruned_latency = pruned_latency
self._reduce_rate = reduce_rate
self._init_temperature = init_temperature
- self._max_try_number = max_try_number
+ self._max_try_times = max_try_times
self._is_server = is_server
self._range_table = self._get_range_table(min_ratios, max_ratios)
@@ -96,8 +96,10 @@ class AutoPruner(object):
self._pruner = Pruner()
if self._pruned_flops:
self._base_flops = flops(program)
- _logger.info("AutoPruner - base flops: {};".format(
- self._base_flops))
+ self._max_flops = self._base_flops * (1 - self._pruned_flops)
+ _logger.info(
+ "AutoPruner - base flops: {}; pruned_flops: {}; max_flops: {}".
+ format(self._base_flops, self._pruned_flops, self._max_flops))
if self._pruned_latency:
self._base_latency = latency(program)
@@ -106,9 +108,9 @@ class AutoPruner(object):
self, _program, self._params, self._pruned_flops,
self._pruned_latency)
init_tokens = self._ratios2tokens(self._init_ratios)
-
+ _logger.info("range table: {}".format(self._range_table))
controller = SAController(self._range_table, self._reduce_rate,
- self._init_temperature, self._max_try_number,
+ self._init_temperature, self._max_try_times,
init_tokens, self._constrain_func)
server_ip, server_port = server_addr
@@ -143,10 +145,10 @@ class AutoPruner(object):
def _get_range_table(self, min_ratios, max_ratios):
assert isinstance(min_ratios, list) or isinstance(min_ratios, float)
assert isinstance(max_ratios, list) or isinstance(max_ratios, float)
- min_ratios = min_ratios if isinstance(min_ratios,
- list) else [min_ratios]
- max_ratios = max_ratios if isinstance(max_ratios,
- list) else [max_ratios]
+ min_ratios = min_ratios if isinstance(
+ min_ratios, list) else [min_ratios] * len(self._params)
+ max_ratios = max_ratios if isinstance(
+ max_ratios, list) else [max_ratios] * len(self._params)
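+ # e.g. min_ratios=0.1 with three params in `params` becomes [0.1, 0.1, 0.1]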
min_tokens = self._ratios2tokens(min_ratios)
max_tokens = self._ratios2tokens(max_ratios)
return (min_tokens, max_tokens)
@@ -160,10 +162,17 @@ class AutoPruner(object):
ratios,
place=self._place,
only_graph=True)
- return flops(pruned_program) < self._base_flops * (
- 1 - self._pruned_flops)
-
- def prune(self, program):
+ current_flops = flops(pruned_program)
+ result = current_flops < self._max_flops
+ if not result:
+ _logger.info("Failed try ratios: {}; flops: {}; max_flops: {}".
+ format(ratios, current_flops, self._max_flops))
+ else:
+ _logger.info("Success try ratios: {}; flops: {}; max_flops: {}".
+ format(ratios, current_flops, self._max_flops))
+ return result
+
+ def prune(self, program, eval_program=None):
"""
Prune program with latest tokens generated by controller.
Args:
@@ -178,10 +187,21 @@ class AutoPruner(object):
self._params,
self._current_ratios,
place=self._place,
+ only_graph=False,
param_backup=self._param_backup)
+ pruned_val_program = None
+ if eval_program is not None:
+ pruned_val_program = self._pruner.prune(
+ program,
+ self._scope,
+ self._params,
+ self._current_ratios,
+ place=self._place,
+ only_graph=True)
+
_logger.info("AutoPruner - pruned ratios: {}".format(
self._current_ratios))
- return pruned_program
+ return pruned_program, pruned_val_program
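+
+ # Typical driver loop (a sketch; `train` and `test` are user-supplied
+ # placeholders):
+ #
+ # for _ in range(search_steps):
+ # train_prog, eval_prog = pruner.prune(train_program, eval_program)
+ # train(train_prog)
+ # pruner.reward(test(eval_prog))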
def reward(self, score):
"""
@@ -192,7 +212,7 @@ class AutoPruner(object):
self._restore(self._scope)
self._param_backup = {}
tokens = self._ratios2tokens(self._current_ratios)
- self._controller_client.update(tokens, score)
+ self._controller_client.update(tokens, score, self._iter)
self._iter += 1
def _restore(self, scope):
diff --git a/paddleslim/prune/pruner.py b/paddleslim/prune/pruner.py
index cd79f5b286bbb34d1d688ce515691fdfc7e8f730..e2b6a7e1d28078abef97c5fa53b215b098f18cca 100644
--- a/paddleslim/prune/pruner.py
+++ b/paddleslim/prune/pruner.py
@@ -12,13 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import logging
import numpy as np
import paddle.fluid as fluid
import copy
from ..core import VarWrapper, OpWrapper, GraphWrapper
+from ..common import get_logger
__all__ = ["Pruner"]
+_logger = get_logger(__name__, level=logging.INFO)
+
class Pruner():
def __init__(self, criterion="l1_norm"):
@@ -69,6 +73,10 @@ class Pruner():
only_graph=only_graph,
param_backup=param_backup,
param_shape_backup=param_shape_backup)
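+ # a depthwise conv keeps one filter per group, so once its filters are
+ # pruned the op's 'groups' attribute must be reset to the new filter count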
+ for op in graph.ops():
+ if op.type() == 'depthwise_conv2d' or op.type(
+ ) == 'depthwise_conv2d_grad':
+ op.set_attr('groups', op.inputs('Filter')[0].shape()[0])
return graph.program
def _prune_filters_by_ratio(self,
@@ -94,27 +102,49 @@ class Pruner():
"""
if params[0].name() in self.pruned_list[0]:
return
- param_t = scope.find_var(params[0].name()).get_tensor()
- pruned_idx = self._cal_pruned_idx(
- params[0].name(), np.array(param_t), ratio, axis=0)
- for param in params:
- assert isinstance(param, VarWrapper)
- param_t = scope.find_var(param.name()).get_tensor()
- if param_backup is not None and (param.name() not in param_backup):
- param_backup[param.name()] = copy.deepcopy(np.array(param_t))
- pruned_param = self._prune_tensor(
- np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy)
- if not only_graph:
+
+ if only_graph:
+ pruned_num = int(round(params[0].shape()[0] * ratio))
+ for param in params:
+ ori_shape = param.shape()
+ if param_backup is not None and (
+ param.name() not in param_backup):
+ param_backup[param.name()] = copy.deepcopy(ori_shape)
+ new_shape = list(ori_shape)
+ new_shape[0] -= pruned_num
+ param.set_shape(new_shape)
+ _logger.debug("prune [{}] from {} to {}".format(param.name(
+ ), ori_shape, new_shape))
+ self.pruned_list[0].append(param.name())
+ return range(pruned_num)
+
+ else:
+
+ param_t = scope.find_var(params[0].name()).get_tensor()
+ pruned_idx = self._cal_pruned_idx(
+ params[0].name(), np.array(param_t), ratio, axis=0)
+ for param in params:
+ assert isinstance(param, VarWrapper)
+ param_t = scope.find_var(param.name()).get_tensor()
+ if param_backup is not None and (
+ param.name() not in param_backup):
+ param_backup[param.name()] = copy.deepcopy(
+ np.array(param_t))
+ pruned_param = self._prune_tensor(
+ np.array(param_t), pruned_idx, pruned_axis=0, lazy=lazy)
param_t.set(pruned_param, place)
- ori_shape = param.shape()
- if param_shape_backup is not None and (
- param.name() not in param_shape_backup):
- param_shape_backup[param.name()] = copy.deepcopy(param.shape())
- new_shape = list(param.shape())
- new_shape[0] = pruned_param.shape[0]
- param.set_shape(new_shape)
- self.pruned_list[0].append(param.name())
- return pruned_idx
+ ori_shape = param.shape()
+ if param_shape_backup is not None and (
+ param.name() not in param_shape_backup):
+ param_shape_backup[param.name()] = copy.deepcopy(
+ param.shape())
+ new_shape = list(param.shape())
+ new_shape[0] = pruned_param.shape[0]
+ param.set_shape(new_shape)
+ _logger.debug("prune [{}] from {} to {}".format(param.name(
+ ), ori_shape, new_shape))
+ self.pruned_list[0].append(param.name())
+ return pruned_idx
def _prune_parameter_by_idx(self,
scope,
@@ -141,24 +171,44 @@ class Pruner():
"""
if params[0].name() in self.pruned_list[pruned_axis]:
return
- for param in params:
- assert isinstance(param, VarWrapper)
- param_t = scope.find_var(param.name()).get_tensor()
- if param_backup is not None and (param.name() not in param_backup):
- param_backup[param.name()] = copy.deepcopy(np.array(param_t))
- pruned_param = self._prune_tensor(
- np.array(param_t), pruned_idx, pruned_axis, lazy=lazy)
- if not only_graph:
+
+ if only_graph:
+ pruned_num = len(pruned_idx)
+ for param in params:
+ ori_shape = param.shape()
+ if param_backup is not None and (
+ param.name() not in param_backup):
+ param_backup[param.name()] = copy.deepcopy(ori_shape)
+ new_shape = list(ori_shape)
+ new_shape[pruned_axis] -= pruned_num
+ param.set_shape(new_shape)
+ _logger.debug("prune [{}] from {} to {}".format(param.name(
+ ), ori_shape, new_shape))
+ self.pruned_list[pruned_axis].append(param.name())
+
+ else:
+ for param in params:
+ assert isinstance(param, VarWrapper)
+ param_t = scope.find_var(param.name()).get_tensor()
+ if param_backup is not None and (
+ param.name() not in param_backup):
+ param_backup[param.name()] = copy.deepcopy(
+ np.array(param_t))
+ pruned_param = self._prune_tensor(
+ np.array(param_t), pruned_idx, pruned_axis, lazy=lazy)
param_t.set(pruned_param, place)
- ori_shape = param.shape()
+ ori_shape = param.shape()
- if param_shape_backup is not None and (
- param.name() not in param_shape_backup):
- param_shape_backup[param.name()] = copy.deepcopy(param.shape())
- new_shape = list(param.shape())
- new_shape[pruned_axis] = pruned_param.shape[pruned_axis]
- param.set_shape(new_shape)
- self.pruned_list[pruned_axis].append(param.name())
+ if param_shape_backup is not None and (
+ param.name() not in param_shape_backup):
+ param_shape_backup[param.name()] = copy.deepcopy(
+ param.shape())
+ new_shape = list(param.shape())
+ new_shape[pruned_axis] = pruned_param.shape[pruned_axis]
+ param.set_shape(new_shape)
+ _logger.debug("prune [{}] from {} to {}".format(param.name(
+ ), ori_shape, new_shape))
+ self.pruned_list[pruned_axis].append(param.name())
def _forward_search_related_op(self, graph, param):
"""
@@ -478,19 +528,24 @@ class Pruner():
Returns:
list: A list of operators.
"""
+ _logger.debug("######################search: {}######################".
+ format(op_node))
visited = [op_node.idx()]
stack = []
brothers = []
for op in graph.next_ops(op_node):
- if (op.type() != 'conv2d') and (op.type() != 'fc') and (
- not op.is_bwd_op()):
+ if ("conv2d" not in op.type()) and (op.type() != 'fc') and (
+ not op.is_bwd_op()) and (not op.is_opt_op()):
stack.append(op)
visited.append(op.idx())
while len(stack) > 0:
top_op = stack.pop()
for parent in graph.pre_ops(top_op):
- if parent.idx() not in visited and (not parent.is_bwd_op()):
- if ((parent.type() == 'conv2d') or
+ if parent.idx() not in visited and (
+ not parent.is_bwd_op()) and (not parent.is_opt_op()):
+ _logger.debug("----------go back from {} to {}----------".
+ format(top_op, parent))
+ if (('conv2d' in parent.type()) or
(parent.type() == 'fc')):
brothers.append(parent)
else:
@@ -498,11 +553,16 @@ class Pruner():
visited.append(parent.idx())
for child in graph.next_ops(top_op):
- if (child.type() != 'conv2d') and (child.type() != 'fc') and (
+ if ('conv2d' not in child.type()
+ ) and (child.type() != 'fc') and (
child.idx() not in visited) and (
- not child.is_bwd_op()):
+ not child.is_bwd_op()) and (not child.is_opt_op()):
stack.append(child)
visited.append(child.idx())
+ _logger.debug("brothers: {}".format(brothers))
+ _logger.debug(
+ "######################Finish search {}######################".format(
+ op_node))
return brothers
def _cal_pruned_idx(self, name, param, ratio, axis):
diff --git a/paddleslim/prune/sensitive.py b/paddleslim/prune/sensitive.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dd2f88b7de9eba62df447c78ef629a32111cd09
--- /dev/null
+++ b/paddleslim/prune/sensitive.py
@@ -0,0 +1,199 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import logging
+import pickle
+import numpy as np
+import paddle.fluid as fluid
+from ..core import GraphWrapper
+from ..common import get_logger
+from ..analysis import flops
+from ..prune import Pruner
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+__all__ = ["sensitivity", "flops_sensitivity"]
+
+
+def sensitivity(program,
+ place,
+ param_names,
+ eval_func,
+ sensitivities_file=None,
+ step_size=0.2,
+ max_pruned_times=None):
+ scope = fluid.global_scope()
+ graph = GraphWrapper(program)
+ sensitivities = _load_sensitivities(sensitivities_file)
+
+ for name in param_names:
+ if name not in sensitivities:
+ size = graph.var(name).shape()[0]
+ sensitivities[name] = {
+ 'pruned_percent': [],
+ 'loss': [],
+ 'size': size
+ }
+ baseline = None
+ for name in sensitivities:
+ ratio = step_size
+ pruned_times = 0
+ while ratio < 1:
+ if max_pruned_times is not None and pruned_times >= max_pruned_times:
+ break
+ ratio = round(ratio, 2)
+ if ratio in sensitivities[name]['pruned_percent']:
+ _logger.debug('{}, {} has already been computed.'.format(name, ratio))
+ ratio += step_size
+ pruned_times += 1
+ continue
+ if baseline is None:
+ baseline = eval_func(graph.program)
+
+ param_backup = {}
+ pruner = Pruner()
+ _logger.info("sensitive - param: {}; ratios: {}".format(name,
+ ratio))
+ pruned_program = pruner.prune(
+ program=graph.program,
+ scope=scope,
+ params=[name],
+ ratios=[ratio],
+ place=place,
+ lazy=True,
+ only_graph=False,
+ param_backup=param_backup)
+ pruned_metric = eval_func(pruned_program)
+ loss = (baseline - pruned_metric) / baseline
+ _logger.info("pruned param: {}; {}; loss={}".format(name, ratio,
+ loss))
+ sensitivities[name]['pruned_percent'].append(ratio)
+ sensitivities[name]['loss'].append(loss)
+ _save_sensitivities(sensitivities, sensitivities_file)
+
+ # restore pruned parameters
+ for param_name in param_backup.keys():
+ param_t = scope.find_var(param_name).get_tensor()
+ param_t.set(param_backup[param_name], place)
+ ratio += step_size
+ pruned_times += 1
+ return sensitivities
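+
+
+# A minimal usage sketch (`eval_func` is any callable mapping a program to a
+# score, e.g. top-1 accuracy; "conv1_weights" is a hypothetical param name):
+#
+# sens = sensitivity(eval_program, place, ["conv1_weights"], eval_func,
+# sensitivities_file="./sens.data", step_size=0.2)
+# # sens["conv1_weights"]["loss"][i] is the relative metric drop at
+# # pruning ratio sens["conv1_weights"]["pruned_percent"][i]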
+
+
+def flops_sensitivity(program,
+ place,
+ param_names,
+ eval_func,
+ sensitivities_file=None,
+ pruned_flops_rate=0.1):
+
+ assert (1.0 / len(param_names) > pruned_flops_rate)
+
+ scope = fluid.global_scope()
+ graph = GraphWrapper(program)
+ sensitivities = _load_sensitivities(sensitivities_file)
+
+ for name in param_names:
+ if name not in sensitivities:
+ size = graph.var(name).shape()[0]
+ sensitivities[name] = {
+ 'pruned_percent': [],
+ 'loss': [],
+ 'size': size
+ }
+ base_flops = flops(program)
+ target_pruned_flops = base_flops * pruned_flops_rate
+
+ pruner = Pruner()
+ baseline = None
+ for name in sensitivities:
+
+ pruned_program = pruner.prune(
+ program=graph.program,
+ scope=None,
+ params=[name],
+ ratios=[0.5],
+ place=None,
+ lazy=False,
+ only_graph=True)
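+ # pruning 50% of the filters removes about half of the FLOPs this param
+ # contributes, so doubling the measured difference estimates its total FLOPs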
+ param_flops = (base_flops - flops(pruned_program)) * 2
+ channel_size = sensitivities[name]["size"]
+ pruned_ratio = target_pruned_flops / float(param_flops)
+ pruned_size = round(pruned_ratio * channel_size)
+ pruned_ratio = 1 if pruned_size >= channel_size else pruned_ratio
+
+ if len(sensitivities[name]["pruned_percent"]) > 0:
+ _logger.debug('{} exists; pruned ratio: {}; expected ratio: {}'.
+ format(name, sensitivities[name]["pruned_percent"][
+ 0], pruned_ratio))
+ continue
+ if baseline is None:
+ baseline = eval_func(graph.program)
+ param_backup = {}
+ pruner = Pruner()
+ _logger.info("sensitive - param: {}; ratios: {}".format(name,
+ pruned_ratio))
+ loss = 1
+ if pruned_ratio < 1:
+ pruned_program = pruner.prune(
+ program=graph.program,
+ scope=scope,
+ params=[name],
+ ratios=[pruned_ratio],
+ place=place,
+ lazy=True,
+ only_graph=False,
+ param_backup=param_backup)
+ pruned_metric = eval_func(pruned_program)
+ loss = (baseline - pruned_metric) / baseline
+ _logger.info("pruned param: {}; {}; loss={}".format(name, pruned_ratio,
+ loss))
+ sensitivities[name]['pruned_percent'].append(pruned_ratio)
+ sensitivities[name]['loss'].append(loss)
+ _save_sensitivities(sensitivities, sensitivities_file)
+
+ # restore pruned parameters
+ for param_name in param_backup.keys():
+ param_t = scope.find_var(param_name).get_tensor()
+ param_t.set(param_backup[param_name], place)
+ return sensitivities
+
+
+def _load_sensitivities(sensitivities_file):
+ """
+ Load sensitivities from file.
+ """
+ sensitivities = {}
+ if sensitivities_file and os.path.exists(sensitivities_file):
+ with open(sensitivities_file, 'rb') as f:
+ if sys.version_info < (3, 0):
+ sensitivities = pickle.load(f)
+ else:
+ sensitivities = pickle.load(f, encoding='bytes')
+
+ for param in sensitivities:
+ sensitivities[param]['pruned_percent'] = [
+ round(p, 2) for p in sensitivities[param]['pruned_percent']
+ ]
+ return sensitivities
+
+
+def _save_sensitivities(sensitivities, sensitivities_file):
+ """
+ Save sensitivities into file.
+ """
+ if sensitivities_file is None:
+ return
+ with open(sensitivities_file, 'wb') as f:
+ pickle.dump(sensitivities, f)
diff --git a/paddleslim/prune/sensitive_pruner.py b/paddleslim/prune/sensitive_pruner.py
new file mode 100644
index 0000000000000000000000000000000000000000..823b9264108055cac8604d8a351497b94591fcb4
--- /dev/null
+++ b/paddleslim/prune/sensitive_pruner.py
@@ -0,0 +1,257 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import logging
+import copy
+from scipy.optimize import leastsq
+import numpy as np
+import paddle.fluid as fluid
+from ..common import get_logger
+from .sensitive import sensitivity
+from .sensitive import flops_sensitivity
+from ..analysis import flops
+from .pruner import Pruner
+
+__all__ = ["SensitivePruner"]
+
+_logger = get_logger(__name__, level=logging.INFO)
+
+
+class SensitivePruner(object):
+ def __init__(self, place, eval_func, scope=None, checkpoints=None):
+ """
+ Pruner used to prune parameters iteratively according to sensitivities of parameters in each step.
+ Args:
+ place(fluid.CUDAPlace | fluid.CPUPlace): The device place where program execute.
+ eval_func(function): A callback function used to evaluate the pruned program. It takes the pruned program as its argument and returns a score.
+ scope(fluid.scope): The scope used to execute program.
+ checkpoints(str): The directory used to save and load checkpoints. Default: None.
+ """
+ self._eval_func = eval_func
+ self._iter = 0
+ self._place = place
+ self._scope = fluid.global_scope() if scope is None else scope
+ self._pruner = Pruner()
+ self._checkpoints = checkpoints
+
+ def save_checkpoint(self, train_program, eval_program):
+ checkpoint = os.path.join(self._checkpoints, str(self._iter - 1))
+ exe = fluid.Executor(self._place)
+ fluid.io.save_persistables(
+ exe, checkpoint, main_program=train_program, filename="__params__")
+
+ with open(checkpoint + "/main_program", "wb") as f:
+ f.write(train_program.desc.serialize_to_string())
+ with open(checkpoint + "/eval_program", "wb") as f:
+ f.write(eval_program.desc.serialize_to_string())
+
+ def restore(self, checkpoints=None):
+
+ exe = fluid.Executor(self._place)
+ checkpoints = self._checkpoints if checkpoints is None else checkpoints
+ print("check points: {}".format(checkpoints))
+ main_program = None
+ eval_program = None
+ if checkpoints is not None:
+ cks = os.listdir(checkpoints)
+ if len(cks) > 0:
+ latest = max([int(ck) for ck in cks])
+ latest_ck_path = os.path.join(checkpoints, str(latest))
+ self._iter += 1
+
+ with open(latest_ck_path + "/main_program", "rb") as f:
+ program_desc_str = f.read()
+ main_program = fluid.Program.parse_from_string(
+ program_desc_str)
+
+ with open(latest_ck_path + "/eval_program", "rb") as f:
+ program_desc_str = f.read()
+ eval_program = fluid.Program.parse_from_string(
+ program_desc_str)
+
+ with fluid.scope_guard(self._scope):
+ fluid.io.load_persistables(exe, latest_ck_path,
+ main_program, "__params__")
+ print("load checkpoint from: {}".format(latest_ck_path))
+ print("flops of eval program: {}".format(flops(eval_program)))
+ return main_program, eval_program, self._iter
+
+ def greedy_prune(self,
+ train_program,
+ eval_program,
+ params,
+ pruned_flops_rate,
+ topk=1):
+
+ sensitivities_file = "greedy_sensitivities_iter{}.data".format(
+ self._iter)
+ with fluid.scope_guard(self._scope):
+ sensitivities = flops_sensitivity(
+ eval_program,
+ self._place,
+ params,
+ self._eval_func,
+ sensitivities_file=sensitivities_file,
+ pruned_flops_rate=pruned_flops_rate)
+ _logger.info("sensitivities: {}".format(sensitivities))
+ params, ratios = self._greedy_ratio_by_sensitive(sensitivities, topk)
+
+ _logger.info("Pruning: {} by {}".format(params, ratios))
+ pruned_program = self._pruner.prune(
+ train_program,
+ self._scope,
+ params,
+ ratios,
+ place=self._place,
+ only_graph=False)
+ pruned_val_program = None
+ if eval_program is not None:
+ pruned_val_program = self._pruner.prune(
+ eval_program,
+ self._scope,
+ params,
+ ratios,
+ place=self._place,
+ only_graph=True)
+ self._iter += 1
+ return pruned_program, pruned_val_program
+
+ def prune(self, train_program, eval_program, params, pruned_flops):
+ """
+ Pruning parameters of training and evaluation network by sensitivities in current step.
+ Args:
+ train_program(fluid.Program): The training program to be pruned.
+ eval_program(fluid.Program): The evaluation program to be pruned. And it is also used to calculate sensitivities of parameters.
+ params(list): The parameters to be pruned.
+ pruned_flops(float): The ratio of FLOPs to be pruned in the current step.
+ Returns:
+ tuple: A tuple of pruned training program and pruned evaluation program.
+ """
+ _logger.info("Pruning: {}".format(params))
+ sensitivities_file = "sensitivities_iter{}.data".format(self._iter)
+ with fluid.scope_guard(self._scope):
+ sensitivities = sensitivity(
+ eval_program,
+ self._place,
+ params,
+ self._eval_func,
+ sensitivities_file=sensitivities_file,
+ step_size=0.1)
+ _logger.info("sensitivities: {}".format(sensitivities))
+ _, ratios = self._get_ratios_by_sensitive(sensitivities, pruned_flops,
+ eval_program)
+
+ pruned_program = self._pruner.prune(
+ train_program,
+ self._scope,
+ params,
+ ratios,
+ place=self._place,
+ only_graph=False)
+ pruned_val_program = None
+ if eval_program is not None:
+ pruned_val_program = self._pruner.prune(
+ eval_program,
+ self._scope,
+ params,
+ ratios,
+ place=self._place,
+ only_graph=True)
+ self._iter += 1
+ return pruned_program, pruned_val_program
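+
+ # Typical iterative compression (a sketch; `fine_tune` is a user-supplied
+ # placeholder):
+ #
+ # pruner = SensitivePruner(place, eval_func, checkpoints="./checkpoints")
+ # for _ in range(iters):
+ # train_program, eval_program = pruner.prune(
+ # train_program, eval_program, params, pruned_flops=0.1)
+ # fine_tune(train_program)
+ # pruner.save_checkpoint(train_program, eval_program)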
+
+ def _greedy_ratio_by_sensitive(self, sensitivities, topk=1):
+ losses = {}
+ percents = {}
+ for param in sensitivities:
+ losses[param] = sensitivities[param]['loss'][0]
+ percents[param] = sensitivities[param]['pruned_percent'][0]
+ topk_params = sorted(losses, key=losses.__getitem__)[:topk]
+ topk_percents = [percents[param] for param in topk_params]
+ return topk_params, topk_percents
+
+ def _get_ratios_by_sensitive(self, sensitivities, pruned_flops,
+ eval_program):
+ """
+ Search a group of ratios for pruning target flops.
+ """
+
+ def func(params, x):
+ a, b, c, d = params
+ return a * x * x * x + b * x * x + c * x + d
+
+ def error(params, x, y):
+ return func(params, x) - y
+
+ def solve_coefficient(x, y):
+ init_coefficient = [10, 10, 10, 10]
+ coefficient, loss = leastsq(error, init_coefficient, args=(x, y))
+ return coefficient
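+
+ # each param's sensitivity curve is fit as
+ # loss(x) = a*x^3 + b*x^2 + c*x + d; for a target loss L, candidate
+ # pruned ratios are the real roots of a*x^3 + b*x^2 + c*x + (d - L) = 0
+ # in (0, 1), which is what step 2.1 below computes with np.roots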
+
+ min_loss = 0.
+ max_loss = 0.
+
+ # step 1: fit curve by sensitivities
+ coefficients = {}
+ for param in sensitivities:
+ losses = np.array([0] * 5 + sensitivities[param]['loss'])
+ percents = np.array([0] * 5 + sensitivities[param][
+ 'pruned_percent'])
+ coefficients[param] = solve_coefficient(percents, losses)
+ loss = np.max(losses)
+ max_loss = np.max([max_loss, loss])
+
+ # step 2: Find a group of ratios by binary searching.
+ base_flops = flops(eval_program)
+ ratios = []
+ max_times = 20
+ while min_loss < max_loss and max_times > 0:
+ loss = (max_loss + min_loss) / 2
+ _logger.info(
+ '-----------Try pruned ratios while acc loss={}-----------'.
+ format(loss))
+ ratios = []
+ # step 2.1: Get ratios according to current loss
+ for param in sensitivities:
+ coefficient = copy.deepcopy(coefficients[param])
+ coefficient[-1] = coefficient[-1] - loss
+ roots = np.roots(coefficient)
+ min_root = 1
+ for root in roots:
+ if np.isreal(root) and root > 0 and root < 1:
+ min_root = min(root.real, min_root)
+ ratios.append(min_root)
+ _logger.info('Pruned ratios={}'.format(
+ [round(ratio, 3) for ratio in ratios]))
+ # step 2.2: Pruning by current ratios
+ pruned_program = self._pruner.prune(
+ eval_program,
+ None, # scope
+ sensitivities.keys(),
+ ratios,
+ None, # place
+ only_graph=True)
+ pruned_ratio = 1 - (float(flops(pruned_program)) / base_flops)
+ _logger.info('Pruned flops: {:.4f}'.format(pruned_ratio))
+
+ # step 2.3: Check whether current ratios is enough
+ if abs(pruned_ratio - pruned_flops) < 0.015:
+ break
+ if pruned_ratio > pruned_flops:
+ max_loss = loss
+ else:
+ min_loss = loss
+ max_times -= 1
+ return sensitivities.keys(), ratios
diff --git a/paddleslim/quant/quanter.py b/paddleslim/quant/quanter.py
old mode 100644
new mode 100755
index 0db22772d712951ed895f2d2e897142d6ce3c377..254cf4958643ef5e4d4e6cd625028baef964e222
--- a/paddleslim/quant/quanter.py
+++ b/paddleslim/quant/quanter.py
@@ -20,11 +20,20 @@ from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
+from paddle.fluid.contrib.slim.quantization import PostTrainingQuantization
+from paddle.fluid.contrib.slim.quantization import AddQuantDequantPass
from paddle.fluid import core
-WEIGHT_QUANTIZATION_TYPES=['abs_max', 'channel_wise_abs_max', 'range_abs_max', 'moving_average_abs_max']
-ACTIVATION_QUANTIZATION_TYPES=['abs_max','range_abs_max', 'moving_average_abs_max']
+WEIGHT_QUANTIZATION_TYPES = [
+ 'abs_max', 'channel_wise_abs_max', 'range_abs_max',
+ 'moving_average_abs_max'
+]
+ACTIVATION_QUANTIZATION_TYPES = [
+ 'abs_max', 'range_abs_max', 'moving_average_abs_max'
+]
VALID_DTYPES = ['int8']
+TRANSFORM_PASS_OP_TYPES = ['conv2d', 'depthwise_conv2d', 'mul']
+QUANT_DEQUANT_PASS_OP_TYPES = ['elementwise_add', 'pool2d']
_quant_config_default = {
# weight quantize type, default is 'abs_max'
@@ -38,7 +47,8 @@ _quant_config_default = {
# ops of name_scope in not_quant_pattern list, will not be quantized
'not_quant_pattern': ['skip_quant'],
# ops of type in quantize_op_types, will be quantized
- 'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
+ 'quantize_op_types':
+ ['conv2d', 'depthwise_conv2d', 'mul', 'elementwise_add', 'pool2d'],
# data type after quantization, such as 'uint8', 'int8', etc. default is 'int8'
'dtype': 'int8',
# window size for 'range_abs_max' quantization. defaulf is 10000
@@ -88,6 +98,12 @@ def _parse_configs(user_config):
assert isinstance(configs['quantize_op_types'], list), \
"quantize_op_types must be a list"
+ for op_type in configs['quantize_op_types']:
+ assert (op_type in QUANT_DEQUANT_PASS_OP_TYPES) or (
+ op_type in TRANSFORM_PASS_OP_TYPES), "{} is not support, \
+ now support op types are {}".format(
+ op_type, TRANSFORM_PASS_OP_TYPES + QUANT_DEQUANT_PASS_OP_TYPES)
+
assert isinstance(configs['dtype'], str), \
"dtype must be a str."
@@ -132,19 +148,37 @@ def quant_aware(program, place, config, scope=None, for_test=False):
config = _parse_configs(config)
main_graph = IrGraph(core.Graph(program.desc), for_test=for_test)
- transform_pass = QuantizationTransformPass(
- scope=scope,
- place=place,
- weight_bits=config['weight_bits'],
- activation_bits=config['activation_bits'],
- activation_quantize_type=config['activation_quantize_type'],
- weight_quantize_type=config['weight_quantize_type'],
- window_size=config['window_size'],
- moving_rate=config['moving_rate'],
- quantizable_op_type=config['quantize_op_types'],
- skip_pattern=config['not_quant_pattern'])
-
- transform_pass.apply(main_graph)
+ transform_pass_ops = []
+ quant_dequant_ops = []
+ for op_type in config['quantize_op_types']:
+ if op_type in TRANSFORM_PASS_OP_TYPES:
+ transform_pass_ops.append(op_type)
+ elif op_type in QUANT_DEQUANT_PASS_OP_TYPES:
+ quant_dequant_ops.append(op_type)
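+ # e.g. quantize_op_types=['conv2d', 'mul', 'pool2d'] routes 'conv2d' and
+ # 'mul' to QuantizationTransformPass and 'pool2d' to AddQuantDequantPass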
+ if len(transform_pass_ops) > 0:
+ transform_pass = QuantizationTransformPass(
+ scope=scope,
+ place=place,
+ weight_bits=config['weight_bits'],
+ activation_bits=config['activation_bits'],
+ activation_quantize_type=config['activation_quantize_type'],
+ weight_quantize_type=config['weight_quantize_type'],
+ window_size=config['window_size'],
+ moving_rate=config['moving_rate'],
+ quantizable_op_type=transform_pass_ops,
+ skip_pattern=config['not_quant_pattern'])
+
+ transform_pass.apply(main_graph)
+
+ if len(quant_dequant_ops) > 0:
+ quant_dequant_pass = AddQuantDequantPass(
+ scope=scope,
+ place=place,
+ moving_rate=config['moving_rate'],
+ quant_bits=config['activation_bits'],
+ skip_pattern=config['not_quant_pattern'],
+ quantizable_op_type=quant_dequant_ops)
+ quant_dequant_pass.apply(main_graph)
if for_test:
quant_program = main_graph.to_program()
@@ -153,22 +187,71 @@ def quant_aware(program, place, config, scope=None, for_test=False):
return quant_program
-def quant_post(program, place, config, scope=None):
+def quant_post(executor,
+ model_dir,
+ quantize_model_path,
+ sample_generator,
+ model_filename=None,
+ params_filename=None,
+ batch_size=16,
+ batch_nums=None,
+ scope=None,
+ algo='KL',
+ quantizable_op_type=["conv2d", "depthwise_conv2d", "mul"]):
"""
- add quantization ops in program. the program returned is not trainable.
+ The function utilizes post training quantization method to quantize the
+ fp32 model. It uses calibrate data to calculate the scale factor of
+ quantized variables, and inserts fake quant/dequant op to obtain the
+ quantized model.
+
Args:
- program(fluid.Program): program
- scope(fluid.Scope): the scope to store var, it's should be the value of program's scope, usually it's fluid.global_scope().
- place(fluid.CPUPlace or fluid.CUDAPlace): place
- config(dict): configs for quantization, default values are in quant_config_default dict.
- for_test: is for test program.
- Return:
- fluid.Program: the quantization program is not trainable.
+ executor(fluid.Executor): The executor to load, run and save the
+ quantized model.
+ model_dir(str): The path of fp32 model that will be quantized, and
+ the model and params that saved by fluid.io.save_inference_model
+ are under the path.
+ quantize_model_path(str): The path to save quantized model using api
+ fluid.io.save_inference_model.
+ sample_generator(Python Generator): The sample generator provides
+ calibration data for the DataLoader, and it returns one sample each time.
+ model_filename(str, optional): The name of model file. If parameters
+ are saved in separate files, set it as 'None'. Default is 'None'.
+ params_filename(str, optional): The name of params file.
+ When all parameters are saved in a single file, set it
+ as filename. If parameters are saved in separate files,
+ set it as 'None'. Default is 'None'.
+ batch_size(int, optional): The batch size of DataLoader, default is 16.
+ batch_nums(int, optional): If batch_nums is not None, the number of calibration
+ samples is 'batch_size*batch_nums'. If batch_nums is None, all data
+ generated by sample_generator is used as calibration data.
+ scope(fluid.Scope, optional): The scope to run program, use it to load
+ and save variables. If scope is None, will use fluid.global_scope().
+ algo(str, optional): If algo='KL', use the KL-divergence method to
+ get a more precise scale factor. If algo='direct', use the
+ abs_max method to get the scale factor. Default is 'KL'.
+ quantizable_op_type(list[str], optional): The list of op types
+ that will be quantized. Default is ["conv2d", "depthwise_conv2d",
+ "mul"].
+ Returns:
+ None
"""
- pass
+ post_training_quantization = PostTrainingQuantization(
+ executor=executor,
+ sample_generator=sample_generator,
+ model_dir=model_dir,
+ model_filename=model_filename,
+ params_filename=params_filename,
+ batch_size=batch_size,
+ batch_nums=batch_nums,
+ scope=scope,
+ algo=algo,
+ quantizable_op_type=quantizable_op_type,
+ is_full_quantize=False)
+ post_training_quantization.quantize()
+ post_training_quantization.save_quantized_model(quantize_model_path)
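+
+
+# A minimal usage sketch (paths are placeholders; mnist is used only as an
+# example of a sample generator yielding one sample at a time):
+#
+# import paddle
+# import paddle.fluid as fluid
+# exe = fluid.Executor(fluid.CPUPlace())
+# quant_post(exe, model_dir='./fp32_model', quantize_model_path='./int8_model',
+# sample_generator=paddle.dataset.mnist.test(), batch_size=16,
+# batch_nums=10)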
-def convert(program, scope, place, config, save_int8=False):
+def convert(program, place, config, scope=None, save_int8=False):
"""
add quantization ops in program. the program returned is not trainable.
Args:
@@ -183,7 +266,7 @@ def convert(program, scope, place, config, save_int8=False):
fluid.Program: freezed int8 program which can be used for inference.
if save_int8 is False, this value is None.
"""
-
+ scope = fluid.global_scope() if not scope else scope
test_graph = IrGraph(core.Graph(program.desc), for_test=True)
# Freeze the graph after training by adjusting the quantize
diff --git a/paddleslim/search/__init__.py b/paddleslim/search/__init__.py
deleted file mode 100644
index 4f3182c3058cb33e46777ab1424242b42406a603..0000000000000000000000000000000000000000
--- a/paddleslim/search/__init__.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License"
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Controllers and controller server"""
diff --git a/tests/test_prune.py b/tests/test_prune.py
index 93609367351618ce375f164a1dca284e85369e4c..3fdaa867e350af876648871f83fe70cc83b548b6 100644
--- a/tests/test_prune.py
+++ b/tests/test_prune.py
@@ -15,7 +15,7 @@ import sys
sys.path.append("../")
import unittest
import paddle.fluid as fluid
-from prune import Pruner
+from paddleslim.prune import Pruner
from layers import conv_bn_layer
diff --git a/tests/test_sa_nas.py b/tests/test_sa_nas.py
index c1bcd08dadf87e24f31af1a525f67aa9a92bd26e..a4203a85a898632ac2102eb61ab7dd7b475e73ef 100644
--- a/tests/test_sa_nas.py
+++ b/tests/test_sa_nas.py
@@ -41,7 +41,10 @@ class TestSANAS(unittest.TestCase):
search_steps = 3
sa_nas = SANAS(
- configs, max_flops=base_flops, search_steps=search_steps)
+ configs,
+ search_steps=search_steps,
+ server_addr=("", 0),
+ is_server=True)
for i in range(search_steps):
archs = sa_nas.next_archs()
diff --git a/tests/test_sensitivity.py b/tests/test_sensitivity.py
new file mode 100644
index 0000000000000000000000000000000000000000..e2cfa01d889db2891fd7507b2d4d9aec018a1163
--- /dev/null
+++ b/tests/test_sensitivity.py
@@ -0,0 +1,69 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+sys.path.append("../")
+import unittest
+import numpy
+import paddle
+import paddle.fluid as fluid
+from paddleslim.analysis import sensitivity
+from layers import conv_bn_layer
+
+
+class TestSensitivity(unittest.TestCase):
+ def test_sensitivity(self):
+ main_program = fluid.Program()
+ startup_program = fluid.Program()
+ with fluid.program_guard(main_program, startup_program):
+ input = fluid.data(name="image", shape=[None, 1, 28, 28])
+ label = fluid.data(name="label", shape=[None, 1], dtype="int64")
+ conv1 = conv_bn_layer(input, 8, 3, "conv1")
+ conv2 = conv_bn_layer(conv1, 8, 3, "conv2")
+ sum1 = conv1 + conv2
+ conv3 = conv_bn_layer(sum1, 8, 3, "conv3")
+ conv4 = conv_bn_layer(conv3, 8, 3, "conv4")
+ sum2 = conv4 + sum1
+ conv5 = conv_bn_layer(sum2, 8, 3, "conv5")
+ conv6 = conv_bn_layer(conv5, 8, 3, "conv6")
+ out = fluid.layers.fc(conv6, size=10, act='softmax')
+ acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
+ eval_program = main_program.clone(for_test=True)
+
+ place = fluid.CUDAPlace(0)
+ exe = fluid.Executor(place)
+ exe.run(startup_program)
+
+ val_reader = paddle.batch(paddle.dataset.mnist.test(), batch_size=128)
+
+ def eval_func(program, scope):
+ feeder = fluid.DataFeeder(
+ feed_list=['image', 'label'], place=place, program=program)
+ acc_set = []
+ for data in val_reader():
+ acc_np = exe.run(program=program,
+ scope=scope,
+ feed=feeder.feed(data),
+ fetch_list=[acc_top1])
+ acc_set.append(float(acc_np[0]))
+ acc_val_mean = numpy.array(acc_set).mean()
+ print("acc_val_mean: {}".format(acc_val_mean))
+ return acc_val_mean
+
+ sensitivity(eval_program,
+ fluid.global_scope(), place, ["conv4_weights"], eval_func,
+ "./sensitivities_file")
+
+
+if __name__ == '__main__':
+ unittest.main()