sanas_darts_space.ipynb 10.7 KB
Notebook
In [19]:
# Imports: Paddle 1.x static-graph API plus the PaddleSlim SA-NAS controller.
import paddle
import paddle.fluid as fluid
from paddleslim.nas import SANAS
import numpy as np

# ---- Search / training configuration ----
BATCH_SIZE=96
SERVER_ADDRESS = ""          # empty string: SANAS starts its own local server
PORT = 8377                  # SANAS controller-server port
SEARCH_STEPS = 300           # number of architectures the SA controller samples
RETAIN_EPOCH=30              # epochs used to train each sampled architecture
MAX_PARAMS=3.77              # parameter budget in millions (MB of param counts)
IMAGE_SHAPE=[3, 32, 32]      # CIFAR-10 CHW input shape
AUXILIARY = True             # add DARTS auxiliary classification head
AUXILIARY_WEIGHT= 0.4        # weight of the auxiliary-head loss term
TRAINSET_NUM = 50000         # CIFAR-10 training-set size (for steps/epoch)
LR = 0.025                   # initial learning rate for cosine decay
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0003        # L2 regularization coefficient
DROP_PATH_PROBILITY = 0.2    # NOTE: typo for "PROBABILITY"; kept — later cells reference this name
In [2]:
# Search inside the predefined DARTS search space; this process acts as the
# SA-NAS controller server (is_server=True) listening on SERVER_ADDRESS:PORT.
config = [('DartsSpace')]
sa_nas = SANAS(config, server_addr=(SERVER_ADDRESS, PORT), search_steps=SEARCH_STEPS, is_server=True)
Out [2]:
2020-02-23 12:28:09,752-INFO: range table: ([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14])
2020-02-23 12:28:09,754-INFO: ControllerServer - listen on: [127.0.0.1:8377]
2020-02-23 12:28:09,756-INFO: Controller Server run...
In [3]:
def count_parameters_in_MB(all_params, prefix='model'):
    """Count trainable parameters, in millions.

    Only parameters whose name starts with `prefix` are counted, and
    auxiliary-head parameters (any name containing 'aux') are excluded,
    matching the DARTS convention of reporting model size without the
    auxiliary classifier.

    Args:
        all_params: iterable of parameter objects exposing .name,
            .trainable and .shape.
        prefix: name prefix selecting the parameters to count.

    Returns:
        float: total element count of the selected parameters / 1e6.
    """
    selected = (
        np.prod(p.shape) for p in all_params
        if p.name.startswith(prefix) and p.trainable and 'aux' not in p.name
    )
    return sum(selected) / 1e6
In [4]:
def create_data_loader(IMAGE_SHAPE, is_train):
    """Create input placeholders and an iterable DataLoader.

    Args:
        IMAGE_SHAPE: CHW shape of one image, e.g. [3, 32, 32].
        is_train: when True, also create the drop-path placeholders fed
            during training.

    Returns:
        (data_loader, image, label, drop_path_prob, drop_path_mask).
        For eval (is_train=False) the two drop-path entries are the empty
        string '' as a sentinel, matching what downstream code expects.
    """
    image = fluid.data(
        name="image", shape=[None] + IMAGE_SHAPE, dtype="float32")
    label = fluid.data(name="label", shape=[None, 1], dtype="int64")
    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)

    if is_train:
        # Per-sample drop probability and a precomputed Bernoulli mask over
        # (batch, 20 cells, 4 nodes, 2 incoming edges).
        drop_path_prob = fluid.data(
            name="drop_path_prob", shape=[BATCH_SIZE, 1], dtype="float32")
        drop_path_mask = fluid.data(
            name="drop_path_mask",
            shape=[BATCH_SIZE, 20, 4, 2],
            dtype="float32")
    else:
        drop_path_prob = ''
        drop_path_mask = ''

    return data_loader, image, label, drop_path_prob, drop_path_mask
In [5]:
def build_program(main_program, startup_program, IMAGE_SHAPE, archs, is_train):
    """Build the train or eval static graph for one sampled architecture.

    Wires the data loader, the forward pass produced by `archs`, accuracy
    and cross-entropy loss ops, and — for training — the auxiliary-head
    loss, cosine-decayed Momentum optimizer, global-norm gradient clipping
    and L2 weight decay.

    Args:
        main_program: fluid.Program to build ops into.
        startup_program: fluid.Program receiving parameter initializers.
        IMAGE_SHAPE: CHW input shape passed to create_data_loader.
        archs: callable returning (logits, logits_aux) for the sampled net.
        is_train: build the training graph (optimizer etc.) when True.

    Returns:
        (outs, data_loader): outs is [loss, top1, top5, learning_rate] for
        training, [loss, top1, top5] for eval.
    """
    with fluid.program_guard(main_program, startup_program):
        data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
            IMAGE_SHAPE, is_train)
        # 10 = number of CIFAR-10 classes.
        logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
                                   is_train, 10)
        top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        top5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))

        if is_train:
            if AUXILIARY:
                # DARTS auxiliary classifier: weighted extra loss term.
                loss_aux = fluid.layers.reduce_mean(
                    fluid.layers.softmax_with_cross_entropy(logits_aux, label))
                loss = loss + AUXILIARY_WEIGHT * loss_aux
            step_per_epoch = int(TRAINSET_NUM / BATCH_SIZE)
            learning_rate = fluid.layers.cosine_decay(LR, step_per_epoch, RETAIN_EPOCH)
            # Clipping must be registered before the optimizer is created
            # in the fluid 1.x API.
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
            optimizer = fluid.optimizer.MomentumOptimizer(
                learning_rate,
                MOMENTUM,
                regularization=fluid.regularizer.L2DecayRegularizer(
                    WEIGHT_DECAY))
            optimizer.minimize(loss)
            outs = [loss, top1, top5, learning_rate]
        else:
            outs = [loss, top1, top5]
    return outs, data_loader
In [27]:
def train(main_prog, exe, epoch_id, train_loader, fetch_list):
    """Run one training epoch and return the mean top-1 accuracy.

    When drop-path is enabled, a fresh Bernoulli keep-mask is sampled per
    device each step; the drop probability ramps linearly with epoch:
    DROP_PATH_PROBILITY * epoch_id / RETAIN_EPOCH.
    """
    losses = []
    top1s = []
    top5s = []
    fetch_names = [v.name for v in fetch_list]
    for step_id, data in enumerate(train_loader()):
        if DROP_PATH_PROBILITY > 0:
            feed = []
            # `data` holds one feed dict per device; sample an independent
            # drop-path mask for each device.
            for device_data in data:
                drop_prob = np.array(
                    [[DROP_PATH_PROBILITY * epoch_id / RETAIN_EPOCH]
                     for i in range(BATCH_SIZE)]).astype(np.float32)
                # binomial(1, p) is 1 with probability p (drop); the mask
                # keeps an edge where the draw was 0.
                drop_mask = 1 - np.random.binomial(
                    1, drop_prob[0],
                    size=[BATCH_SIZE, 20, 4, 2]).astype(np.float32)
                feed.append({
                    "image": device_data['image'],
                    "label": device_data['label'],
                    "drop_path_prob": drop_prob,
                    "drop_path_mask": drop_mask
                })
        else:
            feed = data
        loss_v, top1_v, top5_v, lr = exe.run(
            main_prog, feed=feed, fetch_list=fetch_names)
        losses.append(loss_v)
        top1s.append(top1_v)
        top5s.append(top5_v)
        if step_id % 10 == 0:
            # Running means over the epoch so far, not per-step values.
            print(
                "Train Epoch {}, Step {}, Lr {:.8f}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch_id, step_id, lr[0], np.mean(losses), np.mean(top1s), np.mean(top5s)))
    return np.mean(top1s)
In [23]:
def valid(main_prog, exe, epoch_id, valid_loader, fetch_list):
    """Evaluate one epoch over `valid_loader`; return mean top-1 accuracy."""
    losses = []
    top1s = []
    top5s = []
    fetch_names = [v.name for v in fetch_list]
    for step_id, data in enumerate(valid_loader()):
        loss_v, top1_v, top5_v = exe.run(
            main_prog, feed=data, fetch_list=fetch_names)
        losses.append(loss_v)
        top1s.append(top1_v)
        top5s.append(top5_v)
        if step_id % 10 == 0:
            # Running means over the evaluation so far.
            print(
                "Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch_id, step_id, np.mean(losses), np.mean(top1s), np.mean(top5s)))
    return np.mean(top1s)
In [8]:
# Ask the SA controller for the next candidate; [0] takes the single
# architecture function for the one-entry DartsSpace config.
archs = sa_nas.next_archs()[0]
Out [8]:
2020-02-23 12:28:57,462-INFO: current tokens: [5, 5, 5, 5, 5, 12, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10]
In [9]:
# Build separate train/test graphs that share parameters via a common
# startup program; clone(for_test=True) strips training-only ops.
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_fetch_list, train_loader = build_program(train_program, startup_program, IMAGE_SHAPE, archs, is_train=True)
test_fetch_list, test_loader = build_program(test_program, startup_program, IMAGE_SHAPE, archs, is_train=False)
test_program = test_program.clone(for_test=True)
In [10]:
# CPU executor; run the startup program once to initialize parameters.
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
Out [10]:
[]
In [17]:
# CIFAR-10 readers: shuffle+drop_last for training (fixed BATCH_SIZE is
# required by the drop-path placeholders); keep the final partial batch for test.
train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False), buf_size=1024), batch_size=BATCH_SIZE, drop_last=True)
test_reader = paddle.fluid.io.batch(paddle.dataset.cifar.test10(cycle=False), batch_size=BATCH_SIZE, drop_last=False)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader.set_sample_list_generator(test_reader, places=place)
Out [17]:
<paddle.fluid.reader.GeneratorLoader at 0x7fddc8fe7cd0>
# Retrain the sampled architecture for RETAIN_EPOCH epochs, tracking the
# per-epoch validation top-1 accuracy.
valid_top1_list = []  # FIX: was never initialized — the append below raised NameError on a fresh run
for epoch_id in range(RETAIN_EPOCH):
    train_top1 = train(train_program, exe, epoch_id, train_loader, train_fetch_list)
    print("TRAIN: Epoch {}, train_acc {:.6f}".format(epoch_id, train_top1))
    valid_top1 = valid(test_program, exe, epoch_id, test_loader, test_fetch_list)
    print("TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id, valid_top1))
    valid_top1_list.append(valid_top1)
Train Epoch 0, Step 0, Lr 0.02500000, loss 3.310467, acc_1 0.062500, acc_5 0.468750