sanas_darts_space.ipynb
In [19]:
import paddle
import paddle.fluid as fluid
from paddleslim.nas import SANAS
import numpy as np

BATCH_SIZE = 96
SERVER_ADDRESS = ""
PORT = 8377
SEARCH_STEPS = 300
RETAIN_EPOCH = 30
MAX_PARAMS = 3.77
IMAGE_SHAPE = [3, 32, 32]
AUXILIARY = True
AUXILIARY_WEIGHT = 0.4
TRAINSET_NUM = 50000
LR = 0.025
MOMENTUM = 0.9
WEIGHT_DECAY = 0.0003
DROP_PATH_PROBABILITY = 0.2
In [2]:
config = [('DartsSpace')]
sa_nas = SANAS(config,
               server_addr=(SERVER_ADDRESS, PORT),
               search_steps=SEARCH_STEPS,
               is_server=True)
Out [2]:
2020-02-23 12:28:09,752-INFO: range table: ([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14])
2020-02-23 12:28:09,754-INFO: ControllerServer - listen on: [127.0.0.1:8377]
2020-02-23 12:28:09,756-INFO: Controller Server run...
In [3]:
def count_parameters_in_MB(all_params, prefix='model'):
    # Count trainable parameters (excluding the auxiliary head) in millions.
    parameters_number = 0
    for param in all_params:
        if param.name.startswith(prefix) and param.trainable and 'aux' not in param.name:
            parameters_number += np.prod(param.shape)
    return parameters_number / 1e6
In [4]:
def create_data_loader(IMAGE_SHAPE, is_train):
    image = fluid.data(
        name="image", shape=[None] + IMAGE_SHAPE, dtype="float32")
    label = fluid.data(name="label", shape=[None, 1], dtype="int64")
    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label],
        capacity=64,
        use_double_buffer=True,
        iterable=True)
    # Drop-path inputs are only created for the training program.
    drop_path_prob = None
    drop_path_mask = None
    if is_train:
        drop_path_prob = fluid.data(
            name="drop_path_prob", shape=[BATCH_SIZE, 1], dtype="float32")
        drop_path_mask = fluid.data(
            name="drop_path_mask",
            shape=[BATCH_SIZE, 20, 4, 2],
            dtype="float32")
    return data_loader, image, label, drop_path_prob, drop_path_mask
In [5]:
def build_program(main_program, startup_program, IMAGE_SHAPE, archs, is_train):
    with fluid.program_guard(main_program, startup_program):
        data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
            IMAGE_SHAPE, is_train)
        logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
                                   is_train, 10)
        top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
        top5 = fluid.layers.accuracy(input=logits, label=label, k=5)
        loss = fluid.layers.reduce_mean(
            fluid.layers.softmax_with_cross_entropy(logits, label))

        if is_train:
            # Add the auxiliary-head loss, weighted as in the DARTS paper.
            if AUXILIARY:
                loss_aux = fluid.layers.reduce_mean(
                    fluid.layers.softmax_with_cross_entropy(logits_aux, label))
                loss = loss + AUXILIARY_WEIGHT * loss_aux
            step_per_epoch = int(TRAINSET_NUM / BATCH_SIZE)
            learning_rate = fluid.layers.cosine_decay(LR, step_per_epoch,
                                                      RETAIN_EPOCH)
            fluid.clip.set_gradient_clip(
                clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
            optimizer = fluid.optimizer.MomentumOptimizer(
                learning_rate,
                MOMENTUM,
                regularization=fluid.regularizer.L2DecayRegularizer(
                    WEIGHT_DECAY))
            optimizer.minimize(loss)
            outs = [loss, top1, top5, learning_rate]
        else:
            outs = [loss, top1, top5]
    return outs, data_loader
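The retraining run above pairs momentum SGD with a cosine-annealed learning rate. As a quick sanity check, this is the schedule the fluid.layers.cosine_decay documentation describes, evaluated at a few epochs (a standalone sketch, not part of the notebook's graph):

import numpy as np

# lr(e) = LR * 0.5 * (cos(pi * e / RETAIN_EPOCH) + 1),
# where e = floor(global_step / step_per_epoch).
LR, RETAIN_EPOCH = 0.025, 30
for e in [0, 15, 29]:
    print(e, LR * 0.5 * (np.cos(np.pi * e / RETAIN_EPOCH) + 1))
# -> 0.025 at epoch 0, 0.0125 halfway through, ~7e-5 by the last epoch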
In [27]:
def train(main_prog, exe, epoch_id, train_loader, fetch_list):
    loss = []
    top1 = []
    top5 = []
    for step_id, data in enumerate(train_loader()):
        devices_num = len(data)
        if DROP_PATH_PROBABILITY > 0:
            feed = []
            for device_id in range(devices_num):
                image = data[device_id]['image']
                label = data[device_id]['label']
                # The drop probability ramps up linearly with the epoch.
                drop_path_prob = np.array(
                    [[DROP_PATH_PROBABILITY * epoch_id / RETAIN_EPOCH]
                     for i in range(BATCH_SIZE)]).astype(np.float32)
                drop_path_mask = 1 - np.random.binomial(
                    1, drop_path_prob[0],
                    size=[BATCH_SIZE, 20, 4, 2]).astype(np.float32)
                feed.append({
                    "image": image,
                    "label": label,
                    "drop_path_prob": drop_path_prob,
                    "drop_path_mask": drop_path_mask
                })
        else:
            feed = data
        loss_v, top1_v, top5_v, lr = exe.run(
            main_prog, feed=feed, fetch_list=[v.name for v in fetch_list])
        loss.append(loss_v)
        top1.append(top1_v)
        top5.append(top5_v)
        if step_id % 10 == 0:
            print(
                "Train Epoch {}, Step {}, Lr {:.8f}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch_id, step_id, lr[0], np.mean(loss), np.mean(top1),
                       np.mean(top5)))
    return np.mean(top1)
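The drop-path feed above is easier to follow in isolation: the drop probability ramps linearly from 0 toward DROP_PATH_PROBABILITY over the retraining epochs, and the mask's trailing [20, 4, 2] dimensions mirror the 20 stacked cells of the DARTS space, each with 4 intermediate nodes taking 2 input edges. A standalone numpy sketch using the notebook's constants:

import numpy as np

BATCH_SIZE, RETAIN_EPOCH, DROP_PATH_PROBABILITY = 96, 30, 0.2
epoch_id = 15
p = DROP_PATH_PROBABILITY * epoch_id / RETAIN_EPOCH  # 0.1 halfway through
mask = 1 - np.random.binomial(1, p, size=[BATCH_SIZE, 20, 4, 2])
print(p, mask.mean())  # each edge is kept with probability 1 - p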
In [23]:
def valid(main_prog, exe, epoch_id, valid_loader, fetch_list):
    loss = []
    top1 = []
    top5 = []
    for step_id, data in enumerate(valid_loader()):
        loss_v, top1_v, top5_v = exe.run(
            main_prog, feed=data, fetch_list=[v.name for v in fetch_list])
        loss.append(loss_v)
        top1.append(top1_v)
        top5.append(top5_v)
        if step_id % 10 == 0:
            print(
                "Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
                format(epoch_id, step_id, np.mean(loss), np.mean(top1),
                       np.mean(top5)))
    return np.mean(top1)
In [8]:
archs = sa_nas.next_archs()[0]
Out [8]:
2020-02-23 12:28:57,462-INFO: current tokens: [5, 5, 5, 5, 5, 12, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10]
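The sampled tokens fully determine the architecture: the range table logged earlier says each of the 20 tokens picks one of 15 options, and the controller's current pick is shown above. If you later need to rebuild a specific logged architecture rather than sample a fresh one, SANAS also exposes a tokens2arch helper in the PaddleSlim releases this notebook targets (a hedged sketch; the token list here just echoes the log above):

tokens = [5, 5, 5, 5, 5, 12, 7, 7, 7, 7, 7, 7, 7, 10, 10, 10, 10, 10, 10, 10]
archs_from_tokens = sa_nas.tokens2arch(tokens)[0]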
In [9]:
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_fetch_list, train_loader = build_program(
    train_program, startup_program, IMAGE_SHAPE, archs, is_train=True)
test_fetch_list, test_loader = build_program(
    test_program, startup_program, IMAGE_SHAPE, archs, is_train=False)
test_program = test_program.clone(for_test=True)
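MAX_PARAMS was defined at the top but has not been used yet; this is where a full search loop would enforce it. A minimal sketch using the count_parameters_in_MB helper on the freshly built training program (it assumes the DARTS-space parameters are created under the default 'model' prefix):

current_params = count_parameters_in_MB(
    train_program.global_block().all_parameters(), 'model')
print("current model size: {:.2f}M".format(current_params))
if current_params > MAX_PARAMS:
    # A full search loop would skip training this architecture and
    # request the next candidate from sa_nas instead.
    print("architecture over the {}M budget, skipping".format(MAX_PARAMS))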
In [10]:
place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
Out [10]:
[]
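A CPU place works for stepping through the notebook, but retraining a 20-cell DARTS network for 30 epochs is only practical on a GPU. If your Paddle build has CUDA support, swap the place before creating the executor:

place = fluid.CUDAPlace(0)  # requires a CUDA-enabled Paddle build
exe = fluid.Executor(place)
exe.run(startup_program)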
In [17]:
train_reader = paddle.fluid.io.batch(
    paddle.reader.shuffle(
        paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
    batch_size=BATCH_SIZE,
    drop_last=True)
test_reader = paddle.fluid.io.batch(
    paddle.dataset.cifar.test10(cycle=False),
    batch_size=BATCH_SIZE,
    drop_last=False)
train_loader.set_sample_list_generator(train_reader, places=place)
test_loader.set_sample_list_generator(test_reader, places=place)
Out [17]:
<paddle.fluid.reader.GeneratorLoader at 0x7fddc8fe7cd0>
valid_top1_list = []
for epoch_id in range(RETAIN_EPOCH):
    train_top1 = train(train_program, exe, epoch_id, train_loader,
                       train_fetch_list)
    print("TRAIN: Epoch {}, train_acc {:.6f}".format(epoch_id, train_top1))
    valid_top1 = valid(test_program, exe, epoch_id, test_loader,
                       test_fetch_list)
    print("TEST: Epoch {}, valid_acc {:.6f}".format(epoch_id, valid_top1))
    valid_top1_list.append(valid_top1)
Train Epoch 0, Step 0, Lr 0.02500000, loss 3.310467, acc_1 0.062500, acc_5 0.468750
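The log above is only the first training step; the section cuts off before the architecture's score is reported back. In a full SANAS loop, the validation accuracies collected in valid_top1_list would be turned into a reward so the simulated-annealing controller can decide what to sample next. A minimal sketch, assuming the standard SANAS.reward API:

# Average the last two validation accuracies as the reward, as the
# PaddleSlim SANAS demos commonly do, then report it to the controller.
finally_reward = float(valid_top1_list[-1] + valid_top1_list[-2]) / 2
sa_nas.reward(finally_reward)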