From f2073a070de300fd9b62c1863069420527a4a30e Mon Sep 17 00:00:00 2001 From: ceci3 <ceci3@users.noreply.github.com> Date: Tue, 5 Jan 2021 21:10:08 +0800 Subject: [PATCH] [Cherry pick] migrate nas demo to paddle 2.0 (#589) * update * fix --- demo/nas/block_sa_nas_mobilenetv2.py | 129 +++++----- ...image_classification_nas_quick_start.ipynb | 163 ------------ demo/nas/parl_nas_mobilenetv2.py | 115 +++++---- demo/nas/rl_nas_mobilenetv2.py | 115 +++++---- demo/nas/sa_nas_mobilenetv2.py | 175 +++++++------ demo/nas/sanas_darts_space.py | 114 ++++---- demo/optimizer.py | 243 ++---------------- 7 files changed, 333 insertions(+), 721 deletions(-) delete mode 100644 demo/nas/image_classification_nas_quick_start.ipynb diff --git a/demo/nas/block_sa_nas_mobilenetv2.py b/demo/nas/block_sa_nas_mobilenetv2.py index 5c6c8e52..9fc9f9f5 100644 --- a/demo/nas/block_sa_nas_mobilenetv2.py +++ b/demo/nas/block_sa_nas_mobilenetv2.py @@ -6,8 +6,10 @@ import ast import logging import time import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr +import paddle.nn as nn +import paddle.nn.functional as F +import paddle.static as static +from paddle import ParamAttr from paddleslim.analysis import flops from paddleslim.nas import SANAS from paddleslim.common import get_logger @@ -17,18 +19,6 @@ import imagenet_reader _logger = get_logger(__name__, level=logging.INFO) -def create_data_loader(image_shape): - data_shape = [None] + image_shape - data = fluid.data(name='data', shape=data_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[data, label], - capacity=1024, - use_double_buffer=True, - iterable=True) - return data_loader, data, label - - def conv_bn_layer(input, filter_size, num_filters, @@ -38,7 +28,7 @@ def conv_bn_layer(input, act=None, name=None, use_cudnn=True): - conv = fluid.layers.conv2d( + conv = static.nn.conv2d( input, 
num_filters=num_filters, filter_size=filter_size, @@ -50,7 +40,7 @@ def conv_bn_layer(input, param_attr=ParamAttr(name=name + '_weights'), bias_attr=False) bn_name = name + '_bn' - return fluid.layers.batch_norm( + return static.nn.batch_norm( input=conv, act=act, param_attr=ParamAttr(name=bn_name + '_scale'), @@ -61,6 +51,16 @@ def conv_bn_layer(input, def search_mobilenetv2_block(config, args, image_size): image_shape = [3, image_size, image_size] + if args.data == 'cifar10': + train_dataset = paddle.vision.datasets.Cifar10(mode='train') + val_dataset = paddle.vision.datasets.Cifar10(mode='test') + + elif args.data == 'imagenet': + train_dataset = imagenet_reader.ImageNetDataset(mode='train') + val_dataset = imagenet_reader.ImageNetDataset(mode='val') + + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] if args.is_server: sa_nas = SANAS( config, @@ -77,11 +77,33 @@ def search_mobilenetv2_block(config, args, image_size): for step in range(args.search_steps): archs = sa_nas.next_archs()[0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() - with fluid.program_guard(train_program, startup_program): - train_loader, data, label = create_data_loader(image_shape) + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() + with static.program_guard(train_program, startup_program): + data_shape = [None] + image_shape + data = static.data(name='data', shape=data_shape, dtype='float32') + label = static.data(name='label', shape=[None, 1], dtype='int64') + if args.data == 'cifar10': + paddle.assign(paddle.reshape(label, [-1, 1]), label) + train_loader = paddle.io.DataLoader( + train_dataset, + places=places, + feed_list=[data, label], + drop_last=True, + batch_size=args.batch_size, + return_list=False, + shuffle=True, + use_shared_memory=True, + num_workers=4) + val_loader = paddle.io.DataLoader( + val_dataset, + places=place, + 
feed_list=[data, label], + drop_last=False, + batch_size=args.batch_size, + return_list=False, + shuffle=False) data = conv_bn_layer( input=data, num_filters=32, @@ -99,32 +121,27 @@ def search_mobilenetv2_block(config, args, image_size): padding='SAME', act='relu6', name='mobilenetv2_last_conv') - data = fluid.layers.pool2d( - input=data, - pool_size=7, - pool_stride=1, - pool_type='avg', - global_pooling=True, - name='mobilenetv2_last_pool') - output = fluid.layers.fc( - input=data, + data = F.adaptive_avg_pool2d( + data, output_size=[1, 1], name='mobilenetv2_last_pool') + output = static.nn.fc( + x=data, size=args.class_dim, - param_attr=ParamAttr(name='mobilenetv2_fc_weights'), + weight_attr=ParamAttr(name='mobilenetv2_fc_weights'), bias_attr=ParamAttr(name='mobilenetv2_fc_offset')) - softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) - cost = fluid.layers.cross_entropy(input=softmax_out, label=label) - avg_cost = fluid.layers.mean(cost) - acc_top1 = fluid.layers.accuracy( + softmax_out = F.softmax(output) + cost = F.cross_entropy(softmax_out, label=label) + avg_cost = paddle.mean(cost) + acc_top1 = paddle.metric.accuracy( input=softmax_out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.metric.accuracy( input=softmax_out, label=label, k=5) test_program = train_program.clone(for_test=True) - optimizer = fluid.optimizer.Momentum( + optimizer = paddle.optimizer.Momentum( learning_rate=0.1, momentum=0.9, - regularization=fluid.regularizer.L2Decay(1e-4)) + weight_decay=paddle.regularizer.L2Decay(1e-4)) optimizer.minimize(avg_cost) current_flops = flops(train_program) @@ -132,39 +149,11 @@ def search_mobilenetv2_block(config, args, image_size): if current_flops > int(321208544): continue - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) - if args.data == 'cifar10': - train_reader = paddle.fluid.io.batch( - 
paddle.reader.shuffle( - paddle.dataset.cifar.train10(cycle=False), buf_size=1024), - batch_size=args.batch_size, - drop_last=True) - - test_reader = paddle.fluid.io.batch( - paddle.dataset.cifar.test10(cycle=False), - batch_size=args.batch_size, - drop_last=False) - elif args.data == 'imagenet': - train_reader = paddle.fluid.io.batch( - imagenet_reader.train(), - batch_size=args.batch_size, - drop_last=True) - test_reader = paddle.fluid.io.batch( - imagenet_reader.val(), - batch_size=args.batch_size, - drop_last=False) - - test_loader, _, _ = create_data_loader(image_shape) - train_loader.set_sample_list_generator( - train_reader, - places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) - test_loader.set_sample_list_generator(test_reader, places=place) - - build_strategy = fluid.BuildStrategy() - train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): @@ -181,7 +170,7 @@ def search_mobilenetv2_block(config, args, image_size): format(step, epoch_id, batch_id, outs[0], batch_time)) reward = [] - for batch_id, data in enumerate(test_loader()): + for batch_id, data in enumerate(val_loader()): test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name] batch_reward = exe.run(test_program, feed=data, diff --git a/demo/nas/image_classification_nas_quick_start.ipynb b/demo/nas/image_classification_nas_quick_start.ipynb deleted file mode 100644 index 72f04cf8..00000000 --- a/demo/nas/image_classification_nas_quick_start.ipynb +++ /dev/null @@ -1,163 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# 图像分类网络结构搜索-快速开始\n", - "\n", - "该教程以图像分类模型MobileNetV2为例,说明如何在cifar10数据集上快速使用[网络结构搜索接口](../api/nas_api.md)。\n", - "该示例包含以下步骤:\n", - "\n", - "1. 导入依赖\n", - "2. 
初始化SANAS搜索实例\n", - "3. 构建网络\n", - "4. 启动搜索实验\n", - "\n", - "以下章节依次介绍每个步骤的内容。" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. 导入依赖\n", - "请确认已正确安装Paddle,导入需要的依赖包。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import paddle\n", - "import paddle.fluid as fluid\n", - "import paddleslim as slim\n", - "import numpy as np" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. 初始化SANAS搜索实例" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "sanas = slim.nas.SANAS(configs=[('MobileNetV2Space')], server_addr=(\"\", 8337))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. 构建网络\n", - "根据传入的网络结构构造训练program和测试program。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def build_program(archs):\n", - " train_program = fluid.Program()\n", - " startup_program = fluid.Program()\n", - " with fluid.program_guard(train_program, startup_program):\n", - " data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')\n", - " label = fluid.data(name='label', shape=[None, 1], dtype='int64')\n", - " output = archs(data)\n", - " output = fluid.layers.fc(input=output, size=10)\n", - "\n", - " softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)\n", - " cost = fluid.layers.cross_entropy(input=softmax_out, label=label)\n", - " avg_cost = fluid.layers.mean(cost)\n", - " acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)\n", - " acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)\n", - " test_program = fluid.default_main_program().clone(for_test=True)\n", - " \n", - " optimizer = fluid.optimizer.Adam(learning_rate=0.1)\n", - " 
optimizer.minimize(avg_cost)\n", - "\n", - " place = fluid.CPUPlace()\n", - " exe = fluid.Executor(place)\n", - " exe.run(startup_program)\n", - " return exe, train_program, test_program, (data, label), avg_cost, acc_top1, acc_top5" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. 启动搜索实验\n", - "获取每一轮的模型结构并开始训练。该教程中使用FLOPs作为约束条件,搜索实验一共搜索3个step,表示搜索到3个满足条件的模型结构进行训练,每搜索到一个网络结构训练7个epoch。" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "for step in range(3):\n", - " archs = sanas.next_archs()[0]\n", - " exe, train_program, test_progarm, inputs, avg_cost, acc_top1, acc_top5 = build_program(archs)\n", - "\n", - " current_flops = slim.analysis.flops(train_program)\n", - " if current_flops > 321208544:\n", - " continue\n", - " \n", - " train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False), buf_size=1024),batch_size=256)\n", - " train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n", - " test_reader = paddle.fluid.io.batch(paddle.dataset.cifar.test10(cycle=False),\n", - " batch_size=256)\n", - " test_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n", - "\n", - " outputs = [avg_cost.name, acc_top1.name, acc_top5.name]\n", - " for epoch in range(7):\n", - " for data in train_reader():\n", - " loss, acc1, acc5 = exe.run(train_program, feed=train_feeder.feed(data), fetch_list = outputs)\n", - " print(\"TRAIN: loss: {}, acc1: {}, acc5:{}\".format(loss, acc1, acc5))\n", - "\n", - " reward = []\n", - " for data in test_reader():\n", - " batch_reward = exe.run(test_program, feed=test_feeder.feed(data), fetch_list = outputs)\n", - " reward_avg = np.mean(np.array(batch_reward), axis=1)\n", - " reward.append(reward_avg)\n", - " print(\"TEST: loss: {}, acc1: {}, acc5:{}\".format(batch_reward[0], batch_reward[1], batch_reward[2]))\n", - " finally_reward = 
np.mean(np.array(reward), axis=0)\n", - " print(\"FINAL TEST: avg_cost: {}, acc1: {}, acc5: {}\".format(finally_reward[0], finally_reward[1], finally_reward[2]))\n", - "\n", - " sanas.reward(float(finally_reward[1]))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/demo/nas/parl_nas_mobilenetv2.py b/demo/nas/parl_nas_mobilenetv2.py index 6b8bf154..732c8f28 100644 --- a/demo/nas/parl_nas_mobilenetv2.py +++ b/demo/nas/parl_nas_mobilenetv2.py @@ -8,8 +8,9 @@ import argparse import ast import logging import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr +import paddle.nn as nn +import paddle.static as static +import paddle.nn.functional as F from paddleslim.nas import RLNAS from paddleslim.common import get_logger from optimizer import create_optimizer @@ -18,36 +19,50 @@ import imagenet_reader _logger = get_logger(__name__, level=logging.INFO) -def create_data_loader(image_shape): - data_shape = [None] + image_shape - data = fluid.data(name='data', shape=data_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[data, label], - capacity=1024, - use_double_buffer=True, - iterable=True) - return data_loader, data, label - - def build_program(main_program, startup_program, image_shape, + dataset, archs, args, + places, is_test=False): - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - data_loader, data, label = create_data_loader(image_shape) + with static.program_guard(main_program, startup_program): + 
with paddle.utils.unique_name.guard(): + data_shape = [None] + image_shape + data = static.data(name='data', shape=data_shape, dtype='float32') + label = static.data(name='label', shape=[None, 1], dtype='int64') + if args.data == 'cifar10': + paddle.assign(paddle.reshape(label, [-1, 1]), label) + if is_test: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + feed_list=[data, label], + drop_last=False, + batch_size=args.batch_size, + return_list=False, + shuffle=False) + else: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + feed_list=[data, label], + drop_last=True, + batch_size=args.batch_size, + return_list=False, + shuffle=True, + use_shared_memory=True, + num_workers=4) output = archs(data) - output = fluid.layers.fc(input=output, size=args.class_dim) + output = static.nn.fc(output, size=args.class_dim) - softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) - cost = fluid.layers.cross_entropy(input=softmax_out, label=label) - avg_cost = fluid.layers.mean(cost) - acc_top1 = fluid.layers.accuracy( + softmax_out = F.softmax(output) + cost = F.cross_entropy(softmax_out, label=label) + avg_cost = paddle.mean(cost) + acc_top1 = paddle.metric.accuracy( input=softmax_out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.metric.accuracy( input=softmax_out, label=label, k=5) if is_test == False: @@ -57,6 +72,8 @@ def build_program(main_program, def search_mobilenetv2(config, args, image_size, is_server=True): + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] if is_server: ### start a server and a client rl_nas = RLNAS( @@ -76,6 +93,14 @@ def search_mobilenetv2(config, args, image_size, is_server=True): is_server=False) image_shape = [3, image_size, image_size] + if args.data == 'cifar10': + train_dataset = paddle.vision.datasets.Cifar10(mode='train') + val_dataset = paddle.vision.datasets.Cifar10(mode='test') + + elif args.data == 'imagenet': + 
train_dataset = imagenet_reader.ImageNetDataset(mode='train') + val_dataset = imagenet_reader.ImageNetDataset(mode='val') + for step in range(args.search_steps): if step == 0: action_prev = [1. for _ in rl_nas.range_tables] @@ -85,53 +110,29 @@ def search_mobilenetv2(config, args, image_size, is_server=True): obs.extend(action_prev) archs = rl_nas.next_archs(obs=obs)[0][0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( - train_program, startup_program, image_shape, archs, args) + train_program, startup_program, image_shape, train_dataset, archs, + args, places) test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( test_program, startup_program, image_shape, + val_dataset, archs, args, + place, is_test=True) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) - if args.data == 'cifar10': - train_reader = paddle.fluid.io.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(cycle=False), buf_size=1024), - batch_size=args.batch_size, - drop_last=True) - - test_reader = paddle.fluid.io.batch( - paddle.dataset.cifar.test10(cycle=False), - batch_size=args.batch_size, - drop_last=False) - elif args.data == 'imagenet': - train_reader = paddle.fluid.io.batch( - imagenet_reader.train(), - batch_size=args.batch_size, - drop_last=True) - test_reader = paddle.fluid.io.batch( - imagenet_reader.val(), - batch_size=args.batch_size, - drop_last=False) - - train_loader.set_sample_list_generator( - train_reader, - places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) - test_loader.set_sample_list_generator(test_reader, places=place) - - build_strategy = fluid.BuildStrategy() - 
train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): diff --git a/demo/nas/rl_nas_mobilenetv2.py b/demo/nas/rl_nas_mobilenetv2.py index ee86c759..27445d4d 100644 --- a/demo/nas/rl_nas_mobilenetv2.py +++ b/demo/nas/rl_nas_mobilenetv2.py @@ -8,8 +8,9 @@ import argparse import ast import logging import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr +import paddle.nn as nn +import paddle.static as static +import paddle.nn.functional as F from paddleslim.nas import RLNAS from paddleslim.common import get_logger from optimizer import create_optimizer @@ -18,36 +19,50 @@ import imagenet_reader _logger = get_logger(__name__, level=logging.INFO) -def create_data_loader(image_shape): - data_shape = [None] + image_shape - data = fluid.data(name='data', shape=data_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[data, label], - capacity=1024, - use_double_buffer=True, - iterable=True) - return data_loader, data, label - - def build_program(main_program, startup_program, image_shape, + dataset, archs, args, + places, is_test=False): - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - data_loader, data, label = create_data_loader(image_shape) + with static.program_guard(main_program, startup_program): + with paddle.utils.unique_name.guard(): + data_shape = [None] + image_shape + data = static.data(name='data', shape=data_shape, dtype='float32') + label = static.data(name='label', shape=[None, 1], dtype='int64') + if args.data == 'cifar10': + paddle.assign(paddle.reshape(label, [-1, 1]), label) + if is_test: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + 
feed_list=[data, label], + drop_last=False, + batch_size=args.batch_size, + return_list=False, + shuffle=False) + else: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + feed_list=[data, label], + drop_last=True, + batch_size=args.batch_size, + return_list=False, + shuffle=True, + use_shared_memory=True, + num_workers=4) output = archs(data) - output = fluid.layers.fc(input=output, size=args.class_dim) + output = static.nn.fc(output, size=args.class_dim) - softmax_out = fluid.layers.softmax(input=output, use_cudnn=False) - cost = fluid.layers.cross_entropy(input=softmax_out, label=label) - avg_cost = fluid.layers.mean(cost) - acc_top1 = fluid.layers.accuracy( + softmax_out = F.softmax(output) + cost = F.cross_entropy(softmax_out, label=label) + avg_cost = paddle.mean(cost) + acc_top1 = paddle.metric.accuracy( input=softmax_out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.metric.accuracy( input=softmax_out, label=label, k=5) if is_test == False: @@ -57,6 +72,8 @@ def build_program(main_program, def search_mobilenetv2(config, args, image_size, is_server=True): + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] if is_server: ### start a server and a client rl_nas = RLNAS( @@ -86,56 +103,40 @@ def search_mobilenetv2(config, args, image_size, is_server=True): is_server=False) image_shape = [3, image_size, image_size] + if args.data == 'cifar10': + train_dataset = paddle.vision.datasets.Cifar10(mode='train') + val_dataset = paddle.vision.datasets.Cifar10(mode='test') + + elif args.data == 'imagenet': + train_dataset = imagenet_reader.ImageNetDataset(mode='train') + val_dataset = imagenet_reader.ImageNetDataset(mode='val') + for step in range(args.search_steps): archs = rl_nas.next_archs(1)[0][0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() + train_program = static.Program() + test_program = static.Program() + startup_program 
= static.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( - train_program, startup_program, image_shape, archs, args) + train_program, startup_program, image_shape, train_dataset, archs, + args, places) test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( test_program, startup_program, image_shape, + val_dataset, archs, args, + place, is_test=True) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) - if args.data == 'cifar10': - train_reader = paddle.fluid.io.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(cycle=False), buf_size=1024), - batch_size=args.batch_size, - drop_last=True) - - test_reader = paddle.fluid.io.batch( - paddle.dataset.cifar.test10(cycle=False), - batch_size=args.batch_size, - drop_last=False) - elif args.data == 'imagenet': - train_reader = paddle.fluid.io.batch( - imagenet_reader.train(), - batch_size=args.batch_size, - drop_last=True) - test_reader = paddle.fluid.io.batch( - imagenet_reader.val(), - batch_size=args.batch_size, - drop_last=False) - - train_loader.set_sample_list_generator( - train_reader, - places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) - test_loader.set_sample_list_generator(test_reader, places=place) - - build_strategy = fluid.BuildStrategy() - train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): diff --git a/demo/nas/sa_nas_mobilenetv2.py b/demo/nas/sa_nas_mobilenetv2.py index b12be189..64e4748f 100644 --- a/demo/nas/sa_nas_mobilenetv2.py +++ b/demo/nas/sa_nas_mobilenetv2.py @@ -8,8 +8,10 @@ import argparse import ast import logging import paddle -import paddle.fluid as fluid -from 
paddle.fluid.param_attr import ParamAttr +import paddle.nn as nn +import paddle.static as static +import paddle.nn.functional as F +from paddle import ParamAttr from paddleslim.analysis import flops from paddleslim.nas import SANAS from paddleslim.common import get_logger @@ -19,36 +21,50 @@ import imagenet_reader _logger = get_logger(__name__, level=logging.INFO) -def create_data_loader(image_shape): - data_shape = [None] + image_shape - data = fluid.data(name='data', shape=data_shape, dtype='float32') - label = fluid.data(name='label', shape=[None, 1], dtype='int64') - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[data, label], - capacity=1024, - use_double_buffer=True, - iterable=True) - return data_loader, data, label - - def build_program(main_program, startup_program, image_shape, + dataset, archs, args, + places, is_test=False): - with fluid.program_guard(main_program, startup_program): - with fluid.unique_name.guard(): - data_loader, data, label = create_data_loader(image_shape) + with static.program_guard(main_program, startup_program): + with paddle.utils.unique_name.guard(): + data_shape = [None] + image_shape + data = static.data(name='data', shape=data_shape, dtype='float32') + label = static.data(name='label', shape=[None, 1], dtype='int64') + if args.data == 'cifar10': + paddle.assign(paddle.reshape(label, [-1, 1]), label) + if is_test: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + feed_list=[data, label], + drop_last=False, + batch_size=args.batch_size, + return_list=False, + shuffle=False) + else: + data_loader = paddle.io.DataLoader( + dataset, + places=places, + feed_list=[data, label], + drop_last=True, + batch_size=args.batch_size, + return_list=False, + shuffle=True, + use_shared_memory=True, + num_workers=4) output = archs(data) - output = fluid.layers.fc(input=output, size=args.class_dim) + output = static.nn.fc(x=output, size=args.class_dim) - softmax_out = fluid.layers.softmax(input=output, 
use_cudnn=False) - cost = fluid.layers.cross_entropy(input=softmax_out, label=label) - avg_cost = fluid.layers.mean(cost) - acc_top1 = fluid.layers.accuracy( + softmax_out = F.softmax(output) + cost = F.cross_entropy(softmax_out, label=label) + avg_cost = paddle.mean(cost) + acc_top1 = paddle.metric.accuracy( input=softmax_out, label=label, k=1) - acc_top5 = fluid.layers.accuracy( + acc_top5 = paddle.metric.accuracy( input=softmax_out, label=label, k=5) if is_test == False: @@ -58,6 +74,17 @@ def build_program(main_program, def search_mobilenetv2(config, args, image_size, is_server=True): + image_shape = [3, image_size, image_size] + if args.data == 'cifar10': + train_dataset = paddle.vision.datasets.Cifar10(mode='train') + val_dataset = paddle.vision.datasets.Cifar10(mode='test') + + elif args.data == 'imagenet': + train_dataset = imagenet_reader.ImageNetDataset(mode='train') + val_dataset = imagenet_reader.ImageNetDataset(mode='val') + + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] if is_server: ### start a server and a client sa_nas = SANAS( @@ -73,15 +100,15 @@ def search_mobilenetv2(config, args, image_size, is_server=True): search_steps=args.search_steps, is_server=False) - image_shape = [3, image_size, image_size] for step in range(args.search_steps): archs = sa_nas.next_archs()[0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( - train_program, startup_program, image_shape, archs, args) + train_program, startup_program, image_shape, train_dataset, archs, + args, places) current_flops = flops(train_program) print('step: {}, current_flops: {}'.format(step, current_flops)) @@ -92,43 +119,18 @@ def search_mobilenetv2(config, args, image_size, is_server=True): test_program, startup_program, 
image_shape, + val_dataset, archs, args, + place, is_test=True) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) - if args.data == 'cifar10': - train_reader = paddle.fluid.io.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(cycle=False), buf_size=1024), - batch_size=args.batch_size, - drop_last=True) - - test_reader = paddle.fluid.io.batch( - paddle.dataset.cifar.test10(cycle=False), - batch_size=args.batch_size, - drop_last=False) - elif args.data == 'imagenet': - train_reader = paddle.fluid.io.batch( - imagenet_reader.train(), - batch_size=args.batch_size, - drop_last=True) - test_reader = paddle.fluid.io.batch( - imagenet_reader.val(), - batch_size=args.batch_size, - drop_last=False) - - train_loader.set_sample_list_generator( - train_reader, - places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) - test_loader.set_sample_list_generator(test_reader, places=place) - - build_strategy = fluid.BuildStrategy() - train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): @@ -169,6 +171,9 @@ def search_mobilenetv2(config, args, image_size, is_server=True): def test_search_result(tokens, image_size, args, config): + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] + sa_nas = SANAS( config, server_addr=(args.server_address, args.port), @@ -176,50 +181,42 @@ def test_search_result(tokens, image_size, args, config): is_server=True) image_shape = [3, image_size, image_size] + if args.data == 'cifar10': + train_dataset = paddle.vision.datasets.Cifar10(mode='train') + val_dataset = paddle.vision.datasets.Cifar10(mode='test') + + elif args.data == 
'imagenet': + train_dataset = imagenet_reader.ImageNetDataset(mode='train') + val_dataset = imagenet_reader.ImageNetDataset(mode='val') archs = sa_nas.tokens2arch(tokens)[0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() train_loader, avg_cost, acc_top1, acc_top5 = build_program( - train_program, startup_program, image_shape, archs, args) + train_program, startup_program, image_shape, train_dataset, archs, args, + places) current_flops = flops(train_program) print('current_flops: {}'.format(current_flops)) test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( - test_program, startup_program, image_shape, archs, args, is_test=True) + test_program, + startup_program, + image_shape, + val_dataset, + archs, + args, + place, + is_test=True) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) - if args.data == 'cifar10': - train_reader = paddle.fluid.io.batch( - paddle.reader.shuffle( - paddle.dataset.cifar.train10(cycle=False), buf_size=1024), - batch_size=args.batch_size, - drop_last=True) - - test_reader = paddle.fluid.io.batch( - paddle.dataset.cifar.test10(cycle=False), - batch_size=args.batch_size, - drop_last=False) - elif args.data == 'imagenet': - train_reader = paddle.fluid.io.batch( - imagenet_reader.train(), batch_size=args.batch_size, drop_last=True) - test_reader = paddle.fluid.io.batch( - imagenet_reader.val(), batch_size=args.batch_size, drop_last=False) - - train_loader.set_sample_list_generator( - train_reader, - places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places()) - test_loader.set_sample_list_generator(test_reader, places=place) - - build_strategy = fluid.BuildStrategy() - train_compiled_program = 
fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=avg_cost.name, build_strategy=build_strategy) for epoch_id in range(args.retain_epoch): diff --git a/demo/nas/sanas_darts_space.py b/demo/nas/sanas_darts_space.py index 43705e87..6410f305 100644 --- a/demo/nas/sanas_darts_space.py +++ b/demo/nas/sanas_darts_space.py @@ -8,7 +8,10 @@ import time import argparse import ast import logging -import paddle.fluid as fluid +import paddle +import paddle.nn.functional as F +import paddle.nn as nn +import paddle.static as static from paddleslim.nas import SANAS from paddleslim.common import get_logger import darts_cifar10_reader as reader @@ -49,10 +52,10 @@ def count_parameters_in_MB(all_params, prefix='model'): def create_data_loader(image_shape, is_train, args): - image = fluid.data( + image = static.data( name="image", shape=[None] + image_shape, dtype="float32") - label = fluid.data(name="label", shape=[None, 1], dtype="int64") - data_loader = fluid.io.DataLoader.from_generator( + label = static.data(name="label", shape=[None, 1], dtype="int64") + data_loader = paddle.io.DataLoader.from_generator( feed_list=[image, label], capacity=64, use_double_buffer=True, @@ -60,9 +63,9 @@ def create_data_loader(image_shape, is_train, args): drop_path_prob = '' drop_path_mask = '' if is_train: - drop_path_prob = fluid.data( + drop_path_prob = static.data( name="drop_path_prob", shape=[args.batch_size, 1], dtype="float32") - drop_path_mask = fluid.data( + drop_path_mask = static.data( name="drop_path_mask", shape=[args.batch_size, 20, 4, 2], dtype="float32") @@ -72,36 +75,33 @@ def create_data_loader(image_shape, is_train, args): def build_program(main_program, startup_program, image_shape, archs, args, is_train): - with fluid.program_guard(main_program, startup_program): + with static.program_guard(main_program, startup_program): data_loader, data, label, drop_path_prob, 
drop_path_mask = create_data_loader( image_shape, is_train, args) logits, logits_aux = archs(data, drop_path_prob, drop_path_mask, is_train, 10) - top1 = fluid.layers.accuracy(input=logits, label=label, k=1) - top5 = fluid.layers.accuracy(input=logits, label=label, k=5) - loss = fluid.layers.reduce_mean( - fluid.layers.softmax_with_cross_entropy(logits, label)) + top1 = paddle.metric.accuracy(input=logits, label=label, k=1) + top5 = paddle.metric.accuracy(input=logits, label=label, k=5) + loss = paddle.mean(F.softmax_with_cross_entropy(logits, label)) if is_train: if auxiliary: - loss_aux = fluid.layers.reduce_mean( - fluid.layers.softmax_with_cross_entropy(logits_aux, label)) + loss_aux = paddle.mean( + F.softmax_with_cross_entropy(logits_aux, label)) loss = loss + auxiliary_weight * loss_aux step_per_epoch = int(trainset_num / args.batch_size) - learning_rate = fluid.layers.cosine_decay(lr, step_per_epoch, - args.retain_epoch) - fluid.clip.set_gradient_clip( - clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)) - optimizer = fluid.optimizer.MomentumOptimizer( + learning_rate = paddle.optimizer.lr.CosineAnnealingDecay( + lr, T_max=step_per_epoch * args.retain_epoch) + optimizer = paddle.optimizer.Momentum( learning_rate, momentum, - regularization=fluid.regularizer.L2DecayRegularizer( - weight_decay)) + weight_decay=paddle.regularizer.L2Decay(weight_decay), + grad_clip=nn.ClipGradByGlobalNorm(clip_norm=5.0)) optimizer.minimize(loss) - outs = [loss, top1, top5, learning_rate] + outs = [loss, top1, top5] else: outs = [loss, top1, top5] - return outs, data_loader + return outs, (data, label), data_loader def train(main_prog, exe, epoch_id, train_loader, fetch_list, args): @@ -129,16 +129,16 @@ def train(main_prog, exe, epoch_id, train_loader, fetch_list, args): }) else: feed = data - loss_v, top1_v, top5_v, lr = exe.run( + loss_v, top1_v, top5_v = exe.run( main_prog, feed=feed, fetch_list=[v.name for v in fetch_list]) loss.update(loss_v, args.batch_size) 
top1.update(top1_v, args.batch_size) top5.update(top5_v, args.batch_size) if step_id % 10 == 0: _logger.info( - "Train Epoch {}, Step {}, Lr {:.8f}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}". - format(epoch_id, step_id, lr[0], loss.avg[0], top1.avg[0], - top5.avg[0])) + "Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}". + format(epoch_id, step_id, loss.avg[0], top1.avg[0], top5.avg[ + 0])) return top1.avg[0] @@ -161,6 +161,8 @@ def valid(main_prog, exe, epoch_id, valid_loader, fetch_list, args): def search(config, args, image_size, is_server=True): + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] if is_server: ### start a server and a client sa_nas = SANAS( @@ -180,10 +182,10 @@ def search(config, args, image_size, is_server=True): for step in range(args.search_steps): archs = sa_nas.next_archs()[0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() - train_fetch_list, train_loader = build_program( + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() + train_fetch_list, _, train_loader = build_program( train_program, startup_program, image_shape, @@ -198,7 +200,7 @@ def search(config, args, image_size, is_server=True): if current_params > float(3.77): continue - test_fetch_list, test_loader = build_program( + test_fetch_list, _, test_loader = build_program( test_program, startup_program, image_shape, @@ -207,8 +209,7 @@ def search(config, args, image_size, is_server=True): is_train=False) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) train_reader = reader.train_valid( @@ -219,8 +220,8 @@ def search(config, args, image_size, is_server=True): train_loader.set_batch_generator(train_reader, places=place) test_loader.set_batch_generator(test_reader, 
places=place) - build_strategy = fluid.BuildStrategy() - train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=train_fetch_list[0].name, build_strategy=build_strategy) @@ -241,52 +242,40 @@ def search(config, args, image_size, is_server=True): def final_test(config, args, image_size, token=None): assert token != None, "If you want to start a final experiment, you must input a token." + places = static.cuda_places() if args.use_gpu else static.cpu_places() + place = places[0] sa_nas = SANAS( config, server_addr=(args.server_address, args.port), is_server=True) image_shape = [3, image_size, image_size] archs = sa_nas.tokens2arch(token)[0] - train_program = fluid.Program() - test_program = fluid.Program() - startup_program = fluid.Program() - train_fetch_list, train_loader = build_program( - train_program, - startup_program, - image_shape, - archs, - args, - is_train=True) + train_program = static.Program() + test_program = static.Program() + startup_program = static.Program() + train_fetch_list, (data, label), train_loader = build_program( + train_program, startup_program, image_shape, archs, args, is_train=True) current_params = count_parameters_in_MB( train_program.global_block().all_parameters(), 'cifar10') _logger.info('current_params: {}M'.format(current_params)) - test_fetch_list, test_loader = build_program( - test_program, - startup_program, - image_shape, - archs, - args, - is_train=False) + test_fetch_list, _, test_loader = build_program( + test_program, startup_program, image_shape, archs, args, is_train=False) test_program = test_program.clone(for_test=True) - place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace() - exe = fluid.Executor(place) + exe = static.Executor(place) exe.run(startup_program) train_reader = reader.train_valid( - batch_size=args.batch_size, is_train=True, is_shuffle=True, args=args) + 
batch_size=args.batch_size, is_train=True, is_shuffle=True) test_reader = reader.train_valid( - batch_size=args.batch_size, - is_train=False, - is_shuffle=False, - args=args) + batch_size=args.batch_size, is_train=False, is_shuffle=False) train_loader.set_batch_generator(train_reader, places=place) test_loader.set_batch_generator(test_reader, places=place) - build_strategy = fluid.BuildStrategy() - train_compiled_program = fluid.CompiledProgram( + build_strategy = static.BuildStrategy() + train_compiled_program = static.CompiledProgram( train_program).with_data_parallel( loss_name=train_fetch_list[0].name, build_strategy=build_strategy) @@ -305,11 +294,12 @@ def final_test(config, args, image_size, token=None): output_dir = os.path.join('darts_output', str(epoch_id)) if not os.path.exists(output_dir): os.makedirs(output_dir) - fluid.io.save_persistables(exe, output_dir, main_program=train_program) + static.save_inference_model(output_dir, [data], test_fetch_list, exe) if __name__ == '__main__': + paddle.enable_static() parser = argparse.ArgumentParser( description='SA NAS MobileNetV2 cifar10 argparase') parser.add_argument( diff --git a/demo/optimizer.py b/demo/optimizer.py index 6b896274..bd1d3bb4 100644 --- a/demo/optimizer.py +++ b/demo/optimizer.py @@ -18,9 +18,7 @@ from __future__ import print_function import math -import paddle.fluid as fluid -import paddle.fluid.layers.ops as ops -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter +import paddle lr_strategy = 'cosine_decay' l2_decay = 1e-4 @@ -33,111 +31,6 @@ decay_rate = 0.97 total_images = 1281167 -def cosine_decay(learning_rate, step_each_epoch, epochs=120): - """Applies cosine decay to the learning rate. 
- lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) - """ - global_step = _decay_step_counter() - - epoch = ops.floor(global_step / step_each_epoch) - decayed_lr = learning_rate * \ - (ops.cos(epoch * (math.pi / epochs)) + 1)/2 - return decayed_lr - - -def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120): - """Applies cosine decay to the learning rate. - lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) - decrease lr for every mini-batch and start with warmup. - """ - global_step = _decay_step_counter() - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - - warmup_epoch = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(5), force_cpu=True) - - epoch = ops.floor(global_step / step_each_epoch) - with fluid.layers.control_flow.Switch() as switch: - with switch.case(epoch < warmup_epoch): - decayed_lr = learning_rate * (global_step / - (step_each_epoch * warmup_epoch)) - fluid.layers.tensor.assign(input=decayed_lr, output=lr) - with switch.default(): - decayed_lr = learning_rate * \ - (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2 - fluid.layers.tensor.assign(input=decayed_lr, output=lr) - return lr - - -def exponential_decay_with_warmup(learning_rate, - step_each_epoch, - decay_epochs, - decay_rate=0.97, - warm_up_epoch=5.0): - """Applies exponential decay to the learning rate. 
- """ - global_step = _decay_step_counter() - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - - warmup_epoch = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True) - - epoch = ops.floor(global_step / step_each_epoch) - with fluid.layers.control_flow.Switch() as switch: - with switch.case(epoch < warmup_epoch): - decayed_lr = learning_rate * (global_step / - (step_each_epoch * warmup_epoch)) - fluid.layers.assign(input=decayed_lr, output=lr) - with switch.default(): - div_res = (global_step - warmup_epoch * step_each_epoch - ) / decay_epochs - div_res = ops.floor(div_res) - decayed_lr = learning_rate * (decay_rate**div_res) - fluid.layers.assign(input=decayed_lr, output=lr) - - return lr - - -def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): - """ Applies linear learning rate warmup for distributed training - Argument learning_rate can be float or a Variable - lr = lr + (warmup_rate * step / warmup_steps) - """ - assert (isinstance(end_lr, float)) - assert (isinstance(start_lr, float)) - linear_step = end_lr - start_lr - with fluid.default_main_program()._lr_schedule_guard(): - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate_warmup") - - global_step = fluid.layers.learning_rate_scheduler._decay_step_counter( - ) - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(global_step < warmup_steps): - decayed_lr = start_lr + linear_step * (global_step / - warmup_steps) - fluid.layers.tensor.assign(decayed_lr, lr) - with switch.default(): - fluid.layers.tensor.assign(learning_rate, lr) - - return lr - - class Optimizer(object): """A class used to represent several optimizer methods @@ -167,23 +60,13 @@ class Optimizer(object): self.decay_epochs = decay_epochs self.decay_rate = decay_rate self.total_images = total_images + if 
args.use_gpu: + devices_num = paddle.fluid.core.get_cuda_device_count() + else: + devices_num = int(os.environ.get('CPU_NUM', 1)) - self.step = int(math.ceil(float(self.total_images) / self.batch_size)) - - def piecewise_decay(self): - """piecewise decay with Momentum optimizer - - Returns: - a piecewise_decay optimizer - """ - bd = [self.step * e for e in self.step_epochs] - lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)] - learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr) - optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, - momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay)) - return optimizer + self.step = int( + math.ceil(float(self.total_images) / self.batch_size) / devices_num) def cosine_decay(self): """cosine decay with Momentum optimizer @@ -191,111 +74,25 @@ class Optimizer(object): Returns: a cosine_decay optimizer """ - - learning_rate = fluid.layers.cosine_decay( - learning_rate=self.lr, - step_each_epoch=self.step, - epochs=self.num_epochs) - optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, - momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay)) - return optimizer - - def cosine_decay_warmup(self): - """cosine decay with warmup - - Returns: - a cosine_decay_with_warmup optimizer - """ - - learning_rate = cosine_decay_with_warmup( - learning_rate=self.lr, - step_each_epoch=self.step, - epochs=self.num_epochs) - optimizer = fluid.optimizer.Momentum( - learning_rate=learning_rate, - momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay)) - return optimizer - - def exponential_decay_warmup(self): - """exponential decay with warmup - - Returns: - a exponential_decay_with_warmup optimizer - """ - - learning_rate = exponential_decay_with_warmup( + learning_rate = paddle.optimizer.lr.CosineAnnealingDecay( learning_rate=self.lr, - step_each_epoch=self.step, - decay_epochs=self.step * 
self.decay_epochs, - decay_rate=self.decay_rate, - warm_up_epoch=self.warm_up_epochs) - optimizer = fluid.optimizer.RMSProp( - learning_rate=learning_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay), - momentum=self.momentum_rate, - rho=0.9, - epsilon=0.001) - return optimizer - - def linear_decay(self): - """linear decay with Momentum optimizer - - Returns: - a linear_decay optimizer - """ - - end_lr = 0 - learning_rate = fluid.layers.polynomial_decay( - self.lr, self.step, end_lr, power=1) - optimizer = fluid.optimizer.Momentum( + T_max=self.step * self.num_epochs, + verbose=False) + optimizer = paddle.optimizer.Momentum( learning_rate=learning_rate, momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay)) - + weight_decay=paddle.regularizer.L2Decay(self.l2_decay)) return optimizer - def adam_decay(self): - """Adam optimizer - - Returns: - an adam_decay optimizer - """ - - return fluid.optimizer.Adam(learning_rate=self.lr) - - def cosine_decay_RMSProp(self): - """cosine decay with RMSProp optimizer - - Returns: - an cosine_decay_RMSProp optimizer - """ - - learning_rate = fluid.layers.cosine_decay( - learning_rate=self.lr, - step_each_epoch=self.step, - epochs=self.num_epochs) - optimizer = fluid.optimizer.RMSProp( + def piecewise_decay(args): + bd = [step * e for e in args.step_epochs] + lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)] + learning_rate = paddle.optimizer.lr.PiecewiseDecay( + boundaries=bd, values=lr, verbose=False) + optimizer = paddle.optimizer.Momentum( learning_rate=learning_rate, - momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay), - # Apply epsilon=1 on ImageNet dataset. 
- epsilon=1) - return optimizer - - def default_decay(self): - """default decay - - Returns: - default decay optimizer - """ - - optimizer = fluid.optimizer.Momentum( - learning_rate=self.lr, - momentum=self.momentum_rate, - regularization=fluid.regularizer.L2Decay(self.l2_decay)) + momentum=args.momentum_rate, + weight_decay=paddle.regularizer.L2Decay(args.l2_decay)) return optimizer -- GitLab