未验证 提交 f28bf198 编写于 作者: C ceci3 提交者: GitHub

migrate nas demo to paddle 2.0 (#588)

* update
上级 63db48b4
......@@ -6,8 +6,11 @@ import ast
import logging
import time
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
import paddle.vision.transforms as T
import paddle.static as static
from paddle import ParamAttr
from paddleslim.analysis import flops
from paddleslim.nas import SANAS
from paddleslim.common import get_logger
......@@ -17,18 +20,6 @@ import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
    """Declare the 'data'/'label' feed variables and a generator-fed loader.

    Args:
        image_shape: per-sample shape as a list; it is concatenated onto
            ``[None]`` so the leading (batch) dimension stays dynamic.

    Returns:
        A ``(data_loader, data, label)`` tuple. The loader is created with
        ``DataLoader.from_generator`` and still needs a sample generator
        attached by the caller before it can be iterated.
    """
    data = fluid.data(
        name='data', shape=[None] + image_shape, dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    # Loader options are collected first so the factory call stays short.
    loader_options = dict(
        feed_list=[data, label],
        capacity=1024,
        use_double_buffer=True,
        iterable=True)
    data_loader = fluid.io.DataLoader.from_generator(**loader_options)
    return data_loader, data, label
def conv_bn_layer(input,
filter_size,
num_filters,
......@@ -38,7 +29,7 @@ def conv_bn_layer(input,
act=None,
name=None,
use_cudnn=True):
conv = fluid.layers.conv2d(
conv = static.nn.conv2d(
input,
num_filters=num_filters,
filter_size=filter_size,
......@@ -50,7 +41,7 @@ def conv_bn_layer(input,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
bn_name = name + '_bn'
return fluid.layers.batch_norm(
return static.nn.batch_norm(
input=conv,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
......@@ -61,6 +52,19 @@ def conv_bn_layer(input,
def search_mobilenetv2_block(config, args, image_size):
image_shape = [3, image_size, image_size]
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
if args.data == 'cifar10':
train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val')
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
if args.is_server:
sa_nas = SANAS(
config,
......@@ -77,11 +81,33 @@ def search_mobilenetv2_block(config, args, image_size):
for step in range(args.search_steps):
archs = sa_nas.next_archs()[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(train_program, startup_program):
train_loader, data, label = create_data_loader(image_shape)
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
with static.program_guard(train_program, startup_program):
data_shape = [None] + image_shape
data = static.data(name='data', shape=data_shape, dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')
if args.data == 'cifar10':
paddle.assign(paddle.reshape(label, [-1, 1]), label)
train_loader = paddle.io.DataLoader(
train_dataset,
places=places,
feed_list=[data, label],
drop_last=True,
batch_size=args.batch_size,
return_list=False,
shuffle=True,
use_shared_memory=True,
num_workers=4)
val_loader = paddle.io.DataLoader(
val_dataset,
places=place,
feed_list=[data, label],
drop_last=False,
batch_size=args.batch_size,
return_list=False,
shuffle=False)
data = conv_bn_layer(
input=data,
num_filters=32,
......@@ -99,32 +125,27 @@ def search_mobilenetv2_block(config, args, image_size):
padding='SAME',
act='relu6',
name='mobilenetv2_last_conv')
data = fluid.layers.pool2d(
input=data,
pool_size=7,
pool_stride=1,
pool_type='avg',
global_pooling=True,
name='mobilenetv2_last_pool')
output = fluid.layers.fc(
input=data,
data = F.adaptive_avg_pool2d(
data, output_size=[1, 1], name='mobilenetv2_last_pool')
output = static.nn.fc(
x=data,
size=args.class_dim,
param_attr=ParamAttr(name='mobilenetv2_fc_weights'),
weight_attr=ParamAttr(name='mobilenetv2_fc_weights'),
bias_attr=ParamAttr(name='mobilenetv2_fc_offset'))
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(
softmax_out = F.softmax(output)
cost = F.cross_entropy(softmax_out, label=label)
avg_cost = paddle.mean(cost)
acc_top1 = paddle.metric.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
acc_top5 = paddle.metric.accuracy(
input=softmax_out, label=label, k=5)
test_program = train_program.clone(for_test=True)
optimizer = fluid.optimizer.Momentum(
optimizer = paddle.optimizer.Momentum(
learning_rate=0.1,
momentum=0.9,
regularization=fluid.regularizer.L2Decay(1e-4))
weight_decay=paddle.regularizer.L2Decay(1e-4))
optimizer.minimize(avg_cost)
current_flops = flops(train_program)
......@@ -132,39 +153,11 @@ def search_mobilenetv2_block(config, args, image_size):
if current_flops > int(321208544):
continue
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.fluid.io.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.fluid.io.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
test_loader, _, _ = create_data_loader(image_shape)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
......@@ -181,7 +174,7 @@ def search_mobilenetv2_block(config, args, image_size):
format(step, epoch_id, batch_id, outs[0], batch_time))
reward = []
for batch_id, data in enumerate(test_loader()):
for batch_id, data in enumerate(val_loader()):
test_fetches = [avg_cost.name, acc_top1.name, acc_top5.name]
batch_reward = exe.run(test_program,
feed=data,
......
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 图像分类网络结构搜索-快速开始\n",
"\n",
"该教程以图像分类模型MobileNetV2为例,说明如何在cifar10数据集上快速使用[网络结构搜索接口](../api/nas_api.md)。\n",
"该示例包含以下步骤:\n",
"\n",
"1. 导入依赖\n",
"2. 初始化SANAS搜索实例\n",
"3. 构建网络\n",
"4. 启动搜索实验\n",
"\n",
"以下章节依次介绍每个步骤的内容。"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. 导入依赖\n",
"请确认已正确安装Paddle,导入需要的依赖包。"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import paddle\n",
"import paddle.fluid as fluid\n",
"import paddleslim as slim\n",
"import numpy as np"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. 初始化SANAS搜索实例"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"sanas = slim.nas.SANAS(configs=[('MobileNetV2Space')], server_addr=(\"\", 8337))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 3. 构建网络\n",
"根据传入的网络结构构造训练program和测试program。"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def build_program(archs):\n",
" train_program = fluid.Program()\n",
" startup_program = fluid.Program()\n",
" with fluid.program_guard(train_program, startup_program):\n",
" data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')\n",
" label = fluid.data(name='label', shape=[None, 1], dtype='int64')\n",
" output = archs(data)\n",
" output = fluid.layers.fc(input=output, size=10)\n",
"\n",
" softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)\n",
" cost = fluid.layers.cross_entropy(input=softmax_out, label=label)\n",
" avg_cost = fluid.layers.mean(cost)\n",
" acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)\n",
" acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)\n",
" test_program = fluid.default_main_program().clone(for_test=True)\n",
" \n",
" optimizer = fluid.optimizer.Adam(learning_rate=0.1)\n",
" optimizer.minimize(avg_cost)\n",
"\n",
" place = fluid.CPUPlace()\n",
" exe = fluid.Executor(place)\n",
" exe.run(startup_program)\n",
" return exe, train_program, test_program, (data, label), avg_cost, acc_top1, acc_top5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 4. 启动搜索实验\n",
"获取每一轮的模型结构并开始训练。该教程中使用FLOPs作为约束条件,搜索实验一共搜索3个step,表示搜索到3个满足条件的模型结构进行训练,每搜索到一个网络结构训练7个epoch。"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for step in range(3):\n",
" archs = sanas.next_archs()[0]\n",
" exe, train_program, test_program, inputs, avg_cost, acc_top1, acc_top5 = build_program(archs)\n",
"\n",
" current_flops = slim.analysis.flops(train_program)\n",
" if current_flops > 321208544:\n",
" continue\n",
" \n",
" train_reader = paddle.fluid.io.batch(paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False), buf_size=1024),batch_size=256)\n",
" train_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n",
" test_reader = paddle.fluid.io.batch(paddle.dataset.cifar.test10(cycle=False),\n",
" batch_size=256)\n",
" test_feeder = fluid.DataFeeder(inputs, fluid.CPUPlace())\n",
"\n",
" outputs = [avg_cost.name, acc_top1.name, acc_top5.name]\n",
" for epoch in range(7):\n",
" for data in train_reader():\n",
" loss, acc1, acc5 = exe.run(train_program, feed=train_feeder.feed(data), fetch_list = outputs)\n",
" print(\"TRAIN: loss: {}, acc1: {}, acc5:{}\".format(loss, acc1, acc5))\n",
"\n",
" reward = []\n",
" for data in test_reader():\n",
" batch_reward = exe.run(test_program, feed=test_feeder.feed(data), fetch_list = outputs)\n",
" reward_avg = np.mean(np.array(batch_reward), axis=1)\n",
" reward.append(reward_avg)\n",
" print(\"TEST: loss: {}, acc1: {}, acc5:{}\".format(batch_reward[0], batch_reward[1], batch_reward[2]))\n",
" finally_reward = np.mean(np.array(reward), axis=0)\n",
" print(\"FINAL TEST: avg_cost: {}, acc1: {}, acc5: {}\".format(finally_reward[0], finally_reward[1], finally_reward[2]))\n",
"\n",
" sanas.reward(float(finally_reward[1]))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -8,8 +8,10 @@ import argparse
import ast
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import paddle.nn as nn
import paddle.static as static
import paddle.nn.functional as F
import paddle.vision.transforms as T
from paddleslim.nas import RLNAS
from paddleslim.common import get_logger
from optimizer import create_optimizer
......@@ -18,36 +20,50 @@ import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
    """Build the static-graph inputs and the loader that feeds them.

    Args:
        image_shape: per-sample shape as a list; ``[None]`` is prepended so
            the first dimension is left unspecified.

    Returns:
        ``(data_loader, data, label)`` where ``data`` is the float32 input
        named 'data', ``label`` is the int64 input named 'label', and
        ``data_loader`` is the ``from_generator`` loader bound to both.
    """
    full_shape = [None] + image_shape
    feed_vars = [
        fluid.data(name='data', shape=full_shape, dtype='float32'),
        fluid.data(name='label', shape=[None, 1], dtype='int64'),
    ]
    data, label = feed_vars
    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=feed_vars,
        capacity=1024,
        use_double_buffer=True,
        iterable=True)
    return data_loader, data, label
def build_program(main_program,
startup_program,
image_shape,
dataset,
archs,
args,
places,
is_test=False):
with fluid.program_guard(main_program, startup_program):
with fluid.unique_name.guard():
data_loader, data, label = create_data_loader(image_shape)
with static.program_guard(main_program, startup_program):
with paddle.utils.unique_name.guard():
data_shape = [None] + image_shape
data = static.data(name='data', shape=data_shape, dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')
if args.data == 'cifar10':
paddle.assign(paddle.reshape(label, [-1, 1]), label)
if is_test:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=False,
batch_size=args.batch_size,
return_list=False,
shuffle=False)
else:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=True,
batch_size=args.batch_size,
return_list=False,
shuffle=True,
use_shared_memory=True,
num_workers=4)
output = archs(data)
output = fluid.layers.fc(input=output, size=args.class_dim)
output = static.nn.fc(output, size=args.class_dim)
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(
softmax_out = F.softmax(output)
cost = F.cross_entropy(softmax_out, label=label)
avg_cost = paddle.mean(cost)
acc_top1 = paddle.metric.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
acc_top5 = paddle.metric.accuracy(
input=softmax_out, label=label, k=5)
if is_test == False:
......@@ -57,6 +73,8 @@ def build_program(main_program,
def search_mobilenetv2(config, args, image_size, is_server=True):
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
if is_server:
### start a server and a client
rl_nas = RLNAS(
......@@ -76,6 +94,17 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
is_server=False)
image_shape = [3, image_size, image_size]
if args.data == 'cifar10':
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val')
for step in range(args.search_steps):
if step == 0:
action_prev = [1. for _ in rl_nas.range_tables]
......@@ -85,53 +114,29 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
obs.extend(action_prev)
archs = rl_nas.next_archs(obs=obs)[0][0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
train_program, startup_program, image_shape, train_dataset, archs,
args, places)
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
test_program,
startup_program,
image_shape,
val_dataset,
archs,
args,
place,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.fluid.io.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.fluid.io.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
......
......@@ -8,8 +8,10 @@ import argparse
import ast
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import paddle.nn as nn
import paddle.static as static
import paddle.nn.functional as F
import paddle.vision.transforms as T
from paddleslim.nas import RLNAS
from paddleslim.common import get_logger
from optimizer import create_optimizer
......@@ -18,36 +20,50 @@ import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
    """Create the 'data' and 'label' placeholders plus their DataLoader.

    Args:
        image_shape: list giving the shape of one sample; it is appended to
            ``[None]`` to form the placeholder shape.

    Returns:
        Tuple of ``(data_loader, data, label)``.
    """
    data = fluid.data(
        name='data', shape=[None] + image_shape, dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    make_loader = fluid.io.DataLoader.from_generator
    data_loader = make_loader(
        feed_list=[data, label],
        capacity=1024,
        use_double_buffer=True,
        iterable=True)
    return data_loader, data, label
def build_program(main_program,
startup_program,
image_shape,
dataset,
archs,
args,
places,
is_test=False):
with fluid.program_guard(main_program, startup_program):
with fluid.unique_name.guard():
data_loader, data, label = create_data_loader(image_shape)
with static.program_guard(main_program, startup_program):
with paddle.utils.unique_name.guard():
data_shape = [None] + image_shape
data = static.data(name='data', shape=data_shape, dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')
if args.data == 'cifar10':
paddle.assign(paddle.reshape(label, [-1, 1]), label)
if is_test:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=False,
batch_size=args.batch_size,
return_list=False,
shuffle=False)
else:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=True,
batch_size=args.batch_size,
return_list=False,
shuffle=True,
use_shared_memory=True,
num_workers=4)
output = archs(data)
output = fluid.layers.fc(input=output, size=args.class_dim)
output = static.nn.fc(output, size=args.class_dim)
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(
softmax_out = F.softmax(output)
cost = F.cross_entropy(softmax_out, label=label)
avg_cost = paddle.mean(cost)
acc_top1 = paddle.metric.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
acc_top5 = paddle.metric.accuracy(
input=softmax_out, label=label, k=5)
if is_test == False:
......@@ -57,6 +73,8 @@ def build_program(main_program,
def search_mobilenetv2(config, args, image_size, is_server=True):
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
if is_server:
### start a server and a client
rl_nas = RLNAS(
......@@ -86,56 +104,43 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
is_server=False)
image_shape = [3, image_size, image_size]
if args.data == 'cifar10':
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val')
for step in range(args.search_steps):
archs = rl_nas.next_archs(1)[0][0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
train_program, startup_program, image_shape, train_dataset, archs,
args, places)
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
test_program,
startup_program,
image_shape,
val_dataset,
archs,
args,
place,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.fluid.io.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.fluid.io.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
......
......@@ -8,8 +8,11 @@ import argparse
import ast
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import paddle.nn as nn
import paddle.static as static
import paddle.nn.functional as F
import paddle.vision.transforms as T
from paddle import ParamAttr
from paddleslim.analysis import flops
from paddleslim.nas import SANAS
from paddleslim.common import get_logger
......@@ -19,36 +22,50 @@ import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
    """Set up the network inputs and an iterable generator-backed loader.

    Args:
        image_shape: shape of a single sample as a list (no batch axis);
            the batch axis is added here as ``None``.

    Returns:
        ``(data_loader, data, label)``: the loader created by
        ``DataLoader.from_generator`` and the two variables it feeds.
    """
    batch_shape = [None]
    batch_shape = batch_shape + image_shape

    data = fluid.data(name='data', shape=batch_shape, dtype='float32')
    label = fluid.data(name='label', shape=[None, 1], dtype='int64')

    data_loader = fluid.io.DataLoader.from_generator(
        feed_list=[data, label],
        capacity=1024,
        use_double_buffer=True,
        iterable=True)
    return data_loader, data, label
def build_program(main_program,
startup_program,
image_shape,
dataset,
archs,
args,
places,
is_test=False):
with fluid.program_guard(main_program, startup_program):
with fluid.unique_name.guard():
data_loader, data, label = create_data_loader(image_shape)
with static.program_guard(main_program, startup_program):
with paddle.utils.unique_name.guard():
data_shape = [None] + image_shape
data = static.data(name='data', shape=data_shape, dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')
if args.data == 'cifar10':
paddle.assign(paddle.reshape(label, [-1, 1]), label)
if is_test:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=False,
batch_size=args.batch_size,
return_list=False,
shuffle=False)
else:
data_loader = paddle.io.DataLoader(
dataset,
places=places,
feed_list=[data, label],
drop_last=True,
batch_size=args.batch_size,
return_list=False,
shuffle=True,
use_shared_memory=True,
num_workers=4)
output = archs(data)
output = fluid.layers.fc(input=output, size=args.class_dim)
output = static.nn.fc(x=output, size=args.class_dim)
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(
softmax_out = F.softmax(output)
cost = F.cross_entropy(softmax_out, label=label)
avg_cost = paddle.mean(cost)
acc_top1 = paddle.metric.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
acc_top5 = paddle.metric.accuracy(
input=softmax_out, label=label, k=5)
if is_test == False:
......@@ -58,6 +75,20 @@ def build_program(main_program,
def search_mobilenetv2(config, args, image_size, is_server=True):
image_shape = [3, image_size, image_size]
if args.data == 'cifar10':
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val')
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
if is_server:
### start a server and a client
sa_nas = SANAS(
......@@ -73,15 +104,15 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
search_steps=args.search_steps,
is_server=False)
image_shape = [3, image_size, image_size]
for step in range(args.search_steps):
archs = sa_nas.next_archs()[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
train_program, startup_program, image_shape, train_dataset, archs,
args, places)
current_flops = flops(train_program)
print('step: {}, current_flops: {}'.format(step, current_flops))
......@@ -92,43 +123,18 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
test_program,
startup_program,
image_shape,
val_dataset,
archs,
args,
place,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.fluid.io.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.fluid.io.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
......@@ -169,6 +175,9 @@ def search_mobilenetv2(config, args, image_size, is_server=True):
def test_search_result(tokens, image_size, args, config):
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
sa_nas = SANAS(
config,
server_addr=(args.server_address, args.port),
......@@ -176,50 +185,45 @@ def test_search_result(tokens, image_size, args, config):
is_server=True)
image_shape = [3, image_size, image_size]
if args.data == 'cifar10':
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val')
archs = sa_nas.tokens2arch(tokens)[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
train_program, startup_program, image_shape, train_dataset, archs, args,
places)
current_flops = flops(train_program)
print('current_flops: {}'.format(current_flops))
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
test_program, startup_program, image_shape, archs, args, is_test=True)
test_program,
startup_program,
image_shape,
val_dataset,
archs,
args,
place,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.fluid.io.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.fluid.io.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.fluid.io.batch(
imagenet_reader.train(), batch_size=args.batch_size, drop_last=True)
test_reader = paddle.fluid.io.batch(
imagenet_reader.val(), batch_size=args.batch_size, drop_last=False)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
......
......@@ -8,7 +8,10 @@ import time
import argparse
import ast
import logging
import paddle.fluid as fluid
import paddle
import paddle.nn.functional as F
import paddle.nn as nn
import paddle.static as static
from paddleslim.nas import SANAS
from paddleslim.common import get_logger
import darts_cifar10_reader as reader
......@@ -49,10 +52,10 @@ def count_parameters_in_MB(all_params, prefix='model'):
def create_data_loader(image_shape, is_train, args):
image = fluid.data(
image = static.data(
name="image", shape=[None] + image_shape, dtype="float32")
label = fluid.data(name="label", shape=[None, 1], dtype="int64")
data_loader = fluid.io.DataLoader.from_generator(
label = static.data(name="label", shape=[None, 1], dtype="int64")
data_loader = paddle.io.DataLoader.from_generator(
feed_list=[image, label],
capacity=64,
use_double_buffer=True,
......@@ -60,9 +63,9 @@ def create_data_loader(image_shape, is_train, args):
drop_path_prob = ''
drop_path_mask = ''
if is_train:
drop_path_prob = fluid.data(
drop_path_prob = static.data(
name="drop_path_prob", shape=[args.batch_size, 1], dtype="float32")
drop_path_mask = fluid.data(
drop_path_mask = static.data(
name="drop_path_mask",
shape=[args.batch_size, 20, 4, 2],
dtype="float32")
......@@ -72,36 +75,33 @@ def create_data_loader(image_shape, is_train, args):
def build_program(main_program, startup_program, image_shape, archs, args,
is_train):
with fluid.program_guard(main_program, startup_program):
with static.program_guard(main_program, startup_program):
data_loader, data, label, drop_path_prob, drop_path_mask = create_data_loader(
image_shape, is_train, args)
logits, logits_aux = archs(data, drop_path_prob, drop_path_mask,
is_train, 10)
top1 = fluid.layers.accuracy(input=logits, label=label, k=1)
top5 = fluid.layers.accuracy(input=logits, label=label, k=5)
loss = fluid.layers.reduce_mean(
fluid.layers.softmax_with_cross_entropy(logits, label))
top1 = paddle.metric.accuracy(input=logits, label=label, k=1)
top5 = paddle.metric.accuracy(input=logits, label=label, k=5)
loss = paddle.mean(F.softmax_with_cross_entropy(logits, label))
if is_train:
if auxiliary:
loss_aux = fluid.layers.reduce_mean(
fluid.layers.softmax_with_cross_entropy(logits_aux, label))
loss_aux = paddle.mean(
F.softmax_with_cross_entropy(logits_aux, label))
loss = loss + auxiliary_weight * loss_aux
step_per_epoch = int(trainset_num / args.batch_size)
learning_rate = fluid.layers.cosine_decay(lr, step_per_epoch,
args.retain_epoch)
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
optimizer = fluid.optimizer.MomentumOptimizer(
learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
lr, T_max=step_per_epoch * args.retain_epoch)
optimizer = paddle.optimizer.Momentum(
learning_rate,
momentum,
regularization=fluid.regularizer.L2DecayRegularizer(
weight_decay))
weight_decay=paddle.regularizer.L2Decay(weight_decay),
grad_clip=nn.ClipGradByGlobalNorm(clip_norm=5.0))
optimizer.minimize(loss)
outs = [loss, top1, top5, learning_rate]
outs = [loss, top1, top5]
else:
outs = [loss, top1, top5]
return outs, data_loader
return outs, (data, label), data_loader
def train(main_prog, exe, epoch_id, train_loader, fetch_list, args):
......@@ -129,16 +129,16 @@ def train(main_prog, exe, epoch_id, train_loader, fetch_list, args):
})
else:
feed = data
loss_v, top1_v, top5_v, lr = exe.run(
loss_v, top1_v, top5_v = exe.run(
main_prog, feed=feed, fetch_list=[v.name for v in fetch_list])
loss.update(loss_v, args.batch_size)
top1.update(top1_v, args.batch_size)
top5.update(top5_v, args.batch_size)
if step_id % 10 == 0:
_logger.info(
"Train Epoch {}, Step {}, Lr {:.8f}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
format(epoch_id, step_id, lr[0], loss.avg[0], top1.avg[0],
top5.avg[0]))
"Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}".
format(epoch_id, step_id, loss.avg[0], top1.avg[0], top5.avg[
0]))
return top1.avg[0]
......@@ -161,6 +161,8 @@ def valid(main_prog, exe, epoch_id, valid_loader, fetch_list, args):
def search(config, args, image_size, is_server=True):
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
if is_server:
### start a server and a client
sa_nas = SANAS(
......@@ -180,10 +182,10 @@ def search(config, args, image_size, is_server=True):
for step in range(args.search_steps):
archs = sa_nas.next_archs()[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_fetch_list, train_loader = build_program(
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_fetch_list, _, train_loader = build_program(
train_program,
startup_program,
image_shape,
......@@ -198,7 +200,7 @@ def search(config, args, image_size, is_server=True):
if current_params > float(3.77):
continue
test_fetch_list, test_loader = build_program(
test_fetch_list, _, test_loader = build_program(
test_program,
startup_program,
image_shape,
......@@ -207,8 +209,7 @@ def search(config, args, image_size, is_server=True):
is_train=False)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
train_reader = reader.train_valid(
......@@ -219,8 +220,8 @@ def search(config, args, image_size, is_server=True):
train_loader.set_batch_generator(train_reader, places=place)
test_loader.set_batch_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=train_fetch_list[0].name,
build_strategy=build_strategy)
......@@ -241,52 +242,40 @@ def search(config, args, image_size, is_server=True):
def final_test(config, args, image_size, token=None):
assert token != None, "If you want to start a final experiment, you must input a token."
places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
sa_nas = SANAS(
config, server_addr=(args.server_address, args.port), is_server=True)
image_shape = [3, image_size, image_size]
archs = sa_nas.tokens2arch(token)[0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_fetch_list, train_loader = build_program(
train_program,
startup_program,
image_shape,
archs,
args,
is_train=True)
train_program = static.Program()
test_program = static.Program()
startup_program = static.Program()
train_fetch_list, (data, label), train_loader = build_program(
train_program, startup_program, image_shape, archs, args, is_train=True)
current_params = count_parameters_in_MB(
train_program.global_block().all_parameters(), 'cifar10')
_logger.info('current_params: {}M'.format(current_params))
test_fetch_list, test_loader = build_program(
test_program,
startup_program,
image_shape,
archs,
args,
is_train=False)
test_fetch_list, _, test_loader = build_program(
test_program, startup_program, image_shape, archs, args, is_train=False)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe = static.Executor(place)
exe.run(startup_program)
train_reader = reader.train_valid(
batch_size=args.batch_size, is_train=True, is_shuffle=True, args=args)
batch_size=args.batch_size, is_train=True, is_shuffle=True)
test_reader = reader.train_valid(
batch_size=args.batch_size,
is_train=False,
is_shuffle=False,
args=args)
batch_size=args.batch_size, is_train=False, is_shuffle=False)
train_loader.set_batch_generator(train_reader, places=place)
test_loader.set_batch_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram(
train_program).with_data_parallel(
loss_name=train_fetch_list[0].name, build_strategy=build_strategy)
......@@ -305,11 +294,12 @@ def final_test(config, args, image_size, token=None):
output_dir = os.path.join('darts_output', str(epoch_id))
if not os.path.exists(output_dir):
os.makedirs(output_dir)
fluid.io.save_persistables(exe, output_dir, main_program=train_program)
static.save_inference_model(output_dir, [data], test_fetch_list, exe)
if __name__ == '__main__':
paddle.enable_static()
parser = argparse.ArgumentParser(
description='SA NAS MobileNetV2 cifar10 argparase')
parser.add_argument(
......
......@@ -18,9 +18,7 @@ from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import paddle
lr_strategy = 'cosine_decay'
l2_decay = 1e-4
......@@ -33,111 +31,6 @@ decay_rate = 0.97
total_images = 1281167
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Build an epoch-wise cosine learning-rate schedule.

    The returned value is
    ``learning_rate * (cos(epoch * pi / epochs) + 1) / 2`` with
    ``epoch = floor(global_step / step_each_epoch)``, so the rate only
    changes when the floored epoch index changes.

    Args:
        learning_rate: Base learning rate that is scaled by the cosine factor.
        step_each_epoch: Number of steps that make up one epoch.
        epochs: Number of epochs after which the cosine argument reaches pi.

    Returns:
        The decayed learning-rate expression.
    """
    step_counter = _decay_step_counter()
    current_epoch = ops.floor(step_counter / step_each_epoch)
    radians_per_epoch = math.pi / epochs
    cosine_term = ops.cos(current_epoch * radians_per_epoch) + 1
    return learning_rate * cosine_term / 2
def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120):
    """Build a per-step cosine schedule preceded by a linear warmup.

    The warmup length is fixed at 5 epochs. While
    ``floor(global_step / step_each_epoch) < 5`` the rate is
    ``learning_rate * global_step / (step_each_epoch * 5)``; afterwards it is
    ``learning_rate * (cos((global_step - 5 * step_each_epoch)
    * pi / (epochs * step_each_epoch)) + 1) / 2``.

    Args:
        learning_rate: Base learning rate that is scaled by the schedule.
        step_each_epoch: Number of steps that make up one epoch.
        epochs: Epoch count used to scale the cosine argument.

    Returns:
        The persistable global variable named "learning_rate" that the
        selected branch assigns the current value to.
    """
    step_counter = _decay_step_counter()

    # The schedule result is written into this variable by either branch.
    lr_var = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(5), force_cpu=True)

    current_epoch = ops.floor(step_counter / step_each_epoch)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(current_epoch < warmup_epoch):
            # Linear ramp over the warmup steps.
            warmup_steps = step_each_epoch * warmup_epoch
            warmup_lr = learning_rate * (step_counter / warmup_steps)
            fluid.layers.tensor.assign(input=warmup_lr, output=lr_var)
        with switch.default():
            # Cosine decay evaluated on the steps elapsed since warmup ended.
            steps_after_warmup = step_counter - warmup_epoch * step_each_epoch
            radians_per_step = math.pi / (epochs * step_each_epoch)
            cosine_term = ops.cos(steps_after_warmup * radians_per_step) + 1
            cosine_lr = learning_rate * cosine_term / 2
            fluid.layers.tensor.assign(input=cosine_lr, output=lr_var)

    return lr_var
def exponential_decay_with_warmup(learning_rate,
                                  step_each_epoch,
                                  decay_epochs,
                                  decay_rate=0.97,
                                  warm_up_epoch=5.0):
    """Build a staircase exponential schedule preceded by a linear warmup.

    While ``floor(global_step / step_each_epoch) < warm_up_epoch`` the rate is
    ``learning_rate * global_step / (step_each_epoch * warm_up_epoch)``.
    Afterwards it is ``learning_rate * decay_rate ** k`` with
    ``k = floor((global_step - warm_up_epoch * step_each_epoch)
    / decay_epochs)``.

    Args:
        learning_rate: Base learning rate that is scaled by the schedule.
        step_each_epoch: Number of steps that make up one epoch.
        decay_epochs: Divisor applied to the post-warmup step count; because
            it divides a number of steps, it is expressed in steps despite
            its name.
        decay_rate: Multiplicative factor applied once per decay interval.
        warm_up_epoch: Number of warmup epochs.

    Returns:
        The persistable global variable named "learning_rate" that the
        selected branch assigns the current value to.
    """
    step_counter = _decay_step_counter()

    # The schedule result is written into this variable by either branch.
    lr_var = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warm_up_epoch), force_cpu=True)

    current_epoch = ops.floor(step_counter / step_each_epoch)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(current_epoch < warmup_epoch):
            # Linear ramp over the warmup steps.
            warmup_steps = step_each_epoch * warmup_epoch
            warmup_lr = learning_rate * (step_counter / warmup_steps)
            fluid.layers.assign(input=warmup_lr, output=lr_var)
        with switch.default():
            # Count completed decay intervals since the end of warmup.
            steps_after_warmup = step_counter - warmup_epoch * step_each_epoch
            decay_count = ops.floor(steps_after_warmup / decay_epochs)
            stepped_lr = learning_rate * (decay_rate**decay_count)
            fluid.layers.assign(input=stepped_lr, output=lr_var)

    return lr_var
def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
    """Apply linear learning-rate warmup for distributed training.

    While ``global_step < warmup_steps`` the rate is
    ``start_lr + (end_lr - start_lr) * global_step / warmup_steps``;
    afterwards ``learning_rate`` is used as given.

    Args:
        learning_rate: Rate used once warmup is over; can be a float or a
            Variable.
        warmup_steps: Number of steps the linear ramp lasts.
        start_lr: Rate at step 0; must be a float.
        end_lr: Rate the ramp approaches at ``warmup_steps``; must be a float.

    Returns:
        The persistable global variable named "learning_rate_warmup" that the
        selected branch assigns the current value to.
    """
    assert isinstance(end_lr, float)
    assert isinstance(start_lr, float)
    lr_span = end_lr - start_lr

    with fluid.default_main_program()._lr_schedule_guard():
        # The schedule result is written into this variable by either branch.
        warmup_lr_var = fluid.layers.tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="learning_rate_warmup")

        step_counter = (
            fluid.layers.learning_rate_scheduler._decay_step_counter())

        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(step_counter < warmup_steps):
                # Fraction of the warmup completed, scaled onto the lr span.
                ramped_lr = start_lr + lr_span * (step_counter / warmup_steps)
                fluid.layers.tensor.assign(ramped_lr, warmup_lr_var)
            with switch.default():
                fluid.layers.tensor.assign(learning_rate, warmup_lr_var)

        return warmup_lr_var
class Optimizer(object):
"""A class used to represent several optimizer methods
......@@ -167,23 +60,13 @@ class Optimizer(object):
self.decay_epochs = decay_epochs
self.decay_rate = decay_rate
self.total_images = total_images
if args.use_gpu:
devices_num = paddle.fluid.core.get_cuda_device_count()
else:
devices_num = int(os.environ.get('CPU_NUM', 1))
self.step = int(math.ceil(float(self.total_images) / self.batch_size))
def piecewise_decay(self):
"""piecewise decay with Momentum optimizer
Returns:
a piecewise_decay optimizer
"""
bd = [self.step * e for e in self.step_epochs]
lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
self.step = int(
math.ceil(float(self.total_images) / self.batch_size) / devices_num)
def cosine_decay(self):
"""cosine decay with Momentum optimizer
......@@ -191,111 +74,25 @@ class Optimizer(object):
Returns:
a cosine_decay optimizer
"""
learning_rate = fluid.layers.cosine_decay(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def cosine_decay_warmup(self):
"""cosine decay with warmup
Returns:
a cosine_decay_with_warmup optimizer
"""
learning_rate = cosine_decay_with_warmup(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
return optimizer
def exponential_decay_warmup(self):
"""exponential decay with warmup
Returns:
a exponential_decay_with_warmup optimizer
"""
learning_rate = exponential_decay_with_warmup(
learning_rate = paddle.optimizer.lr.CosineAnnealingDecay(
learning_rate=self.lr,
step_each_epoch=self.step,
decay_epochs=self.step * self.decay_epochs,
decay_rate=self.decay_rate,
warm_up_epoch=self.warm_up_epochs)
optimizer = fluid.optimizer.RMSProp(
learning_rate=learning_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay),
momentum=self.momentum_rate,
rho=0.9,
epsilon=0.001)
return optimizer
def linear_decay(self):
"""linear decay with Momentum optimizer
Returns:
a linear_decay optimizer
"""
end_lr = 0
learning_rate = fluid.layers.polynomial_decay(
self.lr, self.step, end_lr, power=1)
optimizer = fluid.optimizer.Momentum(
T_max=self.step * self.num_epochs,
verbose=False)
optimizer = paddle.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
weight_decay=paddle.regularizer.L2Decay(self.l2_decay))
return optimizer
def adam_decay(self):
"""Adam optimizer
Returns:
an adam_decay optimizer
"""
return fluid.optimizer.Adam(learning_rate=self.lr)
def cosine_decay_RMSProp(self):
"""cosine decay with RMSProp optimizer
Returns:
an cosine_decay_RMSProp optimizer
"""
learning_rate = fluid.layers.cosine_decay(
learning_rate=self.lr,
step_each_epoch=self.step,
epochs=self.num_epochs)
optimizer = fluid.optimizer.RMSProp(
def piecewise_decay(args):
bd = [step * e for e in args.step_epochs]
lr = [args.lr * (0.1**i) for i in range(len(bd) + 1)]
learning_rate = paddle.optimizer.lr.PiecewiseDecay(
boundaries=bd, values=lr, verbose=False)
optimizer = paddle.optimizer.Momentum(
learning_rate=learning_rate,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay),
# Apply epsilon=1 on ImageNet dataset.
epsilon=1)
return optimizer
def default_decay(self):
"""default decay
Returns:
default decay optimizer
"""
optimizer = fluid.optimizer.Momentum(
learning_rate=self.lr,
momentum=self.momentum_rate,
regularization=fluid.regularizer.L2Decay(self.l2_decay))
momentum=args.momentum_rate,
weight_decay=paddle.regularizer.L2Decay(args.l2_decay))
return optimizer
......
......@@ -74,8 +74,8 @@ def build_program(archs):
import paddle.vision.transforms as T
def input_data(image, label):
transform = T.Compose([T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(mode="train", transform=transform)
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
train_dataset = paddle.vision.datasets.Cifar10(mode="train", transform=transform, backend='cv2')
train_loader = paddle.io.DataLoader(train_dataset,
places=paddle.CPUPlace(),
feed_list=[image, label],
......@@ -83,7 +83,7 @@ def input_data(image, label):
batch_size=64,
return_list=False,
shuffle=True)
eval_dataset = paddle.vision.datasets.Cifar10(mode="test", transform=transform)
eval_dataset = paddle.vision.datasets.Cifar10(mode="test", transform=transform, backend='cv2')
eval_loader = paddle.io.DataLoader(eval_dataset,
places=paddle.CPUPlace(),
feed_list=[image, label],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册