Unverified commit 60544b52 authored by ceci3, committed by GitHub

fix something wrong about rlnas (#269) (#280)

* fix

* fix

* update

* update
Parent e6b788a3
# SANAS Network Architecture Search Example
This example shows how to use the network architecture search interface to find a smaller or more accurate model. It describes how to use SANAS in paddleslim and how to obtain a model architecture with SANAS; for the complete example code, see sa_nas_mobilenetv2.py or block_sa_nas_mobilenetv2.py.
## Data Preparation
This example uses the cifar10 dataset by default; cifar10 is downloaded automatically by the paddle API that is called, so no extra preparation is needed.
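For reference, the demo builds its readers directly from the paddle dataset API, roughly as in the sketch below (the batch size is illustrative; the demo reads it from a command-line flag):

```python
import paddle

# shuffle the training set and batch both splits; cifar10 is downloaded on first use
train_reader = paddle.batch(
    paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
    batch_size=256, drop_last=True)
test_reader = paddle.batch(
    paddle.dataset.cifar.test10(cycle=False), batch_size=256, drop_last=False)
```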
......@@ -8,7 +8,7 @@
## API Introduction
Please refer to the <a href='../../docs/zh_cn/api_cn/nas_api.rst'>neural architecture search API documentation</a>.
This example uses SANAS to search the MobileNetV2 search space for a model with fewer FLOPs.
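As a rough illustration of how the demo drives this interface, a minimal sketch of the SANAS loop is shown below; the port, step count and score are placeholders, and sa_nas_mobilenetv2.py contains the real training and evaluation code:

```python
from paddleslim.nas import SANAS

config = [('MobileNetV2Space')]
# port and search_steps are illustrative; the demo passes them via command-line flags
sanas = SANAS(configs=config, server_addr=("", 8337), search_steps=300, is_server=True)

for step in range(300):
    archs = sanas.next_archs()[0]   # callable that builds the sampled sub-network
    # ... insert archs(...) into a program, train for a few epochs, then evaluate ...
    score = 0.0                     # placeholder: e.g. top-1 accuracy of the sampled model
    sanas.reward(score)             # feed the score back to the controller
```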
## 1 Search Space Configuration
The default search space is `MobileNetV2`; for detailed search space configuration, see the <a href='../../docs/zh_cn/api_cn/search_space.md'>search space configuration documentation</a>.
......@@ -24,3 +24,29 @@ CUDA_VISIBLE_DEVICES=0 python sa_nas_mobilenetv2.py
```shell
CUDA_VISIBLE_DEVICES=0 python block_sa_nas_mobilenetv2.py
```
# RLNAS Network Architecture Search Example
This example shows how to use the RLNAS interface for network architecture search. It describes how to use RLNAS in paddleslim; for the complete example code, see rl_nas_mobilenetv2.py or parl_nas_mobilenetv2.py.
## Data Preparation
This example uses the cifar10 dataset by default; cifar10 is downloaded automatically by the paddle API that is called, so no extra preparation is needed.
## API Introduction
Please refer to the <a href='../../docs/zh_cn/api_cn/nas_api.rst'>neural architecture search API documentation</a>.
This example uses RLNAS to search the MobileNetV2 search space for a model with higher accuracy.
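As a rough illustration, the skeleton of the ddpg-based loop used by parl_nas_mobilenetv2.py-style code is sketched below; the score is only a placeholder for the evaluated accuracy, and the lstm controller takes slightly different arguments (see the API documentation above):

```python
import numpy as np
from paddleslim.nas import RLNAS

config = [('MobileNetV2Space')]
search_steps = 3
rl_nas = RLNAS(key='ddpg', configs=config, is_sync=False,
               obs_dim=26,                      # step index + number of tokens
               server_addr=("", 8881))

for step in range(search_steps):
    # observation = current step plus the previously sampled tokens
    action_prev = [1. for _ in rl_nas.range_tables] if step == 0 else rl_nas.tokens[0]
    obs = [step] + list(action_prev)
    archs = rl_nas.next_archs(obs=obs)[0][0]    # callable that builds the sampled network
    score = 0.0                                 # placeholder: evaluate the trained model here
    obs_arr = np.expand_dims(obs, axis=0).astype('float32')
    obs_next = np.expand_dims([step + 1] + list(rl_nas.tokens[0]), axis=0).astype('float32')
    terminal = np.expand_dims([step == search_steps - 1], axis=0).astype('bool')
    rl_nas.reward(np.expand_dims(np.float32(score), axis=0),
                  obs=obs_arr, actions=rl_nas.tokens.astype('float32'),
                  obs_next=obs_next, terminal=terminal)
```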
## 1 Search Space Configuration
The default search space is `MobileNetV2`; for detailed search space configuration, see the <a href='../../docs/zh_cn/api_cn/search_space.md'>search space configuration documentation</a>.
## 2 Start Training
### 2.1 Start a search experiment that builds the search space from the initial MobileNetV2 architecture and uses lstm as the reinforcement learning algorithm
```shell
CUDA_VISIBLE_DEVICES=0 python rl_nas_mobilenetv2.py
```
### 2.2 Start a search experiment that builds the search space from the initial MobileNetV2 architecture and uses ddpg as the reinforcement learning algorithm
```shell
CUDA_VISIBLE_DEVICES=0 python parl_nas_mobilenetv2.py
```
import sys
sys.path.append('..')
import numpy as np
import argparse
import ast
import time
import logging
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddleslim.nas import RLNAS
from paddleslim.common import get_logger
from optimizer import create_optimizer
import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
def create_data_loader(image_shape):
data_shape = [None] + image_shape
data = fluid.data(name='data', shape=data_shape, dtype='float32')
label = fluid.data(name='label', shape=[None, 1], dtype='int64')
data_loader = fluid.io.DataLoader.from_generator(
feed_list=[data, label],
capacity=1024,
use_double_buffer=True,
iterable=True)
return data_loader, data, label
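# Build a train or eval program: apply the sampled sub-network `archs` to the input,
# add an FC classification head, and return the data loader, average cross-entropy
# loss and top-1/top-5 accuracy; an optimizer is attached only when is_test is False.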
def build_program(main_program,
startup_program,
image_shape,
archs,
args,
is_test=False):
with fluid.program_guard(main_program, startup_program):
with fluid.unique_name.guard():
data_loader, data, label = create_data_loader(image_shape)
output = archs(data)
output = fluid.layers.fc(input=output, size=args.class_dim)
softmax_out = fluid.layers.softmax(input=output, use_cudnn=False)
cost = fluid.layers.cross_entropy(input=softmax_out, label=label)
avg_cost = fluid.layers.mean(cost)
acc_top1 = fluid.layers.accuracy(
input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(
input=softmax_out, label=label, k=5)
if is_test == False:
optimizer = create_optimizer(args)
optimizer.minimize(avg_cost)
return data_loader, avg_cost, acc_top1, acc_top5
def search_mobilenetv2(config, args, image_size, is_server=True):
if is_server:
### start a server and a client
rl_nas = RLNAS(
key='ddpg',
configs=config,
is_sync=False,
obs_dim=26, ### step + length_of_token
server_addr=(args.server_address, args.port))
else:
### start a client
rl_nas = RLNAS(
key='ddpg',
configs=config,
is_sync=False,
obs_dim=26,
server_addr=(args.server_address, args.port),
is_server=False)
image_shape = [3, image_size, image_size]
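    # One search step: build the controller observation from the previous tokens,
    # sample a new architecture from the DDPG controller, train it for
    # `retain_epoch` epochs, evaluate it on the test set, and report the reward.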
for step in range(args.search_steps):
if step == 0:
action_prev = [1. for _ in rl_nas.range_tables]
else:
action_prev = rl_nas.tokens[0]
obs = [step]
obs.extend(action_prev)
archs = rl_nas.next_archs(obs=obs)[0][0]
train_program = fluid.Program()
test_program = fluid.Program()
startup_program = fluid.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program(
train_program, startup_program, image_shape, archs, args)
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
test_program,
startup_program,
image_shape,
archs,
args,
is_test=True)
test_program = test_program.clone(for_test=True)
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)
if args.data == 'cifar10':
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.cifar.train10(cycle=False), buf_size=1024),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.batch(
paddle.dataset.cifar.test10(cycle=False),
batch_size=args.batch_size,
drop_last=False)
elif args.data == 'imagenet':
train_reader = paddle.batch(
imagenet_reader.train(),
batch_size=args.batch_size,
drop_last=True)
test_reader = paddle.batch(
imagenet_reader.val(),
batch_size=args.batch_size,
drop_last=False)
train_loader.set_sample_list_generator(
train_reader,
places=fluid.cuda_places() if args.use_gpu else fluid.cpu_places())
test_loader.set_sample_list_generator(test_reader, places=place)
build_strategy = fluid.BuildStrategy()
train_compiled_program = fluid.CompiledProgram(
train_program).with_data_parallel(
loss_name=avg_cost.name, build_strategy=build_strategy)
for epoch_id in range(args.retain_epoch):
for batch_id, data in enumerate(train_loader()):
fetches = [avg_cost.name]
s_time = time.time()
outs = exe.run(train_compiled_program,
feed=data,
fetch_list=fetches)[0]
batch_time = time.time() - s_time
if batch_id % 10 == 0:
_logger.info(
'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
format(step, epoch_id, batch_id, outs[0], batch_time))
reward = []
for batch_id, data in enumerate(test_loader()):
test_fetches = [
test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
]
batch_reward = exe.run(test_program,
feed=data,
fetch_list=test_fetches)
reward_avg = np.mean(np.array(batch_reward), axis=1)
reward.append(reward_avg)
_logger.info(
'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
format(step, batch_id, batch_reward[0], batch_reward[1],
batch_reward[2]))
finally_reward = np.mean(np.array(reward), axis=0)
_logger.info(
'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
finally_reward[0], finally_reward[1], finally_reward[2]))
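        # Assemble the transition fed back to the DDPG controller: current
        # observation, sampled action tokens, reward (final top-1 accuracy),
        # next observation and terminal flag.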
obs = np.expand_dims(obs, axis=0).astype('float32')
actions = rl_nas.tokens
obs_next = [step + 1]
obs_next.extend(actions[0])
obs_next = np.expand_dims(obs_next, axis=0).astype('float32')
if step == args.search_steps - 1:
terminal = np.expand_dims([True], axis=0).astype(np.bool)
else:
terminal = np.expand_dims([False], axis=0).astype(np.bool)
rl_nas.reward(
np.expand_dims(
np.float32(finally_reward[1]), axis=0),
obs=obs,
actions=actions.astype('float32'),
obs_next=obs_next,
terminal=terminal)
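        # NOTE: the demo exits after three search steps to keep the run short;
        # remove this check to search for the full `search_steps` budget.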
if step == 2:
sys.exit(0)
if __name__ == '__main__':
parser = argparse.ArgumentParser(
        description='RL NAS MobileNetV2 cifar10 argparse')
parser.add_argument(
'--use_gpu',
type=ast.literal_eval,
default=True,
help='Whether to use GPU in train/test model.')
parser.add_argument(
'--batch_size', type=int, default=256, help='batch size.')
parser.add_argument(
'--class_dim', type=int, default=10, help='classify number.')
parser.add_argument(
'--data',
type=str,
default='cifar10',
choices=['cifar10', 'imagenet'],
        help='dataset to use.')
parser.add_argument(
'--is_server',
type=ast.literal_eval,
default=True,
help='Whether to start a server.')
parser.add_argument(
'--search_steps',
type=int,
default=100,
        help='number of architecture search steps.')
parser.add_argument(
'--server_address', type=str, default="", help='server ip.')
parser.add_argument('--port', type=int, default=8881, help='server port')
parser.add_argument(
'--retain_epoch', type=int, default=5, help='epoch for each token.')
parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
args = parser.parse_args()
print(args)
if args.data == 'cifar10':
image_size = 32
block_num = 3
elif args.data == 'imagenet':
image_size = 224
block_num = 6
else:
raise NotImplementedError(
'data must in [cifar10, imagenet], but received: {}'.format(
args.data))
config = [('MobileNetV2Space')]
search_mobilenetv2(config, args, image_size, is_server=args.is_server)
......@@ -9,12 +9,12 @@
4. Define the input data function
5. Define the training function
6. Define the evaluation function
7. Start the search experiment
  7.1 Get the model architecture
  7.2 Build the program
  7.3 Define the input data
  7.4 Train the model
  7.5 Evaluate the model
  7.6 Return the score of the current model
8. Complete example
......@@ -53,7 +53,7 @@ def build_program(archs):
acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1)
acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5)
test_program = fluid.default_main_program().clone(for_test=True)
optimizer = fluid.optimizer.Adam(learning_rate=0.1)
optimizer.minimize(avg_cost)
......@@ -77,7 +77,7 @@ def input_data(inputs):
## 5. Define the training function
Train the model with the training program and the training data.
```python
def start_train(program, data_reader, data_feeder):
outputs = [avg_cost.name, acc_top1.name, acc_top5.name]
for data in data_reader():
batch_reward = exe.run(program, feed=data_feeder.feed(data), fetch_list = outputs)
......@@ -146,7 +146,7 @@ for step in range(3):
current_flops = slim.analysis.flops(train_program)
if current_flops > 321208544:
continue
for epoch in range(7):
start_train(train_program, train_reader, train_feeder)
......
......@@ -15,13 +15,13 @@
6. Define the function that builds the program
7. Define the training function
8. Define the prediction function
9. Start the search
  9.1 Get the next model architecture
  9.2 Build the corresponding training and prediction programs
  9.3 Add search constraints
  9.4 Define the environment
  9.5 Define the input data
  9.6 Start training and evaluation
  9.7 Return the reward (score) of the current model
10. Start the search with the script under demo
11. Start the final experiment with the script under demo
......@@ -33,7 +33,7 @@
If the blocks in DARTS_model are distinguished by channel number, DARTS_model contains 3 blocks in total: the first block contains only 6 normal cells, and each of the following two blocks contains one reduction cell and 6 normal cells, giving 20 cells overall. When building the search space we define that all convolution operations within a cell use the same number of channels, so an architecture is encoded by 20 tokens.
The complete search space is available at [the DARTS_model-based search space](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/paddleslim/nas/search_space/darts_space.py).
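For example, a candidate architecture is encoded as a list of 20 tokens, one per cell, where each token indexes a channel choice defined in darts_space.py; the values below are the ones used in the final experiment at the end of this tutorial:

```python
# 20 tokens, one per cell; each value selects a channel option from the search space
tokens = [5, 5, 0, 5, 5, 10, 7, 7, 5, 7, 7, 11, 10, 12, 10, 0, 5, 3, 10, 8]
assert len(tokens) == 20
```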
### 2. Import dependencies and define global variables
```python
......@@ -232,9 +232,9 @@ exe.run(startup_program)
```
#### 9.5 Define the input data
Because this example applies some extra preprocessing to the cifar10 images, its reader differs from the one in the [quick start](https://paddlepaddle.github.io/PaddleSlim/quick_start/nas_tutorial.html) example, so a custom cifar10 reader has to be defined instead of directly calling the `paddle.dataset.cifar10` reader wrapped in paddle. The custom cifar10 reader file is located in [demo/nas](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/demo/nas/darts_cifar10_reader.py).
**Note:** to simplify the code, this example calls `paddle.dataset.cifar10` directly to define the training and prediction data; real training should use the reader from the custom cifar10 file.
```python
train_reader = paddle.batch(paddle.reader.shuffle(paddle.dataset.cifar.train10(cycle=False), buf_size=1024), batch_size=BATCH_SIZE, drop_last=True)
test_reader = paddle.batch(paddle.dataset.cifar.test10(cycle=False), batch_size=BATCH_SIZE, drop_last=False)
......@@ -261,14 +261,14 @@ sa_nas.reward(float(valid_top1_list[-1] + valid_top1_list[-2]) / 2)
### 10. Start the search with the script under demo
The search script is located at [darts_sanas_demo](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/demo/nas/sanas_darts_space.py); during the search, the model parameter count is constrained to at most 3.77M.
```shell
cd demo/nas/
python darts_nas.py
```
### 11. Start the final experiment with the script under demo
The final-experiment script is located at [darts_sanas_demo](https://github.com/PaddlePaddle/PaddleSlim/blob/develop/demo/nas/sanas_darts_space.py); the final experiment trains for 600 epochs. The example below uses the token `[5, 5, 0, 5, 5, 10, 7, 7, 5, 7, 7, 11, 10, 12, 10, 0, 5, 3, 10, 8]`.
```shell
cd demo/nas/
python darts_nas.py --token 5 5 0 5 5 10 7 7 5 7 7 11 10 12 10 0 5 3 10 8 --retain_epoch 600
......
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .controller import EvolutionaryController, RLBaseController
from .sa_controller import SAController
from .log_helper import get_logger
from .controller_server import ControllerServer
......
......@@ -99,7 +99,7 @@ class Client(object):
assert self._params_dict != None, "Please call next_token to get token first, then call update"
current_params_dict = self._controller.update(
rewards, self._params_dict, **kwargs)
params_grad = compute_grad(current_params_dict, self._params_dict)
_logger.debug("Client: update weight {}".format(self._client_name))
self._client_socket.send_multipart([
pickle.dumps(ConnectMessage.UPDATE_WEIGHT),
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Base environment used in reinforcement learning"""
import numpy as np
__all__ = ['BaseEnv']
class BaseEnv:
def reset(self):
raise NotImplementedError('Abstract method.')
def step(self):
raise NotImplementedError('Abstract method.')
def _build_state_embedding(self):
raise NotImplementedError('Abstract method.')
......@@ -41,24 +41,24 @@ class DDPGAgent(parl.Agent):
self.learn_program = fluid.Program()
with fluid.program_guard(self.pred_program):
obs = fluid.data(
name='obs', shape=[None, self.obs_dim], dtype='float32')
self.pred_act = self.alg.predict(obs)
with fluid.program_guard(self.learn_program):
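            # Declare inputs with fluid.data so each tensor carries an explicit
            # batch dimension (None), matching the arrays fed at run time.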
obs = fluid.data(
name='obs', shape=[None, self.obs_dim], dtype='float32')
act = fluid.data(
name='act', shape=[None, self.act_dim], dtype='float32')
reward = fluid.data(name='reward', shape=[None], dtype='float32')
next_obs = fluid.data(
name='next_obs', shape=[None, self.obs_dim], dtype='float32')
terminal = fluid.data(
name='terminal', shape=[None, 1], dtype='bool')
_, self.critic_cost = self.alg.learn(obs, act, reward, next_obs,
terminal)
def predict(self, obs):
obs = np.expand_dims(obs, axis=0)
act = self.fluid_executor.run(self.pred_program,
feed={'obs': obs},
fetch_list=[self.pred_act])[0]
......
......@@ -62,6 +62,7 @@ class LSTM(RLBaseController):
self.lstm_num_layers = kwargs.get('lstm_num_layers') or 1
self.hidden_size = kwargs.get('hidden_size') or 100
self.temperature = kwargs.get('temperature') or None
self.controller_lr = kwargs.get('controller_lr') or 1e-4
self.tanh_constant = kwargs.get('tanh_constant') or None
self.decay = kwargs.get('decay') or 0.99
self.weight_entropy = kwargs.get('weight_entropy') or None
......@@ -91,12 +92,6 @@ class LSTM(RLBaseController):
return logits, output, new_states
def _create_parameter(self):
self.g_emb = fluid.layers.create_parameter(
name='emb_g',
shape=(self.controller_batch_size, self.hidden_size),
......@@ -133,11 +128,16 @@ class LSTM(RLBaseController):
axes=[1],
starts=[idx],
ends=[idx + 1])
action = fluid.layers.squeeze(action, axes=[1])
action.stop_gradient = True
else:
action = fluid.layers.sampling_id(probs)
actions.append(action)
log_prob = fluid.layers.softmax_with_cross_entropy(
logits,
fluid.layers.reshape(
action, shape=[fluid.layers.shape(action), 1]),
axis=1)
sample_log_probs.append(log_prob)
entropy = log_prob * fluid.layers.exp(-1 * log_prob)
......@@ -145,10 +145,14 @@ class LSTM(RLBaseController):
entropies.append(entropy)
action_emb = fluid.layers.cast(action, dtype=np.int64)
inputs = fluid.embedding(
action_emb,
size=(self.max_range_table, self.hidden_size),
param_attr=fluid.ParamAttr(
name='emb_w', initializer=uniform_initializer(1.0)))
sample_log_probs = fluid.layers.stack(sample_log_probs)
self.sample_log_probs = fluid.layers.concat(
sample_log_probs, axis=0)
entropies = fluid.layers.stack(entropies)
self.sample_entropies = fluid.layers.reduce_sum(entropies)
......@@ -196,7 +200,9 @@ class LSTM(RLBaseController):
self.loss = self.sample_log_probs * (self.rewards - self.baseline)
fluid.clip.set_gradient_clip(
clip=fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0))
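        # Decay the controller learning rate exponentially (factor 0.8 every 1000 steps).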
lr = fluid.layers.exponential_decay(
self.controller_lr, decay_steps=1000, decay_rate=0.8)
optimizer = fluid.optimizer.Adam(learning_rate=lr)
optimizer.minimize(self.loss)
def _create_input(self, is_test=True, actual_rewards=None):
......
......@@ -161,6 +161,8 @@ class Server(object):
if len(self._client) == len(
self._client_dict.items()):
self._done = True
self._params_dict = sum_params_dict
del sum_params_dict
self._server_socket.send_multipart([
pickle.dumps(ConnectMessage.WAIT),
......