Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleSlim
提交
823ca6bb
P
PaddleSlim
项目概览
PaddlePaddle
/
PaddleSlim
1 年多 前同步成功
通知
51
Star
1434
Fork
344
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
53
列表
看板
标记
里程碑
合并请求
16
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleSlim
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
53
Issue
53
列表
看板
标记
里程碑
合并请求
16
合并请求
16
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
823ca6bb
编写于
4月 22, 2020
作者:
B
Bai Yifan
提交者:
GitHub
4月 22, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix grad_clip in DARTS, grad_clip has been upgraded in Paddle2.0 (#229)
上级
388211f3
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
36 addition
and
37 deletion
+36
-37
demo/darts/README.md
demo/darts/README.md
+5
-5
demo/darts/model.py
demo/darts/model.py
+1
-0
demo/darts/search.py
demo/darts/search.py
+1
-1
demo/darts/train.py
demo/darts/train.py
+13
-15
demo/darts/train_imagenet.py
demo/darts/train_imagenet.py
+11
-13
paddleslim/nas/darts/train_search.py
paddleslim/nas/darts/train_search.py
+5
-3
未找到文件。
demo/darts/README.md
浏览文件 @
823ca6bb
...
...
@@ -29,15 +29,15 @@ python search.py --method='PC-DARTS' --batch_size=256 --learning_rate=0.1 --arch
图1: 在CIFAR10数据集上进行搜索的模型结构变化,上半部分为reduction cell,下半部分为normal cell
</p>
使用三种搜索方法得到的结构Genotype已添加到了genotypes.py文件中,
`DARTS_V1`
、
`DARTS_V2`
和
`PC
-
DARTS`
分别代表使用DARTS一阶、二阶近似方法和PC-DARTS搜索方法得到的网络结构。
使用三种搜索方法得到的结构Genotype已添加到了genotypes.py文件中,
`DARTS_V1`
、
`DARTS_V2`
和
`PC
_
DARTS`
分别代表使用DARTS一阶、二阶近似方法和PC-DARTS搜索方法得到的网络结构。
## 网络结构评估训练
在得到搜索结构Genotype之后,可以对其进行评估训练,从而获得它在特定数据集上的真实性能
```
bash
python train.py
--arch
=
'PC
-
DARTS'
# 在CIFAR10数据集上对搜索到的结构评估训练
python train_imagenet.py
--arch
=
'PC
-
DARTS'
# 在ImageNet数据集上对搜索得到的结构评估训练
python train.py
--arch
=
'PC
_
DARTS'
# 在CIFAR10数据集上对搜索到的结构评估训练
python train_imagenet.py
--arch
=
'PC
_
DARTS'
# 在ImageNet数据集上对搜索得到的结构评估训练
```
对搜索到的
`DARTS_V1`
、
`DARTS_V2`
和
`PC-DARTS`
做评估训练的结果如下:
...
...
@@ -83,7 +83,7 @@ def train_search(batch_size, train_portion, is_shuffle, args):
使用以下命令对搜索得到的Genotype结构进行可视化观察
```
python
python
visualize
.
py
PC
-
DARTS
python
visualize
.
py
PC
_
DARTS
```
`PC
-
DARTS`
代表某个Genotype结构,需要预先添加到genotype.py中
`PC
_
DARTS`
代表某个Genotype结构,需要预先添加到genotype.py中
demo/darts/model.py
浏览文件 @
823ca6bb
...
...
@@ -16,6 +16,7 @@ from __future__ import absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddle.fluid.param_attr
import
ParamAttr
from
paddle.fluid.initializer
import
ConstantInitializer
,
MSRAInitializer
...
...
demo/darts/search.py
浏览文件 @
823ca6bb
...
...
@@ -35,7 +35,7 @@ add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg
(
'log_freq'
,
int
,
50
,
"Log frequency."
)
add_arg
(
'use_multiprocess'
,
bool
,
Tru
e
,
"Whether use multiprocess reader."
)
add_arg
(
'use_multiprocess'
,
bool
,
Fals
e
,
"Whether use multiprocess reader."
)
add_arg
(
'num_workers'
,
int
,
4
,
"The multiprocess reader number."
)
add_arg
(
'data'
,
str
,
'dataset/cifar10'
,
"The dir of dataset."
)
add_arg
(
'batch_size'
,
int
,
64
,
"Minibatch size."
)
...
...
demo/darts/train.py
浏览文件 @
823ca6bb
...
...
@@ -21,26 +21,24 @@ import sys
import
ast
import
argparse
import
functools
import
logging
FORMAT
=
'%(asctime)s-%(levelname)s: %(message)s'
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
FORMAT
)
logger
=
logging
.
getLogger
(
__name__
)
import
paddle.fluid
as
fluid
from
paddle.fluid.dygraph.base
import
to_variable
from
model
import
NetworkCIFAR
as
Network
from
paddleslim.common
import
AvgrageMeter
from
paddleslim.common
import
AvgrageMeter
,
get_logger
import
genotypes
import
reader
from
model
import
NetworkCIFAR
as
Network
sys
.
path
[
0
]
=
os
.
path
.
join
(
os
.
path
.
dirname
(
"__file__"
),
os
.
path
.
pardir
)
from
utility
import
add_arguments
,
print_arguments
logger
=
get_logger
(
__name__
,
level
=
logging
.
INFO
)
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
add_arg
=
functools
.
partial
(
add_arguments
,
argparser
=
parser
)
# yapf: disable
add_arg
(
'use_multiprocess'
,
bool
,
Tru
e
,
"Whether use multiprocess reader."
)
add_arg
(
'use_multiprocess'
,
bool
,
Fals
e
,
"Whether use multiprocess reader."
)
add_arg
(
'num_workers'
,
int
,
4
,
"The multiprocess reader number."
)
add_arg
(
'data'
,
str
,
'dataset/cifar10'
,
"The dir of dataset."
)
add_arg
(
'batch_size'
,
int
,
96
,
"Minibatch size."
)
...
...
@@ -60,8 +58,8 @@ add_arg('auxiliary', bool, True, 'Use auxiliary tower.')
add_arg
(
'auxiliary_weight'
,
float
,
0.4
,
"Weight for auxiliary loss."
)
add_arg
(
'drop_path_prob'
,
float
,
0.2
,
"Drop path probability."
)
add_arg
(
'grad_clip'
,
float
,
5
,
"Gradient clipping."
)
add_arg
(
'arch'
,
str
,
'DARTS_V2'
,
"Which architecture to use"
)
add_arg
(
'
report_freq'
,
int
,
50
,
'Report frequency'
)
add_arg
(
'arch'
,
str
,
'DARTS_V2'
,
"Which architecture to use"
)
add_arg
(
'
log_freq'
,
int
,
50
,
'Report frequency'
)
add_arg
(
'use_data_parallel'
,
ast
.
literal_eval
,
False
,
"The flag indicating whether to use data parallel mode to train the model."
)
# yapf: enable
...
...
@@ -95,9 +93,7 @@ def train(model, train_reader, optimizer, epoch, drop_path_prob, args):
else
:
loss
.
backward
()
grad_clip
=
fluid
.
dygraph_grad_clip
.
GradClipByGlobalNorm
(
args
.
grad_clip
)
optimizer
.
minimize
(
loss
,
grad_clip
=
grad_clip
)
optimizer
.
minimize
(
loss
)
model
.
clear_gradients
()
n
=
image
.
shape
[
0
]
...
...
@@ -105,7 +101,7 @@ def train(model, train_reader, optimizer, epoch, drop_path_prob, args):
top1
.
update
(
prec1
.
numpy
(),
n
)
top5
.
update
(
prec5
.
numpy
(),
n
)
if
step_id
%
args
.
report
_freq
==
0
:
if
step_id
%
args
.
log
_freq
==
0
:
logger
.
info
(
"Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}"
.
format
(
epoch
,
step_id
,
objs
.
avg
[
0
],
top1
.
avg
[
0
],
top5
.
avg
[
0
]))
...
...
@@ -132,7 +128,7 @@ def valid(model, valid_reader, epoch, args):
objs
.
update
(
loss
.
numpy
(),
n
)
top1
.
update
(
prec1
.
numpy
(),
n
)
top5
.
update
(
prec5
.
numpy
(),
n
)
if
step_id
%
args
.
report
_freq
==
0
:
if
step_id
%
args
.
log
_freq
==
0
:
logger
.
info
(
"Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}"
.
format
(
epoch
,
step_id
,
objs
.
avg
[
0
],
top1
.
avg
[
0
],
top5
.
avg
[
0
]))
...
...
@@ -158,11 +154,13 @@ def main(args):
step_per_epoch
=
int
(
args
.
trainset_num
/
args
.
batch_size
)
learning_rate
=
fluid
.
dygraph
.
CosineDecay
(
args
.
learning_rate
,
step_per_epoch
,
args
.
epochs
)
clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
args
.
grad_clip
)
optimizer
=
fluid
.
optimizer
.
MomentumOptimizer
(
learning_rate
,
momentum
=
args
.
momentum
,
regularization
=
fluid
.
regularizer
.
L2Decay
(
args
.
weight_decay
),
parameter_list
=
model
.
parameters
())
parameter_list
=
model
.
parameters
(),
grad_clip
=
clip
)
if
args
.
use_data_parallel
:
model
=
fluid
.
dygraph
.
parallel
.
DataParallel
(
model
,
strategy
)
...
...
demo/darts/train_imagenet.py
浏览文件 @
823ca6bb
...
...
@@ -21,20 +21,17 @@ import sys
import
ast
import
argparse
import
functools
import
logging
FORMAT
=
'%(asctime)s-%(levelname)s: %(message)s'
logging
.
basicConfig
(
level
=
logging
.
INFO
,
format
=
FORMAT
)
logger
=
logging
.
getLogger
(
__name__
)
import
paddle.fluid
as
fluid
from
paddle.fluid.dygraph.base
import
to_variable
from
model
import
NetworkImageNet
as
Network
from
paddleslim.common
import
AvgrageMeter
from
paddleslim.common
import
AvgrageMeter
,
get_logger
import
genotypes
import
reader
from
model
import
NetworkImageNet
as
Network
sys
.
path
[
0
]
=
os
.
path
.
join
(
os
.
path
.
dirname
(
"__file__"
),
os
.
path
.
pardir
)
from
utility
import
add_arguments
,
print_arguments
logger
=
get_logger
(
__name__
,
level
=
logging
.
INFO
)
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
add_arg
=
functools
.
partial
(
add_arguments
,
argparser
=
parser
)
...
...
@@ -62,7 +59,7 @@ add_arg('dropout', float, 0.0, "Dropout probability.")
add_arg
(
'grad_clip'
,
float
,
5
,
"Gradient clipping."
)
add_arg
(
'label_smooth'
,
float
,
0.1
,
"Label smoothing."
)
add_arg
(
'arch'
,
str
,
'DARTS_V2'
,
"Which architecture to use"
)
add_arg
(
'
report_freq'
,
int
,
100
,
'Report frequency'
)
add_arg
(
'
log_freq'
,
int
,
100
,
'Report frequency'
)
add_arg
(
'use_data_parallel'
,
ast
.
literal_eval
,
False
,
"The flag indicating whether to use data parallel mode to train the model."
)
# yapf: enable
...
...
@@ -108,9 +105,7 @@ def train(model, train_reader, optimizer, epoch, args):
else
:
loss
.
backward
()
grad_clip
=
fluid
.
dygraph_grad_clip
.
GradClipByGlobalNorm
(
args
.
grad_clip
)
optimizer
.
minimize
(
loss
,
grad_clip
=
grad_clip
)
optimizer
.
minimize
(
loss
)
model
.
clear_gradients
()
n
=
image
.
shape
[
0
]
...
...
@@ -118,7 +113,7 @@ def train(model, train_reader, optimizer, epoch, args):
top1
.
update
(
prec1
.
numpy
(),
n
)
top5
.
update
(
prec5
.
numpy
(),
n
)
if
step_id
%
args
.
report
_freq
==
0
:
if
step_id
%
args
.
log
_freq
==
0
:
logger
.
info
(
"Train Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}"
.
format
(
epoch
,
step_id
,
objs
.
avg
[
0
],
top1
.
avg
[
0
],
top5
.
avg
[
0
]))
...
...
@@ -145,7 +140,7 @@ def valid(model, valid_reader, epoch, args):
objs
.
update
(
loss
.
numpy
(),
n
)
top1
.
update
(
prec1
.
numpy
(),
n
)
top5
.
update
(
prec5
.
numpy
(),
n
)
if
step_id
%
args
.
report
_freq
==
0
:
if
step_id
%
args
.
log
_freq
==
0
:
logger
.
info
(
"Valid Epoch {}, Step {}, loss {:.6f}, acc_1 {:.6f}, acc_5 {:.6f}"
.
format
(
epoch
,
step_id
,
objs
.
avg
[
0
],
top1
.
avg
[
0
],
top5
.
avg
[
0
]))
...
...
@@ -174,11 +169,14 @@ def main(args):
step_per_epoch
,
args
.
decay_rate
,
staircase
=
True
)
clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
args
.
grad_clip
)
optimizer
=
fluid
.
optimizer
.
MomentumOptimizer
(
learning_rate
,
momentum
=
args
.
momentum
,
regularization
=
fluid
.
regularizer
.
L2Decay
(
args
.
weight_decay
),
parameter_list
=
model
.
parameters
())
parameter_list
=
model
.
parameters
(),
grad_clip
=
clip
)
if
args
.
use_data_parallel
:
model
=
fluid
.
dygraph
.
parallel
.
DataParallel
(
model
,
strategy
)
...
...
paddleslim/nas/darts/train_search.py
浏览文件 @
823ca6bb
...
...
@@ -108,8 +108,7 @@ class DARTSearch(object):
else
:
loss
.
backward
()
grad_clip
=
fluid
.
dygraph_grad_clip
.
GradClipByGlobalNorm
(
5
)
optimizer
.
minimize
(
loss
,
grad_clip
)
optimizer
.
minimize
(
loss
)
self
.
model
.
clear_gradients
()
objs
.
update
(
loss
.
numpy
(),
n
)
...
...
@@ -163,11 +162,14 @@ class DARTSearch(object):
step_per_epoch
*=
2
learning_rate
=
fluid
.
dygraph
.
CosineDecay
(
self
.
learning_rate
,
step_per_epoch
,
self
.
num_epochs
)
clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
5.0
)
optimizer
=
fluid
.
optimizer
.
MomentumOptimizer
(
learning_rate
,
0.9
,
regularization
=
fluid
.
regularizer
.
L2DecayRegularizer
(
3e-4
),
parameter_list
=
model_parameters
)
parameter_list
=
model_parameters
,
grad_clip
=
clip
)
if
self
.
use_data_parallel
:
self
.
model
=
fluid
.
dygraph
.
parallel
.
DataParallel
(
self
.
model
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录