PaddlePaddle / models

Commit f66de96c
Authored Feb 19, 2019 by Yancey1989

add fast resnet model

Parent 4d78bac7
Showing 6 changed files with 788 additions and 0 deletions (+788 -0):

.gitignore                                                              +1    -0
fluid/PaddleCV/image_classification/fast_resnet/requirements.txt        +3    -0
fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py   +233  -0
fluid/PaddleCV/image_classification/fast_resnet/train.py                +380  -0
fluid/PaddleCV/image_classification/models/__init__.py                  +1    -0
fluid/PaddleCV/image_classification/models/fast_resnet.py               +170  -0
.gitignore
@@ -3,3 +3,4 @@
 *.user
 *.pyc
 *~
+*.vscode
fluid/PaddleCV/image_classification/fast_resnet/requirements.txt
0 → 100644
torch==0.4.1
torchvision
tqdm
fluid/PaddleCV/image_classification/fast_resnet/torchvision_reader.py
0 → 100644
import os
import numpy as np
import math
import random
import torch
import torch.utils.data
from torch.utils.data.distributed import DistributedSampler
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data.sampler import Sampler
import torchvision
import pickle
from tqdm import tqdm
import time
import multiprocessing

TRAINER_NUMS = int(os.getenv("PADDLE_TRAINER_NUM", "1"))
TRAINER_ID = int(os.getenv("PADDLE_TRAINER_ID", "0"))

epoch = 0
FINISH_EVENT = "FINISH_EVENT"


#def paddle_data_loader(torch_dataset, indices=None, concurrent=1, queue_size=3072, use_uint8_reader=False):
class PaddleDataLoader(object):
    def __init__(self, torch_dataset, indices=None, concurrent=16, queue_size=3072):
        self.torch_dataset = torch_dataset
        self.data_queue = multiprocessing.Queue(queue_size)
        self.indices = indices
        self.concurrent = concurrent

    def _worker_loop(self, dataset, worker_indices, worker_id):
        cnt = 0
        for idx in worker_indices:
            cnt += 1
            img, label = self.torch_dataset[idx]
            img = np.array(img).astype('uint8').transpose((2, 0, 1))
            self.data_queue.put((img, label))
        print("worker: [%d] read [%d] samples. " % (worker_id, cnt))
        self.data_queue.put(FINISH_EVENT)

    def reader(self):
        def _reader_creator():
            worker_processes = []
            total_img = len(self.torch_dataset)
            print("total image: ", total_img)
            if self.indices is None:
                self.indices = [i for i in xrange(total_img)]
                random.seed(time.time())
                random.shuffle(self.indices)
                print("shuffle indices: %s ..." % self.indices[:10])

            imgs_per_worker = int(math.ceil(total_img / self.concurrent))
            for i in xrange(self.concurrent):
                start = i * imgs_per_worker
                end = (i + 1) * imgs_per_worker if i != self.concurrent - 1 else None
                sliced_indices = self.indices[start:end]
                w = multiprocessing.Process(
                    target=self._worker_loop,
                    args=(self.torch_dataset, sliced_indices, i))
                w.daemon = True
                w.start()
                worker_processes.append(w)

            finish_workers = 0
            worker_cnt = len(worker_processes)
            while finish_workers < worker_cnt:
                sample = self.data_queue.get()
                if sample == FINISH_EVENT:
                    finish_workers += 1
                else:
                    yield sample

        return _reader_creator


def train(traindir, sz, min_scale=0.08):
    train_tfms = [
        transforms.RandomResizedCrop(sz, scale=(min_scale, 1.0)),
        transforms.RandomHorizontalFlip()
    ]
    train_dataset = datasets.ImageFolder(traindir, transforms.Compose(train_tfms))
    return PaddleDataLoader(train_dataset).reader()


def test(valdir, bs, sz, rect_val=False):
    if rect_val:
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, bs)

        ar_tfms = [transforms.Resize(int(sz * 1.14)), CropArTfm(idx2ar, sz)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        return PaddleDataLoader(val_dataset, concurrent=1, indices=idx_sorted).reader()

    val_tfms = [transforms.Resize(int(sz * 1.14)), transforms.CenterCrop(sz)]
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
    return PaddleDataLoader(val_dataset).reader()


def create_validation_set(valdir, batch_size, target_size, rect_val, distributed):
    print("create_validation_set", valdir, batch_size, target_size, rect_val, distributed)
    if rect_val:
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, batch_size)

        ar_tfms = [transforms.Resize(int(target_size * 1.14)), CropArTfm(idx2ar, target_size)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        val_sampler = DistValSampler(idx_sorted, batch_size=batch_size, distributed=distributed)
        return val_dataset, val_sampler

    val_tfms = [transforms.Resize(int(target_size * 1.14)), transforms.CenterCrop(target_size)]
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
    val_sampler = DistValSampler(list(range(len(val_dataset))), batch_size=batch_size, distributed=distributed)
    return val_dataset, val_sampler


class ValDataset(datasets.ImageFolder):
    def __init__(self, root, transform=None, target_transform=None):
        super(ValDataset, self).__init__(root, transform, target_transform)

    def __getitem__(self, index):
        path, target = self.imgs[index]
        sample = self.loader(path)
        if self.transform is not None:
            for tfm in self.transform:
                if isinstance(tfm, CropArTfm):
                    sample = tfm(sample, index)
                else:
                    sample = tfm(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)

        return sample, target


class DistValSampler(Sampler):
    # DistValSampler distributes batches equally (based on batch size) to every gpu (even if there aren't enough images)
    # WARNING: Some batches will contain an empty array to signify there aren't enough images
    # Distributed=False - same validation happens on every single gpu
    def __init__(self, indices, batch_size, distributed=True):
        self.indices = indices
        self.batch_size = batch_size
        if distributed:
            self.world_size = TRAINER_NUMS
            self.global_rank = TRAINER_ID
        else:
            self.global_rank = 0
            self.world_size = 1

        # expected number of batches per sample. Need this so each distributed gpu validates on same number of batches.
        # even if there isn't enough data to go around
        self.expected_num_batches = int(
            math.ceil(len(self.indices) / self.world_size / self.batch_size))

        # num_samples = total images / world_size. This is what we distribute to each gpu
        self.num_samples = self.expected_num_batches * self.batch_size

    def __iter__(self):
        offset = self.num_samples * self.global_rank
        sampled_indices = self.indices[offset:offset + self.num_samples]
        print("DistValSampler: self.world_size: ", self.world_size, " self.global_rank: ", self.global_rank)
        for i in range(self.expected_num_batches):
            offset = i * self.batch_size
            yield sampled_indices[offset:offset + self.batch_size]

    def __len__(self):
        return self.expected_num_batches

    def set_epoch(self, epoch):
        return


class CropArTfm(object):
    def __init__(self, idx2ar, target_size):
        self.idx2ar, self.target_size = idx2ar, target_size

    def __call__(self, img, idx):
        target_ar = self.idx2ar[idx]
        if target_ar < 1:
            w = int(self.target_size / target_ar)
            size = (w // 8 * 8, self.target_size)
        else:
            h = int(self.target_size * target_ar)
            size = (self.target_size, h // 8 * 8)
        return torchvision.transforms.functional.center_crop(img, size)


def sort_ar(valdir):
    idx2ar_file = valdir + '/../sorted_idxar.p'
    if os.path.isfile(idx2ar_file):
        return pickle.load(open(idx2ar_file, 'rb'))
    print('Creating AR indexes. Please be patient this may take a couple minutes...')
    val_dataset = datasets.ImageFolder(valdir)
    # AS: TODO: use Image.open instead of looping through dataset
    sizes = [img[0].size for img in tqdm(val_dataset, total=len(val_dataset))]
    idx_ar = [(i, round(s[0] * 1.0 / s[1], 5)) for i, s in enumerate(sizes)]
    sorted_idxar = sorted(idx_ar, key=lambda x: x[1])
    pickle.dump(sorted_idxar, open(idx2ar_file, 'wb'))
    print('Done')
    return sorted_idxar


def chunks(l, n):
    n = max(1, n)
    return (l[i:i + n] for i in range(0, len(l), n))


def map_idx2ar(idx_ar_sorted, batch_size):
    ar_chunks = list(chunks(idx_ar_sorted, batch_size))
    idx2ar = {}
    for chunk in ar_chunks:
        idxs, ars = list(zip(*chunk))
        mean = round(np.mean(ars), 5)
        for idx in idxs:
            idx2ar[idx] = mean
    return idx2ar


if __name__ == "__main__":
    #ds, sampler = create_validation_set("/data/imagenet/validation", 128, 288, True, True)
    #for item in sampler:
    #    for idx in item:
    #        ds[idx]
    import time
    test_reader = test(valdir="/data/imagenet/validation", bs=50, sz=288, rect_val=True)
    start_ts = time.time()
    for idx, data in enumerate(test_reader()):
        print(idx, data[0].shape, data[1])
        if idx == 10:
            break
        if (idx + 1) % 1000 == 0:
            cost = (time.time() - start_ts)
            print("%d samples per second" % (1000 / cost))
            start_ts = time.time()
\ No newline at end of file
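To make the batch arithmetic in DistValSampler concrete, here is a minimal self-contained sketch of how it carves the validation indices up across trainers. The image count, world size, and batch size below are illustrative assumptions, not values taken from this commit.

import math

num_images = 50000  # assumed: ImageNet validation set size
world_size = 8      # assumed: number of trainers (PADDLE_TRAINER_NUM)
batch_size = 64     # assumed validation batch size

# Same arithmetic as DistValSampler.__init__: every trainer iterates the
# same number of batches even when the images do not divide evenly.
expected_num_batches = int(math.ceil(num_images / float(world_size) / batch_size))
num_samples = expected_num_batches * batch_size

for rank in range(world_size):
    offset = num_samples * rank
    print("trainer %d takes indices [%d, %d)" % (rank, offset, offset + num_samples))

# Here expected_num_batches = 98 and num_samples = 6272, so 8 * 6272 = 50176
# slots cover 50000 images; the last trainer's final slices come back short
# or empty, which is the "empty array" case the sampler's comment warns about.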
fluid/PaddleCV/image_classification/fast_resnet/train.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import cProfile
import time
import os
import traceback

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import paddle.fluid.transpiler.distribute_transpiler as distribute_transpiler

import torchvision_reader

import sys
sys.path.append("..")
from utility import add_arguments, print_arguments
import functools
import models
import utils

DEBUG_PROG = bool(os.getenv("DEBUG_PROG", "0"))


def parse_args():
    parser = argparse.ArgumentParser(description=__doc__)
    add_arg = functools.partial(add_arguments, argparser=parser)
    # yapf: disable
    add_arg('batch_size',          int,   256,                 "Minibatch size.")
    add_arg('use_gpu',             bool,  True,                "Whether to use GPU or not.")
    add_arg('total_images',        int,   1281167,             "Training image number.")
    add_arg('num_epochs',          int,   120,                 "number of epochs.")
    add_arg('class_dim',           int,   1000,                "Class number.")
    add_arg('image_shape',         str,   "3,224,224",         "input image size")
    add_arg('model_save_dir',      str,   "output",            "model save directory")
    add_arg('with_mem_opt',        bool,  False,               "Whether to use memory optimization or not.")
    add_arg('pretrained_model',    str,   None,                "Whether to use pretrained model.")
    add_arg('checkpoint',          str,   None,                "Whether to resume checkpoint.")
    add_arg('lr',                  float, 0.1,                 "set learning rate.")
    add_arg('lr_strategy',         str,   "piecewise_decay",   "Set the learning rate decay strategy.")
    add_arg('model',               str,   "FastResNet",        "Set the network to use.")
    add_arg('data_dir',            str,   "./data/ILSVRC2012", "The ImageNet dataset root dir.")
    add_arg('model_category',      str,   "models",            "Whether to use models_name or not, valid value:'models','models_name'")
    add_arg('fp16',                bool,  False,               "Enable half precision training with fp16.")
    add_arg('scale_loss',          float, 1.0,                 "Scale loss for fp16.")
    # for distributed
    add_arg('start_test_pass',     int,   0,                   "Start test after x passes.")
    add_arg('num_threads',         int,   8,                   "Use num_threads to run the fluid program.")
    add_arg('reduce_strategy',     str,   "allreduce",         "Choose from reduce or allreduce.")
    add_arg('log_period',          int,   5,                   "Print period, default is 5.")
    add_arg('init_conv2d_kaiming', bool,  False,               "Whether to initialize conv2d weight by kaiming.")
    add_arg('memory_optimize',     bool,  True,                "Whether to enable memory optimize.")
    # yapf: enable
    args = parser.parse_args()
    return args


def get_device_num():
    import subprocess
    visible_device = os.getenv('CUDA_VISIBLE_DEVICES')
    if visible_device:
        device_num = len(visible_device.split(','))
    else:
        device_num = subprocess.check_output(['nvidia-smi', '-L']).decode().count('\n')
    return device_num


def linear_lr_decay(lr_values, epochs, bs_values, total_images):
    """Applies linear decay to the learning rate within each epoch range."""
    from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
    import paddle.fluid.layers.tensor as tensor
    import math

    with paddle.fluid.default_main_program()._lr_schedule_guard():
        global_step = _decay_step_counter()
        lr = tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="learning_rate")

        with fluid.layers.control_flow.Switch() as switch:
            last_steps = 0
            for idx, epoch_bound in enumerate(epochs):
                start_epoch, end_epoch = epoch_bound
                linear_epoch = end_epoch - start_epoch
                start_lr, end_lr = lr_values[idx]
                linear_lr = end_lr - start_lr
                steps = last_steps + math.ceil(total_images * 1.0 / bs_values[idx]) * linear_epoch
                linear_lr = end_lr = start_lr
                with switch.case(global_step < steps):
                    decayed_lr = start_lr + linear_lr * ((global_step - last_steps) * 1.0 / steps)
                    last_steps = steps
                    fluid.layers.tensor.assign(decayed_lr, lr)
            last_value_var = tensor.fill_constant(
                shape=[1], dtype='float32', value=float(lr_values[-1]))
            with switch.default():
                fluid.layers.tensor.assign(last_value_var, lr)

        return lr
    return decayed_lr


def test_parallel(exe, test_args, args, test_prog, feeder, bs):
    acc_evaluators = []
    for i in xrange(len(test_args[2])):
        acc_evaluators.append(fluid.metrics.Accuracy())

    to_fetch = [v.name for v in test_args[2]]
    test_reader = test_args[3]
    batch_id = 0
    start_ts = time.time()
    for batch_id, data in enumerate(test_reader()):
        acc_rets = exe.run(fetch_list=to_fetch, feed=feeder.feed(data))
        ret_result = [np.mean(np.array(ret)) for ret in acc_rets]
        print("Test batch: [%d], acc_rets: [%s]" % (batch_id, ret_result))
        for i, e in enumerate(acc_evaluators):
            e.update(value=np.array(acc_rets[i]), weight=bs)
    num_samples = batch_id * bs * get_device_num()
    print_train_time(start_ts, time.time(), num_samples)

    return [e.eval() for e in acc_evaluators]


def build_program(args, is_train, main_prog, startup_prog, py_reader_startup_prog,
                  img_size, trn_dir, batch_size, min_scale, rect_val):
    if is_train:
        reader = torchvision_reader.train(
            traindir=os.path.join(args.data_dir, trn_dir, "train"),
            sz=img_size, min_scale=min_scale)
    else:
        reader = torchvision_reader.test(
            valdir=os.path.join(args.data_dir, trn_dir, "validation"),
            bs=batch_size * get_device_num(), sz=img_size, rect_val=rect_val)
    dshape = [3, img_size, img_size]
    class_dim = 1000
    pyreader = None
    batched_reader = None

    model_name = args.model
    model_list = [m for m in dir(models) if "__" not in m]
    assert model_name in model_list, "{} is not in lists: {}".format(args.model, model_list)
    model = models.__dict__[model_name]()

    with fluid.program_guard(main_prog, startup_prog):
        with fluid.unique_name.guard():
            if is_train:
                with fluid.program_guard(main_prog, py_reader_startup_prog):
                    with fluid.unique_name.guard():
                        pyreader = fluid.layers.py_reader(
                            capacity=batch_size * get_device_num(),
                            shapes=([-1] + dshape, (-1, 1)),
                            dtypes=('uint8', 'int64'),
                            name="train_reader_" + str(img_size) if is_train else "test_reader_" + str(img_size),
                            use_double_buffer=True)
                input, label = fluid.layers.read_file(pyreader)
                pyreader.decorate_paddle_reader(paddle.batch(reader, batch_size=batch_size))
            else:
                input = fluid.layers.data(name="image", shape=[3, 244, 244], dtype="uint8")
                label = fluid.layers.data(name="label", shape=[1], dtype="int64")
                batched_reader = paddle.batch(reader, batch_size=batch_size * get_device_num())
            cast_img_type = "float16" if args.fp16 else "float32"
            cast = fluid.layers.cast(input, cast_img_type)
            img_mean = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_mean", persistable=True)
            img_std = fluid.layers.create_global_var(
                [3, 1, 1], 0.0, cast_img_type, name="img_std", persistable=True)
            # image = (image - (mean * 255.0)) / (std * 255.0)
            t1 = fluid.layers.elementwise_sub(cast, img_mean, axis=1)
            t2 = fluid.layers.elementwise_div(t1, img_std, axis=1)

            predict = model.net(t2, class_dim=class_dim, img_size=img_size, is_train=is_train)
            cost, pred = fluid.layers.softmax_with_cross_entropy(predict, label, return_softmax=True)
            if args.scale_loss > 1:
                avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss)
            else:
                avg_cost = fluid.layers.mean(x=cost)

            batch_acc1 = fluid.layers.accuracy(input=pred, label=label, k=1)
            batch_acc5 = fluid.layers.accuracy(input=pred, label=label, k=5)

            # configure optimize
            optimizer = None
            if is_train:
                #total_images = 1281167 / trainer_count
                epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
                bs_epoch = [x * get_device_num() for x in [224, 224, 96, 96, 50]]
                lrs = [(1.0, 2.0), (2.0, 0.25),
                       (0.42857142857142855, 0.04285714285714286),
                       (0.04285714285714286, 0.004285714285714286),
                       (0.0022321428571428575, 0.00022321428571428573),
                       0.00022321428571428573]
                #boundaries, values = lr_decay(lrs, epochs, bs_epoch, total_images)
                #print("lr linear decay boundaries: ", boundaries, " \nvalues: ", values)
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=linear_lr_decay(lrs, epochs, bs_epoch, args.total_images),
                    momentum=0.9,
                    regularization=fluid.regularizer.L2Decay(1e-4))
                if args.fp16:
                    params_grads = optimizer.backward(avg_cost)
                    master_params_grads = utils.create_master_params_grads(
                        params_grads, main_prog, startup_prog, args.scale_loss)
                    optimizer.apply_gradients(master_params_grads)
                    utils.master_param_to_train_param(master_params_grads, params_grads, main_prog)
                else:
                    optimizer.minimize(avg_cost)

                if args.memory_optimize:
                    fluid.memory_optimize(main_prog, skip_grads=True)

    return avg_cost, optimizer, [batch_acc1, batch_acc5], batched_reader, pyreader, py_reader_startup_prog


def refresh_program(args, epoch, sz, trn_dir, bs, val_bs,
                    need_update_start_prog=False, min_scale=0.08, rect_val=False):
    print('program changed: epoch: [%d], image size: [%d], trn_dir: [%s], batch_size:[%d]' % (epoch, sz, trn_dir, bs))
    train_prog = fluid.Program()
    test_prog = fluid.Program()
    startup_prog = fluid.Program()
    py_reader_startup_prog = fluid.Program()

    train_args = build_program(args, True, train_prog, startup_prog,
                               py_reader_startup_prog, sz, trn_dir, bs, min_scale, False)
    test_args = build_program(args, False, test_prog, startup_prog,
                              py_reader_startup_prog, sz, trn_dir, val_bs, min_scale, rect_val)

    place = core.CUDAPlace(0)
    startup_exe = fluid.Executor(place)
    print("execute py_reader startup program")
    startup_exe.run(py_reader_startup_prog)

    if need_update_start_prog:
        print("execute startup program")
        startup_exe.run(startup_prog)
        if args.init_conv2d_kaiming:
            import torch
            conv2d_w_vars = [var for var in startup_prog.global_block().vars.values()
                             if var.name.startswith('conv2d_')]
            for var in conv2d_w_vars:
                torch_w = torch.empty(var.shape)
                kaiming_np = torch.nn.init.kaiming_normal_(torch_w, mode='fan_out', nonlinearity='relu').numpy()
                tensor = fluid.global_scope().find_var(var.name).get_tensor()
                if args.fp16:
                    tensor.set(np.array(kaiming_np, dtype="float16").view(np.uint16), place)
                else:
                    tensor.set(np.array(kaiming_np, dtype="float32"), place)

        np_tensors = {}
        np_tensors["img_mean"] = np.array(
            [0.485 * 255.0, 0.456 * 255.0, 0.406 * 255.0]).astype(
                "float16" if args.fp16 else "float32").reshape((3, 1, 1))
        np_tensors["img_std"] = np.array(
            [0.229 * 255.0, 0.224 * 255.0, 0.225 * 255.0]).astype(
                "float16" if args.fp16 else "float32").reshape((3, 1, 1))
        for vname, np_tensor in np_tensors.items():
            var = fluid.global_scope().find_var(vname)
            if args.fp16:
                var.get_tensor().set(np_tensor.view(np.uint16), place)
            else:
                var.get_tensor().set(np_tensor, place)

    if DEBUG_PROG:
        with open('/tmp/train_prog_pass%d' % epoch, 'w') as f:
            f.write(train_prog.to_string(True))
        with open('/tmp/test_prog_pass%d' % epoch, 'w') as f:
            f.write(test_prog.to_string(True))
        with open('/tmp/startup_prog_pass%d' % epoch, 'w') as f:
            f.write(startup_prog.to_string(True))
        with open('/tmp/py_reader_startup_prog_pass%d' % epoch, 'w') as f:
            f.write(py_reader_startup_prog.to_string(True))

    strategy = fluid.ExecutionStrategy()
    strategy.num_threads = args.num_threads
    strategy.allow_op_delay = False
    build_strategy = fluid.BuildStrategy()
    build_strategy.reduce_strategy = fluid.BuildStrategy().ReduceStrategy.AllReduce

    avg_loss = train_args[0]
    train_exe = fluid.ParallelExecutor(
        True,
        avg_loss.name,
        main_program=train_prog,
        exec_strategy=strategy,
        build_strategy=build_strategy)
    test_exe = fluid.ParallelExecutor(True, main_program=test_prog, share_vars_from=train_exe)

    return train_args, test_args, test_prog, train_exe, test_exe


# NOTE: only need to benchmark using parallelexe
def train_parallel(args):
    over_all_start = time.time()
    test_prog = fluid.Program()

    exe = None
    test_exe = None
    train_args = None
    test_args = None
    bs = 224
    val_bs = 64
    for pass_id in range(args.num_epochs):
        # program changed
        if pass_id == 0:
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=128, trn_dir="sz/160/", bs=bs, val_bs=val_bs,
                need_update_start_prog=True)
        elif pass_id == 13:  #13
            bs = 96
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=224, trn_dir="sz/352/", bs=bs, val_bs=val_bs,
                min_scale=0.087)
        elif pass_id == 25:  #25
            bs = 50
            val_bs = 4
            train_args, test_args, test_prog, exe, test_exe = refresh_program(
                args, pass_id, sz=288, trn_dir="", bs=bs, val_bs=val_bs,
                min_scale=0.5, rect_val=True)
        else:
            pass

        avg_loss = train_args[0]
        num_samples = 0
        iters = 0
        start_time = time.time()
        train_args[4].start()  # start pyreader
        while True:
            fetch_list = [avg_loss.name]
            acc_name_list = [v.name for v in train_args[2]]
            fetch_list.extend(acc_name_list)
            fetch_list.append("learning_rate")
            if iters % args.log_period == 0:
                should_print = True
            else:
                should_print = False

            fetch_ret = []
            try:
                if should_print:
                    fetch_ret = exe.run(fetch_list)
                else:
                    exe.run([])
            except fluid.core.EOFException as eof:
                print("Finish current epoch, will reset pyreader...")
                train_args[4].reset()
                break
            except fluid.core.EnforceNotMet as ex:
                traceback.print_exc()
                exit(1)

            num_samples += bs * get_device_num()

            if should_print:
                fetched_data = [np.mean(np.array(d)) for d in fetch_ret]
                print("Pass %d, batch %d, loss %s, accuracies: %s, learning_rate %s, py_reader queue_size: %d" %
                      (pass_id, iters, fetched_data[0], fetched_data[1:-1],
                       fetched_data[-1], train_args[4].queue.size()))
            iters += 1

        print_train_time(start_time, time.time(), num_samples)
        feed_list = [test_prog.global_block().var(varname) for varname in ("image", "label")]
        test_feeder = fluid.DataFeeder(feed_list=feed_list, place=fluid.CUDAPlace(0))
        test_ret = test_parallel(test_exe, test_args, args, test_prog, test_feeder, bs)
        print("Pass: %d, Test Accuracy: %s, Spend %.2f hours\n" %
              (pass_id, [np.mean(np.array(v)) for v in test_ret],
               (time.time() - over_all_start) / 3600))

    print("total train time: ", time.time() - over_all_start)


def print_train_time(start_time, end_time, num_samples):
    train_elapsed = end_time - start_time
    examples_per_sec = num_samples / train_elapsed
    print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
          (num_samples, train_elapsed, examples_per_sec))


def print_paddle_envs():
    print('----------- Configuration envs -----------')
    for k in os.environ:
        if "PADDLE_" in k:
            print("ENV %s:%s" % (k, os.environ[k]))
    print('------------------------------------------------')


def main():
    args = parse_args()
    print_arguments(args)
    print_paddle_envs()
    train_parallel(args)


if __name__ == "__main__":
    main()
\ No newline at end of file
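For orientation, here is a small sketch of where the phase boundaries of linear_lr_decay land under the schedule hard-coded in build_program. The device count of 8 is an assumption for illustration; the other numbers are copied from the code above.

import math

total_images = 1281167  # the --total_images default
device_num = 8          # assumed device count for this sketch
epochs = [(0, 7), (7, 13), (13, 22), (22, 25), (25, 28)]
bs_values = [x * device_num for x in [224, 224, 96, 96, 50]]

# Mirrors the `steps` accumulation inside linear_lr_decay: each phase
# contributes ceil(total_images / batch_size) steps per epoch, so the
# switch cases fire at these cumulative global_step boundaries.
last_steps = 0
for (start_epoch, end_epoch), bs in zip(epochs, bs_values):
    steps = last_steps + int(math.ceil(total_images * 1.0 / bs)) * (end_epoch - start_epoch)
    print("epochs [%2d, %2d): active while global_step < %d" % (start_epoch, end_epoch, steps))
    last_steps = steps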
fluid/PaddleCV/image_classification/models/__init__.py
@@ -9,3 +9,4 @@ from .inception_v4 import InceptionV4
 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d
 from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131
 from .shufflenet_v2 import ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0
+from .fast_resnet import FastResNet
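This one-line export is what makes the new network selectable by name: train.py discovers models by scanning the package namespace. A minimal sketch of that lookup, following the same logic as build_program:

import models

# Any class exported from models/__init__.py shows up here, so adding the
# import above is what makes `--model FastResNet` resolve in train.py.
model_list = [m for m in dir(models) if "__" not in m]
assert "FastResNet" in model_list
model = models.__dict__["FastResNet"]()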
fluid/PaddleCV/image_classification/models/fast_resnet.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import numpy as np
import time
import os
import math

import cProfile, pstats, StringIO

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler
import utils

## visreader for imagenet
import torchvision_reader

__all__ = ["FastResNet"]


class FastResNet():
    def __init__(self, layers=50):
        self.layers = layers

    def net(self, input, class_dim=1000, img_size=224, is_train=True):
        layers = self.layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        num_filters = [64, 128, 256, 512]

        conv = self.conv_bn_layer(
            input=input, num_filters=64, filter_size=7, stride=2, act='relu', is_train=is_train)
        conv = fluid.layers.pool2d(
            input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        for block in range(len(depth)):
            for i in range(depth[block]):
                conv = self.bottleneck_block(
                    input=conv,
                    num_filters=num_filters[block],
                    stride=2 if i == 0 and block != 0 else 1)

        pool_size = int(img_size / 32)
        pool = fluid.layers.pool2d(
            input=conv, pool_size=pool_size, pool_type='avg', global_pooling=True)
        out = fluid.layers.fc(
            input=pool,
            size=class_dim,
            act=None,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.NormalInitializer(0.0, 0.01),
                regularizer=fluid.regularizer.L2Decay(1e-4)),
            bias_attr=fluid.ParamAttr(regularizer=fluid.regularizer.L2Decay(1e-4)))
        return out

    def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1,
                      act=None, bn_init_value=1.0, is_train=True):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.MSRAInitializer(),
                regularizer=fluid.regularizer.L2Decay(1e-4)))
        return fluid.layers.batch_norm(
            input=conv, act=act, is_test=not is_train,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Constant(bn_init_value),
                regularizer=None))

    def shortcut(self, input, ch_out, stride):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            return self.conv_bn_layer(input, ch_out, 1, stride)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu')
        conv1 = self.conv_bn_layer(
            input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu')
        # init bn-weight0
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, bn_init_value=0.0)
        short = self.shortcut(input, num_filters * 4, stride)
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


def _model_reader_dshape_classdim(args, is_train, val_bs=None, sz=224, trn_dir="",
                                  min_scale=0.08, rect_val=False):
    reader = None
    if args.data_set == "imagenet":
        class_dim = 1000
        if args.data_format == 'NCHW':
            dshape = [3, sz, sz]
        else:
            dshape = [sz, sz, 3]
        if is_train:
            reader = torchvision_reader.train(
                traindir="/data/imagenet/%strain" % trn_dir, sz=sz, min_scale=min_scale)
        else:
            reader = torchvision_reader.test(
                valdir="/data/imagenet/%svalidation" % trn_dir, bs=val_bs, sz=sz, rect_val=rect_val)
    else:
        raise ValueError("only support imagenet dataset.")
    return None, reader, dshape, class_dim


def lr_decay(lrs, epochs, bs, total_image):
    boundaries = []
    values = []
    import math
    for idx, epoch in enumerate(epochs):
        step = math.ceil(total_image * 1.0 / (bs[idx] * 8))
        ratio = (lrs[idx][1] - lrs[idx][0]) / (epoch[1] - epoch[0])
        lr_base = lrs[idx][0]
        for s in xrange(epoch[0], epoch[1]):
            if boundaries:
                boundaries.append(boundaries[-1] + step)
            else:
                boundaries = [step]
            values.append(lr_base + ratio * (s - epoch[0]))
    values.append(lrs[-1])
    return boundaries, values
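lr_decay above builds a piecewise-constant approximation of a linear schedule: one step-count boundary per epoch and one learning-rate value per segment, ending at a floor value. A tiny self-contained check with made-up numbers (not this commit's configuration) shows the shape of its output:

# Assumed toy schedule: two linear phases, then a 0.001 floor; bs entries
# are per-device batch sizes, and the function assumes 8 devices internally.
boundaries, values = lr_decay(
    lrs=[(0.1, 0.2), (0.2, 0.01), 0.001],
    epochs=[(0, 2), (2, 4)],
    bs=[256, 256],
    total_image=1281167)

# Each epoch is ceil(1281167 / (256 * 8)) = 626 steps, so the boundaries
# land at steps 626, 1252, 1878, 2504; values has one more entry than
# boundaries: [0.1, 0.15, 0.2, 0.105, 0.001].
print(boundaries)
print(values)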