Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
0a0d5bc0
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
大约 1 年 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0a0d5bc0
编写于
6月 10, 2020
作者:
littletomatodonkey
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add support for windows and cpu
上级
507c74a7
变更
4
显示空白变更内容
内联
并排
Showing
4 changed file
with
264 addition
and
12 deletion
+264
-12
ppcls/data/reader.py
ppcls/data/reader.py
+25
-9
tools/eval_multi_platform.py
tools/eval_multi_platform.py
+76
-0
tools/program.py
tools/program.py
+6
-3
tools/train_multi_platform.py
tools/train_multi_platform.py
+157
-0
未找到文件。
ppcls/data/reader.py
浏览文件 @
0a0d5bc0
...
...
@@ -15,15 +15,17 @@
import
numpy
as
np
import
imghdr
import
os
import
sys
import
signal
from
paddle
import
fluid
from
paddle.fluid.io
import
multiprocess_reader
from
.
import
imaug
from
.imaug
import
transform
from
ppcls.utils
import
logger
trainers_num
=
int
(
os
.
environ
.
get
(
'PADDLE_TRAINERS_NUM'
,
1
))
trainers_num
=
int
(
os
.
environ
.
get
(
'PADDLE_TRAINERS_NUM'
,
0
))
trainer_id
=
int
(
os
.
environ
.
get
(
"PADDLE_TRAINER_ID"
,
0
))
...
...
@@ -139,8 +141,9 @@ def get_file_list(params):
# use only partial data for each trainer in distributed training
if
params
[
'mode'
]
==
'train'
:
img_per_trainer
=
len
(
full_lines
)
//
trainers_num
full_lines
=
full_lines
[
trainer_id
::
trainers_num
][:
img_per_trainer
]
real_trainer_num
=
max
(
trainers_num
,
1
)
img_per_trainer
=
len
(
full_lines
)
//
real_trainer_num
full_lines
=
full_lines
[
trainer_id
::
real_trainer_num
][:
img_per_trainer
]
return
full_lines
...
...
@@ -165,7 +168,7 @@ def create_operators(params):
return
ops
def
partial_reader
(
params
,
full_lines
,
part_id
=
0
,
part_num
=
1
):
def
partial_reader
(
params
,
full_lines
,
part_id
=
0
,
part_num
=
1
,
batch_size
=
1
):
"""
create a reader with partial data
...
...
@@ -174,13 +177,13 @@ def partial_reader(params, full_lines, part_id=0, part_num=1):
full_lines: label list
part_id(int): part index of the current partial data
part_num(int): part num of the dataset
batch_size(int): batch size for one trainer
"""
assert
part_id
<
part_num
,
(
"part_num: {} should be larger "
"than part_id: {}"
.
format
(
part_num
,
part_id
))
full_lines
=
full_lines
[
part_id
::
part_num
]
batch_size
=
int
(
params
[
'batch_size'
])
//
trainers_num
if
params
[
'mode'
]
!=
"test"
and
len
(
full_lines
)
<
batch_size
:
raise
SampleNumException
(
''
,
len
(
full_lines
),
batch_size
)
...
...
@@ -197,7 +200,7 @@ def partial_reader(params, full_lines, part_id=0, part_num=1):
return
reader
def
mp_reader
(
params
):
def
mp_reader
(
params
,
batch_size
):
"""
multiprocess reader
...
...
@@ -210,11 +213,16 @@ def mp_reader(params):
if
params
[
"mode"
]
==
"train"
:
full_lines
=
shuffle_lines
(
full_lines
,
seed
=
None
)
# NOTE: multiprocess reader is not supported on windows
if
sys
.
platform
==
"win32"
:
return
partial_reader
(
params
,
full_lines
,
0
,
1
,
batch_size
)
part_num
=
1
if
'num_workers'
not
in
params
else
params
[
'num_workers'
]
readers
=
[]
for
part_id
in
range
(
part_num
):
readers
.
append
(
partial_reader
(
params
,
full_lines
,
part_id
,
part_num
))
readers
.
append
(
partial_reader
(
params
,
full_lines
,
part_id
,
part_num
,
batch_size
))
return
multiprocess_reader
(
readers
,
use_pipe
=
False
)
...
...
@@ -248,6 +256,7 @@ class Reader:
except
KeyError
:
raise
ModeException
(
mode
=
mode
)
self
.
use_gpu
=
config
.
get
(
"use_gpu"
,
True
)
use_mix
=
config
.
get
(
'use_mix'
)
self
.
params
[
'mode'
]
=
mode
if
seed
is
not
None
:
...
...
@@ -257,10 +266,17 @@ class Reader:
self
.
batch_ops
=
create_operators
(
self
.
params
[
'mix'
])
def
__call__
(
self
):
batch_size
=
int
(
self
.
params
[
'batch_size'
])
//
trainers_num
device_num
=
trainers_num
# non-distributed launch
if
trainers_num
<=
0
:
if
self
.
use_gpu
:
device_num
=
fluid
.
core
.
get_cuda_device_count
()
else
:
device_num
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
1
))
batch_size
=
int
(
self
.
params
[
'batch_size'
])
//
device_num
def
wrapper
():
reader
=
mp_reader
(
self
.
params
)
reader
=
mp_reader
(
self
.
params
,
batch_size
)
batch
=
[]
for
idx
,
sample
in
enumerate
(
reader
()):
img
,
label
=
sample
...
...
tools/eval_multi_platform.py
0 → 100644
浏览文件 @
0a0d5bc0
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
os
import
argparse
import
paddle.fluid
as
fluid
import
program
from
ppcls.data
import
Reader
from
ppcls.utils.config
import
get_config
from
ppcls.utils.save_load
import
init_model
def parse_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv (list[str] | None): argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        argparse.Namespace: with ``config`` (str, path of the YAML config)
        and ``override`` (list of str, one entry per ``-o`` occurrence).
    """
    # NOTE: the string is passed positionally, so argparse uses it as `prog`
    # (the program name shown in usage/help), not as the description.
    parser = argparse.ArgumentParser("PaddleClas eval script")
    parser.add_argument(
        '-c',
        '--config',
        type=str,
        default='./configs/eval.yaml',
        help='config file path')
    parser.add_argument(
        '-o',
        '--override',
        action='append',
        default=[],
        help='config options to be overridden')
    args = parser.parse_args(argv)
    return args
def main(args):
    """Evaluate the configured model on the validation data.

    Builds a non-distributed validation program, initializes it through
    ``init_model``, feeds it with the 'valid' reader and runs one pass in
    'eval' mode.

    Args:
        args (argparse.Namespace): parsed options; ``args.config`` is the
            config file path and ``args.override`` the list of overrides.
    """
    config = get_config(args.config, overrides=args.override, show=True)

    # Pick the device list from the config; GPU is the default.
    use_gpu = config.get("use_gpu", True)
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()

    startup_prog = fluid.Program()
    valid_prog = fluid.Program()
    valid_dataloader, valid_fetchs = program.build(
        config,
        valid_prog,
        startup_prog,
        is_train=False,
        is_distributed=False)
    valid_prog = valid_prog.clone(for_test=True)

    # The executor runs on the first place only, while the dataloader below
    # is given the full list of places.
    exe = fluid.Executor(places[0])
    exe.run(startup_prog)

    init_model(config, valid_prog, exe)

    valid_reader = Reader(config, 'valid')()
    valid_dataloader.set_sample_list_generator(valid_reader, places)

    compiled_valid_prog = program.compile(config, valid_prog)
    # -1 is passed in the epoch-id position: there is no training epoch here.
    program.run(valid_dataloader, exe, compiled_valid_prog, valid_fetchs, -1,
                'eval')
# Script entry point: parse the command line, then run the evaluation.
if __name__ == '__main__':
    args = parse_args()
    main(args)
tools/program.py
浏览文件 @
0a0d5bc0
...
...
@@ -18,6 +18,7 @@ from __future__ import print_function
import
os
import
time
import
numpy
as
np
from
collections
import
OrderedDict
...
...
@@ -314,7 +315,7 @@ def mixed_precision_optimizer(config, optimizer):
return
optimizer
def
build
(
config
,
main_prog
,
startup_prog
,
is_train
=
True
):
def
build
(
config
,
main_prog
,
startup_prog
,
is_train
=
True
,
is_distributed
=
True
):
"""
Build a program using a model and an optimizer
1. create feeds
...
...
@@ -328,6 +329,7 @@ def build(config, main_prog, startup_prog, is_train=True):
main_prog(): main program
startup_prog(): startup program
is_train(bool): train or valid
is_distributed(bool): whether to use distributed training method
Returns:
dataloader(): a bridge between the model and the data
...
...
@@ -356,6 +358,7 @@ def build(config, main_prog, startup_prog, is_train=True):
fetchs
[
'lr'
]
=
(
lr
,
AverageMeter
(
'lr'
,
'f'
,
need_avg
=
False
))
optimizer
=
mixed_precision_optimizer
(
config
,
optimizer
)
if
is_distributed
:
optimizer
=
dist_optimizer
(
config
,
optimizer
)
optimizer
.
minimize
(
fetchs
[
'loss'
][
0
])
if
config
.
get
(
'use_ema'
):
...
...
@@ -430,7 +433,7 @@ def run(dataloader,
batch_time
.
update
(
time
.
time
()
-
tic
)
tic
=
time
.
time
()
for
i
,
m
in
enumerate
(
metrics
):
metric_list
[
i
].
update
(
m
[
0
]
,
len
(
batch
[
0
]))
metric_list
[
i
].
update
(
np
.
mean
(
m
)
,
len
(
batch
[
0
]))
fetchs_str
=
''
.
join
([
str
(
m
.
value
)
+
' '
for
m
in
metric_list
]
+
[
batch_time
.
value
])
+
's'
if
vdl_writer
:
...
...
tools/train_multi_platform.py
0 → 100644
浏览文件 @
0a0d5bc0
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
argparse
import
os
import
paddle.fluid
as
fluid
from
ppcls.data
import
Reader
from
ppcls.utils.config
import
get_config
from
ppcls.utils.save_load
import
init_model
,
save_model
from
ppcls.utils
import
logger
import
program
def parse_args(argv=None):
    """Parse the command-line options of the training script.

    Args:
        argv (list[str] | None): argument strings to parse. ``None`` (the
            default) makes argparse read ``sys.argv[1:]``, which is what the
            script entry point relies on.

    Returns:
        argparse.Namespace: with ``config`` (str, path of the YAML config),
        ``vdl_dir`` (str or None, VisualDL log directory) and ``override``
        (list of str, one entry per ``-o`` occurrence).
    """
    # NOTE: the string is passed positionally, so argparse uses it as `prog`
    # (the program name shown in usage/help), not as the description.
    parser = argparse.ArgumentParser("PaddleClas train script")
    parser.add_argument(
        '-c',
        '--config',
        type=str,
        default='configs/ResNet/ResNet50.yaml',
        help='config file path')
    parser.add_argument(
        '--vdl_dir',
        type=str,
        default=None,
        help='VisualDL logging directory for image.')
    parser.add_argument(
        '-o',
        '--override',
        action='append',
        default=[],
        help='config options to be overridden')
    args = parser.parse_args(argv)
    return args
def main(args):
    """Train a model, optionally validating and checkpointing per epoch.

    All programs are built with ``is_distributed=False``, i.e. without the
    distributed optimizer wrapper, and the device list is chosen from the
    ``use_gpu`` config entry.

    NOTE(review): the nesting of the validation / best-model / periodic-save
    branches below was reconstructed from flattened text; confirm it against
    the upstream ``tools/train.py`` before relying on it.

    Args:
        args (argparse.Namespace): parsed options; uses ``args.config``,
            ``args.override`` and ``args.vdl_dir``.
    """
    config = get_config(args.config, overrides=args.override, show=True)
    # assign the place
    use_gpu = config.get("use_gpu", True)
    places = fluid.cuda_places() if use_gpu else fluid.cpu_places()

    # startup_prog is used to do some parameter init work,
    # and train prog is used to hold the network
    startup_prog = fluid.Program()
    train_prog = fluid.Program()

    best_top1_acc = 0.0  # best top1 acc record

    # With EMA enabled, program.build returns a third value (the EMA object)
    # that is used below to validate with the averaged weights.
    if not config.get('use_ema'):
        train_dataloader, train_fetchs = program.build(
            config,
            train_prog,
            startup_prog,
            is_train=True,
            is_distributed=False)
    else:
        train_dataloader, train_fetchs, ema = program.build(
            config,
            train_prog,
            startup_prog,
            is_train=True,
            is_distributed=False)

    if config.validate:
        valid_prog = fluid.Program()
        valid_dataloader, valid_fetchs = program.build(
            config,
            valid_prog,
            startup_prog,
            is_train=False,
            is_distributed=False)
        # clone to prune some content which is irrelevant in valid_prog
        valid_prog = valid_prog.clone(for_test=True)

    # create the "Executor" with the statement of which place
    exe = fluid.Executor(places[0])
    # Parameter initialization
    exe.run(startup_prog)

    # load model from 1. checkpoint to resume training,
    # 2. pretrained model to finetune
    init_model(config, train_prog, exe)

    train_reader = Reader(config, 'train')()
    train_dataloader.set_sample_list_generator(train_reader, places)

    if config.validate:
        valid_reader = Reader(config, 'valid')()
        valid_dataloader.set_sample_list_generator(valid_reader, places)
        compiled_valid_prog = program.compile(config, valid_prog)

    compiled_train_prog = program.compile(config, train_prog,
                                          train_fetchs['loss'][0].name)

    # visualdl is imported lazily so it is only required when logging is on.
    if args.vdl_dir:
        from visualdl import LogWriter
        vdl_writer = LogWriter(args.vdl_dir)
    else:
        vdl_writer = None

    for epoch_id in range(config.epochs):
        # 1. train with train dataset
        program.run(train_dataloader, exe, compiled_train_prog, train_fetchs,
                    epoch_id, 'train', vdl_writer)
        # Only trainer 0 validates and saves (the id defaults to 0 when the
        # environment variable is not set).
        if int(os.getenv("PADDLE_TRAINER_ID", 0)) == 0:
            # 2. validate with validate dataset
            if config.validate and epoch_id % config.valid_interval == 0:
                if config.get('use_ema'):
                    logger.info(logger.coloring("EMA validate start..."))
                    with ema.apply(exe):
                        top1_acc = program.run(valid_dataloader, exe,
                                               compiled_valid_prog,
                                               valid_fetchs, epoch_id, 'valid')
                    logger.info(logger.coloring("EMA validate over!"))

                # NOTE(review): this regular validation also runs after the
                # EMA one and overwrites its top1_acc, so the best-accuracy
                # tracking uses the non-EMA result; confirm this is intended.
                top1_acc = program.run(valid_dataloader, exe,
                                       compiled_valid_prog, valid_fetchs,
                                       epoch_id, 'valid')
                if top1_acc > best_top1_acc:
                    best_top1_acc = top1_acc
                    message = "The best top1 acc {:.5f}, in epoch: {:d}".format(
                        best_top1_acc, epoch_id)
                    logger.info("{:s}".format(logger.coloring(message, "RED")))
                    # A new best is only written out on save-interval epochs.
                    if epoch_id % config.save_interval == 0:
                        model_path = os.path.join(config.model_save_dir,
                                                  config.ARCHITECTURE["name"])
                        save_model(train_prog, model_path,
                                   "best_model_in_epoch_" + str(epoch_id))

            # 3. save the persistable model
            if epoch_id % config.save_interval == 0:
                model_path = os.path.join(config.model_save_dir,
                                          config.ARCHITECTURE["name"])
                save_model(train_prog, model_path, epoch_id)
# Script entry point: parse the command line, then run the training loop.
if __name__ == '__main__':
    args = parse_args()
    main(args)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录