Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleClas
提交
e83e3038
P
PaddleClas
项目概览
PaddlePaddle
/
PaddleClas
大约 1 年 前同步成功
通知
115
Star
4999
Fork
1114
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
19
列表
看板
标记
里程碑
合并请求
6
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleClas
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
19
Issue
19
列表
看板
标记
里程碑
合并请求
6
合并请求
6
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
e83e3038
编写于
11月 18, 2020
作者:
L
littletomatodonkey
提交者:
GitHub
11月 18, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix local rank get word size in dist (#402)
* fix local rank * fix export model
上级
6a5f4626
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
42 addition
and
45 deletion
+42
-45
tools/eval.py
tools/eval.py
+7
-6
tools/export_model.py
tools/export_model.py
+10
-11
tools/infer/infer.py
tools/infer/infer.py
+1
-3
tools/static/program.py
tools/static/program.py
+10
-8
tools/static/train.py
tools/static/train.py
+8
-9
tools/train.py
tools/train.py
+6
-8
未找到文件。
tools/eval.py
浏览文件 @
e83e3038
...
...
@@ -13,7 +13,6 @@
# limitations under the License.
import
paddle
from
paddle.distributed
import
ParallelEnv
import
argparse
import
os
...
...
@@ -52,16 +51,18 @@ def main(args, return_dict={}):
config
.
mode
=
"valid"
# assign place
use_gpu
=
config
.
get
(
"use_gpu"
,
True
)
place
=
'gpu:{}'
.
format
(
ParallelEnv
().
dev_id
)
if
use_gpu
else
'cpu'
place
=
paddle
.
set_device
(
place
)
place
=
paddle
.
set_device
(
'gpu'
if
use_gpu
else
'cpu'
)
use_data_parallel
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS_NUM"
,
1
))
!=
1
trainer_num
=
paddle
.
distributed
.
get_world_size
()
use_data_parallel
=
trainer_num
!=
1
config
[
"use_data_parallel"
]
=
use_data_parallel
if
config
[
"use_data_parallel"
]:
paddle
.
distributed
.
init_parallel_env
()
net
=
program
.
create_model
(
config
.
ARCHITECTURE
,
config
.
classes_num
)
if
config
[
"use_data_parallel"
]:
strategy
=
paddle
.
distributed
.
init_parallel_env
()
net
=
paddle
.
DataParallel
(
net
,
strategy
)
net
=
paddle
.
DataParallel
(
net
)
init_model
(
config
,
net
,
optimizer
=
None
)
valid_dataloader
=
Reader
(
config
,
'valid'
,
places
=
place
)()
...
...
tools/export_model.py
浏览文件 @
e83e3038
...
...
@@ -37,23 +37,17 @@ def parse_args():
"-o"
,
"--output_path"
,
type
=
str
,
default
=
"./inference/cls_infer"
)
parser
.
add_argument
(
"--class_dim"
,
type
=
int
,
default
=
1000
)
parser
.
add_argument
(
"--load_static_weights"
,
type
=
str2bool
,
default
=
False
)
#
parser.add_argument("--img_size", type=int, default=224)
parser
.
add_argument
(
"--img_size"
,
type
=
int
,
default
=
224
)
return
parser
.
parse_args
()
class
Net
(
paddle
.
nn
.
Layer
):
def
__init__
(
self
,
net
,
to_static
,
class_dim
,
model
):
def
__init__
(
self
,
net
,
class_dim
,
model
):
super
(
Net
,
self
).
__init__
()
self
.
pre_net
=
net
(
class_dim
=
class_dim
)
self
.
to_static
=
to_static
self
.
model
=
model
# Please modify the 'shape' according to actual needs
@
to_static
(
input_spec
=
[
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
3
,
224
,
224
],
dtype
=
'float32'
)
])
def
forward
(
self
,
inputs
):
x
=
self
.
pre_net
(
inputs
)
if
self
.
model
==
"GoogLeNet"
:
...
...
@@ -66,14 +60,19 @@ def main():
args
=
parse_args
()
net
=
architectures
.
__dict__
[
args
.
model
]
model
=
Net
(
net
,
to_static
,
args
.
class_dim
,
args
.
model
)
model
=
Net
(
net
,
args
.
class_dim
,
args
.
model
)
load_dygraph_pretrain
(
model
.
pre_net
,
path
=
args
.
pretrained_model
,
load_static_weights
=
args
.
load_static_weights
)
model
.
eval
()
model
=
to_static
(
model
,
input_spec
=
[
paddle
.
static
.
InputSpec
(
shape
=
[
None
,
3
,
args
.
img_size
,
args
.
img_size
],
dtype
=
'float32'
)
])
paddle
.
jit
.
save
(
model
,
args
.
output_path
)
...
...
tools/infer/infer.py
浏览文件 @
e83e3038
...
...
@@ -26,7 +26,6 @@ from ppcls.utils.save_load import load_dygraph_pretrain
from
ppcls.modeling
import
architectures
import
paddle
from
paddle.distributed
import
ParallelEnv
import
paddle.nn.functional
as
F
...
...
@@ -64,8 +63,7 @@ def save_prelabel_results(class_id, input_filepath, output_idr):
def
main
():
args
=
utils
.
parse_args
()
# assign the place
place
=
'gpu:{}'
.
format
(
ParallelEnv
().
dev_id
)
if
args
.
use_gpu
else
'cpu'
place
=
paddle
.
set_device
(
place
)
place
=
paddle
.
set_device
(
'gpu'
if
args
.
use_gpu
else
'cpu'
)
net
=
architectures
.
__dict__
[
args
.
model
](
class_dim
=
args
.
class_num
)
load_dygraph_pretrain
(
net
,
args
.
pretrained_model
,
args
.
load_static_weights
)
...
...
tools/static/program.py
浏览文件 @
e83e3038
...
...
@@ -149,7 +149,7 @@ def create_loss(out,
feed_lam
=
paddle
.
reshape
(
feeds
[
'feed_lam'
],
[
-
1
,
1
])
else
:
target
=
paddle
.
reshape
(
feeds
[
'label'
],
[
-
1
,
1
])
if
architecture
[
"name"
]
==
"GoogLeNet"
:
assert
len
(
out
)
==
3
,
"GoogLeNet should have 3 outputs"
loss
=
GoogLeNetLoss
(
class_dim
=
classes_num
,
epsilon
=
epsilon
)
...
...
@@ -400,6 +400,7 @@ def compile(config, program, loss_name=None, share_prog=None):
total_step
=
0
def
run
(
dataloader
,
exe
,
program
,
...
...
@@ -409,8 +410,7 @@ def run(dataloader,
mode
=
'train'
,
config
=
None
,
vdl_writer
=
None
,
lr_scheduler
=
None
):
lr_scheduler
=
None
):
"""
Feed data to the model and fetch the measures and loss
...
...
@@ -434,11 +434,13 @@ def run(dataloader,
tic
=
time
.
time
()
for
idx
,
batch
in
enumerate
(
dataloader
()):
batch_size
=
batch
[
0
].
shape
()[
0
]
feed_dict
=
{
key
.
name
:
batch
[
idx
]
for
idx
,
key
in
enumerate
(
feeds
.
values
())}
metrics
=
exe
.
run
(
program
=
program
,
feed
=
feed_dict
,
fetch_list
=
fetch_list
)
feed_dict
=
{
key
.
name
:
batch
[
idx
]
for
idx
,
key
in
enumerate
(
feeds
.
values
())
}
metrics
=
exe
.
run
(
program
=
program
,
feed
=
feed_dict
,
fetch_list
=
fetch_list
)
batch_time
.
update
(
time
.
time
()
-
tic
)
tic
=
time
.
time
()
...
...
tools/static/train.py
浏览文件 @
e83e3038
...
...
@@ -26,7 +26,6 @@ sys.path.append(os.path.abspath(os.path.join(__dir__, '../../')))
from
sys
import
version_info
import
paddle
from
paddle.distributed
import
ParallelEnv
from
paddle.distributed
import
fleet
from
ppcls.data
import
Reader
...
...
@@ -66,8 +65,7 @@ def main(args):
# assign the place
use_gpu
=
config
.
get
(
"use_gpu"
,
True
)
assert
use_gpu
is
True
,
"gpu must be true in static mode!"
place
=
'gpu:{}'
.
format
(
ParallelEnv
().
dev_id
)
place
=
paddle
.
set_device
(
place
)
place
=
paddle
.
set_device
(
"gpu"
)
# startup_prog is used to do some parameter init work,
# and train prog is used to hold the network
...
...
@@ -94,7 +92,7 @@ def main(args):
# load model from 1. checkpoint to resume training, 2. pretrained model to finetune
train_dataloader
=
Reader
(
config
,
'train'
,
places
=
place
)()
if
config
.
validate
and
ParallelEnv
().
local_rank
==
0
:
if
config
.
validate
and
paddle
.
distributed
.
get_rank
()
==
0
:
valid_dataloader
=
Reader
(
config
,
'valid'
,
places
=
place
)()
compiled_valid_prog
=
program
.
compile
(
config
,
valid_prog
)
...
...
@@ -110,14 +108,15 @@ def main(args):
for
epoch_id
in
range
(
config
.
epochs
):
# 1. train with train dataset
program
.
run
(
train_dataloader
,
exe
,
train_prog
,
train_feeds
,
train_fetchs
,
epoch_id
,
'train'
,
config
,
vdl_writer
,
lr_scheduler
)
if
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
0
))
==
0
:
program
.
run
(
train_dataloader
,
exe
,
train_prog
,
train_feeds
,
train_fetchs
,
epoch_id
,
'train'
,
config
,
vdl_writer
,
lr_scheduler
)
if
paddle
.
distributed
.
get_rank
()
==
0
:
# 2. validate with validate dataset
if
config
.
validate
and
epoch_id
%
config
.
valid_interval
==
0
:
top1_acc
=
program
.
run
(
valid_dataloader
,
exe
,
compiled_valid_prog
,
valid_feeds
,
valid_fetchs
,
epoch_id
,
'valid'
,
config
)
compiled_valid_prog
,
valid_feeds
,
valid_fetchs
,
epoch_id
,
'valid'
,
config
)
if
top1_acc
>
best_top1_acc
:
best_top1_acc
=
top1_acc
message
=
"The best top1 acc {:.5f}, in epoch: {:d}"
.
format
(
...
...
tools/train.py
浏览文件 @
e83e3038
...
...
@@ -24,7 +24,6 @@ sys.path.append(__dir__)
sys
.
path
.
append
(
os
.
path
.
abspath
(
os
.
path
.
join
(
__dir__
,
'..'
)))
import
paddle
from
paddle.distributed
import
ParallelEnv
from
ppcls.data
import
Reader
from
ppcls.utils.config
import
get_config
...
...
@@ -57,29 +56,28 @@ def main(args):
config
=
get_config
(
args
.
config
,
overrides
=
args
.
override
,
show
=
True
)
# assign the place
use_gpu
=
config
.
get
(
"use_gpu"
,
True
)
place
=
'gpu:{}'
.
format
(
ParallelEnv
().
dev_id
)
if
use_gpu
else
'cpu'
place
=
paddle
.
set_device
(
place
)
place
=
paddle
.
set_device
(
'gpu'
if
use_gpu
else
'cpu'
)
trainer_num
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS_NUM"
,
1
)
)
trainer_num
=
paddle
.
distributed
.
get_world_size
(
)
use_data_parallel
=
trainer_num
!=
1
config
[
"use_data_parallel"
]
=
use_data_parallel
if
config
[
"use_data_parallel"
]:
strategy
=
paddle
.
distributed
.
init_parallel_env
()
paddle
.
distributed
.
init_parallel_env
()
net
=
program
.
create_model
(
config
.
ARCHITECTURE
,
config
.
classes_num
)
optimizer
,
lr_scheduler
=
program
.
create_optimizer
(
config
,
parameter_list
=
net
.
parameters
())
if
config
[
"use_data_parallel"
]:
net
=
paddle
.
DataParallel
(
net
,
strategy
)
net
=
paddle
.
DataParallel
(
net
)
# load model from checkpoint or pretrained model
init_model
(
config
,
net
,
optimizer
)
train_dataloader
=
Reader
(
config
,
'train'
,
places
=
place
)()
if
config
.
validate
and
ParallelEnv
().
local_rank
==
0
:
if
config
.
validate
and
paddle
.
distributed
.
get_rank
()
==
0
:
valid_dataloader
=
Reader
(
config
,
'valid'
,
places
=
place
)()
last_epoch_id
=
config
.
get
(
"last_epoch"
,
-
1
)
...
...
@@ -91,7 +89,7 @@ def main(args):
program
.
run
(
train_dataloader
,
config
,
net
,
optimizer
,
lr_scheduler
,
epoch_id
,
'train'
)
if
not
config
[
"use_data_parallel"
]
or
ParallelEnv
().
local_rank
==
0
:
if
paddle
.
distributed
.
get_rank
()
==
0
:
# 2. validate with validate dataset
if
config
.
validate
and
epoch_id
%
config
.
valid_interval
==
0
:
net
.
eval
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录