Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
f2c55743
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f2c55743
编写于
6月 05, 2018
作者:
T
Tao Luo
提交者:
GitHub
6月 05, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #11158 from luotao1/benchmark1
refine benchmark/fluid
上级
78afcbff
9d1dae39
变更
3
隐藏空白更改
内联
并排
Showing
3 changed file
with
26 addition
and
23 deletion
+26
-23
benchmark/.gitignore
benchmark/.gitignore
+3
-0
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+12
-14
benchmark/fluid/run.sh
benchmark/fluid/run.sh
+11
-9
未找到文件。
benchmark/.gitignore
浏览文件 @
f2c55743
...
...
@@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
caffe/image/logs
tensorflow/image/logs
tensorflow/rnn/logs
fluid/models/*.pyc
fluid/logs
fluid/nohup.out
benchmark/fluid/fluid_benchmark.py
浏览文件 @
f2c55743
...
...
@@ -40,10 +40,7 @@ def parse_args():
parser
.
add_argument
(
'--batch_size'
,
type
=
int
,
default
=
32
,
help
=
'The minibatch size.'
)
parser
.
add_argument
(
'--learning_rate'
,
type
=
float
,
default
=
0.001
,
help
=
'The minibatch size.'
)
'--learning_rate'
,
type
=
float
,
default
=
0.001
,
help
=
'The learning rate.'
)
# TODO(wuyi): add "--use_fake_data" option back.
parser
.
add_argument
(
'--skip_batch_num'
,
...
...
@@ -231,10 +228,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
train_losses
.
append
(
loss
)
print
(
"Pass: %d, Iter: %d, Loss: %f
\n
"
%
(
pass_id
,
iters
,
np
.
mean
(
train_losses
)))
train_elapsed
=
time
.
time
()
-
start_time
examples_per_sec
=
num_samples
/
train_elapsed
print
(
'
\n
Total examples: %d, total time: %.5f, %.5f examples/sec
\n
'
%
(
num_samples
,
train_elapsed
,
examples_per_sec
))
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
print
(
"Pass: %d, Loss: %f"
%
(
pass_id
,
np
.
mean
(
train_losses
)))
# evaluation
if
not
args
.
no_test
and
batch_acc
!=
None
:
...
...
@@ -315,10 +309,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
if
batch_id
%
1
==
0
:
print
(
"Pass %d, batch %d, loss %s"
%
(
pass_id
,
batch_id
,
np
.
array
(
loss
)))
train_elapsed
=
time
.
time
()
-
start_time
examples_per_sec
=
num_samples
/
train_elapsed
print
(
'
\n
Total examples: %d, total time: %.5f, %.5f examples/sed
\n
'
%
(
num_samples
,
train_elapsed
,
examples_per_sec
))
print_train_time
(
start_time
,
time
.
time
(),
num_samples
)
if
not
args
.
no_test
and
batch_acc
!=
None
:
test_acc
=
test
(
startup_exe
,
infer_prog
,
test_reader
,
feeder
,
batch_acc
)
...
...
@@ -329,12 +320,19 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
def
print_arguments
(
args
):
vars
(
args
)[
'use_nvprof'
]
=
(
vars
(
args
)[
'use_nvprof'
]
and
vars
(
args
)[
'device'
]
==
'GPU'
)
print
(
'-----------
resnet
Configuration Arguments -----------'
)
print
(
'----------- Configuration Arguments -----------'
)
for
arg
,
value
in
sorted
(
vars
(
args
).
iteritems
()):
print
(
'%s: %s'
%
(
arg
,
value
))
print
(
'------------------------------------------------'
)
def
print_train_time
(
start_time
,
end_time
,
num_samples
):
train_elapsed
=
end_time
-
start_time
examples_per_sec
=
num_samples
/
train_elapsed
print
(
'
\n
Total examples: %d, total time: %.5f, %.5f examples/sed
\n
'
%
(
num_samples
,
train_elapsed
,
examples_per_sec
))
def
main
():
args
=
parse_args
()
print_arguments
(
args
)
...
...
@@ -342,7 +340,7 @@ def main():
# the unique trainer id, starting from 0, needed by trainer
# only
nccl_id_var
,
num_trainers
,
trainer_id
=
(
None
,
1
,
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"
-1
"
)))
None
,
1
,
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"
0
"
)))
if
args
.
use_cprof
:
pr
=
cProfile
.
Profile
()
...
...
benchmark/fluid/run.sh
浏览文件 @
f2c55743
...
...
@@ -2,6 +2,7 @@
# This script benchmarking the PaddlePaddle Fluid on
# single thread single GPU.
mkdir
-p
logs
#export FLAGS_fraction_of_gpu_memory_to_use=0.0
export
CUDNN_PATH
=
/paddle/cudnn_v5
...
...
@@ -35,6 +36,7 @@ nohup stdbuf -oL nvidia-smi \
--format
=
csv
\
--filename
=
mem.log
\
-l
1 &
# mnist
# mnist gpu mnist 128
FLAGS_benchmark
=
true stdbuf
-oL
python fluid_benchmark.py
\
...
...
@@ -43,7 +45,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size
=
128
\
--skip_batch_num
=
5
\
--iterations
=
500
\
2>&1 |
tee
-a
mnist_gpu_128.log
2>&1 |
tee
-a
logs/
mnist_gpu_128.log
# vgg16
# gpu cifar10 128
...
...
@@ -53,7 +55,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size
=
128
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
vgg16_gpu_128.log
2>&1 |
tee
-a
logs/
vgg16_gpu_128.log
# flowers gpu 128
FLAGS_benchmark
=
true stdbuf
-oL
python fluid_benchmark.py
\
...
...
@@ -63,28 +65,28 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--data_set
=
flowers
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
vgg16_gpu_flowers_32.log
2>&1 |
tee
-a
logs/
vgg16_gpu_flowers_32.log
# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark
=
true stdbuf
-oL
python fluid_benchmark.py
\
--model
=
resnet
50
\
--model
=
resnet
\
--device
=
GPU
\
--batch_size
=
128
\
--data_set
=
cifar10
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
resnet50_gpu_128.log
2>&1 |
tee
-a
logs/
resnet50_gpu_128.log
# resnet50 gpu flowers 64
FLAGS_benchmark
=
true stdbuf
-oL
python fluid_benchmark.py
\
--model
=
resnet
50
\
--model
=
resnet
\
--device
=
GPU
\
--batch_size
=
64
\
--data_set
=
flowers
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
resnet50_gpu_flowers_64.log
2>&1 |
tee
-a
logs/
resnet50_gpu_flowers_64.log
# lstm
# lstm gpu imdb 32 # tensorflow only support batch=32
...
...
@@ -94,7 +96,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size
=
32
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
lstm_gpu_32.log
2>&1 |
tee
-a
l
ogs/l
stm_gpu_32.log
# seq2seq
# seq2seq gpu wmb 128
...
...
@@ -104,4 +106,4 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
--batch_size
=
128
\
--skip_batch_num
=
5
\
--iterations
=
30
\
2>&1 |
tee
-a
lstm_gpu_128.log
2>&1 |
tee
-a
l
ogs/l
stm_gpu_128.log
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录