Commit 172c887d (unverified)
Authored Apr 03, 2018 by dzhwinter; committed by GitHub on Apr 03, 2018
init (#9462)
Parent: faa752a4
Showing 6 changed files with 1,355 additions and 0 deletions (+1355 −0)
benchmark/fluid/machine_translation.py   +349  −0
benchmark/fluid/mnist.py                 +205  −0
benchmark/fluid/resnet.py                +323  −0
benchmark/fluid/run.sh                   +49   −0
benchmark/fluid/stacked_dynamic_lstm.py  +209  −0
benchmark/fluid/vgg.py                   +220  −0
benchmark/fluid/machine_translation.py (new file, mode 100644)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""seq2seq model for fluid."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import argparse
import time
import distutils.util

import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--embedding_dim",
    type=int,
    default=512,
    help="The dimension of embedding table. (default: %(default)d)")
parser.add_argument(
    "--encoder_size",
    type=int,
    default=512,
    help="The size of encoder bi-rnn unit. (default: %(default)d)")
parser.add_argument(
    "--decoder_size",
    type=int,
    default=512,
    help="The size of decoder rnn unit. (default: %(default)d)")
parser.add_argument(
    "--batch_size",
    type=int,
    default=16,
    help="The sequence number of a mini-batch data. (default: %(default)d)")
parser.add_argument(
    "--dict_size",
    type=int,
    default=30000,
    help="The dictionary capacity. The source and target dictionaries "
    "share the same capacity. (default: %(default)d)")
parser.add_argument(
    "--pass_num",
    type=int,
    default=2,
    help="The pass number to train. (default: %(default)d)")
parser.add_argument(
    "--learning_rate",
    type=float,
    default=0.0002,
    help="Learning rate used to train the model. (default: %(default)f)")
parser.add_argument(
    "--infer_only", action='store_true', help="If set, run forward only.")
parser.add_argument(
    "--beam_size",
    type=int,
    default=3,
    help="The width for beam searching. (default: %(default)d)")
parser.add_argument(
    "--use_gpu",
    type=distutils.util.strtobool,
    default=True,
    help="Whether to use gpu. (default: %(default)d)")
parser.add_argument(
    "--max_length",
    type=int,
    default=250,
    help="The maximum length of sequence when doing generation. "
    "(default: %(default)d)")


def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    def linear(inputs):
        return fluid.layers.fc(input=inputs, size=size, bias_attr=True)

    forget_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    input_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    output_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t]))

    cell_t = fluid.layers.sums(input=[
        fluid.layers.elementwise_mul(
            x=forget_gate, y=cell_t_prev), fluid.layers.elementwise_mul(
                x=input_gate, y=cell_tilde)
    ])

    hidden_t = fluid.layers.elementwise_mul(
        x=output_gate, y=fluid.layers.tanh(x=cell_t))

    return hidden_t, cell_t


def seq_to_seq_net(embedding_dim, encoder_size, decoder_size, source_dict_dim,
                   target_dict_dim, is_generating, beam_size, max_length):
    """Construct a seq2seq network."""

    def bi_lstm_encoder(input_seq, gate_size):
        # The linear transformation part for the input gate, output gate,
        # forget gate and cell activation vectors needs to be done outside
        # of dynamic_lstm, so the output size is 4 times gate_size.
        input_forward_proj = fluid.layers.fc(input=input_seq,
                                             size=gate_size * 4,
                                             act=None,
                                             bias_attr=False)
        forward, _ = fluid.layers.dynamic_lstm(
            input=input_forward_proj, size=gate_size * 4, use_peepholes=False)
        input_reversed_proj = fluid.layers.fc(input=input_seq,
                                              size=gate_size * 4,
                                              act=None,
                                              bias_attr=False)
        reversed, _ = fluid.layers.dynamic_lstm(
            input=input_reversed_proj,
            size=gate_size * 4,
            is_reverse=True,
            use_peepholes=False)
        return forward, reversed

    src_word_idx = fluid.layers.data(
        name='source_sequence', shape=[1], dtype='int64', lod_level=1)

    src_embedding = fluid.layers.embedding(
        input=src_word_idx,
        size=[source_dict_dim, embedding_dim],
        dtype='float32')

    src_forward, src_reversed = bi_lstm_encoder(
        input_seq=src_embedding, gate_size=encoder_size)

    encoded_vector = fluid.layers.concat(
        input=[src_forward, src_reversed], axis=1)

    encoded_proj = fluid.layers.fc(input=encoded_vector,
                                   size=decoder_size,
                                   bias_attr=False)

    backward_first = fluid.layers.sequence_pool(
        input=src_reversed, pool_type='first')

    decoder_boot = fluid.layers.fc(input=backward_first,
                                   size=decoder_size,
                                   bias_attr=False,
                                   act='tanh')

    def lstm_decoder_with_attention(target_embedding, encoder_vec,
                                    encoder_proj, decoder_boot, decoder_size):
        def simple_attention(encoder_vec, encoder_proj, decoder_state):
            decoder_state_proj = fluid.layers.fc(input=decoder_state,
                                                 size=decoder_size,
                                                 bias_attr=False)
            decoder_state_expand = fluid.layers.sequence_expand(
                x=decoder_state_proj, y=encoder_proj)
            concated = fluid.layers.concat(
                input=[encoder_proj, decoder_state_expand], axis=1)
            attention_weights = fluid.layers.fc(input=concated,
                                                size=1,
                                                act='tanh',
                                                bias_attr=False)
            attention_weights = fluid.layers.sequence_softmax(
                input=attention_weights)
            weigths_reshape = fluid.layers.reshape(
                x=attention_weights, shape=[-1])
            scaled = fluid.layers.elementwise_mul(
                x=encoder_vec, y=weigths_reshape, axis=0)
            context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')
            return context

        rnn = fluid.layers.DynamicRNN()

        cell_init = fluid.layers.fill_constant_batch_size_like(
            input=decoder_boot,
            value=0.0,
            shape=[-1, decoder_size],
            dtype='float32')
        cell_init.stop_gradient = False

        with rnn.block():
            current_word = rnn.step_input(target_embedding)
            encoder_vec = rnn.static_input(encoder_vec)
            encoder_proj = rnn.static_input(encoder_proj)
            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
            cell_mem = rnn.memory(init=cell_init)
            context = simple_attention(encoder_vec, encoder_proj, hidden_mem)
            decoder_inputs = fluid.layers.concat(
                input=[context, current_word], axis=1)
            h, c = lstm_step(decoder_inputs, hidden_mem, cell_mem, decoder_size)
            rnn.update_memory(hidden_mem, h)
            rnn.update_memory(cell_mem, c)
            out = fluid.layers.fc(input=h,
                                  size=target_dict_dim,
                                  bias_attr=True,
                                  act='softmax')
            rnn.output(out)

        return rnn()

    if not is_generating:
        trg_word_idx = fluid.layers.data(
            name='target_sequence', shape=[1], dtype='int64', lod_level=1)

        trg_embedding = fluid.layers.embedding(
            input=trg_word_idx,
            size=[target_dict_dim, embedding_dim],
            dtype='float32')

        prediction = lstm_decoder_with_attention(trg_embedding, encoded_vector,
                                                 encoded_proj, decoder_boot,
                                                 decoder_size)
        label = fluid.layers.data(
            name='label_sequence', shape=[1], dtype='int64', lod_level=1)
        cost = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_cost = fluid.layers.mean(x=cost)

        feeding_list = ["source_sequence", "target_sequence", "label_sequence"]

        return avg_cost, feeding_list


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    lod_t = core.LoDTensor()
    lod_t.set(flattened_data, place)
    lod_t.set_lod([lod])
    return lod_t, lod[-1]


def lodtensor_to_ndarray(lod_tensor):
    dims = lod_tensor.get_dims()
    ndarray = np.zeros(shape=dims).astype('float32')
    for i in xrange(np.product(dims)):
        ndarray.ravel()[i] = lod_tensor.get_float_element(i)
    return ndarray


def train():
    avg_cost, feeding_list = seq_to_seq_net(
        args.embedding_dim,
        args.encoder_size,
        args.decoder_size,
        args.dict_size,
        args.dict_size,
        False,
        beam_size=args.beam_size,
        max_length=args.max_length)

    # clone from default main program
    inference_program = fluid.default_main_program().clone()

    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    train_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(args.dict_size), buf_size=1000),
        batch_size=args.batch_size)

    test_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.test(args.dict_size), buf_size=1000),
        batch_size=args.batch_size)

    place = core.CUDAPlace(0) if args.use_gpu else core.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def do_validation():
        total_loss = 0.0
        count = 0
        for batch_id, data in enumerate(test_batch_generator()):
            src_seq = to_lodtensor(map(lambda x: x[0], data), place)[0]
            trg_seq = to_lodtensor(map(lambda x: x[1], data), place)[0]
            lbl_seq = to_lodtensor(map(lambda x: x[2], data), place)[0]

            fetch_outs = exe.run(inference_program,
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost],
                                 return_numpy=False)

            total_loss += lodtensor_to_ndarray(fetch_outs[0])[0]
            count += 1

        return total_loss / count

    for pass_id in xrange(args.pass_num):
        pass_start_time = time.time()
        words_seen = 0
        for batch_id, data in enumerate(train_batch_generator()):
            src_seq, word_num = to_lodtensor(map(lambda x: x[0], data), place)
            words_seen += word_num
            trg_seq, word_num = to_lodtensor(map(lambda x: x[1], data), place)
            words_seen += word_num
            lbl_seq, _ = to_lodtensor(map(lambda x: x[2], data), place)

            fetch_outs = exe.run(framework.default_main_program(),
                                 feed={
                                     feeding_list[0]: src_seq,
                                     feeding_list[1]: trg_seq,
                                     feeding_list[2]: lbl_seq
                                 },
                                 fetch_list=[avg_cost])

            avg_cost_val = np.array(fetch_outs[0])
            print('pass_id=%d, batch_id=%d, train_loss: %f' %
                  (pass_id, batch_id, avg_cost_val))

        pass_end_time = time.time()
        test_loss = do_validation()
        time_consumed = pass_end_time - pass_start_time
        words_per_sec = words_seen / time_consumed
        print("pass_id=%d, test_loss: %f, words/s: %f, sec/pass: %f" %
              (pass_id, test_loss, words_per_sec, time_consumed))


def infer():
    pass


if __name__ == '__main__':
    args = parser.parse_args()
    if args.infer_only:
        infer()
    else:
        train()
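A note on the LoD (level of detail) list that to_lodtensor builds above: it is simply a vector of cumulative sequence offsets into the flattened batch. A minimal numpy-only sketch of the same bookkeeping, using a made-up two-sequence batch for illustration:

import numpy as np

# Hypothetical batch of two token-id sequences, of lengths 3 and 2.
data = [[1, 2, 3], [4, 5]]

cur_len = 0
lod = [cur_len]
for seq in data:
    cur_len += len(seq)  # running offset of each sequence end
    lod.append(cur_len)

flattened = np.concatenate(data).astype("int64").reshape([-1, 1])
print(lod)              # [0, 3, 5]: sequence i spans rows lod[i]:lod[i+1]
print(flattened.shape)  # (5, 1)

This also shows why to_lodtensor can return lod[-1] as the total word count of the batch.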
benchmark/fluid/mnist.py (new file, mode 100644)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import argparse
import cProfile
import time

import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler

SEED = 1
DTYPE = "float32"

# random seed must set before configuring the network.
# fluid.default_startup_program().random_seed = SEED


def parse_args():
    parser = argparse.ArgumentParser("mnist model benchmark.")
    parser.add_argument(
        '--batch_size', type=int, default=128, help='The minibatch size.')
    parser.add_argument(
        '--iterations',
        type=int,
        default=35,
        help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=5, help='The number of passes.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    args = parser.parse_args()
    return args


def print_arguments(args):
    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                vars(args)['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).iteritems()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def cnn_model(data):
    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=data,
        filter_size=5,
        num_filters=20,
        pool_size=2,
        pool_stride=2,
        act="relu")
    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
        pool_size=2,
        pool_stride=2,
        act="relu")

    # TODO(dzhwinter) : refine the initializer and random seed setting
    SIZE = 10
    input_shape = conv_pool_2.shape
    param_shape = [reduce(lambda a, b: a * b, input_shape[1:], 1)] + [SIZE]
    scale = (2.0 / (param_shape[0]**2 * SIZE))**0.5

    predict = fluid.layers.fc(
        input=conv_pool_2,
        size=SIZE,
        act="softmax",
        param_attr=fluid.param_attr.ParamAttr(
            initializer=fluid.initializer.NormalInitializer(
                loc=0.0, scale=scale)))
    return predict


def eval_test(exe, batch_acc, batch_size_tensor, inference_program):
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=args.batch_size)
    test_pass_acc = fluid.average.WeightedAverage()
    for batch_id, data in enumerate(test_reader()):
        img_data = np.array(map(lambda x: x[0].reshape([1, 28, 28]),
                                data)).astype(DTYPE)
        y_data = np.array(map(lambda x: x[1], data)).astype("int64")
        y_data = y_data.reshape([len(y_data), 1])

        acc, weight = exe.run(inference_program,
                              feed={"pixel": img_data,
                                    "label": y_data},
                              fetch_list=[batch_acc, batch_size_tensor])
        test_pass_acc.add(value=acc, weight=weight)
    pass_acc = test_pass_acc.eval()
    return pass_acc


def run_benchmark(model, args):
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()
    start_time = time.time()
    # Input data
    images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    predict = model(images)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    opt = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001, beta1=0.9, beta2=0.999)
    opt.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # Reader
    train_reader = paddle.batch(
        paddle.dataset.mnist.train(), batch_size=args.batch_size)

    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        pass_start = time.time()
        for batch_id, data in enumerate(train_reader()):
            img_data = np.array(
                map(lambda x: x[0].reshape([1, 28, 28]), data)).astype(DTYPE)
            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
            y_data = y_data.reshape([len(y_data), 1])

            start = time.time()
            outs = exe.run(fluid.default_main_program(),
                           feed={"pixel": img_data,
                                 "label": y_data},
                           fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            # The accuracy is the accumulation of batches, not the current batch.
            accuracy.add(value=outs[1], weight=outs[2])
            end = time.time()
            loss = np.array(outs[0])
            acc = np.array(outs[1])
            print("pass=%d, batch=%d, loss=%f, error=%f, elapse=%f" %
                  (pass_id, batch_id, loss, 1 - acc, (end - start) / 1000))

        pass_end = time.time()

        train_avg_acc = accuracy.eval()
        test_avg_acc = eval_test(exe, batch_acc, batch_size_tensor,
                                 inference_program)

        print("pass=%d, train_avg_acc=%f, test_avg_acc=%f, elapse=%f" %
              (pass_id, train_avg_acc, test_avg_acc,
               (pass_end - pass_start) / 1000))


if __name__ == '__main__':
    args = parse_args()
    print_arguments(args)
    if args.use_nvprof and args.device == 'GPU':
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            run_benchmark(cnn_model, args)
    else:
        run_benchmark(cnn_model, args)
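The scale fed to NormalInitializer in cnn_model is computed as (2.0 / (param_shape[0]**2 * SIZE))**0.5, i.e. it shrinks with the square of the fan-in of the final fc layer. A quick worked check, assuming conv_pool_2 flattens to 800 features per sample (the real value is inferred from the graph at build time, so this number is only illustrative):

# Illustrative check of the initializer scale used in cnn_model.
fan_in = 800  # hypothetical flattened size of conv_pool_2
SIZE = 10     # number of MNIST classes
scale = (2.0 / (fan_in**2 * SIZE))**0.5
print(scale)  # ~0.00056, a very small stddev for the softmax layer weights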
benchmark/fluid/resnet.py (new file, mode 100644)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import functools
import numpy as np
import time

import cProfile, pstats, StringIO

import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.profiler as profiler


def parse_args():
    parser = argparse.ArgumentParser('Convolution model benchmark.')
    parser.add_argument(
        '--model',
        type=str,
        choices=['resnet_imagenet', 'resnet_cifar10'],
        default='resnet_imagenet',
        help='The model architecture.')
    parser.add_argument(
        '--batch_size', type=int, default=32, help='The minibatch size.')
    parser.add_argument(
        '--use_fake_data',
        action='store_true',
        help='use real data or fake data')
    parser.add_argument(
        '--skip_batch_num',
        type=int,
        default=5,
        help='The first num of minibatch num to skip, for better performance test'
    )
    parser.add_argument(
        '--iterations',
        type=int,
        default=80,
        help='The number of minibatches.')
    parser.add_argument(
        '--pass_num', type=int, default=100, help='The number of passes.')
    parser.add_argument(
        '--data_format',
        type=str,
        default='NCHW',
        choices=['NCHW', 'NHWC'],
        help='The data data_format, now only support NCHW.')
    parser.add_argument(
        '--device',
        type=str,
        default='GPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--data_set',
        type=str,
        default='flowers',
        choices=['cifar10', 'flowers'],
        help='Optional dataset for benchmark.')
    parser.add_argument(
        '--infer_only', action='store_true', help='If set, run forward only.')
    parser.add_argument(
        '--use_cprof', action='store_true', help='If set, use cProfile.')
    parser.add_argument(
        '--use_nvprof',
        action='store_true',
        help='If set, use nvprof for CUDA.')
    parser.add_argument(
        '--with_test',
        action='store_true',
        help='If set, test the testset during training.')
    args = parser.parse_args()
    return args


def print_arguments(args):
    vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                vars(args)['device'] == 'GPU')
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).iteritems()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


def conv_bn_layer(input, ch_out, filter_size, stride, padding, act='relu'):
    conv1 = fluid.layers.conv2d(
        input=input,
        filter_size=filter_size,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=None,
        bias_attr=False)
    return fluid.layers.batch_norm(input=conv1, act=act)


def shortcut(input, ch_out, stride):
    ch_in = input.shape[1] if args.data_format == 'NCHW' else input.shape[-1]
    if ch_in != ch_out:
        return conv_bn_layer(input, ch_out, 1, stride, 0, None)
    else:
        return input


def basicblock(input, ch_out, stride):
    short = shortcut(input, ch_out, stride)
    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1)
    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, act=None)
    return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')


def bottleneck(input, ch_out, stride):
    short = shortcut(input, ch_out * 4, stride)
    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0)
    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1)
    conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0, act=None)
    return fluid.layers.elementwise_add(x=short, y=conv3, act='relu')


def layer_warp(block_func, input, ch_out, count, stride):
    res_out = block_func(input, ch_out, stride)
    for i in range(1, count):
        res_out = block_func(res_out, ch_out, 1)
    return res_out


def resnet_imagenet(input, class_dim, depth=50, data_format='NCHW'):

    cfg = {
        18: ([2, 2, 2, 1], basicblock),
        34: ([3, 4, 6, 3], basicblock),
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck)
    }
    stages, block_func = cfg[depth]
    conv1 = conv_bn_layer(input, ch_out=64, filter_size=7, stride=2, padding=3)
    pool1 = fluid.layers.pool2d(
        input=conv1, pool_type='avg', pool_size=3, pool_stride=2)
    res1 = layer_warp(block_func, pool1, 64, stages[0], 1)
    res2 = layer_warp(block_func, res1, 128, stages[1], 2)
    res3 = layer_warp(block_func, res2, 256, stages[2], 2)
    res4 = layer_warp(block_func, res3, 512, stages[3], 2)
    pool2 = fluid.layers.pool2d(
        input=res4,
        pool_size=7,
        pool_type='avg',
        pool_stride=1,
        global_pooling=True)
    out = fluid.layers.fc(input=pool2, size=class_dim, act='softmax')
    return out


def resnet_cifar10(input, class_dim, depth=32, data_format='NCHW'):
    assert (depth - 2) % 6 == 0

    n = (depth - 2) // 6

    conv1 = conv_bn_layer(
        input=input, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 64, n, 2)
    pool = fluid.layers.pool2d(
        input=res3, pool_size=8, pool_type='avg', pool_stride=1)
    out = fluid.layers.fc(input=pool, size=class_dim, act='softmax')
    return out


def run_benchmark(model, args):
    if args.use_cprof:
        pr = cProfile.Profile()
        pr.enable()

    if args.data_set == "cifar10":
        class_dim = 10
        if args.data_format == 'NCHW':
            dshape = [3, 32, 32]
        else:
            dshape = [32, 32, 3]
    else:
        class_dim = 102
        if args.data_format == 'NCHW':
            dshape = [3, 224, 224]
        else:
            dshape = [224, 224, 3]

    input = fluid.layers.data(name='data', shape=dshape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    predict = model(input, class_dim)
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    opts = optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    def test(exe):
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            img_data = np.array(map(lambda x: x[0].reshape(dshape),
                                    data)).astype("float32")
            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
            y_data = y_data.reshape([-1, 1])

            acc, weight = exe.run(inference_program,
                                  feed={"data": img_data,
                                        "label": y_data},
                                  fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)

        return test_accuracy.eval()

    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())
    accuracy = fluid.average.WeightedAverage()
    if args.use_fake_data:
        data = train_reader().next()
        image = np.array(map(lambda x: x[0].reshape(dshape), data)).astype(
            'float32')
        label = np.array(map(lambda x: x[1], data)).astype('int64')
        label = label.reshape([-1, 1])

    iters, num_samples, start_time = 0, 0, time.time()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            if not args.use_fake_data:
                image = np.array(map(lambda x: x[0].reshape(dshape),
                                     data)).astype('float32')
                label = np.array(map(lambda x: x[1], data)).astype('int64')
                label = label.reshape([-1, 1])
            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={'data': image,
                      'label': label},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            iters += 1
            num_samples += label[0]
            accuracy.add(value=acc, weight=weight)
            train_losses.append(loss)
            train_accs.append(acc)
            print("Pass: %d, Iter: %d, Loss: %f, Accuracy: %f" %
                  (pass_id, iters, loss, acc))
        pass_train_acc = accuracy.eval()
        # evaluation
        if args.with_test:
            pass_test_acc = test(exe)
        train_elapsed = time.time() - start_time
        print("Pass: %d, Loss: %f, Train Accuracy: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))

    examples_per_sec = num_samples / train_elapsed

    print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
          (num_samples, train_elapsed, examples_per_sec))

    if args.use_cprof:
        pr.disable()
        s = StringIO.StringIO()
        sortby = 'cumulative'
        ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
        ps.print_stats()
        print(s.getvalue())


if __name__ == '__main__':
    model_map = {
        'resnet_imagenet': resnet_imagenet,
        'resnet_cifar10': resnet_cifar10
    }
    args = parse_args()
    print_arguments(args)
    if args.data_format == 'NHWC':
        raise ValueError('Only support NCHW data_format now.')
    if args.use_nvprof and args.device == 'GPU':
        with profiler.cuda_profiler("cuda_profiler.txt", 'csv') as nvprof:
            run_benchmark(model_map[args.model], args)
    else:
        run_benchmark(model_map[args.model], args)
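The cfg table in resnet_imagenet maps a depth to a pair of (stage repeat counts, block function). For depth 50, the stages [3, 4, 6, 3] of 3-conv bottlenecks, plus the stem conv and the final fc, account for the 50 weight layers the name promises. (Note that the canonical ResNet-18 uses stages [2, 2, 2, 2], so the [2, 2, 2, 1] entry above looks like an off-by-one.) A small sanity check of that bookkeeping:

# Sanity check: cfg[50]'s stage counts add up to 50 weight layers.
stages = [3, 4, 6, 3]  # bottleneck repeats per stage, from cfg[50]
convs_per_block = 3    # each bottleneck is a 1x1, 3x3, 1x1 stack
total = 1 + sum(stages) * convs_per_block + 1  # stem conv + blocks + fc
print(total)  # 50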
benchmark/fluid/run.sh (new file, mode 100644)
#!/bin/bash
# This script benchmarks PaddlePaddle Fluid on a single thread
# and a single GPU.
export CUDNN_PATH=/paddle/cudnn_v5/cuda/lib

# disable openmp and mkl parallelism
# https://github.com/PaddlePaddle/Paddle/issues/7199
export MKL_NUM_THREADS=1
export OMP_NUM_THREADS=1
ht=`lscpu |grep "per core"|awk -F':' '{print $2}'|xargs`
if [ $ht -eq 1 ]; then # HT is OFF
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,0,0"
    fi
    if [ -z "$OMP_DYNAMIC" ]; then
        export OMP_DYNAMIC="FALSE"
    fi
else # HT is ON
    if [ -z "$KMP_AFFINITY" ]; then
        export KMP_AFFINITY="granularity=fine,compact,1,0"
    fi
fi

# use only the first GPU even if more than one is visible
export CUDA_VISIBLE_DEVICES=0
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$CUDNN_PATH:$LD_LIBRARY_PATH

# vgg16
# cifar10 gpu cifar10 128
FLAGS_benchmark=true python fluid/vgg.py \
  --device=GPU \
  --batch_size=128 \
  --skip_batch_num=5 \
  --iterations=30 \
  2>&1 > vgg16_gpu_128.log

# resnet50
# resnet50 gpu cifar10 128
FLAGS_benchmark=true python fluid/resnet.py \
  --device=GPU \
  --batch_size=128 \
  --data_set=cifar10 \
  --model=resnet_cifar10 \
  --skip_batch_num=5 \
  --iterations=30 \
  2>&1 > resnet50_gpu_128.log

# lstm
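One caveat on the redirections above: in `2>&1 > vgg16_gpu_128.log`, stderr is duplicated onto the original stdout (the console) before stdout is redirected, so only stdout reaches the log. To capture both streams in the log file, the usual spelling is `> vgg16_gpu_128.log 2>&1`.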
benchmark/fluid/stacked_dynamic_lstm.py (new file, mode 100644)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import cPickle
import os
import random
import time

import numpy
import paddle.v2 as paddle
import paddle.v2.dataset.imdb as imdb
import paddle.fluid as fluid
from paddle.v2 import batch
import paddle.fluid.profiler as profiler


def parse_args():
    parser = argparse.ArgumentParser("Understand Sentiment by Dynamic RNN.")
    parser.add_argument(
        '--batch_size',
        type=int,
        default=32,
        help='The sequence number of a batch data. (default: %(default)d)')
    parser.add_argument(
        '--emb_dim',
        type=int,
        default=512,
        help='Dimension of embedding table. (default: %(default)d)')
    parser.add_argument(
        '--hidden_dim',
        type=int,
        default=512,
        help='Hidden size of lstm unit. (default: %(default)d)')
    parser.add_argument(
        '--pass_num',
        type=int,
        default=100,
        help='Epoch number to train. (default: %(default)d)')
    parser.add_argument(
        '--device',
        type=str,
        default='CPU',
        choices=['CPU', 'GPU'],
        help='The device type.')
    parser.add_argument(
        '--crop_size',
        type=int,
        default=int(os.environ.get('CROP_SIZE', '1500')),
        help='The max sentence length of the input. Since this model uses a '
        'plain RNN, gradients could explode if the sentence is too long.')
    args = parser.parse_args()
    return args


word_dict = imdb.word_dict()


def crop_sentence(reader, crop_size):
    unk_value = word_dict['<unk>']

    def __impl__():
        for item in reader():
            if len([x for x in item[0] if x != unk_value]) < crop_size:
                yield item

    return __impl__


def main():
    args = parse_args()
    lstm_size = args.hidden_dim

    data = fluid.layers.data(
        name="words", shape=[1], lod_level=1, dtype='int64')
    sentence = fluid.layers.embedding(
        input=data, size=[len(word_dict), args.emb_dim])

    sentence = fluid.layers.fc(input=sentence, size=lstm_size, act='tanh')

    rnn = fluid.layers.DynamicRNN()
    with rnn.block():
        word = rnn.step_input(sentence)
        prev_hidden = rnn.memory(value=0.0, shape=[lstm_size])
        prev_cell = rnn.memory(value=0.0, shape=[lstm_size])

        def gate_common(ipt, hidden, size):
            gate0 = fluid.layers.fc(input=ipt, size=size, bias_attr=True)
            gate1 = fluid.layers.fc(input=hidden, size=size, bias_attr=False)
            gate = fluid.layers.sums(input=[gate0, gate1])
            return gate

        forget_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        input_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        output_gate = fluid.layers.sigmoid(
            x=gate_common(word, prev_hidden, lstm_size))
        cell_gate = fluid.layers.tanh(
            x=gate_common(word, prev_hidden, lstm_size))

        cell = fluid.layers.sums(input=[
            fluid.layers.elementwise_mul(
                x=forget_gate, y=prev_cell), fluid.layers.elementwise_mul(
                    x=input_gate, y=cell_gate)
        ])

        hidden = fluid.layers.elementwise_mul(
            x=output_gate, y=fluid.layers.tanh(x=cell))

        rnn.update_memory(prev_cell, cell)
        rnn.update_memory(prev_hidden, hidden)
        rnn.output(hidden)

    last = fluid.layers.sequence_pool(rnn(), 'last')
    logit = fluid.layers.fc(input=last, size=2, act='softmax')
    loss = fluid.layers.cross_entropy(
        input=logit,
        label=fluid.layers.data(
            name='label', shape=[1], dtype='int64'))
    loss = fluid.layers.mean(x=loss)

    # add acc
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=logit,
        label=fluid.layers.data(
            name='label', shape=[1], dtype='int64'),
        total=batch_size_tensor)

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    adam = fluid.optimizer.Adam()
    adam.minimize(loss)

    fluid.memory_optimize(fluid.default_main_program())

    place = fluid.CPUPlace() if args.device == 'CPU' else fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    def train_loop(pass_num, crop_size):
        with profiler.profiler(args.device, 'total') as prof:
            for pass_id in range(pass_num):
                train_reader = batch(
                    paddle.reader.shuffle(
                        crop_sentence(imdb.train(word_dict), crop_size),
                        buf_size=25000),
                    batch_size=args.batch_size)
                word_nums = 0
                pass_start_time = time.time()
                for batch_id, data in enumerate(train_reader()):
                    tensor_words = to_lodtensor([x[0] for x in data], place)
                    for x in data:
                        word_nums += len(x[0])
                    label = numpy.array([x[1] for x in data]).astype("int64")
                    label = label.reshape((-1, 1))
                    loss_np, acc, weight = exe.run(
                        fluid.default_main_program(),
                        feed={"words": tensor_words,
                              "label": label},
                        fetch_list=[loss, batch_acc, batch_size_tensor])
                    print("pass_id=%d, batch_id=%d, loss=%f, acc=%f" %
                          (pass_id, batch_id, loss_np, acc))

                pass_end_time = time.time()
                time_consumed = pass_end_time - pass_start_time
                words_per_sec = word_nums / time_consumed
                print("pass_id=%d, sec/pass: %f, words/s: %f" %
                      (pass_id, time_consumed, words_per_sec))

    train_loop(args.pass_num, args.crop_size)


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = numpy.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


if __name__ == '__main__':
    main()
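For reference, the DynamicRNN block above hand-builds one step of a standard, peephole-free LSTM. In the usual notation, with W and U the weights of the two fc calls inside gate_common (bias only on the input projection, matching the bias_attr settings above):

f_t = \sigma(W_f x_t + U_f h_{t-1} + b_f)
i_t = \sigma(W_i x_t + U_i h_{t-1} + b_i)
o_t = \sigma(W_o x_t + U_o h_{t-1} + b_o)
\tilde{c}_t = \tanh(W_c x_t + U_c h_{t-1} + b_c)
c_t = f_t \odot c_{t-1} + i_t \odot \tilde{c}_t
h_t = o_t \odot \tanh(c_t)

cell and hidden in the code correspond to c_t and h_t, carried across steps through rnn.update_memory.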
benchmark/fluid/vgg.py (new file, mode 100644)
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""VGG16 benchmark in Fluid"""
from __future__ import print_function

import sys
import time
import numpy as np
import paddle.v2 as paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
import argparse
import functools

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    '--batch_size', type=int, default=128, help="Batch size for training.")
parser.add_argument(
    '--skip_batch_num',
    type=int,
    default=5,
    help='The first num of minibatch num to skip, for better performance test')
parser.add_argument(
    '--iterations', type=int, default=80, help='The number of minibatches.')
parser.add_argument(
    '--learning_rate',
    type=float,
    default=1e-3,
    help="Learning rate for training.")
parser.add_argument('--pass_num', type=int, default=50, help="No. of passes.")
parser.add_argument(
    '--device',
    type=str,
    default='GPU',
    choices=['CPU', 'GPU'],
    help="The device type.")
parser.add_argument(
    '--data_format',
    type=str,
    default='NCHW',
    choices=['NCHW', 'NHWC'],
    help='The data order, now only support NCHW.')
parser.add_argument(
    '--data_set',
    type=str,
    default='cifar10',
    choices=['cifar10', 'flowers'],
    help='Optional dataset for benchmark.')
parser.add_argument(
    '--with_test',
    action='store_true',
    help='If set, test the testset during training.')
args = parser.parse_args()


def vgg16_bn_drop(input):
    def conv_block(input, num_filter, groups, dropouts):
        return fluid.nets.img_conv_group(
            input=input,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act='relu',
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type='max')

    conv1 = conv_block(input, 64, 2, [0.3, 0])
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = fluid.layers.dropout(x=conv5, dropout_prob=0.5)
    fc1 = fluid.layers.fc(input=drop, size=512, act=None)
    bn = fluid.layers.batch_norm(input=fc1, act='relu')
    drop2 = fluid.layers.dropout(x=bn, dropout_prob=0.5)
    fc2 = fluid.layers.fc(input=drop2, size=512, act=None)
    return fc2


def main():
    if args.data_set == "cifar10":
        classdim = 10
        if args.data_format == 'NCHW':
            data_shape = [3, 32, 32]
        else:
            data_shape = [32, 32, 3]
    else:
        classdim = 102
        if args.data_format == 'NCHW':
            data_shape = [3, 224, 224]
        else:
            data_shape = [224, 224, 3]

    # Input data
    images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    # Train program
    net = vgg16_bn_drop(images)
    predict = fluid.layers.fc(input=net, size=classdim, act='softmax')
    cost = fluid.layers.cross_entropy(input=predict, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    # Evaluator
    batch_size_tensor = fluid.layers.create_tensor(dtype='int64')
    batch_acc = fluid.layers.accuracy(
        input=predict, label=label, total=batch_size_tensor)

    # inference program
    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            target_vars=[batch_acc, batch_size_tensor])

    # Optimization
    optimizer = fluid.optimizer.Adam(learning_rate=args.learning_rate)
    opts = optimizer.minimize(avg_cost)

    fluid.memory_optimize(fluid.default_main_program())

    # Initialize executor
    place = core.CPUPlace() if args.device == 'CPU' else core.CUDAPlace(0)
    exe = fluid.Executor(place)

    # Parameter initialization
    exe.run(fluid.default_startup_program())

    # data reader
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.cifar.train10()
            if args.data_set == 'cifar10' else paddle.dataset.flowers.train(),
            buf_size=5120),
        batch_size=args.batch_size)
    test_reader = paddle.batch(
        paddle.dataset.cifar.test10()
        if args.data_set == 'cifar10' else paddle.dataset.flowers.test(),
        batch_size=args.batch_size)

    # test
    def test(exe):
        test_accuracy = fluid.average.WeightedAverage()
        for batch_id, data in enumerate(test_reader()):
            img_data = np.array(map(lambda x: x[0].reshape(data_shape),
                                    data)).astype("float32")
            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
            y_data = y_data.reshape([-1, 1])

            acc, weight = exe.run(inference_program,
                                  feed={"pixel": img_data,
                                        "label": y_data},
                                  fetch_list=[batch_acc, batch_size_tensor])
            test_accuracy.add(value=acc, weight=weight)
        return test_accuracy.eval()

    iters, num_samples, start_time = 0, 0, time.time()
    accuracy = fluid.average.WeightedAverage()
    for pass_id in range(args.pass_num):
        accuracy.reset()
        train_accs = []
        train_losses = []
        for batch_id, data in enumerate(train_reader()):
            if iters == args.skip_batch_num:
                start_time = time.time()
                num_samples = 0
            if iters == args.iterations:
                break
            img_data = np.array(map(lambda x: x[0].reshape(data_shape),
                                    data)).astype("float32")
            y_data = np.array(map(lambda x: x[1], data)).astype("int64")
            y_data = y_data.reshape([-1, 1])

            loss, acc, weight = exe.run(
                fluid.default_main_program(),
                feed={"pixel": img_data,
                      "label": y_data},
                fetch_list=[avg_cost, batch_acc, batch_size_tensor])
            accuracy.add(value=acc, weight=weight)
            iters += 1
            num_samples += len(data)
            print("Pass = %d, Iter = %d, Loss = %f, Accuracy = %f" %
                  (pass_id, iters, loss, acc))

        # The accuracy is the accumulation of batches, not the current batch.
        pass_train_acc = accuracy.eval()
        train_losses.append(loss)
        train_accs.append(acc)
        # evaluation
        if args.with_test:
            pass_test_acc = test(exe)
        train_elapsed = time.time() - start_time
        print("Pass: %d, Loss: %f, Train Accuracy: %f\n" %
              (pass_id, np.mean(train_losses), np.mean(train_accs)))


def print_arguments():
    print('----------- Configuration Arguments -----------')
    for arg, value in sorted(vars(args).iteritems()):
        print('%s: %s' % (arg, value))
    print('------------------------------------------------')


if __name__ == "__main__":
    print_arguments()
    main()
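The five conv_block calls in vgg16_bn_drop follow the VGG16 layout: 2 + 2 + 3 + 3 + 3 = 13 convolutional layers, which together with fc1, fc2 and the classifier fc added in main() give the 16 weight layers the name refers to. A quick check of that count:

# The (num_filter, groups) pairs passed to conv_block in vgg16_bn_drop.
blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]
conv_layers = sum(groups for _, groups in blocks)
fc_layers = 3  # fc1, fc2, and the classifier fc in main()
print(conv_layers, conv_layers + fc_layers)  # 13 16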