PaddlePaddle/models — commit b0239e3a (unverified)
Author: Chen Weihang; committed via GitHub on May 28, 2020
Parent: edf1a872

change some model using data loader (#4595)

Showing 7 changed files with 585 additions and 599 deletions (+585 −599):
    dygraph/mnist/train.py                +36   −25
    dygraph/mobilenet/reader.py            +1    −1
    dygraph/mobilenet/train.py             +2    −6
    dygraph/mobilenet/utils/utility.py     +2   −20
    dygraph/ptb_lm/ptb_dy.py             +474  −461
    dygraph/resnet/train.py               +38   −47
    dygraph/se_resnet/train.py            +32   −39
dygraph/mnist/train.py

@@ -99,11 +99,13 @@ class MNIST(fluid.dygraph.Layer):
        self.pool_2_shape = 50 * 4 * 4
        SIZE = 10
        scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
        self._fc = Linear(
            self.pool_2_shape,
            10,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.NormalInitializer(
                    loc=0.0, scale=scale)),
            act="softmax")

    def forward(self, inputs, label=None):
        x = self._simple_img_conv_pool_1(inputs)

@@ -117,17 +119,21 @@ class MNIST(fluid.dygraph.Layer):
        return x


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
def test_mnist(reader, model, batch_size):
    acc_set = []
    avg_loss_set = []
    for batch_id, data in enumerate(reader()):
-        dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                              for x in data]).astype('float32')
-        y_data = np.array(
-            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
        label.stop_gradient = True
        prediction, acc = model(img, label)
        loss = fluid.layers.cross_entropy(input=prediction, label=label)

@@ -187,28 +193,33 @@ def train_mnist(args):
        if args.use_data_parallel:
            strategy = fluid.dygraph.parallel.prepare_context()
        mnist = MNIST()
        adam = AdamOptimizer(
            learning_rate=0.001, parameter_list=mnist.parameters())
        if args.use_data_parallel:
            mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

        train_reader = paddle.batch(
-            paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
+            reader_decorator(paddle.dataset.mnist.train()),
+            batch_size=BATCH_SIZE,
+            drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        test_reader = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
+            reader_decorator(paddle.dataset.mnist.test()),
+            batch_size=BATCH_SIZE,
+            drop_last=True)

+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)

        for epoch in range(epoch_num):
-            for batch_id, data in enumerate(train_reader()):
-                dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                                      for x in data]).astype('float32')
-                y_data = np.array(
-                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+            for batch_id, data in enumerate(train_loader()):
+                img, label = data
                label.stop_gradient = True
                cost, acc = mnist(img, label)

@@ -231,7 +242,7 @@ def train_mnist(args):
                        epoch, batch_id, avg_loss.numpy()))

            mnist.eval()
-            test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
+            test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
            mnist.train()
            if args.ce:
                print("kpis\ttest_acc\t%s" % test_acc)

@@ -244,7 +255,7 @@ def train_mnist(args):
                fluid.dygraph.parallel.Env().local_rank == 0)
        if save_parameters:
            fluid.save_dygraph(mnist.state_dict(), "save_temp")
            print("checkpoint saved")

        inference_mnist()
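The MNIST change above is the pattern this commit repeats in the ResNet and SE-ResNeXt files further down: a per-sample reader_decorator shapes each example, paddle.batch(..., drop_last=True) groups the samples, and a fluid.io.DataLoader turns each batch into ready-made dygraph variables, so the manual numpy batching and to_variable calls disappear from the loop body. Below is a minimal, self-contained sketch of that pattern under the Paddle 1.x fluid API; the fake_mnist_reader is a made-up stand-in for paddle.dataset.mnist.train().

import numpy as np
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 64


def fake_mnist_reader():
    # made-up sample reader: yields (flat_image, label) pairs, shaped like
    # the samples produced by the real paddle.dataset.mnist readers
    def _reader():
        for _ in range(4 * BATCH_SIZE):
            yield np.random.random(784).astype('float32'), np.random.randint(0, 10)

    return _reader


def reader_decorator(reader):
    # reshape every sample into what the network expects, as in the diff above
    def __reader__():
        for item in reader():
            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
            label = np.array(item[1]).astype('int64').reshape(1)
            yield img, label

    return __reader__


place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    batched_reader = paddle.batch(
        reader_decorator(fake_mnist_reader()),
        batch_size=BATCH_SIZE,
        drop_last=True)
    loader = fluid.io.DataLoader.from_generator(capacity=10)
    loader.set_sample_list_generator(batched_reader, places=place)
    for batch_id, data in enumerate(loader()):
        img, label = data  # already dygraph variables on `place`
        print(batch_id, img.shape, label.shape)
        break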
dygraph/mobilenet/reader.py

@@ -239,7 +239,7 @@ def process_image(sample, settings, mode, color_jitter, rotate):
    img /= img_std

    if mode == 'train' or mode == 'val':
-        return (img, sample[1])
+        return (img, [sample[1]])
    elif mode == 'test':
        return (img, )
dygraph/mobilenet/train.py

@@ -116,10 +116,8 @@ def train_mobilenet():
            optimizer.set_dict(opti_dict)

        # 3. reader
-        train_data_loader, train_data = utility.create_data_loader(
-            is_train=True, args=args)
-        test_data_loader, test_data = utility.create_data_loader(
-            is_train=False, args=args)
+        train_data_loader = utility.create_data_loader(is_train=True, args=args)
+        test_data_loader = utility.create_data_loader(is_train=False, args=args)
        num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
        imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
        train_reader = imagenet_reader.train(settings=args)

@@ -145,8 +143,6 @@ def train_mobilenet():
                t1 = time.time()

                if args.max_iter and total_batch_num == args.max_iter:
                    return
-                label = to_variable(label.numpy().astype('int64').reshape(
-                    int(args.batch_size // place_num), 1))
                t_start = time.time()

                # 4.1.1 call net()
dygraph/mobilenet/utils/utility.py

@@ -309,32 +309,14 @@ def create_data_loader(is_train, args):
    Returns:
        data_loader and the input data of net,
    """
-    image_shape = [int(m) for m in args.image_shape.split(",")]
-
-    feed_image = fluid.data(
-        name="feed_image",
-        shape=[None] + image_shape,
-        dtype="float32",
-        lod_level=0)
-
-    feed_label = fluid.data(
-        name="feed_label", shape=[None, 1], dtype="int64", lod_level=0)
-    feed_y_a = fluid.data(
-        name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0)
-
    if is_train and args.use_mixup:
-        feed_y_b = fluid.data(
-            name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0)
-        feed_lam = fluid.data(
-            name="feed_lam", shape=[None, 1], dtype="float32", lod_level=0)
-
        data_loader = fluid.io.DataLoader.from_generator(
            capacity=64,
            use_double_buffer=True,
            iterable=True,
            return_list=True)
-        return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam]
+        return data_loader
    else:
        data_loader = fluid.io.DataLoader.from_generator(
            capacity=64,

@@ -342,7 +324,7 @@ def create_data_loader(is_train, args):
            iterable=True,
            return_list=True)
-        return data_loader, [feed_image, feed_label]
+        return data_loader


def print_info(pass_id, batch_id, print_step, metrics, time_info, info_mode):
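Because the loader is created with iterable=True and return_list=True, iterating it in dygraph mode yields the data directly as a list of variables rather than a feed dict, which is why the fluid.data feed variables above could be deleted and create_data_loader now returns only the loader. A rough sketch of creating and consuming such a loader follows; the random_imagenet_batches helper is invented for illustration (the real code attaches imagenet_reader.train(settings=args) to the loader elsewhere).

import numpy as np
import paddle.fluid as fluid


def random_imagenet_batches(batch_size=8, steps=2):
    # invented stand-in for the ImageNet reader: yields whole (image, label) batches
    def _reader():
        for _ in range(steps):
            img = np.random.random((batch_size, 3, 224, 224)).astype('float32')
            label = np.random.randint(0, 1000, (batch_size, 1)).astype('int64')
            yield img, label

    return _reader


place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    data_loader = fluid.io.DataLoader.from_generator(
        capacity=64, use_double_buffer=True, iterable=True, return_list=True)
    data_loader.set_batch_generator(random_imagenet_batches(), places=place)
    for data in data_loader():
        img, label = data
        print(img.shape, label.shape)  # [8, 3, 224, 224], [8, 1]
        break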
dygraph/ptb_lm/ptb_dy.py

# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import unittest
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.nn import Embedding
import paddle.fluid.framework as framework
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.base import to_variable

import numpy as np
import six
import multiprocessing

import reader
import model_check
import time

from args import *

#import fluid.clip as clip
#from fluid.clip import *
import sys

if sys.version[0] == '2':
    reload(sys)
    sys.setdefaultencoding("utf-8")


class SimpleLSTMRNN(fluid.Layer):
    def __init__(self,
                 hidden_size,
                 num_steps,
                 num_layers=2,
                 init_scale=0.1,
                 dropout=None):
        super(SimpleLSTMRNN, self).__init__()
        self._hidden_size = hidden_size
        self._num_layers = num_layers
        self._init_scale = init_scale
        self._dropout = dropout
        self._num_steps = num_steps
        self.cell_array = []
        self.hidden_array = []

        self.weight_1_arr = []
        self.weight_2_arr = []
        self.bias_arr = []
        self.mask_array = []

        for i in range(self._num_layers):
            weight_1 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 2, self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.UniformInitializer(
                    low=-self._init_scale, high=self._init_scale))
            self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
            bias_1 = self.create_parameter(
                attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-self._init_scale, high=self._init_scale)),
                shape=[self._hidden_size * 4],
                dtype="float32",
                default_initializer=fluid.initializer.Constant(0.0))
            self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))

    def forward(self, input_embedding, init_hidden=None, init_cell=None):
        cell_array = []
        hidden_array = []
        for i in range(self._num_layers):
            hidden_array.append(init_hidden[i])
            cell_array.append(init_cell[i])

        res = []
        for index in range(self._num_steps):
            step_input = input_embedding[:, index, :]
            for k in range(self._num_layers):
                pre_hidden = hidden_array[k]
                pre_cell = cell_array[k]
                weight_1 = self.weight_1_arr[k]
                bias = self.bias_arr[k]

                nn = fluid.layers.concat([step_input, pre_hidden], 1)
                gate_input = fluid.layers.matmul(x=nn, y=weight_1)

                gate_input = fluid.layers.elementwise_add(gate_input, bias)
                i, j, f, o = fluid.layers.split(
                    gate_input, num_or_sections=4, dim=-1)
                c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
                    i) * fluid.layers.tanh(j)
                m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
                hidden_array[k] = m
                cell_array[k] = c
                step_input = m

                if self._dropout is not None and self._dropout > 0.0:
                    step_input = fluid.layers.dropout(
                        step_input,
                        dropout_prob=self._dropout,
                        dropout_implementation='upscale_in_train')
            res.append(step_input)
        real_res = fluid.layers.concat(res, 1)
        real_res = fluid.layers.reshape(
            real_res, [-1, self._num_steps, self._hidden_size])
        last_hidden = fluid.layers.concat(hidden_array, 1)
        last_hidden = fluid.layers.reshape(
            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
        last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
        last_cell = fluid.layers.concat(cell_array, 1)
        last_cell = fluid.layers.reshape(
            last_cell, shape=[-1, self._num_layers, self._hidden_size])
        last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
        return real_res, last_hidden, last_cell


class PtbModel(fluid.Layer):
    def __init__(self,
                 hidden_size,
                 vocab_size,
                 num_layers=2,
                 num_steps=20,
                 init_scale=0.1,
                 dropout=None):
        super(PtbModel, self).__init__()
        self.hidden_size = hidden_size
        self.vocab_size = vocab_size
        self.init_scale = init_scale
        self.num_layers = num_layers
        self.num_steps = num_steps
        self.dropout = dropout
        self.simple_lstm_rnn = SimpleLSTMRNN(
            hidden_size,
            num_steps,
            num_layers=num_layers,
            init_scale=init_scale,
            dropout=dropout)
        self.embedding = Embedding(
            size=[vocab_size, hidden_size],
            dtype='float32',
            is_sparse=False,
            param_attr=fluid.ParamAttr(
                name='embedding_para',
                initializer=fluid.initializer.UniformInitializer(
                    low=-init_scale, high=init_scale)))
        self.softmax_weight = self.create_parameter(
            attr=fluid.ParamAttr(),
            shape=[self.hidden_size, self.vocab_size],
            dtype="float32",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))
        self.softmax_bias = self.create_parameter(
            attr=fluid.ParamAttr(),
            shape=[self.vocab_size],
            dtype="float32",
            default_initializer=fluid.initializer.UniformInitializer(
                low=-self.init_scale, high=self.init_scale))

    def build_once(self, input, label, init_hidden, init_cell):
        pass

    def forward(self, input, label, init_hidden, init_cell):
        init_h = fluid.layers.reshape(
            init_hidden, shape=[self.num_layers, -1, self.hidden_size])
        init_c = fluid.layers.reshape(
            init_cell, shape=[self.num_layers, -1, self.hidden_size])

        x_emb = self.embedding(input)
        x_emb = fluid.layers.reshape(
            x_emb, shape=[-1, self.num_steps, self.hidden_size])
        if self.dropout is not None and self.dropout > 0.0:
            x_emb = fluid.layers.dropout(
                x_emb,
                dropout_prob=self.dropout,
                dropout_implementation='upscale_in_train')
        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(
            x_emb, init_h, init_c)

        projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
        projection = fluid.layers.elementwise_add(projection, self.softmax_bias)

        loss = fluid.layers.softmax_with_cross_entropy(
            logits=projection, label=label, soft_label=False)
        loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
        loss = fluid.layers.reduce_mean(loss, dim=[0])
        loss = fluid.layers.reduce_sum(loss)

        return loss, last_hidden, last_cell

    def debug_emb(self):
        np.save("emb_grad", self.x_emb.gradient())


def train_ptb_lm():
    args = parse_args()

    # check if set use_gpu=True in paddlepaddle cpu version
    model_check.check_cuda(args.use_gpu)

    place = core.CPUPlace()
    if args.use_gpu:
        place = fluid.CUDAPlace(0)
        dev_count = fluid.core.get_cuda_device_count()
    else:
        place = fluid.CPUPlace()
        dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

    # check if paddlepaddle version is satisfied
    model_check.check_version()

    model_type = args.model_type

    vocab_size = 10000
    if model_type == "test":
        num_layers = 1
        batch_size = 2
        hidden_size = 10
        num_steps = 3
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 1
        max_epoch = 1
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "small":
        num_layers = 2
        batch_size = 20
        hidden_size = 200
        num_steps = 20
        init_scale = 0.1
        max_grad_norm = 5.0
        epoch_start_decay = 4
        max_epoch = 13
        dropout = 0.0
        lr_decay = 0.5
        base_learning_rate = 1.0
    elif model_type == "medium":
        num_layers = 2
        batch_size = 20
        hidden_size = 650
        num_steps = 35
        init_scale = 0.05
        max_grad_norm = 5.0
        epoch_start_decay = 6
        max_epoch = 39
        dropout = 0.5
        lr_decay = 0.8
        base_learning_rate = 1.0
    elif model_type == "large":
        num_layers = 2
        batch_size = 20
        hidden_size = 1500
        num_steps = 35
        init_scale = 0.04
        max_grad_norm = 10.0
        epoch_start_decay = 14
        max_epoch = 55
        dropout = 0.65
        lr_decay = 1.0 / 1.15
        base_learning_rate = 1.0
    else:
        print("model type not support")
        return

    with fluid.dygraph.guard(place):
        if args.ce:
            print("ce mode")
            seed = 33
            np.random.seed(seed)
            fluid.default_startup_program().random_seed = seed
            fluid.default_main_program().random_seed = seed
            max_epoch = 1
        ptb_model = PtbModel(
            hidden_size=hidden_size,
            vocab_size=vocab_size,
            num_layers=num_layers,
            num_steps=num_steps,
            init_scale=init_scale,
            dropout=dropout)

        if args.init_from_pretrain_model:
            if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                print(args.init_from_pretrain_model)
                raise Warning("The pretrained params do not exist.")
                return
            fluid.load_dygraph(args.init_from_pretrain_model)
            print("finish initing model from pretrained params from %s" %
                  (args.init_from_pretrain_model))

        dy_param_updated = dict()
        dy_param_init = dict()
        dy_loss = None
        last_hidden = None
        last_cell = None

        data_path = args.data_path
        print("begin to load data")
        ptb_data = reader.get_ptb_data(data_path)
        print("finished load data")
        train_data, valid_data, test_data = ptb_data

        batch_len = len(train_data) // batch_size
        total_batch_size = (batch_len - 1) // num_steps
        log_interval = 200

        bd = []
        lr_arr = [1.0]
        for i in range(1, max_epoch):
            bd.append(total_batch_size * i)
            new_lr = base_learning_rate * (lr_decay**max(
                i + 1 - epoch_start_decay, 0.0))
            lr_arr.append(new_lr)

        grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
        sgd = SGDOptimizer(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr_arr),
            parameter_list=ptb_model.parameters(),
            grad_clip=grad_clip)

+        def reader_decorator(reader):
+            def __reader__():
+                for item in reader:
+                    x_data = item[0].reshape((-1, num_steps, 1))
+                    y_data = item[1].reshape((-1, num_steps, 1))
+                    yield x_data, y_data
+
+            return __reader__
+
        def eval(model, data):
            print("begin to eval")
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

            model.eval()
-            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
-            for batch_id, batch in enumerate(train_data_iter):
-                x_data, y_data = batch
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-                x = to_variable(x_data)
-                y = to_variable(y_data)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(data, batch_size, num_steps))
+            eval_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            eval_data_loader.set_batch_generator(train_data_iter, places=place)
+
+            for batch_id, batch in enumerate(eval_data_loader):
+                x, y = batch
                init_hidden = to_variable(init_hidden_data)
                init_cell = to_variable(init_cell_data)
                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)

                out_loss = dy_loss.numpy()

                init_hidden_data = last_hidden.numpy()
                init_cell_data = last_cell.numpy()

                total_loss += out_loss
                iters += num_steps

            print("eval finished")
            ppl = np.exp(total_loss / iters)
            print("ppl ", batch_id, ppl[0])

        ce_time = []
        ce_ppl = []

        total_batch_num = 0  #this is for benchmark
        for epoch_id in range(max_epoch):
            ptb_model.train()
            total_loss = 0.0
            iters = 0.0
            init_hidden_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')
            init_cell_data = np.zeros(
                (num_layers, batch_size, hidden_size), dtype='float32')

-            train_data_iter = reader.get_data_iter(train_data, batch_size,
-                                                   num_steps)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(train_data, batch_size, num_steps))
+            train_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            train_data_loader.set_batch_generator(train_data_iter, places=place)

            init_hidden = to_variable(init_hidden_data)
            init_cell = to_variable(init_cell_data)

            start_time = time.time()
-            for batch_id, batch in enumerate(train_data_iter):
+            for batch_id, batch in enumerate(train_data_loader):
                if args.max_iter and total_batch_num == args.max_iter:
                    return
                batch_start = time.time()
-                x_data, y_data = batch
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-                x = to_variable(x_data)
-                y = to_variable(y_data)
+                x, y = batch

                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
                                                            init_cell)

                init_hidden = last_hidden.detach()
                init_cell = last_cell.detach()
                out_loss = dy_loss.numpy()

                dy_loss.backward()
                sgd.minimize(dy_loss)

                ptb_model.clear_gradients()

                total_loss += out_loss
                batch_end = time.time()
                train_batch_cost = batch_end - batch_start
                iters += num_steps
                total_batch_num = total_batch_num + 1  #this is for benchmark

                if batch_id > 0 and batch_id % log_interval == 0:
                    ppl = np.exp(total_loss / iters)
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f, loss: %.5f, batch cost: %.5f"
                        % (epoch_id, batch_id, ppl[0],
                           sgd._global_learning_rate().numpy(), out_loss,
                           train_batch_cost))

            print("one epoch finished", epoch_id)
            print("time cost ", time.time() - start_time)
            ppl = np.exp(total_loss / iters)
            ce_time.append(time.time() - start_time)
            ce_ppl.append(ppl[0])
            print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))

            if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
                # for bad init, after first epoch, the loss is over 1000
                # no more need to continue
                print(
                    "Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch."
                )
                print("Abort this training process and please start again.")
                return

            save_model_dir = os.path.join(args.save_model_dir,
                                          str(epoch_id), 'params')
            fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
            print("Saved model to: %s.\n" % save_model_dir)

            eval(ptb_model, valid_data)

        if args.ce:
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))

        eval(ptb_model, test_data)


train_ptb_lm()
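The PTB change uses set_batch_generator rather than set_sample_list_generator, because reader.get_data_iter already produces whole (x, y) batches; the nested reader_decorator only reshapes them to (-1, num_steps, 1) before the loader converts them to variables. A stripped-down sketch of that wiring follows; the fake_ptb_batches generator is a made-up substitute for reader.get_data_iter(train_data, batch_size, num_steps).

import numpy as np
import paddle.fluid as fluid

batch_size, num_steps, vocab_size = 4, 20, 10000


def fake_ptb_batches(steps=3):
    # made-up batch iterator: yields (x, y) int64 arrays of shape
    # (batch_size, num_steps), like reader.get_data_iter does
    for _ in range(steps):
        x = np.random.randint(0, vocab_size, (batch_size, num_steps))
        y = np.random.randint(0, vocab_size, (batch_size, num_steps))
        yield x.astype('int64'), y.astype('int64')


def reader_decorator(reader):
    # reshape each whole batch, mirroring the decorator added in the diff
    def __reader__():
        for item in reader:
            x_data = item[0].reshape((-1, num_steps, 1))
            y_data = item[1].reshape((-1, num_steps, 1))
            yield x_data, y_data

    return __reader__


place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    loader = fluid.io.DataLoader.from_generator(capacity=200)
    loader.set_batch_generator(reader_decorator(fake_ptb_batches()), places=place)
    for batch_id, batch in enumerate(loader()):
        x, y = batch
        print(batch_id, x.shape, y.shape)  # [4, 20, 1] [4, 20, 1]
        break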
dygraph/resnet/train.py

@@ -81,7 +81,6 @@ def optimizer_setting(parameter_list=None):
            boundaries=bd, values=lr),
        momentum=momentum_rate,
        regularization=fluid.regularizer.L2Decay(l2_decay))

    return optimizer

@@ -116,11 +115,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer):
    def __init__(self, num_channels, num_filters, stride, shortcut=True):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(

@@ -186,16 +181,9 @@ class ResNet(fluid.dygraph.Layer):
        num_filters = [64, 128, 256, 512]

        self.conv = ConvBNLayer(
            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
        self.pool2d_max = Pool2D(
            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.bottleneck_block_list = []
        for block in range(len(depth)):

@@ -220,11 +208,12 @@ class ResNet(fluid.dygraph.Layer):
        import math
        stdv = 1.0 / math.sqrt(2048 * 1.0)

        self.out = Linear(
            self.pool2d_avg_output,
            class_dim,
            act='softmax',
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        y = self.conv(inputs)

@@ -237,6 +226,16 @@ class ResNet(fluid.dygraph.Layer):
        return y


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
def eval(model, data):
    model.eval()

@@ -245,15 +244,8 @@ def eval(model, data):
    total_acc5 = 0.0
    total_sample = 0
    for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array(
-            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img = data[0]
+        label = data[1]
        label.stop_gradient = True
        out = model(img)

@@ -303,13 +295,24 @@ def train_resnet():
            resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)

        train_reader = paddle.batch(
-            paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size)
+            reader_decorator(paddle.dataset.flowers.train(use_xmap=True)),
+            batch_size=batch_size,
+            drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)
        test_reader = paddle.batch(
-            paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
+            reader_decorator(paddle.dataset.flowers.test(use_xmap=True)),
+            batch_size=batch_size,
+            drop_last=True)

+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)

        #file_name = './model/epoch_0.npz'
        #model_data = np.load( file_name )

@@ -331,23 +334,13 @@ def train_resnet():
            print("load finished")

-            for batch_id, data in enumerate(train_reader()):
+            for batch_id, data in enumerate(train_loader()):
                #NOTE: used in benchmark
                if args.max_iter and total_batch_num == args.max_iter:
                    return
                batch_start = time.time()
-                dy_x_data = np.array(
-                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-                if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-                    continue
-                y_data = np.array(
-                    [x[1] for x in data]).astype('int64').reshape(-1, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+                img, label = data
                label.stop_gradient = True

                out = resnet(img)

@@ -390,16 +383,14 @@ def train_resnet():
                      (eop, batch_id, total_loss / total_sample, \
                       total_acc1 / total_sample, total_acc5 / total_sample))
            resnet.eval()
-            eval(resnet, test_reader)
+            eval(resnet, test_loader)

            save_parameters = (not args.use_data_parallel) or (
                args.use_data_parallel and
                fluid.dygraph.parallel.Env().local_rank == 0)
            if save_parameters:
                fluid.save_dygraph(resnet.state_dict(), 'resnet_params')


if __name__ == '__main__':
    train_resnet()
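Note that the data-parallel branch is unchanged in spirit: the batched reader is still wrapped with fluid.contrib.reader.distributed_batch_reader first, and only then attached to the DataLoader, so each trainer pulls its own shard of every global batch. A small helper sketch of that ordering follows, assuming batched_reader is any paddle.batch(...) reader such as the flowers one above.

import paddle.fluid as fluid


def build_train_loader(batched_reader, place, use_data_parallel=False):
    # shard the global batches across trainers first (when requested), then
    # hand the reader to an asynchronous DataLoader, as train_resnet() does
    if use_data_parallel:
        batched_reader = fluid.contrib.reader.distributed_batch_reader(
            batched_reader)
    loader = fluid.io.DataLoader.from_generator(capacity=10)
    loader.set_sample_list_generator(batched_reader, places=place)
    return loader

Keeping drop_last=True on the underlying paddle.batch call matters in this setup, since the rewritten loop bodies no longer guard against a short final batch the way the deleted `if len(...) != batch_size: continue` checks did.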
dygraph/se_resnet/train.py

@@ -169,8 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
            act=None)

        self.scale = SqueezeExcitation(
            num_channels=num_filters * 2, reduction_ratio=reduction_ratio)

        if not shortcut:
            self.short = ConvBNLayer(

@@ -219,10 +218,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                stride=2,
                act='relu')
            self.pool = Pool2D(
                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
        elif layers == 101:
            cardinality = 32
            reduction_ratio = 16

@@ -235,10 +231,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                stride=2,
                act='relu')
            self.pool = Pool2D(
                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
        elif layers == 152:
            cardinality = 64
            reduction_ratio = 16

@@ -263,10 +256,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                stride=1,
                act='relu')
            self.pool = Pool2D(
                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        self.bottleneck_block_list = []
        num_channels = 64

@@ -294,10 +284,11 @@ class SeResNeXt(fluid.dygraph.Layer):
        self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1

        self.out = Linear(
            self.pool2d_avg_output,
            class_dim,
            param_attr=fluid.param_attr.ParamAttr(
                initializer=fluid.initializer.Uniform(-stdv, stdv)))

    def forward(self, inputs):
        if self.layers == 50 or self.layers == 101:

@@ -318,6 +309,16 @@ class SeResNeXt(fluid.dygraph.Layer):
        return y


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
def eval(model, data):
    model.eval()

@@ -327,15 +328,7 @@ def eval(model, data):
    total_acc5 = 0.0
    total_sample = 0
    for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array(
-            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
        label.stop_gradient = True
        out = model(img)

@@ -389,29 +382,29 @@ def train():
            se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext, strategy)

        train_reader = paddle.batch(
-            paddle.dataset.flowers.train(use_xmap=False),
+            reader_decorator(paddle.dataset.flowers.train(use_xmap=False)),
            batch_size=batch_size,
            drop_last=True)
        if args.use_data_parallel:
            train_reader = fluid.contrib.reader.distributed_batch_reader(
                train_reader)

        test_reader = paddle.batch(
-            paddle.dataset.flowers.test(use_xmap=False), batch_size=32)
+            reader_decorator(paddle.dataset.flowers.test(use_xmap=False)),
+            batch_size=32)

+        train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        train_loader.set_sample_list_generator(train_reader, places=place)
+
+        test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+        test_loader.set_sample_list_generator(test_reader, places=place)

        for epoch_id in range(epoch_num):
            total_loss = 0.0
            total_acc1 = 0.0
            total_acc5 = 0.0
            total_sample = 0
-            for batch_id, data in enumerate(train_reader()):
-                dy_x_data = np.array(
-                    [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-                y_data = np.array(
-                    [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-                img = to_variable(dy_x_data)
-                label = to_variable(y_data)
+            for batch_id, data in enumerate(train_loader()):
+                img, label = data
                label.stop_gradient = True
                out = se_resnext(img)

@@ -454,7 +447,7 @@ def train():
                      (epoch_id, batch_id, total_loss / total_sample, \
                       total_acc1 / total_sample, total_acc5 / total_sample))
            se_resnext.eval()
-            eval(se_resnext, test_reader)
+            eval(se_resnext, test_loader)
            se_resnext.train()
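After this change, evaluation receives the DataLoader instead of the raw batched reader, and each iteration already hands back dygraph variables. The sketch below condenses what the `eval(se_resnext, test_loader)` path above boils down to, with the top-5 accuracy bookkeeping omitted and the surrounding eval()/train() mode switching folded into the helper; `model` can be any dygraph classifier.

import paddle.fluid as fluid


def evaluate(model, loader):
    # model: a dygraph Layer; loader: a DataLoader yielding (img, label) batches
    model.eval()
    total_acc1, total_sample = 0.0, 0
    for data in loader():
        img, label = data
        label.stop_gradient = True
        out = model(img)
        acc1 = fluid.layers.accuracy(input=out, label=label, k=1)
        total_acc1 += acc1.numpy()
        total_sample += 1
    model.train()
    return total_acc1 / total_sample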