PaddlePaddle / models
Commit b0239e3a (unverified)

change some model using data loader (#4595)

Authored by Chen Weihang on May 28, 2020
Committed by GitHub on May 28, 2020
Parent commit: edf1a872
Showing 7 changed files with 585 additions and 599 deletions (+585 -599)
dygraph/mnist/train.py               +36  -25
dygraph/mobilenet/reader.py           +1   -1
dygraph/mobilenet/train.py            +2   -6
dygraph/mobilenet/utils/utility.py    +2  -20
dygraph/ptb_lm/ptb_dy.py            +474 -461
dygraph/resnet/train.py              +38  -47
dygraph/se_resnet/train.py           +32  -39
dygraph/mnist/train.py (view file @ b0239e3a)

@@ -99,11 +99,13 @@ class MNIST(fluid.dygraph.Layer):
         self.pool_2_shape = 50 * 4 * 4
         SIZE = 10
         scale = (2.0 / (self.pool_2_shape**2 * SIZE))**0.5
-        self._fc = Linear(self.pool_2_shape, 10,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.NormalInitializer(
-                                  loc=0.0, scale=scale)),
-                          act="softmax")
+        self._fc = Linear(
+            self.pool_2_shape,
+            10,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.NormalInitializer(
+                    loc=0.0, scale=scale)),
+            act="softmax")

     def forward(self, inputs, label=None):
         x = self._simple_img_conv_pool_1(inputs)

@@ -117,17 +119,21 @@ class MNIST(fluid.dygraph.Layer):
         return x


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def test_mnist(reader, model, batch_size):
     acc_set = []
     avg_loss_set = []
     for batch_id, data in enumerate(reader()):
-        dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                              for x in data]).astype('float32')
-        y_data = np.array(
-            [x[1] for x in data]).astype('int64').reshape(batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
         label.stop_gradient = True
         prediction, acc = model(img, label)
         loss = fluid.layers.cross_entropy(input=prediction, label=label)

@@ -187,28 +193,33 @@ def train_mnist(args):
     if args.use_data_parallel:
         strategy = fluid.dygraph.parallel.prepare_context()
     mnist = MNIST()
     adam = AdamOptimizer(learning_rate=0.001, parameter_list=mnist.parameters())
     if args.use_data_parallel:
         mnist = fluid.dygraph.parallel.DataParallel(mnist, strategy)

     train_reader = paddle.batch(
-        paddle.dataset.mnist.train(), batch_size=BATCH_SIZE, drop_last=True)
+        reader_decorator(paddle.dataset.mnist.train()),
+        batch_size=BATCH_SIZE,
+        drop_last=True)
     if args.use_data_parallel:
         train_reader = fluid.contrib.reader.distributed_batch_reader(
             train_reader)

     test_reader = paddle.batch(
-        paddle.dataset.mnist.test(), batch_size=BATCH_SIZE, drop_last=True)
+        reader_decorator(paddle.dataset.mnist.test()),
+        batch_size=BATCH_SIZE,
+        drop_last=True)
+
+    train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    test_loader.set_sample_list_generator(test_reader, places=place)

     for epoch in range(epoch_num):
-        for batch_id, data in enumerate(train_reader()):
-            dy_x_data = np.array([x[0].reshape(1, 28, 28)
-                                  for x in data]).astype('float32')
-            y_data = np.array(
-                [x[1] for x in data]).astype('int64').reshape(-1, 1)
-            img = to_variable(dy_x_data)
-            label = to_variable(y_data)
+        for batch_id, data in enumerate(train_loader()):
+            img, label = data
             label.stop_gradient = True

             cost, acc = mnist(img, label)

@@ -231,7 +242,7 @@ def train_mnist(args):
                         epoch, batch_id, avg_loss.numpy()))

         mnist.eval()
-        test_cost, test_acc = test_mnist(test_reader, mnist, BATCH_SIZE)
+        test_cost, test_acc = test_mnist(test_loader, mnist, BATCH_SIZE)
         mnist.train()
         if args.ce:
             print("kpis\ttest_acc\t%s" % test_acc)

@@ -244,7 +255,7 @@ def train_mnist(args):
         fluid.dygraph.parallel.Env().local_rank == 0)
     if save_parameters:
         fluid.save_dygraph(mnist.state_dict(), "save_temp")
         print("checkpoint saved")
     inference_mnist()
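Note: the same migration pattern recurs across every file in this commit: wrap the raw per-sample reader so each sample is a tuple of fixed-shape numpy arrays, batch it with paddle.batch, then hand the batched reader to fluid.io.DataLoader, which yields ready-made tensors inside the training loop. A minimal, self-contained sketch of that pattern follows (Paddle 1.x dygraph APIs as used in the diff; BATCH_SIZE and the one-batch loop are illustrative, not from the original file):

# A minimal sketch of the reader_decorator + DataLoader pattern above.
import numpy as np
import paddle
import paddle.fluid as fluid

BATCH_SIZE = 64  # illustrative value


def reader_decorator(reader):
    def __reader__():
        # shape each sample once here, so the train loop no longer needs
        # per-batch np.array([...]) packing and to_variable() conversion
        for item in reader():
            img = np.array(item[0]).astype('float32').reshape(1, 28, 28)
            label = np.array(item[1]).astype('int64').reshape(1)
            yield img, label

    return __reader__


place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() \
    else fluid.CPUPlace()
with fluid.dygraph.guard(place):
    train_reader = paddle.batch(
        reader_decorator(paddle.dataset.mnist.train()),
        batch_size=BATCH_SIZE,
        drop_last=True)

    # the loader prefetches up to `capacity` batches and yields them as
    # dygraph tensors already placed on `place`
    train_loader = fluid.io.DataLoader.from_generator(capacity=10)
    train_loader.set_sample_list_generator(train_reader, places=place)

    for batch_id, (img, label) in enumerate(train_loader()):
        print(batch_id, img.shape, label.shape)  # [64, 1, 28, 28] [64, 1]
        break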
dygraph/mobilenet/reader.py (view file @ b0239e3a)

@@ -239,7 +239,7 @@ def process_image(sample, settings, mode, color_jitter, rotate):
     img /= img_std

     if mode == 'train' or mode == 'val':
-        return (img, sample[1])
+        return (img, [sample[1]])
     elif mode == 'test':
         return (img, )
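The one-character change above is easy to misread: when sample-list batching stacks per-sample fields into a batch, a bare integer label collapses to shape (N,), while a one-element list stacks to (N, 1), the shape the downstream loss and accuracy ops expect. A tiny numpy sketch of that stacking assumption (the stacking behaviour is my reading of how the loader packs each field, not stated in the diff):

import numpy as np

wrapped = [[7], [3], [1], [0]]  # samples carrying [sample[1]]
bare = [7, 3, 1, 0]             # samples carrying sample[1]
print(np.array(wrapped).astype('int64').shape)  # (4, 1)
print(np.array(bare).astype('int64').shape)     # (4,)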
dygraph/mobilenet/train.py (view file @ b0239e3a)

@@ -116,10 +116,8 @@ def train_mobilenet():
             optimizer.set_dict(opti_dict)

     # 3. reader
-    train_data_loader, train_data = utility.create_data_loader(
-        is_train=True, args=args)
-    test_data_loader, test_data = utility.create_data_loader(
-        is_train=False, args=args)
+    train_data_loader = utility.create_data_loader(is_train=True, args=args)
+    test_data_loader = utility.create_data_loader(is_train=False, args=args)
     num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
     imagenet_reader = reader.ImageNetReader(seed=0, place_num=place_num)
     train_reader = imagenet_reader.train(settings=args)

@@ -145,8 +143,6 @@ def train_mobilenet():
                 t1 = time.time()

                 if args.max_iter and total_batch_num == args.max_iter:
                     return
-                label = to_variable(label.numpy().astype('int64').reshape(
-                    int(args.batch_size // place_num), 1))
                 t_start = time.time()

                 # 4.1.1 call net()
dygraph/mobilenet/utils/utility.py (view file @ b0239e3a)

@@ -309,32 +309,14 @@ def create_data_loader(is_train, args):
     Returns:
         data_loader and the input data of net,
     """
-    image_shape = [int(m) for m in args.image_shape.split(",")]
-
-    feed_image = fluid.data(
-        name="feed_image",
-        shape=[None] + image_shape,
-        dtype="float32",
-        lod_level=0)
-
-    feed_label = fluid.data(
-        name="feed_label", shape=[None, 1], dtype="int64", lod_level=0)
-    feed_y_a = fluid.data(
-        name="feed_y_a", shape=[None, 1], dtype="int64", lod_level=0)
-
     if is_train and args.use_mixup:
-        feed_y_b = fluid.data(
-            name="feed_y_b", shape=[None, 1], dtype="int64", lod_level=0)
-        feed_lam = fluid.data(
-            name="feed_lam", shape=[None, 1], dtype="float32", lod_level=0)
-
         data_loader = fluid.io.DataLoader.from_generator(
             capacity=64,
             use_double_buffer=True,
             iterable=True,
             return_list=True)
-        return data_loader, [feed_image, feed_y_a, feed_y_b, feed_lam]
+        return data_loader
     else:
         data_loader = fluid.io.DataLoader.from_generator(
             capacity=64,

@@ -342,7 +324,7 @@ def create_data_loader(is_train, args):
             iterable=True,
             return_list=True)
-        return data_loader, [feed_image, feed_label]
+        return data_loader


 def print_info(pass_id, batch_id, print_step, metrics, time_info, info_mode):
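The deleted fluid.data() placeholders are a static-graph concept: with return_list=True the loader yields plain lists of tensors in dygraph mode, so there are no feed variables to return alongside it. A sketch of the simplified contract (same fluid.io arguments as the diff; the caller-side comment is illustrative):

import paddle.fluid as fluid


def create_data_loader():
    # dygraph: no feed_list needed; return_list=True yields [img, label, ...]
    data_loader = fluid.io.DataLoader.from_generator(
        capacity=64,
        use_double_buffer=True,
        iterable=True,
        return_list=True)
    return data_loader


loader = create_data_loader()
# later, e.g.: loader.set_sample_list_generator(batched_reader, places=place)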
dygraph/ptb_lm/ptb_dy.py (view file @ b0239e3a)

 # Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 from __future__ import print_function

 import os
 import unittest
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.fluid.dygraph.nn import Embedding
 import paddle.fluid.framework as framework
 from paddle.fluid.optimizer import SGDOptimizer
 from paddle.fluid.dygraph.base import to_variable
 import numpy as np
 import six
 import multiprocessing

 import reader
 import model_check
 import time

 from args import *

 #import fluid.clip as clip
 #from fluid.clip import *
 import sys
 if sys.version[0] == '2':
     reload(sys)
     sys.setdefaultencoding("utf-8")


 class SimpleLSTMRNN(fluid.Layer):
     def __init__(self,
                  hidden_size,
                  num_steps,
                  num_layers=2,
                  init_scale=0.1,
                  dropout=None):
         super(SimpleLSTMRNN, self).__init__()
         self._hidden_size = hidden_size
         self._num_layers = num_layers
         self._init_scale = init_scale
         self._dropout = dropout
         self._num_steps = num_steps
         self.cell_array = []
         self.hidden_array = []
         self.weight_1_arr = []
         self.weight_2_arr = []
         self.bias_arr = []
         self.mask_array = []

         for i in range(self._num_layers):
             weight_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 2, self._hidden_size * 4],
                 dtype="float32",
                 default_initializer=fluid.initializer.UniformInitializer(
                     low=-self._init_scale, high=self._init_scale))
             self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1))
             bias_1 = self.create_parameter(
                 attr=fluid.ParamAttr(
                     initializer=fluid.initializer.UniformInitializer(
                         low=-self._init_scale, high=self._init_scale)),
                 shape=[self._hidden_size * 4],
                 dtype="float32",
                 default_initializer=fluid.initializer.Constant(0.0))
             self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1))

     def forward(self, input_embedding, init_hidden=None, init_cell=None):
         cell_array = []
         hidden_array = []
         for i in range(self._num_layers):
             hidden_array.append(init_hidden[i])
             cell_array.append(init_cell[i])

         res = []
         for index in range(self._num_steps):
-            step_input = input_embedding[:, index,:]
+            step_input = input_embedding[:, index, :]
             for k in range(self._num_layers):
                 pre_hidden = hidden_array[k]
                 pre_cell = cell_array[k]
                 weight_1 = self.weight_1_arr[k]
                 bias = self.bias_arr[k]

                 nn = fluid.layers.concat([step_input, pre_hidden], 1)
                 gate_input = fluid.layers.matmul(x=nn, y=weight_1)

                 gate_input = fluid.layers.elementwise_add(gate_input, bias)
                 i, j, f, o = fluid.layers.split(
                     gate_input, num_or_sections=4, dim=-1)
                 c = pre_cell * fluid.layers.sigmoid(f) + fluid.layers.sigmoid(
                     i) * fluid.layers.tanh(j)
                 m = fluid.layers.tanh(c) * fluid.layers.sigmoid(o)
                 hidden_array[k] = m
                 cell_array[k] = c
                 step_input = m

                 if self._dropout is not None and self._dropout > 0.0:
                     step_input = fluid.layers.dropout(
                         step_input,
                         dropout_prob=self._dropout,
                         dropout_implementation='upscale_in_train')
             res.append(step_input)
         real_res = fluid.layers.concat(res, 1)
-        real_res = fluid.layers.reshape(real_res, [-1, self._num_steps, self._hidden_size])
+        real_res = fluid.layers.reshape(
+            real_res, [-1, self._num_steps, self._hidden_size])
         last_hidden = fluid.layers.concat(hidden_array, 1)
-        last_hidden = fluid.layers.reshape(last_hidden, shape=[-1, self._num_layers, self._hidden_size])
+        last_hidden = fluid.layers.reshape(
+            last_hidden, shape=[-1, self._num_layers, self._hidden_size])
         last_hidden = fluid.layers.transpose(x=last_hidden, perm=[1, 0, 2])
         last_cell = fluid.layers.concat(cell_array, 1)
-        last_cell = fluid.layers.reshape(last_cell, shape=[-1, self._num_layers, self._hidden_size])
+        last_cell = fluid.layers.reshape(
+            last_cell, shape=[-1, self._num_layers, self._hidden_size])
         last_cell = fluid.layers.transpose(x=last_cell, perm=[1, 0, 2])
         return real_res, last_hidden, last_cell


 class PtbModel(fluid.Layer):
     def __init__(self,
                  hidden_size,
                  vocab_size,
                  num_layers=2,
                  num_steps=20,
                  init_scale=0.1,
                  dropout=None):
         super(PtbModel, self).__init__()
         self.hidden_size = hidden_size
         self.vocab_size = vocab_size
         self.init_scale = init_scale
         self.num_layers = num_layers
         self.num_steps = num_steps
         self.dropout = dropout
         self.simple_lstm_rnn = SimpleLSTMRNN(
             hidden_size,
             num_steps,
             num_layers=num_layers,
             init_scale=init_scale,
             dropout=dropout)
         self.embedding = Embedding(
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=False,
             param_attr=fluid.ParamAttr(
                 name='embedding_para',
                 initializer=fluid.initializer.UniformInitializer(
                     low=-init_scale, high=init_scale)))
         self.softmax_weight = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.hidden_size, self.vocab_size],
             dtype="float32",
             default_initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale, high=self.init_scale))
         self.softmax_bias = self.create_parameter(
             attr=fluid.ParamAttr(),
             shape=[self.vocab_size],
             dtype="float32",
             default_initializer=fluid.initializer.UniformInitializer(
                 low=-self.init_scale, high=self.init_scale))

     def build_once(self, input, label, init_hidden, init_cell):
         pass

     def forward(self, input, label, init_hidden, init_cell):
         init_h = fluid.layers.reshape(
             init_hidden, shape=[self.num_layers, -1, self.hidden_size])
         init_c = fluid.layers.reshape(
             init_cell, shape=[self.num_layers, -1, self.hidden_size])

         x_emb = self.embedding(input)
-        x_emb = fluid.layers.reshape(x_emb, shape=[-1, self.num_steps, self.hidden_size])
+        x_emb = fluid.layers.reshape(
+            x_emb, shape=[-1, self.num_steps, self.hidden_size])
         if self.dropout is not None and self.dropout > 0.0:
             x_emb = fluid.layers.dropout(
                 x_emb,
                 dropout_prob=self.dropout,
                 dropout_implementation='upscale_in_train')
-        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h, init_c)
+        rnn_out, last_hidden, last_cell = self.simple_lstm_rnn(x_emb, init_h,
+                                                               init_c)

         projection = fluid.layers.matmul(rnn_out, self.softmax_weight)
         projection = fluid.layers.elementwise_add(projection, self.softmax_bias)
-        loss = fluid.layers.softmax_with_cross_entropy(logits=projection, label=label, soft_label=False)
+        loss = fluid.layers.softmax_with_cross_entropy(
+            logits=projection, label=label, soft_label=False)
         loss = fluid.layers.reshape(loss, shape=[-1, self.num_steps])
         loss = fluid.layers.reduce_mean(loss, dim=[0])
         loss = fluid.layers.reduce_sum(loss)

         return loss, last_hidden, last_cell

     def debug_emb(self):
         np.save("emb_grad", self.x_emb.gradient())


 def train_ptb_lm():
     args = parse_args()

     # check if set use_gpu=True in paddlepaddle cpu version
     model_check.check_cuda(args.use_gpu)

     place = core.CPUPlace()
     if args.use_gpu:
         place = fluid.CUDAPlace(0)
         dev_count = fluid.core.get_cuda_device_count()
     else:
         place = fluid.CPUPlace()
         dev_count = int(os.environ.get('CPU_NUM', multiprocessing.cpu_count()))

     # check if paddlepaddle version is satisfied
     model_check.check_version()

     model_type = args.model_type

     vocab_size = 10000
     if model_type == "test":
         num_layers = 1
         batch_size = 2
         hidden_size = 10
         num_steps = 3
         init_scale = 0.1
         max_grad_norm = 5.0
         epoch_start_decay = 1
         max_epoch = 1
         dropout = 0.0
         lr_decay = 0.5
         base_learning_rate = 1.0
     elif model_type == "small":
         num_layers = 2
         batch_size = 20
         hidden_size = 200
         num_steps = 20
         init_scale = 0.1
         max_grad_norm = 5.0
         epoch_start_decay = 4
         max_epoch = 13
         dropout = 0.0
         lr_decay = 0.5
         base_learning_rate = 1.0
     elif model_type == "medium":
         num_layers = 2
         batch_size = 20
         hidden_size = 650
         num_steps = 35
         init_scale = 0.05
         max_grad_norm = 5.0
         epoch_start_decay = 6
         max_epoch = 39
         dropout = 0.5
         lr_decay = 0.8
         base_learning_rate = 1.0
     elif model_type == "large":
         num_layers = 2
         batch_size = 20
         hidden_size = 1500
         num_steps = 35
         init_scale = 0.04
         max_grad_norm = 10.0
         epoch_start_decay = 14
         max_epoch = 55
         dropout = 0.65
         lr_decay = 1.0 / 1.15
         base_learning_rate = 1.0
     else:
         print("model type not support")
         return

     with fluid.dygraph.guard(place):
         if args.ce:
             print("ce mode")
             seed = 33
             np.random.seed(seed)
             fluid.default_startup_program().random_seed = seed
             fluid.default_main_program().random_seed = seed
             max_epoch = 1
         ptb_model = PtbModel(
             hidden_size=hidden_size,
             vocab_size=vocab_size,
             num_layers=num_layers,
             num_steps=num_steps,
             init_scale=init_scale,
             dropout=dropout)

         if args.init_from_pretrain_model:
             if not os.path.exists(args.init_from_pretrain_model + '.pdparams'):
                 print(args.init_from_pretrain_model)
                 raise Warning("The pretrained params do not exist.")
                 return
             fluid.load_dygraph(args.init_from_pretrain_model)
             print("finish initing model from pretrained params from %s" %
                   (args.init_from_pretrain_model))

         dy_param_updated = dict()
         dy_param_init = dict()
         dy_loss = None
         last_hidden = None
         last_cell = None

         data_path = args.data_path
         print("begin to load data")
         ptb_data = reader.get_ptb_data(data_path)
         print("finished load data")
         train_data, valid_data, test_data = ptb_data

         batch_len = len(train_data) // batch_size
         total_batch_size = (batch_len - 1) // num_steps
         log_interval = 200

         bd = []
         lr_arr = [1.0]
         for i in range(1, max_epoch):
             bd.append(total_batch_size * i)
             new_lr = base_learning_rate * (lr_decay**
                                            max(i + 1 - epoch_start_decay, 0.0))
             lr_arr.append(new_lr)

         grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
         sgd = SGDOptimizer(
             learning_rate=fluid.layers.piecewise_decay(
                 boundaries=bd, values=lr_arr),
             parameter_list=ptb_model.parameters(),
             grad_clip=grad_clip)

+        def reader_decorator(reader):
+            def __reader__():
+                for item in reader:
+                    x_data = item[0].reshape((-1, num_steps, 1))
+                    y_data = item[1].reshape((-1, num_steps, 1))
+                    yield x_data, y_data
+
+            return __reader__
+
         def eval(model, data):
             print("begin to eval")
             total_loss = 0.0
             iters = 0.0
             init_hidden_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
             init_cell_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')

             model.eval()
-            train_data_iter = reader.get_data_iter(data, batch_size, num_steps)
-            for batch_id, batch in enumerate(train_data_iter):
-                x_data, y_data = batch
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-                x = to_variable(x_data)
-                y = to_variable(y_data)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(data, batch_size, num_steps))
+            eval_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            eval_data_loader.set_batch_generator(train_data_iter, places=place)
+
+            for batch_id, batch in enumerate(eval_data_loader):
+                x, y = batch
                 init_hidden = to_variable(init_hidden_data)
                 init_cell = to_variable(init_cell_data)
-                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, init_cell)
+                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
+                                                            init_cell)

                 out_loss = dy_loss.numpy()

                 init_hidden_data = last_hidden.numpy()
                 init_cell_data = last_cell.numpy()

                 total_loss += out_loss
                 iters += num_steps

             print("eval finished")
             ppl = np.exp(total_loss / iters)
             print("ppl ", batch_id, ppl[0])

         ce_time = []
         ce_ppl = []
         total_batch_num = 0  #this is for benchmark
         for epoch_id in range(max_epoch):
             ptb_model.train()
             total_loss = 0.0
             iters = 0.0
             init_hidden_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')
             init_cell_data = np.zeros(
                 (num_layers, batch_size, hidden_size), dtype='float32')

-            train_data_iter = reader.get_data_iter(train_data, batch_size,
-                                                   num_steps)
+            train_data_iter = reader_decorator(
+                reader.get_data_iter(train_data, batch_size, num_steps))
+
+            train_data_loader = fluid.io.DataLoader.from_generator(capacity=200)
+            train_data_loader.set_batch_generator(train_data_iter, places=place)
+
             init_hidden = to_variable(init_hidden_data)
             init_cell = to_variable(init_cell_data)

             start_time = time.time()
-            for batch_id, batch in enumerate(train_data_iter):
+            for batch_id, batch in enumerate(train_data_loader):
                 if args.max_iter and total_batch_num == args.max_iter:
                     return
                 batch_start = time.time()
-                x_data, y_data = batch
-                x_data = x_data.reshape((-1, num_steps, 1))
-                y_data = y_data.reshape((-1, num_steps, 1))
-                x = to_variable(x_data)
-                y = to_variable(y_data)
-                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden, init_cell)
+                x, y = batch
+
+                dy_loss, last_hidden, last_cell = ptb_model(x, y, init_hidden,
+                                                            init_cell)
+
                 init_hidden = last_hidden.detach()
                 init_cell = last_cell.detach()
                 out_loss = dy_loss.numpy()

                 dy_loss.backward()
                 sgd.minimize(dy_loss)

                 ptb_model.clear_gradients()
                 total_loss += out_loss
                 batch_end = time.time()
                 train_batch_cost = batch_end - batch_start
                 iters += num_steps
                 total_batch_num = total_batch_num + 1  #this is for benchmark

                 if batch_id > 0 and batch_id % log_interval == 0:
                     ppl = np.exp(total_loss / iters)
                     print(
                         "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, lr: %.5f, loss: %.5f, batch cost: %.5f"
                         % (epoch_id, batch_id, ppl[0],
                            sgd._global_learning_rate().numpy(), out_loss,
                            train_batch_cost))

             print("one epoch finished", epoch_id)
             print("time cost ", time.time() - start_time)
             ppl = np.exp(total_loss / iters)
             ce_time.append(time.time() - start_time)
             ce_ppl.append(ppl[0])
             print("-- Epoch:[%d]; ppl: %.5f" % (epoch_id, ppl[0]))

             if batch_size <= 20 and epoch_id == 0 and ppl[0] > 1000:
                 # for bad init, after first epoch, the loss is over 1000
                 # no more need to continue
                 print(
                     "Parameters are randomly initialized and not good this time because the loss is over 1000 after the first epoch."
                 )
                 print("Abort this training process and please start again.")
                 return

             save_model_dir = os.path.join(args.save_model_dir,
                                           str(epoch_id), 'params')
             fluid.save_dygraph(ptb_model.state_dict(), save_model_dir)
             print("Saved model to: %s.\n" % save_model_dir)

             eval(ptb_model, valid_data)

         if args.ce:
             _ppl = 0
             _time = 0
             try:
                 _time = ce_time[-1]
                 _ppl = ce_ppl[-1]
             except:
                 print("ce info error")
             print("kpis\ttrain_duration_card%s\t%s" % (dev_count, _time))
             print("kpis\ttrain_ppl_card%s\t%f" % (dev_count, _ppl))

         eval(ptb_model, test_data)


 train_ptb_lm()
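Note that ptb_dy.py feeds its loader differently from the image models: reader.get_data_iter() already yields whole (x, y) batches, so the decorated iterator goes through set_batch_generator rather than set_sample_list_generator. A runnable sketch with dummy data (shapes follow the reshape((-1, num_steps, 1)) in the decorator above; the constants and zero arrays are illustrative):

import numpy as np
import paddle.fluid as fluid

batch_size, num_steps = 20, 35


def batch_generator():
    # stands in for reader_decorator(reader.get_data_iter(...)):
    # each yielded item is already a full batch, not a single sample
    for _ in range(3):
        x = np.zeros((batch_size, num_steps, 1), dtype='int64')
        y = np.zeros((batch_size, num_steps, 1), dtype='int64')
        yield x, y


place = fluid.CPUPlace()
with fluid.dygraph.guard(place):
    loader = fluid.io.DataLoader.from_generator(capacity=200)
    loader.set_batch_generator(batch_generator, places=place)
    for x, y in loader():
        print(x.shape, y.shape)  # [20, 35, 1] [20, 35, 1]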
dygraph/resnet/train.py (view file @ b0239e3a)

@@ -81,7 +81,6 @@ def optimizer_setting(parameter_list=None):
             boundaries=bd, values=lr),
         momentum=momentum_rate,
         regularization=fluid.regularizer.L2Decay(l2_decay))

     return optimizer

@@ -116,11 +115,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
 class BottleneckBlock(fluid.dygraph.Layer):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True):
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
         super(BottleneckBlock, self).__init__()

         self.conv0 = ConvBNLayer(

@@ -186,16 +181,9 @@ class ResNet(fluid.dygraph.Layer):
         num_filters = [64, 128, 256, 512]

         self.conv = ConvBNLayer(
-            num_channels=3,
-            num_filters=64,
-            filter_size=7,
-            stride=2,
-            act='relu')
+            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
         self.pool2d_max = Pool2D(
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

         self.bottleneck_block_list = []
         for block in range(len(depth)):

@@ -220,11 +208,12 @@ class ResNet(fluid.dygraph.Layer):
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)

-        self.out = Linear(self.pool2d_avg_output,
-                          class_dim,
-                          act='softmax',
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            act='softmax',
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))

     def forward(self, inputs):
         y = self.conv(inputs)

@@ -237,6 +226,16 @@ class ResNet(fluid.dygraph.Layer):
         return y


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def eval(model, data):
     model.eval()

@@ -245,15 +244,8 @@ def eval(model, data):
     total_acc5 = 0.0
     total_sample = 0
     for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-            batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img = data[0]
+        label = data[1]
         label.stop_gradient = True

         out = model(img)

@@ -303,13 +295,24 @@ def train_resnet():
         resnet = fluid.dygraph.parallel.DataParallel(resnet, strategy)

     train_reader = paddle.batch(
-        paddle.dataset.flowers.train(use_xmap=False), batch_size=batch_size)
+        reader_decorator(paddle.dataset.flowers.train(use_xmap=True)),
+        batch_size=batch_size,
+        drop_last=True)
     if args.use_data_parallel:
         train_reader = fluid.contrib.reader.distributed_batch_reader(
             train_reader)

     test_reader = paddle.batch(
-        paddle.dataset.flowers.test(use_xmap=False), batch_size=batch_size)
+        reader_decorator(paddle.dataset.flowers.test(use_xmap=True)),
+        batch_size=batch_size,
+        drop_last=True)
+
+    train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    test_loader.set_sample_list_generator(test_reader, places=place)

     #file_name = './model/epoch_0.npz'
     #model_data = np.load( file_name )

@@ -331,23 +334,13 @@ def train_resnet():
         print("load finished")

-        for batch_id, data in enumerate(train_reader()):
+        for batch_id, data in enumerate(train_loader()):
             #NOTE: used in benchmark
             if args.max_iter and total_batch_num == args.max_iter:
                 return
             batch_start = time.time()
-            dy_x_data = np.array(
-                [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-            if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-                continue
-            y_data = np.array(
-                [x[1] for x in data]).astype('int64').reshape(-1, 1)
-            img = to_variable(dy_x_data)
-            label = to_variable(y_data)
+            img, label = data
             label.stop_gradient = True

             out = resnet(img)

@@ -390,16 +383,14 @@ def train_resnet():
                          (eop, batch_id, total_loss / total_sample, \
                           total_acc1 / total_sample, total_acc5 / total_sample))
         resnet.eval()
-        eval(resnet, test_reader)
+        eval(resnet, test_loader)

         save_parameters = (not args.use_data_parallel) or (
             args.use_data_parallel and
             fluid.dygraph.parallel.Env().local_rank == 0)
         if save_parameters:
             fluid.save_dygraph(resnet.state_dict(),
                                'resnet_params')


 if __name__ == '__main__':
     train_resnet()
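One side effect of moving batching into paddle.batch(..., drop_last=True): the old eval loop guarded against a ragged final batch with an explicit continue, while the new pipeline never produces one. A small sketch of the drop_last semantics assumed here (tiny_reader is a made-up stand-in reader):

import paddle


def tiny_reader():
    for i in range(10):
        yield (i, )


full = list(paddle.batch(tiny_reader, batch_size=4)())
dropped = list(paddle.batch(tiny_reader, batch_size=4, drop_last=True)())
print(len(full[-1]), len(dropped[-1]))  # 2 4 -- ragged tail batch dropped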
dygraph/se_resnet/train.py (view file @ b0239e3a)

@@ -169,8 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
             act=None)

         self.scale = SqueezeExcitation(
-            num_channels=num_filters * 2,
-            reduction_ratio=reduction_ratio)
+            num_channels=num_filters * 2, reduction_ratio=reduction_ratio)

         if not shortcut:
             self.short = ConvBNLayer(

@@ -219,10 +218,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 101:
             cardinality = 32
             reduction_ratio = 16

@@ -235,10 +231,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 152:
             cardinality = 64
             reduction_ratio = 16

@@ -263,10 +256,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 stride=1,
                 act='relu')
             self.pool = Pool2D(
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

         self.bottleneck_block_list = []
         num_channels = 64

@@ -294,10 +284,11 @@ class SeResNeXt(fluid.dygraph.Layer):
         self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 2 * 1 * 1

-        self.out = Linear(self.pool2d_avg_output,
-                          class_dim,
-                          param_attr=fluid.param_attr.ParamAttr(
-                              initializer=fluid.initializer.Uniform(-stdv, stdv)))
+        self.out = Linear(
+            self.pool2d_avg_output,
+            class_dim,
+            param_attr=fluid.param_attr.ParamAttr(
+                initializer=fluid.initializer.Uniform(-stdv, stdv)))

     def forward(self, inputs):
         if self.layers == 50 or self.layers == 101:

@@ -318,6 +309,16 @@ class SeResNeXt(fluid.dygraph.Layer):
         return y


+def reader_decorator(reader):
+    def __reader__():
+        for item in reader():
+            img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
+            label = np.array(item[1]).astype('int64').reshape(1)
+            yield img, label
+
+    return __reader__
+
+
 def eval(model, data):
     model.eval()

@@ -327,15 +328,7 @@ def eval(model, data):
     total_acc5 = 0.0
     total_sample = 0
     for batch_id, data in enumerate(data()):
-        dy_x_data = np.array(
-            [x[0].reshape(3, 224, 224) for x in data]).astype('float32')
-        if len(np.array([x[1] for x in data]).astype('int64')) != batch_size:
-            continue
-        y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-            batch_size, 1)
-        img = to_variable(dy_x_data)
-        label = to_variable(y_data)
+        img, label = data
         label.stop_gradient = True

         out = model(img)

@@ -389,29 +382,29 @@ def train():
         se_resnext = fluid.dygraph.parallel.DataParallel(se_resnext,
                                                          strategy)

     train_reader = paddle.batch(
-        paddle.dataset.flowers.train(use_xmap=False),
+        reader_decorator(paddle.dataset.flowers.train(use_xmap=False)),
         batch_size=batch_size,
         drop_last=True)
     if args.use_data_parallel:
         train_reader = fluid.contrib.reader.distributed_batch_reader(
             train_reader)

     test_reader = paddle.batch(
-        paddle.dataset.flowers.test(use_xmap=False), batch_size=32)
+        reader_decorator(paddle.dataset.flowers.test(use_xmap=False)),
+        batch_size=32)
+
+    train_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    train_loader.set_sample_list_generator(train_reader, places=place)
+
+    test_loader = fluid.io.DataLoader.from_generator(capacity=10)
+    test_loader.set_sample_list_generator(test_reader, places=place)

     for epoch_id in range(epoch_num):
         total_loss = 0.0
         total_acc1 = 0.0
         total_acc5 = 0.0
         total_sample = 0
-        for batch_id, data in enumerate(train_reader()):
-            dy_x_data = np.array([x[0].reshape(3, 224, 224)
-                                  for x in data]).astype('float32')
-            y_data = np.array([x[1] for x in data]).astype('int64').reshape(
-                batch_size, 1)
-            img = to_variable(dy_x_data)
-            label = to_variable(y_data)
+        for batch_id, data in enumerate(train_loader()):
+            img, label = data
             label.stop_gradient = True

             out = se_resnext(img)

@@ -454,7 +447,7 @@ def train():
                       (epoch_id, batch_id, total_loss / total_sample, \
                        total_acc1 / total_sample, total_acc5 / total_sample))
         se_resnext.eval()
-        eval(se_resnext, test_reader)
+        eval(se_resnext, test_loader)
         se_resnext.train()