PaddlePaddle / models
Commit f258a876
Authored on April 13, 2018 by buaawht

    Merge branch 'develop' of https://github.com/PaddlePaddle/models into new_method

Parents: 1af463bf, 26b3788b
Showing 31 changed files with 1414 additions and 647 deletions (+1414 −647)
Changed files:
- .gitignore (+1 −0)
- .travis.yml (+1 −1)
- fluid/DeepASR/tools/profile.py (+1 −1)
- fluid/DeepASR/train.py (+1 −1)
- fluid/image_classification/caffe2fluid/README.md (+5 −5)
- fluid/image_classification/caffe2fluid/examples/imagenet/compare.py (+85 −0)
- fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh (+64 −0)
- fluid/image_classification/caffe2fluid/examples/imagenet/infer.py (+67 −14)
- fluid/image_classification/caffe2fluid/examples/imagenet/run.sh (+7 −2)
- fluid/image_classification/caffe2fluid/kaffe/graph.py (+4 −1)
- fluid/image_classification/caffe2fluid/kaffe/paddle/network.py (+12 −35)
- fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py (+8 −2)
- fluid/image_classification/se_resnext.py (+0 −166)
- fluid/image_classification/train.py (+311 −0)
- fluid/image_classification/utility.py (+62 −0)
- fluid/neural_machine_translation/transformer/config.py (+21 −16)
- fluid/neural_machine_translation/transformer/infer.py (+139 −59)
- fluid/neural_machine_translation/transformer/model.py (+81 −64)
- fluid/neural_machine_translation/transformer/train.py (+69 −46)
- fluid/object_detection/.gitignore (+8 −0)
- fluid/object_detection/data/pascalvoc/create_list.py (+2 −1)
- fluid/object_detection/data/pascalvoc/download.sh (+16 −0)
- fluid/object_detection/data/pascalvoc/label_list (+0 −0)
- fluid/object_detection/image_util.py (+5 −6)
- fluid/object_detection/load_model.py (+0 −67)
- fluid/object_detection/mobilenet_ssd.py (+4 −8)
- fluid/object_detection/pretrained/download_coco.sh (+8 −0)
- fluid/object_detection/pretrained/download_imagenet.sh (+8 −0)
- fluid/object_detection/reader.py (+210 −94)
- fluid/object_detection/train.py (+205 −45)
- fluid/policy_gradient/brain.py (+9 −13)
.gitignore (+1 −0)
 .DS_Store
 *.pyc
+.*~
.travis.yml (+1 −1)
@@ -17,7 +17,7 @@ addons:
       - python-pip
       - python2.7-dev
       - clang-format-3.8
-  ssh_known_hosts: 52.76.173.135
+  ssh_known_hosts: 13.229.163.131
 before_install:
   - if [[ "$JOB" == "PRE_COMMIT" ]]; then sudo ln -s /usr/bin/clang-format-3.8 /usr/bin/clang-format; fi
   - sudo pip install -U virtualenv pre-commit pip
fluid/DeepASR/tools/profile.py (+1 −1)
@@ -168,7 +168,7 @@ def profile(args):
             start_time = time.time()
             frames_seen = 0
             # load_data
-            (features, labels, lod) = batch_data
+            (features, labels, lod, _) = batch_data
             feature_t.set(features, place)
             feature_t.set_lod([lod])
             label_t.set(labels, place)
fluid/DeepASR/train.py (+1 −1)
@@ -192,7 +192,7 @@ def train(args):
                 test_data_reader.batch_iterator(args.batch_size,
                                                 args.minimum_batch_size)):
             # load_data
-            (features, labels, lod) = batch_data
+            (features, labels, lod, _) = batch_data
             feature_t.set(features, place)
             feature_t.set_lod([lod])
             label_t.set(labels, place)
fluid/image_classification/caffe2fluid/README.md (+5 −5)
@@ -18,19 +18,19 @@ This tool is used to convert a Caffe model to Fluid model
 ### Tested models
-- Lenet on mnist dataset
+- Lenet

 - ResNets:(ResNet-50, ResNet-101, ResNet-152)
-  model addr: `https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777`
+  [model addr](https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777)

 - GoogleNet:
-  model addr: `https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034`
+  [model addr](https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034)

 - VGG:
-  model addr: `https://gist.github.com/ksimonyan/211839e770f7b538e2d8`
+  [model addr](https://gist.github.com/ksimonyan/211839e770f7b538e2d8)

 - AlexNet:
-  model addr: `https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet`
+  [model addr](https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet)

 ### Notes
 Some of this code come from here: https://github.com/ethereon/caffe-tensorflow
fluid/image_classification/caffe2fluid/examples/imagenet/compare.py (new file, +85 −0)

#!/usr/bin/python
#
#a tool to compare tensors in two files or two directories
#
import sys
import os


def walk_dir(rootdir):
    for subdir, dirs, files in os.walk(rootdir):
        for file in files:
            yield file


def calc_diff(f1, f2):
    import numpy as np

    d1 = np.load(f1).flatten()
    d2 = np.load(f2).flatten()

    d1_num = reduce(lambda x, y: x * y, d1.shape)
    d2_num = reduce(lambda x, y: x * y, d2.shape)
    if d1_num != d2_num:
        print d1.shape
        print d2.shape
        assert (d1_num == d2_num), "their shape is not consistent"

    try:
        df = np.abs(d1 - d2)
        max_df = np.max(df)
        sq_df = np.mean(df * df)
        return max_df, sq_df
    except Exception as e:
        return -1.0, -1.0


def compare(path1, path2):
    def diff(f1, f2):
        max_df, sq_df = calc_diff(f1, f2)
        print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
              (f1, f2, max_df, sq_df))
        assert (max_df < 1e-5), \
            'max_df is too large with value[%.6e]' % (max_df)
        assert (sq_df < 1e-10), \
            'sq_df is too large with value[%.6e]' % (sq_df)

    if os.path.exists(path1) is False:
        print('not found %s' % (path1))
        return 1
    elif os.path.exists(path2) is False:
        print('not found %s' % (path2))
        return 1

    if path1.find('.npy') > 0 and path2.find('.npy') > 0:
        diff(path1, path2)
        return

    for f in walk_dir(path2):
        if f.find('.npy') < 0:
            continue

        f1 = os.path.join(path1, f)
        f2 = os.path.join(path2, f)
        diff(f1, f2)

    print('all checking succeed to pass')
    return 0


if __name__ == "__main__":
    if len(sys.argv) == 1:
        path1 = 'lenet.tf/results'
        path2 = 'lenet.paddle/results'
    elif len(sys.argv) == 3:
        path1 = sys.argv[1]
        path2 = sys.argv[2]
    else:
        print('usage:')
        print(' %s [path1] [path2]' % (sys.argv[0]))
        exit(1)

    print('compare inner result in %s %s' % (path1, path2))
    exit(compare(path1, path2))
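Note that compare.py above targets Python 2 (bare print statements, the builtin reduce). For readers on Python 3, a minimal sketch of the same tensor check, assuming only NumPy (the function name here is illustrative and not part of the commit), could look like:

    import numpy as np

    def calc_diff_py3(f1, f2):
        # load both dumps, flatten, and compare element-wise
        d1 = np.load(f1).flatten()
        d2 = np.load(f2).flatten()
        assert d1.size == d2.size, "their shape is not consistent"
        df = np.abs(d1 - d2)
        # max absolute difference and mean squared difference, as in calc_diff
        return float(df.max()), float((df * df).mean())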
fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh (new file, +64 −0)

#!/bin/bash

#function:
#   a tool used to check the difference of models' results generated by caffe model and paddle model
#
#howto:
#   bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
#
#notes:
#   0, in order to infer using caffe, we need pycaffe installed
#   1, prepare your caffe model in 'models.caffe/', eg: 'model.caffe/resnet101/resnet101.[prototxt|caffemodel]'
#   2, converted paddle model will be in 'models'
#   3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
#   4, only the last layer will be checked by default

model_name="resnet50"
results_root="results/"

if [[ -n $1 ]];then
    if [ $1 = "-h" ];then
        echo "usage:"
        echo "  bash $0 [model_name]"
        echo "  eg:bash $0 resnet50"
        exit 0
    fi
    model_name=$1
fi

mkdir -p $results_root

model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"

#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
rm -rf $paddle_results
rm -rf "results.paddle"
bash run.sh $model_name ./models.caffe/$model_name ./models/$model_name
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]];then
    echo "not found paddle's results, maybe failed to convert"
    exit 1
fi
mv results.paddle $paddle_results

#2, dump layers' results from caffe
caffe_results="$results_root/${model_name}.caffe"
rm -rf $caffe_results
rm -rf "results.caffe"
cfpython ./infer.py caffe $model_prototxt $model_caffemodel $paddle_results/data.npy
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]];then
    echo "not found caffe's results, maybe failed to do inference with caffe"
    exit 1
fi
mv results.caffe $caffe_results

#3, extract layer names
cat $model_prototxt | grep name | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' >.layer_names

#4, compare one by one
for i in $(cat ".layer_names" | tail -n1);do
    echo "process $i"
    python compare.py $caffe_results/${i}.npy $paddle_results/${i}.npy
done
fluid/image_classification/caffe2fluid/examples/imagenet/infer.py (+67 −14)
@@ -10,8 +10,11 @@ import os
 import sys
 import inspect
 import numpy as np
-import paddle.v2 as paddle
-import paddle.v2.fluid as fluid
+
+
+def import_fluid():
+    import paddle.fluid as fluid
+    return fluid


 def load_data(imgfile, shape):
@@ -52,8 +55,10 @@ def build_model(net_file, net_name):
         print(e)
         return None

-    input_name = 'data'
-    input_shape = MyNet.input_shapes()[input_name]
+    fluid = import_fluid()
+    inputs_dict = MyNet.input_shapes()
+    input_name = inputs_dict.keys()[0]
+    input_shape = inputs_dict[input_name]
     images = fluid.layers.data(name='image', shape=input_shape, dtype='float32')
     #label = fluid.layers.data(name='label', shape=[1], dtype='int64')
@@ -64,7 +69,7 @@ def build_model(net_file, net_name):

 def dump_results(results, names, root):
     if os.path.exists(root) is False:
-        os.path.mkdir(root)
+        os.mkdir(root)

     for i in range(len(names)):
         n = names[i]
@@ -73,9 +78,12 @@ def dump_results(results, names, root):
         np.save(filename + '.npy', res)


-def infer(net_file, net_name, model_file, imgfile, debug=False):
+def infer(net_file, net_name, model_file, imgfile, debug=True):
     """ do inference using a model which consist 'xxx.py' and 'xxx.npy'
     """
+    fluid = import_fluid()
+
     #1, build model
     net, input_shape = build_model(net_file, net_name)
     prediction = net.get_output()
@@ -109,34 +117,79 @@ def infer(net_file, net_name, model_file, imgfile, debug=False):
                       fetch_list=fetch_list_var)

     if debug is True:
-        dump_path = 'results.layers'
+        dump_path = 'results.paddle'
         dump_results(results, fetch_list_name, dump_path)
-        print('all results dumped to [%s]' % (dump_path))
+        print('all result of layers dumped to [%s]' % (dump_path))
     else:
         result = results[0]
         print('predicted class:', np.argmax(result))

+    return 0
+
+
+def caffe_infer(prototxt, caffemodel, datafile):
+    """ do inference using pycaffe for debug,
+        all intermediate results will be dumpped to 'results.caffe'
+    """
+    import caffe
+
+    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
+    input_layer = net.blobs.keys()[0]
+    print('got name of input layer is:%s' % (input_layer))
+    input_shape = list(net.blobs[input_layer].data.shape[1:])
+
+    if '.npy' in datafile:
+        np_images = np.load(datafile)
+    else:
+        np_images = load_data(datafile, input_shape)
+
+    inputs = {input_layer: np_images}
+    net.forward_all(**inputs)
+
+    results = []
+    names = []
+    for k, v in net.blobs.items():
+        k = k.rstrip('_output')
+        k = k.replace('/', '_')
+        names.append(k)
+        results.append(v.data.copy())
+
+    dump_path = 'results.caffe'
+    dump_results(results, names, dump_path)
+    print('all result of layers dumped to [%s]' % (dump_path))
+    return 0
+

 if __name__ == "__main__":
     """ maybe more convenient to use 'run.sh' to call this tool
     """
     net_file = 'models/resnet50/resnet50.py'
     weight_file = 'models/resnet50/resnet50.npy'
-    imgfile = 'data/65.jpeg'
+    datafile = 'data/65.jpeg'
     net_name = 'ResNet50'

     argc = len(sys.argv)
-    if argc == 5:
+    if sys.argv[1] == 'caffe':
+        if len(sys.argv) != 5:
+            print('usage:')
+            print('\tpython %s caffe [prototxt] [caffemodel] [datafile]' %
+                  (sys.argv[0]))
+            sys.exit(1)
+        prototxt = sys.argv[2]
+        caffemodel = sys.argv[3]
+        datafile = sys.argv[4]
+        sys.exit(caffe_infer(prototxt, caffemodel, datafile))
+    elif argc == 5:
         net_file = sys.argv[1]
         weight_file = sys.argv[2]
-        imgfile = sys.argv[3]
+        datafile = sys.argv[3]
         net_name = sys.argv[4]
     elif argc > 1:
         print('usage:')
-        print('\tpython %s [net_file] [weight_file] [imgfile] [net_name]' %
+        print('\tpython %s [net_file] [weight_file] [datafile] [net_name]' %
               (sys.argv[0]))
-        print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
-                                              weight_file, imgfile, net_name))
+        print('\teg:python %s %s %s %s %s' % (sys.argv[0], net_file,
+                                              weight_file, datafile, net_name))
         sys.exit(1)

-    infer(net_file, net_name, weight_file, imgfile)
+    infer(net_file, net_name, weight_file, datafile)
fluid/image_classification/caffe2fluid/examples/imagenet/run.sh (+7 −2)
@@ -3,7 +3,7 @@
 #function:
 #   a tool used to:
 #       1, convert a caffe model
-#       2, do inference using this model
+#       2, do inference(only in fluid) using this model
 #
 #usage:
 #   bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
@@ -65,7 +65,12 @@ if [[ -z $only_convert ]];then
         PYTHON=`which python`
     fi
     imgfile="data/65.jpeg"
-    net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/\"([^\"]+)\"/){ print $1."\n";}'`
+    #FIX ME:
+    #   only look the first line in prototxt file for the name of this network, maybe not correct
+    net_name=`grep "name" $proto_file | head -n1 | perl -ne 'if(/^\s*name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'`
+    if [[ -z $net_name ]];then
+        net_name="MyNet"
+    fi
     $PYTHON ./infer.py $net_file $weight_file $imgfile $net_name
     ret=$?
 fi
fluid/image_classification/caffe2fluid/kaffe/graph.py (+4 −1)
@@ -52,7 +52,10 @@ class Graph(object):
     def __init__(self, nodes=None, name=None):
         self.nodes = nodes or []
         self.node_lut = {node.name: node for node in self.nodes}
-        self.name = name
+        if name is None or name == '':
+            self.name = 'MyNet'
+        else:
+            self.name = name

     def add_node(self, node):
         self.nodes.append(node)
fluid/image_classification/caffe2fluid/kaffe/paddle/network.py (+12 −35)
@@ -4,7 +4,7 @@ import numpy as np

 def import_fluid():
-    import paddle.v2.fluid as fluid
+    import paddle.fluid as fluid
     return fluid
@@ -64,7 +64,7 @@ class Network(object):
         if os.path.isdir(data_path):
             assert (exe is not None), \
                 'must provide a executor to load fluid model'
-            fluid.io.load_persistables_if_exist(executor=exe, dirname=data_path)
+            fluid.io.load_persistables(executor=exe, dirname=data_path)
             return True

         #load model from a npy file
@@ -161,56 +161,28 @@ class Network(object):
         output = fluid.layers.relu(x=input)
         return output

-    def _adjust_pad_if_needed(self, i_hw, k_hw, s_hw, p_hw):
-        #adjust the padding if needed
-        i_h, i_w = i_hw
-        k_h, k_w = k_hw
-        s_h, s_w = s_hw
-        p_h, p_w = p_hw
-
-        def is_consistent(i, k, s, p):
-            o = i + 2 * p - k
-            if o % s == 0:
-                return True
-            else:
-                return False
-
-        real_p_h = 0
-        real_p_w = 0
-        if is_consistent(i_h, k_h, s_h, p_h) is False:
-            real_p_h = int(k_h / 2)
-
-        if is_consistent(i_w, k_w, s_w, p_w) is False:
-            real_p_w = int(k_w / 2)
-
-        return [real_p_h, real_p_w]
-
     def pool(self, pool_type, input, k_h, k_w, s_h, s_w, name, padding):
         # Get the number of channels in the input
         in_hw = input.shape[2:]
         k_hw = [k_h, k_w]
         s_hw = [s_h, s_w]
-
-        if padding is None:
-            #fix bug about the difference between conv and pool
-            #more info: https://github.com/BVLC/caffe/issues/1318
-            padding = self._adjust_pad_if_needed(in_hw, k_hw, s_hw, [0, 0])
-
         fluid = import_fluid()
         output = fluid.layers.pool2d(
             input=input,
             pool_size=k_hw,
             pool_stride=s_hw,
             pool_padding=padding,
+            ceil_mode=True,
             pool_type=pool_type)
         return output

     @layer
-    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
+    def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=[0, 0]):
         return self.pool('max', input, k_h, k_w, s_h, s_w, name, padding)

     @layer
-    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=None):
+    def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=[0, 0]):
         return self.pool('avg', input, k_h, k_w, s_h, s_w, name, padding)

     @layer
@@ -258,7 +230,12 @@ class Network(object):
         return output

     @layer
-    def batch_normalization(self, input, name, scale_offset=True, relu=False):
+    def batch_normalization(self,
+                            input,
+                            name,
+                            scale_offset=True,
+                            eps=1e-5,
+                            relu=False):
         # NOTE: Currently, only inference is supported
         fluid = import_fluid()
         prefix = name + '_'
@@ -276,7 +253,7 @@ class Network(object):
             bias_attr=bias_attr,
             moving_mean_name=mean_name,
             moving_variance_name=variance_name,
-            epsilon=1e-5,
+            epsilon=eps,
             act='relu' if relu is True else None)

         return output
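The removed `_adjust_pad_if_needed` helper tested whether `(i + 2*p - k) % s == 0`; passing `ceil_mode=True` to `pool2d` makes that workaround unnecessary because Caffe rounds pooling output sizes up (the referenced Caffe issue #1318). A small illustrative sketch of the two output-size rules, not part of the commit:

    import math

    def pool_out(i, k, s, p, ceil_mode):
        # floor mode drops the last partial window when (i + 2*p - k) % s != 0;
        # ceil mode (Caffe's behaviour) keeps it, which is what ceil_mode=True
        # in fluid.layers.pool2d reproduces.
        op = math.ceil if ceil_mode else math.floor
        return int(op((i + 2 * p - k) / s)) + 1

    print(pool_out(112, 3, 2, 0, ceil_mode=False))  # 55
    print(pool_out(112, 3, 2, 0, ceil_mode=True))   # 56, matches Caffe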
fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py (+8 −2)
@@ -142,7 +142,13 @@ class TensorFlowMapper(NodeMapper):
     def map_batch_norm(self, node):
         scale_offset = len(node.data) == 4
-        kwargs = {} if scale_offset else {'scale_offset': False}
+
+        #this default value comes from caffe's param in batch_norm
+        default_eps = 1e-5
+        kwargs = {'scale_offset': scale_offset}
+        if node.parameters.eps != default_eps:
+            kwargs['eps'] = node.parameters.eps
+
         return MaybeActivated(
             node, default=False)('batch_normalization', **kwargs)
@@ -236,7 +242,7 @@ class TensorFlowEmitter(object):
         func_def = self.statement('@classmethod')
         func_def += self.statement('def convert(cls, npy_model, fluid_path):')
         self.indent()
-        func_def += self.statement('import paddle.v2.fluid as fluid')
+        func_def += self.statement('fluid = import_fluid()')
         for l in codes:
             func_def += self.statement(l)
         return '\n' + func_def
fluid/image_classification/se_resnext.py (+0 −166)
-import os
-import numpy as np
-import time
-import sys
 import paddle.v2 as paddle
 import paddle.fluid as fluid
-import reader


 def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
@@ -124,164 +119,3 @@ def SE_ResNeXt(input, class_dim, infer=False, layers=50):
     drop = pool
     out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
     return out
-
-
-def train(learning_rate,
-          batch_size,
-          num_passes,
-          init_model=None,
-          model_save_dir='model',
-          parallel=True,
-          use_nccl=True,
-          lr_strategy=None,
-          layers=50):
-    class_dim = 1000
-    image_shape = [3, 224, 224]
-
-    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
-    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-    if parallel:
-        places = fluid.layers.get_places()
-        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)
-
-        with pd.do():
-            image_ = pd.read_input(image)
-            label_ = pd.read_input(label)
-            out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
-            cost = fluid.layers.cross_entropy(input=out, label=label_)
-            avg_cost = fluid.layers.mean(x=cost)
-            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
-            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
-            pd.write_output(avg_cost)
-            pd.write_output(acc_top1)
-            pd.write_output(acc_top5)
-
-        avg_cost, acc_top1, acc_top5 = pd()
-        avg_cost = fluid.layers.mean(x=avg_cost)
-        acc_top1 = fluid.layers.mean(x=acc_top1)
-        acc_top5 = fluid.layers.mean(x=acc_top5)
-    else:
-        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
-        cost = fluid.layers.cross_entropy(input=out, label=label)
-        avg_cost = fluid.layers.mean(x=cost)
-        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
-        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
-
-    if lr_strategy is None:
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=learning_rate,
-            momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4))
-    else:
-        bd = lr_strategy["bd"]
-        lr = lr_strategy["lr"]
-        optimizer = fluid.optimizer.Momentum(
-            learning_rate=fluid.layers.piecewise_decay(
-                boundaries=bd, values=lr),
-            momentum=0.9,
-            regularization=fluid.regularizer.L2Decay(1e-4))
-
-    opts = optimizer.minimize(avg_cost)
-    fluid.memory_optimize(fluid.default_main_program())
-
-    inference_program = fluid.default_main_program().clone()
-    with fluid.program_guard(inference_program):
-        inference_program = fluid.io.get_inference_program(
-            [avg_cost, acc_top1, acc_top5])
-
-    place = fluid.CUDAPlace(0)
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-
-    if init_model is not None:
-        fluid.io.load_persistables(exe, init_model)
-
-    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
-    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
-    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])
-
-    for pass_id in range(num_passes):
-        train_info = [[], [], []]
-        test_info = [[], [], []]
-        for batch_id, data in enumerate(train_reader()):
-            t1 = time.time()
-            loss, acc1, acc5 = exe.run(
-                fluid.default_main_program(),
-                feed=feeder.feed(data),
-                fetch_list=[avg_cost, acc_top1, acc_top5])
-            t2 = time.time()
-            period = t2 - t1
-            train_info[0].append(loss[0])
-            train_info[1].append(acc1[0])
-            train_info[2].append(acc5[0])
-            if batch_id % 10 == 0:
-                print("Pass {0}, trainbatch {1}, loss {2}, \
-                       acc1 {3}, acc5 {4} time {5}".format(pass_id, \
-                       batch_id, loss[0], acc1[0], acc5[0], \
-                       "%2.2f sec" % period))
-                sys.stdout.flush()
-
-        train_loss = np.array(train_info[0]).mean()
-        train_acc1 = np.array(train_info[1]).mean()
-        train_acc5 = np.array(train_info[2]).mean()
-        for data in test_reader():
-            t1 = time.time()
-            loss, acc1, acc5 = exe.run(
-                inference_program,
-                feed=feeder.feed(data),
-                fetch_list=[avg_cost, acc_top1, acc_top5])
-            t2 = time.time()
-            period = t2 - t1
-            test_info[0].append(loss[0])
-            test_info[1].append(acc1[0])
-            test_info[2].append(acc5[0])
-            if batch_id % 10 == 0:
-                print("Pass {0},testbatch {1},loss {2}, \
-                       acc1 {3},acc5 {4},time {5}".format(pass_id, \
-                       batch_id, loss[0], acc1[0], acc5[0], \
-                       "%2.2f sec" % period))
-                sys.stdout.flush()
-
-        test_loss = np.array(test_info[0]).mean()
-        test_acc1 = np.array(test_info[1]).mean()
-        test_acc5 = np.array(test_info[2]).mean()
-
-        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
-               test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id, \
-               train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
-               test_acc5))
-        sys.stdout.flush()
-
-        model_path = os.path.join(model_save_dir, str(pass_id))
-        if not os.path.isdir(model_path):
-            os.makedirs(model_path)
-        fluid.io.save_persistables(exe, model_path)
-
-
-if __name__ == '__main__':
-    epoch_points = [30, 60, 90]
-    total_images = 1281167
-    batch_size = 256
-    step = int(total_images / batch_size + 1)
-    bd = [e * step for e in epoch_points]
-    lr = [0.1, 0.01, 0.001, 0.0001]
-
-    lr_strategy = {"bd": bd, "lr": lr}
-
-    use_nccl = True
-    # layers: 50, 152
-    layers = 50
-
-    train(
-        learning_rate=0.1,
-        batch_size=batch_size,
-        num_passes=120,
-        init_model=None,
-        parallel=True,
-        use_nccl=True,
-        lr_strategy=lr_strategy,
-        layers=layers)
fluid/image_classification/train.py (new file, +311 −0)

import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle.fluid as fluid
from se_resnext import SE_ResNeXt
import reader
import argparse
import functools
from utility import add_arguments, print_arguments

parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size',   int,  256,  "Minibatch size.")
add_arg('num_layers',   int,  50,   "How many layers for SE-ResNeXt model.")
add_arg('with_mem_opt', bool, True, "Whether to use memory optimization or not.")
add_arg('parallel_exe', bool, True, "Whether to use ParallelExecutor to train or not.")


def train_paralle_do(args,
                     learning_rate,
                     batch_size,
                     num_passes,
                     init_model=None,
                     model_save_dir='model',
                     parallel=True,
                     use_nccl=True,
                     lr_strategy=None,
                     layers=50):
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    if lr_strategy is None:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        bd = lr_strategy["bd"]
        lr = lr_strategy["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    inference_program = fluid.default_main_program().clone(for_test=True)

    opts = optimizer.minimize(avg_cost)
    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())
        fluid.memory_optimize(inference_program)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                       acc1 {3}, acc5 {4} time {5}".format(pass_id, \
                       batch_id, loss[0], acc1[0], acc5[0], \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for data in test_reader():
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}".format(pass_id, \
                       batch_id, loss[0], acc1[0], acc5[0], \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id, \
               train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
               test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)


def train_parallel_exe(args,
                       learning_rate,
                       batch_size,
                       num_passes,
                       init_model=None,
                       model_save_dir='model',
                       parallel=True,
                       use_nccl=True,
                       lr_strategy=None,
                       layers=50):
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
    cost = fluid.layers.cross_entropy(input=out, label=label)
    acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
    acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)
    avg_cost = fluid.layers.mean(x=cost)

    test_program = fluid.default_main_program().clone(for_test=True)

    if lr_strategy is None:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        bd = lr_strategy["bd"]
        lr = lr_strategy["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)

    if args.with_mem_opt:
        fluid.memory_optimize(fluid.default_main_program())
        fluid.memory_optimize(test_program)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=avg_cost.name)
    test_exe = fluid.ParallelExecutor(
        use_cuda=True, main_program=test_program, share_vars_from=train_exe)

    fetch_list = [avg_cost.name, acc_top1.name, acc_top5.name]

    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = train_exe.run(fetch_list,
                                             feed_dict=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            train_info[0].append(loss)
            train_info[1].append(acc1)
            train_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0}, trainbatch {1}, loss {2}, \
                       acc1 {3}, acc5 {4} time {5}".format(pass_id, \
                       batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for data in test_reader():
            t1 = time.time()
            loss, acc1, acc5 = test_exe.run(fetch_list,
                                            feed_dict=feeder.feed(data))
            t2 = time.time()
            period = t2 - t1
            loss = np.mean(np.array(loss))
            acc1 = np.mean(np.array(acc1))
            acc5 = np.mean(np.array(acc5))
            test_info[0].append(loss)
            test_info[1].append(acc1)
            test_info[2].append(acc5)
            if batch_id % 10 == 0:
                print("Pass {0},testbatch {1},loss {2}, \
                       acc1 {3},acc5 {4},time {5}".format(pass_id, \
                       batch_id, loss, acc1, acc5, \
                       "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, \
               test_loss {4}, test_acc1 {5}, test_acc5 {6}".format(pass_id, \
               train_loss, train_acc1, train_acc5, test_loss, test_acc1, \
               test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    epoch_points = [30, 60, 90]
    total_images = 1281167
    batch_size = args.batch_size
    step = int(total_images / batch_size + 1)
    bd = [e * step for e in epoch_points]
    lr = [0.1, 0.01, 0.001, 0.0001]

    lr_strategy = {"bd": bd, "lr": lr}

    use_nccl = True
    # layers: 50, 152
    layers = args.num_layers

    method = train_parallel_exe if args.parallel_exe else train_parallel_do
    method(
        args,
        learning_rate=0.1,
        batch_size=batch_size,
        num_passes=120,
        init_model=None,
        parallel=True,
        use_nccl=True,
        lr_strategy=lr_strategy,
        layers=layers)
fluid/image_classification/utility.py (new file, +62 −0)

"""Contains common utility functions."""
#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
from paddle.fluid import core


def print_arguments(args):
    """Print argparse's arguments.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="Jonh", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    print("-----------  Configuration Arguments -----------")
    for arg, value in sorted(vars(args).iteritems()):
        print("%s: %s" % (arg, value))
    print("------------------------------------------------")


def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Add argparse's argument.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_argument("name", str, "Jonh", "User name.", parser)
        args = parser.parse_args()
    """
    type = distutils.util.strtobool if type == bool else type
    argparser.add_argument(
        "--" + argname,
        default=default,
        type=type,
        help=help + ' Default: %(default)s.',
        **kwargs)
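The reason add_arguments swaps type=bool for distutils.util.strtobool is that argparse applies the type callable to the raw string, and bool("False") is True for any non-empty string. A short illustrative snippet (not part of the commit) showing the difference:

    import argparse
    import distutils.util

    p = argparse.ArgumentParser()
    p.add_argument("--naive", type=bool, default=True)
    p.add_argument("--robust", type=distutils.util.strtobool, default=True)
    args = p.parse_args(["--naive", "False", "--robust", "False"])
    print(args.naive)   # True -- bool() sees a non-empty string
    print(args.robust)  # 0    -- strtobool parses the text as intended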
fluid/neural_machine_translation/transformer/config.py (+21 −16)
@@ -15,6 +15,9 @@ class TrainTaskConfig(object):
     # the parameters for learning rate scheduling.
     warmup_steps = 4000

+    # the flag indicating to use average loss or sum loss when training.
+    use_avg_cost = False
+
     # the directory for saving trained models.
     model_dir = "trained_models"
@@ -22,8 +25,7 @@
 class InferTaskConfig(object):
     use_gpu = False
     # the number of examples in one run for sequence generation.
-    # currently the batch size can only be set to 1.
-    batch_size = 1
+    batch_size = 10

     # the parameters for beam search.
     beam_size = 5
@@ -31,37 +33,38 @@ class InferTaskConfig(object):
     # the number of decoded sentences to output.
     n_best = 1

+    # the flags indicating whether to output the special tokens.
+    output_bos = False
+    output_eos = False
+    output_unk = False
+
     # the directory for loading the trained model.
     model_path = "trained_models/pass_1.infer.model"


 class ModelHyperParams(object):
-    # Dictionary size for source and target language. This model directly uses
-    # paddle.dataset.wmt16 in which <bos>, <eos> and <unk> token has
-    # alreay been added, but the <pad> token is not added. Transformer requires
-    # sequences in a mini-batch are padded to have the same length. A <pad> token is
-    # added into the original dictionary in paddle.dateset.wmt16.
+    # This model directly uses paddle.dataset.wmt16 in which <bos>, <eos> and
+    # <unk> token has alreay been added. As for the <pad> token, any token
+    # included in dict can be used to pad, since the paddings' loss will be
+    # masked out and make no effect on parameter gradients.

     # size of source word dictionary.
     src_vocab_size = 10000
-    # index for <pad> token in source language.
-    src_pad_idx = src_vocab_size

     # size of target word dictionay
     trg_vocab_size = 10000
-    # index for <pad> token in target language.
-    trg_pad_idx = trg_vocab_size

     # index for <bos> token
     bos_idx = 0
     # index for <eos> token
     eos_idx = 1
+    # index for <unk> token
+    unk_idx = 2

-    # position value corresponding to the <pad> token.
-    pos_pad_idx = 0
-
-    # max length of sequences. It should plus 1 to include position
-    # padding token for position encoding.
+    # max length of sequences.
+    # The size of position encoding table should at least plus 1, since the
+    # sinusoid position encoding starts from 1 and 0 can be used as the padding
+    # token for position encoding.
     max_length = 50

     # the dimension for word embeddings, which is also the last dimension of
@@ -93,6 +96,7 @@ encoder_input_data_names = (
     "src_word",
     "src_pos",
     "src_slf_attn_bias",
+    "src_data_shape",
     "src_slf_attn_pre_softmax_shape",
     "src_slf_attn_post_softmax_shape", )
@@ -102,6 +106,7 @@ decoder_input_data_names = (
     "trg_pos",
     "trg_slf_attn_bias",
     "trg_src_attn_bias",
+    "trg_data_shape",
     "trg_slf_attn_pre_softmax_shape",
     "trg_slf_attn_post_softmax_shape",
     "trg_src_attn_pre_softmax_shape",
fluid/neural_machine_translation/transformer/infer.py
浏览文件 @
f258a876
...
@@ -11,10 +11,26 @@ from config import InferTaskConfig, ModelHyperParams, \
...
@@ -11,10 +11,26 @@ from config import InferTaskConfig, ModelHyperParams, \
from
train
import
pad_batch_data
from
train
import
pad_batch_data
def
translate_batch
(
exe
,
src_words
,
encoder
,
enc_in_names
,
enc_out_names
,
def
translate_batch
(
exe
,
decoder
,
dec_in_names
,
dec_out_names
,
beam_size
,
max_length
,
src_words
,
n_best
,
batch_size
,
n_head
,
src_pad_idx
,
trg_pad_idx
,
encoder
,
bos_idx
,
eos_idx
):
enc_in_names
,
enc_out_names
,
decoder
,
dec_in_names
,
dec_out_names
,
beam_size
,
max_length
,
n_best
,
batch_size
,
n_head
,
d_model
,
src_pad_idx
,
trg_pad_idx
,
bos_idx
,
eos_idx
,
unk_idx
,
output_unk
=
True
):
"""
"""
Run the encoder program once and run the decoder program multiple times to
Run the encoder program once and run the decoder program multiple times to
implement beam search externally.
implement beam search externally.
...
@@ -25,9 +41,14 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -25,9 +41,14 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
src_pad_idx
,
src_pad_idx
,
n_head
,
n_head
,
is_target
=
False
,
is_target
=
False
,
return_pos
=
Tru
e
,
is_label
=
Fals
e
,
return_attn_bias
=
True
,
return_attn_bias
=
True
,
return_max_len
=
False
)
return_max_len
=
False
)
# Append the data shape input to reshape the output of embedding layer.
enc_in_data
=
enc_in_data
+
[
np
.
array
(
[
-
1
,
enc_in_data
[
2
].
shape
[
-
1
],
d_model
],
dtype
=
"int32"
)
]
# Append the shape inputs to reshape before and after softmax in encoder
# Append the shape inputs to reshape before and after softmax in encoder
# self attention.
# self attention.
enc_in_data
=
enc_in_data
+
[
enc_in_data
=
enc_in_data
+
[
...
@@ -44,11 +65,16 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -44,11 +65,16 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
scores
=
np
.
zeros
((
batch_size
,
beam_size
),
dtype
=
"float32"
)
scores
=
np
.
zeros
((
batch_size
,
beam_size
),
dtype
=
"float32"
)
prev_branchs
=
[[]
for
i
in
range
(
batch_size
)]
prev_branchs
=
[[]
for
i
in
range
(
batch_size
)]
next_ids
=
[[]
for
i
in
range
(
batch_size
)]
next_ids
=
[[]
for
i
in
range
(
batch_size
)]
# Use beam_
map to map the instance idx in batch to beam idx
, since the
# Use beam_
inst_map to map beam idx to the instance idx in batch
, since the
# size of feeded batch is changing.
# size of feeded batch is changing.
beam_map
=
range
(
batch_size
)
beam_inst_map
=
{
beam_idx
:
inst_idx
for
inst_idx
,
beam_idx
in
enumerate
(
range
(
batch_size
))
}
# Use active_beams to recode the alive.
active_beams
=
range
(
batch_size
)
def
beam_backtrace
(
prev_branchs
,
next_ids
,
n_best
=
beam_size
,
add_bos
=
True
):
def
beam_backtrace
(
prev_branchs
,
next_ids
,
n_best
=
beam_size
):
"""
"""
Decode and select n_best sequences for one instance by backtrace.
Decode and select n_best sequences for one instance by backtrace.
"""
"""
...
@@ -60,7 +86,8 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -60,7 +86,8 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
seq
.
append
(
next_ids
[
j
][
k
])
seq
.
append
(
next_ids
[
j
][
k
])
k
=
prev_branchs
[
j
][
k
]
k
=
prev_branchs
[
j
][
k
]
seq
=
seq
[::
-
1
]
seq
=
seq
[::
-
1
]
seq
=
[
bos_idx
]
+
seq
if
add_bos
else
seq
# Add the <bos>, since next_ids don't include the <bos>.
seq
=
[
bos_idx
]
+
seq
seqs
.
append
(
seq
)
seqs
.
append
(
seq
)
return
seqs
return
seqs
...
@@ -82,8 +109,14 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -82,8 +109,14 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
[
-
1e9
]).
astype
(
"float32"
)
[
-
1e9
]).
astype
(
"float32"
)
# This is used to remove attention on the paddings of source sequences.
# This is used to remove attention on the paddings of source sequences.
trg_src_attn_bias
=
np
.
tile
(
trg_src_attn_bias
=
np
.
tile
(
src_slf_attn_bias
[:,
:,
::
src_max_length
,
:],
src_slf_attn_bias
[:,
:,
::
src_max_length
,
:][:,
np
.
newaxis
],
[
beam_size
,
1
,
trg_max_len
,
1
])
[
1
,
beam_size
,
1
,
trg_max_len
,
1
]).
reshape
([
-
1
,
src_slf_attn_bias
.
shape
[
1
],
trg_max_len
,
src_slf_attn_bias
.
shape
[
-
1
]
])
# Append the shape input to reshape the output of embedding layer.
trg_data_shape
=
np
.
array
(
[
batch_size
*
beam_size
,
trg_max_len
,
d_model
],
dtype
=
"int32"
)
# Append the shape inputs to reshape before and after softmax in
# Append the shape inputs to reshape before and after softmax in
# decoder self attention.
# decoder self attention.
trg_slf_attn_pre_softmax_shape
=
np
.
array
(
trg_slf_attn_pre_softmax_shape
=
np
.
array
(
...
@@ -96,26 +129,27 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -96,26 +129,27 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
[
-
1
,
trg_src_attn_bias
.
shape
[
-
1
]],
dtype
=
"int32"
)
[
-
1
,
trg_src_attn_bias
.
shape
[
-
1
]],
dtype
=
"int32"
)
trg_src_attn_post_softmax_shape
=
np
.
array
(
trg_src_attn_post_softmax_shape
=
np
.
array
(
trg_src_attn_bias
.
shape
,
dtype
=
"int32"
)
trg_src_attn_bias
.
shape
,
dtype
=
"int32"
)
enc_output
=
np
.
tile
(
enc_output
,
[
beam_size
,
1
,
1
])
enc_output
=
np
.
tile
(
enc_output
[:,
np
.
newaxis
],
[
1
,
beam_size
,
1
,
1
]).
reshape
(
[
-
1
,
enc_output
.
shape
[
-
2
],
enc_output
.
shape
[
-
1
]])
return
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
return
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
trg_
slf_attn_pre_softmax_shape
,
trg_slf_attn_post
_softmax_shape
,
\
trg_
data_shape
,
trg_slf_attn_pre
_softmax_shape
,
\
trg_s
rc_attn_pre_softmax_shape
,
trg_src_attn_post
_softmax_shape
,
\
trg_s
lf_attn_post_softmax_shape
,
trg_src_attn_pre
_softmax_shape
,
\
enc_output
trg_src_attn_post_softmax_shape
,
enc_output
def
update_dec_in_data
(
dec_in_data
,
next_ids
,
active_beams
):
def
update_dec_in_data
(
dec_in_data
,
next_ids
,
active_beams
,
beam_inst_map
):
"""
"""
Update the input data of decoder mainly by slicing from the previous
Update the input data of decoder mainly by slicing from the previous
input data and dropping the finished instance beams.
input data and dropping the finished instance beams.
"""
"""
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
trg_
slf_attn_pre_softmax_shape
,
trg_slf_attn_post
_softmax_shape
,
\
trg_
data_shape
,
trg_slf_attn_pre
_softmax_shape
,
\
trg_s
rc_attn_pre_softmax_shape
,
trg_src_attn_post
_softmax_shape
,
\
trg_s
lf_attn_post_softmax_shape
,
trg_src_attn_pre
_softmax_shape
,
\
enc_output
=
dec_in_data
trg_src_attn_post_softmax_shape
,
enc_output
=
dec_in_data
trg_cur_len
=
len
(
next_ids
[
0
])
+
1
# include the <bos>
trg_cur_len
=
trg_slf_attn_bias
.
shape
[
-
1
]
+
1
trg_words
=
np
.
array
(
trg_words
=
np
.
array
(
[
[
beam_backtrace
(
beam_backtrace
(
prev_branchs
[
beam_idx
],
next_ids
[
beam_idx
])
prev_branchs
[
beam_idx
],
next_ids
[
beam_idx
],
add_bos
=
True
)
for
beam_idx
in
active_beams
for
beam_idx
in
active_beams
],
],
dtype
=
"int64"
)
dtype
=
"int64"
)
...
@@ -123,6 +157,7 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -123,6 +157,7 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_pos
=
np
.
array
(
trg_pos
=
np
.
array
(
[
range
(
1
,
trg_cur_len
+
1
)]
*
len
(
active_beams
)
*
beam_size
,
[
range
(
1
,
trg_cur_len
+
1
)]
*
len
(
active_beams
)
*
beam_size
,
dtype
=
"int64"
).
reshape
([
-
1
,
1
])
dtype
=
"int64"
).
reshape
([
-
1
,
1
])
active_beams
=
[
beam_inst_map
[
beam_idx
]
for
beam_idx
in
active_beams
]
active_beams_indice
=
(
active_beams_indice
=
(
(
np
.
array
(
active_beams
)
*
beam_size
)[:,
np
.
newaxis
]
+
(
np
.
array
(
active_beams
)
*
beam_size
)[:,
np
.
newaxis
]
+
np
.
array
(
range
(
beam_size
))[
np
.
newaxis
,
:]).
flatten
()
np
.
array
(
range
(
beam_size
))[
np
.
newaxis
,
:]).
flatten
()
...
@@ -137,6 +172,10 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -137,6 +172,10 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_src_attn_bias
=
np
.
tile
(
trg_src_attn_bias
[
trg_src_attn_bias
=
np
.
tile
(
trg_src_attn_bias
[
active_beams_indice
,
:,
::
trg_src_attn_bias
.
shape
[
2
],
:],
active_beams_indice
,
:,
::
trg_src_attn_bias
.
shape
[
2
],
:],
[
1
,
1
,
trg_cur_len
,
1
])
[
1
,
1
,
trg_cur_len
,
1
])
# Append the shape input to reshape the output of embedding layer.
trg_data_shape
=
np
.
array
(
[
len
(
active_beams
)
*
beam_size
,
trg_cur_len
,
d_model
],
dtype
=
"int32"
)
# Append the shape inputs to reshape before and after softmax in
# Append the shape inputs to reshape before and after softmax in
# decoder self attention.
# decoder self attention.
trg_slf_attn_pre_softmax_shape
=
np
.
array
(
trg_slf_attn_pre_softmax_shape
=
np
.
array
(
...
@@ -151,9 +190,9 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
...
@@ -151,9 +190,9 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
trg_src_attn_bias
.
shape
,
dtype
=
"int32"
)
trg_src_attn_bias
.
shape
,
dtype
=
"int32"
)
enc_output
=
enc_output
[
active_beams_indice
,
:,
:]
enc_output
=
enc_output
[
active_beams_indice
,
:,
:]
return
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
return
trg_words
,
trg_pos
,
trg_slf_attn_bias
,
trg_src_attn_bias
,
\
trg_
slf_attn_pre_softmax_shape
,
trg_slf_attn_post
_softmax_shape
,
\
trg_
data_shape
,
trg_slf_attn_pre
_softmax_shape
,
\
trg_s
rc_attn_pre_softmax_shape
,
trg_src_attn_post
_softmax_shape
,
\
trg_s
lf_attn_post_softmax_shape
,
trg_src_attn_pre
_softmax_shape
,
\
enc_output
trg_src_attn_post_softmax_shape
,
enc_output
dec_in_data
=
init_dec_in_data
(
batch_size
,
beam_size
,
enc_in_data
,
dec_in_data
=
init_dec_in_data
(
batch_size
,
beam_size
,
enc_in_data
,
enc_output
)
enc_output
)
...
@@ -162,13 +201,18 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
                               feed=dict(zip(dec_in_names, dec_in_data)),
                               fetch_list=dec_out_names)[0]
         predict_all = np.log(
-            predict_all.reshape([len(beam_map) * beam_size, i + 1, -1])
-            [:, -1, :])
-        predict_all = (predict_all + scores[beam_map].reshape(
-            [len(beam_map) * beam_size, -1])).reshape(
-                [len(beam_map), beam_size, -1])
+            predict_all.reshape([len(beam_inst_map) * beam_size, i + 1, -1])
+            [:, -1, :])
+        predict_all = (predict_all + scores[active_beams].reshape(
+            [len(beam_inst_map) * beam_size, -1])).reshape(
+                [len(beam_inst_map), beam_size, -1])
+        if not output_unk:  # To exclude the <unk> token.
+            predict_all[:, :, unk_idx] = -1e9
         active_beams = []
-        for inst_idx, beam_idx in enumerate(beam_map):
+        for beam_idx in range(batch_size):
+            if not beam_inst_map.has_key(beam_idx):
+                continue
+            inst_idx = beam_inst_map[beam_idx]
             predict = (predict_all[inst_idx, :, :]
                        if i != 0 else predict_all[inst_idx, 0, :]).flatten()
             top_k_indice = np.argpartition(predict, -beam_size)[-beam_size:]
...
@@ -181,13 +225,20 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
             next_ids[beam_idx].append(top_scores_ids % predict_all.shape[-1])
             if next_ids[beam_idx][-1][0] != eos_idx:
                 active_beams.append(beam_idx)
-        beam_map = active_beams
-        if len(beam_map) == 0:
+        if len(active_beams) == 0:
             break
-        dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams)
+        dec_in_data = update_dec_in_data(dec_in_data, next_ids, active_beams,
+                                         beam_inst_map)
+        beam_inst_map = {
+            beam_idx: inst_idx
+            for inst_idx, beam_idx in enumerate(active_beams)
+        }
     # Decode beams and select n_best sequences for each instance by backtrace.
-    seqs = [beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)]
+    seqs = [
+        beam_backtrace(prev_branchs[beam_idx], next_ids[beam_idx], n_best)
+        for beam_idx in range(batch_size)
+    ]
     return seqs, scores[:, :n_best].tolist()
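The bookkeeping above keeps two views of the same beams: `active_beams` lists the original instance indices that are still being decoded, while `beam_inst_map` translates an instance index back to its row in the shrinking decoder batch. A minimal NumPy sketch of that index arithmetic (variable names mirror the diff, but the numbers are made up):

import numpy as np

beam_size = 4
# Instances still decoding, keyed by their position in the previous step's batch.
prev_active = [0, 2, 5]
beam_inst_map = {beam_idx: inst_idx for inst_idx, beam_idx in enumerate(prev_active)}

# Suppose instance 2 just emitted <eos>; only 0 and 5 stay active.
active_beams = [0, 5]
# Translate original instance ids into rows of the previous (compact) batch ...
rows = [beam_inst_map[beam_idx] for beam_idx in active_beams]          # [0, 2]
# ... and expand each row into its beam_size entries of the flattened batch.
active_beams_indice = ((np.array(rows) * beam_size)[:, np.newaxis] +
                       np.arange(beam_size)[np.newaxis, :]).flatten()
print(active_beams_indice)   # [ 0  1  2  3  8  9 10 11]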
...
@@ -195,29 +246,24 @@ def translate_batch(exe, src_words, encoder, enc_in_names, enc_out_names,
 def main():
     place = fluid.CUDAPlace(0) if InferTaskConfig.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
-    # The current program desc is coupled with batch_size and the only
-    # supported batch size is 1 currently.
     encoder_program = fluid.Program()
-    model.batch_size = InferTaskConfig.batch_size
     with fluid.program_guard(main_program=encoder_program):
         enc_output = encoder(
-            ModelHyperParams.src_vocab_size + 1,
+            ModelHyperParams.src_vocab_size,
             ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
             ModelHyperParams.n_head, ModelHyperParams.d_key,
             ModelHyperParams.d_value, ModelHyperParams.d_model,
-            ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
-            ModelHyperParams.src_pad_idx, ModelHyperParams.pos_pad_idx)
+            ModelHyperParams.d_inner_hid, ModelHyperParams.dropout)
 
-    model.batch_size = InferTaskConfig.batch_size * InferTaskConfig.beam_size
     decoder_program = fluid.Program()
     with fluid.program_guard(main_program=decoder_program):
         predict = decoder(
-            ModelHyperParams.trg_vocab_size + 1,
+            ModelHyperParams.trg_vocab_size,
             ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
             ModelHyperParams.n_head, ModelHyperParams.d_key,
             ModelHyperParams.d_value, ModelHyperParams.d_model,
-            ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
-            ModelHyperParams.trg_pad_idx, ModelHyperParams.pos_pad_idx)
+            ModelHyperParams.d_inner_hid, ModelHyperParams.dropout)
 
     # Load model parameters of encoder and decoder separately from the saved
     # transformer model.
...
@@ -254,17 +300,51 @@ def main():
     trg_idx2word = paddle.dataset.wmt16.get_dict(
         "de", dict_size=ModelHyperParams.trg_vocab_size, reverse=True)
 
+    def post_process_seq(seq,
+                         bos_idx=ModelHyperParams.bos_idx,
+                         eos_idx=ModelHyperParams.eos_idx,
+                         output_bos=InferTaskConfig.output_bos,
+                         output_eos=InferTaskConfig.output_eos):
+        """
+        Post-process the beam-search decoded sequence. Truncate from the first
+        <eos> and remove the <bos> and <eos> tokens currently.
+        """
+        eos_pos = len(seq) - 1
+        for i, idx in enumerate(seq):
+            if idx == eos_idx:
+                eos_pos = i
+                break
+        seq = seq[:eos_pos + 1]
+        return filter(
+            lambda idx: (output_bos or idx != bos_idx) and \
+                        (output_eos or idx != eos_idx),
+            seq)
+
     for batch_id, data in enumerate(test_data()):
         batch_seqs, batch_scores = translate_batch(
-            exe, [item[0] for item in data], encoder_program,
-            encoder_input_data_names, [enc_output.name], decoder_program,
-            decoder_input_data_names, [predict.name], InferTaskConfig.beam_size,
-            InferTaskConfig.max_length, InferTaskConfig.n_best, len(data),
-            ModelHyperParams.n_head, ModelHyperParams.src_pad_idx,
-            ModelHyperParams.trg_pad_idx, ModelHyperParams.bos_idx,
-            ModelHyperParams.eos_idx)
+            exe,
+            [item[0] for item in data],
+            encoder_program,
+            encoder_input_data_names,
+            [enc_output.name],
+            decoder_program,
+            decoder_input_data_names,
+            [predict.name],
+            InferTaskConfig.beam_size,
+            InferTaskConfig.max_length,
+            InferTaskConfig.n_best,
+            len(data),
+            ModelHyperParams.n_head,
+            ModelHyperParams.d_model,
+            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
+            ModelHyperParams.eos_idx,  # Use eos_idx to pad.
+            ModelHyperParams.bos_idx,
+            ModelHyperParams.eos_idx,
+            ModelHyperParams.unk_idx,
+            output_unk=InferTaskConfig.output_unk)
         for i in range(len(batch_seqs)):
-            seqs = batch_seqs[i]
+            # Post-process the beam-search decoded sequences.
+            seqs = map(post_process_seq, batch_seqs[i])
             scores = batch_scores[i]
             for seq in seqs:
                 print(" ".join([trg_idx2word[idx] for idx in seq]))
...
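For readers following the inference flow, the truncation that `post_process_seq` performs is easy to reproduce on plain Python lists. The sketch below uses made-up token ids (0 for <bos>, 1 for <eos>) purely for illustration and is not the repository's function:

def truncate_and_strip(seq, bos_idx=0, eos_idx=1,
                       output_bos=False, output_eos=False):
    # Cut everything after the first <eos>, then optionally drop <bos>/<eos>.
    eos_pos = len(seq) - 1
    for i, idx in enumerate(seq):
        if idx == eos_idx:
            eos_pos = i
            break
    seq = seq[:eos_pos + 1]
    return [idx for idx in seq
            if (output_bos or idx != bos_idx) and (output_eos or idx != eos_idx)]

print(truncate_and_strip([0, 7, 9, 4, 1, 3, 5]))  # [7, 9, 4]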
fluid/neural_machine_translation/transformer/model.py  View file @ f258a876
...
@@ -7,9 +7,6 @@ import paddle.fluid.layers as layers
 from config import TrainTaskConfig, pos_enc_param_names, \
     encoder_input_data_names, decoder_input_data_names, label_data_names
 
-# FIXME(guosheng): Remove out the batch_size from the model.
-batch_size = TrainTaskConfig.batch_size
-
 def position_encoding_init(n_position, d_pos_vec):
     """
...
@@ -85,9 +82,10 @@ def multi_head_attention(queries,
             return x
 
         hidden_size = x.shape[-1]
-        # FIXME(guosheng): Decouple the program desc with batch_size.
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
         reshaped = layers.reshape(
-            x=x, shape=[batch_size, -1, n_head, hidden_size // n_head])
+            x=x, shape=[0, -1, n_head, hidden_size // n_head])
 
         # permuate the dimensions into:
         # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
...
@@ -103,11 +101,11 @@ def multi_head_attention(queries,
             raise ValueError("Input(x) should be a 4-D Tensor.")
 
         trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
-        # FIXME(guosheng): Decouple the program desc with batch_size.
+        # The value 0 in shape attr means copying the corresponding dimension
+        # size of the input as the output dimension size.
         return layers.reshape(
             x=trans_x,
             shape=map(int,
-                      [batch_size, -1, trans_x.shape[2] * trans_x.shape[3]]))
+                      [0, -1, trans_x.shape[2] * trans_x.shape[3]]))
 
 def scaled_dot_product_attention(q, k, v, attn_bias, d_model, dropout_rate):
     """
...
@@ -201,10 +199,9 @@ def prepare_encoder(src_word,
                     src_pos,
                     src_vocab_size,
                     src_emb_dim,
-                    src_pad_idx,
                     src_max_len,
                     dropout_rate=0.,
-                    pos_pad_idx=0,
+                    src_data_shape=None,
                     pos_enc_param_name=None):
     """Add word embeddings and position encodings.
     The output tensor has a shape of:
...
@@ -215,18 +212,17 @@ def prepare_encoder(src_word,
     src_word_emb = layers.embedding(
         src_word,
         size=[src_vocab_size, src_emb_dim],
-        padding_idx=src_pad_idx,
         param_attr=fluid.initializer.Normal(0., 1.))
     src_pos_enc = layers.embedding(
         src_pos,
         size=[src_max_len, src_emb_dim],
-        padding_idx=pos_pad_idx,
         param_attr=fluid.ParamAttr(
             name=pos_enc_param_name, trainable=False))
     enc_input = src_word_emb + src_pos_enc
-    # FIXME(guosheng): Decouple the program desc with batch_size.
     enc_input = layers.reshape(
-        x=enc_input, shape=[batch_size, -1, src_emb_dim])
+        x=enc_input,
+        shape=[-1, src_max_len, src_emb_dim],
+        actual_shape=src_data_shape)
     return layers.dropout(
         enc_input, dropout_prob=dropout_rate,
         is_test=False) if dropout_rate else enc_input
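`prepare_encoder` adds a fixed, non-trainable positional-encoding table to the word embeddings; the table comes from `position_encoding_init`, referenced at the top of model.py. A small NumPy sketch of the standard sinusoidal table such an initializer typically produces (the exact layout in the repository may differ slightly, so treat this as an illustration):

import numpy as np

def sinusoid_position_encoding(n_position, d_pos_vec):
    # pos / 10000^(2i / d) feeds sin on even indices and cos on odd indices.
    pe = np.array([
        [pos / np.power(10000., 2. * (i // 2) / d_pos_vec)
         for i in range(d_pos_vec)]
        for pos in range(n_position)
    ])
    pe[:, 0::2] = np.sin(pe[:, 0::2])
    pe[:, 1::2] = np.cos(pe[:, 1::2])
    return pe.astype("float32")

table = sinusoid_position_encoding(n_position=50, d_pos_vec=8)
print(table.shape)  # (50, 8)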
...
@@ -401,20 +397,23 @@ def decoder(dec_input,
 def make_inputs(input_data_names,
                 n_head,
                 d_model,
-                batch_size,
                 max_length,
                 is_pos,
                 slf_attn_bias_flag,
                 src_attn_bias_flag,
                 enc_output_flag=False,
+                data_shape_flag=True,
                 slf_attn_shape_flag=True,
                 src_attn_shape_flag=True):
     """
     Define the input data layers for the transformer model.
     """
     input_layers = []
-    # The shapes here act as placeholder.
-    # The shapes set here is to pass the infer-shape in compile time.
+    batch_size = 1  # Only for the infer-shape in compile time.
+    # The shapes here act as placeholder and are set to pass the infer-shape in
+    # compile time.
+    # The actual data shape of word is:
+    # [batch_size * max_len_in_batch, 1]
     word = layers.data(
         name=input_data_names[len(input_layers)],
         shape=[batch_size * max_length, 1],
...
@@ -422,6 +421,8 @@ def make_inputs(input_data_names,
         append_batch_size=False)
     input_layers += [word]
     # This is used for position data or label weight.
+    # The actual data shape of pos is:
+    # [batch_size * max_len_in_batch, 1]
     pos = layers.data(
         name=input_data_names[len(input_layers)],
         shape=[batch_size * max_length, 1],
...
@@ -432,6 +433,8 @@ def make_inputs(input_data_names,
     # This input is used to remove attention weights on paddings for the
     # encoder and to remove attention weights on subsequent words for the
     # decoder.
+    # The actual data shape of slf_attn_bias_flag is:
+    # [batch_size, n_head, max_len_in_batch, max_len_in_batch]
     slf_attn_bias = layers.data(
         name=input_data_names[len(input_layers)],
         shape=[batch_size, n_head, max_length, max_length],
...
@@ -439,40 +442,60 @@ def make_inputs(input_data_names,
         append_batch_size=False)
     input_layers += [slf_attn_bias]
     if src_attn_bias_flag:
-        # This input is used to remove attention weights on paddings.
+        # This input is used to remove attention weights on paddings. It's used
+        # in encoder-decoder attention.
+        # The actual data shape of slf_attn_bias_flag is:
+        # [batch_size, n_head, trg_max_len_in_batch, src_max_len_in_batch]
         src_attn_bias = layers.data(
             name=input_data_names[len(input_layers)],
             shape=[batch_size, n_head, max_length, max_length],
             dtype="float32",
             append_batch_size=False)
         input_layers += [src_attn_bias]
+    if data_shape_flag:
+        # This input is used to reshape the output of embedding layer.
+        data_shape = layers.data(
+            name=input_data_names[len(input_layers)],
+            shape=[3],
+            dtype="int32",
+            append_batch_size=False)
+        input_layers += [data_shape]
     if slf_attn_shape_flag:
+        # This shape input is used to reshape before softmax in self attention.
         slf_attn_pre_softmax_shape = layers.data(
             name=input_data_names[len(input_layers)],
-            shape=[3],
+            shape=[2],
            dtype="int32",
             append_batch_size=False)
         input_layers += [slf_attn_pre_softmax_shape]
+        # This shape input is used to reshape after softmax in self attention.
         slf_attn_post_softmax_shape = layers.data(
             name=input_data_names[len(input_layers)],
-            shape=[3],
+            shape=[4],
             dtype="int32",
             append_batch_size=False)
         input_layers += [slf_attn_post_softmax_shape]
     if src_attn_shape_flag:
+        # This shape input is used to reshape before softmax in encoder-decoder
+        # attention.
         src_attn_pre_softmax_shape = layers.data(
             name=input_data_names[len(input_layers)],
-            shape=[3],
+            shape=[2],
             dtype="int32",
             append_batch_size=False)
         input_layers += [src_attn_pre_softmax_shape]
+        # This shape input is used to reshape after softmax in encoder-decoder
+        # attention.
        src_attn_post_softmax_shape = layers.data(
             name=input_data_names[len(input_layers)],
-            shape=[3],
+            shape=[4],
             dtype="int32",
             append_batch_size=False)
         input_layers += [src_attn_post_softmax_shape]
     if enc_output_flag:
+        # This input is used in independent decoder program for inference.
+        # The actual data shape of slf_attn_bias_flag is:
+        # [batch_size, max_len_in_batch, d_model]
         enc_output = layers.data(
             name=input_data_names[len(input_layers)],
             shape=[batch_size, max_length, d_model],
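The new `shape=[2]` and `shape=[4]` data layers hold the concrete reshape targets that the host computes per mini-batch and feeds alongside the data, so the compiled program no longer bakes in a batch size. A small NumPy sketch of how such host-side shape tensors would be assembled for one batch (the names mirror the diff, the batch numbers are invented):

import numpy as np

batch_size, n_head, max_len_in_batch, d_model = 8, 8, 23, 512

# 3 ints: target shape for the output of the embedding layer.
data_shape = np.array([batch_size, max_len_in_batch, d_model], dtype="int32")
# 2 ints: flatten everything but the attended-to length before softmax.
pre_softmax_shape = np.array([-1, max_len_in_batch], dtype="int32")
# 4 ints: restore the full attention-weight shape after softmax.
post_softmax_shape = np.array(
    [batch_size, n_head, max_len_in_batch, max_len_in_batch], dtype="int32")

print(data_shape, pre_softmax_shape, post_softmax_shape)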
...
@@ -493,20 +516,17 @@ def transformer(
         d_value,
         d_model,
         d_inner_hid,
-        dropout_rate,
-        src_pad_idx,
-        trg_pad_idx,
-        pos_pad_idx, ):
-    enc_input_layers = make_inputs(
+        dropout_rate, ):
+    enc_inputs = make_inputs(
         encoder_input_data_names,
         n_head,
         d_model,
-        batch_size,
         max_length,
         is_pos=True,
         slf_attn_bias_flag=True,
         src_attn_bias_flag=False,
         enc_output_flag=False,
+        data_shape_flag=True,
         slf_attn_shape_flag=True,
         src_attn_shape_flag=False)
...
@@ -520,20 +540,18 @@ def transformer(
         d_model,
         d_inner_hid,
         dropout_rate,
-        src_pad_idx,
-        pos_pad_idx,
-        enc_input_layers, )
+        enc_inputs, )
 
-    dec_input_layers = make_inputs(
+    dec_inputs = make_inputs(
         decoder_input_data_names,
         n_head,
         d_model,
-        batch_size,
         max_length,
         is_pos=True,
         slf_attn_bias_flag=True,
         src_attn_bias_flag=True,
         enc_output_flag=False,
+        data_shape_flag=True,
         slf_attn_shape_flag=True,
         src_attn_shape_flag=True)
...
@@ -547,9 +565,7 @@ def transformer(
         d_model,
         d_inner_hid,
         dropout_rate,
-        trg_pad_idx,
-        pos_pad_idx,
-        dec_input_layers,
+        dec_inputs,
         enc_output, )
 
     # Padding index do not contribute to the total loss. The weights is used to
...
@@ -558,17 +574,20 @@ def transformer(
         label_data_names,
         n_head,
         d_model,
-        batch_size,
         max_length,
         is_pos=False,
         slf_attn_bias_flag=False,
         src_attn_bias_flag=False,
         enc_output_flag=False,
+        data_shape_flag=False,
         slf_attn_shape_flag=False,
         src_attn_shape_flag=False)
     cost = layers.softmax_with_cross_entropy(logits=predict, label=gold)
     weighted_cost = cost * weights
-    return layers.reduce_sum(weighted_cost), predict
+    sum_cost = layers.reduce_sum(weighted_cost)
+    token_num = layers.reduce_sum(weights)
+    avg_cost = sum_cost / token_num
+    return sum_cost, avg_cost, predict, token_num
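The change above stops returning only the summed cross-entropy and also exposes the per-token average, obtained by dividing by the number of non-padding target tokens carried in `weights`. The same normalization in plain NumPy, with a tiny made-up batch:

import numpy as np

# Per-token cross-entropy for a flattened batch of 6 target positions,
# of which the last two are padding.
cost = np.array([2.1, 1.7, 0.9, 1.3, 0.0, 0.0])
weights = np.array([1., 1., 1., 1., 0., 0.])   # 1 for real tokens, 0 for padding

weighted_cost = cost * weights
sum_cost = weighted_cost.sum()
token_num = weights.sum()
avg_cost = sum_cost / token_num

print(sum_cost, token_num, avg_cost)   # 6.0 4.0 1.5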
 def wrap_encoder(src_vocab_size,
...
@@ -580,38 +599,38 @@ def wrap_encoder(src_vocab_size,
                  d_model,
                  d_inner_hid,
                  dropout_rate,
-                 src_pad_idx,
-                 pos_pad_idx,
-                 enc_input_layers=None):
+                 enc_inputs=None):
     """
     The wrapper assembles together all needed layers for the encoder.
     """
-    if enc_input_layers is None:
+    if enc_inputs is None:
         # This is used to implement independent encoder program in inference.
-        src_word, src_pos, src_slf_attn_bias, slf_attn_pre_softmax_shape, \
-            slf_attn_post_softmax_shape = make_inputs(
+        src_word, src_pos, src_slf_attn_bias, src_data_shape, \
+            slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \
+            make_inputs(
                 encoder_input_data_names,
                 n_head,
                 d_model,
-                batch_size,
                 max_length,
                 is_pos=True,
                 slf_attn_bias_flag=True,
                 src_attn_bias_flag=False,
                 enc_output_flag=False,
+                data_shape_flag=True,
                 slf_attn_shape_flag=True,
                 src_attn_shape_flag=False)
     else:
-        src_word, src_pos, src_slf_attn_bias, slf_attn_pre_softmax_shape, \
-            slf_attn_post_softmax_shape = enc_input_layers
+        src_word, src_pos, src_slf_attn_bias, src_data_shape, \
+            slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape = \
+            enc_inputs
     enc_input = prepare_encoder(
         src_word,
         src_pos,
         src_vocab_size,
         d_model,
-        src_pad_idx,
         max_length,
-        dropout_rate, )
+        dropout_rate,
+        src_data_shape, )
     enc_output = encoder(
         enc_input,
         src_slf_attn_bias,
...
@@ -636,44 +655,42 @@ def wrap_decoder(trg_vocab_size,
                  d_model,
                  d_inner_hid,
                  dropout_rate,
-                 trg_pad_idx,
-                 pos_pad_idx,
-                 dec_input_layers=None,
+                 dec_inputs=None,
                  enc_output=None):
     """
     The wrapper assembles together all needed layers for the decoder.
     """
-    if dec_input_layers is None:
+    if dec_inputs is None:
         # This is used to implement independent decoder program in inference.
         trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
-            slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape, \
-            src_attn_pre_softmax_shape, src_attn_post_softmax_shape, \
-            enc_output = make_inputs(
+            trg_data_shape, slf_attn_pre_softmax_shape, \
+            slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \
+            src_attn_post_softmax_shape, enc_output = make_inputs(
                 decoder_input_data_names,
                 n_head,
                 d_model,
-                batch_size,
                 max_length,
                 is_pos=True,
                 slf_attn_bias_flag=True,
                 src_attn_bias_flag=True,
                 enc_output_flag=True,
+                data_shape_flag=True,
                 slf_attn_shape_flag=True,
                 src_attn_shape_flag=True)
     else:
         trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias, \
-            slf_attn_pre_softmax_shape, slf_attn_post_softmax_shape, \
-            src_attn_pre_softmax_shape, src_attn_post_softmax_shape = \
-            dec_input_layers
+            trg_data_shape, slf_attn_pre_softmax_shape, \
+            slf_attn_post_softmax_shape, src_attn_pre_softmax_shape, \
+            src_attn_post_softmax_shape = dec_inputs
     dec_input = prepare_decoder(
         trg_word,
         trg_pos,
         trg_vocab_size,
         d_model,
-        trg_pad_idx,
         max_length,
-        dropout_rate, )
+        dropout_rate,
+        trg_data_shape, )
     dec_output = decoder(
         dec_input,
         enc_output,
...
@@ -697,5 +714,5 @@ def wrap_decoder(trg_vocab_size,
             bias_attr=False,
             num_flatten_dims=2),
         shape=[-1, trg_vocab_size],
-        act="softmax" if dec_input_layers is None else None)
+        act="softmax" if dec_inputs is None else None)
     return predict
fluid/neural_machine_translation/transformer/train.py  View file @ f258a876
 import os
+import time
 
 import numpy as np
 import paddle
...
@@ -14,7 +15,7 @@ def pad_batch_data(insts,
                    pad_idx,
                    n_head,
                    is_target=False,
-                   return_pos=True,
+                   is_label=False,
                    return_attn_bias=True,
                    return_max_len=True):
     """
...
@@ -23,14 +24,20 @@ def pad_batch_data(insts,
     """
     return_list = []
     max_len = max(len(inst) for inst in insts)
+    # Any token included in dict can be used to pad, since the paddings' loss
+    # will be masked out by weights and make no effect on parameter gradients.
     inst_data = np.array(
         [inst + [pad_idx] * (max_len - len(inst)) for inst in insts])
     return_list += [inst_data.astype("int64").reshape([-1, 1])]
-    if return_pos:
-        inst_pos = np.array([[
-            pos_i + 1 if w_i != pad_idx else 0
-            for pos_i, w_i in enumerate(inst)
-        ] for inst in inst_data])
-        return_list += [inst_pos.astype("int64").reshape([-1, 1])]
+    if is_label:  # label weight
+        inst_weight = np.array(
+            [[1.] * len(inst) + [0.] * (max_len - len(inst)) for inst in insts])
+        return_list += [inst_weight.astype("float32").reshape([-1, 1])]
+    else:  # position data
+        inst_pos = np.array([
+            range(1, len(inst) + 1) + [0] * (max_len - len(inst))
+            for inst in insts
+        ])
+        return_list += [inst_pos.astype("int64").reshape([-1, 1])]
     if return_attn_bias:
         if is_target:
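The padding helper now produces, besides the padded ids, either position indices or a 0/1 label-weight mask depending on `is_label`. A standalone NumPy sketch of the three arrays for a toy batch (pad id 0 is arbitrary here):

import numpy as np

insts = [[5, 7, 9], [3, 4]]
pad_idx = 0
max_len = max(len(inst) for inst in insts)

padded = np.array(
    [inst + [pad_idx] * (max_len - len(inst)) for inst in insts])      # token ids
pos = np.array(
    [list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
     for inst in insts])                                               # positions
weight = np.array(
    [[1.] * len(inst) + [0.] * (max_len - len(inst)) for inst in insts])  # label mask

print(padded)   # [[5 7 9] [3 4 0]]
print(pos)      # [[1 2 3] [1 2 0]]
print(weight)   # [[1. 1. 1.] [1. 1. 0.]]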
...
@@ -56,7 +63,7 @@ def pad_batch_data(insts,
 def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
-                        max_length, n_head):
+                        n_head, d_model):
     """
     Put all padded data needed by training into a dict.
     """
...
@@ -66,6 +73,10 @@ def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
         [inst[1] for inst in insts], trg_pad_idx, n_head, is_target=True)
     trg_src_attn_bias = np.tile(src_slf_attn_bias[:, :, ::src_max_len, :],
                                 [1, 1, trg_max_len, 1]).astype("float32")
+    # These shape tensors are used in reshape_op.
+    src_data_shape = np.array([len(insts), src_max_len, d_model], dtype="int32")
+    trg_data_shape = np.array([len(insts), trg_max_len, d_model], dtype="int32")
     src_slf_attn_pre_softmax_shape = np.array(
         [-1, src_slf_attn_bias.shape[-1]], dtype="int32")
     src_slf_attn_post_softmax_shape = np.array(
...
@@ -78,17 +89,24 @@ def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
         [-1, trg_src_attn_bias.shape[-1]], dtype="int32")
     trg_src_attn_post_softmax_shape = np.array(
         trg_src_attn_bias.shape, dtype="int32")
-    lbl_word = pad_batch_data([inst[2] for inst in insts], trg_pad_idx, n_head,
-                              False, False, False, False)
-    lbl_weight = (lbl_word != trg_pad_idx).astype("float32").reshape([-1, 1])
+    lbl_word, lbl_weight = pad_batch_data(
+        [inst[2] for inst in insts],
+        trg_pad_idx,
+        n_head,
+        is_target=False,
+        is_label=True,
+        return_attn_bias=False,
+        return_max_len=False)
     input_dict = dict(
         zip(input_data_names, [
             src_word, src_pos, src_slf_attn_bias,
+            src_data_shape,
             src_slf_attn_pre_softmax_shape, src_slf_attn_post_softmax_shape,
             trg_word, trg_pos, trg_slf_attn_bias, trg_src_attn_bias,
-            trg_slf_attn_pre_softmax_shape, trg_slf_attn_post_softmax_shape,
-            trg_src_attn_pre_softmax_shape, trg_src_attn_post_softmax_shape,
-            lbl_word, lbl_weight
+            trg_data_shape, trg_slf_attn_pre_softmax_shape,
+            trg_slf_attn_post_softmax_shape, trg_src_attn_pre_softmax_shape,
+            trg_src_attn_post_softmax_shape, lbl_word, lbl_weight
         ]))
     return input_dict
...
@@ -97,14 +115,12 @@ def main():
     place = fluid.CUDAPlace(0) if TrainTaskConfig.use_gpu else fluid.CPUPlace()
     exe = fluid.Executor(place)
 
-    cost, predict = transformer(
-        ModelHyperParams.src_vocab_size + 1,
-        ModelHyperParams.trg_vocab_size + 1,
+    sum_cost, avg_cost, predict, token_num = transformer(
+        ModelHyperParams.src_vocab_size,
+        ModelHyperParams.trg_vocab_size,
         ModelHyperParams.max_length + 1, ModelHyperParams.n_layer,
         ModelHyperParams.n_head, ModelHyperParams.d_key,
         ModelHyperParams.d_value, ModelHyperParams.d_model,
-        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout,
-        ModelHyperParams.src_pad_idx, ModelHyperParams.trg_pad_idx,
-        ModelHyperParams.pos_pad_idx)
+        ModelHyperParams.d_inner_hid, ModelHyperParams.dropout)
     lr_scheduler = LearningRateScheduler(ModelHyperParams.d_model,
                                          TrainTaskConfig.warmup_steps, place,
...
@@ -114,7 +130,7 @@ def main():
         beta1=TrainTaskConfig.beta1,
         beta2=TrainTaskConfig.beta2,
         epsilon=TrainTaskConfig.eps)
-    optimizer.minimize(cost)
+    optimizer.minimize(avg_cost if TrainTaskConfig.use_avg_cost else sum_cost)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
...
@@ -126,27 +142,31 @@ def main():
     # Program to do validation.
     test_program = fluid.default_main_program().clone()
     with fluid.program_guard(test_program):
-        test_program = fluid.io.get_inference_program([cost])
+        test_program = fluid.io.get_inference_program([avg_cost])
     val_data = paddle.batch(
         paddle.dataset.wmt16.validation(ModelHyperParams.src_vocab_size,
                                         ModelHyperParams.trg_vocab_size),
         batch_size=TrainTaskConfig.batch_size)
 
     def test(exe):
-        test_costs = []
+        test_total_cost = 0
+        test_total_token = 0
         for batch_id, data in enumerate(val_data()):
+            if len(data) != TrainTaskConfig.batch_size:
+                continue
             data_input = prepare_batch_input(
                 data, encoder_input_data_names + decoder_input_data_names[:-1] +
-                label_data_names, ModelHyperParams.src_pad_idx,
-                ModelHyperParams.trg_pad_idx, ModelHyperParams.max_length,
-                ModelHyperParams.n_head)
-            test_cost = exe.run(test_program,
-                                feed=data_input,
-                                fetch_list=[cost])[0]
-            test_costs.append(test_cost)
-        return np.mean(test_costs)
+                label_data_names, ModelHyperParams.eos_idx,
+                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
+                ModelHyperParams.d_model)
+            test_sum_cost, test_token_num = exe.run(
+                test_program,
+                feed=data_input,
+                fetch_list=[sum_cost, token_num],
+                use_program_cache=True)
+            test_total_cost += test_sum_cost
+            test_total_token += test_token_num
+        test_avg_cost = test_total_cost / test_total_token
+        test_ppl = np.exp([min(test_avg_cost, 100)])
+        return test_avg_cost, test_ppl
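Validation now accumulates the summed loss and the token count over all batches and turns the resulting per-token average into a perplexity, with the exponent clipped at 100 to avoid overflow on a badly initialized model. The arithmetic in isolation, with made-up totals:

import numpy as np

test_total_cost = 51824.0     # sum of per-token cross-entropies over the set
test_total_token = 11972.0    # number of non-padding target tokens

test_avg_cost = test_total_cost / test_total_token
test_ppl = np.exp(min(test_avg_cost, 100))
print(test_avg_cost, test_ppl)   # ~4.33, ~75.9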
     # Initialize the parameters.
     exe.run(fluid.framework.default_startup_program())
...
@@ -158,27 +178,30 @@ def main():
                 ModelHyperParams.d_model), place)
 
     for pass_id in xrange(TrainTaskConfig.pass_num):
+        pass_start_time = time.time()
         for batch_id, data in enumerate(train_data()):
-            # The current program desc is coupled with batch_size, thus all
-            # mini-batches must have the same number of instances currently.
             if len(data) != TrainTaskConfig.batch_size:
                 continue
             data_input = prepare_batch_input(
                 data, encoder_input_data_names + decoder_input_data_names[:-1] +
-                label_data_names, ModelHyperParams.src_pad_idx,
-                ModelHyperParams.trg_pad_idx, ModelHyperParams.max_length,
-                ModelHyperParams.n_head)
+                label_data_names, ModelHyperParams.eos_idx,
+                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
+                ModelHyperParams.d_model)
             lr_scheduler.update_learning_rate(data_input)
             outs = exe.run(fluid.framework.default_main_program(),
                            feed=data_input,
-                           fetch_list=[cost],
+                           fetch_list=[sum_cost, avg_cost],
                            use_program_cache=True)
-            cost_val = np.array(outs[0])
-            print("pass_id = " + str(pass_id) + " batch = " + str(batch_id) +
-                  " cost = " + str(cost_val))
+            sum_cost_val, avg_cost_val = np.array(outs[0]), np.array(outs[1])
+            print("epoch: %d, batch: %d, sum loss: %f, avg loss: %f, ppl: %f" %
+                  (pass_id, batch_id, sum_cost_val, avg_cost_val,
+                   np.exp([min(avg_cost_val[0], 100)])))
         # Validate and save the model for inference.
-        val_cost = test(exe)
-        print("pass_id = " + str(pass_id) + " val_cost = " + str(val_cost))
+        val_avg_cost, val_ppl = test(exe)
+        pass_end_time = time.time()
+        time_consumed = pass_end_time - pass_start_time
+        print("epoch: %d, val avg loss: %f, val ppl: %f, "
+              "consumed %fs" % (pass_id, val_avg_cost, val_ppl, time_consumed))
         fluid.io.save_inference_model(
             os.path.join(TrainTaskConfig.model_dir,
                          "pass_" + str(pass_id) + ".infer.model"),
...
fluid/object_detection/.gitignore  0 → 100644  View file @ f258a876
./data/pascalvoc/VOCdevkit/
data/pascalvoc/test.txt
data/pascalvoc/trainval.txt
pretrained/ssd_mobilenet_v1_coco.tar.gz
pretrained/ssd_mobilenet_v1_coco
pretrained/mobilenet_v1_imagenet.tar.gz
pretrained/mobilenet_v1_imagenet
log*
fluid/object_detection/data/prepare_voc_data.py → fluid/object_detection/data/pascalvoc/create_list.py  View file @ f258a876
...
@@ -60,4 +60,5 @@ def prepare_filelist(devkit_dir, years, output_dir):
         ftest.write(item[0] + ' ' + item[1] + '\n')
 
-prepare_filelist(devkit_dir, years, '.')
+if __name__ == '__main__':
+    prepare_filelist(devkit_dir, years, '.')
fluid/object_detection/data/pascalvoc/download.sh  0 → 100644  View file @ f258a876
DIR="$(cd "$(dirname "$0")"; pwd -P)"
cd "$DIR"

# Download the data.
echo "Downloading..."
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar

# Extract the data.
echo "Extractint..."
tar -xf VOCtrainval_11-May-2012.tar
tar -xf VOCtrainval_06-Nov-2007.tar
tar -xf VOCtest_06-Nov-2007.tar

echo "Creating data lists..."
python create_list.py
fluid/object_detection/data/label_list → fluid/object_detection/data/pascalvoc/label_list  View file @ f258a876
File moved
fluid/object_detection/image_util.py  View file @ f258a876
...
@@ -85,8 +85,7 @@ def satisfy_sample_constraint(sampler, sample_bbox, bbox_labels):
     return False
 
-def generate_batch_samples(batch_sampler, bbox_labels, image_width,
-                           image_height):
+def generate_batch_samples(batch_sampler, bbox_labels):
     sampled_bbox = []
     index = []
     c = 0
...
@@ -217,8 +216,8 @@ def distort_image(img, settings):
 def expand_image(img, bbox_labels, img_width, img_height, settings):
     prob = random.uniform(0, 1)
     if prob < settings._expand_prob:
         expand_ratio = random.uniform(1, settings._expand_max_ratio)
-        if _expand_max_ratio - 1 >= 0.01:
+        if expand_ratio - 1 >= 0.01:
             height = int(img_height * expand_ratio)
             width = int(img_width * expand_ratio)
             h_off = math.floor(random.uniform(0, height - img_height))
...
@@ -231,5 +230,5 @@ def expand_image(img, bbox_labels, img_width, img_height, settings):
             expand_img = Image.fromarray(expand_img)
             expand_img.paste(img, (int(w_off), int(h_off)))
             bbox_labels = transform_labels(bbox_labels, expand_bbox)
-            return expand_img, bbox_labels
-    return img, bbox_labels
+            return expand_img, bbox_labels, width, height
+    return img, bbox_labels, img_width, img_height
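`expand_image` pastes the original picture onto a larger canvas at a random offset and, after this change, also reports the new canvas size so the caller can keep its width/height bookkeeping consistent. A minimal PIL sketch of the same idea, without the bbox handling and with hard-coded randomness and fill color (both are assumptions for illustration):

import numpy as np
from PIL import Image

def expand_canvas(img, expand_ratio, w_off, h_off, fill=(127, 127, 127)):
    # Place img on a canvas expand_ratio times larger, at offset (w_off, h_off).
    img_width, img_height = img.size
    width = int(img_width * expand_ratio)
    height = int(img_height * expand_ratio)
    canvas = Image.new('RGB', (width, height), fill)
    canvas.paste(img, (int(w_off), int(h_off)))
    return canvas, width, height

img = Image.fromarray(np.zeros((300, 300, 3), dtype=np.uint8))
expanded, w, h = expand_canvas(img, expand_ratio=1.5, w_off=50, h_off=75)
print(expanded.size, w, h)   # (450, 450) 450 450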
fluid/object_detection/load_model.py  deleted  100644 → 0  View file @ 1af463bf
import paddle.v2 as paddle
import paddle.fluid as fluid
import numpy as np


# From npy
def load_vars():
    vars = {}
    name_map = {}
    with open('./ssd_mobilenet_v1_coco/names.map', 'r') as map_file:
        for param in map_file:
            fd_name, tf_name = param.strip().split('\t')
            name_map[fd_name] = tf_name

    tf_vars = np.load(
        './ssd_mobilenet_v1_coco/ssd_mobilenet_v1_coco_2017_11_17.npy').item()
    for fd_name in name_map:
        tf_name = name_map[fd_name]
        tf_var = tf_vars[tf_name]
        if len(tf_var.shape) == 4 and 'depthwise' in tf_name:
            vars[fd_name] = np.transpose(tf_var, (2, 3, 0, 1))
        elif len(tf_var.shape) == 4:
            vars[fd_name] = np.transpose(tf_var, (3, 2, 0, 1))
        else:
            vars[fd_name] = tf_var

    return vars


def load_and_set_vars(place):
    vars = load_vars()
    for k, v in vars.items():
        t = fluid.global_scope().find_var(k).get_tensor()
        #print(np.array(t).shape, v.shape, k)
        assert np.array(t).shape == v.shape
        t.set(v, place)


# From Paddle V1
def load_paddlev1_vars(place):
    vars = {}
    name_map = {}
    with open('./caffe2paddle/names.map', 'r') as map_file:
        for param in map_file:
            fd_name, tf_name = param.strip().split('\t')
            name_map[fd_name] = tf_name

    from operator import mul

    def load(file_name, shape):
        with open(file_name, 'rb') as f:
            f.read(16)
            arr = np.fromfile(f, dtype=np.float32)
            #print(arr.size, reduce(mul, shape), file_name)
            assert arr.size == reduce(mul, shape)
            return arr.reshape(shape)

    for fd_name in name_map:
        v1_name = name_map[fd_name]
        t = fluid.global_scope().find_var(fd_name).get_tensor()
        shape = np.array(t).shape
        v1_var = load('./caffe2paddle/' + v1_name, shape)
        t.set(v1_var, place)


if __name__ == "__main__":
    load_vars()
fluid/object_detection/mobilenet_ssd.py  View file @ f258a876
...
@@ -27,12 +27,7 @@ def conv_bn(input,
         bias_attr=False)
     parameter_attr = ParamAttr(learning_rate=0.1, initializer=MSRA())
     bias_attr = ParamAttr(learning_rate=0.2)
     return fluid.layers.batch_norm(
-        input=conv, act=act)
+        input=conv,
+        act=act,
+        epsilon=0.00001,
+        param_attr=parameter_attr,
+        bias_attr=bias_attr)
 
 def depthwise_separable(input, num_filters1, num_filters2, num_groups, stride,
...
@@ -76,7 +71,7 @@ def extra_block(input, num_filters1, num_filters2, num_groups, stride, scale):
     return normal_conv
 
-def mobile_net(img, img_shape, scale=1.0):
+def mobile_net(num_classes, img, img_shape, scale=1.0):
     # 300x300
     tmp = conv_bn(img, 3, int(32 * scale), 2, 1, 3)
     # 150x150
...
@@ -104,10 +99,11 @@ def mobile_net(img, img_shape, scale=1.0):
     module16 = extra_block(module15, 128, 256, 1, 2, scale)
     # 2x2
     module17 = extra_block(module16, 64, 128, 1, 2, scale)
     mbox_locs, mbox_confs, box, box_var = fluid.layers.multi_box_head(
         inputs=[module11, module13, module14, module15, module16, module17],
         image=img,
-        num_classes=21,
+        num_classes=num_classes,
         min_ratio=20,
         max_ratio=90,
         min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
...
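The batch-norm call in `conv_bn` now passes explicit parameter attributes and a smaller epsilon. A hedged sketch of how the whole conv + batch-norm block fits together in Fluid; the batch-norm arguments follow the diff, while the convolution signature (filter_size, stride, padding, num_groups) is an assumption about the surrounding function, not a quote of it:

import paddle.fluid as fluid

def conv_bn(input, filter_size, num_filters, stride, padding,
            num_groups=1, act='relu'):
    # Convolution without its own bias; batch norm supplies scale and shift.
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        groups=num_groups,
        act=None,
        bias_attr=False)
    parameter_attr = fluid.ParamAttr(
        learning_rate=0.1, initializer=fluid.initializer.MSRA())
    bias_attr = fluid.ParamAttr(learning_rate=0.2)
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        epsilon=0.00001,
        param_attr=parameter_attr,
        bias_attr=bias_attr)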
fluid/object_detection/pretrained/download_coco.sh  0 → 100644  View file @ f258a876
DIR="$(cd "$(dirname "$0")"; pwd -P)"
cd "$DIR"

# Download the data.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/ssd_mobilenet_v1_coco.tar.gz

echo "Extractint..."
tar -xf ssd_mobilenet_v1_coco.tar.gz

fluid/object_detection/pretrained/download_imagenet.sh  0 → 100644  View file @ f258a876
DIR="$(cd "$(dirname "$0")"; pwd -P)"
cd "$DIR"

# Download the data.
echo "Downloading..."
wget http://paddlemodels.bj.bcebos.com/mobilenet_v1_imagenet.tar.gz

echo "Extractint..."
tar -xf mobilenet_v1_imagenet.tar.gz
fluid/object_detection/reader.py  View file @ f258a876
...
@@ -16,19 +16,25 @@ import image_util
 from paddle.utils.image_util import *
 import random
 from PIL import Image
+from PIL import ImageDraw
 import numpy as np
 import xml.etree.ElementTree
 import os
+import time
+import copy
 
 class Settings(object):
-    def __init__(self, data_dir, label_file, resize_h, resize_w, mean_value,
-                 apply_distort, apply_expand):
+    def __init__(self, dataset, toy, data_dir, label_file, resize_h, resize_w,
+                 mean_value, apply_distort, apply_expand):
+        self._dataset = dataset
+        self._toy = toy
         self._data_dir = data_dir
-        label_fpath = os.path.join(data_dir, label_file)
         self._label_list = []
-        for line in open(label_fpath):
-            self._label_list.append(line.strip())
+        if dataset == "pascalvoc":
+            label_fpath = os.path.join(data_dir, label_file)
+            for line in open(label_fpath):
+                self._label_list.append(line.strip())
         self._apply_distort = apply_distort
         self._apply_expand = apply_expand
...
@@ -47,6 +53,14 @@ class Settings(object):
         self._brightness_prob = 0.5
         self._brightness_delta = 0.125
 
+    @property
+    def dataset(self):
+        return self._dataset
+
+    @property
+    def toy(self):
+        return self._toy
+
     @property
     def apply_distort(self):
         return self._apply_expand
...
@@ -59,6 +73,10 @@ class Settings(object):
     def data_dir(self):
         return self._data_dir
 
+    @data_dir.setter
+    def data_dir(self, data_dir):
+        self._data_dir = data_dir
+
     @property
     def label_list(self):
         return self._label_list
...
@@ -78,23 +96,76 @@ class Settings(object):
 def _reader_creator(settings, file_list, mode, shuffle):
     def reader():
-        with open(file_list) as flist:
-            lines = [line.strip() for line in flist]
-            if shuffle:
-                random.shuffle(lines)
-            for line in lines:
+        if settings.dataset == 'coco':
+            # cocoapi
+            from pycocotools.coco import COCO
+            from pycocotools.cocoeval import COCOeval
+            coco = COCO(file_list)
+            image_ids = coco.getImgIds()
+            images = coco.loadImgs(image_ids)
+            category_ids = coco.getCatIds()
+            category_names = [
+                item['name'] for item in coco.loadCats(category_ids)
+            ]
+        elif settings.dataset == 'pascalvoc':
+            flist = open(file_list)
+            images = [line.strip() for line in flist]
+
+        if not settings.toy == 0:
+            images = images[:settings.toy] if len(
+                images) > settings.toy else images
+        print("{} on {} with {} images".format(mode, settings.dataset,
+                                               len(images)))
+
+        if shuffle:
+            random.shuffle(images)
+
+        for image in images:
+            if settings.dataset == 'coco':
+                image_name = image['file_name']
+                image_path = os.path.join(settings.data_dir, image_name)
+            elif settings.dataset == 'pascalvoc':
                 if mode == 'train' or mode == 'test':
-                    img_path, label_path = line.split()
-                    img_path = os.path.join(settings.data_dir, img_path)
+                    image_path, label_path = image.split()
+                    image_path = os.path.join(settings.data_dir, image_path)
                     label_path = os.path.join(settings.data_dir, label_path)
                 elif mode == 'infer':
-                    img_path = os.path.join(settings.data_dir, line)
+                    image_path = os.path.join(settings.data_dir, image)
 
-            img = Image.open(img_path)
-            img_width, img_height = img.size
+            img = Image.open(image_path)
+            if img.mode == 'L':
+                img = img.convert('RGB')
+            img_width, img_height = img.size
 
-            # layout: label | xmin | ymin | xmax | ymax | difficult
             if mode == 'train' or mode == 'test':
+                if settings.dataset == 'coco':
+                    # layout: category_id | xmin | ymin | xmax | ymax | iscrowd | origin_coco_bbox | segmentation | area | image_id | annotation_id
+                    bbox_labels = []
+                    annIds = coco.getAnnIds(imgIds=image['id'])
+                    anns = coco.loadAnns(annIds)
+                    for ann in anns:
+                        bbox_sample = []
+                        # start from 1, leave 0 to background
+                        bbox_sample.append(
+                            float(category_ids.index(ann['category_id'])) + 1)
+                        bbox = ann['bbox']
+                        xmin, ymin, w, h = bbox
+                        xmax = xmin + w
+                        ymax = ymin + h
+                        bbox_sample.append(float(xmin) / img_width)
+                        bbox_sample.append(float(ymin) / img_height)
+                        bbox_sample.append(float(xmax) / img_width)
+                        bbox_sample.append(float(ymax) / img_height)
+                        bbox_sample.append(float(ann['iscrowd']))
+                        #bbox_sample.append(ann['bbox'])
+                        #bbox_sample.append(ann['segmentation'])
+                        #bbox_sample.append(ann['area'])
+                        #bbox_sample.append(ann['image_id'])
+                        #bbox_sample.append(ann['id'])
+                        bbox_labels.append(bbox_sample)
+                elif settings.dataset == 'pascalvoc':
+                    # layout: label | xmin | ymin | xmax | ymax | difficult
                     bbox_labels = []
                     root = xml.etree.ElementTree.parse(label_path).getroot()
                     for object in root.findall('object'):
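The new COCO branch converts each annotation from COCO's [x, y, width, height] pixel boxes into the normalized [label, xmin, ymin, xmax, ymax, iscrowd] rows the SSD pipeline expects, shifting category indices by one so index 0 stays reserved for background. The same conversion in isolation, with made-up annotation values:

def coco_ann_to_sample(ann, category_ids, img_width, img_height):
    label = float(category_ids.index(ann['category_id'])) + 1  # 0 = background
    xmin, ymin, w, h = ann['bbox']
    xmax, ymax = xmin + w, ymin + h
    return [label,
            float(xmin) / img_width, float(ymin) / img_height,
            float(xmax) / img_width, float(ymax) / img_height,
            float(ann['iscrowd'])]

ann = {'category_id': 18, 'bbox': [30., 45., 120., 90.], 'iscrowd': 0}
print(coco_ann_to_sample(ann, category_ids=[1, 18, 44],
                         img_width=300, img_height=300))
# [2.0, 0.1, 0.15, 0.5, 0.45, 0.0]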
...
@@ -117,91 +188,136 @@ def _reader_creator(settings, file_list, mode, shuffle):
...
@@ -117,91 +188,136 @@ def _reader_creator(settings, file_list, mode, shuffle):
bbox_sample
.
append
(
difficult
)
bbox_sample
.
append
(
difficult
)
bbox_labels
.
append
(
bbox_sample
)
bbox_labels
.
append
(
bbox_sample
)
sample_labels
=
bbox_labels
sample_labels
=
bbox_labels
if
mode
==
'train'
:
if
settings
.
_apply_distort
:
img
=
image_util
.
distort_image
(
img
,
settings
)
if
settings
.
_apply_expand
:
img
,
bbox_labels
=
image_util
.
expand_image
(
img
,
bbox_labels
,
img_width
,
img_height
,
settings
)
batch_sampler
=
[]
# hard-code here
batch_sampler
.
append
(
image_util
.
sampler
(
1
,
1
,
1.0
,
1.0
,
1.0
,
1.0
,
0.0
,
0.0
))
batch_sampler
.
append
(
image_util
.
sampler
(
1
,
50
,
0.3
,
1.0
,
0.5
,
2.0
,
0.1
,
0.0
))
batch_sampler
.
append
(
image_util
.
sampler
(
1
,
50
,
0.3
,
1.0
,
0.5
,
2.0
,
0.3
,
0.0
))
batch_sampler
.
append
(
image_util
.
sampler
(
1
,
50
,
0.3
,
1.0
,
0.5
,
2.0, 0.5, 0.0))
            batch_sampler.append(
                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.7, 0.0))
            batch_sampler.append(
                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.9, 0.0))
            batch_sampler.append(
                image_util.sampler(1, 50, 0.3, 1.0, 0.5, 2.0, 0.0, 1.0))

            if settings._apply_distort:
                img = image_util.distort_image(img, settings)
            if settings._apply_expand:
                img, bbox_labels, img_width, img_height = image_util.expand_image(
                    img, bbox_labels, img_width, img_height, settings)

            """ random crop """
            sampled_bbox = image_util.generate_batch_samples(
                batch_sampler, bbox_labels, img_width, img_height)

            img = np.array(img)
            if len(sampled_bbox) > 0:
                idx = int(random.uniform(0, len(sampled_bbox)))
                img, sample_labels = image_util.crop_image(
                    img, bbox_labels, sampled_bbox[idx], img_width, img_height)

            img = Image.fromarray(img)
        img = img.resize((settings.resize_w, settings.resize_h), Image.ANTIALIAS)
        img = np.array(img)

        if mode == 'train':
            mirror = int(random.uniform(0, 2))
            if mirror == 1:
                img = img[:, ::-1, :]
                for i in xrange(len(sample_labels)):
                    tmp = sample_labels[i][1]
                    sample_labels[i][1] = 1 - sample_labels[i][3]
                    sample_labels[i][3] = 1 - tmp

        # HWC to CHW
        if len(img.shape) == 3:
            img = np.swapaxes(img, 1, 2)
            img = np.swapaxes(img, 1, 0)
        # RBG to BGR
        img = img[[2, 1, 0], :, :]
        img = img.astype('float32')
        img -= settings.img_mean
        img = img.flatten()
        img = img * 0.007843

        sample_labels = np.array(sample_labels)
        if mode == 'train' or mode == 'test':
            if mode == 'train' and len(sample_labels) == 0: continue
            if mode == 'test' and len(sample_labels) == 0: continue
            yield img.astype('float32'), sample_labels[:, 1:5], \
                sample_labels[:, 0].astype('int32'), \
                sample_labels[:, -1].astype('int32')
        elif mode == 'infer':
            yield img.astype('float32')

    return reader
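# Note: the 0.007843 scale above is approximately 1 / 127.5, so after the mean
# subtraction each pixel value ends up roughly in [-1, 1], e.g.
# (255 - 127.5) * 0.007843 ~= 1.0 and (0 - 127.5) * 0.007843 ~= -1.0.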
def draw_bounding_box_on_image(image,
                               sample_labels,
                               image_name,
                               category_names,
                               color='red',
                               thickness=4,
                               with_text=True,
                               normalized=True):
    image = Image.fromarray(image)
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    if not normalized:
        im_width, im_height = 1, 1
    for item in sample_labels:
        label = item[0]
        category_name = category_names[int(label)]
        bbox = item[1:5]
        xmin, ymin, xmax, ymax = bbox
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
        draw.line(
            [(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=thickness,
            fill=color)
        if with_text:
            if image.mode == 'RGB':
                draw.text((left, top), category_name, (255, 255, 0))
    image.save(image_name)
def train(settings, file_list, shuffle=True):
    file_list = os.path.join(settings.data_dir, file_list)
    if settings.dataset == 'coco':
        train_settings = copy.copy(settings)
        if '2014' in file_list:
            sub_dir = "train2014"
        elif '2017' in file_list:
            sub_dir = "train2017"
        train_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
        return _reader_creator(train_settings, file_list, 'train', shuffle)
    elif settings.dataset == 'pascalvoc':
        return _reader_creator(settings, file_list, 'train', shuffle)


def test(settings, file_list):
    file_list = os.path.join(settings.data_dir, file_list)
    if settings.dataset == 'coco':
        test_settings = copy.copy(settings)
        if '2014' in file_list:
            sub_dir = "val2014"
        elif '2017' in file_list:
            sub_dir = "val2017"
        test_settings.data_dir = os.path.join(settings.data_dir, sub_dir)
        return _reader_creator(test_settings, file_list, 'test', False)
    elif settings.dataset == 'pascalvoc':
        return _reader_creator(settings, file_list, 'test', False)


def infer(settings, file_list):
...
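The train/test/infer creators above are consumed by the training script in the next file through paddle.batch. A minimal wiring sketch follows, assuming a PASCAL VOC layout; the Settings values are placeholders that mirror the defaults in train.py, not anything taken from a recorded run:

import paddle
import reader

# Hypothetical Settings mirroring the PASCAL VOC defaults in train.py below.
data_args = reader.Settings(
    dataset='pascalvoc',
    toy=0,
    data_dir='data/pascalvoc',
    label_file='label_list',
    apply_distort=True,
    apply_expand=False,
    resize_h=300,
    resize_w=300,
    mean_value=[127.5, 127.5, 127.5])

train_reader = paddle.batch(
    reader.train(data_args, 'trainval.txt', shuffle=True), batch_size=32)
for batch in train_reader():
    # each sample is (image, gt_box, gt_label, difficult)
    break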
fluid/object_detection/train.py
View file @ f258a876
import paddle
import paddle.fluid as fluid
import reader
import load_model as load_model
from mobilenet_ssd import mobile_net
from utility import add_arguments, print_arguments
import os
import time
import numpy as np
import argparse
import functools
...
@@ -12,22 +13,40 @@ import functools
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('learning_rate',    float, 0.001,       "Learning rate.")
add_arg('batch_size',       int,   32,          "Minibatch size.")
add_arg('num_passes',       int,   25,          "Epoch number.")
add_arg('parallel',         bool,  True,        "Whether use parallel training.")
add_arg('use_gpu',          bool,  True,        "Whether use GPU.")
add_arg('use_nccl',         bool,  False,       "Whether use NCCL.")
add_arg('dataset',          str,   'pascalvoc', "coco or pascalvoc.")
add_arg('model_save_dir',   str,   'model',     "The path to save model.")
add_arg('pretrained_model', str,   'pretrained/ssd_mobilenet_v1_coco/', "The init model path.")
add_arg('apply_distort',    bool,  True,        "Whether to apply distortion.")
add_arg('apply_expand',     bool,  False,       "Whether to apply expansion.")
add_arg('resize_h',         int,   300,         "The resized image height.")
add_arg('resize_w',         int,   300,         "The resized image width.")
add_arg('mean_value_B',     float, 127.5,       "Mean value to be subtracted from the B channel.")  # 123.68
add_arg('mean_value_G',     float, 127.5,       "Mean value to be subtracted from the G channel.")  # 116.78
add_arg('mean_value_R',     float, 127.5,       "Mean value to be subtracted from the R channel.")  # 103.94
add_arg('is_toy',           int,   0,           "Toy for quick debug, 0 means using all data, while n means using only n samples.")
# yapf: disable
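# Example invocation (illustrative values, not from a recorded run):
#   python train.py --dataset pascalvoc --batch_size 32 --use_gpu True \
#       --pretrained_model pretrained/ssd_mobilenet_v1_coco/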
def parallel_do(args,
                train_file_list,
                val_file_list,
                data_args,
                learning_rate,
                batch_size,
                num_passes,
                model_save_dir,
                pretrained_model=None):
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
...
@@ -39,15 +58,16 @@ def train(args,
    if args.parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=args.use_nccl)
        with pd.do():
            image_ = pd.read_input(image)
            gt_box_ = pd.read_input(gt_box)
            gt_label_ = pd.read_input(gt_label)
            difficult_ = pd.read_input(difficult)
            locs, confs, box, box_var = mobile_net(num_classes, image_,
                                                   image_shape)
            loss = fluid.layers.ssd_loss(locs, confs, gt_box_, gt_label_,
                                         box, box_var)
            nmsed_out = fluid.layers.detection_output(
                locs, confs, box, box_var, nms_threshold=0.45)
            loss = fluid.layers.reduce_sum(loss)
...
@@ -57,11 +77,11 @@ def train(args,
        loss, nmsed_out = pd()
        loss = fluid.layers.mean(loss)
    else:
        locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
        nmsed_out = fluid.layers.detection_output(
            locs, confs, box, box_var, nms_threshold=0.45)
        loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label,
                                     box, box_var)
        loss = fluid.layers.reduce_sum(loss)

    test_program = fluid.default_main_program().clone(for_test=True)
...
@@ -71,13 +91,20 @@ def train(args,
        gt_label,
        gt_box,
        difficult,
        num_classes,
        overlap_threshold=0.5,
        evaluate_difficult=False,
        ap_version='integral')

    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
            boundaries = [82783 / batch_size * 12, 82783 / batch_size * 19]
        elif '2017' in train_file_list:
            boundaries = [118287 / batch_size * 12, 118287 / batch_size * 19]
    elif data_args.dataset == 'pascalvoc':
        boundaries = [40000, 60000]
    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
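    # Note: 82783 and 118287 are the number of training images in COCO2014 and
    # COCO2017, so each boundary is (images / batch_size) * pass, i.e. the
    # iteration at which the decay starts. With the default batch_size of 32 and
    # Python 2 integer division, COCO2017 gives 118287 / 32 = 3696 iterations per
    # pass, hence boundaries of [3696 * 12, 3696 * 19] = [44352, 70224].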
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005), )
...
@@ -88,8 +115,11 @@ def train(args,
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        #load_model.load_paddlev1_vars(place)
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    test_reader = paddle.batch(
...
@@ -108,37 +138,167 @@ def train(args,
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        end_time = 0
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            loss_v = exe.run(fluid.default_main_program(),
                             feed=feeder.feed(data),
                             fetch_list=[loss])
            end_time = time.time()
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v[0], start_time - prev_start_time))
        test(pass_id)

        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            model_path = os.path.join(model_save_dir, str(pass_id))
            print 'save models to %s' % (model_path)
            fluid.io.save_persistables(exe, model_path)
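# Note: checkpoints written by fluid.io.save_persistables hold raw parameter
# files rather than a pruned inference program, so a later run would typically
# restore them with the matching fluid.io.load_persistables(exe, model_path)
# after rebuilding the same network definition.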
def parallel_exe(args,
                 train_file_list,
                 val_file_list,
                 data_args,
                 learning_rate,
                 batch_size,
                 num_passes,
                 model_save_dir='model',
                 pretrained_model=None):
    image_shape = [3, data_args.resize_h, data_args.resize_w]
    if data_args.dataset == 'coco':
        num_classes = 81
    elif data_args.dataset == 'pascalvoc':
        num_classes = 21

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    gt_box = fluid.layers.data(
        name='gt_box', shape=[4], dtype='float32', lod_level=1)
    gt_label = fluid.layers.data(
        name='gt_label', shape=[1], dtype='int32', lod_level=1)
    difficult = fluid.layers.data(
        name='gt_difficult', shape=[1], dtype='int32', lod_level=1)

    locs, confs, box, box_var = mobile_net(num_classes, image, image_shape)
    nmsed_out = fluid.layers.detection_output(
        locs, confs, box, box_var, nms_threshold=0.45)
    loss = fluid.layers.ssd_loss(locs, confs, gt_box, gt_label, box, box_var)
    loss = fluid.layers.reduce_sum(loss)

    test_program = fluid.default_main_program().clone(for_test=True)
    with fluid.program_guard(test_program):
        map_eval = fluid.evaluator.DetectionMAP(
            nmsed_out,
            gt_label,
            gt_box,
            difficult,
            num_classes,
            overlap_threshold=0.5,
            evaluate_difficult=False,
            ap_version='integral')

    if data_args.dataset == 'coco':
        # learning rate decay in 12, 19 pass, respectively
        if '2014' in train_file_list:
            boundaries = [82783 / batch_size * 12, 82783 / batch_size * 19]
        elif '2017' in train_file_list:
            boundaries = [118287 / batch_size * 12, 118287 / batch_size * 19]
    elif data_args.dataset == 'pascalvoc':
        boundaries = [40000, 60000]
    values = [learning_rate, learning_rate * 0.5, learning_rate * 0.25]
    optimizer = fluid.optimizer.RMSProp(
        learning_rate=fluid.layers.piecewise_decay(boundaries, values),
        regularization=fluid.regularizer.L2Decay(0.00005), )

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if pretrained_model:
        def if_exist(var):
            return os.path.exists(os.path.join(pretrained_model, var.name))
        fluid.io.load_vars(exe, pretrained_model, predicate=if_exist)

    train_exe = fluid.ParallelExecutor(
        use_cuda=args.use_gpu, loss_name=loss.name)

    train_reader = paddle.batch(
        reader.train(data_args, train_file_list), batch_size=batch_size)
    test_reader = paddle.batch(
        reader.test(data_args, val_file_list), batch_size=batch_size)
    feeder = fluid.DataFeeder(
        place=place, feed_list=[image, gt_box, gt_label, difficult])

    def test(pass_id):
        _, accum_map = map_eval.get_map_var()
        map_eval.reset(exe)
        test_map = None
        for _, data in enumerate(test_reader()):
            test_map = exe.run(test_program,
                               feed=feeder.feed(data),
                               fetch_list=[accum_map])
        print("Test {0}, map {1}".format(pass_id, test_map[0]))

    for pass_id in range(num_passes):
        start_time = time.time()
        prev_start_time = start_time
        end_time = 0
        test(pass_id)
        for batch_id, data in enumerate(train_reader()):
            prev_start_time = start_time
            start_time = time.time()
            loss_v, = train_exe.run(fetch_list=[loss.name],
                                    feed_dict=feeder.feed(data))
            end_time = time.time()
            loss_v = np.mean(np.array(loss_v))
            if batch_id % 20 == 0:
                print("Pass {0}, batch {1}, loss {2}, time {3}".format(
                    pass_id, batch_id, loss_v, start_time - prev_start_time))
        if pass_id % 10 == 0 or pass_id == num_passes - 1:
            model_path = os.path.join(model_save_dir, str(pass_id))
            print 'save models to %s' % (model_path)
            fluid.io.save_persistables(exe, model_path)


if __name__ == '__main__':
    args = parser.parse_args()
    print_arguments(args)

    data_dir = 'data/pascalvoc'
    train_file_list = 'trainval.txt'
    val_file_list = 'test.txt'
    label_file = 'label_list'
    model_save_dir = args.model_save_dir
    if args.dataset == 'coco':
        data_dir = './data/COCO17'
        train_file_list = 'annotations/instances_train2017.json'
        val_file_list = 'annotations/instances_val2017.json'
        label_file = 'label_list'

    data_args = reader.Settings(
        dataset=args.dataset,
        toy=args.is_toy,
        data_dir=data_dir,
        label_file=label_file,
        apply_distort=args.apply_distort,
        apply_expand=args.apply_expand,
        resize_h=args.resize_h,
        resize_w=args.resize_w,
        mean_value=[args.mean_value_B, args.mean_value_G, args.mean_value_R])
    #method = parallel_do
    method = parallel_exe
    method(
        args,
        train_file_list=train_file_list,
        val_file_list=val_file_list,
        data_args=data_args,
        learning_rate=args.learning_rate,
        batch_size=args.batch_size,
        num_passes=args.num_passes,
        model_save_dir=model_save_dir,
        pretrained_model=args.pretrained_model)
fluid/policy_gradient/brain.py
View file @ f258a876
...
@@ -30,32 +30,28 @@ class PolicyGradient:
        acts = fluid.layers.data(name='acts', shape=[1], dtype='int64')
        vt = fluid.layers.data(name='vt', shape=[1], dtype='float32')
        # fc1
        fc1 = fluid.layers.fc(
            input=obs,
            size=10,
            act="tanh")  # tanh activation
        # fc2
        self.all_act_prob = fluid.layers.fc(input=fc1,
                                            size=self.n_actions,
                                            act="softmax")
        self.inferece_program = fluid.default_main_program().clone()
        # to maximize total reward (log_p * R) is to minimize -(log_p * R)
        neg_log_prob = fluid.layers.cross_entropy(
            input=self.all_act_prob,
            label=acts)  # this is negative log of chosen action
        neg_log_prob_weight = fluid.layers.elementwise_mul(x=neg_log_prob, y=vt)
        loss = fluid.layers.reduce_mean(neg_log_prob_weight)  # reward guided loss

        sgd_optimizer = fluid.optimizer.SGD(self.lr)
        sgd_optimizer.minimize(loss)
        self.exe.run(fluid.default_startup_program())

    def choose_action(self, observation):
        prob_weights = self.exe.run(
            self.inferece_program,
            feed={"obs": observation[np.newaxis, :]},
            fetch_list=[self.all_act_prob])
        prob_weights = np.array(prob_weights[0])
        action = np.random.choice(
            range(prob_weights.shape[1]),
...
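The loss assembled in build_net above is the standard REINFORCE objective: cross_entropy gives -log pi(a|s) for the chosen action, and weighting it by the return vt before taking the mean yields a gradient that raises the probability of actions followed by high reward. A small NumPy sketch of the same arithmetic, purely illustrative and independent of the fluid program:

import numpy as np

# Hypothetical batch: softmax outputs over 2 actions, chosen actions, returns vt.
all_act_prob = np.array([[0.7, 0.3], [0.4, 0.6]])
acts = np.array([0, 1])
vt = np.array([1.5, -0.5])

neg_log_prob = -np.log(all_act_prob[np.arange(len(acts)), acts])  # -log p(chosen action)
loss = np.mean(neg_log_prob * vt)  # reward guided loss, as in build_net
print(loss)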