Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
1847c180
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1847c180
编写于
4月 12, 2018
作者:
D
Dang Qingqing
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/models
into ssd_pl_exe
上级
637477f7
6fa30500
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
322 addition
and
145 deletion
+322
-145
.gitignore
.gitignore
+1
-0
fluid/image_classification/caffe2fluid/README.md
fluid/image_classification/caffe2fluid/README.md
+5
-5
fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
...e_classification/caffe2fluid/examples/imagenet/compare.py
+85
-0
fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh
...mage_classification/caffe2fluid/examples/imagenet/diff.sh
+64
-0
fluid/image_classification/caffe2fluid/examples/imagenet/infer.py
...age_classification/caffe2fluid/examples/imagenet/infer.py
+67
-14
fluid/image_classification/caffe2fluid/examples/imagenet/run.sh
...image_classification/caffe2fluid/examples/imagenet/run.sh
+7
-2
fluid/image_classification/caffe2fluid/kaffe/graph.py
fluid/image_classification/caffe2fluid/kaffe/graph.py
+4
-1
fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
.../image_classification/caffe2fluid/kaffe/paddle/network.py
+12
-35
fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
...ge_classification/caffe2fluid/kaffe/paddle/transformer.py
+8
-2
fluid/neural_machine_translation/transformer/config.py
fluid/neural_machine_translation/transformer/config.py
+8
-14
fluid/neural_machine_translation/transformer/infer.py
fluid/neural_machine_translation/transformer/infer.py
+19
-21
fluid/neural_machine_translation/transformer/model.py
fluid/neural_machine_translation/transformer/model.py
+5
-20
fluid/neural_machine_translation/transformer/train.py
fluid/neural_machine_translation/transformer/train.py
+31
-20
fluid/policy_gradient/brain.py
fluid/policy_gradient/brain.py
+6
-11
未找到文件。
.gitignore
浏览文件 @
1847c180
.DS_Store
*.pyc
.*~
fluid/image_classification/caffe2fluid/README.md
浏览文件 @
1847c180
...
...
@@ -18,19 +18,19 @@ This tool is used to convert a Caffe model to Fluid model
### Tested models
-
Lenet
on mnist dataset
-
Lenet
-
ResNets:(ResNet-50, ResNet-101, ResNet-152)
model addr:
`https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777`
_
[
model addr
](
https://onedrive.live.com/?authkey=%21AAFW2-FVoxeVRck&id=4006CBB8476FF777%2117887&cid=4006CBB8476FF777
)
-
GoogleNet:
model addr:
`https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034`
_
[
model addr
](
https://gist.github.com/jimmie33/7ea9f8ac0da259866b854460f4526034
)
-
VGG:
model addr:
`https://gist.github.com/ksimonyan/211839e770f7b538e2d8`
_
[
model addr
](
https://gist.github.com/ksimonyan/211839e770f7b538e2d8
)
-
AlexNet:
model addr:
`https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet`
_
[
model addr
](
https://github.com/BVLC/caffe/tree/master/models/bvlc_alexnet
)
### Notes
Some of this code comes from: https://github.com/ethereon/caffe-tensorflow
fluid/image_classification/caffe2fluid/examples/imagenet/compare.py
0 → 100644
浏览文件 @
1847c180
#!/usr/bin/python
#
#a tool to compare tensors in two files or two directories
#
import
sys
import
os
def walk_dir(rootdir):
    """Yield every file below ``rootdir`` as a path relative to ``rootdir``.

    Args:
        rootdir: directory to walk recursively.

    Yields:
        The path of each file relative to ``rootdir``. For a file located
        directly inside ``rootdir`` this is just its file name.
    """
    for subdir, _dirs, names in os.walk(rootdir):
        for name in names:
            # Keep the sub-directory part: callers join the yielded value
            # back onto a root directory, and a bare file name would point
            # to the wrong place for files that live in nested directories.
            yield os.path.relpath(os.path.join(subdir, name), rootdir)
def calc_diff(f1, f2):
    """Compare the tensors stored in two ``.npy`` files element by element.

    Both arrays are flattened before the comparison, so only the number of
    elements has to match, not the original shapes.

    Args:
        f1: path of the first ``.npy`` file.
        f2: path of the second ``.npy`` file.

    Returns:
        A tuple ``(max_df, sq_df)``: the largest absolute difference and the
        mean of the squared differences. ``(-1.0, -1.0)`` is returned when
        the difference itself cannot be computed.

    Raises:
        AssertionError: if the two tensors hold a different number of
            elements.
    """
    import numpy as np

    d1 = np.load(f1).flatten()
    d2 = np.load(f2).flatten()

    # Both arrays are 1-D after flatten(), so ``size`` is the element count.
    if d1.size != d2.size:
        print(d1.shape)
        print(d2.shape)
        # Raised explicitly instead of via ``assert`` so that the check is
        # not stripped when Python runs with -O.
        raise AssertionError("their shape is not consistent")

    try:
        df = np.abs(d1 - d2)
        max_df = np.max(df)
        sq_df = np.mean(df * df)
    except Exception:
        # Negative values act as a failure marker for the caller; a real
        # difference is never negative.
        return -1.0, -1.0
    return max_df, sq_df
def compare(path1, path2):
    """Compare the tensors in two ``.npy`` files or in two directories.

    When both paths look like ``.npy`` files they are compared directly.
    Otherwise ``path2`` is walked and every ``.npy`` file found there is
    compared with the file of the same name under ``path1``.

    Args:
        path1: first ``.npy`` file or directory.
        path2: second ``.npy`` file or directory.

    Returns:
        0 when every comparison passed, 1 when one of the paths does not
        exist.

    Raises:
        AssertionError: if a difference could not be calculated or exceeds
            the allowed thresholds (max_df < 1e-5, sq_df < 1e-10).
    """

    def diff(f1, f2):
        max_df, sq_df = calc_diff(f1, f2)
        print('compare %s <=> %s with result[max_df:%.4e, sq_df:%.4e]' %
              (f1, f2, max_df, sq_df))
        # calc_diff reports an internal failure with negative values. These
        # would slip under the thresholds below and be counted as a pass, so
        # they are rejected explicitly.
        if max_df < 0 or sq_df < 0:
            raise AssertionError(
                'failed to calculate the difference of %s and %s' % (f1, f2))
        if not (max_df < 1e-5):
            raise AssertionError(
                'max_df is too large with value[%.6e]' % (max_df))
        if not (sq_df < 1e-10):
            raise AssertionError(
                'sq_df is too large with value[%.6e]' % (sq_df))

    if os.path.exists(path1) is False:
        print('not found %s' % (path1))
        return 1
    elif os.path.exists(path2) is False:
        print('not found %s' % (path2))
        return 1

    if path1.find('.npy') > 0 and path2.find('.npy') > 0:
        diff(path1, path2)
        # Report success with the same value as the directory branch.
        return 0

    for f in walk_dir(path2):
        if f.find('.npy') < 0:
            continue

        f1 = os.path.join(path1, f)
        f2 = os.path.join(path2, f)
        diff(f1, f2)

    print('all checking succeed to pass')
    return 0
if __name__ == "__main__":
    # Command line entry: with no arguments the default lenet result
    # directories are compared, with two arguments those two paths are
    # compared; anything else prints the usage and exits with status 1.
    if len(sys.argv) == 1:
        path1 = 'lenet.tf/results'
        path2 = 'lenet.paddle/results'
    elif len(sys.argv) == 3:
        path1 = sys.argv[1]
        path2 = sys.argv[2]
    else:
        print('usage:')
        print(' %s [path1] [path2]' % (sys.argv[0]))
        # sys.exit is used instead of the ``exit`` helper, which is only
        # injected by the ``site`` module and is not always available.
        sys.exit(1)

    print('compare inner result in %s %s' % (path1, path2))
    sys.exit(compare(path1, path2))
fluid/image_classification/caffe2fluid/examples/imagenet/diff.sh
0 → 100644
浏览文件 @
1847c180
#!/bin/bash

#
#function:
#   a tool used to check the difference of models' results generated by caffe model and paddle model
#
#howto:
#   bash diff.sh resnet50 #when this has been finished, you can get the difference in precision
#
#notes:
#   0, in order to infer using caffe, we need pycaffe installed
#   1, prepare your caffe model in 'models.caffe/', eg: 'models.caffe/resnet101/resnet101.[prototxt|caffemodel]'
#   2, converted paddle model will be in 'models'
#   3, results of layers will be stored in 'results/${model_name}.[paddle|caffe]'
#   4, only the last layer will be checked by default

model_name="resnet50"
results_root="results/"

# An optional first argument selects the model; '-h' prints the usage.
if [[ -n $1 ]]; then
    if [ "$1" = "-h" ]; then
        echo "usage:"
        echo " bash $0 [model_name]"
        echo " eg:bash $0 resnet50"
        exit 0
    fi
    model_name=$1
fi

# All expansions below are quoted so that a model name or path containing
# spaces or glob characters is passed through as a single word.
mkdir -p "$results_root"

model_prototxt="models.caffe/$model_name/${model_name}.prototxt"
model_caffemodel="models.caffe/${model_name}/${model_name}.caffemodel"

#1, dump layers' results from paddle
paddle_results="$results_root/${model_name}.paddle"
rm -rf "$paddle_results"
rm -rf "results.paddle"
bash run.sh "$model_name" "./models.caffe/$model_name" "./models/$model_name"
if [[ $? -ne 0 ]] || [[ ! -e "results.paddle" ]]; then
    echo "not found paddle's results, maybe failed to convert"
    exit 1
fi
mv results.paddle "$paddle_results"

#2, dump layers' results from caffe
caffe_results="$results_root/${model_name}.caffe"
rm -rf "$caffe_results"
rm -rf "results.caffe"
cfpython ./infer.py caffe "$model_prototxt" "$model_caffemodel" "$paddle_results/data.npy"
if [[ $? -ne 0 ]] || [[ ! -e "results.caffe" ]]; then
    echo "not found caffe's results, maybe failed to do inference with caffe"
    exit 1
fi
mv results.caffe "$caffe_results"

#3, extract layer names
grep name "$model_prototxt" | perl -ne 'if(/^\s*name:\s+\"([^\"]+)/){ print $1."\n";}' > .layer_names

#4, compare one by one
# only the last extracted layer name is checked (see note 4 above)
for i in $(tail -n1 ".layer_names"); do
    echo "process $i"
    python compare.py "$caffe_results/${i}.npy" "$paddle_results/${i}.npy"
done
fluid/image_classification/caffe2fluid/examples/imagenet/infer.py
浏览文件 @
1847c180
...
...
@@ -10,8 +10,11 @@ import os
import
sys
import
inspect
import
numpy
as
np
import
paddle.v2
as
paddle
import
paddle.v2.fluid
as
fluid
def import_fluid():
    """Import ``paddle.fluid`` and return the module object."""
    # The import is done inside the function, so it only runs when this
    # function is called.
    import paddle.fluid as fluid
    return fluid
def
load_data
(
imgfile
,
shape
):
...
...
@@ -52,8 +55,10 @@ def build_model(net_file, net_name):
print
(
e
)
return
None
input_name
=
'data'
input_shape
=
MyNet
.
input_shapes
()[
input_name
]
fluid
=
import_fluid
()
inputs_dict
=
MyNet
.
input_shapes
()
input_name
=
inputs_dict
.
keys
()[
0
]
input_shape
=
inputs_dict
[
input_name
]
images
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
input_shape
,
dtype
=
'float32'
)
#label = fluid.layers.data(name='label', shape=[1], dtype='int64')
...
...
@@ -64,7 +69,7 @@ def build_model(net_file, net_name):
def
dump_results
(
results
,
names
,
root
):
if
os
.
path
.
exists
(
root
)
is
False
:
os
.
path
.
mkdir
(
root
)
os
.
mkdir
(
root
)
for
i
in
range
(
len
(
names
)):
n
=
names
[
i
]
...
...
@@ -73,9 +78,12 @@ def dump_results(results, names, root):
np
.
save
(
filename
+
'.npy'
,
res
)
def
infer
(
net_file
,
net_name
,
model_file
,
imgfile
,
debug
=
Fals
e
):
def
infer
(
net_file
,
net_name
,
model_file
,
imgfile
,
debug
=
Tru
e
):
""" do inference using a model which consist 'xxx.py' and 'xxx.npy'
"""
fluid
=
import_fluid
()
#1, build model
net
,
input_shape
=
build_model
(
net_file
,
net_name
)
prediction
=
net
.
get_output
()
...
...
@@ -109,34 +117,79 @@ def infer(net_file, net_name, model_file, imgfile, debug=False):
fetch_list
=
fetch_list_var
)
if
debug
is
True
:
dump_path
=
'results.
layers
'
dump_path
=
'results.
paddle
'
dump_results
(
results
,
fetch_list_name
,
dump_path
)
print
(
'all results dumped to [%s]'
%
(
dump_path
))
print
(
'all result
of layer
s dumped to [%s]'
%
(
dump_path
))
else
:
result
=
results
[
0
]
print
(
'predicted class:'
,
np
.
argmax
(
result
))
return
0
def caffe_infer(prototxt, caffemodel, datafile):
    """ do inference using pycaffe for debug,
        all intermediate results will be dumped to 'results.caffe'

    Args:
        prototxt: path of the caffe network definition file.
        caffemodel: path of the caffe weights file.
        datafile: input data; loaded with ``np.load`` when the name contains
            '.npy', otherwise read through ``load_data``.

    Returns:
        0 once the results of all blobs have been dumped.
    """
    import caffe

    net = caffe.Net(prototxt, caffemodel, caffe.TEST)
    # The first blob is taken as the input layer. next(iter(...)) works on
    # both Python 2 and 3, whereas indexing ``keys()`` only works on Python 2.
    # NOTE(review): assumes net.blobs iterates in layer order - confirm.
    input_layer = next(iter(net.blobs))
    print('got name of input layer is:%s' % (input_layer))
    # Drop the first dimension of the blob shape before loading the data.
    input_shape = list(net.blobs[input_layer].data.shape[1:])

    if '.npy' in datafile:
        np_images = np.load(datafile)
    else:
        np_images = load_data(datafile, input_shape)

    inputs = {input_layer: np_images}
    net.forward_all(**inputs)

    suffix = '_output'
    results = []
    names = []
    for k, v in net.blobs.items():
        # Remove the '_output' suffix as a whole. str.rstrip('_output')
        # would instead strip any trailing run of the characters
        # '_', 'o', 'u', 't', 'p' and corrupt names such as 'conv1_relu'.
        if k.endswith(suffix):
            k = k[:-len(suffix)]
        k = k.replace('/', '_')
        names.append(k)
        results.append(v.data.copy())

    dump_path = 'results.caffe'
    dump_results(results, names, dump_path)
    print('all result of layers dumped to [%s]' % (dump_path))
    return 0
if
__name__
==
"__main__"
:
""" maybe more convenient to use 'run.sh' to call this tool
"""
net_file
=
'models/resnet50/resnet50.py'
weight_file
=
'models/resnet50/resnet50.npy'
img
file
=
'data/65.jpeg'
data
file
=
'data/65.jpeg'
net_name
=
'ResNet50'
argc
=
len
(
sys
.
argv
)
if
argc
==
5
:
if
sys
.
argv
[
1
]
==
'caffe'
:
if
len
(
sys
.
argv
)
!=
5
:
print
(
'usage:'
)
print
(
'
\t
python %s caffe [prototxt] [caffemodel] [datafile]'
%
(
sys
.
argv
[
0
]))
sys
.
exit
(
1
)
prototxt
=
sys
.
argv
[
2
]
caffemodel
=
sys
.
argv
[
3
]
datafile
=
sys
.
argv
[
4
]
sys
.
exit
(
caffe_infer
(
prototxt
,
caffemodel
,
datafile
))
elif
argc
==
5
:
net_file
=
sys
.
argv
[
1
]
weight_file
=
sys
.
argv
[
2
]
img
file
=
sys
.
argv
[
3
]
data
file
=
sys
.
argv
[
3
]
net_name
=
sys
.
argv
[
4
]
elif
argc
>
1
:
print
(
'usage:'
)
print
(
'
\t
python %s [net_file] [weight_file] [
img
file] [net_name]'
%
print
(
'
\t
python %s [net_file] [weight_file] [
data
file] [net_name]'
%
(
sys
.
argv
[
0
]))
print
(
'
\t
eg:python %s %s %s %s %s'
%
(
sys
.
argv
[
0
],
net_file
,
weight_file
,
img
file
,
net_name
))
weight_file
,
data
file
,
net_name
))
sys
.
exit
(
1
)
infer
(
net_file
,
net_name
,
weight_file
,
img
file
)
infer
(
net_file
,
net_name
,
weight_file
,
data
file
)
fluid/image_classification/caffe2fluid/examples/imagenet/run.sh
浏览文件 @
1847c180
...
...
@@ -3,7 +3,7 @@
#function:
# a tool used to:
# 1, convert a caffe model
# 2, do inference using this model
# 2, do inference
(only in fluid)
using this model
#
#usage:
# bash run.sh resnet50 ./models.caffe/resnet50 ./models/resnet50
...
...
@@ -65,7 +65,12 @@ if [[ -z $only_convert ]];then
PYTHON
=
`
which python
`
fi
imgfile
=
"data/65.jpeg"
net_name
=
`
grep
"name"
$proto_file
|
head
-n1
| perl
-ne
'if(/\"([^\"]+)\"/){ print $1."\n";}'
`
#FIXME:
# only the first "name" line in the prototxt file is used as the name of this network, which may not be correct
net_name
=
`
grep
"name"
$proto_file
|
head
-n1
| perl
-ne
'if(/^\s*name\s*:\s*\"([^\"]+)\"/){ print $1."\n";}'
`
if
[[
-z
$net_name
]]
;
then
net_name
=
"MyNet"
fi
$PYTHON
./infer.py
$net_file
$weight_file
$imgfile
$net_name
ret
=
$?
fi
...
...
fluid/image_classification/caffe2fluid/kaffe/graph.py
浏览文件 @
1847c180
...
...
@@ -52,7 +52,10 @@ class Graph(object):
def
__init__
(
self
,
nodes
=
None
,
name
=
None
):
self
.
nodes
=
nodes
or
[]
self
.
node_lut
=
{
node
.
name
:
node
for
node
in
self
.
nodes
}
self
.
name
=
name
if
name
is
None
or
name
==
''
:
self
.
name
=
'MyNet'
else
:
self
.
name
=
name
def
add_node
(
self
,
node
):
self
.
nodes
.
append
(
node
)
...
...
fluid/image_classification/caffe2fluid/kaffe/paddle/network.py
浏览文件 @
1847c180
...
...
@@ -4,7 +4,7 @@ import numpy as np
def
import_fluid
():
import
paddle.
v2.
fluid
as
fluid
import
paddle.fluid
as
fluid
return
fluid
...
...
@@ -64,7 +64,7 @@ class Network(object):
if
os
.
path
.
isdir
(
data_path
):
assert
(
exe
is
not
None
),
\
'must provide a executor to load fluid model'
fluid
.
io
.
load_persistables
_if_exist
(
executor
=
exe
,
dirname
=
data_path
)
fluid
.
io
.
load_persistables
(
executor
=
exe
,
dirname
=
data_path
)
return
True
#load model from a npy file
...
...
@@ -161,56 +161,28 @@ class Network(object):
output
=
fluid
.
layers
.
relu
(
x
=
input
)
return
output
def
_adjust_pad_if_needed
(
self
,
i_hw
,
k_hw
,
s_hw
,
p_hw
):
#adjust the padding if needed
i_h
,
i_w
=
i_hw
k_h
,
k_w
=
k_hw
s_h
,
s_w
=
s_hw
p_h
,
p_w
=
p_hw
def
is_consistent
(
i
,
k
,
s
,
p
):
o
=
i
+
2
*
p
-
k
if
o
%
s
==
0
:
return
True
else
:
return
False
real_p_h
=
0
real_p_w
=
0
if
is_consistent
(
i_h
,
k_h
,
s_h
,
p_h
)
is
False
:
real_p_h
=
int
(
k_h
/
2
)
if
is_consistent
(
i_w
,
k_w
,
s_w
,
p_w
)
is
False
:
real_p_w
=
int
(
k_w
/
2
)
return
[
real_p_h
,
real_p_w
]
def
pool
(
self
,
pool_type
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
):
# Get the number of channels in the input
in_hw
=
input
.
shape
[
2
:]
k_hw
=
[
k_h
,
k_w
]
s_hw
=
[
s_h
,
s_w
]
if
padding
is
None
:
#fix bug about the difference between conv and pool
#more info: https://github.com/BVLC/caffe/issues/1318
padding
=
self
.
_adjust_pad_if_needed
(
in_hw
,
k_hw
,
s_hw
,
[
0
,
0
])
fluid
=
import_fluid
()
output
=
fluid
.
layers
.
pool2d
(
input
=
input
,
pool_size
=
k_hw
,
pool_stride
=
s_hw
,
pool_padding
=
padding
,
ceil_mode
=
True
,
pool_type
=
pool_type
)
return
output
@
layer
def
max_pool
(
self
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
=
None
):
def
max_pool
(
self
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
=
[
0
,
0
]
):
return
self
.
pool
(
'max'
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
)
@
layer
def
avg_pool
(
self
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
=
None
):
def
avg_pool
(
self
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
=
[
0
,
0
]
):
return
self
.
pool
(
'avg'
,
input
,
k_h
,
k_w
,
s_h
,
s_w
,
name
,
padding
)
@
layer
...
...
@@ -258,7 +230,12 @@ class Network(object):
return
output
@
layer
def
batch_normalization
(
self
,
input
,
name
,
scale_offset
=
True
,
relu
=
False
):
def
batch_normalization
(
self
,
input
,
name
,
scale_offset
=
True
,
eps
=
1e-5
,
relu
=
False
):
# NOTE: Currently, only inference is supported
fluid
=
import_fluid
()
prefix
=
name
+
'_'
...
...
@@ -276,7 +253,7 @@ class Network(object):
bias_attr
=
bias_attr
,
moving_mean_name
=
mean_name
,
moving_variance_name
=
variance_name
,
epsilon
=
1e-5
,
epsilon
=
eps
,
act
=
'relu'
if
relu
is
True
else
None
)
return
output
...
...
fluid/image_classification/caffe2fluid/kaffe/paddle/transformer.py
浏览文件 @
1847c180
...
...
@@ -142,7 +142,13 @@ class TensorFlowMapper(NodeMapper):
def
map_batch_norm
(
self
,
node
):
scale_offset
=
len
(
node
.
data
)
==
4
kwargs
=
{}
if
scale_offset
else
{
'scale_offset'
:
False
}
#this default value comes from caffe's param in batch_norm
default_eps
=
1e-5
kwargs
=
{
'scale_offset'
:
scale_offset
}
if
node
.
parameters
.
eps
!=
default_eps
:
kwargs
[
'eps'
]
=
node
.
parameters
.
eps
return
MaybeActivated
(
node
,
default
=
False
)(
'batch_normalization'
,
**
kwargs
)
...
...
@@ -236,7 +242,7 @@ class TensorFlowEmitter(object):
func_def
=
self
.
statement
(
'@classmethod'
)
func_def
+=
self
.
statement
(
'def convert(cls, npy_model, fluid_path):'
)
self
.
indent
()
func_def
+=
self
.
statement
(
'
import paddle.v2.fluid as fluid
'
)
func_def
+=
self
.
statement
(
'
fluid = import_fluid()
'
)
for
l
in
codes
:
func_def
+=
self
.
statement
(
l
)
return
'
\n
'
+
func_def
...
...
fluid/neural_machine_translation/transformer/config.py
浏览文件 @
1847c180
...
...
@@ -43,21 +43,16 @@ class InferTaskConfig(object):
class
ModelHyperParams
(
object
):
# Dictionary size for source and target language. This model directly uses
# paddle.dataset.wmt16 in which <bos>, <eos> and <unk> token has
# alreay been added, but the <pad> token is not added. Transformer requires
# sequences in a mini-batch are padded to have the same length. A <pad> token is
# added into the original dictionary in paddle.dateset.wmt16.
# This model directly uses paddle.dataset.wmt16, in which the <bos>, <eos> and
# <unk> tokens have already been added. As for the <pad> token, any token
# included in the dict can be used to pad, since the loss on the paddings is
# masked out and has no effect on parameter gradients.
# size of source word dictionary.
src_vocab_size
=
10000
# index for <pad> token in source language.
src_pad_idx
=
src_vocab_size
# size of target word dictionay
trg_vocab_size
=
10000
# index for <pad> token in target language.
trg_pad_idx
=
trg_vocab_size
# index for <bos> token
bos_idx
=
0
...
...
@@ -66,11 +61,10 @@ class ModelHyperParams(object):
# index for <unk> token
unk_idx
=
2
# position value corresponding to the <pad> token.
pos_pad_idx
=
0
# max length of sequences. It should plus 1 to include position
# padding token for position encoding.
# max length of sequences.
# The size of position encoding table should at least plus 1, since the
# sinusoid position encoding starts from 1 and 0 can be used as the padding
# token for position encoding.
max_length
=
50
# the dimension for word embeddings, which is also the last dimension of
...
...
fluid/neural_machine_translation/transformer/infer.py
浏览文件 @
1847c180
...
...
@@ -41,7 +41,7 @@ def translate_batch(exe,
src_pad_idx
,
n_head
,
is_target
=
False
,
return_pos
=
Tru
e
,
is_label
=
Fals
e
,
return_attn_bias
=
True
,
return_max_len
=
False
)
# Append the data shape input to reshape the output of embedding layer.
...
...
@@ -250,22 +250,20 @@ def main():
encoder_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
=
encoder_program
):
enc_output
=
encoder
(
ModelHyperParams
.
src_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
,
ModelHyperParams
.
src_pad_idx
,
ModelHyperParams
.
pos_pad_idx
)
ModelHyperParams
.
src_vocab_size
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
)
decoder_program
=
fluid
.
Program
()
with
fluid
.
program_guard
(
main_program
=
decoder_program
):
predict
=
decoder
(
ModelHyperParams
.
trg_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
,
ModelHyperParams
.
trg_pad_idx
,
ModelHyperParams
.
pos_pad_idx
)
ModelHyperParams
.
trg_vocab_size
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
)
# Load model parameters of encoder and decoder separately from the saved
# transformer model.
...
...
@@ -301,9 +299,6 @@ def main():
trg_idx2word
=
paddle
.
dataset
.
wmt16
.
get_dict
(
"de"
,
dict_size
=
ModelHyperParams
.
trg_vocab_size
,
reverse
=
True
)
# Append the <pad> token since the dict provided by dataset.wmt16 does
# not include it.
trg_idx2word
[
ModelHyperParams
.
trg_pad_idx
]
=
"<pad>"
def
post_process_seq
(
seq
,
bos_idx
=
ModelHyperParams
.
bos_idx
,
...
...
@@ -327,19 +322,22 @@ def main():
for
batch_id
,
data
in
enumerate
(
test_data
()):
batch_seqs
,
batch_scores
=
translate_batch
(
exe
,
[
item
[
0
]
for
item
in
data
],
exe
,
[
item
[
0
]
for
item
in
data
],
encoder_program
,
encoder_input_data_names
,
[
enc_output
.
name
],
encoder_input_data_names
,
[
enc_output
.
name
],
decoder_program
,
decoder_input_data_names
,
[
predict
.
name
],
decoder_input_data_names
,
[
predict
.
name
],
InferTaskConfig
.
beam_size
,
InferTaskConfig
.
max_length
,
InferTaskConfig
.
n_best
,
len
(
data
),
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
src_pad_idx
,
ModelHyperParams
.
trg_pad_idx
,
ModelHyperParams
.
eos_idx
,
# Use eos_idx to pad.
ModelHyperParams
.
eos_idx
,
# Use eos_idx to pad.
ModelHyperParams
.
bos_idx
,
ModelHyperParams
.
eos_idx
,
ModelHyperParams
.
unk_idx
,
...
...
fluid/neural_machine_translation/transformer/model.py
浏览文件 @
1847c180
...
...
@@ -199,10 +199,8 @@ def prepare_encoder(src_word,
src_pos
,
src_vocab_size
,
src_emb_dim
,
src_pad_idx
,
src_max_len
,
dropout_rate
=
0.
,
pos_pad_idx
=
0
,
src_data_shape
=
None
,
pos_enc_param_name
=
None
):
"""Add word embeddings and position encodings.
...
...
@@ -214,12 +212,10 @@ def prepare_encoder(src_word,
src_word_emb
=
layers
.
embedding
(
src_word
,
size
=
[
src_vocab_size
,
src_emb_dim
],
padding_idx
=
src_pad_idx
,
param_attr
=
fluid
.
initializer
.
Normal
(
0.
,
1.
))
src_pos_enc
=
layers
.
embedding
(
src_pos
,
size
=
[
src_max_len
,
src_emb_dim
],
padding_idx
=
pos_pad_idx
,
param_attr
=
fluid
.
ParamAttr
(
name
=
pos_enc_param_name
,
trainable
=
False
))
enc_input
=
src_word_emb
+
src_pos_enc
...
...
@@ -480,12 +476,16 @@ def make_inputs(input_data_names,
append_batch_size
=
False
)
input_layers
+=
[
slf_attn_post_softmax_shape
]
if
src_attn_shape_flag
:
# This shape input is used to reshape before softmax in encoder-decoder
# attention.
src_attn_pre_softmax_shape
=
layers
.
data
(
name
=
input_data_names
[
len
(
input_layers
)],
shape
=
[
2
],
dtype
=
"int32"
,
append_batch_size
=
False
)
input_layers
+=
[
src_attn_pre_softmax_shape
]
# This shape input is used to reshape after softmax in encoder-decoder
# attention.
src_attn_post_softmax_shape
=
layers
.
data
(
name
=
input_data_names
[
len
(
input_layers
)],
shape
=
[
4
],
...
...
@@ -516,10 +516,7 @@ def transformer(
d_value
,
d_model
,
d_inner_hid
,
dropout_rate
,
src_pad_idx
,
trg_pad_idx
,
pos_pad_idx
,
):
dropout_rate
,
):
enc_inputs
=
make_inputs
(
encoder_input_data_names
,
n_head
,
...
...
@@ -543,8 +540,6 @@ def transformer(
d_model
,
d_inner_hid
,
dropout_rate
,
src_pad_idx
,
pos_pad_idx
,
enc_inputs
,
)
dec_inputs
=
make_inputs
(
...
...
@@ -570,8 +565,6 @@ def transformer(
d_model
,
d_inner_hid
,
dropout_rate
,
trg_pad_idx
,
pos_pad_idx
,
dec_inputs
,
enc_output
,
)
...
...
@@ -606,8 +599,6 @@ def wrap_encoder(src_vocab_size,
d_model
,
d_inner_hid
,
dropout_rate
,
src_pad_idx
,
pos_pad_idx
,
enc_inputs
=
None
):
"""
The wrapper assembles together all needed layers for the encoder.
...
...
@@ -637,10 +628,8 @@ def wrap_encoder(src_vocab_size,
src_pos
,
src_vocab_size
,
d_model
,
src_pad_idx
,
max_length
,
dropout_rate
,
pos_pad_idx
,
src_data_shape
,
)
enc_output
=
encoder
(
enc_input
,
...
...
@@ -666,8 +655,6 @@ def wrap_decoder(trg_vocab_size,
d_model
,
d_inner_hid
,
dropout_rate
,
trg_pad_idx
,
pos_pad_idx
,
dec_inputs
=
None
,
enc_output
=
None
):
"""
...
...
@@ -701,10 +688,8 @@ def wrap_decoder(trg_vocab_size,
trg_pos
,
trg_vocab_size
,
d_model
,
trg_pad_idx
,
max_length
,
dropout_rate
,
pos_pad_idx
,
trg_data_shape
,
)
dec_output
=
decoder
(
dec_input
,
...
...
fluid/neural_machine_translation/transformer/train.py
浏览文件 @
1847c180
...
...
@@ -15,7 +15,7 @@ def pad_batch_data(insts,
pad_idx
,
n_head
,
is_target
=
False
,
return_pos
=
Tru
e
,
is_label
=
Fals
e
,
return_attn_bias
=
True
,
return_max_len
=
True
):
"""
...
...
@@ -24,14 +24,20 @@ def pad_batch_data(insts,
"""
return_list
=
[]
max_len
=
max
(
len
(
inst
)
for
inst
in
insts
)
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and make no effect on parameter gradients.
inst_data
=
np
.
array
(
[
inst
+
[
pad_idx
]
*
(
max_len
-
len
(
inst
))
for
inst
in
insts
])
return_list
+=
[
inst_data
.
astype
(
"int64"
).
reshape
([
-
1
,
1
])]
if
return_pos
:
inst_pos
=
np
.
array
([[
pos_i
+
1
if
w_i
!=
pad_idx
else
0
for
pos_i
,
w_i
in
enumerate
(
inst
)
]
for
inst
in
inst_data
])
if
is_label
:
# label weight
inst_weight
=
np
.
array
(
[[
1.
]
*
len
(
inst
)
+
[
0.
]
*
(
max_len
-
len
(
inst
))
for
inst
in
insts
])
return_list
+=
[
inst_weight
.
astype
(
"float32"
).
reshape
([
-
1
,
1
])]
else
:
# position data
inst_pos
=
np
.
array
([
range
(
1
,
len
(
inst
)
+
1
)
+
[
0
]
*
(
max_len
-
len
(
inst
))
for
inst
in
insts
])
return_list
+=
[
inst_pos
.
astype
(
"int64"
).
reshape
([
-
1
,
1
])]
if
return_attn_bias
:
if
is_target
:
...
...
@@ -84,9 +90,14 @@ def prepare_batch_input(insts, input_data_names, src_pad_idx, trg_pad_idx,
trg_src_attn_post_softmax_shape
=
np
.
array
(
trg_src_attn_bias
.
shape
,
dtype
=
"int32"
)
lbl_word
=
pad_batch_data
([
inst
[
2
]
for
inst
in
insts
],
trg_pad_idx
,
n_head
,
False
,
False
,
False
,
False
)
lbl_weight
=
(
lbl_word
!=
trg_pad_idx
).
astype
(
"float32"
).
reshape
([
-
1
,
1
])
lbl_word
,
lbl_weight
=
pad_batch_data
(
[
inst
[
2
]
for
inst
in
insts
],
trg_pad_idx
,
n_head
,
is_target
=
False
,
is_label
=
True
,
return_attn_bias
=
False
,
return_max_len
=
False
)
input_dict
=
dict
(
zip
(
input_data_names
,
[
...
...
@@ -105,13 +116,11 @@ def main():
exe
=
fluid
.
Executor
(
place
)
sum_cost
,
avg_cost
,
predict
,
token_num
=
transformer
(
ModelHyperParams
.
src_vocab_size
+
1
,
ModelHyperParams
.
trg_vocab_size
+
1
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
,
ModelHyperParams
.
src_pad_idx
,
ModelHyperParams
.
trg_pad_idx
,
ModelHyperParams
.
pos_pad_idx
)
ModelHyperParams
.
src_vocab_size
,
ModelHyperParams
.
trg_vocab_size
,
ModelHyperParams
.
max_length
+
1
,
ModelHyperParams
.
n_layer
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_key
,
ModelHyperParams
.
d_value
,
ModelHyperParams
.
d_model
,
ModelHyperParams
.
d_inner_hid
,
ModelHyperParams
.
dropout
)
lr_scheduler
=
LearningRateScheduler
(
ModelHyperParams
.
d_model
,
TrainTaskConfig
.
warmup_steps
,
place
,
...
...
@@ -145,8 +154,8 @@ def main():
for
batch_id
,
data
in
enumerate
(
val_data
()):
data_input
=
prepare_batch_input
(
data
,
encoder_input_data_names
+
decoder_input_data_names
[:
-
1
]
+
label_data_names
,
ModelHyperParams
.
src_pad
_idx
,
ModelHyperParams
.
trg_pad
_idx
,
ModelHyperParams
.
n_head
,
label_data_names
,
ModelHyperParams
.
eos
_idx
,
ModelHyperParams
.
eos
_idx
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_model
)
test_sum_cost
,
test_token_num
=
exe
.
run
(
test_program
,
...
...
@@ -171,10 +180,12 @@ def main():
for
pass_id
in
xrange
(
TrainTaskConfig
.
pass_num
):
pass_start_time
=
time
.
time
()
for
batch_id
,
data
in
enumerate
(
train_data
()):
if
len
(
data
)
!=
TrainTaskConfig
.
batch_size
:
continue
data_input
=
prepare_batch_input
(
data
,
encoder_input_data_names
+
decoder_input_data_names
[:
-
1
]
+
label_data_names
,
ModelHyperParams
.
src_pad
_idx
,
ModelHyperParams
.
trg_pad
_idx
,
ModelHyperParams
.
n_head
,
label_data_names
,
ModelHyperParams
.
eos
_idx
,
ModelHyperParams
.
eos
_idx
,
ModelHyperParams
.
n_head
,
ModelHyperParams
.
d_model
)
lr_scheduler
.
update_learning_rate
(
data_input
)
outs
=
exe
.
run
(
fluid
.
framework
.
default_main_program
(),
...
...
fluid/policy_gradient/brain.py
浏览文件 @
1847c180
...
...
@@ -30,15 +30,11 @@ class PolicyGradient:
acts
=
fluid
.
layers
.
data
(
name
=
'acts'
,
shape
=
[
1
],
dtype
=
'int64'
)
vt
=
fluid
.
layers
.
data
(
name
=
'vt'
,
shape
=
[
1
],
dtype
=
'float32'
)
# fc1
fc1
=
fluid
.
layers
.
fc
(
input
=
obs
,
size
=
10
,
act
=
"tanh"
# tanh activation
)
fc1
=
fluid
.
layers
.
fc
(
input
=
obs
,
size
=
10
,
act
=
"tanh"
)
# tanh activation
# fc2
all_act_prob
=
fluid
.
layers
.
fc
(
input
=
fc1
,
size
=
self
.
n_actions
,
act
=
"softmax"
)
size
=
self
.
n_actions
,
act
=
"softmax"
)
self
.
inferece_program
=
fluid
.
defaul_main_program
().
clone
()
# to maximize total reward (log_p * R) is to minimize -(log_p * R)
neg_log_prob
=
fluid
.
layers
.
cross_entropy
(
...
...
@@ -53,10 +49,9 @@ class PolicyGradient:
self
.
exe
.
run
(
fluid
.
default_startup_program
())
def
choose_action
(
self
,
observation
):
prob_weights
=
self
.
exe
.
run
(
self
.
inferece_program
,
feed
=
{
"obs"
:
observation
[
np
.
newaxis
,
:]},
fetch_list
=
[
self
.
all_act_prob
])
prob_weights
=
self
.
exe
.
run
(
self
.
inferece_program
,
feed
=
{
"obs"
:
observation
[
np
.
newaxis
,
:]},
fetch_list
=
[
self
.
all_act_prob
])
prob_weights
=
np
.
array
(
prob_weights
[
0
])
action
=
np
.
random
.
choice
(
range
(
prob_weights
.
shape
[
1
]),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录