PaddlePaddle / FluidDoc

Commit 9c80f825 (unverified), authored on June 26, 2018 by yuyang18
Parent: b2eb302f

remove python file
Showing 10 changed files, with 0 additions and 1361 deletions:

    source/beginners_guide/basics/03.image_classification/resnet.py    +0  -73
    source/beginners_guide/basics/03.image_classification/train.py     +0  -136
    source/beginners_guide/basics/03.image_classification/vgg.py       +0  -47
    source/beginners_guide/basics/04.word2vec/calculate_dis.py         +0  -77
    source/beginners_guide/basics/04.word2vec/format_convert.py        +0  -158
    source/beginners_guide/basics/04.word2vec/train.py                 +0  -111
    source/beginners_guide/basics/05.recommender_system/train.py       +0  -135
    source/beginners_guide/basics/06.understand_sentiment/train.py     +0  -161
    source/beginners_guide/basics/07.label_semantic_roles/train.py     +0  -228
    source/beginners_guide/basics/08.machine_translation/train.py      +0  -235
source/beginners_guide/basics/03.image_classification/resnet.py (deleted, file mode 100644 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['resnet_cifar10']


def conv_bn_layer(input,
                  ch_out,
                  filter_size,
                  stride,
                  padding,
                  active_type=paddle.activation.Relu(),
                  ch_in=None):
    tmp = paddle.layer.img_conv(
        input=input,
        filter_size=filter_size,
        num_channels=ch_in,
        num_filters=ch_out,
        stride=stride,
        padding=padding,
        act=paddle.activation.Linear(),
        bias_attr=False)
    return paddle.layer.batch_norm(input=tmp, act=active_type)


def shortcut(ipt, ch_in, ch_out, stride):
    if ch_in != ch_out:
        return conv_bn_layer(ipt, ch_out, 1, stride, 0,
                             paddle.activation.Linear())
    else:
        return ipt


def basicblock(ipt, ch_in, ch_out, stride):
    tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
    tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, paddle.activation.Linear())
    short = shortcut(ipt, ch_in, ch_out, stride)
    return paddle.layer.addto(input=[tmp, short], act=paddle.activation.Relu())


def layer_warp(block_func, ipt, ch_in, ch_out, count, stride):
    tmp = block_func(ipt, ch_in, ch_out, stride)
    for i in range(1, count):
        tmp = block_func(tmp, ch_out, ch_out, 1)
    return tmp


def resnet_cifar10(ipt, depth=32):
    # depth should be one of 20, 32, 44, 56, 110, 1202
    assert (depth - 2) % 6 == 0
    n = (depth - 2) / 6
    nStages = {16, 64, 128}
    conv1 = conv_bn_layer(
        ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
    res1 = layer_warp(basicblock, conv1, 16, 16, n, 1)
    res2 = layer_warp(basicblock, res1, 16, 32, n, 2)
    res3 = layer_warp(basicblock, res2, 32, 64, n, 2)
    pool = paddle.layer.img_pool(
        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
    return pool
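
The assert in resnet_cifar10 follows the usual 6n + 2 CIFAR ResNet counting: each of the three stages stacks n basic blocks of two conv layers, plus the initial conv and the final classifier. A quick check of that arithmetic (an illustrative plain-Python sketch, not part of the deleted file):

# Depth must satisfy (depth - 2) % 6 == 0; n is the number of basic
# blocks per stage.  Pure Python, no PaddlePaddle required.
for depth in (20, 32, 44, 56, 110, 1202):
    assert (depth - 2) % 6 == 0
    n = (depth - 2) // 6
    print('%d -> %d blocks per stage' % (depth, n))  # e.g. 32 -> 5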
source/beginners_guide/basics/03.image_classification/train.py (deleted, file mode 100644 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License

import sys, os

import paddle.v2 as paddle

from vgg import vgg_bn_drop
from resnet import resnet_cifar10

with_gpu = os.getenv('WITH_GPU', '0') != '0'


def main():
    datadim = 3 * 32 * 32
    classdim = 10

    # PaddlePaddle init
    paddle.init(use_gpu=with_gpu, trainer_count=1)

    image = paddle.layer.data(
        name="image", type=paddle.data_type.dense_vector(datadim))

    # Add neural network config
    # option 1. resnet
    # net = resnet_cifar10(image, depth=32)
    # option 2. vgg
    net = vgg_bn_drop(image)

    out = paddle.layer.fc(
        input=net, size=classdim, act=paddle.activation.Softmax())

    lbl = paddle.layer.data(
        name="label", type=paddle.data_type.integer_value(classdim))
    cost = paddle.layer.classification_cost(input=out, label=lbl)

    # Create parameters
    parameters = paddle.parameters.create(cost)

    # Create optimizer
    momentum_optimizer = paddle.optimizer.Momentum(
        momentum=0.9,
        regularization=paddle.optimizer.L2Regularization(rate=0.0002 * 128),
        learning_rate=0.1 / 128.0,
        learning_rate_decay_a=0.1,
        learning_rate_decay_b=50000 * 100,
        learning_rate_schedule='discexp')

    # Create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=momentum_optimizer)

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(
                reader=paddle.batch(
                    paddle.dataset.cifar.test10(), batch_size=128),
                feeding={'image': 0,
                         'label': 1})
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Save the inference topology to protobuf.
    inference_topology = paddle.topology.Topology(layers=out)
    with open("inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.cifar.train10(), buf_size=50000),
            batch_size=128),
        num_passes=200,
        event_handler=event_handler,
        feeding={'image': 0,
                 'label': 1})

    # inference
    from PIL import Image
    import numpy as np
    import os

    def load_image(file):
        im = Image.open(file)
        im = im.resize((32, 32), Image.ANTIALIAS)
        im = np.array(im).astype(np.float32)
        # The storage order of the loaded image is W(width),
        # H(height), C(channel). PaddlePaddle requires
        # the CHW order, so transpose them.
        im = im.transpose((2, 0, 1))  # CHW
        # In the training phase, the channel order of CIFAR
        # images is B(Blue), G(green), R(Red). But PIL opens
        # images in RGB mode. It must swap the channel order.
        im = im[(2, 1, 0), :, :]  # BGR
        im = im.flatten()
        im = im / 255.0
        return im

    test_data = []
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    test_data.append((load_image(cur_dir + '/image/dog.png'), ))

    # users can remove the comments and change the model name
    # with open('params_pass_50.tar', 'r') as f:
    #     parameters = paddle.parameters.Parameters.from_tar(f)

    probs = paddle.infer(
        output_layer=out, parameters=parameters, input=test_data)
    lab = np.argsort(-probs)
    # probs and lab are the results of one batch data
    print "Label of image/dog.png is: %d" % lab[0][0]


if __name__ == '__main__':
    main()
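
The commented-out block above shows how a saved checkpoint would replace the freshly trained parameters before inference. A minimal sketch of that path (illustrative, not part of the deleted file; the checkpoint name params_pass_50.tar assumes a completed training run):

# Restore parameters saved by trainer.save_parameter_to_tar() and reuse
# them with paddle.infer.  The checkpoint file name is illustrative.
import paddle.v2 as paddle

with open('params_pass_50.tar', 'r') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)
# probs = paddle.infer(output_layer=out, parameters=parameters, input=test_data)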
source/beginners_guide/basics/03.image_classification/vgg.py (deleted, file mode 100644 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle.v2 as paddle

__all__ = ['vgg_bn_drop']


def vgg_bn_drop(input):
    def conv_block(ipt, num_filter, groups, dropouts, num_channels=None):
        return paddle.networks.img_conv_group(
            input=ipt,
            num_channels=num_channels,
            pool_size=2,
            pool_stride=2,
            conv_num_filter=[num_filter] * groups,
            conv_filter_size=3,
            conv_act=paddle.activation.Relu(),
            conv_with_batchnorm=True,
            conv_batchnorm_drop_rate=dropouts,
            pool_type=paddle.pooling.Max())

    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])

    drop = paddle.layer.dropout(input=conv5, dropout_rate=0.5)
    fc1 = paddle.layer.fc(input=drop, size=512, act=paddle.activation.Linear())
    bn = paddle.layer.batch_norm(
        input=fc1,
        act=paddle.activation.Relu(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5))
    fc2 = paddle.layer.fc(input=bn, size=512, act=paddle.activation.Linear())
    return fc2
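
For reference, a minimal sketch of how vgg_bn_drop is wired into a classifier; this mirrors the now-removed train.py in the same directory and assumes that file layout:

import paddle.v2 as paddle
from vgg import vgg_bn_drop

paddle.init(use_gpu=False, trainer_count=1)
# CIFAR-10 images arrive as flattened 3x32x32 dense vectors.
image = paddle.layer.data(
    name="image", type=paddle.data_type.dense_vector(3 * 32 * 32))
net = vgg_bn_drop(image)
# 10-way softmax classifier head, as in train.py.
out = paddle.layer.fc(input=net, size=10, act=paddle.activation.Softmax())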
source/beginners_guide/basics/04.word2vec/calculate_dis.py (deleted, file mode 100755 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
    python calculate_dis.py DICTIONARYTXT FEATURETXT

Required arguments:
    DICTIONARYTXT    the dictionary generated in dataprovider
    FEATURETXT       the text format word feature, one line for one word
"""
import numpy as np
from argparse import ArgumentParser


def load_dict(fdict):
    words = [line.strip() for line in fdict.readlines()]
    dictionary = dict(zip(words, xrange(len(words))))
    return dictionary


def load_emb(femb):
    feaBank = []
    flag_firstline = True
    for line in femb:
        if flag_firstline:
            flag_firstline = False
            continue
        fea = np.array([float(x) for x in line.strip().split(',')])
        normfea = fea * 1.0 / np.linalg.norm(fea)
        feaBank.append(normfea)
    return feaBank


def calcos(id1, id2, Fea):
    f1 = Fea[id1]
    f2 = Fea[id2]
    return np.dot(f1.transpose(), f2)


def get_wordidx(w, Dict):
    if w not in Dict:
        print 'ERROR: %s not in the dictionary' % w
        return -1
    return Dict[w]


if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('dict', help='dictionary file')
    parser.add_argument('fea', help='feature file')
    args = parser.parse_args()

    with open(args.dict) as fdict:
        word_dict = load_dict(fdict)

    with open(args.fea) as ffea:
        word_fea = load_emb(ffea)

    while True:
        w1, w2 = raw_input("please input two words: ").split()
        w1_id = get_wordidx(w1, word_dict)
        w2_id = get_wordidx(w2, word_dict)
        if w1_id == -1 or w2_id == -1:
            continue
        print 'similarity: %s' % (calcos(w1_id, w2_id, word_fea))
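
Note that calcos() really computes a cosine similarity: load_emb() L2-normalizes every row, so a plain dot product of two stored vectors equals cos(a, b) = a·b / (|a||b|). A small self-contained numpy check (illustrative, not part of the deleted file):

import numpy as np

a = np.array([1.0, 2.0, 3.0])
b = np.array([0.5, -1.0, 2.0])
# Normalize first, as load_emb() does, then take the dot product.
na, nb = a / np.linalg.norm(a), b / np.linalg.norm(b)
cosine = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
assert np.isclose(np.dot(na, nb), cosine)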
source/beginners_guide/basics/04.word2vec/format_convert.py (deleted, file mode 100755 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example:
    python format_convert.py --b2t -i INPUT -o OUTPUT -d DIM
    python format_convert.py --t2b -i INPUT -o OUTPUT

Options:
    -h, --help   show this help message and exit
    --b2t        convert parameter file of embedding model from binary to text
    --t2b        convert parameter file of embedding model from text to binary
    -i INPUT     input parameter file name
    -o OUTPUT    output parameter file name
    -d DIM       dimension of parameter
"""
from optparse import OptionParser
import struct


def binary2text(input, output, paraDim):
    """
    Convert a binary parameter file of an embedding model to a text file.
    input: the name of the input binary parameter file; the format is:
           1) the first 16 bytes are the file head:
                version(4 bytes): version of paddle, default = 0
                floatSize(4 bytes): sizeof(float) = 4
                paraCount(8 bytes): total number of parameters
           2) the next (paraCount * 4) bytes are the parameters, 4 bytes each
    output: the name of the output text parameter file, for example:
           0,4,32156096
           -0.7845433,1.1937413,-0.1704215,...
           0.0000909,0.0009465,-0.0008813,...
           ...
           the format is:
           1) the first line is the file head:
                version=0, floatSize=4, paraCount=32156096
           2) the other lines print the parameters:
                a) each line prints paraDim parameters separated by ','
                b) there are paraCount/paraDim lines (embedding words)
    paraDim: dimension of parameters
    """
    fi = open(input, "rb")
    fo = open(output, "w")
    version, floatSize, paraCount = struct.unpack("iil", fi.read(16))
    newHead = ','.join([str(version), str(floatSize), str(paraCount)])
    print >> fo, newHead
    bytes = 4 * int(paraDim)
    format = "%df" % int(paraDim)
    context = fi.read(bytes)
    line = 0
    while context:
        numbers = struct.unpack(format, context)
        lst = []
        for i in numbers:
            lst.append('%8.7f' % i)
        print >> fo, ','.join(lst)
        context = fi.read(bytes)
        line += 1
    fi.close()
    fo.close()
    print "binary2text finish, total", line, "lines"


def get_para_count(input):
    """
    Compute the total number of embedding parameters in the input text file.
    input: the name of the input text file
    """
    numRows = 1
    paraDim = 0
    with open(input) as f:
        line = f.readline()
        paraDim = len(line.split(","))
        for line in f:
            numRows += 1
    return numRows * paraDim


def text2binary(input, output, paddle_head=True):
    """
    Convert a text parameter file of an embedding model to a binary file.
    input: the name of the input text parameter file, for example:
           -0.7845433,1.1937413,-0.1704215,...
           0.0000909,0.0009465,-0.0008813,...
           ...
           the format is:
           1) it doesn't have a file head
           2) each line stores the same number of parameters,
              separated by commas ','
    output: the name of the output binary parameter file; the format is:
           1) the first 16 bytes are the file head:
                version(4 bytes), floatSize(4 bytes), paraCount(8 bytes)
           2) the next (paraCount * 4) bytes are the parameters, 4 bytes each
    """
    fi = open(input, "r")
    fo = open(output, "wb")
    newHead = struct.pack("iil", 0, 4, get_para_count(input))
    fo.write(newHead)
    count = 0
    for line in fi:
        line = line.strip().split(",")
        for i in range(0, len(line)):
            binary_data = struct.pack("f", float(line[i]))
            fo.write(binary_data)
        count += 1
    fi.close()
    fo.close()
    print "text2binary finish, total", count, "lines"


def main():
    """
    Main entry for running format_convert.py
    """
    usage = "usage: \n" \
            "python %prog --b2t -i INPUT -o OUTPUT -d DIM \n" \
            "python %prog --t2b -i INPUT -o OUTPUT"
    parser = OptionParser(usage)
    parser.add_option(
        "--b2t",
        action="store_true",
        help="convert parameter file of embedding model from binary to text")
    parser.add_option(
        "--t2b",
        action="store_true",
        help="convert parameter file of embedding model from text to binary")
    parser.add_option(
        "-i", action="store", dest="input", help="input parameter file name")
    parser.add_option(
        "-o", action="store", dest="output", help="output parameter file name")
    parser.add_option(
        "-d", action="store", dest="dim", help="dimension of parameter")
    (options, args) = parser.parse_args()
    if options.b2t:
        binary2text(options.input, options.output, options.dim)
    if options.t2b:
        text2binary(options.input, options.output)


if __name__ == '__main__':
    main()
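
A worked example of the 16-byte file head the two converters share: version (4-byte int), floatSize (4-byte int), paraCount (8-byte long). Note that struct's native "iil" layout is 16 bytes only where the C long is 8 bytes (e.g. typical 64-bit Linux); the sketch below assumes such a platform and is not part of the deleted file:

import struct

# Pack and unpack the head used by binary2text()/text2binary().
head = struct.pack("iil", 0, 4, 32156096)
print(len(head))                   # 16 on a 64-bit Linux build
print(struct.unpack("iil", head))  # (0, 4, 32156096)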
source/beginners_guide/basics/04.word2vec/train.py (deleted, file mode 100644 → 0)
import math
import os

import numpy
import paddle.v2 as paddle

with_gpu = os.getenv('WITH_GPU', '0') != '0'

embsize = 32
hiddensize = 256
N = 5


def wordemb(inlayer):
    wordemb = paddle.layer.table_projection(
        input=inlayer,
        size=embsize,
        param_attr=paddle.attr.Param(
            name="_proj", initial_std=0.001, learning_rate=1, l2_rate=0))
    return wordemb


# save and load word dict and embedding table
def save_dict_and_embedding(word_dict, embeddings):
    with open("word_dict", "w") as f:
        for key in word_dict:
            f.write(key + " " + str(word_dict[key]) + "\n")
    with open("embedding_table", "w") as f:
        numpy.savetxt(f, embeddings, delimiter=',', newline='\n')


def load_dict_and_embedding():
    word_dict = dict()
    with open("word_dict", "r") as f:
        for line in f:
            key, value = line.strip().split(" ")
            word_dict[key] = int(value)

    embeddings = numpy.loadtxt("embedding_table", delimiter=",")
    return word_dict, embeddings


def main():
    paddle.init(use_gpu=with_gpu, trainer_count=1)
    word_dict = paddle.dataset.imikolov.build_dict()
    dict_size = len(word_dict)
    # Every layer takes integer value of range [0, dict_size)
    firstword = paddle.layer.data(
        name="firstw", type=paddle.data_type.integer_value(dict_size))
    secondword = paddle.layer.data(
        name="secondw", type=paddle.data_type.integer_value(dict_size))
    thirdword = paddle.layer.data(
        name="thirdw", type=paddle.data_type.integer_value(dict_size))
    fourthword = paddle.layer.data(
        name="fourthw", type=paddle.data_type.integer_value(dict_size))
    nextword = paddle.layer.data(
        name="fifthw", type=paddle.data_type.integer_value(dict_size))

    Efirst = wordemb(firstword)
    Esecond = wordemb(secondword)
    Ethird = wordemb(thirdword)
    Efourth = wordemb(fourthword)

    contextemb = paddle.layer.concat(input=[Efirst, Esecond, Ethird, Efourth])
    hidden1 = paddle.layer.fc(
        input=contextemb,
        size=hiddensize,
        act=paddle.activation.Sigmoid(),
        layer_attr=paddle.attr.Extra(drop_rate=0.5),
        bias_attr=paddle.attr.Param(learning_rate=2),
        param_attr=paddle.attr.Param(
            initial_std=1. / math.sqrt(embsize * 8), learning_rate=1))
    predictword = paddle.layer.fc(
        input=hidden1,
        size=dict_size,
        bias_attr=paddle.attr.Param(learning_rate=2),
        act=paddle.activation.Softmax())

    cost = paddle.layer.classification_cost(input=predictword, label=nextword)

    parameters = paddle.parameters.create(cost)
    adagrad = paddle.optimizer.AdaGrad(
        learning_rate=3e-3,
        regularization=paddle.optimizer.L2Regularization(8e-4))
    trainer = paddle.trainer.SGD(cost, parameters, adagrad)

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)

        if isinstance(event, paddle.event.EndPass):
            result = trainer.test(
                paddle.batch(paddle.dataset.imikolov.test(word_dict, N), 32))
            print "Pass %d, Testing metrics %s" % (event.pass_id,
                                                   result.metrics)
            with open("model_%d.tar" % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)

    trainer.train(
        paddle.batch(paddle.dataset.imikolov.train(word_dict, N), 32),
        num_passes=100,
        event_handler=event_handler)

    # save word dict and embedding table
    embeddings = parameters.get("_proj").reshape(len(word_dict), embsize)
    save_dict_and_embedding(word_dict, embeddings)


if __name__ == '__main__':
    main()
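
Once training has written word_dict and embedding_table, the saved artifacts can be reused without retraining, e.g. to look up a single word's 32-dimensional vector. A sketch, assuming it runs in this module's namespace after a completed training run ('apple' is an illustrative token, not guaranteed to be in the vocabulary):

# load_dict_and_embedding() is defined above; the files it reads are the
# ones save_dict_and_embedding() wrote at the end of main().
word_dict, embeddings = load_dict_and_embedding()
vec = embeddings[word_dict['apple']]  # one row per vocabulary word
print(vec.shape)                      # (32,), i.e. embsize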
source/beginners_guide/basics/05.recommender_system/train.py (deleted, file mode 100644 → 0)
import paddle.v2 as paddle
import cPickle
import copy
import os

with_gpu = os.getenv('WITH_GPU', '0') != '0'


def get_usr_combined_features():
    uid = paddle.layer.data(
        name='user_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_user_id() + 1))
    usr_emb = paddle.layer.embedding(input=uid, size=32)
    usr_fc = paddle.layer.fc(input=usr_emb, size=32)

    usr_gender_id = paddle.layer.data(
        name='gender_id', type=paddle.data_type.integer_value(2))
    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
    usr_gender_fc = paddle.layer.fc(input=usr_gender_emb, size=16)

    usr_age_id = paddle.layer.data(
        name='age_id',
        type=paddle.data_type.integer_value(
            len(paddle.dataset.movielens.age_table)))
    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
    usr_age_fc = paddle.layer.fc(input=usr_age_emb, size=16)

    usr_job_id = paddle.layer.data(
        name='job_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_job_id() + 1))
    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
    usr_job_fc = paddle.layer.fc(input=usr_job_emb, size=16)

    usr_combined_features = paddle.layer.fc(
        input=[usr_fc, usr_gender_fc, usr_age_fc, usr_job_fc],
        size=200,
        act=paddle.activation.Tanh())
    return usr_combined_features


def get_mov_combined_features():
    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()

    mov_id = paddle.layer.data(
        name='movie_id',
        type=paddle.data_type.integer_value(
            paddle.dataset.movielens.max_movie_id() + 1))
    mov_emb = paddle.layer.embedding(input=mov_id, size=32)
    mov_fc = paddle.layer.fc(input=mov_emb, size=32)

    mov_categories = paddle.layer.data(
        name='category_id',
        type=paddle.data_type.sparse_binary_vector(
            len(paddle.dataset.movielens.movie_categories())))
    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)

    mov_title_id = paddle.layer.data(
        name='movie_title',
        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
    mov_title_conv = paddle.networks.sequence_conv_pool(
        input=mov_title_emb, hidden_size=32, context_len=3)

    mov_combined_features = paddle.layer.fc(
        input=[mov_fc, mov_categories_hidden, mov_title_conv],
        size=200,
        act=paddle.activation.Tanh())
    return mov_combined_features


def main():
    paddle.init(use_gpu=with_gpu)
    usr_combined_features = get_usr_combined_features()
    mov_combined_features = get_mov_combined_features()
    inference = paddle.layer.cos_sim(
        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
    cost = paddle.layer.square_error_cost(
        input=inference,
        label=paddle.layer.data(
            name='score', type=paddle.data_type.dense_vector(1)))

    parameters = paddle.parameters.create(cost)

    trainer = paddle.trainer.SGD(
        cost=cost,
        parameters=parameters,
        update_equation=paddle.optimizer.Adam(learning_rate=1e-4))
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d Batch %d Cost %.2f" % (
                    event.pass_id, event.batch_id, event.cost)

    trainer.train(
        reader=paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.movielens.train(), buf_size=8192),
            batch_size=256),
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    user_id = 234
    movie_id = 345

    user = paddle.dataset.movielens.user_info()[user_id]
    movie = paddle.dataset.movielens.movie_info()[movie_id]

    feature = user.value() + movie.value()

    infer_dict = copy.copy(feeding)
    del infer_dict['score']

    prediction = paddle.infer(
        output_layer=inference,
        parameters=parameters,
        input=[feature],
        feeding=infer_dict)
    print (prediction + 5) / 2


if __name__ == '__main__':
    main()
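
The final print rescales the model output: cos_sim(..., scale=5) lies in [-5, 5], and (x + 5) / 2 maps that interval linearly onto a 0-5 rating scale. A quick check of the endpoints (illustrative sketch, not part of the deleted file):

for x in (-5.0, 0.0, 5.0):
    print('%.1f -> %.2f' % (x, (x + 5) / 2))  # -5 -> 0.00, 0 -> 2.50, 5 -> 5.00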
source/beginners_guide/basics/06.understand_sentiment/train.py (deleted, file mode 100644 → 0)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys, os

import paddle.v2 as paddle

with_gpu = os.getenv('WITH_GPU', '0') != '0'


def convolution_net(input_dim, class_dim=2, emb_dim=128, hid_dim=128):
    data = paddle.layer.data(
        "word", paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)
    conv_3 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=3, hidden_size=hid_dim)
    conv_4 = paddle.networks.sequence_conv_pool(
        input=emb, context_len=4, hidden_size=hid_dim)
    output = paddle.layer.fc(
        input=[conv_3, conv_4],
        size=class_dim,
        act=paddle.activation.Softmax())
    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost, output


def stacked_lstm_net(input_dim,
                     class_dim=2,
                     emb_dim=128,
                     hid_dim=512,
                     stacked_num=3):
    """
    A wrapper for the sentiment classification task.
    This network uses a bi-directional recurrent network
    consisting of three LSTM layers. This configuration is based
    on the paper at the following URL, but uses fewer layers.
        http://www.aclweb.org/anthology/P15-1109
    input_dim: here, the word dictionary dimension.
    class_dim: number of categories.
    emb_dim: dimension of word embedding.
    hid_dim: dimension of hidden layer.
    stacked_num: number of stacked lstm-hidden layers.
    """
    assert stacked_num % 2 == 1

    fc_para_attr = paddle.attr.Param(learning_rate=1e-3)
    lstm_para_attr = paddle.attr.Param(initial_std=0., learning_rate=1.)
    para_attr = [fc_para_attr, lstm_para_attr]
    bias_attr = paddle.attr.Param(initial_std=0., l2_rate=0.)
    relu = paddle.activation.Relu()
    linear = paddle.activation.Linear()

    data = paddle.layer.data(
        "word", paddle.data_type.integer_value_sequence(input_dim))
    emb = paddle.layer.embedding(input=data, size=emb_dim)

    fc1 = paddle.layer.fc(
        input=emb, size=hid_dim, act=linear, bias_attr=bias_attr)
    lstm1 = paddle.layer.lstmemory(input=fc1, act=relu, bias_attr=bias_attr)

    inputs = [fc1, lstm1]
    for i in range(2, stacked_num + 1):
        fc = paddle.layer.fc(
            input=inputs,
            size=hid_dim,
            act=linear,
            param_attr=para_attr,
            bias_attr=bias_attr)
        lstm = paddle.layer.lstmemory(
            input=fc, reverse=(i % 2) == 0, act=relu, bias_attr=bias_attr)
        inputs = [fc, lstm]

    fc_last = paddle.layer.pooling(
        input=inputs[0], pooling_type=paddle.pooling.Max())
    lstm_last = paddle.layer.pooling(
        input=inputs[1], pooling_type=paddle.pooling.Max())
    output = paddle.layer.fc(
        input=[fc_last, lstm_last],
        size=class_dim,
        act=paddle.activation.Softmax(),
        bias_attr=bias_attr,
        param_attr=para_attr)

    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
    cost = paddle.layer.classification_cost(input=output, label=lbl)
    return cost, output


if __name__ == '__main__':
    # init
    paddle.init(use_gpu=with_gpu)

    # data
    print 'load dictionary...'
    word_dict = paddle.dataset.imdb.word_dict()
    dict_dim = len(word_dict)
    class_dim = 2
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.imdb.train(word_dict), buf_size=1000),
        batch_size=100)
    test_reader = paddle.batch(
        paddle.dataset.imdb.test(word_dict), batch_size=100)

    feeding = {'word': 0, 'label': 1}

    # network config
    # Please choose the way to build the network
    # by uncommenting the corresponding line.
    [cost, output] = convolution_net(dict_dim, class_dim=class_dim)
    # [cost, output] = stacked_lstm_net(dict_dim, class_dim=class_dim, stacked_num=3)

    # create parameters
    parameters = paddle.parameters.create(cost)

    # create optimizer
    adam_optimizer = paddle.optimizer.Adam(
        learning_rate=2e-3,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(average_window=0.5))

    # create trainer
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=adam_optimizer)

    # End batch and end pass event handler
    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "\nPass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
        if isinstance(event, paddle.event.EndPass):
            with open('./params_pass_%d.tar' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(reader=test_reader, feeding=feeding)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    # Save the inference topology to protobuf.
    inference_topology = paddle.topology.Topology(layers=output)
    with open("./inference_topology.pkl", 'wb') as f:
        inference_topology.serialize_for_inference(f)

    trainer.train(
        reader=train_reader,
        event_handler=event_handler,
        feeding=feeding,
        num_passes=20)
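
The assert stacked_num % 2 == 1 in stacked_lstm_net keeps the direction pattern consistent: lstm1 runs forward, and each later layer flips direction via reverse=(i % 2 == 0), so an odd stack count alternates forward/reverse/... and ends on a forward layer. A tiny trace for stacked_num = 3 (illustrative sketch, not part of the deleted file):

stacked_num = 3
print('layer 1: forward')  # lstm1 has no reverse flag, i.e. forward
for i in range(2, stacked_num + 1):
    print('layer %d: %s' % (i, 'reverse' if i % 2 == 0 else 'forward'))
# -> layer 2: reverse, layer 3: forward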
source/beginners_guide/basics/07.label_semantic_roles/train.py (deleted, file mode 100644 → 0)
import math, os
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.evaluator as evaluator

with_gpu = os.getenv('WITH_GPU', '0') != '0'

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
default_std = 1 / math.sqrt(hidden_dim) / 3.0
mix_hidden_lr = 1e-3


def d_type(size):
    return paddle.data_type.integer_value_sequence(size)


def db_lstm():
    # 8 features
    word = paddle.layer.data(name='word_data', type=d_type(word_dict_len))
    predicate = paddle.layer.data(name='verb_data', type=d_type(pred_len))

    ctx_n2 = paddle.layer.data(name='ctx_n2_data', type=d_type(word_dict_len))
    ctx_n1 = paddle.layer.data(name='ctx_n1_data', type=d_type(word_dict_len))
    ctx_0 = paddle.layer.data(name='ctx_0_data', type=d_type(word_dict_len))
    ctx_p1 = paddle.layer.data(name='ctx_p1_data', type=d_type(word_dict_len))
    ctx_p2 = paddle.layer.data(name='ctx_p2_data', type=d_type(word_dict_len))
    mark = paddle.layer.data(name='mark_data', type=d_type(mark_dict_len))

    emb_para = paddle.attr.Param(name='emb', initial_std=0., is_static=True)
    std_0 = paddle.attr.Param(initial_std=0.)
    std_default = paddle.attr.Param(initial_std=default_std)

    predicate_embedding = paddle.layer.embedding(
        size=word_dim,
        input=predicate,
        param_attr=paddle.attr.Param(name='vemb', initial_std=default_std))
    mark_embedding = paddle.layer.embedding(
        size=mark_dim, input=mark, param_attr=std_0)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        paddle.layer.embedding(size=word_dim, input=x, param_attr=emb_para)
        for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0 = paddle.layer.mixed(
        size=hidden_dim,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=emb, param_attr=std_default) for emb in emb_layers
        ])

    lstm_para_attr = paddle.attr.Param(initial_std=0.0, learning_rate=1.0)
    hidden_para_attr = paddle.attr.Param(
        initial_std=default_std, learning_rate=mix_hidden_lr)

    lstm_0 = paddle.layer.lstmemory(
        input=hidden_0,
        act=paddle.activation.Relu(),
        gate_act=paddle.activation.Sigmoid(),
        state_act=paddle.activation.Sigmoid(),
        bias_attr=std_0,
        param_attr=lstm_para_attr)

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = paddle.layer.mixed(
            size=hidden_dim,
            bias_attr=std_default,
            input=[
                paddle.layer.full_matrix_projection(
                    input=input_tmp[0], param_attr=hidden_para_attr),
                paddle.layer.full_matrix_projection(
                    input=input_tmp[1], param_attr=lstm_para_attr)
            ])
        lstm = paddle.layer.lstmemory(
            input=mix_hidden,
            act=paddle.activation.Relu(),
            gate_act=paddle.activation.Sigmoid(),
            state_act=paddle.activation.Sigmoid(),
            reverse=((i % 2) == 1),
            bias_attr=std_0,
            param_attr=lstm_para_attr)

        input_tmp = [mix_hidden, lstm]

    feature_out = paddle.layer.mixed(
        size=label_dict_len,
        bias_attr=std_default,
        input=[
            paddle.layer.full_matrix_projection(
                input=input_tmp[0], param_attr=hidden_para_attr),
            paddle.layer.full_matrix_projection(
                input=input_tmp[1], param_attr=lstm_para_attr)
        ], )

    return feature_out


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def main():
    paddle.init(use_gpu=with_gpu, trainer_count=1)

    # define network topology
    feature_out = db_lstm()
    target = paddle.layer.data(name='target', type=d_type(label_dict_len))
    crf_cost = paddle.layer.crf(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(
            name='crfw', initial_std=default_std, learning_rate=mix_hidden_lr))

    crf_dec = paddle.layer.crf_decoding(
        size=label_dict_len,
        input=feature_out,
        label=target,
        param_attr=paddle.attr.Param(name='crfw'))
    evaluator.sum(input=crf_dec)

    # create parameters
    parameters = paddle.parameters.create(crf_cost)
    parameters.set('emb', load_parameter(conll05.get_embedding(), 44068, 32))

    # create optimizer
    optimizer = paddle.optimizer.Momentum(
        momentum=0,
        learning_rate=2e-2,
        regularization=paddle.optimizer.L2Regularization(rate=8e-4),
        model_average=paddle.optimizer.ModelAverage(
            average_window=0.5, max_average_window=10000), )

    trainer = paddle.trainer.SGD(
        cost=crf_cost,
        parameters=parameters,
        update_equation=optimizer,
        extra_layers=crf_dec)

    reader = paddle.batch(
        paddle.reader.shuffle(conll05.test(), buf_size=8192), batch_size=10)
    test_reader = paddle.batch(
        paddle.reader.shuffle(conll05.test(), buf_size=8192), batch_size=10)

    feeding = {
        'word_data': 0,
        'ctx_n2_data': 1,
        'ctx_n1_data': 2,
        'ctx_0_data': 3,
        'ctx_p1_data': 4,
        'ctx_p2_data': 5,
        'verb_data': 6,
        'mark_data': 7,
        'target': 8
    }

    def event_handler(event):
        if isinstance(event, paddle.event.EndIteration):
            if event.batch_id % 100 == 0:
                print "Pass %d, Batch %d, Cost %f, %s" % (
                    event.pass_id, event.batch_id, event.cost, event.metrics)
            if event.batch_id % 1000 == 0:
                result = trainer.test(reader=test_reader, feeding=feeding)
                print "\nTest with Pass %d, Batch %d, %s" % (
                    event.pass_id, event.batch_id, result.metrics)

        if isinstance(event, paddle.event.EndPass):
            # save parameters
            with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
                trainer.save_parameter_to_tar(f)

            result = trainer.test(reader=test_reader, feeding=feeding)
            print "\nTest with Pass %d, %s" % (event.pass_id, result.metrics)

    trainer.train(
        reader=reader,
        event_handler=event_handler,
        num_passes=1,
        feeding=feeding)

    test_creator = paddle.dataset.conll05.test()
    test_data = []
    for item in test_creator():
        test_data.append(item[0:8])
        if len(test_data) == 1:
            break

    predict = paddle.layer.crf_decoding(
        size=label_dict_len,
        input=feature_out,
        param_attr=paddle.attr.Param(name='crfw'))
    probs = paddle.infer(
        output_layer=predict,
        parameters=parameters,
        input=test_data,
        feeding=feeding,
        field='id')
    assert len(probs) == len(test_data[0][0])
    labels_reverse = {}
    for (k, v) in label_dict.items():
        labels_reverse[v] = k
    pre_lab = [labels_reverse[i] for i in probs]
    print pre_lab


if __name__ == '__main__':
    main()
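
The f.read(16) in load_parameter() skips the same 16-byte parameter-file head (version, floatSize, paraCount) documented in 04.word2vec/format_convert.py; everything after it is raw float32 data. A round-trip sketch, assuming it runs in this module's namespace on a 64-bit platform where struct.pack("iil", ...) yields 16 bytes; the file name is illustrative:

import struct
import numpy as np

mat = np.arange(6, dtype=np.float32).reshape(2, 3)
with open('tiny_param.bin', 'wb') as f:
    f.write(struct.pack("iil", 0, 4, mat.size))  # 16-byte head
    f.write(mat.tobytes())                       # raw float32 payload
print(load_parameter('tiny_param.bin', 2, 3))    # recovers the 2x3 matrix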
source/beginners_guide/basics/08.machine_translation/train.py (deleted, file mode 100644 → 0)
import sys, os
import numpy as np
import paddle.v2 as paddle

with_gpu = os.getenv('WITH_GPU', '0') != '0'


def save_model(trainer, parameters, save_path):
    with open(save_path, 'w') as f:
        trainer.save_parameter_to_tar(f)


def seq_to_seq_net(source_dict_dim,
                   target_dict_dim,
                   is_generating,
                   beam_size=3,
                   max_length=250):
    ### Network Architecture
    word_vector_dim = 512  # dimension of word vector
    decoder_size = 512  # dimension of hidden unit of GRU decoder
    encoder_size = 512  # dimension of hidden unit of GRU encoder

    #### Encoder
    src_word_id = paddle.layer.data(
        name='source_language_word',
        type=paddle.data_type.integer_value_sequence(source_dict_dim))
    src_embedding = paddle.layer.embedding(
        input=src_word_id, size=word_vector_dim)
    src_forward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size)
    src_backward = paddle.networks.simple_gru(
        input=src_embedding, size=encoder_size, reverse=True)
    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])

    #### Decoder
    encoded_proj = paddle.layer.fc(
        act=paddle.activation.Linear(),
        size=decoder_size,
        bias_attr=False,
        input=encoded_vector)

    backward_first = paddle.layer.first_seq(input=src_backward)

    decoder_boot = paddle.layer.fc(
        size=decoder_size,
        act=paddle.activation.Tanh(),
        bias_attr=False,
        input=backward_first)

    def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
        decoder_mem = paddle.layer.memory(
            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

        context = paddle.networks.simple_attention(
            encoded_sequence=enc_vec,
            encoded_proj=enc_proj,
            decoder_state=decoder_mem)

        decoder_inputs = paddle.layer.fc(
            act=paddle.activation.Linear(),
            size=decoder_size * 3,
            bias_attr=False,
            input=[context, current_word],
            layer_attr=paddle.attr.ExtraLayerAttribute(
                error_clipping_threshold=100.0))

        gru_step = paddle.layer.gru_step(
            name='gru_decoder',
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)

        out = paddle.layer.fc(
            size=target_dict_dim,
            bias_attr=True,
            act=paddle.activation.Softmax(),
            input=gru_step)
        return out

    decoder_group_name = 'decoder_group'
    group_input1 = paddle.layer.StaticInput(input=encoded_vector)
    group_input2 = paddle.layer.StaticInput(input=encoded_proj)
    group_inputs = [group_input1, group_input2]

    if not is_generating:
        trg_embedding = paddle.layer.embedding(
            input=paddle.layer.data(
                name='target_language_word',
                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
            size=word_vector_dim,
            param_attr=paddle.attr.ParamAttr(
                name='_target_language_embedding'))
        group_inputs.append(trg_embedding)

        # For a decoder equipped with an attention mechanism, in training,
        # the target embedding (the ground truth) is the data input,
        # while the encoded source sequence is accessed as an unbounded memory.
        # Here, the StaticInput defines a read-only memory
        # for the recurrent_group.
        decoder = paddle.layer.recurrent_group(
            name=decoder_group_name,
            step=gru_decoder_with_attention,
            input=group_inputs)

        lbl = paddle.layer.data(
            name='target_language_next_word',
            type=paddle.data_type.integer_value_sequence(target_dict_dim))
        cost = paddle.layer.classification_cost(input=decoder, label=lbl)

        return cost
    else:
        # In generation, the decoder predicts the next target word based on
        # the encoded source sequence and the previously generated target word.
        # The encoded source sequence (encoder's output) must be specified by
        # StaticInput, which is a read-only memory.
        # The embedding of the previously generated word is automatically
        # retrieved by GeneratedInput, initialized by a start mark <s>.
        trg_embedding = paddle.layer.GeneratedInput(
            size=target_dict_dim,
            embedding_name='_target_language_embedding',
            embedding_size=word_vector_dim)
        group_inputs.append(trg_embedding)

        beam_gen = paddle.layer.beam_search(
            name=decoder_group_name,
            step=gru_decoder_with_attention,
            input=group_inputs,
            bos_id=0,
            eos_id=1,
            beam_size=beam_size,
            max_length=max_length)

        return beam_gen


def main():
    paddle.init(use_gpu=with_gpu, trainer_count=1)
    is_generating = False

    # source and target dict dim.
    dict_size = 30000
    source_dict_dim = target_dict_dim = dict_size

    # train the network
    if not is_generating:
        # define optimization method and trainer
        optimizer = paddle.optimizer.Adam(
            learning_rate=5e-5,
            regularization=paddle.optimizer.L2Regularization(rate=8e-4))

        cost = seq_to_seq_net(source_dict_dim, target_dict_dim, is_generating)
        parameters = paddle.parameters.create(cost)

        trainer = paddle.trainer.SGD(
            cost=cost, parameters=parameters, update_equation=optimizer)
        # define data reader
        wmt14_reader = paddle.batch(
            paddle.reader.shuffle(
                paddle.dataset.wmt14.train(dict_size), buf_size=8192),
            batch_size=4)

        # define event_handler callback
        def event_handler(event):
            if isinstance(event, paddle.event.EndIteration):
                if event.batch_id % 10 == 0:
                    print("\nPass %d, Batch %d, Cost %f, %s" % (
                        event.pass_id, event.batch_id, event.cost,
                        event.metrics))
                else:
                    sys.stdout.write('.')
                    sys.stdout.flush()

                if not event.batch_id % 10:
                    save_path = 'params_pass_%05d_batch_%05d.tar' % (
                        event.pass_id, event.batch_id)
                    save_model(trainer, parameters, save_path)

            if isinstance(event, paddle.event.EndPass):
                # save parameters
                save_path = 'params_pass_%05d.tar' % (event.pass_id)
                save_model(trainer, parameters, save_path)

        # start to train
        trainer.train(
            reader=wmt14_reader, event_handler=event_handler, num_passes=2)

    # generate: translate an English sequence into French
    else:
        # use the first 3 samples for generation
        gen_data = []
        gen_num = 3
        for item in paddle.dataset.wmt14.gen(dict_size)():
            gen_data.append([item[0]])
            if len(gen_data) == gen_num:
                break

        beam_size = 3
        beam_gen = seq_to_seq_net(source_dict_dim, target_dict_dim,
                                  is_generating, beam_size)

        # get the trained model, whose bleu = 26.92
        parameters = paddle.dataset.wmt14.model()

        # prob is the prediction probabilities, and id is the prediction word.
        beam_result = paddle.infer(
            output_layer=beam_gen,
            parameters=parameters,
            input=gen_data,
            field=['prob', 'id'])

        # load the dictionary
        src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)

        gen_sen_idx = np.where(beam_result[1] == -1)[0]
        assert len(gen_sen_idx) == len(gen_data) * beam_size

        # -1 is the delimiter of generated sequences.
        # the first element of each generated sequence is its length.
        start_pos, end_pos = 1, 0
        for i, sample in enumerate(gen_data):
            print(" ".join([src_dict[w] for w in sample[0][1:-1]]))
            # skip the start and ending mark when printing the source sentence
            for j in xrange(beam_size):
                end_pos = gen_sen_idx[i * beam_size + j]
                print("%.4f\t%s" % (beam_result[0][i][j], " ".join(
                    trg_dict[w] for w in beam_result[1][start_pos:end_pos])))
                start_pos = end_pos + 2
            print("\n")


if __name__ == '__main__':
    main()
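
For clarity on the decoding loop above: beam_result[1] is one flat id sequence in which candidates are separated by -1, and the start_pos = end_pos + 2 step skips both the delimiter and the single bookkeeping element that precedes each candidate's word ids. An illustration with fabricated ids (not real model output):

import numpy as np

ids = np.array([9, 5, 6, 1, -1,       # candidate 1: words 5, 6, then eos_id 1
                9, 7, 1, -1,          # candidate 2
                9, 5, 7, 6, 1, -1])   # candidate 3
start = 1
for end in np.where(ids == -1)[0]:
    print(ids[start:end])  # the word ids of one candidate
    start = end + 2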