Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
f3b1bb5a
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
f3b1bb5a
编写于
11月 09, 2017
作者:
C
caoying03
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine DSSM comments.
上级
77d2bf77
变更
2
隐藏空白更改
内联
并排
Showing
2 changed file
with
125 addition
and
122 deletion
+125
-122
dssm/infer.py
dssm/infer.py
+32
-33
dssm/network_conf.py
dssm/network_conf.py
+93
-89
未找到文件。
dssm/infer.py
浏览文件 @
f3b1bb5a
...
...
@@ -9,25 +9,25 @@ from utils import logger, ModelType, ModelArch, load_dic
parser
=
argparse
.
ArgumentParser
(
description
=
"PaddlePaddle DSSM infer"
)
parser
.
add_argument
(
'--model_path'
,
"--model_path"
,
type
=
str
,
required
=
True
,
help
=
"path of model parameters file"
)
parser
.
add_argument
(
'-i'
,
'--data_path'
,
"-i"
,
"--data_path"
,
type
=
str
,
required
=
True
,
help
=
"path of the dataset to infer"
)
parser
.
add_argument
(
'-o'
,
'--prediction_output_path'
,
"-o"
,
"--prediction_output_path"
,
type
=
str
,
required
=
True
,
help
=
"path to output the prediction"
)
parser
.
add_argument
(
'-y'
,
'--model_type'
,
"-y"
,
"--model_type"
,
type
=
int
,
required
=
True
,
default
=
ModelType
.
CLASSIFICATION_MODE
,
...
...
@@ -36,45 +36,45 @@ parser.add_argument(
(
ModelType
.
CLASSIFICATION_MODE
,
ModelType
.
RANK_MODE
,
ModelType
.
REGRESSION_MODE
))
parser
.
add_argument
(
'-s'
,
'--source_dic_path'
,
"-s"
,
"--source_dic_path"
,
type
=
str
,
required
=
False
,
help
=
"path of the source's word dic"
)
parser
.
add_argument
(
'--target_dic_path'
,
"--target_dic_path"
,
type
=
str
,
required
=
False
,
help
=
(
"path of the target's word dictionary, "
"if not set, the `source_dic_path` will be used"
))
parser
.
add_argument
(
'-a'
,
'--model_arch'
,
"-a"
,
"--model_arch"
,
type
=
int
,
required
=
True
,
default
=
ModelArch
.
CNN_MODE
,
help
=
"model architecture, %d for CNN, %d for FC, %d for RNN"
%
(
ModelArch
.
CNN_MODE
,
ModelArch
.
FC_MODE
,
ModelArch
.
RNN_MODE
))
parser
.
add_argument
(
'--share_network_between_source_target'
,
"--share_network_between_source_target"
,
type
=
distutils
.
util
.
strtobool
,
default
=
False
,
help
=
"whether to share network parameters between source and target"
)
parser
.
add_argument
(
'--share_embed'
,
"--share_embed"
,
type
=
distutils
.
util
.
strtobool
,
default
=
False
,
help
=
"whether to share word embedding between source and target"
)
parser
.
add_argument
(
'--dnn_dims'
,
"--dnn_dims"
,
type
=
str
,
default
=
'256,128,64,32'
,
help
=
(
"dimentions of dnn layers, default is
'256,128,64,32'
, "
default
=
"256,128,64,32"
,
help
=
(
"dimentions of dnn layers, default is
`256,128,64,32`
, "
"which means create a 4-layer dnn, "
"demention of each layer is 256, 128, 64 and 32"
))
parser
.
add_argument
(
'-c'
,
'--class_num'
,
"-c"
,
"--class_num"
,
type
=
int
,
default
=
0
,
help
=
"number of categories for classification task."
)
...
...
@@ -83,9 +83,10 @@ args = parser.parse_args()
args
.
model_type
=
ModelType
(
args
.
model_type
)
args
.
model_arch
=
ModelArch
(
args
.
model_arch
)
if
args
.
model_type
.
is_classification
():
assert
args
.
class_num
>
1
,
"--class_num should be set in classification task."
assert
args
.
class_num
>
1
,
(
"The parameter class_num should be set "
"in classification task."
)
layer_dims
=
map
(
int
,
args
.
dnn_dims
.
split
(
','
))
layer_dims
=
map
(
int
,
args
.
dnn_dims
.
split
(
","
))
args
.
target_dic_path
=
args
.
source_dic_path
if
not
args
.
target_dic_path
\
else
args
.
target_dic_path
...
...
@@ -94,8 +95,6 @@ paddle.init(use_gpu=False, trainer_count=1)
class
Inferer
(
object
):
def
__init__
(
self
,
param_path
):
logger
.
info
(
"create DSSM model"
)
prediction
=
DSSM
(
dnn_dims
=
layer_dims
,
vocab_sizes
=
[
...
...
@@ -110,14 +109,13 @@ class Inferer(object):
is_infer
=
True
)()
# load parameter
logger
.
info
(
"
load model parameters from %s
"
%
param_path
)
logger
.
info
(
"
Load the trained model from %s.
"
%
param_path
)
self
.
parameters
=
paddle
.
parameters
.
Parameters
.
from_tar
(
open
(
param_path
,
'r'
))
open
(
param_path
,
"r"
))
self
.
inferer
=
paddle
.
inference
.
Inference
(
output_layer
=
prediction
,
parameters
=
self
.
parameters
)
def
infer
(
self
,
data_path
):
logger
.
info
(
"infer data..."
)
dataset
=
reader
.
Dataset
(
train_path
=
data_path
,
test_path
=
None
,
...
...
@@ -125,19 +123,20 @@ class Inferer(object):
target_dic_path
=
args
.
target_dic_path
,
model_type
=
args
.
model_type
,
)
infer_reader
=
paddle
.
batch
(
dataset
.
infer
,
batch_size
=
1000
)
logger
.
warning
(
'write predictions to %s'
%
args
.
prediction_output_path
)
logger
.
warning
(
"Write predictions to %s."
%
args
.
prediction_output_path
)
output_f
=
open
(
args
.
prediction_output_path
,
'w'
)
output_f
=
open
(
args
.
prediction_output_path
,
"w"
)
for
id
,
batch
in
enumerate
(
infer_reader
()):
res
=
self
.
inferer
.
infer
(
input
=
batch
)
predictions
=
[
' '
.
join
(
map
(
str
,
x
))
for
x
in
res
]
predictions
=
[
" "
.
join
(
map
(
str
,
x
))
for
x
in
res
]
assert
len
(
batch
)
==
len
(
predictions
),
(
"predict error, %d inputs, "
"but %d predictions"
)
%
(
len
(
batch
),
len
(
predictions
))
output_f
.
write
(
'
\n
'
.
join
(
map
(
str
,
predictions
))
+
'
\n
'
)
"Error! %d inputs are given, "
"but only %d predictions are returned."
)
%
(
len
(
batch
),
len
(
predictions
))
output_f
.
write
(
"
\n
"
.
join
(
map
(
str
,
predictions
))
+
"
\n
"
)
if
__name__
==
'__main__'
:
if
__name__
==
"__main__"
:
inferer
=
Inferer
(
args
.
model_path
)
inferer
.
infer
(
args
.
data_path
)
dssm/network_conf.py
浏览文件 @
f3b1bb5a
...
...
@@ -13,26 +13,33 @@ class DSSM(object):
class_num
=
None
,
share_embed
=
False
,
is_infer
=
False
):
'''
@dnn_dims: list of int
dimentions of each layer in semantic vector generator.
@vocab_sizes: 2-d tuple
size of both left and right items.
@model_type: int
type of task, should be 'rank: 0', 'regression: 1' or 'classification: 2'
@model_arch: int
model architecture
@share_semantic_generator: bool
whether to share the semantic vector generator for both left and right.
@share_embed: bool
whether to share the embeddings between left and right.
@class_num: int
number of categories.
'''
"""
:param dnn_dims: The dimension of each layer in the semantic vector
generator.
:type dnn_dims: list of int
:param vocab_sizes: The size of left and right items.
:type vocab_sizes: A list having 2 elements.
:param model_type: The type of task to train the DSSM model. The value
should be "rank: 0", "regression: 1" or
"classification: 2".
:type model_type: int
:param model_arch: A value indicating the model architecture to use.
:type model_arch: int
:param share_semantic_generator: A flag indicating whether to share the
semantic vector generator between the left and
the right item.
:type share_semantic_generator: bool
:param share_embed: A flag indicating whether to share the embeddings
between the left and the right item.
:type share_embed: bool
:param class_num: The number of categories.
:type class_num: int
"""
assert
len
(
vocab_sizes
)
==
2
,
(
"vocab_sizes specify the sizes left and right inputs, "
"and dim should be 2."
)
assert
len
(
dnn_dims
)
>
1
,
"more than two layers is needed."
"The vocab_sizes specifying the sizes left and right inputs. "
"Its dimension should be 2."
)
assert
len
(
dnn_dims
)
>
1
,
(
"In the DNN model, more than two layers "
"are needed."
)
self
.
dnn_dims
=
dnn_dims
self
.
vocab_sizes
=
vocab_sizes
...
...
@@ -42,91 +49,89 @@ class DSSM(object):
self
.
model_arch
=
ModelArch
(
model_arch
)
self
.
class_num
=
class_num
self
.
is_infer
=
is_infer
logger
.
warning
(
"
b
uild DSSM model with config of %s, %s"
%
logger
.
warning
(
"
B
uild DSSM model with config of %s, %s"
%
(
self
.
model_type
,
self
.
model_arch
))
logger
.
info
(
"
vocabulary sizes
: %s"
%
str
(
self
.
vocab_sizes
))
logger
.
info
(
"
The vocabulary size is
: %s"
%
str
(
self
.
vocab_sizes
))
# bind model architecture
_model_arch
=
{
'cnn'
:
self
.
create_cnn
,
'fc'
:
self
.
create_fc
,
'rnn'
:
self
.
create_rnn
,
"cnn"
:
self
.
create_cnn
,
"fc"
:
self
.
create_fc
,
"rnn"
:
self
.
create_rnn
,
}
def
_model_arch_creater
(
emb
,
prefix
=
''
):
def
_model_arch_creater
(
emb
,
prefix
=
""
):
sent_vec
=
_model_arch
.
get
(
str
(
model_arch
))(
emb
,
prefix
)
dnn
=
self
.
create_dnn
(
sent_vec
,
prefix
)
return
dnn
self
.
model_arch_creater
=
_model_arch_creater
# build model type
_model_type
=
{
'classification'
:
self
.
_build_classification_model
,
'rank'
:
self
.
_build_rank_model
,
'regression'
:
self
.
_build_regression_model
,
"classification"
:
self
.
_build_classification_model
,
"rank"
:
self
.
_build_rank_model
,
"regression"
:
self
.
_build_regression_model
,
}
print
'model type: '
,
str
(
self
.
model_type
)
print
(
"model type: "
,
str
(
self
.
model_type
)
)
self
.
model_type_creater
=
_model_type
[
str
(
self
.
model_type
)]
def
__call__
(
self
):
return
self
.
model_type_creater
()
def
create_embedding
(
self
,
input
,
prefix
=
''
):
'''
Create an embedding table whose name has a `prefix`.
'''
logger
.
info
(
"create embedding table [%s] which dimention is %d"
%
def
create_embedding
(
self
,
input
,
prefix
=
""
):
"""
Create word embedding. The `prefix` is added in front of the name of
embedding's learnable parameter.
"""
logger
.
info
(
"Create embedding table [%s] whose dimention is %d. "
%
(
prefix
,
self
.
dnn_dims
[
0
]))
emb
=
paddle
.
layer
.
embedding
(
input
=
input
,
size
=
self
.
dnn_dims
[
0
],
param_attr
=
ParamAttr
(
name
=
'%s_emb.w'
%
prefix
))
param_attr
=
ParamAttr
(
name
=
"%s_emb.w"
%
prefix
))
return
emb
def
create_fc
(
self
,
emb
,
prefix
=
''
):
'''
def
create_fc
(
self
,
emb
,
prefix
=
""
):
"""
A multi-layer fully connected neural network.
@emb: paddle.layer
output of the embedding layer
@prefix: str
prefix of layers' names, used to share parameters between
more than one `fc` parts.
'''
:param emb: The output of the embedding layer
:type emb: paddle.layer
:param prefix: A prefix will be added to the layers' names.
:type prefix: str
"""
_input_layer
=
paddle
.
layer
.
pooling
(
input
=
emb
,
pooling_type
=
paddle
.
pooling
.
Max
())
fc
=
paddle
.
layer
.
fc
(
input
=
_input_layer
,
size
=
self
.
dnn_dims
[
1
],
param_attr
=
ParamAttr
(
name
=
'%s_fc.w'
%
prefix
),
param_attr
=
ParamAttr
(
name
=
"%s_fc.w"
%
prefix
),
bias_attr
=
ParamAttr
(
name
=
"%s_fc.b"
%
prefix
,
initial_std
=
0.
))
return
fc
def
create_rnn
(
self
,
emb
,
prefix
=
''
):
'''
def
create_rnn
(
self
,
emb
,
prefix
=
""
):
"""
A GRU sentence vector learner.
'''
"""
gru
=
paddle
.
networks
.
simple_gru
(
input
=
emb
,
size
=
self
.
dnn_dims
[
1
],
mixed_param_attr
=
ParamAttr
(
name
=
'%s_gru_mixed.w'
%
prefix
),
mixed_param_attr
=
ParamAttr
(
name
=
"%s_gru_mixed.w"
%
prefix
),
mixed_bias_param_attr
=
ParamAttr
(
name
=
"%s_gru_mixed.b"
%
prefix
),
gru_param_attr
=
ParamAttr
(
name
=
'%s_gru.w'
%
prefix
),
gru_param_attr
=
ParamAttr
(
name
=
"%s_gru.w"
%
prefix
),
gru_bias_attr
=
ParamAttr
(
name
=
"%s_gru.b"
%
prefix
))
sent_vec
=
paddle
.
layer
.
last_seq
(
gru
)
return
sent_vec
def
create_cnn
(
self
,
emb
,
prefix
=
''
):
'''
def
create_cnn
(
self
,
emb
,
prefix
=
""
):
"""
A multi-layer CNN.
@emb: paddle.layer
output of the embedding layer
@prefix: str
prefix of layers' names, used to share parameters between
more than one `cnn` parts.
'''
:param emb: The word embedding.
:type emb: paddle.layer
:param prefix: The prefix will be added to the layers' names.
:type prefix: str
"""
def
create_conv
(
context_len
,
hidden_size
,
prefix
):
key
=
"%s_%d_%d"
%
(
prefix
,
context_len
,
hidden_size
)
...
...
@@ -135,15 +140,15 @@ class DSSM(object):
context_len
=
context_len
,
hidden_size
=
hidden_size
,
# set parameter attr for parameter sharing
context_proj_param_attr
=
ParamAttr
(
name
=
key
+
'contex_proj.w'
),
fc_param_attr
=
ParamAttr
(
name
=
key
+
'_fc.w'
),
fc_bias_attr
=
ParamAttr
(
name
=
key
+
'_fc.b'
),
pool_bias_attr
=
ParamAttr
(
name
=
key
+
'_pool.b'
))
context_proj_param_attr
=
ParamAttr
(
name
=
key
+
"contex_proj.w"
),
fc_param_attr
=
ParamAttr
(
name
=
key
+
"_fc.w"
),
fc_bias_attr
=
ParamAttr
(
name
=
key
+
"_fc.b"
),
pool_bias_attr
=
ParamAttr
(
name
=
key
+
"_pool.b"
))
return
conv
logger
.
info
(
'create a sequence_conv_pool which context width is 3'
)
logger
.
info
(
"create a sequence_conv_pool which context width is 3"
)
conv_3
=
create_conv
(
3
,
self
.
dnn_dims
[
1
],
"cnn"
)
logger
.
info
(
'create a sequence_conv_pool which context width is 4'
)
logger
.
info
(
"create a sequence_conv_pool which context width is 4"
)
conv_4
=
create_conv
(
4
,
self
.
dnn_dims
[
1
],
"cnn"
)
return
conv_3
,
conv_4
...
...
@@ -160,8 +165,8 @@ class DSSM(object):
input
=
_input_layer
,
size
=
dim
,
act
=
paddle
.
activation
.
Tanh
(),
param_attr
=
ParamAttr
(
name
=
'%s.w'
%
name
),
bias_attr
=
ParamAttr
(
name
=
'%s.b'
%
name
,
initial_std
=
0.
))
param_attr
=
ParamAttr
(
name
=
"%s.w"
%
name
),
bias_attr
=
ParamAttr
(
name
=
"%s.b"
%
name
,
initial_std
=
0.
))
_input_layer
=
fc
return
_input_layer
...
...
@@ -178,7 +183,7 @@ class DSSM(object):
is_classification
=
False
)
def
_build_rank_model
(
self
):
'''
"""
Build a pairwise rank model, and the cost is returned.
A pairwise rank model has 3 inputs:
...
...
@@ -187,26 +192,26 @@ class DSSM(object):
- right_target sentence
- label, 1 if left_target should be sorted in front of
right_target, otherwise 0.
'''
"""
logger
.
info
(
"build rank model"
)
assert
self
.
model_type
.
is_rank
()
source
=
paddle
.
layer
.
data
(
name
=
'source_input'
,
name
=
"source_input"
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
self
.
vocab_sizes
[
0
]))
left_target
=
paddle
.
layer
.
data
(
name
=
'left_target_input'
,
name
=
"left_target_input"
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
self
.
vocab_sizes
[
1
]))
right_target
=
paddle
.
layer
.
data
(
name
=
'right_target_input'
,
name
=
"right_target_input"
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
self
.
vocab_sizes
[
1
]))
if
not
self
.
is_infer
:
label
=
paddle
.
layer
.
data
(
name
=
'label_input'
,
type
=
paddle
.
data_type
.
integer_value
(
1
))
name
=
"label_input"
,
type
=
paddle
.
data_type
.
integer_value
(
1
))
prefixs
=
'_ _ _'
.
split
(
)
if
self
.
share_semantic_generator
else
'source target target'
.
split
()
embed_prefixs
=
'_ _'
.
split
(
)
if
self
.
share_embed
else
'source target target'
.
split
()
prefixs
=
"_ _ _"
.
split
(
)
if
self
.
share_semantic_generator
else
"source target target"
.
split
()
embed_prefixs
=
"_ _"
.
split
(
)
if
self
.
share_embed
else
"source target target"
.
split
()
word_vecs
=
[]
for
id
,
input
in
enumerate
([
source
,
left_target
,
right_target
]):
...
...
@@ -218,9 +223,9 @@ class DSSM(object):
x
=
self
.
model_arch_creater
(
input
,
prefix
=
prefixs
[
id
])
semantics
.
append
(
x
)
#
cossim score of source and left_target
#
The cosine similarity score of source and left_target.
left_score
=
paddle
.
layer
.
cos_sim
(
semantics
[
0
],
semantics
[
1
])
#
cossim score of source and right target
#
The cosine similarity score of source and right target.
right_score
=
paddle
.
layer
.
cos_sim
(
semantics
[
0
],
semantics
[
2
])
if
not
self
.
is_infer
:
...
...
@@ -233,34 +238,33 @@ class DSSM(object):
return
right_score
def
_build_classification_or_regression_model
(
self
,
is_classification
):
'''
"""
Build a classification/regression model, and the cost is returned.
A Classification ha
s 3 inputs:
The classification/regression task expect
s 3 inputs:
- source sentence
- target sentence
- classification label
'''
"""
if
is_classification
:
# prepare inputs.
assert
self
.
class_num
source
=
paddle
.
layer
.
data
(
name
=
'source_input'
,
name
=
"source_input"
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
self
.
vocab_sizes
[
0
]))
target
=
paddle
.
layer
.
data
(
name
=
'target_input'
,
name
=
"target_input"
,
type
=
paddle
.
data_type
.
integer_value_sequence
(
self
.
vocab_sizes
[
1
]))
label
=
paddle
.
layer
.
data
(
name
=
'label_input'
,
name
=
"label_input"
,
type
=
paddle
.
data_type
.
integer_value
(
self
.
class_num
)
if
is_classification
else
paddle
.
data_type
.
dense_vector
(
1
))
prefixs
=
'_ _'
.
split
(
)
if
self
.
share_semantic_generator
else
'source target'
.
split
()
embed_prefixs
=
'_ _'
.
split
(
)
if
self
.
share_embed
else
'source target'
.
split
()
prefixs
=
"_ _"
.
split
(
)
if
self
.
share_semantic_generator
else
"source target"
.
split
()
embed_prefixs
=
"_ _"
.
split
(
)
if
self
.
share_embed
else
"source target"
.
split
()
word_vecs
=
[]
for
id
,
input
in
enumerate
([
source
,
target
]):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录