Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
8a800d25
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8a800d25
编写于
5月 08, 2020
作者:
M
malin10
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add dssm
上级
dd378956
变更
5
展开全部
隐藏空白更改
内联
并排
Showing 5 changed files with 264 additions and 0 deletions
+264
-0
models/match/dssm/config.yaml
models/match/dssm/config.yaml
+54
-0
models/match/dssm/data/train/sample_train.txt
models/match/dssm/data/train/sample_train.txt
+16
-0
models/match/dssm/model.py
models/match/dssm/model.py
+110
-0
models/match/dssm/synthetic_evaluate_reader.py
models/match/dssm/synthetic_evaluate_reader.py
+40
-0
models/match/dssm/synthetic_reader.py
models/match/dssm/synthetic_reader.py
+44
-0
未找到文件。
models/match/dssm/config.yaml
0 → 100644
浏览文件 @
8a800d25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# NOTE(review): the key nesting below was reconstructed from a flattened diff
# view in which indentation was lost -- confirm against the repository copy.

# Settings for the evaluation run.
evaluate:
  reader:
    batch_size: 1
    # Reader implementation; synthetic_evaluate_reader.py yields only the
    # 'query' and 'doc_pos' slots.
    # presumably "{workspace}" is substituted with train.workspace -- verify.
    class: "{workspace}/synthetic_evaluate_reader.py"
    # Same directory as train.reader.train_data_path below.
    test_data_path: "{workspace}/data/train"

# Settings for the training run.
train:
  trainer:
    # for cluster training
    strategy: "async"

  epochs: 4
  workspace: "fleetrec.models.match.dssm"

  reader:
    batch_size: 4
    # Reader implementation; synthetic_reader.py yields 'query', 'doc_pos'
    # and one 'doc_neg_<i>' slot per extra tab-separated column.
    class: "{workspace}/synthetic_reader.py"
    train_data_path: "{workspace}/data/train"

  model:
    models: "{workspace}/model.py"
    hyper_parameters:
      # Width of every query/doc input; model.py declares each input with
      # shape [-1, TRIGRAM_D].
      TRIGRAM_D: 1000
      # Number of 'doc_neg_<i>' inputs model.py creates.
      NEG: 4
      # Output size and activation of each fully connected layer; model.py
      # indexes both lists by layer position.
      fc_sizes: [300, 300, 128]
      fc_acts: ['tanh', 'tanh', 'tanh']
      # Passed to the SGD optimizer built in model.py.
      learning_rate: 0.01
      # Not read by model.py's optimizer(), which always builds SGD.
      optimizer: sgd

  save:
    increment:
      dirname: "increment"
      epoch_interval: 2
      save_last: True

    inference:
      dirname: "inference"
      epoch_interval: 4
      # Match the input names declared in model.py.
      feed_varnames: ["query", "doc_pos"]
      # presumably the auto-generated name of the first cos_sim output in
      # model.py (query vs. doc_pos) -- verify against the built program.
      fetch_varnames: ["cos_sim_0.tmp_0"]
      save_last: True
models/match/dssm/data/train/sample_train.txt
0 → 100644
浏览文件 @
8a800d25
此差异已折叠。
点击以展开。
models/match/dssm/model.py
0 → 100644
浏览文件 @
8a800d25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
paddle.fluid
as
fluid
from
fleetrec.core.utils
import
envs
from
fleetrec.core.model
import
Model
as
ModelBase
class Model(ModelBase):
    """DSSM matching model built from paddle.fluid layers.

    A query vector, one positive document vector and ``NEG`` negative
    document vectors are each passed through their own stack of fully
    connected layers.  The cosine similarity between the query output and
    every document output is computed, the similarities are soft-maxed, and
    the training loss is the negative log probability assigned to the
    positive document.

    Hyper-parameters are read through ``envs.get_global_env`` under
    ``self._namespace``.  The attributes ``_namespace``, ``_platform``,
    ``_data_var``, ``_infer_results`` and ``_metrics`` are not defined in this
    class; presumably they are initialised by ``ModelBase`` -- verify there.
    """

    def __init__(self, config):
        ModelBase.__init__(self, config)

    def input(self):
        """Declare the training inputs and register them in ``_data_var``.

        Creates ``query``, ``doc_pos`` and ``doc_neg_0 .. doc_neg_{NEG-1}``,
        each a float32 variable of shape ``[-1, TRIGRAM_D]``.  When
        ``self._platform`` is not ``"LINUX"`` a non-iterable ``DataLoader``
        over those variables is also created.
        """
        trigram_dim = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
                                          self._namespace)
        neg_count = envs.get_global_env("hyper_parameters.NEG", None,
                                        self._namespace)

        self.query = fluid.data(
            name="query",
            shape=[-1, trigram_dim],
            dtype='float32',
            lod_level=0)
        self.doc_pos = fluid.data(
            name="doc_pos",
            shape=[-1, trigram_dim],
            dtype='float32',
            lod_level=0)
        self.doc_negs = [
            fluid.data(
                name="doc_neg_" + str(i),
                shape=[-1, trigram_dim],
                dtype="float32",
                lod_level=0) for i in range(neg_count)
        ]

        # Registration order is query, doc_pos, then the negatives.
        self._data_var.append(self.query)
        self._data_var.append(self.doc_pos)
        # Loop variable deliberately not called `input`: that would shadow
        # the builtin inside a method that is itself named `input`.
        for doc_neg in self.doc_negs:
            self._data_var.append(doc_neg)

        if self._platform != "LINUX":
            self._data_loader = fluid.io.DataLoader.from_generator(
                feed_list=self._data_var,
                capacity=64,
                use_double_buffer=False,
                iterable=False)

    def net(self, is_infer=False):
        """Build the network.

        Always sets ``self.R_Q_D_p`` (cosine similarity between the query and
        the positive document).  When ``is_infer`` is false it additionally
        builds the negative-document towers and sets ``self.avg_cost``.

        Args:
            is_infer: when true, stop after the query/positive similarity.
        """
        layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None,
                                          self._namespace)
        layer_acts = envs.get_global_env("hyper_parameters.fc_acts", None,
                                         self._namespace)

        def tower(data, prefix, suffix=""):
            """Stack one fc layer per entry of ``layer_sizes`` on ``data``.

            Layer ``k`` (1-based) is named ``prefix + str(k) + suffix``, which
            yields e.g. ``query_l1`` or ``doc_neg_l1_0``.  Names are derived
            from the layer index so that ``fc_sizes`` may hold any number of
            layers, not only three.
            """
            out = data
            for depth, size in enumerate(layer_sizes):
                xavier = fluid.initializer.Xavier(
                    uniform=True, fan_in=out.shape[1], fan_out=size)
                out = fluid.layers.fc(input=out,
                                      size=size,
                                      act=layer_acts[depth],
                                      param_attr=xavier,
                                      bias_attr=xavier,
                                      name=prefix + str(depth + 1) + suffix)
            return out

        query_fc = tower(self.query, 'query_l')
        doc_pos_fc = tower(self.doc_pos, 'doc_pos_l')
        self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)

        if is_infer:
            return

        neg_sims = []
        for i, doc_neg in enumerate(self.doc_negs):
            doc_neg_fc = tower(doc_neg, 'doc_neg_l', '_' + str(i))
            neg_sims.append(fluid.layers.cos_sim(query_fc, doc_neg_fc))

        # Column 0 holds the positive similarity, the negatives follow.
        concat_Rs = fluid.layers.concat(
            input=[self.R_Q_D_p] + neg_sims, axis=-1)
        prob = fluid.layers.softmax(concat_Rs, axis=1)

        # Keep column 0 (the positive document) for every row.  Slicing only
        # along axis 1 avoids baking a batch size into the graph; restricting
        # axis 0 to the first 4 rows would drop samples from larger batches.
        hit_prob = fluid.layers.slice(prob, axes=[1], starts=[0], ends=[1])
        loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
        self.avg_cost = fluid.layers.mean(x=loss)

    def infer_results(self):
        """Expose the query/positive similarity as ``'query_doc_sim'``."""
        self._infer_results['query_doc_sim'] = self.R_Q_D_p

    def avg_loss(self):
        """Store the training loss in ``self._cost``."""
        self._cost = self.avg_cost

    def metrics(self):
        """Register the training loss under the ``"LOSS"`` metric key."""
        self._metrics["LOSS"] = self.avg_cost

    def train_net(self):
        """Declare inputs, build the training graph, record loss and metrics."""
        self.input()
        self.net(is_infer=False)
        self.avg_loss()
        self.metrics()

    def optimizer(self):
        """Return an SGD optimizer using ``hyper_parameters.learning_rate``."""
        learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
                                            None, self._namespace)
        return fluid.optimizer.SGD(learning_rate)

    def infer_input(self):
        """Declare the inference inputs (``query`` and ``doc_pos`` only).

        Unlike ``input``, the ``DataLoader`` is created unconditionally here.
        """
        trigram_dim = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
                                          self._namespace)
        self.query = fluid.data(
            name="query",
            shape=[-1, trigram_dim],
            dtype='float32',
            lod_level=0)
        self.doc_pos = fluid.data(
            name="doc_pos",
            shape=[-1, trigram_dim],
            dtype='float32',
            lod_level=0)
        self._infer_data_var = [self.query, self.doc_pos]

        self._infer_data_loader = fluid.io.DataLoader.from_generator(
            feed_list=self._infer_data_var,
            capacity=64,
            use_double_buffer=False,
            iterable=False)

    def infer_net(self):
        """Declare inference inputs, build the graph and record its outputs."""
        self.infer_input()
        self.net(is_infer=True)
        self.infer_results()
models/match/dssm/synthetic_evaluate_reader.py
0 → 100644
浏览文件 @
8a800d25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
fleetrec.core.reader
import
Reader
from
fleetrec.core.utils
import
envs
class EvaluateReader(Reader):
    """Reader for evaluation: yields only the query and positive document."""

    def init(self):
        pass

    def generate_sample(self, line):
        """Return a generator function that parses one text line.

        Args:
            line: tab-separated fields, each a comma-separated list of
                floats.  Field 0 is the query and field 1 the positive
                document; any further fields are ignored.

        Returns:
            A zero-argument callable that yields a single sample: the list
            ``[('query', [...]), ('doc_pos', [...])]``.

        Raises:
            IndexError: if the line has fewer than two fields.
            ValueError: if a value cannot be parsed as a float.
        """

        def reader():
            features = line.rstrip('\n').split('\t')
            # Build real lists: on Python 3 `map` and `zip` return one-shot
            # lazy iterators, which have no length and cannot be re-read.
            query = [float(value) for value in features[0].split(',')]
            pos_doc = [float(value) for value in features[1].split(',')]
            feature_names = ['query', 'doc_pos']
            yield list(zip(feature_names, [query, pos_doc]))

        return reader
models/match/dssm/synthetic_reader.py
0 → 100644
浏览文件 @
8a800d25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
fleetrec.core.reader
import
Reader
from
fleetrec.core.utils
import
envs
class TrainReader(Reader):
    """Reader for training: yields query, positive and negative documents."""

    def init(self):
        pass

    def generate_sample(self, line):
        """Return a generator function that parses one text line.

        Args:
            line: tab-separated fields, each a comma-separated list of
                floats.  Field 0 is the query, field 1 the positive document
                and every following field one negative document.

        Returns:
            A zero-argument callable that yields a single sample: a list of
            ``(name, values)`` pairs named ``'query'``, ``'doc_pos'`` and
            ``'doc_neg_0'``, ``'doc_neg_1'``, ... in field order.

        Raises:
            IndexError: if the line has fewer than two fields.
            ValueError: if a value cannot be parsed as a float.
        """

        def reader():
            features = line.rstrip('\n').split('\t')
            # Build real lists: on Python 3 `map` and `zip` return one-shot
            # lazy iterators, which have no length and cannot be re-read.
            query = [float(value) for value in features[0].split(',')]
            pos_doc = [float(value) for value in features[1].split(',')]
            neg_docs = [[float(value) for value in field.split(',')]
                        for field in features[2:]]

            feature_names = ['query', 'doc_pos'] + [
                'doc_neg_' + str(i) for i in range(len(neg_docs))
            ]
            yield list(zip(feature_names, [query, pos_doc] + neg_docs))

        return reader
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录