Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
9511ee38
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9511ee38
编写于
10月 18, 2018
作者:
M
mapingshuo
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
for Python3 compatible
上级
a53f41c2
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
38 addition
and
47 deletion
+38
-47
fluid/text_matching_on_quora/configs/__init__.py
fluid/text_matching_on_quora/configs/__init__.py
+5
-5
fluid/text_matching_on_quora/configs/cdssm.py
fluid/text_matching_on_quora/configs/cdssm.py
+1
-1
fluid/text_matching_on_quora/configs/dec_att.py
fluid/text_matching_on_quora/configs/dec_att.py
+1
-1
fluid/text_matching_on_quora/configs/infer_sent.py
fluid/text_matching_on_quora/configs/infer_sent.py
+1
-1
fluid/text_matching_on_quora/configs/sse.py
fluid/text_matching_on_quora/configs/sse.py
+1
-1
fluid/text_matching_on_quora/models/__init__.py
fluid/text_matching_on_quora/models/__init__.py
+4
-4
fluid/text_matching_on_quora/models/infer_sent.py
fluid/text_matching_on_quora/models/infer_sent.py
+3
-2
fluid/text_matching_on_quora/models/match_layers.py
fluid/text_matching_on_quora/models/match_layers.py
+0
-16
fluid/text_matching_on_quora/models/sse.py
fluid/text_matching_on_quora/models/sse.py
+3
-2
fluid/text_matching_on_quora/pretrained_word2vec.py
fluid/text_matching_on_quora/pretrained_word2vec.py
+13
-8
fluid/text_matching_on_quora/quora_question_pairs.py
fluid/text_matching_on_quora/quora_question_pairs.py
+4
-3
fluid/text_matching_on_quora/train_and_evaluate.py
fluid/text_matching_on_quora/train_and_evaluate.py
+1
-2
fluid/text_matching_on_quora/utils.py
fluid/text_matching_on_quora/utils.py
+1
-1
未找到文件。
fluid/text_matching_on_quora/configs/__init__.py
浏览文件 @
9511ee38
from
cdssm
import
cdssm_base
from
dec_att
import
decatt_glove
from
sse
import
sse_base
from
infer_sent
import
infer_sent_v1
from
infer_sent
import
infer_sent_v2
from
.
cdssm
import
cdssm_base
from
.
dec_att
import
decatt_glove
from
.
sse
import
sse_base
from
.
infer_sent
import
infer_sent_v1
from
.
infer_sent
import
infer_sent_v2
fluid/text_matching_on_quora/configs/cdssm.py
浏览文件 @
9511ee38
import
basic_config
from
.
import
basic_config
def
cdssm_base
():
"""
...
...
fluid/text_matching_on_quora/configs/dec_att.py
浏览文件 @
9511ee38
import
basic_config
from
.
import
basic_config
def
decatt_glove
():
"""
...
...
fluid/text_matching_on_quora/configs/infer_sent.py
浏览文件 @
9511ee38
import
basic_config
from
.
import
basic_config
def
infer_sent_v1
():
"""
...
...
fluid/text_matching_on_quora/configs/sse.py
浏览文件 @
9511ee38
import
basic_config
from
.
import
basic_config
def
sse_base
():
"""
...
...
fluid/text_matching_on_quora/models/__init__.py
浏览文件 @
9511ee38
from
cdssm
import
cdssmNet
from
dec_att
import
DecAttNet
from
sse
import
SSENet
from
infer_sent
import
InferSentNet
from
.
cdssm
import
cdssmNet
from
.
dec_att
import
DecAttNet
from
.
sse
import
SSENet
from
.
infer_sent
import
InferSentNet
fluid/text_matching_on_quora/models/infer_sent.py
浏览文件 @
9511ee38
import
paddle.fluid
as
fluid
from
my_layers
import
bi_lstm_layer
from
match_layers
import
ElementwiseMatching
from
.
my_layers
import
bi_lstm_layer
from
.
match_layers
import
ElementwiseMatching
class
InferSentNet
():
"""
...
...
fluid/text_matching_on_quora/models/match_layers.py
浏览文件 @
9511ee38
...
...
@@ -3,7 +3,6 @@ This Module provide different kinds of Match layers
"""
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
def
MultiPerspectiveMatching
(
vec1
,
vec2
,
perspective_num
):
...
...
@@ -44,18 +43,3 @@ def ElementwiseMatching(vec1, vec2):
return
fluid
.
layers
.
concat
(
input
=
[
vec1
,
vec2
,
elementwise_mul
,
elementwise_abs_sub
],
axis
=
1
)
def
MultiPerspectiveFullMatching
(
seq1
,
seq2
,
perspective_num
):
"""
seq1: Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
seq2: Another Lod tensor with shape [-1, feature_dim] (lod level == 1) is a representation of a sentence.
use seq1 to match seq2
return match seq with same shape as seq1.
"""
print
seq2
seq2_last
=
fluid
.
layers
.
sequence_pool
(
input
=
seq2
,
pool_type
=
"last"
)
print
seq2_last
seq2
=
fluid
.
layers
.
sequence_expand
(
seq2_last
,
seq1
)
print
seq2
#seq2 = fluid.layers.lod_reset(x=seq2, y=seq1)
seq2
.
set_lod
(
seq1
)
print
seq2
fluid/text_matching_on_quora/models/sse.py
浏览文件 @
9511ee38
import
paddle.fluid
as
fluid
from
my_layers
import
bi_lstm_layer
from
match_layers
import
ElementwiseMatching
from
.
my_layers
import
bi_lstm_layer
from
.
match_layers
import
ElementwiseMatching
class
SSENet
():
"""
...
...
fluid/text_matching_on_quora/pretrained_word2vec.py
浏览文件 @
9511ee38
...
...
@@ -2,27 +2,29 @@
This Module provide pretrained word-embeddings
"""
from
__future__
import
print_function
from
__future__
import
print_function
,
unicode_literals
import
numpy
as
np
import
time
,
datetime
import
os
,
sys
def
Glove840B_300D
(
filepath
,
keys
=
None
):
"""
input: the "glove.840B.300d.txt" file path
return: a dict, key: word (unicode), value: a numpy array with shape [300]
"""
if
keys
is
not
None
:
if
keys
is
not
None
:
assert
(
isinstance
(
keys
,
set
))
print
(
"loading word2vec from "
,
filepath
)
print
(
"please wait for a minute."
)
start
=
time
.
time
()
word2vec
=
{}
with
open
(
filepath
,
"r"
)
as
f
:
for
line
in
f
:
info
=
line
.
strip
().
split
()
# TODO: test python3
word
=
info
[
0
].
decode
(
'utf-8'
)
if
sys
.
version_info
<=
(
3
,
0
):
# for python2
line
=
line
.
decode
(
'utf-8'
)
info
=
line
.
strip
(
"
\n
"
).
split
(
" "
)
word
=
info
[
0
]
if
(
keys
is
not
None
)
and
(
word
not
in
keys
):
continue
vector
=
info
[
1
:]
...
...
@@ -32,6 +34,9 @@ def Glove840B_300D(filepath, keys=None):
end
=
time
.
time
()
print
(
"Spent "
,
str
(
datetime
.
timedelta
(
seconds
=
end
-
start
)),
" on loading word2vec."
)
return
word2vec
if
__name__
==
'__main__'
:
embed_dict
=
Glove840B_300D
(
"data/glove.840B.300d.txt"
)
from
os.path
import
expanduser
home
=
expanduser
(
"~"
)
embed_dict
=
Glove840B_300D
(
os
.
path
.
join
(
home
,
"./.cache/paddle/dataset/glove.840B.300d.txt"
))
exit
(
0
)
fluid/text_matching_on_quora/quora_question_pairs.py
浏览文件 @
9511ee38
...
...
@@ -20,7 +20,7 @@ import tarfile
import
re
import
string
import
random
import
os
import
os
,
sys
import
nltk
from
os.path
import
expanduser
...
...
@@ -43,7 +43,8 @@ COLUMN_COUNT = 4
def
tokenize
(
s
):
s
=
s
.
decode
(
'utf-8'
)
if
sys
.
version_info
<=
(
3
,
0
):
# for python2
s
=
s
.
decode
(
'utf-8'
)
if
TOKENIZE_METHOD
==
"nltk"
:
return
nltk
.
tokenize
.
word_tokenize
(
s
)
elif
TOKENIZE_METHOD
==
"punctuation"
:
...
...
@@ -116,7 +117,7 @@ def build_dict(file_name, cutoff):
dictionary
=
sorted
(
word_freq
,
key
=
lambda
x
:
(
-
x
[
1
],
x
[
0
]))
words
,
_
=
list
(
zip
(
*
dictionary
))
word_idx
=
dict
(
zip
(
words
,
x
range
(
len
(
words
))))
word_idx
=
dict
(
zip
(
words
,
range
(
len
(
words
))))
word_idx
[
'<unk>'
]
=
len
(
words
)
word_idx
[
'<pad>'
]
=
len
(
words
)
+
1
return
word_idx
...
...
fluid/text_matching_on_quora/train_and_evaluate.py
浏览文件 @
9511ee38
...
...
@@ -9,7 +9,6 @@ import contextlib
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.v2
as
paddle
import
utils
,
metric
,
configs
import
models
...
...
@@ -155,7 +154,7 @@ def train_and_evaluate(train_reader,
# start training
print
(
"[%s] Start Training"
%
time
.
asctime
(
time
.
localtime
(
time
.
time
())))
for
epoch_id
in
x
range
(
global_config
.
epoch_num
):
for
epoch_id
in
range
(
global_config
.
epoch_num
):
data_size
,
data_count
,
total_acc
,
total_cost
=
0
,
0
,
0.0
,
0.0
batch_id
=
0
for
data
in
train_reader
():
...
...
fluid/text_matching_on_quora/utils.py
浏览文件 @
9511ee38
...
...
@@ -7,7 +7,7 @@ import time
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle
.v2
as
paddle
import
paddle
import
quora_question_pairs
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录