PaddlePaddle / PaddleHub

Commit 3ceffb4a (unverified)
Authored Aug 11, 2020 by Steffy-zxf; committed via GitHub on Aug 11, 2020.

Update matching task (#814)

Parent: 3c090720

Showing 10 changed files with 153 additions and 35 deletions (+153 −35)
Changed files:

demo/pairwise_text_matching/ernie_pairwise_matching.py                   +3    −3
demo/pairwise_text_matching/ernie_pairwise_matching_predict.py           +4    −4
demo/pairwise_text_matching/run_embedding_pairwise_matching.sh           +2    −2
demo/pairwise_text_matching/run_embedding_pairwise_matching_predict.sh   +1    −1
demo/pointwise_text_matching/embedding_pointwise_matching_predict.py     +4    −4
demo/pointwise_text_matching/ernie_pointwise_matching.py                 +2    −2
demo/pointwise_text_matching/ernie_pointwise_matching_predict.py         +7    −7
demo/pointwise_text_matching/run_embedding_pointwise_matching.sh         +1    −1
paddlehub/finetune/task/matching_task.py                                 +128  −10
paddlehub/version.py                                                     +1    −1
demo/pairwise_text_matching/ernie_pairwise_matching.py

@@ -52,9 +52,9 @@ if __name__ == '__main__':
     # Construct transfer learning network
     # Use sequence-level output.
-    query = outputs["pooled_output"]
-    left = outputs['pooled_output_2']
-    right = outputs['pooled_output_3']
+    query = outputs["sequence_output"]
+    left = outputs['sequence_output_2']
+    right = outputs['sequence_output_3']

     # Select fine-tune strategy
     strategy = hub.AdamWeightDecayStrategy(
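Note: this hunk (and the matching hunks in the other ERNIE demos below) switches from the sentence-level pooled_output* tensors to the token-level sequence_output* tensors of the pre-trained module. As a rough sketch of where outputs comes from in these PaddleHub 1.8 demo scripts — the module.context call and its num_slots argument are assumptions based on the pairwise demos, not part of this diff:

# Sketch only; assumes paddlepaddle 1.x and paddlehub 1.8.x are installed.
import paddlehub as hub

module = hub.Module(name="ernie")
# num_slots=3 (query, left title, right title) is an assumption for the pairwise demo.
inputs, outputs, program = module.context(
    trainable=True, max_seq_len=128, num_slots=3)

query = outputs["sequence_output"]     # token-level features, [batch, seq_len, hidden]
left = outputs["sequence_output_2"]    # the replaced "pooled_output*" keys hold the [CLS] sentence vectors
right = outputs["sequence_output_3"]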
demo/pairwise_text_matching/ernie_pairwise_matching_predict.py

@@ -48,9 +48,9 @@ if __name__ == '__main__':
     # Construct transfer learning network.
     # Use sequence-level output.
-    query = outputs["pooled_output"]
-    left = outputs['pooled_output_2']
-    right = outputs['pooled_output_3']
+    query = outputs["sequence_output"]
+    left = outputs['sequence_output_2']
+    right = outputs['sequence_output_3']

     # Select fine-tune strategy.
     strategy = hub.AdamWeightDecayStrategy()

@@ -91,6 +91,6 @@ if __name__ == '__main__':
         max_seq_len=args.max_seq_len,
         label_list=dataset.get_labels(),
         return_result=True,
-        accelerate_mode=False)
+        accelerate_mode=True)

     for index, text in enumerate(text_pairs):
         print("data: %s, prediction_label: %s" % (text, results[index]))
demo/pairwise_text_matching/run_embedding_pairwise_matching.sh

@@ -3,7 +3,7 @@ CKPT_DIR="./ckpt_embedding_pairwise_matching"
 python -u embedding_pairwise_matching.py \
     --batch_size=128 \
     --checkpoint_dir=${CKPT_DIR} \
-    --learning_rate=5e-2 \
+    --learning_rate=5e-3 \
     --max_seq_len=128 \
-    --num_epoch=300 \
+    --num_epoch=100 \
     --network=bow
demo/pairwise_text_matching/run_embedding_pairwise_matching_predict.sh

@@ -4,4 +4,4 @@ python -u embedding_pairwise_matching_predict.py \
     --batch_size=1 \
     --checkpoint_dir=${CKPT_DIR} \
     --max_seq_len=128 \
-    --network=lstm
+    --network=bow
demo/pointwise_text_matching/embedding_pointwise_matching_predict.py

@@ -88,12 +88,12 @@ if __name__ == '__main__':
     # Prediction data sample.
     text_pairs = [
         [
-            "请问不是您的账户吗?",  # query
-            "您好,请问您使用的邮箱类型是?"  # title
+            "淘宝上怎么用信用卡分期付款",  # query
+            "淘宝上怎么分期付款,没有信用卡",  # title
         ],
         [
-            "推荐个手机游戏",  # query
-            "手机游戏推荐"  # title
+            "山楂干怎么吃好吃?",  # query
+            "山楂怎么做好吃",  # title
         ]
     ]
demo/pointwise_text_matching/ernie_pointwise_matching.py

@@ -52,8 +52,8 @@ if __name__ == '__main__':
     # Construct transfer learning network
     # Use token-level output.
-    query = outputs["pooled_output"]
-    left = outputs['pooled_output_2']
+    query = outputs["sequence_output"]
+    left = outputs['sequence_output_2']

     # Select fine-tune strategy
     strategy = hub.AdamWeightDecayStrategy(
demo/pointwise_text_matching/ernie_pointwise_matching_predict.py

@@ -48,8 +48,8 @@ if __name__ == '__main__':
     # Construct transfer learning network
     # Use token-level output.
-    query = outputs["pooled_output"]
-    left = outputs['pooled_output_2']
+    query = outputs["sequence_output"]
+    left = outputs['sequence_output_2']

     # Select fine-tune strategy
     strategy = hub.AdamWeightDecayStrategy()

@@ -73,12 +73,12 @@ if __name__ == '__main__':
     # Prediction data sample.
     text_pairs = [
         [
-            "小品《战狼故事》中,吴京突破重重障碍解救爱人,深情告白太感人;爱人",  # query
-            "外文名:愛人;摘要:爱人,意思是:情人。;义项描述:日本语词汇;语言:日文;中文名:爱人;标签:文化;",  # title
+            "淘宝上怎么用信用卡分期付款",  # query
+            "淘宝上怎么分期付款,没有信用卡",  # title
         ],
         [
-            "儿子祝融被杀害,西天王大发雷霆,立即下令捉拿天庭三公主;儿子",  # query
-            "摘要:《儿子》是曹国昌1983年创作的木雕,收藏于中国美术馆。;材质::木雕;作者::曹国昌;中文名:儿子;创作年代::1983年;义项描述:曹国昌木雕;标签:文化;",  # title
+            "山楂干怎么吃好吃?",  # query
+            "山楂怎么做好吃",  # title
         ]
     ]

@@ -88,6 +88,6 @@ if __name__ == '__main__':
         max_seq_len=args.max_seq_len,
         label_list=dataset.get_labels(),
         return_result=True,
-        accelerate_mode=False)
+        accelerate_mode=True)

     for index, text in enumerate(text_pairs):
         print("data: %s, prediction_label: %s" % (text, results[index]))
demo/pointwise_text_matching/run_embedding_pointwise_matching.sh

@@ -5,5 +5,5 @@ python -u embedding_pointwise_matching.py \
     --checkpoint_dir=${CKPT_DIR} \
     --learning_rate=5e-3 \
     --max_seq_len=128 \
-    --num_epoch=300 \
+    --num_epoch=100 \
     --network=bow
paddlehub/finetune/task/matching_task.py

@@ -106,32 +106,81 @@ class PairwiseTextMatchingTask(BaseTask):
                 % self.network)

             query_feats, left_feats = net_func(query_unpad, left_unpad)
+            query_feats = fluid.layers.fc(
+                input=query_feats,
+                size=300,
+                param_attr=fluid.ParamAttr(
+                    name="query_fc_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="query_fc_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
+            left_feats = fluid.layers.fc(
+                input=left_feats,
+                size=300,
+                param_attr=fluid.ParamAttr(
+                    name="title_fc_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="title_fc_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
             left_concat = fluid.layers.concat(
                 input=[query_feats, left_feats], axis=1)

-            query_feats, right_feats = net_func(query_unpad, right_unpad)
+            _, right_feats = net_func(query_unpad, right_unpad)
+            right_feats = fluid.layers.fc(
+                input=right_feats,
+                size=300,
+                param_attr=fluid.ParamAttr(
+                    name="title_fc_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="title_fc_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
             right_concat = fluid.layers.concat(
-                input=[query_feats, right_feats],
-                axis=1,
-            )
+                input=[query_feats, right_feats], axis=1)
         else:
             query_feats = fluid.layers.dropout(
                 x=self.query_feature,
                 dropout_prob=0.1,
                 dropout_implementation="upscale_in_train")
+            query_mean = fluid.layers.reduce_mean(query_feats, dim=1)

             left_feats = fluid.layers.dropout(
                 x=self.left_feature,
                 dropout_prob=0.1,
                 dropout_implementation="upscale_in_train")
+            left_mean = fluid.layers.reduce_mean(left_feats, dim=1)
+
+            left_sub = fluid.layers.elementwise_sub(query_mean, left_mean)
+            left_sub = fluid.layers.abs(left_sub)
+
             left_concat = fluid.layers.concat(
-                input=[query_feats, left_feats], axis=-1)
+                input=[query_mean, left_mean, left_sub], axis=-1)

             right_feats = fluid.layers.dropout(
                 x=self.right_feature,
                 dropout_prob=0.1,
                 dropout_implementation="upscale_in_train")
+            right_mean = fluid.layers.reduce_mean(right_feats, dim=1)
+
+            right_sub = fluid.layers.elementwise_sub(query_mean, right_mean)
+            right_sub = fluid.layers.abs(right_sub)
+
             right_concat = fluid.layers.concat(
-                input=[query_feats, right_feats],
+                input=[query_mean, right_mean, right_sub],
                 axis=-1,
             )
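Note: in the non-ERNIE branch, the pairwise task now mean-pools each text over the token axis and adds an absolute-difference feature before concatenation, instead of concatenating the raw token features. A minimal numpy sketch of the resulting feature layout — the shapes are illustrative only, and numpy stands in for the fluid.layers calls shown in the hunk above:

# Illustrative sketch of the new feature layout [mean(q), mean(t), |mean(q) - mean(t)|].
import numpy as np

batch, seq_len, hidden = 4, 128, 256               # hypothetical sizes
query_feats = np.random.rand(batch, seq_len, hidden)
left_feats = np.random.rand(batch, seq_len, hidden)

query_mean = query_feats.mean(axis=1)              # reduce_mean over the token axis
left_mean = left_feats.mean(axis=1)
left_sub = np.abs(query_mean - left_mean)          # elementwise_sub followed by abs

left_concat = np.concatenate([query_mean, left_mean, left_sub], axis=-1)
print(left_concat.shape)                           # (4, 768): three hidden-sized blocks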
@@ -435,22 +484,91 @@ class PointwiseTextMatchingTask(BaseTask):
                     self.network)

             query_feats, title_feats = net_func(query_unpad, title_unpad)
+            query_fc = fluid.layers.fc(
+                input=query_feats,
+                size=300,
+                param_attr=fluid.ParamAttr(
+                    name="query_fc_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="query_fc_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
+            title_fc = fluid.layers.fc(
+                input=title_feats,
+                size=300,
+                param_attr=fluid.ParamAttr(
+                    name="title_fc_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="title_fc_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
             title_concat = fluid.layers.concat(
-                input=[query_feats, title_feats], axis=1)
+                input=[query_fc, title_fc], axis=1)
+            fc1 = fluid.layers.fc(
+                input=title_concat,
+                size=256,
+                param_attr=fluid.ParamAttr(
+                    name="matching_fc1_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="matching_fc1_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
+            fc2 = fluid.layers.fc(
+                input=fc1,
+                size=128,
+                param_attr=fluid.ParamAttr(
+                    name="matching_fc2_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="matching_fc2_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
+            projection = fluid.layers.fc(
+                input=fc2,
+                size=96,
+                param_attr=fluid.ParamAttr(
+                    name="matching_fc3_w",
+                    initializer=fluid.initializer.TruncatedNormal(scale=0.02),
+                ),
+                bias_attr=fluid.ParamAttr(
+                    name="matching_fc3_b",
+                    initializer=fluid.initializer.Constant(0.),
+                ),
+                act="tanh")
         else:
             query_feats = fluid.layers.dropout(
                 x=self.query_feature,
                 dropout_prob=0.1,
                 dropout_implementation="upscale_in_train")
+            query_mean = fluid.layers.reduce_mean(query_feats, dim=1)

             title_feats = fluid.layers.dropout(
                 x=self.title_feature,
                 dropout_prob=0.1,
                 dropout_implementation="upscale_in_train")
-            title_concat = fluid.layers.concat(
-                input=[query_feats, title_feats], axis=-1)
+            title_mean = fluid.layers.reduce_mean(title_feats, dim=1)
+
+            sub = fluid.layers.elementwise_sub(query_mean, title_mean)
+            sub = fluid.layers.abs(sub)
+
+            projection = fluid.layers.concat(
+                input=[query_mean, title_mean, sub], axis=-1)

         score = fluid.layers.fc(
-            input=title_concat,
+            input=projection,
             size=2,
             param_attr=fluid.ParamAttr(
                 name="matching_out_w",
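Note: on the pointwise side, the network branch gains per-text 300-dim tanh FC layers and a deeper matching head (256 → 128 → 96) ahead of the 2-way output, while the non-ERNIE branch mirrors the pairwise mean/abs-difference features. A toy numpy sketch of the new head's shapes — only the layer sizes come from the diff; the helper and encoder width below are illustrative, not PaddleHub code:

# Shape walk-through of the new pointwise matching head; numpy stands in
# for fluid.layers.fc, so the weights here are random and untrained.
import numpy as np

def tanh_fc(x, out_dim):
    # illustrative dense layer: x @ W + b followed by tanh
    w = np.random.randn(x.shape[-1], out_dim) * 0.02
    b = np.zeros(out_dim)
    return np.tanh(x @ w + b)

batch, feat = 4, 1024                       # hypothetical encoder output width
query_feats = np.random.rand(batch, feat)
title_feats = np.random.rand(batch, feat)

query_fc = tanh_fc(query_feats, 300)
title_fc = tanh_fc(title_feats, 300)
title_concat = np.concatenate([query_fc, title_fc], axis=1)    # (4, 600)

fc1 = tanh_fc(title_concat, 256)
fc2 = tanh_fc(fc1, 128)
projection = tanh_fc(fc2, 96)

score_w = np.random.randn(projection.shape[-1], 2) * 0.02
score = projection @ score_w                # 2-way "matching_out" layer; activation not shown in the hunk
print(title_concat.shape, projection.shape, score.shape)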
paddlehub/version.py

@@ -13,5 +13,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ PaddleHub version string """
-hub_version = "1.8.0"
+hub_version = "1.8.1"
 module_proto_version = "1.0.0"