Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
e19f4bc7
M
models
项目概览
PaddlePaddle
/
models
大约 1 年 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e19f4bc7
编写于
10月 08, 2019
作者:
D
Dilyar
提交者:
Yibing Liu
10月 08, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix some problems of simnet (#3433)
* update * update * Update README.md * Update run.sh
上级
107d4e79
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
426 addition
and
333 deletion
+426
-333
PaddleNLP/models/matching/bow.py
PaddleNLP/models/matching/bow.py
+2
-2
PaddleNLP/models/matching/cnn.py
PaddleNLP/models/matching/cnn.py
+9
-9
PaddleNLP/models/matching/gru.py
PaddleNLP/models/matching/gru.py
+7
-7
PaddleNLP/models/matching/lstm.py
PaddleNLP/models/matching/lstm.py
+7
-7
PaddleNLP/similarity_net/README.md
PaddleNLP/similarity_net/README.md
+16
-18
PaddleNLP/similarity_net/config.py
PaddleNLP/similarity_net/config.py
+2
-4
PaddleNLP/similarity_net/download_data.sh
PaddleNLP/similarity_net/download_data.sh
+5
-0
PaddleNLP/similarity_net/download_pretrained_model.sh
PaddleNLP/similarity_net/download_pretrained_model.sh
+2
-8
PaddleNLP/similarity_net/run.sh
PaddleNLP/similarity_net/run.sh
+6
-5
PaddleNLP/similarity_net/run_classifier.py
PaddleNLP/similarity_net/run_classifier.py
+312
-264
PaddleNLP/similarity_net/utils.py
PaddleNLP/similarity_net/utils.py
+58
-9
未找到文件。
PaddleNLP/models/matching/bow.py
浏览文件 @
e19f4bc7
...
...
@@ -49,7 +49,7 @@ class BOW(object):
right_soft
=
softsign_layer
.
ops
(
right_pool
)
# matching layer
if
self
.
task_mode
==
"pairwise"
:
bow_layer
=
layers
.
FCLayer
(
self
.
bow_dim
,
"relu"
,
"fc"
)
bow_layer
=
layers
.
FCLayer
(
self
.
bow_dim
,
None
,
"fc"
)
left_bow
=
bow_layer
.
ops
(
left_soft
)
right_bow
=
bow_layer
.
ops
(
right_soft
)
cos_sim_layer
=
layers
.
CosSimLayer
()
...
...
@@ -58,7 +58,7 @@ class BOW(object):
else
:
concat_layer
=
layers
.
ConcatLayer
(
1
)
concat
=
concat_layer
.
ops
([
left_soft
,
right_soft
])
bow_layer
=
layers
.
FCLayer
(
self
.
bow_dim
,
"relu"
,
"fc"
)
bow_layer
=
layers
.
FCLayer
(
self
.
bow_dim
,
None
,
"fc"
)
concat_fc
=
bow_layer
.
ops
(
concat
)
softmax_layer
=
layers
.
FCLayer
(
2
,
"softmax"
,
"cos_sim"
)
pred
=
softmax_layer
.
ops
(
concat_fc
)
...
...
PaddleNLP/models/matching/cnn.py
浏览文件 @
e19f4bc7
...
...
@@ -43,23 +43,23 @@ class CNN(object):
left_emb
=
emb_layer
.
ops
(
left
)
right_emb
=
emb_layer
.
ops
(
right
)
# Presentation context
cnn_layer
=
layers
.
SequenceConvPoolLayer
(
self
.
filter_size
,
self
.
num_filters
,
"conv"
)
cnn_layer
=
layers
.
SequenceConvPoolLayer
(
self
.
filter_size
,
self
.
num_filters
,
"conv"
)
left_cnn
=
cnn_layer
.
ops
(
left_emb
)
right_cnn
=
cnn_layer
.
ops
(
right_emb
)
# matching layer
if
self
.
task_mode
==
"pairwise"
:
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
left_
relu
=
relu
_layer
.
ops
(
left_cnn
)
right_
relu
=
relu
_layer
.
ops
(
right_cnn
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
left_
fc
=
fc
_layer
.
ops
(
left_cnn
)
right_
fc
=
fc
_layer
.
ops
(
right_cnn
)
cos_sim_layer
=
layers
.
CosSimLayer
()
pred
=
cos_sim_layer
.
ops
(
left_
relu
,
right_relu
)
return
left_
relu
,
pred
pred
=
cos_sim_layer
.
ops
(
left_
fc
,
right_fc
)
return
left_
fc
,
pred
else
:
concat_layer
=
layers
.
ConcatLayer
(
1
)
concat
=
concat_layer
.
ops
([
left_cnn
,
right_cnn
])
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
concat_fc
=
relu
_layer
.
ops
(
concat
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
concat_fc
=
fc
_layer
.
ops
(
concat
)
softmax_layer
=
layers
.
FCLayer
(
2
,
"softmax"
,
"cos_sim"
)
pred
=
softmax_layer
.
ops
(
concat_fc
)
return
left_cnn
,
pred
PaddleNLP/models/matching/gru.py
浏览文件 @
e19f4bc7
...
...
@@ -50,17 +50,17 @@ class GRU(object):
right_last
=
last_layer
.
ops
(
right_gru
)
# matching layer
if
self
.
task_mode
==
"pairwise"
:
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
left_
relu
=
relu
_layer
.
ops
(
left_last
)
right_
relu
=
relu
_layer
.
ops
(
right_last
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
left_
fc
=
fc
_layer
.
ops
(
left_last
)
right_
fc
=
fc
_layer
.
ops
(
right_last
)
cos_sim_layer
=
layers
.
CosSimLayer
()
pred
=
cos_sim_layer
.
ops
(
left_
relu
,
right_relu
)
return
left_
relu
,
pred
pred
=
cos_sim_layer
.
ops
(
left_
fc
,
right_fc
)
return
left_
fc
,
pred
else
:
concat_layer
=
layers
.
ConcatLayer
(
1
)
concat
=
concat_layer
.
ops
([
left_last
,
right_last
])
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
concat_fc
=
relu
_layer
.
ops
(
concat
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
concat_fc
=
fc
_layer
.
ops
(
concat
)
softmax_layer
=
layers
.
FCLayer
(
2
,
"softmax"
,
"cos_sim"
)
pred
=
softmax_layer
.
ops
(
concat_fc
)
return
left_last
,
pred
PaddleNLP/models/matching/lstm.py
浏览文件 @
e19f4bc7
...
...
@@ -49,17 +49,17 @@ class LSTM(object):
right_last
=
last_layer
.
ops
(
right_lstm
)
# matching layer
if
self
.
task_mode
==
"pairwise"
:
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
left_
relu
=
relu
_layer
.
ops
(
left_last
)
right_
relu
=
relu
_layer
.
ops
(
right_last
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
left_
fc
=
fc
_layer
.
ops
(
left_last
)
right_
fc
=
fc
_layer
.
ops
(
right_last
)
cos_sim_layer
=
layers
.
CosSimLayer
()
pred
=
cos_sim_layer
.
ops
(
left_
relu
,
right_relu
)
return
left_
relu
,
pred
pred
=
cos_sim_layer
.
ops
(
left_
fc
,
right_fc
)
return
left_
fc
,
pred
else
:
concat_layer
=
layers
.
ConcatLayer
(
1
)
concat
=
concat_layer
.
ops
([
left_last
,
right_last
])
relu_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
"relu"
,
"relu
"
)
concat_fc
=
relu
_layer
.
ops
(
concat
)
fc_layer
=
layers
.
FCLayer
(
self
.
hidden_dim
,
None
,
"fc
"
)
concat_fc
=
fc
_layer
.
ops
(
concat
)
softmax_layer
=
layers
.
FCLayer
(
2
,
"softmax"
,
"cos_sim"
)
pred
=
softmax_layer
.
ops
(
concat_fc
)
return
left_last
,
pred
PaddleNLP/similarity_net/README.md
浏览文件 @
e19f4bc7
...
...
@@ -6,10 +6,17 @@
基于百度海量搜索数据,我们训练了一个SimNet-BOW-Pairwise语义匹配模型,在一些真实的FAQ问答场景中,该模型效果比基于字面的相似度方法AUC提升5%以上,我们基于百度自建测试集(包含聊天、客服等数据集)和语义匹配数据集(LCQMC)进行评测,效果如下表所示。LCQMC数据集以Accuracy为评测指标,而pairwise模型的输出为相似度,因此我们采用0.958作为分类阈值,相比于基线模型中网络结构同等复杂的CBOW模型(准确率为0.737),我们模型的准确率为0.7532。
| 模型 | 百度知道 | ECOM |QQSIM | UNICOM | LCQMC |
|:-----------:|:-------------:|:-------------:|:-------------:|:-------------:|:-------------:|
| | AUC | AUC | AUC|正逆序比|Accuracy|
|BOW_Pairwise|0.6767|0.7329|0.7650|1.5630|0.7532|
| 模型 | 百度知道 | ECOM |QQSIM | UNICOM |
|:-----------:|:-------------:|:-------------:|:-------------:|:-------------:|
| | AUC | AUC | AUC|正逆序比|
|BOW_Pairwise|0.6767|0.7329|0.7650|1.5630|
#### 测试集说明
| 数据集 | 来源 | 垂类 |
|:-----------:|:-------------:|:-------------:|
|百度知道 | 百度知道问题 | 日常 |
|ECOM|商业问句|金融|
|QQSIM|闲聊对话|日常|
|UNICOM|联通客服|客服|
## 快速开始
#### 版本依赖
本项目依赖于 Paddlepaddle Fluid 1.3.1,请参考
[
安装指南
](
http://www.paddlepaddle.org/#quick-start
)
进行安装。
...
...
@@ -24,24 +31,14 @@ cd models/PaddleNLP/similarity_net
#### 数据准备
下载经过预处理的数据,运行命令后,data目录下会存在训练集数据示例、集数据示例、测试集数据示例,以及对应词索引字典(term2id.dict)。
```
shell
wget
--no-check-certificate
https://baidu-nlp.bj.bcebos.com/simnet_dataset-1.0.0.tar.gz
tar
xzf simnet_dataset-1.0.0.tar.gz
sh download_data.sh
```
#### 模型准备
我们开源了基于大规模数据训练好的
```pairwise```
模型(基于bow模型训练),我们提供两种下载方式,模型保在
```./model_files/simnet_bow_pairwise_pretrained_model/```
下。
##### 方式一:基于PaddleHub命令行工具(PaddleHub[安装方式](https://github.com/PaddlePaddle/PaddleHub))
```
shell
mkdir
model_files
hub download simnet_bow_pairwise
--output_path
./
tar
xzf simnet_bow-pairwise-1.0.0.tar.gz
-C
./model_files
```
##### 方式二:直接下载
我们开源了基于大规模数据训练好的
```pairwise```
模型(基于bow模型训练),用户可以通过运行命令下载预训练好的模型,该模型将保存在
```./model_files/simnet_bow_pairwise_pretrained_model/```
下。
```
shell
mkdir
model_files
wget
--no-check-certificate
https://baidu-nlp.bj.bcebos.com/simnet_bow-pairwise-1.0.0.tar.gz
tar
xzf simnet_bow-pairwise-1.0.0.tar.gz
-C
./model_files
sh download_pretrained_model.sh
```
#### 评估
我们公开了自建的测试集,包括百度知道、ECOM、QQSIM、UNICOM四个数据集,基于上面的预训练模型,用户可以进入evaluate目录下依次执行下列命令获取测试集评估结果。
```
shell
...
...
@@ -162,6 +159,7 @@ python run_classifier.py \
--task_mode ${TASK_MODE} #训练模式,pairwise或pointwise,与相应的配置文件匹配。
--compute_accuracy False
\
#是否计算accuracy
--lamda 0.91
\
#pairwise模式计算accuracy时的阈值
--init_checkpoint "" #预加载模型路径
```
### 如何组建自己的模型
用户可以根据自己的需求,组建自定义的模型,具体方法如下所示:
...
...
PaddleNLP/similarity_net/config.py
浏览文件 @
e19f4bc7
...
...
@@ -34,14 +34,12 @@ class SimNetConfig(object):
with
open
(
config_path
)
as
json_file
:
config_dict
=
json
.
load
(
json_file
)
except
Exception
:
raise
IOError
(
"Error in parsing simnet model config file '%s'"
%
config_path
)
raise
IOError
(
"Error in parsing simnet model config file '%s'"
%
config_path
)
else
:
if
config_dict
[
"task_mode"
]
!=
self
.
task_mode
:
raise
ValueError
(
"the config '{}' does not match the task_mode '{}'"
.
format
(
self
.
config_path
,
self
.
task_mode
))
"the config '{}' does not match the task_mode '{}'"
.
format
(
self
.
config_path
,
self
.
task_mode
))
return
config_dict
def
__getitem__
(
self
,
key
):
...
...
PaddleNLP/similarity_net/download_data.sh
0 → 100644
浏览文件 @
e19f4bc7
#get data
wget
--no-check-certificate
https://baidu-nlp.bj.bcebos.com/simnet_dataset-1.0.0.tar.gz
tar
xzf simnet_dataset-1.0.0.tar.gz
rm
simnet_dataset-1.0.0.tar.gz
PaddleNLP/similarity_net/download.sh
→
PaddleNLP/similarity_net/download
_pretrained_model
.sh
浏览文件 @
e19f4bc7
...
...
@@ -8,9 +8,3 @@ if [ ! -d $model_files_path ]; then
fi
tar
xzf simnet_bow-pairwise-1.0.0.tar.gz
-C
$model_files_path
rm
simnet_bow-pairwise-1.0.0.tar.gz
\ No newline at end of file
#get data
wget
--no-check-certificate
https://baidu-nlp.bj.bcebos.com/simnet_dataset-1.0.0.tar.gz
tar
xzf simnet_dataset-1.0.0.tar.gz
rm
simnet_dataset-1.0.0.tar.gz
PaddleNLP/similarity_net/run.sh
浏览文件 @
e19f4bc7
...
...
@@ -21,7 +21,7 @@ INIT_CHECKPOINT=./model_files/simnet_bow_pairwise_pretrained_model/
train
()
{
python run_classifier.py
\
--task_name
${
TASK_NAME
}
\
--use_cuda
f
alse
\
--use_cuda
F
alse
\
--do_train
True
\
--do_valid
True
\
--do_test
True
\
...
...
@@ -34,12 +34,13 @@ train() {
--output_dir
${
CKPT_PATH
}
\
--config_path
${
CONFIG_PATH
}
\
--vocab_path
${
VOCAB_PATH
}
\
--epoch
1
0
\
--save_steps
1
000
\
--validation_steps
1
00
\
--epoch
4
0
\
--save_steps
2
000
\
--validation_steps
2
00
\
--compute_accuracy
False
\
--lamda
0.958
\
--task_mode
${
TASK_MODE
}
--task_mode
${
TASK_MODE
}
\
--init_checkpoint
""
}
#run_evaluate
evaluate
()
{
...
...
PaddleNLP/similarity_net/run_classifier.py
浏览文件 @
e19f4bc7
...
...
@@ -25,75 +25,66 @@ import argparse
import
multiprocessing
import
sys
defaultencoding
=
'utf-8'
if
sys
.
getdefaultencoding
()
!=
defaultencoding
:
reload
(
sys
)
sys
.
setdefaultencoding
(
defaultencoding
)
sys
.
path
.
append
(
".."
)
import
paddle
import
paddle.fluid
as
fluid
import
numpy
as
np
import
codecs
import
config
import
utils
import
reader
import
models.matching.paddle_layers
as
layers
import
codecs
from
utils
import
ArgConfig
import
logging
parser
=
argparse
.
ArgumentParser
(
__doc__
)
model_g
=
utils
.
ArgumentGroup
(
parser
,
"model"
,
"model configuration and paths."
)
model_g
.
add_arg
(
"config_path"
,
str
,
None
,
"Path to the json file for EmoTect model config."
)
model_g
.
add_arg
(
"init_checkpoint"
,
str
,
None
,
"Init checkpoint to resume training from."
)
model_g
.
add_arg
(
"output_dir"
,
str
,
None
,
"Directory path to save checkpoints"
)
model_g
.
add_arg
(
"task_mode"
,
str
,
None
,
"task mode: pairwise or pointwise"
)
train_g
=
utils
.
ArgumentGroup
(
parser
,
"training"
,
"training options."
)
train_g
.
add_arg
(
"epoch"
,
int
,
10
,
"Number of epoches for training."
)
train_g
.
add_arg
(
"save_steps"
,
int
,
200
,
"The steps interval to save checkpoints."
)
train_g
.
add_arg
(
"validation_steps"
,
int
,
100
,
"The steps interval to evaluate model performance."
)
log_g
=
utils
.
ArgumentGroup
(
parser
,
"logging"
,
"logging related"
)
log_g
.
add_arg
(
"skip_steps"
,
int
,
10
,
"The steps interval to print loss."
)
log_g
.
add_arg
(
"verbose_result"
,
bool
,
True
,
"Whether to output verbose result."
)
log_g
.
add_arg
(
"test_result_path"
,
str
,
"test_result"
,
"Directory path to test result."
)
log_g
.
add_arg
(
"infer_result_path"
,
str
,
"infer_result"
,
"Directory path to infer result."
)
data_g
=
utils
.
ArgumentGroup
(
parser
,
"data"
,
"Data paths, vocab paths and data processing options"
)
data_g
.
add_arg
(
"train_data_dir"
,
str
,
None
,
"Directory path to training data."
)
data_g
.
add_arg
(
"valid_data_dir"
,
str
,
None
,
"Directory path to valid data."
)
data_g
.
add_arg
(
"test_data_dir"
,
str
,
None
,
"Directory path to testing data."
)
data_g
.
add_arg
(
"infer_data_dir"
,
str
,
None
,
"Directory path to infer data."
)
data_g
.
add_arg
(
"vocab_path"
,
str
,
None
,
"Vocabulary path."
)
data_g
.
add_arg
(
"batch_size"
,
int
,
32
,
"Total examples' number in batch for training."
)
run_type_g
=
utils
.
ArgumentGroup
(
parser
,
"run_type"
,
"running type options."
)
run_type_g
.
add_arg
(
"use_cuda"
,
bool
,
False
,
"If set, use GPU for training."
)
run_type_g
.
add_arg
(
"task_name"
,
str
,
None
,
"The name of task to perform sentiment classification."
)
run_type_g
.
add_arg
(
"do_train"
,
bool
,
False
,
"Whether to perform training."
)
run_type_g
.
add_arg
(
"do_valid"
,
bool
,
False
,
"Whether to perform dev."
)
run_type_g
.
add_arg
(
"do_test"
,
bool
,
False
,
"Whether to perform testing."
)
run_type_g
.
add_arg
(
"do_infer"
,
bool
,
False
,
"Whether to perform inference."
)
run_type_g
.
add_arg
(
"compute_accuracy"
,
bool
,
False
,
"Whether to compute accuracy."
)
run_type_g
.
add_arg
(
"lamda"
,
float
,
0.91
,
"When task_mode is pairwise, lamda is the threshold for calculating the accuracy."
)
parser
.
add_argument
(
'--enable_ce'
,
action
=
'store_true'
,
help
=
'If set, run the task with continuous evaluation logs.'
)
args
=
parser
.
parse_args
()
def
create_model
(
args
,
pyreader_name
,
is_inference
=
False
,
is_pointwise
=
False
):
"""
Create Model for simnet
"""
if
is_inference
:
inf_pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
16
,
shapes
=
([
-
1
,
1
],
[
-
1
,
1
]),
dtypes
=
(
'int64'
,
'int64'
),
lod_levels
=
(
1
,
1
),
name
=
pyreader_name
,
use_double_buffer
=
False
)
left
,
pos_right
=
fluid
.
layers
.
read_file
(
inf_pyreader
)
return
inf_pyreader
,
left
,
pos_right
else
:
if
is_pointwise
:
pointwise_pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
16
,
shapes
=
([
-
1
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]),
dtypes
=
(
'int64'
,
'int64'
,
'int64'
),
lod_levels
=
(
1
,
1
,
0
),
name
=
pyreader_name
,
use_double_buffer
=
False
)
left
,
right
,
label
=
fluid
.
layers
.
read_file
(
pointwise_pyreader
)
return
pointwise_pyreader
,
left
,
right
,
label
else
:
pairwise_pyreader
=
fluid
.
layers
.
py_reader
(
capacity
=
16
,
shapes
=
([
-
1
,
1
],
[
-
1
,
1
],
[
-
1
,
1
]),
dtypes
=
(
'int64'
,
'int64'
,
'int64'
),
lod_levels
=
(
1
,
1
,
1
),
name
=
pyreader_name
,
use_double_buffer
=
False
)
left
,
pos_right
,
neg_right
=
fluid
.
layers
.
read_file
(
pairwise_pyreader
)
return
pairwise_pyreader
,
left
,
pos_right
,
neg_right
def
train
(
conf_dict
,
args
):
"""
...
...
@@ -129,85 +120,79 @@ def train(conf_dict, args):
place
=
fluid
.
CUDAPlace
(
0
)
else
:
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
startup_prog
=
fluid
.
Program
()
train_program
=
fluid
.
Program
()
simnet_process
=
reader
.
SimNetProcessor
(
args
,
vocab
)
if
args
.
task_mode
==
"pairwise"
:
# Build network
left
=
data
.
ops
(
name
=
"left"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
pos_right
=
data
.
ops
(
name
=
"right"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
neg_right
=
data
.
ops
(
name
=
"neg_right"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
with
fluid
.
program_guard
(
train_program
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
train_pyreader
,
left
,
pos_right
,
neg_right
=
create_model
(
args
,
pyreader_name
=
'train_reader'
)
left_feat
,
pos_score
=
net
.
predict
(
left
,
pos_right
)
# Get Feeder and Reader
train_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
pos_right
.
name
,
neg_right
.
name
])
train_reader
=
simnet_process
.
get_reader
(
"train"
)
if
args
.
do_valid
:
valid_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
pos_right
.
name
])
valid_reader
=
simnet_process
.
get_reader
(
"valid"
)
pred
=
pos_score
# Save Infer model
infer_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
_
,
neg_score
=
net
.
predict
(
left
,
neg_right
)
avg_cost
=
loss
.
compute
(
pos_score
,
neg_score
)
avg_cost
.
persistable
=
True
optimizer
.
ops
(
avg_cost
)
# Get Reader
get_train_examples
=
simnet_process
.
get_reader
(
"train"
)
if
args
.
do_valid
:
test_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
test_pyreader
,
left
,
pos_right
=
create_model
(
args
,
pyreader_name
=
'test_reader'
,
is_inference
=
True
)
left_feat
,
pos_score
=
net
.
predict
(
left
,
pos_right
)
pred
=
pos_score
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
else
:
# Build network
left
=
data
.
ops
(
name
=
"left"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
right
=
data
.
ops
(
name
=
"right"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
1
)
label
=
data
.
ops
(
name
=
"label"
,
shape
=
[
1
],
dtype
=
"int64"
,
lod_level
=
0
)
with
fluid
.
program_guard
(
train_program
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
train_pyreader
,
left
,
right
,
label
=
create_model
(
args
,
pyreader_name
=
'train_reader'
,
is_pointwise
=
True
)
left_feat
,
pred
=
net
.
predict
(
left
,
right
)
avg_cost
=
loss
.
compute
(
pred
,
label
)
avg_cost
.
persistable
=
True
optimizer
.
ops
(
avg_cost
)
# Get Feeder and Reader
train_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
right
.
name
,
label
.
name
])
train_reader
=
simnet_process
.
get_reader
(
"train"
)
get_train_examples
=
simnet_process
.
get_reader
(
"train"
)
if
args
.
do_valid
:
valid_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
right
.
name
])
valid_reader
=
simnet_process
.
get_reader
(
"valid"
)
# Save Infer model
infer_program
=
fluid
.
default_main_program
().
clone
(
for_test
=
True
)
avg_cost
=
loss
.
compute
(
pred
,
label
)
avg_cost
.
persistable
=
True
test_prog
=
fluid
.
Program
()
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
test_pyreader
,
left
,
right
=
create_model
(
args
,
pyreader_name
=
'test_reader'
,
is_inference
=
True
)
left_feat
,
pred
=
net
.
predict
(
left
,
right
)
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
# operate Optimization
optimizer
.
ops
(
avg_cost
)
executor
=
fluid
.
Executor
(
place
)
executor
.
run
(
fluid
.
default_startup_program
())
if
args
.
init_checkpoint
is
not
None
:
utils
.
init_checkpoint
(
executor
,
args
.
init_checkpoint
,
fluid
.
default_startup_program
())
# Get and run executor
parallel_executor
=
fluid
.
ParallelExecutor
(
use_cuda
=
args
.
use_cuda
,
loss_name
=
avg_cost
.
name
,
main_program
=
fluid
.
default_main_program
())
# Get device number
device_count
=
parallel_executor
.
device_count
logging
.
info
(
"device count: %d"
%
device_count
)
def
valid_and_test
(
program
,
feeder
,
reader
,
process
,
mode
=
"test"
):
if
args
.
init_checkpoint
is
not
""
:
utils
.
init_checkpoint
(
exe
,
args
.
init_checkpoint
,
startup_prog
)
def
valid_and_test
(
test_program
,
test_pyreader
,
get_valid_examples
,
process
,
mode
,
exe
,
fetch_list
):
"""
return auc and acc
"""
# Get Batch Data
batch_data
=
paddle
.
batch
(
reader
,
args
.
batch_size
,
drop_last
=
False
)
batch_data
=
paddle
.
batch
(
get_valid_examples
,
args
.
batch_size
,
drop_last
=
False
)
test_pyreader
.
decorate_paddle_reader
(
batch_data
)
test_pyreader
.
start
()
pred_list
=
[]
for
data
in
batch_data
():
_pred
=
executor
.
run
(
program
=
program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
pred
.
name
])
while
True
:
try
:
_pred
=
exe
.
run
(
program
=
test_program
,
fetch_list
=
[
pred
.
name
])
pred_list
+=
list
(
_pred
)
except
fluid
.
core
.
EOFException
:
test_pyreader
.
reset
()
break
pred_list
=
np
.
vstack
(
pred_list
)
if
mode
==
"test"
:
label_list
=
process
.
get_test_label
()
...
...
@@ -232,32 +217,26 @@ def train(conf_dict, args):
# set global step
global_step
=
0
ce_info
=
[]
train_exe
=
exe
for
epoch_id
in
range
(
args
.
epoch
):
losses
=
[]
# Get batch data iterator
train_batch_data
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
train_reader
,
buf_size
=
10000
),
get_train_examples
,
buf_size
=
10000
),
args
.
batch_size
,
drop_last
=
False
)
train_pyreader
.
decorate_paddle_reader
(
train_batch_data
)
train_pyreader
.
start
()
exe
.
run
(
startup_prog
)
losses
=
[]
start_time
=
time
.
time
()
for
iter
,
data
in
enumerate
(
train_batch_data
()):
if
len
(
data
)
<
device_count
:
logging
.
info
(
"the size of batch data is less than device_count(%d)"
%
device_count
)
continue
while
True
:
try
:
global_step
+=
1
avg_loss
=
parallel_executor
.
run
([
avg_cost
.
name
],
feed
=
train_feeder
.
feed
(
data
)
)
fetch_list
=
[
avg_cost
.
name
]
avg_loss
=
train_exe
.
run
(
program
=
train_program
,
fetch_list
=
fetch_list
)
if
args
.
do_valid
and
global_step
%
args
.
validation_steps
==
0
:
valid_result
=
valid_and_test
(
program
=
infer_program
,
feeder
=
valid_feeder
,
reader
=
valid_reader
,
process
=
simnet_process
,
mode
=
"valid"
)
get_valid_examples
=
simnet_process
.
get_reader
(
"valid"
)
valid_result
=
valid_and_test
(
test_prog
,
test_pyreader
,
get_valid_examples
,
simnet_process
,
"valid"
,
exe
,[
pred
.
name
])
if
args
.
compute_accuracy
:
valid_auc
,
valid_acc
=
valid_result
logging
.
info
(
...
...
@@ -284,14 +263,39 @@ def train(conf_dict, args):
]
target_vars
=
[
left_feat
,
pred
]
fluid
.
io
.
save_inference_model
(
model_path
,
feed_var_names
,
target_vars
,
executor
,
infer_program
)
target_vars
,
exe
,
test_prog
)
logging
.
info
(
"saving infer model in %s"
%
model_path
)
losses
.
append
(
np
.
mean
(
avg_loss
[
0
]))
except
fluid
.
core
.
EOFException
:
train_pyreader
.
reset
()
break
end_time
=
time
.
time
()
logging
.
info
(
"epoch: %d, loss: %f, used time: %d sec"
%
(
epoch_id
,
np
.
mean
(
losses
),
end_time
-
start_time
))
ce_info
.
append
([
np
.
mean
(
losses
),
end_time
-
start_time
])
#final save
logging
.
info
(
"the final step is %s"
%
global_step
)
model_save_dir
=
os
.
path
.
join
(
args
.
output_dir
,
conf_dict
[
"model_path"
])
model_path
=
os
.
path
.
join
(
model_save_dir
,
str
(
global_step
))
if
not
os
.
path
.
exists
(
model_save_dir
):
os
.
makedirs
(
model_save_dir
)
if
args
.
task_mode
==
"pairwise"
:
feed_var_names
=
[
left
.
name
,
pos_right
.
name
]
target_vars
=
[
left_feat
,
pos_score
]
else
:
feed_var_names
=
[
left
.
name
,
right
.
name
,
]
target_vars
=
[
left_feat
,
pred
]
fluid
.
io
.
save_inference_model
(
model_path
,
feed_var_names
,
target_vars
,
exe
,
test_prog
)
logging
.
info
(
"saving infer model in %s"
%
model_path
)
if
args
.
enable_ce
:
card_num
=
get_cards
()
ce_loss
=
0
...
...
@@ -309,20 +313,11 @@ def train(conf_dict, args):
if
args
.
do_test
:
if
args
.
task_mode
==
"pairwise"
:
# Get Feeder and Reader
test_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
pos_right
.
name
])
test_reader
=
simnet_process
.
get_reader
(
"test"
)
get_test_examples
=
simnet_process
.
get_reader
(
"test"
)
else
:
# Get Feeder and Reader
test_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
left
.
name
,
right
.
name
])
test_reader
=
simnet_process
.
get_reader
(
"test"
)
test_result
=
valid_and_test
(
program
=
infer_program
,
feeder
=
test_feeder
,
reader
=
test_reader
,
process
=
simnet_process
,
mode
=
"test"
)
get_test_examples
=
simnet_process
.
get_reader
(
"test"
)
test_result
=
valid_and_test
(
test_prog
,
test_pyreader
,
get_test_examples
,
simnet_process
,
"test"
,
exe
,[
pred
.
name
])
if
args
.
compute_accuracy
:
test_auc
,
test_acc
=
test_result
logging
.
info
(
"AUC of test is %f, Accuracy of test is %f"
%
...
...
@@ -334,51 +329,83 @@ def train(conf_dict, args):
def
test
(
conf_dict
,
args
):
"""
run predict
Evaluation Function
"""
vocab
=
utils
.
load_vocab
(
args
.
vocab_path
)
simnet_process
=
reader
.
SimNetProcessor
(
args
,
vocab
)
# load auc method
metric
=
fluid
.
metrics
.
Auc
(
name
=
"auc"
)
with
codecs
.
open
(
"predictions.txt"
,
"w"
,
"utf-8"
)
as
predictions_file
:
# Get model path
model_path
=
args
.
init_checkpoint
# Get device
if
args
.
use_cuda
:
place
=
fluid
.
CUDAPlace
(
0
)
else
:
place
=
fluid
.
CPUPlace
()
# Get executor
executor
=
fluid
.
Executor
(
place
=
place
)
# Load model
program
,
feed_var_names
,
fetch_targets
=
fluid
.
io
.
load_inference_model
(
model_path
,
executor
)
exe
=
fluid
.
Executor
(
place
)
vocab
=
utils
.
load_vocab
(
args
.
vocab_path
)
simnet_process
=
reader
.
SimNetProcessor
(
args
,
vocab
)
startup_prog
=
fluid
.
Program
()
get_test_examples
=
simnet_process
.
get_reader
(
"test"
)
batch_data
=
paddle
.
batch
(
get_test_examples
,
args
.
batch_size
,
drop_last
=
False
)
test_prog
=
fluid
.
Program
()
conf_dict
[
'dict_size'
]
=
len
(
vocab
)
net
=
utils
.
import_class
(
"../models/matching"
,
conf_dict
[
"net"
][
"module_name"
],
conf_dict
[
"net"
][
"class_name"
])(
conf_dict
)
metric
=
fluid
.
metrics
.
Auc
(
name
=
"auc"
)
with
codecs
.
open
(
"predictions.txt"
,
"w"
,
"utf-8"
)
as
predictions_file
:
if
args
.
task_mode
==
"pairwise"
:
# Get Feeder and Reader
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
feed_var_names
,
program
=
program
)
test_reader
=
simnet_process
.
get_reader
(
"test"
)
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
test_pyreader
,
left
,
pos_right
=
create_model
(
args
,
pyreader_name
=
'test_reader'
,
is_inference
=
True
)
left_feat
,
pos_score
=
net
.
predict
(
left
,
pos_right
)
pred
=
pos_score
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
else
:
# Get Feeder and Reader
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
feed_var_names
,
program
=
program
)
test_reader
=
simnet_process
.
get_reader
(
"test"
)
# Get batch data iterator
batch_data
=
paddle
.
batch
(
test_reader
,
args
.
batch_size
,
drop_last
=
False
)
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
test_pyreader
,
left
,
right
=
create_model
(
args
,
pyreader_name
=
'test_reader'
,
is_inference
=
True
)
left_feat
,
pred
=
net
.
predict
(
left
,
right
)
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
exe
.
run
(
startup_prog
)
utils
.
init_checkpoint
(
exe
,
args
.
init_checkpoint
,
main_program
=
test_prog
)
test_exe
=
exe
test_pyreader
.
decorate_paddle_reader
(
batch_data
)
logging
.
info
(
"start test process ..."
)
test_pyreader
.
start
()
pred_list
=
[]
for
iter
,
data
in
enumerate
(
batch_data
()):
output
=
executor
.
run
(
program
,
feed
=
feeder
.
feed
(
data
),
fetch_list
=
fetch_targets
)
fetch_list
=
[
pred
.
name
]
output
=
[]
while
True
:
try
:
output
=
test_exe
.
run
(
program
=
test_prog
,
fetch_list
=
fetch_list
)
if
args
.
task_mode
==
"pairwise"
:
pred_list
+=
list
(
map
(
lambda
item
:
float
(
item
[
0
]),
output
[
1
]))
pred_list
+=
list
(
map
(
lambda
item
:
float
(
item
[
0
]),
output
[
0
]))
predictions_file
.
write
(
"
\n
"
.
join
(
map
(
lambda
item
:
str
((
item
[
0
]
+
1
)
/
2
),
output
[
1
]))
+
"
\n
"
)
map
(
lambda
item
:
str
((
item
[
0
]
+
1
)
/
2
),
output
[
0
]))
+
"
\n
"
)
else
:
pred_list
+=
map
(
lambda
item
:
item
,
output
[
1
])
pred_list
+=
map
(
lambda
item
:
item
,
output
[
0
])
predictions_file
.
write
(
"
\n
"
.
join
(
map
(
lambda
item
:
str
(
np
.
argmax
(
item
)),
output
[
1
]))
+
"
\n
"
)
map
(
lambda
item
:
str
(
np
.
argmax
(
item
)),
output
[
0
]))
+
"
\n
"
)
except
fluid
.
core
.
EOFException
:
test_pyreader
.
reset
()
break
if
args
.
task_mode
==
"pairwise"
:
pred_list
=
np
.
array
(
pred_list
).
reshape
((
-
1
,
1
))
pred_list
=
(
pred_list
+
1
)
/
2
...
...
@@ -403,47 +430,72 @@ def test(conf_dict, args):
os
.
path
.
join
(
os
.
getcwd
(),
args
.
test_result_path
))
def
infer
(
args
):
def
infer
(
conf_dict
,
args
):
"""
run predict
"""
vocab
=
utils
.
load_vocab
(
args
.
vocab_path
)
simnet_process
=
reader
.
SimNetProcessor
(
args
,
vocab
)
# Get model path
model_path
=
args
.
init_checkpoint
# Get device
if
args
.
use_cuda
:
place
=
fluid
.
CUDAPlace
(
0
)
else
:
place
=
fluid
.
CPUPlace
()
# Get executor
executor
=
fluid
.
Executor
(
place
=
place
)
# Load model
program
,
feed_var_names
,
fetch_targets
=
fluid
.
io
.
load_inference_model
(
model_path
,
executor
)
exe
=
fluid
.
Executor
(
place
)
vocab
=
utils
.
load_vocab
(
args
.
vocab_path
)
simnet_process
=
reader
.
SimNetProcessor
(
args
,
vocab
)
startup_prog
=
fluid
.
Program
()
get_infer_examples
=
simnet_process
.
get_infer_reader
batch_data
=
paddle
.
batch
(
get_infer_examples
,
args
.
batch_size
,
drop_last
=
False
)
test_prog
=
fluid
.
Program
()
conf_dict
[
'dict_size'
]
=
len
(
vocab
)
net
=
utils
.
import_class
(
"../models/matching"
,
conf_dict
[
"net"
][
"module_name"
],
conf_dict
[
"net"
][
"class_name"
])(
conf_dict
)
if
args
.
task_mode
==
"pairwise"
:
# Get Feeder and Reader
infer_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
feed_var_names
,
program
=
program
)
infer_reader
=
simnet_process
.
get_infer_reader
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
infer_pyreader
,
left
,
pos_right
=
create_model
(
args
,
pyreader_name
=
'infer_reader'
,
is_inference
=
True
)
left_feat
,
pos_score
=
net
.
predict
(
left
,
pos_right
)
pred
=
pos_score
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
else
:
# Get Feeder and Reader
infer_feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
feed_var_names
,
program
=
program
)
infer_reader
=
simnet_process
.
get_infer_reader
# Get batch data iterator
batch_data
=
paddle
.
batch
(
infer_reader
,
args
.
batch_size
,
drop_last
=
False
)
with
fluid
.
program_guard
(
test_prog
,
startup_prog
):
with
fluid
.
unique_name
.
guard
():
infer_pyreader
,
left
,
right
=
create_model
(
args
,
pyreader_name
=
'infer_reader'
,
is_inference
=
True
)
left_feat
,
pred
=
net
.
predict
(
left
,
right
)
test_prog
=
test_prog
.
clone
(
for_test
=
True
)
exe
.
run
(
startup_prog
)
utils
.
init_checkpoint
(
exe
,
args
.
init_checkpoint
,
main_program
=
test_prog
)
test_exe
=
exe
infer_pyreader
.
decorate_sample_list_generator
(
batch_data
)
logging
.
info
(
"start test process ..."
)
preds_list
=
[]
for
iter
,
data
in
enumerate
(
batch_data
()):
output
=
executor
.
run
(
program
,
feed
=
infer_feeder
.
feed
(
data
),
fetch_list
=
fetch_targets
)
fetch_list
=
[
pred
.
name
]
output
=
[]
infer_pyreader
.
start
()
while
True
:
try
:
output
=
test_exe
.
run
(
program
=
test_prog
,
fetch_list
=
fetch_list
)
if
args
.
task_mode
==
"pairwise"
:
preds_list
+=
list
(
map
(
lambda
item
:
str
((
item
[
0
]
+
1
)
/
2
),
output
[
1
]))
map
(
lambda
item
:
str
((
item
[
0
]
+
1
)
/
2
),
output
[
0
]))
else
:
preds_list
+=
map
(
lambda
item
:
str
(
np
.
argmax
(
item
)),
output
[
1
])
preds_list
+=
map
(
lambda
item
:
str
(
np
.
argmax
(
item
)),
output
[
0
])
except
fluid
.
core
.
EOFException
:
infer_pyreader
.
reset
()
break
with
codecs
.
open
(
args
.
infer_result_path
,
"w"
,
"utf-8"
)
as
infer_file
:
for
_data
,
_pred
in
zip
(
simnet_process
.
get_infer_data
(),
preds_list
):
infer_file
.
write
(
_data
+
"
\t
"
+
_pred
+
"
\n
"
)
...
...
@@ -458,23 +510,11 @@ def get_cards():
num
=
len
(
cards
.
split
(
","
))
return
num
if
__name__
==
"__main__"
:
def main(conf_dict, args):
    """
    Dispatch entry point: run exactly one of training, testing or
    inference according to the do_* flags on ``args``.

    Args:
        conf_dict: parsed SimNet model configuration
            (built via config.SimNetConfig(args)).
        args: parsed command-line arguments carrying the
            do_train / do_test / do_infer switches.

    Raises:
        ValueError: if none of do_train, do_test, do_infer is set.
    """
    if args.do_train:
        train(conf_dict, args)
    elif args.do_test:
        test(conf_dict, args)
    elif args.do_infer:
        # NOTE(review): this branch calls infer(args) without conf_dict,
        # unlike the train/test branches — confirm infer()'s signature,
        # since other call sites pass infer(conf_dict, args).
        infer(args)
    else:
        raise ValueError("one of do_train and do_test and do_infer must be True")
args
=
ArgConfig
()
args
=
args
.
build_conf
()
if
__name__
==
"__main__"
:
utils
.
print_arguments
(
args
)
try
:
if
fluid
.
is_compiled_with_cuda
()
!=
True
and
args
.
use_cuda
==
True
:
...
...
@@ -487,4 +527,12 @@ if __name__ == "__main__":
pass
utils
.
init_log
(
"./log/TextSimilarityNet"
)
conf_dict
=
config
.
SimNetConfig
(
args
)
main
(
conf_dict
,
args
)
if
args
.
do_train
:
train
(
conf_dict
,
args
)
elif
args
.
do_test
:
test
(
conf_dict
,
args
)
elif
args
.
do_infer
:
infer
(
conf_dict
,
args
)
else
:
raise
ValueError
(
"one of do_train and do_test and do_infer must be True"
)
\ No newline at end of file
PaddleNLP/similarity_net/utils.py
浏览文件 @
e19f4bc7
...
...
@@ -15,7 +15,7 @@
"""
SimNet utilities.
"""
import
argparse
import
time
import
sys
import
re
...
...
@@ -26,20 +26,17 @@ import numpy as np
import
logging
import
logging.handlers
import
paddle.fluid
as
fluid
import
io
"""
******functions for file processing******
"""
def
load_vocab
(
file_path
):
"""
load the given vocabulary
"""
vocab
=
{}
if
six
.
PY3
:
f
=
open
(
file_path
,
"r"
,
encoding
=
"utf-8"
)
else
:
f
=
open
(
file_path
,
"r"
)
f
=
io
.
open
(
file_path
,
"r"
,
encoding
=
"utf-8"
)
for
line
in
f
:
items
=
line
.
strip
(
"
\n
"
).
split
(
"
\t
"
)
if
items
[
0
]
not
in
vocab
:
...
...
@@ -61,8 +58,7 @@ def get_result_file(args):
"""
with
codecs
.
open
(
args
.
test_data_dir
,
"r"
,
"utf-8"
)
as
test_file
:
with
codecs
.
open
(
"predictions.txt"
,
"r"
,
"utf-8"
)
as
predictions_file
:
with
codecs
.
open
(
args
.
test_result_path
,
"w"
,
"utf-8"
)
as
test_result_file
:
with
codecs
.
open
(
args
.
test_result_path
,
"w"
,
"utf-8"
)
as
test_result_file
:
test_datas
=
[
line
.
strip
(
"
\n
"
)
for
line
in
test_file
]
predictions
=
[
line
.
strip
(
"
\n
"
)
for
line
in
predictions_file
]
for
test_data
,
prediction
in
zip
(
test_datas
,
predictions
):
...
...
@@ -170,6 +166,58 @@ class ArgumentGroup(object):
help
=
help
+
' Default: %(default)s.'
,
**
kwargs
)
class ArgConfig(object):
    """Command-line argument schema for SimNet.

    Registers all built-in options in themed ArgumentGroup sections
    (model, training, logging, data, run_type) plus an initially empty
    "customize" group that callers may extend through add_arg() before
    build_conf() parses sys.argv.
    """

    def __init__(self):
        cli_parser = argparse.ArgumentParser()

        # (group title, group description, [(name, type, default, help), ...])
        grouped_specs = [
            ("model", "model configuration and paths.", [
                ("config_path", str, None,
                 "Path to the json file for EmoTect model config."),
                ("init_checkpoint", str, None,
                 "Init checkpoint to resume training from."),
                ("output_dir", str, None,
                 "Directory path to save checkpoints"),
                ("task_mode", str, None,
                 "task mode: pairwise or pointwise"),
            ]),
            ("training", "training options.", [
                ("epoch", int, 10, "Number of epoches for training."),
                ("save_steps", int, 200,
                 "The steps interval to save checkpoints."),
                ("validation_steps", int, 100,
                 "The steps interval to evaluate model performance."),
            ]),
            ("logging", "logging related", [
                ("skip_steps", int, 10, "The steps interval to print loss."),
                ("verbose_result", bool, True,
                 "Whether to output verbose result."),
                ("test_result_path", str, "test_result",
                 "Directory path to test result."),
                ("infer_result_path", str, "infer_result",
                 "Directory path to infer result."),
            ]),
            ("data", "Data paths, vocab paths and data processing options", [
                ("train_data_dir", str, None,
                 "Directory path to training data."),
                ("valid_data_dir", str, None,
                 "Directory path to valid data."),
                ("test_data_dir", str, None,
                 "Directory path to testing data."),
                ("infer_data_dir", str, None,
                 "Directory path to infer data."),
                ("vocab_path", str, None, "Vocabulary path."),
                ("batch_size", int, 32,
                 "Total examples' number in batch for training."),
            ]),
            ("run_type", "running type options.", [
                ("use_cuda", bool, False, "If set, use GPU for training."),
                ("task_name", str, None,
                 "The name of task to perform sentiment classification."),
                ("do_train", bool, False, "Whether to perform training."),
                ("do_valid", bool, False, "Whether to perform dev."),
                ("do_test", bool, False, "Whether to perform testing."),
                ("do_infer", bool, False, "Whether to perform inference."),
                ("compute_accuracy", bool, False,
                 "Whether to compute accuracy."),
                ("lamda", float, 0.91,
                 "When task_mode is pairwise, lamda is the threshold for calculating the accuracy."),
            ]),
        ]
        for title, description, specs in grouped_specs:
            group = ArgumentGroup(cli_parser, title, description)
            for arg_name, arg_type, arg_default, arg_help in specs:
                group.add_arg(arg_name, arg_type, arg_default, arg_help)

        # Empty group reserved for caller-supplied options; see add_arg().
        self.custom_g = ArgumentGroup(cli_parser, "customize",
                                      "customized options.")

        cli_parser.add_argument(
            '--enable_ce',
            action='store_true',
            help='If set, run the task with continuous evaluation logs.')

        self.parser = cli_parser

    def add_arg(self, name, dtype, default, descrip):
        """Register an extra caller-specific argument in the customize group."""
        self.custom_g.add_arg(name, dtype, default, descrip)

    def build_conf(self):
        """Parse sys.argv and return the resulting argparse Namespace."""
        return self.parser.parse_args()
def
print_arguments
(
args
):
"""
...
...
@@ -314,3 +362,4 @@ def init_checkpoint(exe, init_checkpoint_path, main_program):
main_program
=
main_program
,
predicate
=
existed_persitables
)
print
(
"Load model from {}"
.
format
(
init_checkpoint_path
))
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录