Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
a4a7df17
M
models
项目概览
PaddlePaddle
/
models
大约 2 年 前同步成功
通知
232
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a4a7df17
编写于
5月 14, 2019
作者:
B
Bruce
提交者:
Yibing Liu
5月 14, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix
#2217
(#2229)
上级
f13e3681
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
11 additions
and
23 deletions
+11
-23
PaddleNLP/lexical_analysis/reader.py
PaddleNLP/lexical_analysis/reader.py
+1
-1
PaddleNLP/lexical_analysis/run.sh
PaddleNLP/lexical_analysis/run.sh
+4
-4
PaddleNLP/lexical_analysis/run_sequence_labeling.py
PaddleNLP/lexical_analysis/run_sequence_labeling.py
+6
-18
未找到文件。
PaddleNLP/lexical_analysis/reader.py
浏览文件 @
a4a7df17
...
...
@@ -54,7 +54,7 @@ class Dataset(object):
def
get_num_examples
(
self
,
filename
):
"""num of line of file"""
return
sum
(
1
for
line
in
open
(
filename
,
"r"
))
return
sum
(
1
for
line
in
io
.
open
(
filename
,
"r"
,
encoding
=
'utf-8'
))
def
word_to_ids
(
self
,
words
):
"""convert word to word index"""
...
...
PaddleNLP/lexical_analysis/run.sh
浏览文件 @
a4a7df17
...
...
@@ -2,7 +2,7 @@
export
FLAGS_fraction_of_gpu_memory_to_use
=
0.5
export
FLAGS_eager_delete_tensor_gb
=
0.0
export
FLAGS_fast_eager_deletion_mode
=
1
#export CUDA_VISIBLE_DEVICES=0,1,2,3
export
CUDA_VISIBLE_DEVICES
=
2
# which GPU to use
#alias python='./anaconda2/bin/python'
...
...
@@ -19,7 +19,7 @@ function run_train() {
--save_model_per_batches
10000
\
--batch_size
100
\
--epoch
10
\
--use_
gpu
0
\
--use_
cuda
true
\
--traindata_shuffle_buffer
200000
\
--word_emb_dim
768
\
--grnn_hidden_dim
768
\
...
...
@@ -43,7 +43,7 @@ function run_eval() {
--word_emb_dim
768
\
--grnn_hidden_dim
768
\
--bigru_num
2
\
--use_
gpu
0
\
--use_
cuda
True
\
--init_checkpoint
./model_baseline
\
--test_data
./data/test.tsv
\
--word_dict_path
./conf/word.dic
\
...
...
@@ -62,7 +62,7 @@ function run_infer() {
--word_emb_dim
768
\
--grnn_hidden_dim
768
\
--bigru_num
2
\
--use_
gpu
0
\
--use_
cuda
True
\
--init_checkpoint
./model_baseline/
\
--infer_data
./data/test.tsv
\
--word_dict_path
./conf/word.dic
\
...
...
PaddleNLP/lexical_analysis/run_sequence_labeling.py
浏览文件 @
a4a7df17
...
...
@@ -40,23 +40,17 @@ data_g = utils.ArgumentGroup(parser, "data", "data paths")
data_g
.
add_arg
(
"word_dict_path"
,
str
,
"./conf/word.dic"
,
"The path of the word dictionary."
)
data_g
.
add_arg
(
"label_dict_path"
,
str
,
"./conf/tag.dic"
,
"The path of the label dictionary."
)
data_g
.
add_arg
(
"word_rep_dict_path"
,
str
,
"./conf/q2b.dic"
,
"The path of the word replacement Dictionary."
)
data_g
.
add_arg
(
"train_data"
,
str
,
"./data/train
_data
"
,
"The folder where the training data is located."
)
data_g
.
add_arg
(
"test_data"
,
str
,
"./data/test
_data
"
,
"The folder where the training data is located."
)
data_g
.
add_arg
(
"train_data"
,
str
,
"./data/train
.tsv
"
,
"The folder where the training data is located."
)
data_g
.
add_arg
(
"test_data"
,
str
,
"./data/test
.tsv
"
,
"The folder where the training data is located."
)
data_g
.
add_arg
(
"infer_data"
,
str
,
"./data/test.tsv"
,
"The folder where the training data is located."
)
data_g
.
add_arg
(
"model_save_dir"
,
str
,
"./models"
,
"The model will be saved in this path."
)
data_g
.
add_arg
(
"init_checkpoint"
,
str
,
""
,
"Path to init model"
)
data_g
.
add_arg
(
"corpus_type_list"
,
str
,
[
"human"
,
"feed"
,
"query"
,
"title"
,
"news"
],
"The pattern list of different types of corpus used in training."
,
nargs
=
'+'
)
data_g
.
add_arg
(
"corpus_proportion_list"
,
float
,
[
0.2
,
0.2
,
0.2
,
0.2
,
0.2
],
"The proportion list of different types of corpus used in training."
,
nargs
=
'+'
)
# 3. train parameters
train_g
=
utils
.
ArgumentGroup
(
parser
,
"training"
,
"training options"
)
train_g
.
add_arg
(
"do_train"
,
bool
,
True
,
"whether to perform training"
)
train_g
.
add_arg
(
"do_valid"
,
bool
,
False
,
"whether to perform validation"
)
train_g
.
add_arg
(
"do_test"
,
bool
,
True
,
"whether to perform validation"
)
train_g
.
add_arg
(
"do_test"
,
bool
,
True
,
"whether to perform testing"
)
train_g
.
add_arg
(
"do_infer"
,
bool
,
False
,
"whether to perform inference"
)
train_g
.
add_arg
(
"random_seed"
,
int
,
0
,
"random seed for training"
)
train_g
.
add_arg
(
"save_model_per_batches"
,
int
,
10000
,
"Save the model once per xxxx batch of training"
)
...
...
@@ -64,7 +58,7 @@ train_g.add_arg("valid_model_per_batches", int, 1000, "Do the validation once pe
train_g
.
add_arg
(
"batch_size"
,
int
,
80
,
"The number of sequences contained in a mini-batch, "
"or the maximum number of tokens (include paddings) contained in a mini-batch."
)
train_g
.
add_arg
(
"epoch"
,
int
,
10
,
"corpus iteration num"
)
train_g
.
add_arg
(
"use_
gpu"
,
int
,
-
1
,
"Whether or not to use GPU. -1 means CPU, else GPU id
"
)
train_g
.
add_arg
(
"use_
cuda"
,
bool
,
False
,
"If set, use GPU for training.
"
)
train_g
.
add_arg
(
"traindata_shuffle_buffer"
,
int
,
200
,
"The buffer size used in shuffle the training data."
)
train_g
.
add_arg
(
"base_learning_rate"
,
float
,
1e-3
,
"The basic learning rate that affects the entire network."
)
train_g
.
add_arg
(
"emb_learning_rate"
,
float
,
5
,
...
...
@@ -76,12 +70,6 @@ train_g.add_arg("crf_learning_rate", float, 0.2,
args
=
parser
.
parse_args
()
# yapf: enable.
if
len
(
args
.
corpus_proportion_list
)
!=
len
(
args
.
corpus_type_list
):
sys
.
stderr
.
write
(
"The length of corpus_proportion_list should be equal to the length of corpus_type_list.
\n
"
)
exit
(
-
1
)
print
(
args
)
...
...
@@ -217,8 +205,8 @@ def main(args):
# init executor
if
args
.
use_
gpu
>=
0
:
place
=
fluid
.
CUDAPlace
(
args
.
use_gpu
)
if
args
.
use_
cuda
:
place
=
fluid
.
CUDAPlace
(
int
(
os
.
getenv
(
'FLAGS_selected_gpus'
,
'0'
))
)
dev_count
=
fluid
.
core
.
get_cuda_device_count
()
else
:
place
=
fluid
.
CPUPlace
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录