PaddlePaddle / hapi

Commit f3e8f301

Authored Apr 26, 2020 by Guo Sheng; committed by guosheng on Apr 27, 2020

Merge pull request #54 from 0YuanZhang0/seq_tag

seq_tag

Parents: 2004b003, a14ade8d

Showing 10 changed files with 572 additions and 348 deletions (+572, -348)
examples/sequence_tagging/README.md                +14   -17
examples/sequence_tagging/downloads.py              +1    -1
examples/sequence_tagging/eval.py                  +19   -39
examples/sequence_tagging/predict.py               +17   -26
examples/sequence_tagging/reader.py               +126  -135
examples/sequence_tagging/sequence_tagging.yaml     +2    -3
examples/sequence_tagging/train.py                 +34   -38
examples/sequence_tagging/utils/configure.py       +11    -5
examples/sequence_tagging/utils/metrics.py         +16   -17
hapi/text/text.py                                 +332   -67
examples/sequence_tagging/README.md (+14, -17)

````diff
@@ -6,7 +6,7 @@ Sequence Tagging is a sequence tagging model, usable for word segmentation, ...
 |Model|Precision|Recall|F1-score|
 |:-:|:-:|:-:|:-:|
-|Lexical Analysis|88.26%|89.20%|88.73%|
+|Lexical Analysis|89.57%|89.96%|89.76%|

 ## 2. Quick start
@@ -22,7 +22,7 @@
 Clone the toolkit repository locally
 ```bash
 git clone https://github.com/PaddlePaddle/hapi.git
-cd hapi/sequence_tagging
+cd hapi/examples/sequence_tagging
 ```
 #### 3. Environment dependencies
@@ -70,7 +70,7 @@ python -u train.py \
     --dynamic False
 # --device: run on the GPU or the CPU
-# --dynamic: whether to train in dynamic-graph mode; set to True for static-graph training, False for dynamic
+# --dynamic: whether to train in dynamic-graph mode; set to False for static-graph training, True for dynamic
 ```
 Multi-card training on GPU
@@ -84,7 +84,7 @@ python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
 (same --dynamic comment fix as in the hunk above)
@@ -95,7 +95,7 @@ python -u train.py \
 (same --dynamic comment fix as in the hunk above, for CPU training)
@@ -105,15 +105,13 @@ python -u train.py \
 python predict.py \
     --init_from_checkpoint model_baseline/params \
     --output_file predict.result \
-    --mode predict \
     --device cpu \
     --dynamic False
 # --init_from_checkpoint: checkpoint to initialize the model from
 # --output_file: prediction output file
 # --device: run on the GPU or the CPU
-# --mode: run mode; "train" to train, "predict" to predict
-# --dynamic: whether to train in dynamic-graph mode; set to False for static-graph training, True for dynamic
+# --dynamic: whether to train in dynamic-graph mode; set to True for static-graph training, False for dynamic
 ```
 ### Model evaluation
@@ -123,14 +121,12 @@ python predict.py \
 # baseline model
 python eval.py \
     --init_from_checkpoint ./model_baseline/params \
-    --mode predict \
     --device cpu \
     --dynamic False
 # --init_from_checkpoint: checkpoint to initialize the model from
 # --device: run on the GPU or the CPU
-# --mode: run mode; "train" to train, "predict" to predict
-# --dynamic: whether to train in dynamic-graph mode; set to False for static-graph training, True for dynamic
+# --dynamic: whether to train in dynamic-graph mode; set to True for static-graph training, False for dynamic
 ```
@@ -196,6 +192,7 @@ Overall Architecture of GRU-CRF-MODEL
 ├── eval.py               # evaluation script for lexical analysis
 ├── downloads.py          # script for downloading data and models
 ├── downloads.sh          # script for downloading data and models
+├── sequence_tagging.yaml # configuration for training, prediction, and evaluation
 └──reader.py              # file-reading helpers
 ```
````
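A note on the `--dynamic` flag these commands document: throughout this commit the scripts toggle execution mode with the one-liner `fluid.enable_dygraph(place) if args.dynamic else None`. Below is a minimal sketch of that pattern, assuming the hapi/fluid APIs imported elsewhere in this commit; the `setup` helper itself is hypothetical.

```python
# Minimal sketch of the --dynamic toggle, assuming PaddlePaddle 1.x-era
# fluid and the hapi helper used throughout these scripts.
import paddle.fluid as fluid
from hapi.model import set_device


def setup(device="cpu", dynamic=True):
    place = set_device(device)       # "cpu" or "gpu"
    if dynamic:
        fluid.enable_dygraph(place)  # imperative (dynamic-graph) mode
    return place                     # static graph otherwise
```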
examples/sequence_tagging/downloads.py (+1, -1)

```diff
@@ -35,7 +35,7 @@ FILE_INFO = {
     },
     'MODEL': {
         'name': 'sequence_tagging_dy.tar.gz',
-        'md5': "1125d374c03c8218b6e47325dcf607e3"
+        'md5': "6ba37ceea8f1f764ba1fe227295a6a3b"
     },
 }
```
examples/sequence_tagging/eval.py (+19, -39)

```diff
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-SequenceTagging network structure
+SequenceTagging eval structure
 """
 from __future__ import division
@@ -25,18 +25,16 @@ import math
 import argparse
 import numpy as np

-from train import SeqTagging
+from train import SeqTagging, ChunkEval, LacLoss
 from utils.configure import PDConfig
 from utils.check import check_gpu, check_version
-from utils.metrics import chunk_count
-from reader import LacDataset, create_lexnet_data_generator, create_dataloader
+from reader import LacDataset, LacDataLoader

 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.join(work_dir, "../"))

 from hapi.model import set_device, Input
 import paddle.fluid as fluid
-from paddle.fluid.optimizer import AdamOptimizer
 from paddle.fluid.layers.utils import flatten
@@ -44,48 +42,30 @@ def main(args):
     place = set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None

-    inputs = [Input([None, None], 'int64', name='words'),
-              Input([None], 'int64', name='length')]
+    inputs = [
+        Input([None, None], 'int64', name='words'),
+        Input([None], 'int64', name='length'),
+        Input([None, None], 'int64', name='target')
+    ]
+    labels = [Input([None, None], 'int64', name='labels')]

-    feed_list = None if args.dynamic else [x.forward() for x in inputs]
     dataset = LacDataset(args)
-    eval_path = args.test_file
-    chunk_evaluator = fluid.metrics.ChunkEvaluator()
-    chunk_evaluator.reset()
-    eval_generator = create_lexnet_data_generator(
-        args, reader=dataset, file_name=eval_path, place=place, mode="test")
-    eval_dataset = create_dataloader(
-        eval_generator, place, feed_list=feed_list)
+    eval_dataset = LacDataLoader(args, place, phase="test")

     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
-    model = SeqTagging(args, vocab_size, num_labels)
-    optim = AdamOptimizer(
-        learning_rate=args.base_learning_rate,
-        parameter_list=model.parameters())
+    model = SeqTagging(args, vocab_size, num_labels, mode="test")

     model.mode = "test"
-    model.prepare(inputs=inputs)
+    model.prepare(
+        metrics=ChunkEval(num_labels),
+        inputs=inputs,
+        labels=labels,
+        device=place)
     model.load(args.init_from_checkpoint, skip_mismatch=True)

-    for data in eval_dataset():
-        if len(data) == 1:
-            batch_data = data[0]
-            targets = np.array(batch_data[2])
-        else:
-            batch_data = data
-            targets = batch_data[2].numpy()
-        inputs_data = [batch_data[0], batch_data[1]]
-        crf_decode, length = model.test(inputs=inputs_data)
-        num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(
-            crf_decode, targets, length, dataset.id2label_dict)
-        chunk_evaluator.update(num_infer_chunks, num_label_chunks,
-                               num_correct_chunks)
-    precision, recall, f1 = chunk_evaluator.eval()
-    print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
+    model.evaluate(eval_dataset.dataloader, batch_size=args.batch_size)

 if __name__ == '__main__':
```
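What the removed loop computed is plain chunk-level precision/recall/F1 over the three counts that `chunk_count` accumulates; the new `ChunkEval` metric reports the same numbers through `model.evaluate`. A self-contained sketch of the arithmetic, with a hypothetical `chunk_prf` helper and made-up counts:

```python
def chunk_prf(num_infer, num_label, num_correct):
    """Chunk-level precision/recall/F1 from the three counts that
    chunk_count() accumulates (inferred, gold, and matching chunks)."""
    precision = num_correct / num_infer if num_infer else 0.0
    recall = num_correct / num_label if num_label else 0.0
    denom = precision + recall
    f1 = 2 * precision * recall / denom if denom else 0.0
    return precision, recall, f1


# Example with invented counts: 90 predicted chunks, 100 gold, 85 matching.
print("[test] P: %.5f, R: %.5f, F1: %.5f" % chunk_prf(90, 100, 85))
```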
examples/sequence_tagging/predict.py (+17, -26)

```diff
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """
-SequenceTagging network structure
+SequenceTagging predict structure
 """
 from __future__ import division
@@ -28,14 +28,13 @@ import numpy as np
 from train import SeqTagging
 from utils.check import check_gpu, check_version
 from utils.configure import PDConfig
-from reader import LacDataset, create_lexnet_data_generator, create_dataloader
+from reader import LacDataset, LacDataLoader

 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.join(work_dir, "../"))

 from hapi.model import set_device, Input
 import paddle.fluid as fluid
-from paddle.fluid.optimizer import AdamOptimizer
 from paddle.fluid.layers.utils import flatten
@@ -43,26 +42,18 @@ def main(args):
     place = set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None

-    inputs = [Input([None, None], 'int64', name='words'),
-              Input([None], 'int64', name='length')]
+    inputs = [
+        Input([None, None], 'int64', name='words'),
+        Input([None], 'int64', name='length')
+    ]

-    feed_list = None if args.dynamic else [x.forward() for x in inputs]
     dataset = LacDataset(args)
-    predict_path = args.predict_file
-    predict_generator = create_lexnet_data_generator(
-        args, reader=dataset, file_name=predict_path, place=place,
-        mode="predict")
-    predict_dataset = create_dataloader(
-        predict_generator, place, feed_list=feed_list)
+    predict_dataset = LacDataLoader(args, place, phase="predict")

     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
-    model = SeqTagging(args, vocab_size, num_labels)
-    optim = AdamOptimizer(
-        learning_rate=args.base_learning_rate,
-        parameter_list=model.parameters())
+    model = SeqTagging(args, vocab_size, num_labels, mode="predict")

     model.mode = "test"
     model.prepare(inputs=inputs)
@@ -70,15 +61,15 @@ def main(args):
     model.load(args.init_from_checkpoint, skip_mismatch=True)
     f = open(args.output_file, "wb")
-    for data in predict_dataset():
+    for data in predict_dataset.dataloader:
         if len(data) == 1:
             input_data = data[0]
         else:
             input_data = data
-        results, length = model.test(inputs=flatten(input_data))
+        results, length = model.test_batch(inputs=flatten(input_data))
         for i in range(len(results)):
             word_len = length[i]
             word_ids = results[i][:word_len]
             tags = [dataset.id2label_dict[str(id)] for id in word_ids]
             f.write("\002".join(tags) + "\n")
```
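The prediction loop's output format is unchanged: decoded label ids are truncated to each sequence's true length, mapped through `dataset.id2label_dict`, and written one sequence per line with tags separated by `\002`. A toy, self-contained illustration of those mechanics; the dictionary contents and arrays are invented:

```python
# Toy illustration of the tag-writing loop in predict.py. Only the
# mechanics mirror the script; the tag set below is hypothetical.
id2label_dict = {"0": "B-PER", "1": "I-PER", "2": "O"}

results = [[0, 1, 2, 2], [2, 2]]  # decoded label ids per sequence (padded)
lengths = [3, 2]                  # valid (unpadded) length of each sequence

with open("predict.result", "w") as f:
    for ids, word_len in zip(results, lengths):
        tags = [id2label_dict[str(i)] for i in ids[:word_len]]
        f.write("\002".join(tags) + "\n")  # \002 separates tags in a line
```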
examples/sequence_tagging/reader.py (+126, -135)

```diff
@@ -19,12 +19,19 @@ from __future__ import division
 from __future__ import print_function

 import io
+import os
+import leveldb
 import numpy as np
+import shutil
+from functools import partial

 import paddle
+from paddle.io import BatchSampler, DataLoader, Dataset
+from paddle.fluid.dygraph.parallel import ParallelEnv
+from hapi.distributed import DistributedBatchSampler


-class LacDataset(object):
+class LacDataset(Dataset):
     """
     Load lexical analysis dataset
     """
@@ -34,6 +41,7 @@ class LacDataset(object):
         self.label_dict_path = args.label_dict_path
         self.word_rep_dict_path = args.word_rep_dict_path
         self._load_dict()
+        self.examples = []

     def _load_dict(self):
         self.word2id_dict = self.load_kv_dict(
```

The @@ -108,152 +116,135 @@ hunk rewrites the data pipeline. `file_reader` no longer returns a nested `wrapper()` generator that tokenizes, truncates to `max_seq_len`, zero-pads, and yields `(word_ids, label_ids, words_len)` tuples; it now just caches raw lines in `self.examples`, and `LacDataset` implements the `paddle.io.Dataset` protocol (`__getitem__`/`__len__`) so tokenization happens per sample:

```python
    def file_reader(self, filename, phase="train"):
        """
        yield (word_idx, target_idx) one by one from file,
        or yield (word_idx, ) in `infer` mode
        """
        self.phase = phase
        with io.open(filename, "r", encoding="utf8") as fr:
            if phase in ["train", "test"]:
                headline = next(fr)
                headline = headline.strip().split('\t')
                assert len(headline) == 2 and headline[
                    0] == "text_a" and headline[1] == "label"
                for line in fr:
                    line_str = line.strip("\n")
                    if len(line_str) < 1 and len(line_str.split('\t')) < 2:
                        continue
                    self.examples.append(line_str)
            else:
                for idx, line in enumerate(fr):
                    words = line.strip("\n").split("\t")[0]
                    self.examples.append(words)

    def __getitem__(self, idx):
        line_str = self.examples[idx]
        if self.phase in ["train", "test"]:
            words, labels = line_str.split('\t')
            word_ids = self.word_to_ids(words.split("\002"))
            label_ids = self.label_to_ids(labels.split("\002"))
            assert len(word_ids) == len(label_ids)
            return word_ids, label_ids
        else:
            words = [w for w in line_str]
            word_ids = self.word_to_ids(words)
            return word_ids

    def __len__(self):
        return len(self.examples)
```

`create_lexnet_data_generator` changes from a file-driven batch generator (with its own epoch loop, batching, and per-phase padding branches) into a collate function over a sampled batch of instances, `padding_data` gains an `if_len` switch that also returns the pre-padding lengths, and `create_dataloader` (a `paddle.io.DataLoader.from_generator` / `set_batch_generator` wrapper) is replaced by a `LacDataLoader` class:

```python
def create_lexnet_data_generator(args, insts, phase="train"):
    def padding_data(max_len, batch_data, if_len=False):
        padding_batch_data = []
        padding_lens = []
        for data in batch_data:
            data = data[:max_len]
            if if_len:
                seq_len = np.int64(len(data))
                padding_lens.append(seq_len)
            data += [0 for _ in range(max_len - len(data))]
            padding_batch_data.append(data)
        if if_len:
            return np.array(padding_batch_data), np.array(padding_lens)
        else:
            return np.array(padding_batch_data)

    if phase == "train":
        batch_words = [inst[0] for inst in insts]
        batch_labels = [inst[1] for inst in insts]
        padding_batch_words, padding_lens = padding_data(
            args.max_seq_len, batch_words, if_len=True)
        padding_batch_labels = padding_data(args.max_seq_len, batch_labels)
        return [
            padding_batch_words, padding_lens, padding_batch_labels,
            padding_batch_labels
        ]
    elif phase == "test":
        batch_words = [inst[0] for inst in insts]
        seq_len = [len(inst[0]) for inst in insts]
        max_seq_len = max(seq_len)
        batch_labels = [inst[1] for inst in insts]
        padding_batch_words, padding_lens = padding_data(
            max_seq_len, batch_words, if_len=True)
        padding_batch_labels = padding_data(max_seq_len, batch_labels)
        return [
            padding_batch_words, padding_lens, padding_batch_labels,
            padding_batch_labels
        ]
    else:
        batch_words = insts
        seq_len = [len(inst) for inst in insts]
        max_seq_len = max(seq_len)
        padding_batch_words, padding_lens = padding_data(
            max_seq_len, batch_words, if_len=True)
        return [padding_batch_words, padding_lens]


class LacDataLoader(object):
    def __init__(self,
                 args,
                 place,
                 phase="train",
                 shuffle=False,
                 num_workers=0,
                 drop_last=False):
        assert phase in [
            "train", "test", "predict"
        ], "phase should be in [train, test, predict], but get %s" % phase

        if phase == "train":
            file_name = args.train_file
        elif phase == "test":
            file_name = args.test_file
        elif phase == "predict":
            file_name = args.predict_file

        self.dataset = LacDataset(args)
        self.dataset.file_reader(file_name, phase=phase)

        if phase == "train":
            self.sampler = DistributedBatchSampler(
                dataset=self.dataset,
                batch_size=args.batch_size,
                shuffle=shuffle,
                drop_last=drop_last)
        else:
            self.sampler = BatchSampler(
                dataset=self.dataset,
                batch_size=args.batch_size,
                shuffle=shuffle,
                drop_last=drop_last)
        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self.sampler,
            places=place,
            collate_fn=partial(
                create_lexnet_data_generator, args, phase=phase),
            num_workers=num_workers,
            return_list=True)
```
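The collate function above is the crux of the new pipeline: `DataLoader` hands it a list of `__getitem__` outputs and it pads them into dense numpy arrays, to `args.max_seq_len` for training and to the batch maximum otherwise. A self-contained sketch of the same padding logic:

```python
import numpy as np


def padding_data(max_len, batch_data, if_len=False):
    """Truncate/pad each sequence to max_len with 0s; optionally also
    return the pre-padding lengths (mirrors reader.py's helper)."""
    padded, lens = [], []
    for data in batch_data:
        data = list(data)[:max_len]
        if if_len:
            lens.append(np.int64(len(data)))
        data += [0] * (max_len - len(data))
        padded.append(data)
    return (np.array(padded), np.array(lens)) if if_len else np.array(padded)


words, lens = padding_data(5, [[3, 1, 4], [1, 5, 9, 2, 6, 5]], if_len=True)
print(words.shape, lens)  # (2, 5) [3 5]
```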
examples/sequence_tagging/sequence_tagging.yaml (+2, -3)

```diff
 word_dict_path: "./conf/word.dic"
 label_dict_path: "./conf/tag.dic"
 word_rep_dict_path: "./conf/q2b.dic"
-device: "cpu"
+device: "gpu"
 dynamic: True
 epoch: 10
 base_learning_rate: 0.001
@@ -14,7 +14,7 @@ batch_size: 300
 max_seq_len: 126
 num_devices: 1
 save_dir: "model"
-init_from_checkpoint: "model_baseline/params"
+init_from_checkpoint: ""
 init_from_pretrain_model: ""
 save_freq: 1
 eval_freq: 1
@@ -22,4 +22,3 @@ output_file: "predict.result"
 test_file: "./data/test.tsv"
 train_file: "./data/train.tsv"
 predict_file: "./data/infer.tsv"
-mode: "train"
```
examples/sequence_tagging/train.py (+34, -38)

```diff
@@ -28,21 +28,23 @@ import numpy as np
 work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(os.path.join(work_dir, "../"))

 from hapi.metrics import Metric
 from hapi.model import Model, Input, Loss, set_device
 from hapi.text.text import SequenceTagging
 from utils.check import check_gpu, check_version
 from utils.configure import PDConfig
-from reader import LacDataset, create_lexnet_data_generator, create_dataloader
+from reader import LacDataset, LacDataLoader
 import paddle.fluid as fluid
 from paddle.fluid.optimizer import AdamOptimizer

+__all__ = ["SeqTagging", "LacLoss", "ChunkEval"]
+

 class SeqTagging(Model):
-    def __init__(self, args, vocab_size, num_labels, length=None):
+    def __init__(self, args, vocab_size, num_labels, length=None,
+                 mode="train"):
         super(SeqTagging, self).__init__()
         """
         define the lexical analysis network structure
@@ -53,7 +55,7 @@ class SeqTagging(Model):
         for infer: return the prediction
         otherwise: return the prediction
         """
-        self.mode_type = args.mode
+        self.mode_type = mode
         self.word_emb_dim = args.word_emb_dim
         self.vocab_size = vocab_size
         self.num_labels = num_labels
```

(The @@ -65,7 +67,7 @@ hunk changes one line around `self.bigru_num` / `self.batch_size` / `self.init_bound` / `self.length` and the `SequenceTagging(...)` call; the changed line itself is not recoverable from this capture.)

```diff
@@ -207,30 +209,25 @@ def main(args):
     place = set_device(args.device)
     fluid.enable_dygraph(place) if args.dynamic else None

-    inputs = [Input([None, None], 'int64', name='words'),
-              Input([None], 'int64', name='length'),
-              Input([None, None], 'int64', name='target')]
+    inputs = [
+        Input([None, None], 'int64', name='words'),
+        Input([None], 'int64', name='length'),
+        Input([None, None], 'int64', name='target')
+    ]
     labels = [Input([None, None], 'int64', name='labels')]
-    feed_list = None if args.dynamic else [
-        x.forward() for x in inputs + labels]
+    feed_list = None if args.dynamic else [
+        x.forward() for x in inputs + labels
+    ]
     dataset = LacDataset(args)
-    train_path = args.train_file
-    test_path = args.test_file
-    train_generator = create_lexnet_data_generator(
-        args, reader=dataset, file_name=train_path, place=place, mode="train")
-    test_generator = create_lexnet_data_generator(
-        args, reader=dataset, file_name=test_path, place=place, mode="test")
-    train_dataset = create_dataloader(
-        train_generator, place, feed_list=feed_list)
-    test_dataset = create_dataloader(
-        test_generator, place, feed_list=feed_list)
+    train_dataset = LacDataLoader(args, place, phase="train")

     vocab_size = dataset.vocab_size
     num_labels = dataset.num_labels
-    model = SeqTagging(args, vocab_size, num_labels)
+    model = SeqTagging(args, vocab_size, num_labels, mode="train")

     optim = AdamOptimizer(
         learning_rate=args.base_learning_rate,
@@ -250,8 +247,7 @@ def main(args):
     if args.init_from_pretrain_model:
         model.load(args.init_from_pretrain_model, reset_optimizer=True)

-    model.fit(train_dataset,
-              test_dataset,
+    model.fit(train_dataset.dataloader,
               epochs=args.epoch,
               batch_size=args.batch_size,
               eval_freq=args.eval_freq,
```
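The input specs train.py now declares line up one-to-one with what the collate function returns (`[words, lengths, labels, labels]`): three model inputs plus one label slot. A sketch of the declared specs, assuming `hapi.model.Input([shape], dtype, name=...)` exactly as used in this diff:

```python
# Input/label specs matching the collate output of reader.py
# (assumes the hapi 0.x Input helper shown in this commit).
from hapi.model import Input

inputs = [
    Input([None, None], 'int64', name='words'),   # [batch, seq_len] token ids
    Input([None], 'int64', name='length'),        # [batch] true lengths
    Input([None, None], 'int64', name='target'),  # [batch, seq_len] gold tags
]
labels = [Input([None, None], 'int64', name='labels')]
```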
examples/sequence_tagging/utils/configure.py (+11, -5)

The @@ -195,13 +195,19 @@ hunk re-wraps the `add_arg` calls onto multiple lines; the exact wrapping below is an approximate reconstruction from the captured tokens:

```diff
@@ -195,13 +195,19 @@ class PDConfig(object):
                                "Whether to perform predicting.")
         self.default_g.add_arg("do_eval", bool, False,
                                "Whether to perform evaluating.")
-        self.default_g.add_arg("do_save_inference_model", bool, False,
-                               "Whether to perform model saving for inference.")
+        self.default_g.add_arg(
+            "do_save_inference_model", bool, False,
+            "Whether to perform model saving for inference.")

         # NOTE: args for profiler
-        self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
-        self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
-        self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
+        self.default_g.add_arg(
+            "is_profiler", int, 0,
+            "the switch of profiler tools. (used for benchmark)")
+        self.default_g.add_arg(
+            "profiler_path", str, './',
+            "the profiler output file path. (used for benchmark)")
+        self.default_g.add_arg(
+            "max_iter", int, 0,
+            "the max train batch num.(used for benchmark)")

         self.parser = parser
```
examples/sequence_tagging/utils/metrics.py (+16, -17)

Only context survives in this capture; every visible line of both hunks is unchanged, so the edits here appear to be formatting-only (the @@ -73,4 +73,3 @@ hunk drops one trailing line, most likely a blank line at end of file):

```diff
@@ -63,8 +63,8 @@ def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
     assert infer_numpy.shape[0] == label_numpy.shape[0]
     for i in range(infer_numpy.shape[0]):
         infer_list = infer_numpy[i][:seq_len[i]]
         label_list = label_numpy[i][:seq_len[i]]
         infer_dict = build_chunk(infer_list, id2label_dict)
         num_infer_chunks += len(infer_dict)
         label_dict = build_chunk(label_list, id2label_dict)
@@ -73,4 +73,3 @@ def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
         if key in label_dict and label_dict[key] == infer_dict[key]:
             num_correct_chunks += 1
     return num_infer_chunks, num_label_chunks, num_correct_chunks
```
hapi/text/text.py (+332, -67)

The @@ -19,6 +19,7 @@ hunk adds one import near the top (the added line is not recoverable from this capture; the visible `import os` / `import six` / `import sys` and `six.PY2` block is unchanged). The remaining small changes:

```diff
@@ -37,7 +38,7 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers.utils as utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
-from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit
+from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit, Conv2D
 from paddle.fluid.data_feeder import convert_dtype
 from paddle.fluid import layers
@@ -49,7 +50,8 @@ __all__ = [
     'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
     'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
     'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
-    'Crf_decoding', 'SequenceTagging'
+    'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer', 'CNNEncoder',
+    'BOWEncoder', 'SimpleConvPoolLayer', 'GRUEncoder', 'DynamicGRU', 'LSTMEncoder'
 ]
@@ -87,12 +89,12 @@ class RNNCell(Layer):
         batch_ref = flatten(batch_ref)[0]
         def _is_shape_sequence(seq):
-            if sys.version_info < (3, ):
+            if sys.version_info < (3,):
                 integer_types = (
                     int,
-                    long, )
+                    long,)
             else:
-                integer_types = (int, )
+                integer_types = (int,)
@@ -763,7 +765,7 @@ class BasicGRUCell(RNNCell):
         c = self._activation(candidate)
         new_hidden = u * pre_hidden + (1 - u) * c
-        return new_hidden
+        return new_hidden, new_hidden
```

@@ -1217,7 +1219,7 @@ reflows the scaled dot-product `layers.matmul(x=q, y=k, transpose_y=True, alpha=self.d_model**-0.5)` call in MultiHeadAttention; @@ -1307,7 +1309,6 @@ appears to drop a blank line in TransformerEncoderLayer's `__init__`; @@ -1555,7 +1556,7 @@, @@ -1589,7 +1590,7 @@, and @@ -1605,7 +1606,7 @@ fix comment spacing (`#TODO:` to `# TODO:`, `#BasicGRUCell(` to `# BasicGRUCell(`); @@ -1672,7 +1673,6 @@ and @@ -1723,7 +1723,6 @@ appear to remove blank lines in `Linear_chain_crf.forward` and `Crf_decoding.forward`.

@@ -1741,6 +1740,64 @@ adds a `GRUEncoderLayer`:

```python
class GRUEncoderLayer(Layer):
    def __init__(self,
                 input_dim,
                 grnn_hidden_dim,
                 init_bound,
                 num_layers=1,
                 h_0=None,
                 is_bidirection=False):
        super(GRUEncoderLayer, self).__init__()
        self.h_0 = h_0
        self.num_layers = num_layers
        self.is_bidirection = is_bidirection
        self.gru_list = []
        self.gru_r_list = []
        for i in range(num_layers):
            self.basic_gru_cell = BasicGRUCell(
                input_size=input_dim if i == 0 else input_dim * 2,
                hidden_size=grnn_hidden_dim,
                param_attr=fluid.ParamAttr(
                    initializer=fluid.initializer.UniformInitializer(
                        low=-init_bound, high=init_bound),
                    regularizer=fluid.regularizer.L2DecayRegularizer(
                        regularization_coeff=1e-4)))
            self.gru_list.append(
                self.add_sublayer(
                    "gru_%d" % i,
                    RNN(self.basic_gru_cell,
                        is_reverse=False,
                        time_major=False)))
        if self.is_bidirection:
            for i in range(num_layers):
                self.basic_gru_cell_r = BasicGRUCell(
                    input_size=input_dim if i == 0 else input_dim * 2,
                    hidden_size=grnn_hidden_dim,
                    param_attr=fluid.ParamAttr(
                        initializer=fluid.initializer.UniformInitializer(
                            low=-init_bound, high=init_bound),
                        regularizer=fluid.regularizer.L2DecayRegularizer(
                            regularization_coeff=1e-4)))
                self.gru_r_list.append(
                    self.add_sublayer(
                        "gru_r_%d" % i,
                        RNN(self.basic_gru_cell_r,
                            is_reverse=True,
                            time_major=False)))

    def forward(self, input_feature):
        for i in range(self.num_layers):
            pre_gru, pre_state = self.gru_list[i](input_feature)
            if self.is_bidirection:
                gru_r, r_state = self.gru_r_list[i](input_feature)
                out = fluid.layers.concat(input=[pre_gru, gru_r], axis=-1)
            else:
                out = pre_gru
            input_feature = out
        return out
```

SequenceTagging then swaps its hand-built stack of `BiGRU` sublayers for this encoder:

```diff
@@ -1790,26 +1847,13 @@ class SequenceTagging(fluid.dygraph.Layer):
             force_cpu=True,
             name='h_0')

-        self.bigru_units = []
-        for i in range(self.bigru_num):
-            if i == 0:
-                self.bigru_units.append(
-                    self.add_sublayer(
-                        "bigru_units%d" % i,
-                        BiGRU(
-                            self.grnn_hidden_dim,
-                            self.grnn_hidden_dim,
-                            self.init_bound,
-                            h_0=h_0)))
-            else:
-                self.bigru_units.append(
-                    self.add_sublayer(
-                        "bigru_units%d" % i,
-                        BiGRU(
-                            self.grnn_hidden_dim * 2,
-                            self.grnn_hidden_dim,
-                            self.init_bound,
-                            h_0=h_0)))
+        self.gru_encoder = GRUEncoderLayer(
+            input_dim=self.grnn_hidden_dim,
+            grnn_hidden_dim=self.grnn_hidden_dim,
+            init_bound=self.init_bound,
+            num_layers=self.bigru_num,
+            h_0=h_0,
+            is_bidirection=True)

         self.fc = Linear(
             input_dim=self.grnn_hidden_dim * 2,
@@ -1837,10 +1881,7 @@ class SequenceTagging(fluid.dygraph.Layer):
         word_embed = self.word_embedding(word)
         input_feature = word_embed
-        for i in range(self.bigru_num):
-            bigru_output = self.bigru_units[i](input_feature)
-            input_feature = bigru_output
+        bigru_output = self.gru_encoder(input_feature)
         emission = self.fc(bigru_output)

         if target is not None:
@@ -1854,3 +1895,227 @@ class SequenceTagging(fluid.dygraph.Layer):
         self.linear_chain_crf.weight = self.crf_decoding.weight
         crf_decode = self.crf_decoding(input=emission, length=lengths)
         return crf_decode, lengths
```

Finally, @@ -1854,3 +1895,227 @@ appends the simnet encoder layers (reproduced as committed, including the `self.padding_idx = self.padding_idx` line in `GRUEncoder`):

```python
class SimpleConvPoolLayer(Layer):
    def __init__(self, num_channels, num_filters, filter_size,
                 use_cudnn=False, act=None):
        super(SimpleConvPoolLayer, self).__init__()
        self._conv2d = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            padding=[1, 1],
            use_cudnn=use_cudnn,
            act=act)

    def forward(self, input):
        x = self._conv2d(input)
        x = fluid.layers.reduce_max(x, dim=-1)
        x = fluid.layers.reshape(x, shape=[x.shape[0], -1])
        return x


class CNNEncoder(Layer):
    """
    simple CNNEncoder for simnet
    """

    def __init__(self, dict_size, emb_dim, filter_size, num_filters,
                 hidden_dim, seq_len, padding_idx, act):
        super(CNNEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.filter_size = filter_size
        self.num_filters = num_filters
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.padding_idx = padding_idx
        self.act = act
        self.channels = 1

        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.cnn_layer = SimpleConvPoolLayer(
            self.channels,
            self.num_filters,
            self.filter_size,
            use_cudnn=False,
            act=self.act)

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_reshape = fluid.layers.reshape(
            emb, shape=[-1, self.channels, self.seq_len, self.hidden_dim])
        emb_out = self.cnn_layer(emb_reshape)
        return emb_out


class BOWEncoder(Layer):
    """
    simple BOWEncoder for simnet
    """

    def __init__(self, dict_size, emb_dim, bow_dim, seq_len, padding_idx):
        super(BOWEncoder, self).__init__()
        self.dict_size = dict_size
        self.bow_dim = bow_dim
        self.seq_len = seq_len
        self.emb_dim = emb_dim
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_reshape = fluid.layers.reshape(
            emb, shape=[-1, self.seq_len, self.bow_dim])
        bow_emb = fluid.layers.reduce_sum(emb_reshape, dim=1)
        return bow_emb


class DynamicGRU(fluid.dygraph.Layer):
    def __init__(self,
                 size,
                 h_0=None,
                 param_attr=None,
                 bias_attr=None,
                 is_reverse=False,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 origin_mode=False,
                 init_size=None):
        super(DynamicGRU, self).__init__()
        self.gru_unit = GRUUnit(
            size * 3,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)
        self.size = size
        self.h_0 = h_0
        self.is_reverse = is_reverse

    def forward(self, inputs):
        hidden = self.h_0
        res = []
        for i in range(inputs.shape[1]):
            if self.is_reverse:
                i = inputs.shape[1] - 1 - i
            input_ = inputs[:, i:i + 1, :]
            input_ = fluid.layers.reshape(
                input_, [-1, input_.shape[2]], inplace=False)
            hidden, reset, gate = self.gru_unit(input_, hidden)
            hidden_ = fluid.layers.reshape(
                hidden, [-1, 1, hidden.shape[1]], inplace=False)
            res.append(hidden_)
        if self.is_reverse:
            res = res[::-1]
        res = fluid.layers.concat(res, axis=1)
        return res


class GRUEncoder(Layer):
    """
    simple GRUEncoder for simnet
    """

    def __init__(self, dict_size, emb_dim, gru_dim, hidden_dim,
                 padding_idx, seq_len):
        super(GRUEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.gru_dim = gru_dim
        self.seq_len = seq_len
        self.hidden_dim = hidden_dim
        self.padding_idx = self.padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.gru_layer = DynamicGRU(self.gru_dim)
        self.proj_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.gru_dim * 3)

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        h_0 = np.zeros((emb_proj.shape[0], self.hidden_dim), dtype="float32")
        h_0 = to_variable(h_0)
        gru = self.gru_layer(emb_proj, h_0=h_0)
        gru = fluid.layers.reduce_max(gru, dim=1)
        gru = fluid.layers.tanh(gru)
        return gru


class LSTMEncoder(Layer):
    """
    simple LSTMEncoder for simnet
    """

    def __init__(self, dict_size, emb_dim, lstm_dim, hidden_dim, seq_len,
                 padding_idx, is_reverse):
        """
        initialize
        """
        super(LSTMEncoder, self).__init__()
        self.dict_size = dict_size
        self.emb_dim = emb_dim
        self.lstm_dim = lstm_dim
        self.hidden_dim = hidden_dim
        self.seq_len = seq_len
        self.is_reverse = False
        self.padding_idx = padding_idx
        self.emb_layer = Embedding(
            size=[self.dict_size, self.emb_dim],
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=fluid.ParamAttr(
                name='emb', initializer=fluid.initializer.Xavier()))
        self.lstm_cell = BasicLSTMCell(
            hidden_size=self.lstm_dim, input_size=self.lstm_dim * 4)
        self.lstm_layer = RNN(
            cell=self.lstm_cell, time_major=True, is_reverse=self.is_reverse)
        self.proj_layer = Linear(
            input_dim=self.hidden_dim, output_dim=self.lstm_dim * 4)

    def forward(self, input):
        emb = self.emb_layer(input)
        emb_proj = self.proj_layer(emb)
        emb_lstm, _ = self.lstm_layer(emb_proj)
        emb_reduce = fluid.layers.reduce_max(emb_lstm, dim=1)
        emb_reshape = fluid.layers.reshape(
            emb_reduce, shape=[-1, self.seq_len, self.hidden_dim])
        emb_lstm = fluid.layers.reduce_sum(emb_reshape, dim=1)
        emb_last = fluid.layers.tanh(emb_lstm)
        return emb_last
```
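`GRUEncoderLayer` folds the per-layer BiGRU bookkeeping that `SequenceTagging` used to do into one module: each layer runs a forward and (optionally) a reverse `BasicGRUCell` over the sequence and concatenates the two outputs on the feature axis, doubling the width. A hedged usage sketch, assuming the constructor added above; the shapes are inferred from the code and untested:

```python
import numpy as np
import paddle.fluid as fluid
from hapi.text.text import GRUEncoderLayer

fluid.enable_dygraph()
encoder = GRUEncoderLayer(
    input_dim=128,           # matches grnn_hidden_dim in SequenceTagging
    grnn_hidden_dim=128,
    init_bound=0.1,
    num_layers=2,            # bigru_num
    is_bidirection=True)     # forward + reverse GRUs, concatenated

# [batch, seq_len, input_dim] -> [batch, seq_len, 2 * grnn_hidden_dim]
x = fluid.dygraph.to_variable(
    np.random.rand(4, 20, 128).astype("float32"))
out = encoder(x)
```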