Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleHub
提交
afba7194
P
PaddleHub
项目概览
PaddlePaddle
/
PaddleHub
1 年多 前同步成功
通知
284
Star
12117
Fork
2091
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
200
列表
看板
标记
里程碑
合并请求
4
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleHub
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
200
Issue
200
列表
看板
标记
里程碑
合并请求
4
合并请求
4
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
afba7194
编写于
9月 16, 2022
作者:
jm_12138
提交者:
GitHub
9月 16, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update lac (#2025)
Co-authored-by:
N
wuzewu
<
wuzewu@baidu.com
>
Co-authored-by:
N
chenjian
<
chenjian26@baidu.com
>
上级
cfd8f7f5
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
115 additions
and
112 deletions
+115
-112
modules/text/lexical_analysis/lac/README.md
modules/text/lexical_analysis/lac/README.md
+2
-2
modules/text/lexical_analysis/lac/module.py
modules/text/lexical_analysis/lac/module.py
+14
-18
modules/text/lexical_analysis/lac/network.py
modules/text/lexical_analysis/lac/network.py
+0
-87
modules/text/lexical_analysis/lac/processor.py
modules/text/lexical_analysis/lac/processor.py
+0
-1
modules/text/lexical_analysis/lac/test.py
modules/text/lexical_analysis/lac/test.py
+99
-0
modules/text/lexical_analysis/lac/user.dict
modules/text/lexical_analysis/lac/user.dict
+0
-4
未找到文件。
modules/text/lexical_analysis/lac/README.md
浏览文件 @
afba7194
...
...
@@ -283,10 +283,10 @@
升级自定义词典功能,支持增加不属于lac默认提供的词性
*
2.
2.1
*
2.
3.0
移除 fluid api
-
```shell
$ hub install lac==2.
2.1
$ hub install lac==2.
3.0
```
modules/text/lexical_analysis/lac/module.py
浏览文件 @
afba7194
...
...
@@ -6,25 +6,20 @@ from __future__ import print_function
import
argparse
import
ast
import
io
import
json
import
math
import
os
import
numpy
as
np
import
paddle
import
six
from
lac
.custom
import
Customization
from
lac
.processor
import
load_kv_dict
from
lac
.processor
import
parse_result
from
lac
.processor
import
word_to_ids
from
.custom
import
Customization
from
.processor
import
load_kv_dict
from
.processor
import
parse_result
from
.processor
import
word_to_ids
from
paddle.inference
import
Config
from
paddle.inference
import
create_predictor
import
paddlehub
as
hub
from
paddlehub.common.logger
import
logger
from
paddlehub.common.paddle_helper
import
add_vars_prefix
from
paddlehub.common.utils
import
sys_stdin_encoding
from
paddlehub.io.parser
import
txt_parser
from
paddlehub.utils.utils
import
sys_stdin_encoding
from
paddlehub.utils.parser
import
txt_parser
from
paddlehub.module.module
import
moduleinfo
from
paddlehub.module.module
import
runnable
from
paddlehub.module.module
import
serving
...
...
@@ -38,19 +33,18 @@ class DataFormatError(Exception):
@
moduleinfo
(
name
=
"lac"
,
version
=
"2.
2.1
"
,
version
=
"2.
3.0
"
,
summary
=
"Baidu's open-source lexical analysis tool for Chinese, including word segmentation, part-of-speech tagging & named entity recognition"
,
author
=
"baidu-nlp"
,
author_email
=
"paddle-dev@baidu.com"
,
type
=
"nlp/lexical_analysis"
)
class
LAC
(
hub
.
Module
):
def
_initialize
(
self
,
user_dict
=
None
):
class
LAC
:
def
__init__
(
self
,
user_dict
=
None
):
"""
initialize with the necessary elements
"""
self
.
pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"infer_
model"
)
self
.
default_pretrained_model_path
=
os
.
path
.
join
(
self
.
directory
,
"infer_model"
,
"
model"
)
self
.
word2id_dict
=
load_kv_dict
(
os
.
path
.
join
(
self
.
directory
,
"assets/word.dic"
),
reverse
=
True
,
value_func
=
int
)
self
.
id2word_dict
=
load_kv_dict
(
os
.
path
.
join
(
self
.
directory
,
"assets/word.dic"
))
self
.
label2id_dict
=
load_kv_dict
(
os
.
path
.
join
(
self
.
directory
,
"assets/tag.dic"
),
reverse
=
True
,
value_func
=
int
)
...
...
@@ -72,7 +66,9 @@ class LAC(hub.Module):
"""
predictor config setting
"""
cpu_config
=
Config
(
self
.
pretrained_model_path
)
model
=
self
.
default_pretrained_model_path
+
'.pdmodel'
params
=
self
.
default_pretrained_model_path
+
'.pdiparams'
cpu_config
=
Config
(
model
,
params
)
cpu_config
.
disable_glog_info
()
cpu_config
.
disable_gpu
()
self
.
cpu_predictor
=
create_predictor
(
cpu_config
)
...
...
@@ -84,7 +80,7 @@ class LAC(hub.Module):
except
:
use_gpu
=
False
if
use_gpu
:
gpu_config
=
Config
(
self
.
pretrained_model_path
)
gpu_config
=
Config
(
model
,
params
)
gpu_config
.
disable_glog_info
()
gpu_config
.
enable_use_gpu
(
memory_pool_init_size_mb
=
500
,
device_id
=
0
)
self
.
gpu_predictor
=
create_predictor
(
gpu_config
)
...
...
modules/text/lexical_analysis/lac/network.py
已删除
100755 → 0
浏览文件 @
cfd8f7f5
# -*- coding:utf-8 -*-
import
paddle.fluid
as
fluid
def lex_net(word_dict_len, label_dict_len):
    """
    Define the lexical analysis network structure.

    The graph is: word embedding -> stacked bidirectional GRU layers ->
    fully connected emission layer -> CRF decoding.

    Args:
        word_dict_len: first dimension of the embedding table.
        label_dict_len: output size of the emission layer.

    Returns:
        A tuple ``(crf_decode, word, emission)``: the CRF decoding output,
        the input data variable and the emission layer output.
    """
    embedding_dim = 128
    hidden_dim = 128
    embedding_lr = 2
    crf_lr = 0.2  # assigned but never read anywhere in this function
    num_bigru_layers = 2
    bound = 0.1
    sparse_embedding = True

    def _uniform_init():
        """Build a fresh uniform initializer over [-bound, bound]."""
        return fluid.initializer.Uniform(low=-bound, high=bound)

    def _regularized_attr():
        """Build a fresh ParamAttr: uniform init plus L2 decay (coeff 1e-4)."""
        return fluid.ParamAttr(
            initializer=_uniform_init(),
            regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4))

    def _gru_direction(features, reverse):
        """Project the features with an fc layer, then run one GRU direction."""
        projected = fluid.layers.fc(
            input=features,
            size=hidden_dim * 3,
            param_attr=_regularized_attr())
        return fluid.layers.dynamic_gru(
            input=projected,
            size=hidden_dim,
            is_reverse=reverse,
            param_attr=_regularized_attr())

    def _bigru_layer(input_feature):
        """
        Define the bidirectional gru layer.
        """
        # The forward branch is built before the reverse branch so that the
        # layers are created in the same order as before.
        forward_out = _gru_direction(input_feature, reverse=False)
        backward_out = _gru_direction(input_feature, reverse=True)
        return fluid.layers.concat(input=[forward_out, backward_out], axis=1)

    def _net_conf(word):
        """
        Configure the network.
        """
        features = fluid.layers.embedding(
            input=word,
            size=[word_dict_len, embedding_dim],
            dtype='float32',
            is_sparse=sparse_embedding,
            param_attr=fluid.ParamAttr(
                learning_rate=embedding_lr,
                name="word_emb",
                initializer=_uniform_init()))

        # Each BiGRU layer consumes the output of the previous one.
        for _ in range(num_bigru_layers):
            features = _bigru_layer(features)

        emission = fluid.layers.fc(
            size=label_dict_len,
            input=features,
            param_attr=_regularized_attr())

        # Create the parameter named 'crfw' with shape [size + 2, size];
        # crf_decoding below refers to it by the same name.
        num_labels = emission.shape[1]
        fluid.layers.create_parameter(
            shape=[num_labels + 2, num_labels],
            dtype=emission.dtype,
            name='crfw')
        crf_decode = fluid.layers.crf_decoding(
            input=emission,
            param_attr=fluid.ParamAttr(name='crfw'))

        return crf_decode, emission

    word = fluid.layers.data(name='word', shape=[1], dtype='int64', lod_level=1)
    crf_decode, emission = _net_conf(word)

    return crf_decode, word, emission
modules/text/lexical_analysis/lac/processor.py
浏览文件 @
afba7194
# -*- coding:utf-8 -*-
import
io
import
os
import
numpy
as
np
import
six
...
...
modules/text/lexical_analysis/lac/test.py
0 → 100644
浏览文件 @
afba7194
import
os
import
shutil
import
unittest
import
paddlehub
as
hub
# Set CUDA_VISIBLE_DEVICES to '0' at import time, before any test runs.
# Some tests below call the module with use_gpu=True.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
class TestHubModule(unittest.TestCase):
    """Tests for the ``lac`` PaddleHub module: ``cut`` and ``save_inference_model``."""

    # Expected segmentation of ``cls.texts``, shared by the batch tests.
    EXPECTED_WORDS = [
        ['今天', '是', '个', '好日子'],
        ['天气预报', '说', '今天', '要', '下雨'],
        ['下', '一班', '地铁', '马上', '就要', '到', '了'],
    ]
    # Expected tags, aligned one-to-one with EXPECTED_WORDS.
    EXPECTED_TAGS = [
        ['TIME', 'v', 'q', 'n'],
        ['n', 'v', 'TIME', 'v', 'v'],
        ['f', 'm', 'n', 'd', 'v', 'v', 'xc'],
    ]

    @classmethod
    def setUpClass(cls) -> None:
        """Load the module once and prepare the shared input texts."""
        cls.text = "今天是个好日子"
        cls.texts = ["今天是个好日子", "天气预报说今天要下雨", "下一班地铁马上就要到了"]
        cls.module = hub.Module(name="lac")

    @classmethod
    def tearDownClass(cls) -> None:
        """Remove the directory written by ``test_save_inference_model``."""
        try:
            shutil.rmtree('inference')
        except FileNotFoundError:
            # The directory is absent when the save test failed or was not
            # run; that must not add a second, unrelated teardown error.
            pass

    def _expected_word_dicts(self):
        """Return the expected ``cut`` output for ``self.texts`` without tags."""
        return [{'word': words} for words in self.EXPECTED_WORDS]

    def test_cut1(self):
        """A single string input yields a flat list of words."""
        results = self.module.cut(text=self.text, use_gpu=False, batch_size=1, return_tag=False)
        self.assertEqual(results, ['今天', '是', '个', '好日子'])

    def test_cut2(self):
        """A list input on CPU with batch_size=1 yields one dict per text."""
        results = self.module.cut(text=self.texts, use_gpu=False, batch_size=1, return_tag=False)
        self.assertEqual(results, self._expected_word_dicts())

    def test_cut3(self):
        """A list input on CPU with batch_size=2 yields the same result."""
        results = self.module.cut(text=self.texts, use_gpu=False, batch_size=2, return_tag=False)
        self.assertEqual(results, self._expected_word_dicts())

    def test_cut4(self):
        """A list input with use_gpu=True yields the same result."""
        results = self.module.cut(text=self.texts, use_gpu=True, batch_size=2, return_tag=False)
        self.assertEqual(results, self._expected_word_dicts())

    def test_cut5(self):
        """With return_tag=True each result dict also carries a 'tag' list."""
        results = self.module.cut(text=self.texts, use_gpu=True, batch_size=2, return_tag=True)
        self.assertEqual(results, [{
            'word': words,
            'tag': tags
        } for words, tags in zip(self.EXPECTED_WORDS, self.EXPECTED_TAGS)])

    def test_save_inference_model(self):
        """Saving writes both the .pdmodel and .pdiparams files."""
        self.module.save_inference_model('./inference/model')

        self.assertTrue(os.path.exists('./inference/model.pdmodel'))
        self.assertTrue(os.path.exists('./inference/model.pdiparams'))
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()
modules/text/lexical_analysis/lac/user.dict
已删除
100644 → 0
浏览文件 @
cfd8f7f5
春天/SEASON
花/n 开/v
秋天的风
落 阳
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录