PaddlePaddle / hapi
Commit eb20b652
Authored Apr 30, 2020 by guosheng

Add more unit tests for apis in text.py.
Rename some apis in text.py.

Parent: 60917f41
Showing 5 changed files with 205 additions and 95 deletions (+205 -95).
examples/sentiment_classification/models.py   +37 -25
examples/transformer/transformer.py           +22 -1
hapi/tests/test_text.py                       +89 -51
hapi/text/__init__.py                         +2  -2
hapi/text/text.py                             +55 -16
examples/sentiment_classification/models.py

@@ -16,12 +16,12 @@ from paddle.fluid.dygraph.nn import Linear, Embedding
 from paddle.fluid.dygraph.base import to_variable
 import numpy as np
 from hapi.model import Model
-from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
+from hapi.text.text import GRUEncoder as BiGRUEncoder
 from hapi.text.test import BOWEncoder, CNNEncoder, GRUEncoder


 class CNN(Model):
-    def __init__(self, dict_dim, batch_size, seq_len):
+    def __init__(self, dict_dim, batch_size, seq_len):
         super(CNN, self).__init__()
         self.dict_dim = dict_dim
         self.emb_dim = 128

@@ -36,15 +36,19 @@ class CNN(Model):
             dict_size=self.dict_dim + 1,
             emb_dim=self.emb_dim,
             seq_len=self.seq_len,
-            filter_size=self.win_size,
-            num_filters=self.hid_dim,
-            hidden_dim=self.hid_dim,
+            filter_size=self.win_size,
+            num_filters=self.hid_dim,
+            hidden_dim=self.hid_dim,
             padding_idx=None,
             act='tanh')
-        self._fc1 = Linear(
-            input_dim=self.hid_dim * self.seq_len, output_dim=self.fc_hid_dim, act="softmax")
-        self._fc_prediction = Linear(
-            input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
+        self._fc1 = Linear(
+            input_dim=self.hid_dim * self.seq_len,
+            output_dim=self.fc_hid_dim,
+            act="softmax")
+        self._fc_prediction = Linear(
+            input_dim=self.fc_hid_dim,
+            output_dim=self.class_dim,
+            act="softmax")

     def forward(self, inputs):
         conv_3 = self._encoder(inputs)

@@ -69,11 +73,14 @@ class BOW(Model):
             padding_idx=None,
             bow_dim=self.hid_dim,
             seq_len=self.seq_len)
-        self._fc1 = Linear(
-            input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
-        self._fc2 = Linear(
-            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
-        self._fc_prediction = Linear(
-            input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
+        self._fc1 = Linear(
+            input_dim=self.hid_dim, output_dim=self.hid_dim, act="tanh")
+        self._fc2 = Linear(
+            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
+        self._fc_prediction = Linear(
+            input_dim=self.fc_hid_dim,
+            output_dim=self.class_dim,
+            act="softmax")

     def forward(self, inputs):
         bow_1 = self._encoder(inputs)

@@ -94,10 +101,12 @@ class GRU(Model):
         self.class_dim = 2
         self.batch_size = batch_size
         self.seq_len = seq_len
-        self._fc1 = Linear(
-            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
-        self._fc_prediction = Linear(
-            input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
+        self._fc1 = Linear(
+            input_dim=self.hid_dim, output_dim=self.fc_hid_dim, act="tanh")
+        self._fc_prediction = Linear(
+            input_dim=self.fc_hid_dim,
+            output_dim=self.class_dim,
+            act="softmax")
         self._encoder = GRUEncoder(
             dict_size=self.dict_dim + 1,
             emb_dim=self.emb_dim,

@@ -112,7 +121,7 @@ class GRU(Model):
         prediction = self._fc_prediction(fc_1)
         return prediction


 class BiGRU(Model):
     def __init__(self, dict_dim, batch_size, seq_len):
         super(BiGRU, self).__init__()

@@ -130,11 +139,13 @@ class BiGRU(Model):
             is_sparse=False)
         h_0 = np.zeros((self.batch_size, self.hid_dim), dtype="float32")
         h_0 = to_variable(h_0)
-        self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
-        self._fc2 = Linear(
-            input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
-        self._fc_prediction = Linear(
-            input_dim=self.fc_hid_dim, output_dim=self.class_dim, act="softmax")
+        self._fc1 = Linear(input_dim=self.hid_dim, output_dim=self.hid_dim * 3)
+        self._fc2 = Linear(
+            input_dim=self.hid_dim * 2, output_dim=self.fc_hid_dim, act="tanh")
+        self._fc_prediction = Linear(
+            input_dim=self.fc_hid_dim,
+            output_dim=self.class_dim,
+            act="softmax")
         self._encoder = BiGRUEncoder(
             grnn_hidden_dim=self.hid_dim,
             input_dim=self.hid_dim * 3,

@@ -144,7 +155,8 @@ class BiGRU(Model):
     def forward(self, inputs):
         emb = self.embedding(inputs)
-        emb = fluid.layers.reshape(emb, shape=[self.batch_size, -1, self.hid_dim])
+        emb = fluid.layers.reshape(
+            emb, shape=[self.batch_size, -1, self.hid_dim])
         fc_1 = self._fc1(emb)
         encoded_vector = self._encoder(fc_1)
         encoded_vector = fluid.layers.tanh(encoded_vector)
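The functional change in this file is the import rename: GRUEncoderLayer becomes GRUEncoder, while the local alias BiGRUEncoder is kept so the model bodies stay untouched. A minimal before/after sketch of a caller; the constructor arguments are taken from the BiGRU model above, the concrete values are placeholders:

# Before this commit a caller would have written:
#     from hapi.text.text import GRUEncoderLayer as BiGRUEncoder
# After this commit only the imported name changes:
from hapi.text.text import GRUEncoder as BiGRUEncoder

# Construction stays the same because the alias is unchanged
# (argument values here are illustrative, not from the repo):
hid_dim = 128
encoder = BiGRUEncoder(grnn_hidden_dim=hid_dim, input_dim=hid_dim * 3)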
examples/transformer/transformer.py

@@ -21,7 +21,7 @@ import paddle.fluid.layers as layers
 from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
 from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
 from hapi.model import Model, CrossEntropy, Loss
-from hapi.text import TransformerCell, TransformerBeamSearchDecoder, DynamicDecode
+from hapi.text import TransformerBeamSearchDecoder, DynamicDecode


 def position_encoding_init(n_position, d_pos_vec):

@@ -606,6 +606,27 @@ class Transformer(Model):
         return predict


+class TransformerCell(Layer):
+    """
+    Let inputs=(trg_word, trg_pos), states=cache to make Transformer can be
+    used as RNNCell
+    """
+
+    def __init__(self, decoder):
+        super(TransformerCell, self).__init__()
+        self.decoder = decoder
+
+    def forward(self, inputs, states, trg_src_attn_bias, enc_output,
+                static_caches):
+        trg_word, trg_pos = inputs
+        for cache, static_cache in zip(states, static_caches):
+            cache.update(static_cache)
+        logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
+                              enc_output, states)
+        new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
+        return logits, new_states
+
+
 class InferTransformer(Transformer):
     """
     model for prediction
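The TransformerCell added here adapts the example's Transformer decoder to the RNNCell-style step interface that beam search expects: per step it folds the static (encoder-side) caches into the incremental states, decodes one target position, and returns the step logits plus the updated caches. A rough sketch of one step through the cell; the tensors it consumes (trg_word, trg_pos, caches, trg_src_attn_bias, enc_output, static_caches) are assumed to have been prepared by the surrounding decoding loop and are not defined in this diff:

# Sketch only: `decoder` is the wrapped Transformer decoder from this example.
cell = TransformerCell(decoder)
logits, new_caches = cell(
    inputs=(trg_word, trg_pos),           # ids for the current step
    states=caches,                        # incremental k/v caches per layer
    trg_src_attn_bias=trg_src_attn_bias,
    enc_output=enc_output,
    static_caches=static_caches)          # precomputed encoder-side caches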
hapi/tests/test_text.py

@@ -25,8 +25,8 @@ from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
 import hapi.text as text
 from hapi.model import Model, Input, set_device
-from hapi.text import BasicLSTMCell, BasicGRUCell, RNN, DynamicDecode, MultiHeadAttention, TransformerEncoder
-from hapi.text import *
+# from hapi.text.text import BasicLSTMCell, BasicGRUCell, RNN, DynamicDecode, MultiHeadAttention, TransformerEncoder, TransformerCell
+from hapi.text.text import *


 def sigmoid(x):

@@ -187,7 +187,7 @@ class TestBasicLSTM(ModuleApiTest):
             Input(
                 [None, None, self.inputs[-1].shape[-1]],
                 "float32",
-                name="input")
+                name="input"),
         ]
         return inputs

@@ -216,7 +216,7 @@ class TestBasicGRU(ModuleApiTest):
             Input(
                 [None, None, self.inputs[-1].shape[-1]],
                 "float32",
-                name="input")
+                name="input"),
         ]
         return inputs

@@ -270,10 +270,9 @@ class TestBeamSearch(ModuleApiTest):
             Input(
                 [None, self.inputs[0].shape[-1]],
                 "float32",
-                name="init_hidden"),
-            Input(
-                [None, self.inputs[1].shape[-1]],
-                "float32",
-                name="init_cell")
+                name="init_hidden"),
+            Input(
+                [None, self.inputs[1].shape[-1]], "float32",
+                name="init_cell"),
         ]
         return inputs

@@ -328,10 +327,11 @@ class TestTransformerEncoder(ModuleApiTest):
             Input(
                 [None, None, self.inputs[0].shape[-1]],
                 "float32",
-                name="enc_input"),
-            Input(
-                [None, self.inputs[1].shape[1], None, None],
-                "float32",
-                name="attn_bias")
+                name="enc_input"),
+            Input(
+                [None, self.inputs[1].shape[1], None, None],
+                "float32",
+                name="attn_bias"),
         ]
         return inputs

@@ -395,16 +395,19 @@ class TestTransformerDecoder(TestTransformerEncoder):
             Input(
                 [None, None, self.inputs[0].shape[-1]],
                 "float32",
-                name="dec_input"),
-            Input(
-                [None, None, self.inputs[0].shape[-1]],
-                "float32",
-                name="enc_output"),
-            Input(
-                [None, self.inputs[-1].shape[1], None, None],
-                "float32",
-                name="self_attn_bias"),
-            Input(
-                [None, self.inputs[-1].shape[1], None, None],
-                "float32",
-                name="cross_attn_bias")
+                name="dec_input"),
+            Input(
+                [None, None, self.inputs[0].shape[-1]],
+                "float32",
+                name="enc_output"),
+            Input(
+                [None, self.inputs[-1].shape[1], None, None],
+                "float32",
+                name="self_attn_bias"),
+            Input(
+                [None, self.inputs[-1].shape[1], None, None],
+                "float32",
+                name="cross_attn_bias"),
         ]
         return inputs

@@ -414,16 +417,21 @@ class TestTransformerDecoder(TestTransformerEncoder):
 class TestTransformerBeamSearchDecoder(ModuleApiTest):
     def setUp(self):
-        shape = (8, 32)
         self.inputs = [
-            np.random.random(shape).astype("float32"),
-            np.random.random(shape).astype("float32")
+            # encoder output: [batch_size, seq_len, hidden_size]
+            np.random.random([2, 5, 128]).astype("float32"),
+            # cross attention bias: [batch_size, n_head, seq_len, seq_len]
+            np.random.randint(0, 1, [2, 2, 1, 5]).astype("float32") * -1e9
         ]
         self.outputs = None
         self.attrs = {
             "vocab_size": 100,
-            "embed_dim": 32,
-            "hidden_size": 32,
+            "n_layer": 2,
+            "n_head": 2,
+            "d_key": 64,
+            "d_value": 64,
+            "d_model": 128,
+            "d_inner_hid": 128
         }
         self.param_states = {}

@@ -445,13 +453,24 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
                    eos_id=1,
                    beam_size=4,
                    max_step_num=20):
-        embedder = Embedding(size=[vocab_size, d_model])
         self.beam_size = beam_size
+
+        def embeder_init(self, size):
+            Layer.__init__(self)
+            self.embedder = Embedding(size)
+
+        Embedder = type("Embedder", (Layer, ), {
+            "__init__": embeder_init,
+            "forward": lambda self, word, pos: self.embedder(word)
+        })
+        embedder = Embedder(size=[vocab_size, d_model])
+        output_layer = Linear(d_model, vocab_size)
-        decoder = TransformerDecoder(
-            n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
-            prepostprocess_dropout, attention_dropout, relu_dropout,
-            preprocess_cmd, postprocess_cmd)
-        transformer_cell = TransformerCell(decoder)
+        self.decoder = TransformerDecoder(
+            n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
+            prepostprocess_dropout, attention_dropout, relu_dropout,
+            preprocess_cmd, postprocess_cmd)
+        transformer_cell = TransformerCell(self.decoder, embedder,
+                                           output_layer)
         self.beam_search_decoder = DynamicDecode(
             TransformerBeamSearchDecoder(
                 transformer_cell,

@@ -464,23 +483,12 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
     @staticmethod
     def model_forward(self, enc_output, trg_src_attn_bias):
-        caches = [{
-            "k": layers.fill_constant_batch_size_like(
-                input=enc_output,
-                shape=[-1, self.n_head, 0, self.d_key],
-                dtype=enc_output.dtype,
-                value=0),
-            "v": layers.fill_constant_batch_size_like(
-                input=enc_output,
-                shape=[-1, self.n_head, 0, self.d_value],
-                dtype=enc_output.dtype,
-                value=0),
-        } for i in range(self.n_layer)]
+        caches = self.decoder.prepare_incremental_cache(enc_output)
         enc_output = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
             enc_output, self.beam_size)
         trg_src_attn_bias = TransformerBeamSearchDecoder.tile_beam_merge_with_batch(
             trg_src_attn_bias, self.beam_size)
-        static_caches = self.decoder.decoder.prepare_static_cache(enc_output)
+        static_caches = self.decoder.prepare_static_cache(enc_output)
         rs, _ = self.beam_search_decoder(
             inits=caches,
             enc_output=enc_output,

@@ -491,12 +499,42 @@ class TestTransformerBeamSearchDecoder(ModuleApiTest):
     def make_inputs(self):
         inputs = [
             Input(
-                [None, self.inputs[0].shape[-1]],
+                [None, None, self.inputs[0].shape[-1]],
                 "float32",
                 name="enc_output"),
             Input(
                 [None, self.inputs[1].shape[1], None, None],
                 "float32",
                 name="trg_src_attn_bias"),
         ]
         return inputs

     def test_check_output(self):
         self.check_output()


+class TestSequenceTagging(ModuleApiTest):
+    def setUp(self):
+        shape = (2, 4, 128)
+        self.inputs = [np.random.random(shape).astype("float32")]
+        self.outputs = None
+        self.attrs = {"input_size": 128, "hidden_size": 128}
+        self.param_states = {}
+
+    @staticmethod
+    def model_init(self, input_size, hidden_size):
+        self.module = SequenceTagging(input_size, hidden_size)
+
+    @staticmethod
+    def model_forward(self, inputs):
+        return self.gru(inputs)[0]
+
     def make_inputs(self):
         inputs = [
             Input(
                 [None, None, self.inputs[-1].shape[-1]],
                 "float32",
-                name="init_hidden"),
-            Input(
-                [None, self.inputs[1].shape[-1]],
-                "float32",
-                name="init_cell")
+                name="input"),
         ]
         return inputs
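Two changes in this test are worth pulling out: the cell is now built with an embedder and an output projection, so beam search only has to feed word/position ids, and the hand-rolled list of zero-length k/v caches is replaced by the decoder's new prepare_incremental_cache helper. A condensed sketch of the new wiring; the hyperparameters (vocab_size, d_model, dropout settings, and so on) and enc_output are assumed to be defined as in the test's attrs, and the lambda stands in for the small Embedder wrapper the test builds with type("Embedder", ...):

# Sketch mirroring the updated test, not a verbatim excerpt.
word_emb = Embedding(size=[vocab_size, d_model])
decoder = TransformerDecoder(
    n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
    prepostprocess_dropout, attention_dropout, relu_dropout,
    preprocess_cmd, postprocess_cmd)
cell = TransformerCell(
    decoder,
    embedding_fn=lambda word, pos: word_emb(word),  # position ids ignored here
    output_fn=Linear(d_model, vocab_size))          # project to vocab logits
caches = decoder.prepare_incremental_cache(enc_output)  # replaces the manual
                                                         # fill_constant caches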
hapi/text/__init__.py

@@ -28,6 +28,6 @@ from hapi.text.text import TransformerBeamSearchDecoder as TransformerBeamSearch
 from hapi.text.text import GRUCell as GRUCell
 from hapi.text.text import GRUEncoderCell as GRUEncoderCell
 from hapi.text.text import BiGRU as BiGRU
-from hapi.text.text import Linear_chain_crf as Linear_chain_crf
-from hapi.text.text import Crf_decoding as Crf_decoding
+from hapi.text.text import LinearChainCRF as LinearChainCRF
+from hapi.text.text import CRFDecoding as CRFDecoding
 from hapi.text.text import SequenceTagging as SequenceTagging
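For downstream code the only visible effect of this file is the set of exported names: the snake_case CRF classes disappear from hapi.text and the CamelCase ones take their place. Migration is a one-line import change (sketch, assuming code previously used the old exports):

# old exports (removed by this commit):
#     from hapi.text import Linear_chain_crf, Crf_decoding
# new exports:
from hapi.text import LinearChainCRF, CRFDecoding, SequenceTagging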
hapi/text/text.py

@@ -49,7 +49,7 @@ __all__ = [
     'BeamSearchDecoder', 'MultiHeadAttention', 'FFN', 'TransformerEncoderLayer',
     'TransformerEncoder', 'TransformerDecoderLayer', 'TransformerDecoder',
     'TransformerCell', 'TransformerBeamSearchDecoder',
-    'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging', 'GRUEncoderLayer'
+    'LinearChainCRF', 'CRFDecoding', 'SequenceTagging', 'GRUEncoder'
 ]

@@ -1008,18 +1008,38 @@ class TransformerCell(Layer):
     used as RNNCell
     """

-    def __init__(self, decoder):
+    def __init__(self, decoder, embedding_fn=None, output_fn=None):
         super(TransformerCell, self).__init__()
         self.decoder = decoder
+        self.embedding_fn = embedding_fn
+        self.output_fn = output_fn

-    def __call__(self, inputs, states, trg_src_attn_bias, enc_output,
-                 static_caches):
+    def forward(self, inputs, states, trg_src_attn_bias, enc_output,
+                static_caches):
         trg_word, trg_pos = inputs
         for cache, static_cache in zip(states, static_caches):
             cache.update(static_cache)
-        logits = self.decoder(trg_word, trg_pos, None, trg_src_attn_bias,
-                              enc_output, states)
+        if self.embedding_fn is not None:
+            dec_input = self.embedding_fn(trg_word, trg_pos)
+            outputs = self.decoder(dec_input, enc_output, None,
+                                   trg_src_attn_bias, states)
+        else:
+            outputs = self.decoder(trg_word, trg_pos, enc_output, None,
+                                   trg_src_attn_bias, states)
+        if self.output_fn is not None:
+            outputs = self.output_fn(outputs)
+            if len(outputs.shape) == 3:
+                # squeeze to adapt to BeamSearchDecoder which use 2D logits
+                outputs = layers.squeeze(outputs, [1])
         new_states = [{"k": cache["k"], "v": cache["v"]} for cache in states]
-        return logits, new_states
+        return outputs, new_states
+
+    @property
+    def state_shape(self):
+        return [{
+            "k": [self.n_head, 0, self.d_key],
+            "v": [self.n_head, 0, self.d_value],
+        } for i in range(len(self.n_layer))]


 class TransformerBeamSearchDecoder(layers.BeamSearchDecoder):

@@ -1521,6 +1541,11 @@ class TransformerDecoder(Layer):
                  preprocess_cmd, postprocess_cmd):
         super(TransformerDecoder, self).__init__()
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.d_key = d_key
+        self.d_value = d_value
         self.decoder_layers = list()
         for i in range(n_layer):
             self.decoder_layers.append(

@@ -1555,6 +1580,20 @@
             for decoder_layer in self.decoder_layers
         ]

+    def prepare_incremental_cache(self, enc_output):
+        return [{
+            "k": layers.fill_constant_batch_size_like(
+                input=enc_output,
+                shape=[-1, self.n_head, 0, self.d_key],
+                dtype=enc_output.dtype,
+                value=0),
+            "v": layers.fill_constant_batch_size_like(
+                input=enc_output,
+                shape=[-1, self.n_head, 0, self.d_value],
+                dtype=enc_output.dtype,
+                value=0),
+        } for i in range(self.n_layer)]
+

 #TODO: we should merge GRUCell with BasicGRUCell
 class GRUCell(RNNCell):

@@ -1651,9 +1690,9 @@ class BiGRU(fluid.dygraph.Layer):
         return bi_merge


-class Linear_chain_crf(fluid.dygraph.Layer):
+class LinearChainCRF(Layer):
     def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
-        super(Linear_chain_crf, self).__init__()
+        super(LinearChainCRF, self).__init__()
         self._param_attr = param_attr
         self._dtype = dtype

@@ -1702,9 +1741,9 @@ class Linear_chain_crf(fluid.dygraph.Layer):
         return log_likelihood


-class Crf_decoding(fluid.dygraph.Layer):
+class CRFDecoding(Layer):
     def __init__(self, param_attr, size=None, is_test=False, dtype='float32'):
-        super(Crf_decoding, self).__init__()
+        super(CRFDecoding, self).__init__()
         self._dtype = dtype
         self._size = size

@@ -1742,7 +1781,7 @@ class Crf_decoding(fluid.dygraph.Layer):
         return viterbi_path


-class GRUEncoderLayer(Layer):
+class GRUEncoder(Layer):
     def __init__(self,
                  input_dim,
                  grnn_hidden_dim,

@@ -1750,7 +1789,7 @@ class GRUEncoderLayer(Layer):
                  num_layers=1,
                  h_0=None,
                  is_bidirection=False):
-        super(GRUEncoderLayer, self).__init__()
+        super(GRUEncoder, self).__init__()
         self.h_0 = h_0
         self.num_layers = num_layers
         self.is_bidirection = is_bidirection

@@ -1849,7 +1888,7 @@ class SequenceTagging(fluid.dygraph.Layer):
             force_cpu=True,
             name='h_0')

-        self.gru_encoder = GRUEncoderLayer(
+        self.gru_encoder = GRUEncoder(
             input_dim=self.grnn_hidden_dim,
             grnn_hidden_dim=self.grnn_hidden_dim,
             init_bound=self.init_bound,

@@ -1866,12 +1905,12 @@ class SequenceTagging(fluid.dygraph.Layer):
                 regularizer=fluid.regularizer.L2DecayRegularizer(
                     regularization_coeff=1e-4)))

-        self.linear_chain_crf = Linear_chain_crf(
+        self.linear_chain_crf = LinearChainCRF(
             param_attr=fluid.ParamAttr(
                 name='linear_chain_crfw', learning_rate=self.crf_lr),
             size=self.num_labels)

-        self.crf_decoding = Crf_decoding(
+        self.crf_decoding = CRFDecoding(
             param_attr=fluid.ParamAttr(
                 name='crfw', learning_rate=self.crf_lr),
             size=self.num_labels)
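The renamed CRF pair keeps the same constructor shape, as the SequenceTagging wiring above shows: both take a ParamAttr and a label-count size, with LinearChainCRF producing the training log-likelihood and CRFDecoding the Viterbi path. A small construction sketch; num_labels and crf_lr are placeholder values, and since the forward-call signatures are not part of this diff they are left out here:

import paddle.fluid as fluid
from hapi.text import LinearChainCRF, CRFDecoding

num_labels, crf_lr = 57, 1.0   # illustrative values only
linear_chain_crf = LinearChainCRF(
    param_attr=fluid.ParamAttr(
        name='linear_chain_crfw', learning_rate=crf_lr),
    size=num_labels)            # forward returns log_likelihood
crf_decoding = CRFDecoding(
    param_attr=fluid.ParamAttr(
        name='crfw', learning_rate=crf_lr),
    size=num_labels)            # forward returns viterbi_path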