Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
models
提交
a53a45d6
M
models
项目概览
PaddlePaddle
/
models
1 年多 前同步成功
通知
222
Star
6828
Fork
2962
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
602
列表
看板
标记
里程碑
合并请求
255
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
models
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
602
Issue
602
列表
看板
标记
里程碑
合并请求
255
合并请求
255
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a53a45d6
编写于
3月 26, 2020
作者:
王
王肖
提交者:
GitHub
3月 26, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
remove redundant code from dygraph/similarity_net (#4485)
上级
6b882d42
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
7 additions
and
313 deletions
+7
-313
dygraph/similarity_net/mmdnn.py
dygraph/similarity_net/mmdnn.py
+0
-296
dygraph/similarity_net/nets/bow.py
dygraph/similarity_net/nets/bow.py
+1
-2
dygraph/similarity_net/nets/paddle_layers.py
dygraph/similarity_net/nets/paddle_layers.py
+6
-12
dygraph/similarity_net/run_classifier.py
dygraph/similarity_net/run_classifier.py
+0
-3
未找到文件。
dygraph/similarity_net/mmdnn.py
已删除
100644 → 0
浏览文件 @
6b882d42
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MMDNN class
"""
import
numpy
as
np
import
paddle.fluid
as
fluid
import
logging
from
paddle.fluid.dygraph
import
Embedding
,
LayerNorm
,
Linear
,
to_variable
,
Layer
,
guard
from
paddle.fluid.dygraph.nn
import
Conv2D
import
paddle_layers
as
pd_layers
from
paddle.fluid
import
layers
from
paddle.fluid.dygraph
import
Layer
class BasicLSTMUnit(Layer):
    r"""
    Single-step LSTM cell built from basic fluid operators.

    The step computed by :meth:`forward` is:

    .. math::

        i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)

        f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget\_bias)

        o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)

        \tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

        c_t &= f_t \odot c_{t-1} + i_t \odot \tilde{c_t}

        h_t &= o_t \odot tanh(c_t)

    - The $W$ terms are slices of one fused weight matrix of shape
      ``[input_size + hidden_size, 4 * hidden_size]``; the input and the
      previous hidden state are concatenated before the multiplication.
    - The $b$ terms are slices of one fused bias of shape
      ``[4 * hidden_size]``.
    - $i, f, o$ and $c$ are the input gate, forget gate, output gate and
      cell activation vectors.
    - :math:`\odot` is the element-wise product.
    - :math:`\sigma` is ``gate_activation`` and :math:`tanh` is
      ``activation`` (see Args).
    - :math:`\tilde{c_t}` is the candidate cell state, computed from the
      current input and the previous hidden state.

    Args:
        hidden_size (integer): The hidden size used in the unit.
        input_size (integer): Size of the last dimension of the step input;
            it fixes the number of rows of the fused weight matrix.
        param_attr (ParamAttr|None): The parameter attribute for the
            learnable weight matrix, forwarded unchanged to
            ``create_parameter``. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias,
            forwarded unchanged to ``create_parameter``. Default: None.
        gate_activation (function|None): The activation function for the
            gates. Default: ``fluid.layers.sigmoid``.
        activation (function|None): The activation function for the
            candidate cell state and the cell output.
            Default: ``fluid.layers.tanh``.
        forget_bias (float): Constant added to the forget gate
            pre-activation. Default: 1.0.
        dtype (string): Data type of the parameters and of the forget bias
            tensor. Default: 'float32'.
    """

    def __init__(self,
                 hidden_size,
                 input_size,
                 param_attr=None,
                 bias_attr=None,
                 gate_activation=None,
                 activation=None,
                 forget_bias=1.0,
                 dtype='float32'):
        # NOTE(review): ``dtype`` is handed to the base class positionally.
        # Confirm that the first positional parameter of ``Layer.__init__``
        # is the dtype and not a name scope; the call is left as it was
        # because changing it could alter parameter names.
        super(BasicLSTMUnit, self).__init__(dtype)

        # The attribute keeps its historical spelling ("hiden") so that any
        # code reading it keeps working.
        self._hiden_size = hidden_size
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._gate_activation = gate_activation or layers.sigmoid
        self._activation = activation or layers.tanh
        self._forget_bias = layers.fill_constant(
            [1], dtype=dtype, value=forget_bias)
        self._forget_bias.stop_gradient = False
        self._dtype = dtype
        self._input_size = input_size

        # One fused matrix yields the pre-activations of all four gates.
        self._weight = self.create_parameter(
            attr=self._param_attr,
            shape=[
                self._input_size + self._hiden_size, 4 * self._hiden_size
            ],
            dtype=self._dtype)

        self._bias = self.create_parameter(
            attr=self._bias_attr,
            shape=[4 * self._hiden_size],
            dtype=self._dtype,
            is_bias=True)

    def forward(self, input, pre_hidden, pre_cell):
        """
        Run one LSTM step.

        Args:
            input: Step input; concatenated with ``pre_hidden`` along axis 1.
            pre_hidden: Hidden state of the previous step.
            pre_cell: Cell state of the previous step.

        Returns:
            tuple: ``(new_hidden, new_cell)``.
        """
        concat_input_hidden = layers.concat([input, pre_hidden], 1)
        gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
        gate_input = layers.elementwise_add(gate_input, self._bias)

        # Split order: input gate, candidate, forget gate, output gate.
        i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

        # Use the configured activations rather than hard-coded
        # sigmoid/tanh, so the constructor arguments take effect. With the
        # defaults this is the same computation as before.
        gate_act = self._gate_activation
        cell_act = self._activation

        new_cell = layers.elementwise_add(
            layers.elementwise_mul(
                pre_cell,
                gate_act(layers.elementwise_add(f, self._forget_bias))),
            layers.elementwise_mul(gate_act(i), cell_act(j)))
        new_hidden = cell_act(new_cell) * gate_act(o)

        return new_hidden, new_cell
class MMDNN(object):
    """
    MMDNN matching network.

    ``predict`` embeds two token sequences, encodes each with a forward and
    a reverse LSTM, multiplies the two encodings into a cross matrix, runs
    convolution + max pooling + ReLU over it and finishes with two
    ``Linear`` layers and a softmax.

    NOTE(review): every sub-layer (Embedding, Linear, Conv2D, LSTM) is
    constructed inside the method that uses it, i.e. anew on each call.
    Whether parameters are shared between calls then depends on how the
    framework resolves the ``ParamAttr`` names; confirm before training
    with this class.
    """

    def __init__(self, config):
        """
        Read the hyper-parameters from ``config``.

        Args:
            config (dict): Must provide ``dict_size``, ``max_len_left``,
                ``max_len_right``, ``task_mode``, ``match_mask`` and a
                nested ``net`` dict with ``embedding_dim``, ``lstm_dim``,
                ``num_filters``, ``window_size_left``,
                ``window_size_right``, ``dpool_size_left``,
                ``dpool_size_right`` and ``hidden_size``. ``n_class`` is
                additionally required when ``task_mode`` is
                ``"pointwise"``.

        Raises:
            KeyError: If a required key is missing.
        """
        self.vocab_size = int(config['dict_size'])

        net_conf = config['net']
        self.emb_size = int(net_conf['embedding_dim'])
        self.lstm_dim = int(net_conf['lstm_dim'])
        # The number of convolution filters is stored as ``kernel_size``.
        self.kernel_size = int(net_conf['num_filters'])
        self.win_size1 = int(net_conf['window_size_left'])
        self.win_size2 = int(net_conf['window_size_right'])
        self.dpool_size1 = int(net_conf['dpool_size_left'])
        self.dpool_size2 = int(net_conf['dpool_size_right'])
        self.hidden_size = int(net_conf['hidden_size'])

        self.seq_len1 = int(config['max_len_left'])
        self.seq_len2 = int(config['max_len_right'])
        self.task_mode = config['task_mode']
        self.match_mask = int(config['match_mask']) != 0

        if self.task_mode == "pointwise":
            self.n_class = int(config['n_class'])
            self.out_size = self.n_class
        elif self.task_mode == "pairwise":
            self.out_size = 1
        else:
            # Only logged, as before: ``out_size`` stays undefined, so a
            # later ``predict`` call fails with AttributeError.
            logging.error("training mode not supported")

    def embedding_layer(self, input, zero_pad=True, scale=True):
        """
        Look up embeddings for ``input``.

        Args:
            input: Token ids to embed.
            zero_pad (bool): When true, index 0 is passed as ``padding_idx``.
            scale (bool): When true, the result is multiplied by
                ``sqrt(emb_size)``.

        Returns:
            The (optionally scaled) embedding output.
        """
        padding_idx = 0 if zero_pad else None
        embedding = Embedding(
            size=[self.vocab_size, self.emb_size],
            padding_idx=padding_idx,
            param_attr=fluid.ParamAttr(
                name="word_embedding",
                initializer=fluid.initializer.Xavier()))
        emb = embedding(input)
        if scale:
            emb = emb * (self.emb_size ** 0.5)
        return emb

    def bi_dynamic_lstm(self, input, hidden_size):
        """
        Encode ``input`` with one forward and one reverse LSTM.

        Each direction first projects the input from ``emb_size`` to
        ``4 * hidden_size`` with a bias-free ``Linear`` layer.

        Args:
            input: Embedded sequence whose last dimension is ``emb_size``.
            hidden_size (int): LSTM hidden size.

        Returns:
            list: ``[forward_output, reverse_output]``.
        """
        fw_proj_layer = Linear(
            input_dim=self.emb_size,
            output_dim=4 * hidden_size,
            param_attr=fluid.ParamAttr(name="fw_fc.w"),
            bias_attr=False)
        fw_in_proj = fw_proj_layer(input)
        fw_lstm = pd_layers.DynamicLSTMLayer(
            size=4 * hidden_size,
            is_reverse=False,
            param_attr=fluid.ParamAttr(name="forward_lstm.w"),
            bias_attr=fluid.ParamAttr(name="forward_lstm.b")).ops()
        forward = fw_lstm(fw_in_proj)

        rv_proj_layer = Linear(
            input_dim=self.emb_size,
            output_dim=4 * hidden_size,
            param_attr=fluid.ParamAttr(name="rv_fc.w"),
            bias_attr=False)
        rv_in_proj = rv_proj_layer(input)
        # Fix: the comma after 'lstm' was missing, which made the whole
        # module a SyntaxError.
        # NOTE(review): the forward LSTM above does not pass this second
        # positional argument; confirm against the DynamicLSTMLayer
        # signature which of the two calls is right.
        rv_lstm = pd_layers.DynamicLSTMLayer(
            4 * hidden_size,
            'lstm',
            is_reverse=True,
            param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
            bias_attr=fluid.ParamAttr(name="reverse_lstm.b")).ops()
        reverse = rv_lstm(rv_in_proj)

        return [forward, reverse]

    def conv_pool_relu_layer(self, input, mask=None):
        """
        Apply convolution, optional masking, max pooling and ReLU.

        Args:
            input: Cross matrix; a channel axis is inserted at position 1.
            mask: Optional cross mask. Positions where it is 0 are replaced
                by a large negative constant before pooling.

        Returns:
            The ReLU-activated pooled feature map.
        """
        # data format NCHW: insert the channel axis.
        emb_expanded = fluid.layers.unsqueeze(input=input, axes=[1])

        # Fix: the imported class is ``Conv2D`` (``Conv2d`` was a
        # NameError). ``num_channels`` is 1 because the unsqueeze above adds
        # exactly one channel.
        # Padding is half the filter extent in each direction.
        conv_layer = Conv2D(
            num_channels=1,
            num_filters=self.kernel_size,
            stride=1,
            padding=(self.seq_len1 // 2, self.seq_len2 // 2),
            filter_size=(self.seq_len1, self.seq_len2),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(0.1)))
        conv = conv_layer(emb_expanded)

        if mask is not None:
            # One copy of the mask per filter, stacked on the channel axis.
            cross_mask = fluid.layers.stack(
                x=[mask] * self.kernel_size, axis=1)
            # ``-2 ** 32 + 1`` parses as ``-(2 ** 32) + 1``.
            masked_fill = -2 ** 32 + 1
            conv = cross_mask * conv + (1 - cross_mask) * masked_fill

        # Window and stride are equal, so pooling windows do not overlap.
        pool_window = [
            self.seq_len1 // self.dpool_size1,
            self.seq_len2 // self.dpool_size2,
        ]
        pool = fluid.layers.pool2d(
            input=conv,
            pool_size=pool_window,
            pool_stride=list(pool_window),
            pool_type="max")

        relu = fluid.layers.relu(pool)
        return relu

    def get_cross_mask(self, left_lens, right_lens):
        """
        Build the cross mask from the two length tensors.

        Args:
            left_lens: Lengths of the left sequences.
            right_lens: Lengths of the right sequences.

        Returns:
            The matrix product of the transposed left mask and the right
            mask.
        """
        # NOTE(review): the masks use ``maxlen = seq_len + 1`` while
        # ``predict`` pads the encodings to ``seq_len``; confirm that the
        # resulting shapes line up with the convolution output.
        mask1 = fluid.layers.sequence_mask(
            x=left_lens, dtype='float32', maxlen=self.seq_len1 + 1)
        mask2 = fluid.layers.sequence_mask(
            x=right_lens, dtype='float32', maxlen=self.seq_len2 + 1)

        mask1 = fluid.layers.transpose(x=mask1, perm=[0, 2, 1])
        cross_mask = fluid.layers.matmul(x=mask1, y=mask2)
        return cross_mask

    def predict(self, left, right):
        """
        Forward network.

        Args:
            left: Token ids of the left text.
            right: Token ids of the right text.

        Returns:
            tuple: ``(left_seq_encoder, pred)`` - the padded left encoding
            and the softmax output of the final ``Linear`` layer.
        """
        left_emb = self.embedding_layer(left, zero_pad=True, scale=False)
        right_emb = self.embedding_layer(right, zero_pad=True, scale=False)

        bi_left_outputs = self.bi_dynamic_lstm(
            input=left_emb, hidden_size=self.lstm_dim)
        left_seq_encoder = fluid.layers.concat(input=bi_left_outputs, axis=1)
        bi_right_outputs = self.bi_dynamic_lstm(
            input=right_emb, hidden_size=self.lstm_dim)
        right_seq_encoder = fluid.layers.concat(
            input=bi_right_outputs, axis=1)

        pad_value = fluid.layers.assign(
            input=np.array([0]).astype("float32"))
        left_seq_encoder, left_lens = fluid.layers.sequence_pad(
            x=left_seq_encoder, pad_value=pad_value, maxlen=self.seq_len1)
        right_seq_encoder, right_lens = fluid.layers.sequence_pad(
            x=right_seq_encoder, pad_value=pad_value, maxlen=self.seq_len2)

        cross = fluid.layers.matmul(
            left_seq_encoder, right_seq_encoder, transpose_y=True)

        if self.match_mask:
            cross_mask = self.get_cross_mask(left_lens, right_lens)
        else:
            cross_mask = None

        conv_pool_relu = self.conv_pool_relu_layer(
            input=cross, mask=cross_mask)

        hidden_fc = Linear(
            input_dim=conv_pool_relu.shape[-1], output_dim=self.hidden_size)
        relu_hid1 = fluid.layers.tanh(hidden_fc(conv_pool_relu))

        # Fix: the output layer used to be bound to ``relu_hid1`` and was
        # then called on the still-undefined ``pred``. It now has its own
        # name and consumes the hidden activations.
        output_fc = Linear(
            input_dim=relu_hid1.shape[-1], output_dim=self.out_size)
        pred = output_fc(relu_hid1)

        # NOTE(review): in "pairwise" mode ``out_size`` is 1; if the softmax
        # normalises over that last axis every score becomes 1.0 - confirm
        # this is intended.
        pred = fluid.layers.softmax(pred)

        return left_seq_encoder, pred
dygraph/similarity_net/nets/bow.py
浏览文件 @
a53a45d6
...
...
@@ -18,9 +18,8 @@ bow class
import
paddle_layers
as
layers
from
paddle
import
fluid
from
paddle.fluid.dygraph.base
import
to_variable
from
paddle.fluid.dygraph
import
Layer
,
Embedding
,
Linear
from
paddle.fluid.dygraph
import
Layer
,
Linear
import
paddle.fluid.param_attr
as
attr
uniform_initializer
=
lambda
x
:
fluid
.
initializer
.
UniformInitializer
(
low
=-
x
,
high
=
x
)
class
BOW
(
Layer
):
"""
...
...
dygraph/similarity_net/nets/paddle_layers.py
浏览文件 @
a53a45d6
...
...
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from
paddle.fluid
import
layers
import
paddle.fluid.param_attr
as
attr
import
paddle.fluid.layers.utils
as
utils
from
paddle.fluid.dygraph
import
Embedding
,
Pool2D
,
Linear
,
Conv2D
,
GRUUnit
,
Layer
,
to_variable
from
paddle.fluid.dygraph
import
Embedding
,
Conv2D
,
GRUUnit
,
Layer
,
to_variable
from
paddle.fluid.layers.utils
import
map_structure
,
flatten
,
pack_sequence_as
class
EmbeddingLayer
(
object
):
...
...
@@ -48,7 +48,6 @@ class EmbeddingLayer(object):
"""
operation
"""
# name = self.name
emb
=
Embedding
(
size
=
[
self
.
dict_size
,
self
.
emb_dim
],
is_sparse
=
True
,
...
...
@@ -99,7 +98,6 @@ class DynamicGRULayer(object):
"""
operation
"""
gru
=
DynamicGRU
(
size
=
self
.
gru_dim
,
param_attr
=
attr
.
ParamAttr
(
name
=
"%s.w"
%
self
.
name
),
...
...
@@ -201,7 +199,7 @@ class CrossEntropyLayer(object):
"""
operation
"""
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
input
,
label
=
label
)
# no need
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
input
,
label
=
label
)
return
loss
...
...
@@ -220,7 +218,7 @@ class SoftmaxWithCrossEntropyLayer(object):
"""
operation
"""
loss
=
fluid
.
layers
.
softmax_with_cross_entropy
(
# no need
loss
=
fluid
.
layers
.
softmax_with_cross_entropy
(
logits
=
input
,
label
=
label
)
return
loss
...
...
@@ -359,9 +357,7 @@ class SoftsignLayer(object):
return
softsign
# dygraph
class
SimpleConvPool
(
fluid
.
dygraph
.
Layer
):
class
SimpleConvPool
(
Layer
):
def
__init__
(
self
,
num_channels
,
num_filters
,
...
...
@@ -574,6 +570,7 @@ class FC(Layer):
# Currently, we don't support inplace in dygraph mode
return
self
.
_helper
.
append_activation
(
pre_activation
,
act
=
self
.
_act
)
class
DynamicGRU
(
Layer
):
def
__init__
(
self
,
size
,
...
...
@@ -916,10 +913,6 @@ class RNN(Layer):
return
final_outputs
,
final_states
from
paddle.fluid.dygraph
import
Embedding
,
LayerNorm
,
Linear
,
Layer
,
to_variable
place
=
fluid
.
CPUPlace
()
executor
=
fluid
.
Executor
(
place
)
class
EncoderCell
(
RNNUnit
):
def
__init__
(
self
,
num_layers
,
input_size
,
hidden_size
,
dropout_prob
=
0.
):
super
(
EncoderCell
,
self
).
__init__
()
...
...
@@ -947,6 +940,7 @@ class EncoderCell(RNNUnit):
def
state_shape
(
self
):
return
[
cell
.
state_shape
for
cell
in
self
.
lstm_cells
]
class
BasicGRUUnit
(
Layer
):
"""
****
...
...
dygraph/similarity_net/run_classifier.py
浏览文件 @
a53a45d6
...
...
@@ -254,7 +254,6 @@ def train(conf_dict, args):
logging
.
info
(
"saving infer model in %s"
%
model_path
)
# used for continuous evaluation
if
args
.
enable_ce
:
# if True:
card_num
=
get_cards
()
ce_loss
=
0
ce_time
=
0
...
...
@@ -334,7 +333,6 @@ def test(conf_dict, args):
left_feat
,
pos_score
=
net
(
left
,
pos_right
)
pred
=
pos_score
# pred_list += list(pred.numpy())
pred_list
+=
list
(
map
(
lambda
item
:
float
(
item
[
0
]),
pred
.
numpy
()))
predictions_file
.
write
(
u
"
\n
"
.
join
(
...
...
@@ -345,7 +343,6 @@ def test(conf_dict, args):
left
=
fluid
.
layers
.
reshape
(
left
,
shape
=
[
-
1
,
1
])
right
=
fluid
.
layers
.
reshape
(
right
,
shape
=
[
-
1
,
1
])
left_feat
,
pred
=
net
(
left
,
right
)
# pred_list += list(pred.numpy())
pred_list
+=
list
(
map
(
lambda
item
:
float
(
item
[
0
]),
pred
.
numpy
()))
predictions_file
.
write
(
u
"
\n
"
.
join
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录