Unverified commit a53a45d6, authored by 王肖, committed by GitHub

remove redundant code from dygraph/similarity_net (#4485)

Parent 6b882d42
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MMDNN class
"""
import numpy as np
import paddle.fluid as fluid
import logging
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
from paddle.fluid.dygraph.nn import Conv2D
import paddle_layers as pd_layers
from paddle.fluid import layers
from paddle.fluid.dygraph import Layer
class BasicLSTMUnit(Layer):
"""
****
BasicLSTMUnit class, Using basic operator to build LSTM
The algorithm can be described as the code below.
.. math::
i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )
o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
\\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
h_t &= o_t \odot tanh(c_t)
- $W$ terms denote weight matrices (e.g. $W_{ix}$ is the matrix
of weights from the input gate to the input)
- The $b$ terms denote bias vectors ($b_i$ is the input gate bias vector).
- sigmoid is the logistic sigmoid function.
- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$.
- The :math:`\odot` is the element-wise product of the vectors.
- :math:`tanh` is the activation function.
- :math:`\\tilde{c_t}` is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.
Args:
hidden_size (integer): The hidden size used in the Unit.
input_size (integer): The input feature size fed into the Unit.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note:
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The parameter attribute for the bias
of LSTM unit.
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized as zero. Default: None.
gate_activation (function|None): The activation function for gates (actGate).
Default: 'fluid.layers.sigmoid'
activation (function|None): The activation function for cells (actNode).
Default: 'fluid.layers.tanh'
forget_bias(float|1.0): forget bias used when computing forget gate
dtype(string): data type used in this unit
"""
def __init__(self,
hidden_size,
input_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
forget_bias=1.0,
dtype='float32'):
super(BasicLSTMUnit, self).__init__(dtype)
self._hidden_size = hidden_size
self._param_attr = param_attr
self._bias_attr = bias_attr
self._gate_activation = gate_activation or layers.sigmoid
self._activation = activation or layers.tanh
self._forget_bias = layers.fill_constant(
[1], dtype=dtype, value=forget_bias)
self._forget_bias.stop_gradient = False
self._dtype = dtype
self._input_size = input_size
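# a single fused weight of shape [input_size + hidden_size, 4 * hidden_size]
# produces all four gate pre-activations (i, candidate, f, o) with one matmul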
self._weight = self.create_parameter(
attr=self._param_attr,
shape=[self._input_size + self._hidden_size, 4 * self._hidden_size],
dtype=self._dtype)
self._bias = self.create_parameter(
attr=self._bias_attr,
shape=[4 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
def forward(self, input, pre_hidden, pre_cell):
concat_input_hidden = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
gate_input = layers.elementwise_add(gate_input, self._bias)
i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
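# i: input gate, j: candidate cell state (\tilde{c_t}), f: forget gate, o: output gate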
new_cell = layers.elementwise_add(
layers.elementwise_mul(
pre_cell,
layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
return new_hidden, new_cell
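# Usage sketch (not part of the original file; batch and feature sizes below are
# illustrative assumptions):
#
#     with guard():
#         cell = BasicLSTMUnit(hidden_size=8, input_size=4)
#         x = to_variable(np.random.rand(2, 4).astype("float32"))
#         h = to_variable(np.zeros((2, 8), dtype="float32"))
#         c = to_variable(np.zeros((2, 8), dtype="float32"))
#         new_h, new_c = cell(x, h, c)  # one LSTM step for a batch of 2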
class MMDNN(object):
"""
MMDNN
"""
def __init__(self, config):
"""
initialize
"""
self.vocab_size = int(config['dict_size'])
self.emb_size = int(config['net']['embedding_dim'])
self.lstm_dim = int(config['net']['lstm_dim'])
self.kernel_size = int(config['net']['num_filters'])
self.win_size1 = int(config['net']['window_size_left'])
self.win_size2 = int(config['net']['window_size_right'])
self.dpool_size1 = int(config['net']['dpool_size_left'])
self.dpool_size2 = int(config['net']['dpool_size_right'])
self.hidden_size = int(config['net']['hidden_size'])
self.seq_len1 = int(config['max_len_left'])
self.seq_len2 = int(config['max_len_right'])
self.task_mode = config['task_mode']
if int(config['match_mask']) != 0:
self.match_mask = True
else:
self.match_mask = False
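# pointwise: classify into n_class labels; pairwise: emit a single match score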
if self.task_mode == "pointwise":
self.n_class = int(config['n_class'])
self.out_size = self.n_class
elif self.task_mode == "pairwise":
self.out_size = 1
else:
logging.error("training mode not supported")
def embedding_layer(self, input, zero_pad=True, scale=True):
"""
embedding layer
"""
emb = Embedding(
size=[self.vocab_size, self.emb_size],
padding_idx=(0 if zero_pad else None),
param_attr=fluid.ParamAttr(
name="word_embedding", initializer=fluid.initializer.Xavier()))
emb = emb(input)
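# optionally scale embeddings by sqrt(emb_size), as in Transformer-style embedding scaling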
if scale:
emb = emb * (self.emb_size**0.5)
return emb
def bi_dynamic_lstm(self, input, hidden_size):
"""
bi_lstm layer
"""
fw_in_proj = Linear(
input_dim=self.emb_size,
output_dim=4 * hidden_size,
param_attr=fluid.ParamAttr(name="fw_fc.w"),
bias_attr=False)
fw_in_proj = fw_in_proj(input)
forward = pd_layers.DynamicLSTMLayer(
size=4 * hidden_size,
is_reverse=False,
param_attr=fluid.ParamAttr(name="forward_lstm.w"),
bias_attr=fluid.ParamAttr(name="forward_lstm.b")).ops()
forward = forward(fw_in_proj)
rv_in_proj = Linear(
input_dim=self.emb_size,
output_dim=4 * hidden_size,
param_attr=fluid.ParamAttr(name="rv_fc.w"),
bias_attr=False)
rv_in_proj = rv_in_proj(input)
reverse = pd_layers.DynamicLSTMLayer(
4 * hidden_size,
'lstm',
is_reverse=True,
param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
bias_attr=fluid.ParamAttr(name="reverse_lstm.b")).ops()
reverse = reverse(rv_in_proj)
return [forward, reverse]
def conv_pool_relu_layer(self, input, mask=None):
"""
convolution and pool layer
"""
# data format NCHW
emb_expanded = fluid.layers.unsqueeze(input=input, axes=[1])
# same padding
conv = Conv2D(
num_channels=1,  # single input channel after the unsqueeze above (NCHW)
num_filters=self.kernel_size,
stride=1,
padding=(int(self.seq_len1 / 2), int(self.seq_len2 // 2)),
filter_size=(self.seq_len1, self.seq_len2),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.1)))
conv = conv(emb_expanded)
if mask is not None:
cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1)
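# keep values at real positions and push padded positions to a large negative
# number so that the max pooling below effectively ignores them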
conv = cross_mask * conv + (1 - cross_mask) * (-2**32 + 1)
# valid padding
pool = fluid.layers.pool2d(
input=conv,
pool_size=[
int(self.seq_len1 / self.dpool_size1),
int(self.seq_len2 / self.dpool_size2)
],
pool_stride=[
int(self.seq_len1 / self.dpool_size1),
int(self.seq_len2 / self.dpool_size2)
],
pool_type="max", )
relu = fluid.layers.relu(pool)
return relu
def get_cross_mask(self, left_lens, right_lens):
"""
cross mask
"""
mask1 = fluid.layers.sequence_mask(
x=left_lens, dtype='float32', maxlen=self.seq_len1 + 1)
mask2 = fluid.layers.sequence_mask(
x=right_lens, dtype='float32', maxlen=self.seq_len2 + 1)
mask1 = fluid.layers.transpose(x=mask1, perm=[0, 2, 1])
cross_mask = fluid.layers.matmul(x=mask1, y=mask2)
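# the outer product of the two sequence masks is 1 only where both the left
# and the right position hold real tokens, and 0 wherever either side is padding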
return cross_mask
def predict(self, left, right):
"""
Forward network
"""
left_emb = self.embedding_layer(left, zero_pad=True, scale=False)
right_emb = self.embedding_layer(right, zero_pad=True, scale=False)
bi_left_outputs = self.bi_dynamic_lstm(
input=left_emb, hidden_size=self.lstm_dim)
left_seq_encoder = fluid.layers.concat(input=bi_left_outputs, axis=1)
bi_right_outputs = self.bi_dynamic_lstm(
input=right_emb, hidden_size=self.lstm_dim)
right_seq_encoder = fluid.layers.concat(input=bi_right_outputs, axis=1)
pad_value = fluid.layers.assign(input=np.array([0]).astype("float32"))
left_seq_encoder, left_lens = fluid.layers.sequence_pad(
x=left_seq_encoder, pad_value=pad_value, maxlen=self.seq_len1)
right_seq_encoder, right_lens = fluid.layers.sequence_pad(
x=right_seq_encoder, pad_value=pad_value, maxlen=self.seq_len2)
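# both encoded sequences are padded to fixed lengths so the match matrix below
# has a static [batch, seq_len1, seq_len2] shape for the convolution and pooling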
cross = fluid.layers.matmul(
left_seq_encoder, right_seq_encoder, transpose_y=True)
if self.match_mask:
cross_mask = self.get_cross_mask(left_lens, right_lens)
else:
cross_mask = None
conv_pool_relu = self.conv_pool_relu_layer(input=cross, mask=cross_mask)
relu_hid1 = Linear(
input_dim=conv_pool_relu.shape[-1],
output_dim=self.hidden_size)
relu_hid1 = relu_hid1(conv_pool_relu)
relu_hid1 = fluid.layers.tanh(relu_hid1)
pred_fc = Linear(
input_dim=relu_hid1.shape[-1],
output_dim=self.out_size)
pred = pred_fc(relu_hid1)
pred = fluid.layers.softmax(pred)
return left_seq_encoder, pred
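# Usage sketch (an assumption for illustration, not part of the original file):
# with a config dict supplying the keys read in __init__, the model would be
# driven under a dygraph guard roughly as follows, where left_ids / right_ids
# are hypothetical LoD tensors of word ids:
#
#     with guard():
#         model = MMDNN(config)
#         left_repr, pred = model.predict(left_ids, right_ids)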
@@ -18,9 +18,8 @@ bow class
 import paddle_layers as layers
 from paddle import fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph import Layer, Embedding, Linear
+from paddle.fluid.dygraph import Layer, Linear
 import paddle.fluid.param_attr as attr
-uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, high=x)
 class BOW(Layer):
     """
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
 from paddle.fluid import layers
 import paddle.fluid.param_attr as attr
 import paddle.fluid.layers.utils as utils
-from paddle.fluid.dygraph import Embedding, Pool2D, Linear, Conv2D, GRUUnit, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, Conv2D, GRUUnit, Layer, to_variable
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 class EmbeddingLayer(object):
@@ -48,7 +48,6 @@ class EmbeddingLayer(object):
         """
         operation
         """
-        # name = self.name
         emb = Embedding(
             size=[self.dict_size, self.emb_dim],
             is_sparse=True,
@@ -99,7 +98,6 @@ class DynamicGRULayer(object):
         """
         operation
         """
-
         gru = DynamicGRU(
             size=self.gru_dim,
             param_attr=attr.ParamAttr(name="%s.w" % self.name),
@@ -201,7 +199,7 @@ class CrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.cross_entropy(input=input, label=label)  # no need
+        loss = fluid.layers.cross_entropy(input=input, label=label)
         return loss
@@ -220,7 +218,7 @@ class SoftmaxWithCrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.softmax_with_cross_entropy(  # no need
+        loss = fluid.layers.softmax_with_cross_entropy(
             logits=input, label=label)
         return loss
@@ -359,9 +357,7 @@ class SoftsignLayer(object):
         return softsign
-# dygraph
-class SimpleConvPool(fluid.dygraph.Layer):
+class SimpleConvPool(Layer):
     def __init__(self,
                  num_channels,
                  num_filters,
@@ -574,6 +570,7 @@ class FC(Layer):
         # Currently, we don't support inplace in dygraph mode
         return self._helper.append_activation(pre_activation, act=self._act)
+
 class DynamicGRU(Layer):
     def __init__(self,
                  size,
@@ -916,10 +913,6 @@ class RNN(Layer):
         return final_outputs, final_states
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
-place = fluid.CPUPlace()
-executor = fluid.Executor(place)
 class EncoderCell(RNNUnit):
     def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0.):
         super(EncoderCell, self).__init__()
@@ -947,6 +940,7 @@ class EncoderCell(RNNUnit):
     def state_shape(self):
         return [cell.state_shape for cell in self.lstm_cells]
+
 class BasicGRUUnit(Layer):
     """
     ****
@@ -254,7 +254,6 @@ def train(conf_dict, args):
     logging.info("saving infer model in %s" % model_path)
     # used for continuous evaluation
     if args.enable_ce:
-        # if True:
         card_num = get_cards()
         ce_loss = 0
         ce_time = 0
@@ -334,7 +333,6 @@ def test(conf_dict, args):
             left_feat, pos_score = net(left, pos_right)
             pred = pos_score
-            # pred_list += list(pred.numpy())
             pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
             predictions_file.write(u"\n".join(
@@ -345,7 +343,6 @@ def test(conf_dict, args):
             left = fluid.layers.reshape(left, shape=[-1, 1])
             right = fluid.layers.reshape(right, shape=[-1, 1])
             left_feat, pred = net(left, right)
-            # pred_list += list(pred.numpy())
             pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
             predictions_file.write(u"\n".join(