Unverified commit a53a45d6, authored by 王肖, committed by GitHub

remove redundant code from dygraph/similarity_net (#4485)

Parent 6b882d42
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MMDNN class
"""
import numpy as np
import paddle.fluid as fluid
import logging
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
from paddle.fluid.dygraph.nn import Conv2D
import paddle_layers as pd_layers
from paddle.fluid import layers
class BasicLSTMUnit(Layer):
"""
****
BasicLSTMUnit class, Using basic operator to build LSTM
The algorithm can be described as the code below.
.. math::
i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )
o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
\\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
h_t &= o_t \odot tanh(c_t)
- $W$ terms denote weight matrices (e.g. $W_{ix}$ is the matrix
of weights from the input gate to the input)
    - The $b$ terms denote bias vectors (e.g. $b_i$ is the input gate bias vector).
- sigmoid is the logistic sigmoid function.
- $i, f, o$ and $c$ are the input gate, forget gate, output gate,
and cell activation vectors, respectively, all of which have the same size as
the cell output activation vector $h$.
- The :math:`\odot` is the element-wise product of the vectors.
    - :math:`tanh` is the activation function.
- :math:`\\tilde{c_t}` is also called candidate hidden state,
which is computed based on the current input and the previous hidden state.
Args:
        hidden_size (integer): The hidden size used in the Unit.
        input_size (integer): The input feature size fed into the Unit.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight matrix. Note:
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|None): The parameter attribute for the bias
of LSTM unit.
If it is set to None or one attribute of ParamAttr, lstm_unit will
create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized as zero. Default: None.
gate_activation (function|None): The activation function for gates (actGate).
Default: 'fluid.layers.sigmoid'
activation (function|None): The activation function for cells (actNode).
Default: 'fluid.layers.tanh'
forget_bias(float|1.0): forget bias used when computing forget gate
dtype(string): data type used in this unit
"""
def __init__(self,
hidden_size,
input_size,
param_attr=None,
bias_attr=None,
gate_activation=None,
activation=None,
forget_bias=1.0,
dtype='float32'):
super(BasicLSTMUnit, self).__init__(dtype)
        self._hidden_size = hidden_size
self._param_attr = param_attr
self._bias_attr = bias_attr
self._gate_activation = gate_activation or layers.sigmoid
self._activation = activation or layers.tanh
self._forget_bias = layers.fill_constant(
[1], dtype=dtype, value=forget_bias)
self._forget_bias.stop_gradient = False
self._dtype = dtype
self._input_size = input_size
self._weight = self.create_parameter(
attr=self._param_attr,
            shape=[self._input_size + self._hidden_size, 4 * self._hidden_size],
dtype=self._dtype)
self._bias = self.create_parameter(
attr=self._bias_attr,
            shape=[4 * self._hidden_size],
dtype=self._dtype,
is_bias=True)
def forward(self, input, pre_hidden, pre_cell):
concat_input_hidden = layers.concat([input, pre_hidden], 1)
gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
gate_input = layers.elementwise_add(gate_input, self._bias)
i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
new_cell = layers.elementwise_add(
layers.elementwise_mul(
pre_cell,
layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
return new_hidden, new_cell
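# A minimal usage sketch (illustrative, not part of the original file): one step
# of BasicLSTMUnit under dygraph. The batch size, input_size and hidden_size
# below are assumed values chosen only for demonstration.
def _basic_lstm_unit_demo():
    with fluid.dygraph.guard():
        cell = BasicLSTMUnit(hidden_size=16, input_size=8)
        step_input = to_variable(np.random.rand(4, 8).astype('float32'))
        pre_hidden = to_variable(np.zeros((4, 16), dtype='float32'))
        pre_cell = to_variable(np.zeros((4, 16), dtype='float32'))
        new_hidden, new_cell = cell(step_input, pre_hidden, pre_cell)
        # both outputs have shape [batch_size, hidden_size], i.e. [4, 16]
        return new_hidden, new_cell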
class MMDNN(object):
"""
MMDNN
"""
def __init__(self, config):
"""
initialize
"""
self.vocab_size = int(config['dict_size'])
self.emb_size = int(config['net']['embedding_dim'])
self.lstm_dim = int(config['net']['lstm_dim'])
self.kernel_size = int(config['net']['num_filters'])
self.win_size1 = int(config['net']['window_size_left'])
self.win_size2 = int(config['net']['window_size_right'])
self.dpool_size1 = int(config['net']['dpool_size_left'])
self.dpool_size2 = int(config['net']['dpool_size_right'])
self.hidden_size = int(config['net']['hidden_size'])
self.seq_len1 = int(config['max_len_left'])
self.seq_len2 = int(config['max_len_right'])
self.task_mode = config['task_mode']
if int(config['match_mask']) != 0:
self.match_mask = True
else:
self.match_mask = False
if self.task_mode == "pointwise":
self.n_class = int(config['n_class'])
self.out_size = self.n_class
elif self.task_mode == "pairwise":
self.out_size = 1
else:
            logging.error("task_mode not supported: %s" % self.task_mode)
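    # Illustrative sketch of the config dict consumed by __init__ above; every
    # value here is an assumption made for demonstration, not taken from the
    # original config files:
    #   {
    #       "dict_size": 10000, "task_mode": "pointwise", "match_mask": 1,
    #       "max_len_left": 32, "max_len_right": 32, "n_class": 2,
    #       "net": {"embedding_dim": 128, "lstm_dim": 128, "num_filters": 256,
    #               "window_size_left": 3, "window_size_right": 3,
    #               "dpool_size_left": 2, "dpool_size_right": 2,
    #               "hidden_size": 128}
    #   }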
def embedding_layer(self, input, zero_pad=True, scale=True):
"""
embedding layer
"""
emb = Embedding(
size=[self.vocab_size, self.emb_size],
padding_idx=(0 if zero_pad else None),
param_attr=fluid.ParamAttr(
name="word_embedding", initializer=fluid.initializer.Xavier()))
emb = emb(input)
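        # optionally rescale embeddings by sqrt(emb_size), a common stabilizing
        # trick (as in Transformer-style models)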
if scale:
emb = emb * (self.emb_size**0.5)
return emb
def bi_dynamic_lstm(self, input, hidden_size):
"""
bi_lstm layer
"""
fw_in_proj = Linear(
input_dim=self.emb_size,
output_dim=4 * hidden_size,
param_attr=fluid.ParamAttr(name="fw_fc.w"),
bias_attr=False)
fw_in_proj = fw_in_proj(input)
forward = pd_layers.DynamicLSTMLayer(
size=4 * hidden_size,
is_reverse=False,
param_attr=fluid.ParamAttr(name="forward_lstm.w"),
bias_attr=fluid.ParamAttr(name="forward_lstm.b")).ops()
forward = forward(fw_in_proj)
rv_in_proj = Linear(
input_dim=self.emb_size,
output_dim=4 * hidden_size,
param_attr=fluid.ParamAttr(name="rv_fc.w"),
bias_attr=False)
rv_in_proj = rv_in_proj(input)
reverse = pd_layers.DynamicLSTMLayer(
4 * hidden_size,
            'lstm',
is_reverse=True,
param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
bias_attr=fluid.ParamAttr(name="reverse_lstm.b")).ops()
reverse = reverse(rv_in_proj)
return [forward, reverse]
def conv_pool_relu_layer(self, input, mask=None):
"""
convolution and pool layer
"""
# data format NCHW
emb_expanded = fluid.layers.unsqueeze(input=input, axes=[1])
# same padding
        conv = Conv2D(
            num_channels=1,  # single input channel after the unsqueeze above
            num_filters=self.kernel_size,
            stride=1,
            padding=(self.seq_len1 // 2, self.seq_len2 // 2),
            filter_size=(self.seq_len1, self.seq_len2),
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(0.1)))
conv = conv(emb_expanded)
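        # push padded (invalid) positions of the match map to a large negative
        # value so the max-pooling below effectively ignores them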
if mask is not None:
cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1)
conv = cross_mask * conv + (1 - cross_mask) * (-2**32 + 1)
# valid padding
pool = fluid.layers.pool2d(
input=conv,
pool_size=[
int(self.seq_len1 / self.dpool_size1),
int(self.seq_len2 / self.dpool_size2)
],
pool_stride=[
int(self.seq_len1 / self.dpool_size1),
int(self.seq_len2 / self.dpool_size2)
],
pool_type="max", )
relu = fluid.layers.relu(pool)
return relu
def get_cross_mask(self, left_lens, right_lens):
"""
cross mask
"""
mask1 = fluid.layers.sequence_mask(
x=left_lens, dtype='float32', maxlen=self.seq_len1 + 1)
mask2 = fluid.layers.sequence_mask(
x=right_lens, dtype='float32', maxlen=self.seq_len2 + 1)
mask1 = fluid.layers.transpose(x=mask1, perm=[0, 2, 1])
cross_mask = fluid.layers.matmul(x=mask1, y=mask2)
return cross_mask
def predict(self, left, right):
"""
Forward network
"""
left_emb = self.embedding_layer(left, zero_pad=True, scale=False)
right_emb = self.embedding_layer(right, zero_pad=True, scale=False)
bi_left_outputs = self.bi_dynamic_lstm(
input=left_emb, hidden_size=self.lstm_dim)
left_seq_encoder = fluid.layers.concat(input=bi_left_outputs, axis=1)
bi_right_outputs = self.bi_dynamic_lstm(
input=right_emb, hidden_size=self.lstm_dim)
right_seq_encoder = fluid.layers.concat(input=bi_right_outputs, axis=1)
pad_value = fluid.layers.assign(input=np.array([0]).astype("float32"))
left_seq_encoder, left_lens = fluid.layers.sequence_pad(
x=left_seq_encoder, pad_value=pad_value, maxlen=self.seq_len1)
right_seq_encoder, right_lens = fluid.layers.sequence_pad(
x=right_seq_encoder, pad_value=pad_value, maxlen=self.seq_len2)
cross = fluid.layers.matmul(
left_seq_encoder, right_seq_encoder, transpose_y=True)
if self.match_mask:
cross_mask = self.get_cross_mask(left_lens, right_lens)
else:
cross_mask = None
conv_pool_relu = self.conv_pool_relu_layer(input=cross, mask=cross_mask)
relu_hid1 = Linear(
input_dim=conv_pool_relu.shape[-1],
output_dim=self.hidden_size)
relu_hid1 = relu_hid1(conv_pool_relu)
relu_hid1 = fluid.layers.tanh(relu_hid1)
        pred_fc = Linear(
            input_dim=relu_hid1.shape[-1],
            output_dim=self.out_size)
        pred = pred_fc(relu_hid1)
pred = fluid.layers.softmax(pred)
return left_seq_encoder, pred
@@ -18,9 +18,8 @@ bow class
import paddle_layers as layers
from paddle import fluid
from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph import Layer, Embedding, Linear
+from paddle.fluid.dygraph import Layer, Linear
import paddle.fluid.param_attr as attr
uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, high=x)
class BOW(Layer):
"""
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
from paddle.fluid import layers
import paddle.fluid.param_attr as attr
import paddle.fluid.layers.utils as utils
-from paddle.fluid.dygraph import Embedding, Pool2D, Linear, Conv2D, GRUUnit, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, Conv2D, GRUUnit, Layer, to_variable
from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
class EmbeddingLayer(object):
@@ -48,7 +48,6 @@ class EmbeddingLayer(object):
"""
operation
"""
-# name = self.name
emb = Embedding(
size=[self.dict_size, self.emb_dim],
is_sparse=True,
@@ -99,7 +98,6 @@ class DynamicGRULayer(object):
"""
operation
"""
gru = DynamicGRU(
size=self.gru_dim,
param_attr=attr.ParamAttr(name="%s.w" % self.name),
@@ -201,7 +199,7 @@ class CrossEntropyLayer(object):
"""
operation
"""
-loss = fluid.layers.cross_entropy(input=input, label=label) # no need
+loss = fluid.layers.cross_entropy(input=input, label=label)
return loss
@@ -220,7 +218,7 @@ class SoftmaxWithCrossEntropyLayer(object):
"""
operation
"""
-loss = fluid.layers.softmax_with_cross_entropy( # no need
+loss = fluid.layers.softmax_with_cross_entropy(
logits=input, label=label)
return loss
@@ -359,9 +357,7 @@ class SoftsignLayer(object):
return softsign
-# dygraph
-class SimpleConvPool(fluid.dygraph.Layer):
+class SimpleConvPool(Layer):
def __init__(self,
num_channels,
num_filters,
@@ -574,6 +570,7 @@ class FC(Layer):
# Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act)
class DynamicGRU(Layer):
def __init__(self,
size,
@@ -916,10 +913,6 @@ class RNN(Layer):
return final_outputs, final_states
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
-place = fluid.CPUPlace()
-executor = fluid.Executor(place)
class EncoderCell(RNNUnit):
def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0.):
super(EncoderCell, self).__init__()
@@ -947,6 +940,7 @@ class EncoderCell(RNNUnit):
def state_shape(self):
return [cell.state_shape for cell in self.lstm_cells]
class BasicGRUUnit(Layer):
"""
****
@@ -254,7 +254,6 @@ def train(conf_dict, args):
logging.info("saving infer model in %s" % model_path)
# used for continuous evaluation
if args.enable_ce:
-# if True:
card_num = get_cards()
ce_loss = 0
ce_time = 0
@@ -334,7 +333,6 @@ def test(conf_dict, args):
left_feat, pos_score = net(left, pos_right)
pred = pos_score
-# pred_list += list(pred.numpy())
pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
predictions_file.write(u"\n".join(
@@ -345,7 +343,6 @@ def test(conf_dict, args):
left = fluid.layers.reshape(left, shape=[-1, 1])
right = fluid.layers.reshape(right, shape=[-1, 1])
left_feat, pred = net(left, right)
-# pred_list += list(pred.numpy())
pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
predictions_file.write(u"\n".join(