From a53a45d68b9cf921c88b67f37dd8f36d9e9ac681 Mon Sep 17 00:00:00 2001
From: 王肖
Date: Thu, 26 Mar 2020 18:37:27 +0800
Subject: [PATCH] remove redundant code from dygraph/similarity_net (#4485)

---
 dygraph/similarity_net/mmdnn.py              | 296 -------------------
 dygraph/similarity_net/nets/bow.py           |   3 +-
 dygraph/similarity_net/nets/paddle_layers.py |  18 +-
 dygraph/similarity_net/run_classifier.py     |   3 -
 4 files changed, 7 insertions(+), 313 deletions(-)
 delete mode 100644 dygraph/similarity_net/mmdnn.py

diff --git a/dygraph/similarity_net/mmdnn.py b/dygraph/similarity_net/mmdnn.py
deleted file mode 100644
index 149cdae5..00000000
--- a/dygraph/similarity_net/mmdnn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-MMDNN class
-"""
-import numpy as np
-import paddle.fluid as fluid
-import logging
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
-from paddle.fluid.dygraph.nn import Conv2D
-
-import paddle_layers as pd_layers
-
-from paddle.fluid import layers
-from paddle.fluid.dygraph import Layer
-
-
-class BasicLSTMUnit(Layer):
-    """
-    BasicLSTMUnit class, using basic operators to build an LSTM.
-    The algorithm can be described by the equations below.
-
-    .. math::
-        i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
-        f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget\_bias)
-        o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
-        \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
-        c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
-        h_t &= o_t \odot tanh(c_t)
-
-    - The :math:`W` terms denote weight matrices (e.g. :math:`W_{ix}` is the
-      matrix of weights from the input to the input gate).
-    - The :math:`b` terms denote bias vectors (e.g. :math:`b_i` is the input
-      gate bias vector).
-    - :math:`\sigma` is the logistic sigmoid function.
-    - :math:`i, f, o` and :math:`c` are the input gate, forget gate, output
-      gate, and cell activation vectors, respectively, all of which have the
-      same size as the cell output activation vector :math:`h`.
-    - :math:`\odot` is the element-wise product of the vectors.
-    - :math:`tanh` is the activation function.
-    - :math:`\\tilde{c_t}` is also called the candidate hidden state, computed
-      from the current input and the previous hidden state.
-
-    Args:
-        name_scope(string): The name scope used to identify parameter and bias names.
-        hidden_size (integer): The hidden size used in the unit.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-            weight matrix. Note:
-            If it is set to None or one attribute of ParamAttr, lstm_unit will
-            create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr (ParamAttr|None): The parameter attribute for the bias
-            of the LSTM unit.
-            If it is set to None or one attribute of ParamAttr, lstm_unit will
-            create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized as zero. Default: None.
-        gate_activation (function|None): The activation function for gates (actGate).
-            Default: 'fluid.layers.sigmoid'
-        activation (function|None): The activation function for cells (actNode).
-            Default: 'fluid.layers.tanh'
-        forget_bias(float|1.0): forget bias used when computing the forget gate
-        dtype(string): data type used in this unit
-    """
-
-    def __init__(self,
-                 hidden_size,
-                 input_size,
-                 param_attr=None,
-                 bias_attr=None,
-                 gate_activation=None,
-                 activation=None,
-                 forget_bias=1.0,
-                 dtype='float32'):
-        super(BasicLSTMUnit, self).__init__(dtype)
-
-        self._hidden_size = hidden_size
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._gate_activation = gate_activation or layers.sigmoid
-        self._activation = activation or layers.tanh
-        self._forget_bias = layers.fill_constant(
-            [1], dtype=dtype, value=forget_bias)
-        self._forget_bias.stop_gradient = False
-        self._dtype = dtype
-        self._input_size = input_size
-
-        self._weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=[self._input_size + self._hidden_size, 4 * self._hidden_size],
-            dtype=self._dtype)
-
-        self._bias = self.create_parameter(
-            attr=self._bias_attr,
-            shape=[4 * self._hidden_size],
-            dtype=self._dtype,
-            is_bias=True)
-
-    def forward(self, input, pre_hidden, pre_cell):
-        # project [input, pre_hidden] through one fused weight matrix, then
-        # split the result into the i, j, f, o gate pre-activations
-        concat_input_hidden = layers.concat([input, pre_hidden], 1)
-        gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
-
-        gate_input = layers.elementwise_add(gate_input, self._bias)
-        i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
-        new_cell = layers.elementwise_add(
-            layers.elementwise_mul(
-                pre_cell,
-                layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
-            layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
-        new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
-
-        return new_hidden, new_cell
-
-
-class MMDNN(object):
-    """
-    MMDNN
-    """
-
-    def __init__(self, config):
-        """
-        initialize
-        """
-        self.vocab_size = int(config['dict_size'])
-        self.emb_size = int(config['net']['embedding_dim'])
-        self.lstm_dim = int(config['net']['lstm_dim'])
-        self.kernel_size = int(config['net']['num_filters'])
-        self.win_size1 = int(config['net']['window_size_left'])
-        self.win_size2 = int(config['net']['window_size_right'])
-        self.dpool_size1 = int(config['net']['dpool_size_left'])
-        self.dpool_size2 = int(config['net']['dpool_size_right'])
-        self.hidden_size = int(config['net']['hidden_size'])
-        self.seq_len1 = int(config['max_len_left'])
-        self.seq_len2 = int(config['max_len_right'])
-        self.task_mode = config['task_mode']
-
-        if int(config['match_mask']) != 0:
-            self.match_mask = True
-        else:
-            self.match_mask = False
-
-        if self.task_mode == "pointwise":
-            self.n_class = int(config['n_class'])
-            self.out_size = self.n_class
-        elif self.task_mode == "pairwise":
-            self.out_size = 1
-        else:
-            logging.error("training mode not supported")
-
-    def embedding_layer(self, input, zero_pad=True, scale=True):
-        """
-        embedding layer
-        """
-        emb = Embedding(
-            size=[self.vocab_size, self.emb_size],
-            padding_idx=(0 if zero_pad else None),
-            param_attr=fluid.ParamAttr(
-                name="word_embedding", initializer=fluid.initializer.Xavier()))
-        emb = emb(input)
-        if scale:
-            emb = emb * (self.emb_size**0.5)
-        return emb
-
-    def bi_dynamic_lstm(self, input, hidden_size):
-        """
-        bi_lstm layer
-        """
-        fw_in_proj = Linear(
-            input_dim=self.emb_size,
-            output_dim=4 * hidden_size,
-            param_attr=fluid.ParamAttr(name="fw_fc.w"),
-            bias_attr=False)
-        fw_in_proj = fw_in_proj(input)
-
-        forward = pd_layers.DynamicLSTMLayer(
-            size=4 * hidden_size,
-            is_reverse=False,
-            param_attr=fluid.ParamAttr(name="forward_lstm.w"),
-            bias_attr=fluid.ParamAttr(name="forward_lstm.b")).ops()
-        forward = forward(fw_in_proj)
-
-        rv_in_proj = Linear(
-            input_dim=self.emb_size,
-            output_dim=4 * hidden_size,
-            param_attr=fluid.ParamAttr(name="rv_fc.w"),
-            bias_attr=False)
-        rv_in_proj = rv_in_proj(input)
-
-        reverse = pd_layers.DynamicLSTMLayer(
-            4 * hidden_size,
-            'lstm',
-            is_reverse=True,
-            param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
-            bias_attr=fluid.ParamAttr(name="reverse_lstm.b")).ops()
-        reverse = reverse(rv_in_proj)
-
-        return [forward, reverse]
-
-    def conv_pool_relu_layer(self, input, mask=None):
-        """
-        convolution and pool layer
-        """
-        # data format NCHW, single input channel
-        emb_expanded = fluid.layers.unsqueeze(input=input, axes=[1])
-        # same padding
-        conv = Conv2D(
-            num_channels=1,
-            num_filters=self.kernel_size,
-            stride=1,
-            padding=(int(self.seq_len1 // 2), int(self.seq_len2 // 2)),
-            filter_size=(self.seq_len1, self.seq_len2),
-            bias_attr=fluid.ParamAttr(
-                initializer=fluid.initializer.Constant(0.1)))
-        conv = conv(emb_expanded)
-
-        if mask is not None:
-            cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1)
-            conv = cross_mask * conv + (1 - cross_mask) * (-2**32 + 1)
-        # valid padding
-        pool = fluid.layers.pool2d(
-            input=conv,
-            pool_size=[
-                int(self.seq_len1 / self.dpool_size1),
-                int(self.seq_len2 / self.dpool_size2)
-            ],
-            pool_stride=[
-                int(self.seq_len1 / self.dpool_size1),
-                int(self.seq_len2 / self.dpool_size2)
-            ],
-            pool_type="max")
-
-        relu = fluid.layers.relu(pool)
-        return relu
-
-    def get_cross_mask(self, left_lens, right_lens):
-        """
-        cross mask
-        """
-        mask1 = fluid.layers.sequence_mask(
-            x=left_lens, dtype='float32', maxlen=self.seq_len1 + 1)
-        mask2 = fluid.layers.sequence_mask(
-            x=right_lens, dtype='float32', maxlen=self.seq_len2 + 1)
-
-        mask1 = fluid.layers.transpose(x=mask1, perm=[0, 2, 1])
-        cross_mask = fluid.layers.matmul(x=mask1, y=mask2)
-        return cross_mask
-
-    def predict(self, left, right):
-        """
-        Forward network
-        """
-        left_emb = self.embedding_layer(left, zero_pad=True, scale=False)
-        right_emb = self.embedding_layer(right, zero_pad=True, scale=False)
-
-        bi_left_outputs = self.bi_dynamic_lstm(
-            input=left_emb, hidden_size=self.lstm_dim)
-        left_seq_encoder = fluid.layers.concat(input=bi_left_outputs, axis=1)
-        bi_right_outputs = self.bi_dynamic_lstm(
-            input=right_emb, hidden_size=self.lstm_dim)
-        right_seq_encoder = fluid.layers.concat(input=bi_right_outputs, axis=1)
-
-        pad_value = fluid.layers.assign(input=np.array([0]).astype("float32"))
-        left_seq_encoder, left_lens = fluid.layers.sequence_pad(
-            x=left_seq_encoder, pad_value=pad_value, maxlen=self.seq_len1)
-        right_seq_encoder, right_lens = fluid.layers.sequence_pad(
-            x=right_seq_encoder, pad_value=pad_value, maxlen=self.seq_len2)
-
-        cross = fluid.layers.matmul(
-            left_seq_encoder, right_seq_encoder, transpose_y=True)
-        if self.match_mask:
-            cross_mask = self.get_cross_mask(left_lens, right_lens)
-        else:
-            cross_mask = None
-
-        conv_pool_relu = self.conv_pool_relu_layer(input=cross, mask=cross_mask)
-        relu_hid1 = Linear(
-            input_dim=conv_pool_relu.shape[-1],
-            output_dim=self.hidden_size)
-        relu_hid1 = relu_hid1(conv_pool_relu)
-        relu_hid1 = fluid.layers.tanh(relu_hid1)
-
-        pred_fc = Linear(
-            input_dim=relu_hid1.shape[-1],
-            output_dim=self.out_size)
-        pred = pred_fc(relu_hid1)
-
-        pred = fluid.layers.softmax(pred)
-
-        return left_seq_encoder, pred
diff --git a/dygraph/similarity_net/nets/bow.py b/dygraph/similarity_net/nets/bow.py
index 407125b0..e00428aa 100644
--- a/dygraph/similarity_net/nets/bow.py
+++ b/dygraph/similarity_net/nets/bow.py
@@ -18,9 +18,8 @@ bow class
 import paddle_layers as layers
 from paddle import fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph import Layer, Embedding, Linear
+from paddle.fluid.dygraph import Layer, Linear
 import paddle.fluid.param_attr as attr
-uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, high=x)
 
 class BOW(Layer):
     """
diff --git a/dygraph/similarity_net/nets/paddle_layers.py b/dygraph/similarity_net/nets/paddle_layers.py
index 8c32076b..edbe4c55 100644
--- a/dygraph/similarity_net/nets/paddle_layers.py
+++ b/dygraph/similarity_net/nets/paddle_layers.py
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
 from paddle.fluid import layers
 import paddle.fluid.param_attr as attr
 import paddle.fluid.layers.utils as utils
-from paddle.fluid.dygraph import Embedding, Pool2D, Linear, Conv2D, GRUUnit, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, Conv2D, GRUUnit, Layer, to_variable
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 
 class EmbeddingLayer(object):
@@ -48,7 +48,6 @@ class EmbeddingLayer(object):
         """
         operation
         """
-        # name = self.name
         emb = Embedding(
             size=[self.dict_size, self.emb_dim],
             is_sparse=True,
@@ -99,7 +98,6 @@ class DynamicGRULayer(object):
         """
         operation
         """
-
         gru = DynamicGRU(
             size=self.gru_dim,
             param_attr=attr.ParamAttr(name="%s.w" % self.name),
@@ -201,7 +199,7 @@ class CrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.cross_entropy(input=input, label=label)  # no need
+        loss = fluid.layers.cross_entropy(input=input, label=label)
 
         return loss
 
@@ -220,7 +218,7 @@ class SoftmaxWithCrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.softmax_with_cross_entropy(  # no need
+        loss = fluid.layers.softmax_with_cross_entropy(
             logits=input, label=label)
 
         return loss
@@ -359,9 +357,7 @@ class SoftsignLayer(object):
 
         return softsign
 
-
-# dygraph
-class SimpleConvPool(fluid.dygraph.Layer):
+class SimpleConvPool(Layer):
     def __init__(self,
                  num_channels,
                  num_filters,
@@ -574,6 +570,7 @@ class FC(Layer):
         # Currently, we don't support inplace in dygraph mode
         return self._helper.append_activation(pre_activation, act=self._act)
 
+
 class DynamicGRU(Layer):
     def __init__(self,
                  size,
@@ -916,10 +913,6 @@ class RNN(Layer):
 
         return final_outputs, final_states
 
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
-place = fluid.CPUPlace()
-executor = fluid.Executor(place)
-
 class EncoderCell(RNNUnit):
     def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0.):
         super(EncoderCell, self).__init__()
@@ -947,6 +940,7 @@ class EncoderCell(RNNUnit):
     def state_shape(self):
         return [cell.state_shape for cell in self.lstm_cells]
 
+
 class BasicGRUUnit(Layer):
     """
     ****
diff --git a/dygraph/similarity_net/run_classifier.py b/dygraph/similarity_net/run_classifier.py
index a0695dd4..ff82fdfb 100644
--- a/dygraph/similarity_net/run_classifier.py
+++ b/dygraph/similarity_net/run_classifier.py
@@ -254,7 +254,6 @@ def train(conf_dict, args):
             logging.info("saving infer model in %s" % model_path)
     # used for continuous evaluation
     if args.enable_ce:
-        # if True:
         card_num = get_cards()
         ce_loss = 0
         ce_time = 0
@@ -334,7 +333,6 @@ def test(conf_dict, args):
                 left_feat, pos_score = net(left, pos_right)
                 pred = pos_score
-                # pred_list += list(pred.numpy())
                 pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
                 predictions_file.write(u"\n".join(
@@ -345,7 +343,6 @@ def test(conf_dict, args):
                 left = fluid.layers.reshape(left, shape=[-1, 1])
                 right = fluid.layers.reshape(right, shape=[-1, 1])
                 left_feat, pred = net(left, right)
-                # pred_list += list(pred.numpy())
                 pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
                 predictions_file.write(u"\n".join(
-- 
GitLab
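
As a standalone illustration of the gate arithmetic that the removed BasicLSTMUnit.forward implements, here is a minimal NumPy sketch (the helper names lstm_step and sigmoid are illustrative, not part of the patch): the input and the previous hidden state are concatenated, projected through one fused [input_size + hidden_size, 4 * hidden_size] weight matrix, and the result is split into the i, j, f, o pre-activations.

# Minimal NumPy sketch of the fused-gate LSTM step from the removed
# BasicLSTMUnit.forward; all names here are illustrative, not from the patch.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, pre_hidden, pre_cell, weight, bias, forget_bias=1.0):
    # weight: [input_size + hidden_size, 4 * hidden_size]; bias: [4 * hidden_size]
    gate_input = np.concatenate([x, pre_hidden], axis=1) @ weight + bias
    i, j, f, o = np.split(gate_input, 4, axis=-1)
    # c_t = sigmoid(f + forget_bias) * c_{t-1} + sigmoid(i) * tanh(j)
    new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
    # h_t = tanh(c_t) * sigmoid(o)
    new_hidden = np.tanh(new_cell) * sigmoid(o)
    return new_hidden, new_cell

# Example step: batch 2, input_size 3, hidden_size 4.
rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3))
h0 = np.zeros((2, 4))
c0 = np.zeros((2, 4))
w = rng.standard_normal((3 + 4, 4 * 4))
b = np.zeros(4 * 4)
h1, c1 = lstm_step(x, h0, c0, w, b)
print(h1.shape, c1.shape)  # (2, 4) (2, 4)

Fusing the four gate projections into a single matmul and splitting afterwards is the same design the dygraph unit uses; it is cheaper than four separate Linear layers and corresponds to the layers.split(gate_input, num_or_sections=4, dim=-1) call in the deleted forward.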