From a53a45d68b9cf921c88b67f37dd8f36d9e9ac681 Mon Sep 17 00:00:00 2001
From: 王肖
Date: Thu, 26 Mar 2020 18:37:27 +0800
Subject: [PATCH] remove redundant code from dygraph/similarity_net (#4485)

---
 dygraph/similarity_net/mmdnn.py              | 296 -------------------
 dygraph/similarity_net/nets/bow.py           |   3 +-
 dygraph/similarity_net/nets/paddle_layers.py |  18 +-
 dygraph/similarity_net/run_classifier.py     |   3 -
 4 files changed, 7 insertions(+), 313 deletions(-)
 delete mode 100644 dygraph/similarity_net/mmdnn.py

diff --git a/dygraph/similarity_net/mmdnn.py b/dygraph/similarity_net/mmdnn.py
deleted file mode 100644
index 149cdae5..00000000
--- a/dygraph/similarity_net/mmdnn.py
+++ /dev/null
@@ -1,296 +0,0 @@
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-MMDNN class
-"""
-import numpy as np
-import paddle.fluid as fluid
-import logging
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, to_variable, Layer, guard
-from paddle.fluid.dygraph.nn import Conv2D
-
-import paddle_layers as pd_layers
-
-from paddle.fluid import layers
-from paddle.fluid.dygraph import Layer
-
-
-class BasicLSTMUnit(Layer):
-    """
-    BasicLSTMUnit class, using basic operators to build an LSTM.
-    The algorithm can be described by the equations below.
-
-    .. math::
-        i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
-        f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget\_bias)
-        o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)
-        \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)
-        c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t}
-        h_t &= o_t \odot tanh(c_t)
-
-    - The :math:`W` terms denote weight matrices (e.g. :math:`W_{ix}` is the
-      matrix of weights from the input to the input gate).
-    - The :math:`b` terms denote bias vectors (e.g. :math:`b_i` is the input
-      gate bias vector).
-    - :math:`\sigma` is the logistic sigmoid function.
-    - :math:`i, f, o` and :math:`c` are the input gate, forget gate, output
-      gate, and cell activation vectors, respectively, all of which have the
-      same size as the cell output activation vector :math:`h`.
-    - :math:`\odot` is the element-wise product of the vectors.
-    - :math:`tanh` is the activation function.
-    - :math:`\\tilde{c_t}` is also called the candidate hidden state, computed
-      from the current input and the previous hidden state.
-
-    Args:
-        name_scope(string): The name scope used to identify parameter and bias names.
-        hidden_size (integer): The hidden size used in the unit.
-        param_attr(ParamAttr|None): The parameter attribute for the learnable
-            weight matrix. Note:
-            If it is set to None or one attribute of ParamAttr, lstm_unit will
-            create ParamAttr as param_attr. If the Initializer of the param_attr
-            is not set, the parameter is initialized with Xavier. Default: None.
-        bias_attr (ParamAttr|None): The parameter attribute for the bias
-            of the LSTM unit.
-            If it is set to None or one attribute of ParamAttr, lstm_unit will
-            create ParamAttr as bias_attr. If the Initializer of the bias_attr
-            is not set, the bias is initialized as zero. Default: None.
-        gate_activation (function|None): The activation function for gates (actGate).
-            Default: 'fluid.layers.sigmoid'
-        activation (function|None): The activation function for cells (actNode).
-            Default: 'fluid.layers.tanh'
-        forget_bias(float|1.0): forget bias used when computing the forget gate
-        dtype(string): data type used in this unit
-    """
-
-    def __init__(self,
-                 hidden_size,
-                 input_size,
-                 param_attr=None,
-                 bias_attr=None,
-                 gate_activation=None,
-                 activation=None,
-                 forget_bias=1.0,
-                 dtype='float32'):
-        super(BasicLSTMUnit, self).__init__(dtype)
-
-        self._hidden_size = hidden_size
-        self._param_attr = param_attr
-        self._bias_attr = bias_attr
-        self._gate_activation = gate_activation or layers.sigmoid
-        self._activation = activation or layers.tanh
-        self._forget_bias = layers.fill_constant(
-            [1], dtype=dtype, value=forget_bias)
-        self._forget_bias.stop_gradient = False
-        self._dtype = dtype
-        self._input_size = input_size
-
-        self._weight = self.create_parameter(
-            attr=self._param_attr,
-            shape=[self._input_size + self._hidden_size, 4 * self._hidden_size],
-            dtype=self._dtype)
-
-        self._bias = self.create_parameter(
-            attr=self._bias_attr,
-            shape=[4 * self._hidden_size],
-            dtype=self._dtype,
-            is_bias=True)
-
-    def forward(self, input, pre_hidden, pre_cell):
-        # project [input, pre_hidden] through one fused weight matrix, then
-        # split the result into the i, j, f, o gate pre-activations
-        concat_input_hidden = layers.concat([input, pre_hidden], 1)
-        gate_input = layers.matmul(x=concat_input_hidden, y=self._weight)
-
-        gate_input = layers.elementwise_add(gate_input, self._bias)
-        i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)
-        new_cell = layers.elementwise_add(
-            layers.elementwise_mul(
-                pre_cell,
-                layers.sigmoid(layers.elementwise_add(f, self._forget_bias))),
-            layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j)))
-        new_hidden = layers.tanh(new_cell) * layers.sigmoid(o)
-
-        return new_hidden, new_cell
-
-
-class MMDNN(object):
-    """
-    MMDNN
-    """
-
-    def __init__(self, config):
-        """
-        initialize
-        """
-        self.vocab_size = int(config['dict_size'])
-        self.emb_size = int(config['net']['embedding_dim'])
-        self.lstm_dim = int(config['net']['lstm_dim'])
-        self.kernel_size = int(config['net']['num_filters'])
-        self.win_size1 = int(config['net']['window_size_left'])
-        self.win_size2 = int(config['net']['window_size_right'])
-        self.dpool_size1 = int(config['net']['dpool_size_left'])
-        self.dpool_size2 = int(config['net']['dpool_size_right'])
-        self.hidden_size = int(config['net']['hidden_size'])
-        self.seq_len1 = int(config['max_len_left'])
-        self.seq_len2 = int(config['max_len_right'])
-        self.task_mode = config['task_mode']
-
-        if int(config['match_mask']) != 0:
-            self.match_mask = True
-        else:
-            self.match_mask = False
-
-        if self.task_mode == "pointwise":
-            self.n_class = int(config['n_class'])
-            self.out_size = self.n_class
-        elif self.task_mode == "pairwise":
-            self.out_size = 1
-        else:
-            logging.error("training mode not supported")
-
-    def embedding_layer(self, input, zero_pad=True, scale=True):
-        """
-        embedding layer
-        """
-        emb = Embedding(
-            size=[self.vocab_size, self.emb_size],
-            padding_idx=(0 if zero_pad else None),
-            param_attr=fluid.ParamAttr(
-                name="word_embedding", initializer=fluid.initializer.Xavier()))
-        emb = emb(input)
-        if scale:
-            emb = emb * (self.emb_size**0.5)
-        return emb
-
-    def bi_dynamic_lstm(self, input, hidden_size):
-        """
-        bi_lstm layer
-        """
-        fw_in_proj = Linear(
-            input_dim=self.emb_size,
-            output_dim=4 * hidden_size,
-            param_attr=fluid.ParamAttr(name="fw_fc.w"),
-            bias_attr=False)
-        fw_in_proj = fw_in_proj(input)
-
-        forward = pd_layers.DynamicLSTMLayer(
-            size=4 * hidden_size,
-            is_reverse=False,
-            param_attr=fluid.ParamAttr(name="forward_lstm.w"),
-            bias_attr=fluid.ParamAttr(name="forward_lstm.b")).ops()
-        forward = forward(fw_in_proj)
-
-        rv_in_proj = Linear(
-            input_dim=self.emb_size,
-            output_dim=4 * hidden_size,
-            param_attr=fluid.ParamAttr(name="rv_fc.w"),
-            bias_attr=False)
-        rv_in_proj = rv_in_proj(input)
-
-        reverse = pd_layers.DynamicLSTMLayer(
-            4 * hidden_size,
-            'lstm',
-            is_reverse=True,
-            param_attr=fluid.ParamAttr(name="reverse_lstm.w"),
-            bias_attr=fluid.ParamAttr(name="reverse_lstm.b")).ops()
-        reverse = reverse(rv_in_proj)
-
-        return [forward, reverse]
-
-    def conv_pool_relu_layer(self, input, mask=None):
-        """
-        convolution and pool layer
-        """
-        # data format NCHW, single input channel
-        emb_expanded = fluid.layers.unsqueeze(input=input, axes=[1])
-        # same padding
-        conv = Conv2D(
-            num_channels=1,
-            num_filters=self.kernel_size,
-            stride=1,
-            padding=(int(self.seq_len1 // 2), int(self.seq_len2 // 2)),
-            filter_size=(self.seq_len1, self.seq_len2),
-            bias_attr=fluid.ParamAttr(
-                initializer=fluid.initializer.Constant(0.1)))
-        conv = conv(emb_expanded)
-
-        if mask is not None:
-            cross_mask = fluid.layers.stack(x=[mask] * self.kernel_size, axis=1)
-            conv = cross_mask * conv + (1 - cross_mask) * (-2**32 + 1)
-        # valid padding
-        pool = fluid.layers.pool2d(
-            input=conv,
-            pool_size=[
-                int(self.seq_len1 / self.dpool_size1),
-                int(self.seq_len2 / self.dpool_size2)
-            ],
-            pool_stride=[
-                int(self.seq_len1 / self.dpool_size1),
-                int(self.seq_len2 / self.dpool_size2)
-            ],
-            pool_type="max")
-
-        relu = fluid.layers.relu(pool)
-        return relu
-
-    def get_cross_mask(self, left_lens, right_lens):
-        """
-        cross mask
-        """
-        mask1 = fluid.layers.sequence_mask(
-            x=left_lens, dtype='float32', maxlen=self.seq_len1 + 1)
-        mask2 = fluid.layers.sequence_mask(
-            x=right_lens, dtype='float32', maxlen=self.seq_len2 + 1)
-
-        mask1 = fluid.layers.transpose(x=mask1, perm=[0, 2, 1])
-        cross_mask = fluid.layers.matmul(x=mask1, y=mask2)
-        return cross_mask
-
-    def predict(self, left, right):
-        """
-        Forward network
-        """
-        left_emb = self.embedding_layer(left, zero_pad=True, scale=False)
-        right_emb = self.embedding_layer(right, zero_pad=True, scale=False)
-
-        bi_left_outputs = self.bi_dynamic_lstm(
-            input=left_emb, hidden_size=self.lstm_dim)
-        left_seq_encoder = fluid.layers.concat(input=bi_left_outputs, axis=1)
-        bi_right_outputs = self.bi_dynamic_lstm(
-            input=right_emb, hidden_size=self.lstm_dim)
-        right_seq_encoder = fluid.layers.concat(input=bi_right_outputs, axis=1)
-
-        pad_value = fluid.layers.assign(input=np.array([0]).astype("float32"))
-        left_seq_encoder, left_lens = fluid.layers.sequence_pad(
-            x=left_seq_encoder, pad_value=pad_value, maxlen=self.seq_len1)
-        right_seq_encoder, right_lens = fluid.layers.sequence_pad(
-            x=right_seq_encoder, pad_value=pad_value, maxlen=self.seq_len2)
-
-        cross = fluid.layers.matmul(
-            left_seq_encoder, right_seq_encoder, transpose_y=True)
-        if self.match_mask:
-            cross_mask = self.get_cross_mask(left_lens, right_lens)
-        else:
-            cross_mask = None
-
-        conv_pool_relu = self.conv_pool_relu_layer(input=cross, mask=cross_mask)
-        relu_hid1 = Linear(
-            input_dim=conv_pool_relu.shape[-1],
-            output_dim=self.hidden_size)
-        relu_hid1 = relu_hid1(conv_pool_relu)
-        relu_hid1 = fluid.layers.tanh(relu_hid1)
-
-        pred_fc = Linear(
-            input_dim=relu_hid1.shape[-1],
-            output_dim=self.out_size)
-        pred = pred_fc(relu_hid1)
-
-        pred = fluid.layers.softmax(pred)
-
-        return left_seq_encoder, pred
diff --git a/dygraph/similarity_net/nets/bow.py b/dygraph/similarity_net/nets/bow.py
index 407125b0..e00428aa 100644
--- a/dygraph/similarity_net/nets/bow.py
+++ b/dygraph/similarity_net/nets/bow.py
@@ -18,9 +18,8 @@ bow class
 import paddle_layers as layers
 from paddle import fluid
 from paddle.fluid.dygraph.base import to_variable
-from paddle.fluid.dygraph import Layer, Embedding, Linear
+from paddle.fluid.dygraph import Layer, Linear
 import paddle.fluid.param_attr as attr
-uniform_initializer = lambda x: fluid.initializer.UniformInitializer(low=-x, high=x)
 
 class BOW(Layer):
     """
diff --git a/dygraph/similarity_net/nets/paddle_layers.py b/dygraph/similarity_net/nets/paddle_layers.py
index 8c32076b..edbe4c55 100644
--- a/dygraph/similarity_net/nets/paddle_layers.py
+++ b/dygraph/similarity_net/nets/paddle_layers.py
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
 from paddle.fluid import layers
 import paddle.fluid.param_attr as attr
 import paddle.fluid.layers.utils as utils
-from paddle.fluid.dygraph import Embedding, Pool2D, Linear, Conv2D, GRUUnit, Layer, to_variable
+from paddle.fluid.dygraph import Embedding, Conv2D, GRUUnit, Layer, to_variable
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
 
 class EmbeddingLayer(object):
@@ -48,7 +48,6 @@ class EmbeddingLayer(object):
         """
         operation
         """
-        # name = self.name
         emb = Embedding(
             size=[self.dict_size, self.emb_dim],
             is_sparse=True,
@@ -99,7 +98,6 @@ class DynamicGRULayer(object):
         """
         operation
         """
-
         gru = DynamicGRU(
             size=self.gru_dim,
             param_attr=attr.ParamAttr(name="%s.w" % self.name),
@@ -201,7 +199,7 @@ class CrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.cross_entropy(input=input, label=label)  # no need
+        loss = fluid.layers.cross_entropy(input=input, label=label)
 
         return loss
 
@@ -220,7 +218,7 @@ class SoftmaxWithCrossEntropyLayer(object):
         """
         operation
         """
-        loss = fluid.layers.softmax_with_cross_entropy(  # no need
+        loss = fluid.layers.softmax_with_cross_entropy(
             logits=input, label=label)
 
         return loss
@@ -359,9 +357,7 @@ class SoftsignLayer(object):
 
         return softsign
 
-
-# dygraph
-class SimpleConvPool(fluid.dygraph.Layer):
+class SimpleConvPool(Layer):
     def __init__(self,
                  num_channels,
                  num_filters,
@@ -574,6 +570,7 @@ class FC(Layer):
         # Currently, we don't support inplace in dygraph mode
         return self._helper.append_activation(pre_activation, act=self._act)
 
+
 class DynamicGRU(Layer):
     def __init__(self,
                  size,
@@ -916,10 +913,6 @@ class RNN(Layer):
 
         return final_outputs, final_states
 
-from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
-place = fluid.CPUPlace()
-executor = fluid.Executor(place)
-
 class EncoderCell(RNNUnit):
     def __init__(self, num_layers, input_size, hidden_size, dropout_prob=0.):
         super(EncoderCell, self).__init__()
@@ -947,6 +940,7 @@ class EncoderCell(RNNUnit):
     def state_shape(self):
         return [cell.state_shape for cell in self.lstm_cells]
 
+
 class BasicGRUUnit(Layer):
     """
     ****
diff --git a/dygraph/similarity_net/run_classifier.py b/dygraph/similarity_net/run_classifier.py
index a0695dd4..ff82fdfb 100644
--- a/dygraph/similarity_net/run_classifier.py
+++ b/dygraph/similarity_net/run_classifier.py
@@ -254,7 +254,6 @@ def train(conf_dict, args):
             logging.info("saving infer model in %s" % model_path)
     # used for continuous evaluation
     if args.enable_ce:
-        # if True:
         card_num = get_cards()
         ce_loss = 0
         ce_time = 0
@@ -334,7 +333,6 @@ def test(conf_dict, args):
                 left_feat, pos_score = net(left, pos_right)
                 pred = pos_score
-                # pred_list += list(pred.numpy())
                 pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
                 predictions_file.write(u"\n".join(
@@ -345,7 +343,6 @@ def test(conf_dict, args):
                 left = fluid.layers.reshape(left, shape=[-1, 1])
                 right = fluid.layers.reshape(right, shape=[-1, 1])
                 left_feat, pred = net(left, right)
-                # pred_list += list(pred.numpy())
                 pred_list += list(map(lambda item: float(item[0]), pred.numpy()))
                 predictions_file.write(u"\n".join(
-- 
GitLab
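
As a standalone illustration of the gate arithmetic that the removed BasicLSTMUnit.forward implements, here is a minimal NumPy sketch (the helper names lstm_step and sigmoid are illustrative, not part of the patch): the input and the previous hidden state are concatenated, projected through one fused [input_size + hidden_size, 4 * hidden_size] weight matrix, and the result is split into the i, j, f, o pre-activations.

# Minimal NumPy sketch of the fused-gate LSTM step from the removed
# BasicLSTMUnit.forward; all names here are illustrative, not from the patch.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, pre_hidden, pre_cell, weight, bias, forget_bias=1.0):
    # weight: [input_size + hidden_size, 4 * hidden_size]; bias: [4 * hidden_size]
    gate_input = np.concatenate([x, pre_hidden], axis=1) @ weight + bias
    i, j, f, o = np.split(gate_input, 4, axis=-1)
    # c_t = sigmoid(f + forget_bias) * c_{t-1} + sigmoid(i) * tanh(j)
    new_cell = pre_cell * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
    # h_t = tanh(c_t) * sigmoid(o)
    new_hidden = np.tanh(new_cell) * sigmoid(o)
    return new_hidden, new_cell

# Example step: batch 2, input_size 3, hidden_size 4.
rng = np.random.default_rng(0)
x = rng.standard_normal((2, 3))
h0 = np.zeros((2, 4))
c0 = np.zeros((2, 4))
w = rng.standard_normal((3 + 4, 4 * 4))
b = np.zeros(4 * 4)
h1, c1 = lstm_step(x, h0, c0, w, b)
print(h1.shape, c1.shape)  # (2, 4) (2, 4)

Fusing the four gate projections into a single matmul and splitting afterwards is the same design the dygraph unit uses; it is cheaper than four separate Linear layers and corresponds to the layers.split(gate_input, num_or_sections=4, dim=-1) call in the deleted forward.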