From 851c8c52798d31506fed32b3b79959fd3b6fca47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E7=8E=8B=E8=82=96?=
Date: Sun, 1 Mar 2020 00:27:04 +0800
Subject: [PATCH] fix bugs of PaddleNLP/similarity_net & dygraph/similarity_net
 (#4362)

* Update README.md (#4267)
* test=develop (#4269)
* 3d use new api (#4275)
* PointNet++ and PointRCNN use new API
* Update Readme of Dygraph BERT (#4277)
  Fix some typos.
* Update run_classifier_multi_gpu.sh (#4279)
  remove the CUDA_VISIBLE_DEVICES
* Update README.md (#4280)
* add similarity_net dygraph
* fix similarity_net dygraph
* fix bugs of dygraph/similarity_net
* Fix some bugs running on the GPU of dygraph/similarity_net
* fix a bug in pointwise mode of dygraph/similarity_net
* fix a bug of PaddleNLP/similarity_net
* fix a bug and remove unused files of dygraph/similarity_net

Co-authored-by: pkpk
Co-authored-by: Kaipeng Deng
---
 PaddleNLP/similarity_net/run_classifier.py |  41 +-
 dygraph/similarity_net/nets/base_layers.py |  66 --
 dygraph/similarity_net/nets/copy.py        | 762 ------------------
 dygraph/similarity_net/nets/mm_dnn.py      |   4 +-
 .../similarity_net/nets/paddle_layers.1.py | 457 -----------
 5 files changed, 22 insertions(+), 1308 deletions(-)
 delete mode 100644 dygraph/similarity_net/nets/base_layers.py
 delete mode 100644 dygraph/similarity_net/nets/copy.py
 delete mode 100644 dygraph/similarity_net/nets/paddle_layers.1.py

diff --git a/PaddleNLP/similarity_net/run_classifier.py b/PaddleNLP/similarity_net/run_classifier.py
index 922b84f8..77271c46 100644
--- a/PaddleNLP/similarity_net/run_classifier.py
+++ b/PaddleNLP/similarity_net/run_classifier.py
@@ -53,12 +53,12 @@ def create_model(args, pyreader_name, is_inference=False, is_pointwise=False):
     """
     if is_inference:
         inf_pyreader = fluid.layers.py_reader(
-            capacity=16,
-            shapes=([-1, 1], [-1, 1]),
-            dtypes=('int64', 'int64'),
-            lod_levels=(1, 1),
-            name=pyreader_name,
-            use_double_buffer=False)
+            capacity=16,
+            shapes=([-1], [-1]),
+            dtypes=('int64', 'int64'),
+            lod_levels=(1, 1),
+            name=pyreader_name,
+            use_double_buffer=False)

         left, pos_right = fluid.layers.read_file(inf_pyreader)
         return inf_pyreader, left, pos_right
@@ -66,27 +66,26 @@ def create_model(args, pyreader_name, is_inference=False, is_pointwise=False):
     else:
         if is_pointwise:
             pointwise_pyreader = fluid.layers.py_reader(
-                capacity=16,
-                shapes=([-1, 1], [-1, 1], [-1, 1]),
-                dtypes=('int64', 'int64', 'int64'),
-                lod_levels=(1, 1, 0),
-                name=pyreader_name,
-                use_double_buffer=False)
+                capacity=16,
+                shapes=([-1], [-1], [-1]),
+                dtypes=('int64', 'int64', 'int64'),
+                lod_levels=(1, 1, 0),
+                name=pyreader_name,
+                use_double_buffer=False)

             left, right, label = fluid.layers.read_file(pointwise_pyreader)
             return pointwise_pyreader, left, right, label

        else:
             pairwise_pyreader = fluid.layers.py_reader(
-                capacity=16,
-                shapes=([-1, 1], [-1, 1], [-1, 1]),
-                dtypes=('int64', 'int64', 'int64'),
-                lod_levels=(1, 1, 1),
-                name=pyreader_name,
-                use_double_buffer=False)
-
-            left, pos_right, neg_right = fluid.layers.read_file(
-                pairwise_pyreader)
+                capacity=16,
+                shapes=([-1], [-1], [-1]),
+                dtypes=('int64', 'int64', 'int64'),
+                lod_levels=(1, 1, 1),
+                name=pyreader_name,
+                use_double_buffer=False)
+
+            left, pos_right, neg_right = fluid.layers.read_file(pairwise_pyreader)

             return pairwise_pyreader, left, pos_right, neg_right

diff --git a/dygraph/similarity_net/nets/base_layers.py b/dygraph/similarity_net/nets/base_layers.py
deleted file mode 100644
index cd055315..00000000
--- a/dygraph/similarity_net/nets/base_layers.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright
(c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -base layers -""" - -from paddle.fluid import layers -from paddle.fluid.dygraph import Layer -from paddle.fluid.dygraph import GRUUnit -from paddle.fluid.dygraph.base import to_variable - - - - -# import numpy as np -# import logging - - -class DynamicGRU(Layer): - def __init__(self, - size, - param_attr=None, - bias_attr=None, - is_reverse=False, - gate_activation='sigmoid', - candidate_activation='tanh', - h_0=None, - origin_mode=False, - init_size = None): - super(DynamicGRU, self).__init__() - self.gru_unit = GRUUnit( - size * 3, - param_attr=param_attr, - bias_attr=bias_attr, - activation=candidate_activation, - gate_activation=gate_activation, - origin_mode=origin_mode) - self.size = size - self.h_0 = h_0 - self.is_reverse = is_reverse - def forward(self, inputs): - hidden = self.h_0 - res = [] - for i in range(inputs.shape[1]): - if self.is_reverse: - i = inputs.shape[1] - 1 - i - input_ = inputs[ :, i:i+1, :] - input_ = fluid.layers.reshape(input_, [-1, input_.shape[2]], inplace=False) - hidden, reset, gate = self.gru_unit(input_, hidden) - hidden_ = fluid.layers.reshape(hidden, [-1, 1, hidden.shape[1]], inplace=False) - res.append(hidden_) - if self.is_reverse: - res = res[::-1] - res = fluid.layers.concat(res, axis=1) - return res \ No newline at end of file diff --git a/dygraph/similarity_net/nets/copy.py b/dygraph/similarity_net/nets/copy.py deleted file mode 100644 index 17fe99ca..00000000 --- a/dygraph/similarity_net/nets/copy.py +++ /dev/null @@ -1,762 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.fluid import layers, unique_name -from paddle.fluid.dygraph import Layer -from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper -from paddle.fluid.layers.control_flow import StaticRNN - -__all__ = ['BasicGRUUnit', 'basic_gru', 'BasicLSTMUnit', 'basic_lstm'] - - -class BasicGRUUnit(Layer): - """ - **** - BasicGRUUnit class, using basic operators to build GRU - The algorithm can be described as the equations below. - - .. 
math:: - u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u) - - r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r) - - m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m) - - h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t) - - Args: - name_scope(string) : The name scope used to identify parameters and biases - hidden_size (integer): The hidden size used in the Unit. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of GRU unit. - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). - Default: 'fluid.layers.tanh' - dtype(string): data type used in this unit - - Examples: - - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import BasicGRUUnit - - input_size = 128 - hidden_size = 256 - input = layers.data( name = "input", shape = [-1, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - - gru_unit = BasicGRUUnit( "gru_unit", hidden_size ) - - new_hidden = gru_unit( input, pre_hidden ) - - """ - - def __init__(self, - name_scope, - hidden_size, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - dtype='float32'): - super(BasicGRUUnit, self).__init__(name_scope, dtype) - # reserve old school _full_name and _helper for static graph save load - self._full_name = unique_name.generate(name_scope + "/" + - self.__class__.__name__) - self._helper = LayerObjectHelper(self._full_name) - - self._name = name_scope - self._hiden_size = hidden_size - self._param_attr = param_attr - self._bias_attr = bias_attr - self._gate_activation = gate_activation or layers.sigmoid - self._activation = activation or layers.tanh - self._dtype = dtype - - def _build_once(self, input, pre_hidden): - self._input_size = input.shape[-1] - assert (self._input_size > 0) - - self._gate_weight = self.create_parameter( - attr=self._param_attr, - shape=[self._input_size + self._hiden_size, 2 * self._hiden_size], - dtype=self._dtype) - - self._candidate_weight = self.create_parameter( - attr=self._param_attr, - shape=[self._input_size + self._hiden_size, self._hiden_size], - dtype=self._dtype) - - self._gate_bias = self.create_parameter( - attr=self._bias_attr, - shape=[2 * self._hiden_size], - dtype=self._dtype, - is_bias=True) - self._candidate_bias = self.create_parameter( - attr=self._bias_attr, - shape=[self._hiden_size], - dtype=self._dtype, - is_bias=True) - - def forward(self, input, pre_hidden): - concat_input_hidden = layers.concat([input, pre_hidden], 1) - - gate_input = layers.matmul(x=concat_input_hidden, y=self._gate_weight) - - gate_input = layers.elementwise_add(gate_input, self._gate_bias) - - gate_input = self._gate_activation(gate_input) - r, u = layers.split(gate_input, num_or_sections=2, dim=1) - - r_hidden = r * pre_hidden - - candidate = layers.matmul( - layers.concat([input, r_hidden], 1), self._candidate_weight) - 
candidate = layers.elementwise_add(candidate, self._candidate_bias) - - c = self._activation(candidate) - new_hidden = u * pre_hidden + (1 - u) * c - - return new_hidden - - -def basic_gru(input, - init_hidden, - hidden_size, - num_layers=1, - sequence_length=None, - dropout_prob=0.0, - bidirectional=False, - batch_first=True, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - dtype='float32', - name='basic_gru'): - """ - GRU implementation using basic operator, supports multiple layers and bidirection gru. - - .. math:: - u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u) - - r_t & = actGate(W_rx xr_{t} + W_rh h_{t-1} + b_r) - - m_t & = actNode(W_cx xm_t + W_ch dot(r_t, h_{t-1}) + b_m) - - h_t & = dot(u_t, h_{t-1}) + dot((1-u_t), m_t) - - Args: - input (Variable): GRU input tensor, - if batch_first = False, shape should be ( seq_len x batch_size x input_size ) - if batch_first = True, shape should be ( batch_size x seq_len x hidden_size ) - init_hidden(Variable|None): The initial hidden state of the GRU - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to tensor with ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - hidden_size (int): Hidden size of the GRU - num_layers (int): The total number of layers of the GRU - sequence_length (Variabe|None): A Tensor (shape [batch_size]) stores each real length of each instance, - This tensor will be convert to a mask to mask the padding ids - If it's None means NO padding ids - dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of earch layers, - NOT between time steps - bidirectional (bool|False): If it is bidirectional - batch_first (bool|True): The shape format of the input and output tensors. If true, - the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false, - the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. By default - this function accepts input and emits output in batch-major form to be consistent - with most of data format, though a bit less efficient because of extra transposes. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of GRU unit. - If it is set to None or one attribute of ParamAttr, gru_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). 
- Default: 'fluid.layers.tanh' - dtype(string): data type used in this unit - name(string): name used to identify parameters and biases - - Returns: - rnn_out(Tensor),last_hidden(Tensor) - - rnn_out is result of GRU hidden, with shape (seq_len x batch_size x hidden_size) \ - if is_bidirec set to True, shape will be ( seq_len x batch_sze x hidden_size*2) - - last_hidden is the hidden state of the last step of GRU \ - shape is ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, shape will be ( num_layers*2 x batch_size x hidden_size), - can be reshaped to a tensor with shape( num_layers x 2 x batch_size x hidden_size) - - Examples: - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import basic_gru - - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 2 - dropout = 0.5 - bidirectional = True - batch_first = False - - input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32') - - - rnn_out, last_hidden = basic_gru( input, pre_hidden, hidden_size, num_layers = num_layers, \ - sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \ - batch_first = batch_first) - - """ - - fw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_layers_" + str(i) - fw_unit_list.append( - BasicGRUUnit(new_name, hidden_size, param_attr, bias_attr, - gate_activation, activation, dtype)) - if bidirectional: - bw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_reverse_layers_" + str(i) - bw_unit_list.append( - BasicGRUUnit(new_name, hidden_size, param_attr, bias_attr, - gate_activation, activation, dtype)) - - if batch_first: - input = layers.transpose(input, [1, 0, 2]) - - mask = None - if sequence_length: - max_seq_len = layers.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32') - mask = layers.transpose(mask, [1, 0]) - - direc_num = 1 - if bidirectional: - direc_num = 2 - if init_hidden: - init_hidden = layers.reshape( - init_hidden, shape=[num_layers, direc_num, -1, hidden_size]) - - def get_single_direction_output(rnn_input, - unit_list, - mask=None, - direc_index=0): - rnn = StaticRNN() - with rnn.step(): - step_input = rnn.step_input(rnn_input) - - if mask: - step_mask = rnn.step_input(mask) - - for i in range(num_layers): - if init_hidden: - pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) - else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, - shape=[-1, hidden_size], - ref_batch_dim_idx=1) - - new_hidden = unit_list[i](step_input, pre_hidden) - - if mask: - new_hidden = layers.elementwise_mul( - new_hidden, step_mask, axis=0) - layers.elementwise_mul( - pre_hidden, (step_mask - 1), axis=0) - rnn.update_memory(pre_hidden, new_hidden) - - rnn.step_output(new_hidden) - - step_input = new_hidden - if dropout_prob != None and dropout_prob > 0.0: - step_input = layers.dropout( - step_input, - dropout_prob=dropout_prob, ) - - rnn.step_output(step_input) - - rnn_out = rnn() - - last_hidden_array = [] - rnn_output = rnn_out[-1] - for i in range(num_layers): - last_hidden = rnn_out[i] - last_hidden = last_hidden[-1] - last_hidden_array.append(last_hidden) - - last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = layers.reshape( - last_hidden_output, 
shape=[num_layers, -1, hidden_size]) - - return rnn_output, last_hidden_output - # seq_len, batch_size, hidden_size - - fw_rnn_out, fw_last_hidden = get_single_direction_output( - input, fw_unit_list, mask, direc_index=0) - - if bidirectional: - bw_input = layers.reverse(input, axis=[0]) - bw_mask = None - if mask: - bw_mask = layers.reverse(mask, axis=[0]) - bw_rnn_out, bw_last_hidden = get_single_direction_output( - bw_input, bw_unit_list, bw_mask, direc_index=1) - - bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0]) - - rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) - last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) - - last_hidden = layers.reshape( - last_hidden, shape=[num_layers * direc_num, -1, hidden_size]) - - if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) - return rnn_out, last_hidden - else: - - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - - if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden - - -def basic_lstm(input, - init_hidden, - init_cell, - hidden_size, - num_layers=1, - sequence_length=None, - dropout_prob=0.0, - bidirectional=False, - batch_first=True, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - forget_bias=1.0, - dtype='float32', - name='basic_lstm'): - """ - LSTM implementation using basic operators, supports multiple layers and bidirection LSTM. - - .. math:: - i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i) - - f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias ) - - o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o) - - \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c) - - c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} - - h_t &= o_t \odot tanh(c_t) - - Args: - input (Variable): lstm input tensor, - if batch_first = False, shape should be ( seq_len x batch_size x input_size ) - if batch_first = True, shape should be ( batch_size x seq_len x hidden_size ) - init_hidden(Variable|None): The initial hidden state of the LSTM - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - init_cell(Variable|None): The initial hidden state of the LSTM - This is a tensor with shape ( num_layers x batch_size x hidden_size) - if is_bidirec = True, shape should be ( num_layers*2 x batch_size x hidden_size) - and can be reshaped to a tensor with shape ( num_layers x 2 x batch_size x hidden_size) to use. - If it's None, it will be set to all 0. - hidden_size (int): Hidden size of the LSTM - num_layers (int): The total number of layers of the LSTM - sequence_length (Variabe|None): A tensor (shape [batch_size]) stores each real length of each instance, - This tensor will be convert to a mask to mask the padding ids - If it's None means NO padding ids - dropout_prob(float|0.0): Dropout prob, dropout ONLY work after rnn output of earch layers, - NOT between time steps - bidirectional (bool|False): If it is bidirectional - batch_first (bool|True): The shape format of the input and output tensors. If true, - the shape format should be :attr:`[batch_size, seq_len, hidden_size]`. If false, - the shape format should be :attr:`[seq_len, batch_size, hidden_size]`. 
By default - this function accepts input and emits output in batch-major form to be consistent - with most of data format, though a bit less efficient because of extra transposes. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of LSTM unit. - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cell (actNode). - Default: 'fluid.layers.tanh' - forget_bias (float|1.0) : Forget bias used to compute the forget gate - dtype(string): Data type used in this unit - name(string): Name used to identify parameters and biases - - Returns: - rnn_out(Tensor), last_hidden(Tensor), last_cell(Tensor) - - rnn_out is the result of LSTM hidden, shape is (seq_len x batch_size x hidden_size) \ - if is_bidirec set to True, it's shape will be ( seq_len x batch_sze x hidden_size*2) - - last_hidden is the hidden state of the last step of LSTM \ - with shape ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, it's shape will be ( num_layers*2 x batch_size x hidden_size), - and can be reshaped to a tensor ( num_layers x 2 x batch_size x hidden_size) to use. - - last_cell is the hidden state of the last step of LSTM \ - with shape ( num_layers x batch_size x hidden_size ) \ - if is_bidirec set to True, it's shape will be ( num_layers*2 x batch_size x hidden_size), - and can be reshaped to a tensor ( num_layers x 2 x batch_size x hidden_size) to use. - - Examples: - .. 
code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import basic_lstm - - batch_size = 20 - input_size = 128 - hidden_size = 256 - num_layers = 2 - dropout = 0.5 - bidirectional = True - batch_first = False - - input = layers.data( name = "input", shape = [-1, batch_size, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - pre_cell = layers.data( name = "pre_cell", shape=[-1, hidden_size], dtype='float32') - sequence_length = layers.data( name="sequence_length", shape=[-1], dtype='int32') - - rnn_out, last_hidden, last_cell = basic_lstm( input, pre_hidden, pre_cell, \ - hidden_size, num_layers = num_layers, \ - sequence_length = sequence_length, dropout_prob=dropout, bidirectional = bidirectional, \ - batch_first = batch_first) - - """ - fw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_layers_" + str(i) - fw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=param_attr, - bias_attr=bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype)) - if bidirectional: - bw_unit_list = [] - - for i in range(num_layers): - new_name = name + "_reverse_layers_" + str(i) - bw_unit_list.append( - BasicLSTMUnit( - new_name, - hidden_size, - param_attr=param_attr, - bias_attr=bias_attr, - gate_activation=gate_activation, - activation=activation, - forget_bias=forget_bias, - dtype=dtype)) - - if batch_first: - input = layers.transpose(input, [1, 0, 2]) - - mask = None - if sequence_length: - max_seq_len = layers.shape(input)[0] - mask = layers.sequence_mask( - sequence_length, maxlen=max_seq_len, dtype='float32') - - mask = layers.transpose(mask, [1, 0]) - - direc_num = 1 - if bidirectional: - direc_num = 2 - # convert to [num_layers, 2, batch_size, hidden_size] - if init_hidden: - init_hidden = layers.reshape( - init_hidden, shape=[num_layers, direc_num, -1, hidden_size]) - init_cell = layers.reshape( - init_cell, shape=[num_layers, direc_num, -1, hidden_size]) - - # forward direction - def get_single_direction_output(rnn_input, - unit_list, - mask=None, - direc_index=0): - rnn = StaticRNN() - with rnn.step(): - step_input = rnn.step_input(rnn_input) - - if mask: - step_mask = rnn.step_input(mask) - - for i in range(num_layers): - if init_hidden: - pre_hidden = rnn.memory(init=init_hidden[i, direc_index]) - pre_cell = rnn.memory(init=init_cell[i, direc_index]) - else: - pre_hidden = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size]) - pre_cell = rnn.memory( - batch_ref=rnn_input, shape=[-1, hidden_size]) - - new_hidden, new_cell = unit_list[i](step_input, pre_hidden, - pre_cell) - - if mask: - new_hidden = layers.elementwise_mul( - new_hidden, step_mask, axis=0) - layers.elementwise_mul( - pre_hidden, (step_mask - 1), axis=0) - new_cell = layers.elementwise_mul( - new_cell, step_mask, axis=0) - layers.elementwise_mul( - pre_cell, (step_mask - 1), axis=0) - - rnn.update_memory(pre_hidden, new_hidden) - rnn.update_memory(pre_cell, new_cell) - - rnn.step_output(new_hidden) - rnn.step_output(new_cell) - - step_input = new_hidden - if dropout_prob != None and dropout_prob > 0.0: - step_input = layers.dropout( - step_input, - dropout_prob=dropout_prob, - dropout_implementation='upscale_in_train') - - rnn.step_output(step_input) - - rnn_out = rnn() - - last_hidden_array = [] - last_cell_array = [] - rnn_output = rnn_out[-1] - for i in range(num_layers): - last_hidden = rnn_out[i * 2] - 
last_hidden = last_hidden[-1] - last_hidden_array.append(last_hidden) - last_cell = rnn_out[i * 2 + 1] - last_cell = last_cell[-1] - last_cell_array.append(last_cell) - - last_hidden_output = layers.concat(last_hidden_array, axis=0) - last_hidden_output = layers.reshape( - last_hidden_output, shape=[num_layers, -1, hidden_size]) - last_cell_output = layers.concat(last_cell_array, axis=0) - last_cell_output = layers.reshape( - last_cell_output, shape=[num_layers, -1, hidden_size]) - - return rnn_output, last_hidden_output, last_cell_output - # seq_len, batch_size, hidden_size - - fw_rnn_out, fw_last_hidden, fw_last_cell = get_single_direction_output( - input, fw_unit_list, mask, direc_index=0) - - if bidirectional: - bw_input = layers.reverse(input, axis=[0]) - bw_mask = None - if mask: - bw_mask = layers.reverse(mask, axis=[0]) - bw_rnn_out, bw_last_hidden, bw_last_cell = get_single_direction_output( - bw_input, bw_unit_list, bw_mask, direc_index=1) - - bw_rnn_out = layers.reverse(bw_rnn_out, axis=[0]) - - rnn_out = layers.concat([fw_rnn_out, bw_rnn_out], axis=2) - last_hidden = layers.concat([fw_last_hidden, bw_last_hidden], axis=1) - last_hidden = layers.reshape( - last_hidden, shape=[num_layers * direc_num, -1, hidden_size]) - - last_cell = layers.concat([fw_last_cell, bw_last_cell], axis=1) - last_cell = layers.reshape( - last_cell, shape=[num_layers * direc_num, -1, hidden_size]) - - if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) - return rnn_out, last_hidden, last_cell - else: - - rnn_out = fw_rnn_out - last_hidden = fw_last_hidden - last_cell = fw_last_cell - - if batch_first: - rnn_out = layers.transpose(rnn_out, [1, 0, 2]) - - return rnn_out, last_hidden, last_cell - - -class BasicLSTMUnit(Layer): - """ - **** - BasicLSTMUnit class, Using basic operator to build LSTM - The algorithm can be described as the code below. - - .. math:: - - i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i) - - f_t &= \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias ) - - o_t &= \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o) - - \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c) - - c_t &= f_t \odot c_{t-1} + i_t \odot \\tilde{c_t} - - h_t &= o_t \odot tanh(c_t) - - - $W$ terms denote weight matrices (e.g. $W_{ix}$ is the matrix - of weights from the input gate to the input) - - The b terms denote bias vectors ($bx_i$ and $bh_i$ are the input gate bias vector). - - sigmoid is the logistic sigmoid function. - - $i, f, o$ and $c$ are the input gate, forget gate, output gate, - and cell activation vectors, respectively, all of which have the same size as - the cell output activation vector $h$. - - The :math:`\odot` is the element-wise product of the vectors. - - :math:`tanh` is the activation functions. - - :math:`\\tilde{c_t}` is also called candidate hidden state, - which is computed based on the current input and the previous hidden state. - - Args: - name_scope(string) : The name scope used to identify parameter and bias name - hidden_size (integer): The hidden size used in the Unit. - param_attr(ParamAttr|None): The parameter attribute for the learnable - weight matrix. Note: - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as param_attr. If the Initializer of the param_attr - is not set, the parameter is initialized with Xavier. Default: None. - bias_attr (ParamAttr|None): The parameter attribute for the bias - of LSTM unit. - If it is set to None or one attribute of ParamAttr, lstm_unit will - create ParamAttr as bias_attr. 
If the Initializer of the bias_attr - is not set, the bias is initialized as zero. Default: None. - gate_activation (function|None): The activation function for gates (actGate). - Default: 'fluid.layers.sigmoid' - activation (function|None): The activation function for cells (actNode). - Default: 'fluid.layers.tanh' - forget_bias(float|1.0): forget bias used when computing forget gate - dtype(string): data type used in this unit - - Examples: - - .. code-block:: python - - import paddle.fluid.layers as layers - from paddle.fluid.contrib.layers import BasicLSTMUnit - - input_size = 128 - hidden_size = 256 - input = layers.data( name = "input", shape = [-1, input_size], dtype='float32') - pre_hidden = layers.data( name = "pre_hidden", shape=[-1, hidden_size], dtype='float32') - pre_cell = layers.data( name = "pre_cell", shape=[-1, hidden_size], dtype='float32') - - lstm_unit = BasicLSTMUnit( "gru_unit", hidden_size) - - new_hidden, new_cell = lstm_unit( input, pre_hidden, pre_cell ) - - """ - - def __init__(self, - name_scope, - hidden_size, - param_attr=None, - bias_attr=None, - gate_activation=None, - activation=None, - forget_bias=1.0, - dtype='float32'): - super(BasicLSTMUnit, self).__init__(name_scope, dtype) - # reserve old school _full_name and _helper for static graph save load - self._full_name = unique_name.generate(name_scope + "/" + - self.__class__.__name__) - self._helper = LayerObjectHelper(self._full_name) - - self._name = name_scope - self._hiden_size = hidden_size - self._param_attr = param_attr - self._bias_attr = bias_attr - self._gate_activation = gate_activation or layers.sigmoid - self._activation = activation or layers.tanh - self._forget_bias = layers.fill_constant( - [1], dtype=dtype, value=forget_bias) - self._forget_bias.stop_gradient = False - self._dtype = dtype - - def _build_once(self, input, pre_hidden, pre_cell): - self._input_size = input.shape[-1] - assert (self._input_size > 0) - - self._weight = self.create_parameter( - attr=self._param_attr, - shape=[self._input_size + self._hiden_size, 4 * self._hiden_size], - dtype=self._dtype) - - self._bias = self.create_parameter( - attr=self._bias_attr, - shape=[4 * self._hiden_size], - dtype=self._dtype, - is_bias=True) - - def forward(self, input, pre_hidden, pre_cell): - concat_input_hidden = layers.concat([input, pre_hidden], 1) - gate_input = layers.matmul(x=concat_input_hidden, y=self._weight) - - gate_input = layers.elementwise_add(gate_input, self._bias) - i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1) - new_cell = layers.elementwise_add( - layers.elementwise_mul( - pre_cell, - layers.sigmoid(layers.elementwise_add(f, self._forget_bias))), - layers.elementwise_mul(layers.sigmoid(i), layers.tanh(j))) - new_hidden = layers.tanh(new_cell) * layers.sigmoid(o) - - return new_hidden, new_cell diff --git a/dygraph/similarity_net/nets/mm_dnn.py b/dygraph/similarity_net/nets/mm_dnn.py index 0601b55e..6e461865 100644 --- a/dygraph/similarity_net/nets/mm_dnn.py +++ b/dygraph/similarity_net/nets/mm_dnn.py @@ -42,7 +42,7 @@ class MMDNN(Layer): self.dpool_size1 = int(config['net']['dpool_size_left']) self.dpool_size2 = int(config['net']['dpool_size_right']) self.hidden_size = int(config['net']['hidden_size']) - self.seq_len = int(conf_dict["seq_len"]) + self.seq_len = int(config["seq_len"]) self.seq_len1 = self.seq_len #int(config['max_len_left']) self.seq_len2 = self.seq_len @@ -157,7 +157,7 @@ class MMDNN(Layer): conv = self.conv(emb_expand) if mask is not None: cross_mask = 
fluid.layers.stack(x=[mask] * self.kernel_size, axis=0) - cross_mask = fluid.layers.stack(x=[cross] * conv.shape[1], axis=1) + cross_mask = fluid.layers.stack(x=[cross_mask] * conv.shape[0], axis=0) conv = cross_mask * conv + (1 - cross_mask) * (-2**self.seq_len + 1) pool = self.pool_layer(conv) diff --git a/dygraph/similarity_net/nets/paddle_layers.1.py b/dygraph/similarity_net/nets/paddle_layers.1.py deleted file mode 100644 index f01f64f5..00000000 --- a/dygraph/similarity_net/nets/paddle_layers.1.py +++ /dev/null @@ -1,457 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -network layers -""" - -import paddle.fluid as fluid -import paddle.fluid.param_attr as attr - - - -class EmbeddingLayer(object): - """ - Embedding Layer class - """ - - def __init__(self, dict_size, emb_dim, name="emb"): - """ - initialize - """ - self.dict_size = dict_size - self.emb_dim = emb_dim - self.name = name - - def ops(self, input): - """ - operation - """ - emb = fluid.dygraph.Embedding( - input=input, - size=[self.dict_size, self.emb_dim], - is_sparse=True, - param_attr=attr.ParamAttr(name=self.name)) - return emb - - -class SequencePoolLayer(object): - """ - Sequence Pool Layer class - """ - - def __init__(self, pool_type): - """ - initialize - """ - self.pool_type = pool_type - - def ops(self, input): - """ - operation - """ - pool = fluid.dygraph.Pool2D(input=input, pool_type=self.pool_type) - return pool - - -class FCLayer(object): - """ - Fully Connect Layer class - """ - - def __init__(self, fc_dim, act, name="fc"): - """ - initialize - """ - self.fc_dim = fc_dim - self.act = act - self.name = name - - def ops(self, input): - """ - operation - """ - fc = fluid.dygraph.FC(input=input, - size=self.fc_dim, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name), - act=self.act, - name=self.name) - return fc - - -class DynamicGRULayer(object): - """ - Dynamic GRU Layer class - """ - - def __init__(self, gru_dim, name="dyn_gru"): - """ - initialize - """ - self.gru_dim = gru_dim - self.name = name - - def ops(self, input): - """ - operation - """ - proj = fluid.dygraph.FC( - input=input, - size=self.gru_dim * 3, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - gru = fluid.layers.dynamic_gru( - input=proj, - size=self.gru_dim, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) - return gru - - -class DynamicLSTMLayer(object): - """ - Dynamic LSTM Layer class - """ - - def __init__(self, lstm_dim, name="dyn_lstm"): - """ - initialize - """ - self.lstm_dim = lstm_dim - self.name = name - - def ops(self, input): - """ - operation - """ - proj = fluid.dygraph.FC( - input=input, - size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s_fc.w" % self.name), - bias_attr=attr.ParamAttr(name="%s_fc.b" % self.name)) - lstm, _ = fluid.layers.dynamic_lstm( - input=proj, - 
size=self.lstm_dim * 4, - param_attr=attr.ParamAttr(name="%s.w" % self.name), - bias_attr=attr.ParamAttr(name="%s.b" % self.name)) - return lstm - - -class SequenceLastStepLayer(object): - """ - Get Last Step Sequence Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, input): - """ - operation - """ - last = fluid.layers.sequence_last_step(input) - return last - - -class SequenceConvPoolLayer(object): - """ - Sequence convolution and pooling Layer class - """ - - def __init__(self, filter_size, num_filters, name): - """ - initialize - Args: - filter_size:Convolution kernel size - num_filters:Convolution kernel number - """ - self.filter_size = filter_size - self.num_filters = num_filters - self.name = name - - def ops(self, input): - """ - operation - """ - conv = fluid.nets.sequence_conv_pool( - input=input, - filter_size=self.filter_size, - num_filters=self.num_filters, - param_attr=attr.ParamAttr(name=self.name), - act="relu") - return conv - - -class DataLayer(object): - """ - Data Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, name, shape, dtype, lod_level=0): - """ - operation - """ - data = fluid.layers.data( #不用改 - name=name, shape=shape, dtype=dtype, lod_level=lod_level) - return data - - -class ConcatLayer(object): - """ - Connection Layer class - """ - - def __init__(self, axis): - """ - initialize - """ - self.axis = axis - - def ops(self, inputs): - """ - operation - """ - concat = fluid.layers.concat(inputs, axis=self.axis) - return concat - - -class ReduceMeanLayer(object): - """ - Reduce Mean Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, input): - """ - operation - """ - mean = fluid.layers.reduce_mean(input) - return mean - - -class CrossEntropyLayer(object): - """ - Cross Entropy Calculate Layer - """ - - def __init__(self, name="cross_entropy"): - """ - initialize - """ - pass - - def ops(self, input, label): - """ - operation - """ - loss = fluid.layers.cross_entropy(input=input, label=label) # 不用改 - return loss - - -class SoftmaxWithCrossEntropyLayer(object): - """ - Softmax with Cross Entropy Calculate Layer - """ - - def __init__(self, name="softmax_with_cross_entropy"): - """ - initialize - """ - pass - - def ops(self, input, label): - """ - operation - """ - loss = fluid.layers.softmax_with_cross_entropy( # 不用改 - logits=input, label=label) - return loss - - -class CosSimLayer(object): - """ - Cos Similarly Calculate Layer - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, x, y): - """ - operation - """ - sim = fluid.layers.cos_sim(x, y) - return sim - - -class ElementwiseMaxLayer(object): - """ - Elementwise Max Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, x, y): - """ - operation - """ - max = fluid.layers.elementwise_max(x, y) - return max - - -class ElementwiseAddLayer(object): - """ - Elementwise Add Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, x, y): - """ - operation - """ - add = fluid.layers.elementwise_add(x, y) - return add - - -class ElementwiseSubLayer(object): - """ - Elementwise Add Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, x, y): - """ - operation - """ - sub = fluid.layers.elementwise_sub(x, y) - return sub - - -class ConstantLayer(object): - """ - Generate A Constant Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def 
ops(self, input, shape, dtype, value): - """ - operation - """ - constant = fluid.layers.fill_constant_batch_size_like(input, shape, - dtype, value) - return constant - - -class SigmoidLayer(object): - """ - Sigmoid Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, input): - """ - operation - """ - sigmoid = fluid.layers.sigmoid(input) - return sigmoid - - -class SoftsignLayer(object): - """ - Softsign Layer class - """ - - def __init__(self): - """ - initialize - """ - pass - - def ops(self, input): - """ - operation - """ - softsign = fluid.layers.softsign(input) - return softsign - - -# class MatmulLayer(object): -# def __init__(self, transpose_x, transpose_y): -# self.transpose_x = transpose_x -# self.transpose_y = transpose_y - -# def ops(self, x, y): -# matmul = fluid.layers.matmul(x, y, self.transpose_x, self.transpose_y) -# return matmul - -# class Conv2dLayer(object): -# def __init__(self, num_filters, filter_size, act, name): -# self.num_filters = num_filters -# self.filter_size = filter_size -# self.act = act -# self.name = name - -# def ops(self, input): -# conv = fluid.layers.conv2d(input, self.num_filters, self.filter_size, param_attr=attr.ParamAttr(name="%s.w" % self.name), bias_attr=attr.ParamAttr(name="%s.b" % self.name), act=self.act) -# return conv - -# class Pool2dLayer(object): -# def __init__(self, pool_size, pool_type): -# self.pool_size = pool_size -# self.pool_type = pool_type - -# def ops(self, input): -# pool = fluid.layers.pool2d(input, self.pool_size, self.pool_type) -# return pool -- GitLab
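The pairwise branch of the patched create_model in PaddleNLP/similarity_net/run_classifier.py returns a fluid.layers.py_reader whose three int64 slots are LoD sequences (shapes=([-1], [-1], [-1]), lod_levels=(1, 1, 1)). Below is a minimal sketch of how such a reader is typically driven with the fluid 1.x py_reader API; it is not code from this patch, and `args`, `train_batch_generator`, and the loss construction are placeholders assumed only for illustration.

    import paddle.fluid as fluid

    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # create_model is the patched function from run_classifier.py above;
        # `args` is an assumed parsed-arguments object.
        pyreader, left, pos_right, neg_right = create_model(
            args, pyreader_name='train_reader')
        # ... build the pairwise network and a loss on (left, pos_right, neg_right) ...

    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)

    # train_batch_generator is an assumed paddle-style batch generator yielding
    # (left_ids, pos_right_ids, neg_right_ids) samples that match the reader's
    # three int64 LoD slots.
    pyreader.decorate_paddle_reader(train_batch_generator)

    pyreader.start()
    try:
        while True:
            exe.run(train_program)  # fetch the loss here in real training code
    except fluid.core.EOFException:
        pyreader.reset()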