From 9f3fb95b34ce80b4363713cd674a09d13de8f865 Mon Sep 17 00:00:00 2001 From: Huihuang Zheng Date: Fri, 2 Oct 2020 13:31:46 +0800 Subject: [PATCH] [Dy2stat] Add Simnet Test for V2 APIs (#27460) Add Simnet Test for V2 APIs. We used tool from https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-beta/guides/migration_cn.html#paddle1-xpaddle2-0beta to do v1.x to v2 transformation. This PR pulled changes from #27430, please DO NOT merge before #27430 is merged --- .../simnet_dygraph_model_v2.py | 493 ++++++++++++++++++ .../dygraph_to_static/test_simnet_v2.py | 168 ++++++ 2 files changed, 661 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py create mode 100644 python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py new file mode 100644 index 0000000000..6612450b7c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -0,0 +1,493 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import reduce +import paddle + + +class EmbeddingLayer(object): + """ + Embedding Layer class + """ + + def __init__(self, dict_size, emb_dim, name="emb", padding_idx=None): + """ + initialize + """ + self.dict_size = dict_size + self.emb_dim = emb_dim + self.name = name + self.padding_idx = padding_idx + + def ops(self): + """ + operation + """ + # TODO(huihuangzheng): The original code set the is_sparse=True, but it + # causes crush in dy2stat. Set it to True after fixing it. + emb = paddle.fluid.dygraph.Embedding( + size=[self.dict_size, self.emb_dim], + is_sparse=True, + padding_idx=self.padding_idx, + param_attr=paddle.ParamAttr( + name=self.name, initializer=paddle.nn.initializer.Xavier())) + + return emb + + +class FCLayer(object): + """ + Fully Connect Layer class + """ + + def __init__(self, fc_dim, act, name="fc"): + """ + initialize + """ + self.fc_dim = fc_dim + self.act = act + self.name = name + + def ops(self): + """ + operation + """ + fc = FC(size=self.fc_dim, + param_attr=paddle.ParamAttr(name="%s.w" % self.name), + bias_attr=paddle.ParamAttr(name="%s.b" % self.name), + act=self.act) + return fc + + +class ConcatLayer(object): + """ + Connection Layer class + """ + + def __init__(self, axis): + """ + initialize + """ + self.axis = axis + + def ops(self, inputs): + """ + operation + """ + concat = paddle.concat(x=inputs, axis=self.axis) + return concat + + +class ReduceMeanLayer(object): + """ + Reduce Mean Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, input): + """ + operation + """ + mean = paddle.reduce_mean(input) + return mean + + +class CosSimLayer(object): + """ + Cos Similarly Calculate Layer + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, x, y): + """ + operation + """ + sim = paddle.nn.functional.cosine_similarity(x, y) + return sim + + +class ElementwiseMaxLayer(object): + """ + Elementwise Max Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, x, y): + """ + operation + """ + max = paddle.maximum(x=x, y=y) + return max + + +class ElementwiseAddLayer(object): + """ + Elementwise Add Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, x, y): + """ + operation + """ + add = paddle.add(x=x, y=y) + return add + + +class ElementwiseSubLayer(object): + """ + Elementwise Add Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, x, y): + """ + operation + """ + sub = paddle.elementwise_sub(x, y) + return sub + + +class ConstantLayer(object): + """ + Generate A Constant Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, input, shape, dtype, value): + """ + operation + """ + shape = list(shape) + input_shape = paddle.shape(input) + shape[0] = input_shape[0] + constant = paddle.fill_constant(shape, dtype, value) + return constant + + +class SoftsignLayer(object): + """ + Softsign Layer class + """ + + def __init__(self): + """ + initialize + """ + pass + + def ops(self, input): + """ + operation + """ + softsign = paddle.nn.functional.softsign(input) + return softsign + + +class FC(paddle.nn.Layer): + """ + This interface is used to construct a callable object of the ``FC`` class. + For more details, refer to code examples. + It creates a fully connected layer in the network. It can take + one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor, + which represents a fully connected weight matrix from each input unit to + each output unit. The fully connected layer multiplies each input tensor + with its corresponding weight to produce an output Tensor with shape [N, `size`], + where N is batch size. If multiple input tensors are given, the results of + multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr`` + is not None, a bias variable will be created and added to the output. + Finally, if ``act`` is not None, it will be applied to the output as well. + When the input is single ``Tensor`` : + .. math:: + Out = Act({XW + b}) + When the input are multiple ``Tensor`` : + .. math:: + Out = Act({\sum_{i=0}^{N-1}X_iW_i + b}) + In the above equation: + * :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` . + * :math:`X_i`: The i-th input ``Tensor`` . + * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor. + * :math:`b`: The bias parameter created by this layer (if needed). + * :math:`Act`: The activation function. + * :math:`Out`: The output ``Tensor`` . + See below for an example. + .. code-block:: text + Given: + data_1.data = [[[0.1, 0.2]]] + data_1.shape = (1, 1, 2) # 1 is batch_size + data_2.data = [[[0.1, 0.2, 0.3]]] + data_2.shape = (1, 1, 3) # 1 is batch_size + fc = FC("fc", 2, num_flatten_dims=2) + out = fc(input=[data_1, data_2]) + Then: + out.data = [[[0.182996 -0.474117]]] + out.shape = (1, 1, 2) + Parameters: + + size(int): The number of output units in this layer. + num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than + two dimensions. If this happens, the multi-dimension tensor will first be flattened + into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input + tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1) + dimensions will be flatten to form the first dimension of the final matrix (height of + the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to + form the second dimension of the final matrix (width of the matrix). For example, suppose + `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3. + Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1 + param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable + weights(Parameter) of this layer. Default: None. + bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias + of this layer. If it is set to False, no bias will be added to the output units. + If it is set to None, the bias is initialized zero. Default: None. + act (str, optional): Activation to be applied to the output of this layer. Default: None. + is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False. + dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32". + Attribute: + **weight** (list of Parameter): the learnable weights of this layer. + **bias** (Parameter or None): the learnable bias of this layer. + Returns: + None + + """ + + def __init__(self, + size, + num_flatten_dims=1, + param_attr=None, + bias_attr=None, + act=None, + is_test=False, + dtype="float32"): + super(FC, self).__init__(dtype) + + self._size = size + self._num_flatten_dims = num_flatten_dims + self._dtype = dtype + self._param_attr = param_attr + self._bias_attr = bias_attr + self._act = act + self.__w = list() + + def _build_once(self, input): + i = 0 + for inp, param in self._helper.iter_inputs_and_params(input, + self._param_attr): + input_shape = inp.shape + + param_shape = [ + reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], + 1) + ] + [self._size] + self.__w.append( + self.add_parameter( + '_w%d' % i, + self.create_parameter( + attr=param, + shape=param_shape, + dtype=self._dtype, + is_bias=False))) + i += 1 + + size = list([self._size]) + self._b = self.create_parameter( + attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True) + + # TODO(songyouwei): We should remove _w property + @property + def _w(self, i=0): + return self.__w[i] + + @_w.setter + def _w(self, value, i=0): + assert isinstance(self.__w[i], Variable) + self.__w[i].set_value(value) + + @property + def weight(self): + if len(self.__w) > 1: + return self.__w + else: + return self.__w[0] + + @weight.setter + def weight(self, value): + if len(self.__w) == 1: + self.__w[0] = value + + @property + def bias(self): + return self._b + + @bias.setter + def bias(self, value): + self._b = value + + def forward(self, input): + mul_results = list() + i = 0 + for inp, param in self._helper.iter_inputs_and_params(input, + self._param_attr): + tmp = self._helper.create_variable_for_type_inference(self._dtype) + self._helper.append_op( + type="mul", + inputs={"X": inp, + "Y": self.__w[i]}, + outputs={"Out": tmp}, + attrs={ + "x_num_col_dims": self._num_flatten_dims, + "y_num_col_dims": 1 + }) + i += 1 + mul_results.append(tmp) + + if len(mul_results) == 1: + pre_bias = mul_results[0] + else: + pre_bias = self._helper.create_variable_for_type_inference( + self._dtype) + self._helper.append_op( + type="sum", + inputs={"X": mul_results}, + outputs={"Out": pre_bias}, + attrs={"use_mkldnn": False}) + + if self._b is not None: + pre_activation = self._helper.create_variable_for_type_inference( + dtype=self._dtype) + self._helper.append_op( + type='elementwise_add', + inputs={'X': [pre_bias], + 'Y': [self._b]}, + outputs={'Out': [pre_activation]}, + attrs={'axis': self._num_flatten_dims}) + else: + pre_activation = pre_bias + # Currently, we don't support inplace in dygraph mode + return self._helper.append_activation(pre_activation, act=self._act) + + +class HingeLoss(object): + """ + Hing Loss Calculate class + """ + + def __init__(self, conf_dict): + """ + initialize + """ + self.margin = conf_dict["loss"]["margin"] + + def compute(self, pos, neg): + """ + compute loss + """ + elementwise_max = ElementwiseMaxLayer() + elementwise_add = ElementwiseAddLayer() + elementwise_sub = ElementwiseSubLayer() + constant = ConstantLayer() + reduce_mean = ReduceMeanLayer() + loss = reduce_mean.ops( + elementwise_max.ops( + constant.ops(neg, neg.shape, "float32", 0.0), + elementwise_add.ops( + elementwise_sub.ops(neg, pos), + constant.ops(neg, neg.shape, "float32", self.margin)))) + return loss + + +class BOW(paddle.nn.Layer): + """ + BOW + """ + + def __init__(self, conf_dict): + """ + initialize + """ + super(BOW, self).__init__() + self.dict_size = conf_dict["dict_size"] + self.task_mode = conf_dict["task_mode"] + self.emb_dim = conf_dict["net"]["emb_dim"] + self.bow_dim = conf_dict["net"]["bow_dim"] + self.seq_len = conf_dict["seq_len"] + self.emb_layer = EmbeddingLayer(self.dict_size, self.emb_dim, + "emb").ops() + self.bow_layer = paddle.nn.Linear( + in_features=self.bow_dim, out_features=self.bow_dim) + self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops() + self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops() + + @paddle.jit.to_static + def forward(self, left, right): + """ + Forward network + """ + + # embedding layer + left_emb = self.emb_layer(left) + right_emb = self.emb_layer(right) + left_emb = paddle.reshape( + left_emb, shape=[-1, self.seq_len, self.bow_dim]) + right_emb = paddle.reshape( + right_emb, shape=[-1, self.seq_len, self.bow_dim]) + + bow_left = paddle.reduce_sum(left_emb, dim=1) + bow_right = paddle.reduce_sum(right_emb, dim=1) + softsign_layer = SoftsignLayer() + left_soft = softsign_layer.ops(bow_left) + right_soft = softsign_layer.ops(bow_right) + + # matching layer + if self.task_mode == "pairwise": + left_bow = self.bow_layer(left_soft) + right_bow = self.bow_layer(right_soft) + cos_sim_layer = CosSimLayer() + pred = cos_sim_layer.ops(left_bow, right_bow) + return left_bow, pred + else: + concat_layer = ConcatLayer(1) + concat = concat_layer.ops([left_soft, right_soft]) + concat_fc = self.bow_layer_po(concat) + pred = self.softmax_layer(concat_fc) + return left_soft, pred diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py new file mode 100644 index 0000000000..284087e61e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_simnet_v2.py @@ -0,0 +1,168 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import numpy as np +import paddle +import random +import unittest + +from simnet_dygraph_model_v2 import BOW, HingeLoss + +SEED = 102 +random.seed(SEED) + + +def create_conf_dict(): + conf_dict = {} + conf_dict["task_mode"] = "pairwise" + conf_dict["net"] = {"emb_dim": 128, "bow_dim": 128, "hidden_dim": 128} + conf_dict["loss"] = {"margin": 0.1} + return conf_dict + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--batch_size", + type=int, + default=32, + help="Total examples' number in batch for training.") + parser.add_argument( + "--seq_len", type=int, default=32, help="The length of each sentence.") + parser.add_argument( + "--epoch", type=int, default=1, help="The number of training epoch.") + parser.add_argument( + "--fake_sample_size", + type=int, + default=128, + help="The number of samples of fake data.") + args = parser.parse_args([]) + return args + + +args = parse_args() + + +def fake_vocabulary(): + vocab = {} + vocab[""] = 0 + for i in range(26): + c = chr(ord('a') + i) + vocab[c] = i + 1 + return vocab + + +vocab = fake_vocabulary() + + +class FakeReaderProcessor(object): + def __init__(self, args, vocab): + self.vocab = vocab + self.seq_len = args.seq_len + self.sample_size = args.fake_sample_size + self.data_samples = [] + for i in range(self.sample_size): + query = [random.randint(0, 26) for i in range(self.seq_len)] + pos_title = query[:] + neg_title = [26 - q for q in query] + self.data_samples.append( + np.array([query, pos_title, neg_title]).astype(np.int64)) + + def get_reader(self, mode, epoch=0): + def reader_with_pairwise(): + if mode == "train": + for i in range(self.sample_size): + yield self.data_samples[i] + + return reader_with_pairwise + + +simnet_process = FakeReaderProcessor(args, vocab) + + +def train(conf_dict, to_static): + """ + train process + """ + program_translator = paddle.jit.ProgramTranslator() + program_translator.enable(to_static) + + # Get device + if paddle.is_compiled_with_cuda(): + place = paddle.CUDAPlace(0) + else: + place = paddle.CPUPlace() + + paddle.disable_static(place) + paddle.manual_seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + conf_dict['dict_size'] = len(vocab) + conf_dict['seq_len'] = args.seq_len + + net = BOW(conf_dict) + loss = HingeLoss(conf_dict) + optimizer = paddle.optimizer.Adam( + learning_rate=0.001, + beta1=0.9, + beta2=0.999, + epsilon=1e-08, + parameters=net.parameters()) + + metric = paddle.metric.Auc(name="auc") + + global_step = 0 + losses = [] + + train_loader = paddle.io.DataLoader.from_generator( + capacity=16, return_list=True, iterable=True, use_double_buffer=True) + get_train_examples = simnet_process.get_reader("train", epoch=args.epoch) + train_loader.set_sample_list_generator( + paddle.batch( + get_train_examples, batch_size=args.batch_size), place) + + for left, pos_right, neg_right in train_loader(): + left = paddle.reshape(left, shape=[-1, 1]) + pos_right = paddle.reshape(pos_right, shape=[-1, 1]) + neg_right = paddle.reshape(neg_right, shape=[-1, 1]) + net.train() + global_step += 1 + left_feat, pos_score = net(left, pos_right) + pred = pos_score + _, neg_score = net(left, neg_right) + avg_cost = loss.compute(pos_score, neg_score) + losses.append(np.mean(avg_cost.numpy())) + avg_cost.backward() + optimizer.minimize(avg_cost) + net.clear_gradients() + paddle.enable_static() + return losses + + +class TestSimnet(unittest.TestCase): + def test_dygraph_static_same_loss(self): + if paddle.is_compiled_with_cuda(): + paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True}) + conf_dict = create_conf_dict() + dygraph_loss = train(conf_dict, to_static=False) + static_loss = train(conf_dict, to_static=True) + + self.assertEqual(len(dygraph_loss), len(static_loss)) + for i in range(len(dygraph_loss)): + self.assertAlmostEqual(dygraph_loss[i], static_loss[i]) + + +if __name__ == '__main__': + unittest.main() -- GitLab