未验证 提交 9f3fb95b 编写于 作者: H Huihuang Zheng 提交者: GitHub

[Dy2stat] Add Simnet Test for V2 APIs (#27460)

Add Simnet Test for V2 APIs.

We used the migration tool from https://www.paddlepaddle.org.cn/documentation/docs/zh/2.0-beta/guides/migration_cn.html#paddle1-xpaddle2-0beta to convert the v1.x code to the v2 APIs.

This PR includes changes pulled from #27430. Please DO NOT merge it before #27430 is merged.
上级 a90711c7
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import reduce
import paddle
class EmbeddingLayer(object):
    """
    Factory that holds the configuration of an embedding table and builds
    the corresponding Paddle layer on demand.
    """

    def __init__(self, dict_size, emb_dim, name="emb", padding_idx=None):
        """
        Remember the embedding configuration.

        Args:
            dict_size: first dimension of the embedding table.
            emb_dim: second dimension of the embedding table.
            name: name assigned to the embedding parameter.
            padding_idx: forwarded unchanged to the Embedding layer.
        """
        self.name = name
        self.padding_idx = padding_idx
        self.dict_size = dict_size
        self.emb_dim = emb_dim

    def ops(self):
        """
        Create and return a new ``paddle.fluid.dygraph.Embedding`` layer.
        """
        # NOTE(review): TODO(huihuangzheng) stated that is_sparse=True
        # crashes dy2stat and should stay off until that is fixed, yet the
        # flag is True below -- confirm which one is intended.
        table_shape = [self.dict_size, self.emb_dim]
        weight_attr = paddle.ParamAttr(
            name=self.name, initializer=paddle.nn.initializer.Xavier())
        return paddle.fluid.dygraph.Embedding(
            size=table_shape,
            is_sparse=True,
            padding_idx=self.padding_idx,
            param_attr=weight_attr)
class FCLayer(object):
    """
    Factory that holds the configuration of a fully connected layer and
    builds an ``FC`` instance on demand.
    """

    def __init__(self, fc_dim, act, name="fc"):
        """
        Remember the layer configuration.

        Args:
            fc_dim: output size handed to ``FC``.
            act: activation name handed to ``FC`` (may be None).
            name: prefix used for the weight ("<name>.w") and bias
                ("<name>.b") parameter names.
        """
        self.name = name
        self.act = act
        self.fc_dim = fc_dim

    def ops(self):
        """
        Create and return a new ``FC`` layer with named weight and bias.
        """
        weight_attr = paddle.ParamAttr(name="%s.w" % self.name)
        bias_attr = paddle.ParamAttr(name="%s.b" % self.name)
        return FC(size=self.fc_dim,
                  param_attr=weight_attr,
                  bias_attr=bias_attr,
                  act=self.act)
class ConcatLayer(object):
    """
    Thin wrapper around ``paddle.concat`` with a fixed axis.
    """

    def __init__(self, axis):
        """
        Remember the axis along which inputs are concatenated.
        """
        self.axis = axis

    def ops(self, inputs):
        """
        Concatenate ``inputs`` along the configured axis and return the
        result.
        """
        return paddle.concat(x=inputs, axis=self.axis)
class ReduceMeanLayer(object):
    """
    Thin wrapper around ``paddle.reduce_mean``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, input):
        """
        Return ``paddle.reduce_mean`` applied to ``input``.
        """
        return paddle.reduce_mean(input)
class CosSimLayer(object):
    """
    Thin wrapper around ``paddle.nn.functional.cosine_similarity``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, x, y):
        """
        Return the cosine similarity of ``x`` and ``y``.
        """
        return paddle.nn.functional.cosine_similarity(x, y)
class ElementwiseMaxLayer(object):
    """
    Thin wrapper around ``paddle.maximum``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, x, y):
        """
        Return the element-wise maximum of ``x`` and ``y``.
        """
        return paddle.maximum(x=x, y=y)
class ElementwiseAddLayer(object):
    """
    Thin wrapper around ``paddle.add``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, x, y):
        """
        Return the element-wise sum of ``x`` and ``y``.
        """
        return paddle.add(x=x, y=y)
class ElementwiseSubLayer(object):
    """
    Thin wrapper around ``paddle.elementwise_sub``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, x, y):
        """
        Return the element-wise difference ``x - y``.
        """
        return paddle.elementwise_sub(x, y)
class ConstantLayer(object):
    """
    Builds a constant-filled tensor whose leading dimension follows a
    reference tensor.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, input, shape, dtype, value):
        """
        Return a tensor filled with ``value``.

        Args:
            input: reference tensor; its runtime first dimension replaces
                the first entry of ``shape``.
            shape: requested shape (copied, the caller's object is not
                modified).
            dtype: data type passed to ``paddle.fill_constant``.
            value: fill value passed to ``paddle.fill_constant``.
        """
        dims = list(shape)
        # Use the runtime size of the reference tensor for dimension 0.
        dims[0] = paddle.shape(input)[0]
        return paddle.fill_constant(dims, dtype, value)
class SoftsignLayer(object):
    """
    Thin wrapper around ``paddle.nn.functional.softsign``.
    """

    def __init__(self):
        """
        Nothing to configure.
        """

    def ops(self, input):
        """
        Return the softsign activation of ``input``.
        """
        return paddle.nn.functional.softsign(input)
class FC(paddle.nn.Layer):
    r"""
    This interface is used to construct a callable object of the ``FC`` class.
    For more details, refer to code examples.

    It creates a fully connected layer in the network. It can take
    one or multiple ``Tensor`` as its inputs. It creates a Variable called
    weights for each input tensor, which represents a fully connected weight
    matrix from each input unit to each output unit. The fully connected
    layer multiplies each input tensor with its corresponding weight to
    produce an output Tensor with shape [N, `size`], where N is batch size.
    If multiple input tensors are given, the results of multiple output
    tensors with shape [N, `size`] will be summed up. If ``bias_attr`` is
    not None, a bias variable will be created and added to the output.
    Finally, if ``act`` is not None, it will be applied to the output as well.

    When the input is single ``Tensor`` :

    .. math::

        Out = Act({XW + b})

    When the input are multiple ``Tensor`` :

    .. math::

        Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})

    In the above equation:

    * :math:`N`: Number of the input. N equals to len(input) if input is list of ``Tensor`` .
    * :math:`X_i`: The i-th input ``Tensor`` .
    * :math:`W_i`: The i-th weights matrix corresponding i-th input tensor.
    * :math:`b`: The bias parameter created by this layer (if needed).
    * :math:`Act`: The activation function.
    * :math:`Out`: The output ``Tensor`` .

    See below for an example.

    .. code-block:: text

        Given:
            data_1.data = [[[0.1, 0.2]]]
            data_1.shape = (1, 1, 2) # 1 is batch_size
            data_2.data = [[[0.1, 0.2, 0.3]]]
            data_2.shape = (1, 1, 3) # 1 is batch_size
            fc = FC(2, num_flatten_dims=2)
            out = fc([data_1, data_2])
        Then:
            out.data = [[[0.182996 -0.474117]]]
            out.shape = (1, 1, 2)

    Parameters:
        size(int): The number of output units in this layer.
        num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
            two dimensions. If this happens, the multi-dimension tensor will first be flattened
            into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
            tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
            dimensions will be flatten to form the first dimension of the final matrix (height of
            the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
            form the second dimension of the final matrix (width of the matrix). For example, suppose
            `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
            Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
        param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
            weights(Parameter) of this layer. Default: None.
        bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
            of this layer. If it is set to False, no bias will be added to the output units.
            If it is set to None, the bias is initialized zero. Default: None.
        act (str, optional): Activation to be applied to the output of this layer. Default: None.
        is_test(bool, optional): A flag indicating whether execution is in test phase.
            Accepted for interface compatibility; this implementation never reads it.
            Default: False.
        dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".

    Attribute:
        **weight** (list of Parameter): the learnable weights of this layer.
        **bias** (Parameter or None): the learnable bias of this layer.

    Returns:
        None
    """

    def __init__(self,
                 size,
                 num_flatten_dims=1,
                 param_attr=None,
                 bias_attr=None,
                 act=None,
                 is_test=False,
                 dtype="float32"):
        # Pass dtype by keyword: the first positional parameter of
        # ``Layer.__init__`` is ``name_scope``, so a positional dtype would
        # be used as the layer's name scope instead.
        super(FC, self).__init__(dtype=dtype)

        self._size = size
        self._num_flatten_dims = num_flatten_dims
        self._dtype = dtype
        self._param_attr = param_attr
        self._bias_attr = bias_attr
        self._act = act
        # One weight per input tensor; filled in by _build_once().
        self.__w = list()

    def _build_once(self, input):
        """
        Create one weight per input tensor plus the shared bias.

        The weight shape is derived from the input shape, which is why the
        parameters cannot be created in ``__init__``.
        NOTE(review): presumably invoked by the base ``Layer`` before the
        first ``forward`` -- confirm against ``Layer.__call__``.
        """
        pairs = self._helper.iter_inputs_and_params(input, self._param_attr)
        for i, (inp, param) in enumerate(pairs):
            input_shape = inp.shape

            # Rows: product of the dimensions that get flattened into the
            # matrix width; columns: the number of output units.
            param_shape = [
                reduce(lambda a, b: a * b,
                       input_shape[self._num_flatten_dims:], 1)
            ] + [self._size]
            self.__w.append(
                self.add_parameter(
                    '_w%d' % i,
                    self.create_parameter(
                        attr=param,
                        shape=param_shape,
                        dtype=self._dtype,
                        is_bias=False)))

        self._b = self.create_parameter(
            attr=self._bias_attr,
            shape=[self._size],
            dtype=self._dtype,
            is_bias=True)

    # TODO(songyouwei): We should remove _w property
    @property
    def _w(self, i=0):
        """Return the i-th weight (property access always uses i=0)."""
        return self.__w[i]

    @_w.setter
    def _w(self, value, i=0):
        """Overwrite the value of the i-th weight (always i=0 via property)."""
        # ``Variable`` is not imported at module level, so reference it
        # through the already imported ``paddle`` package.
        assert isinstance(self.__w[i], paddle.fluid.framework.Variable)
        self.__w[i].set_value(value)

    @property
    def weight(self):
        """
        Return the list of weights when there are several, otherwise the
        single weight. The list is empty until ``_build_once`` has run.
        """
        if len(self.__w) > 1:
            return self.__w
        else:
            return self.__w[0]

    @weight.setter
    def weight(self, value):
        """Replace the weight; ignored unless exactly one weight exists."""
        if len(self.__w) == 1:
            self.__w[0] = value

    @property
    def bias(self):
        """Return the bias created by ``_build_once``."""
        return self._b

    @bias.setter
    def bias(self, value):
        """Replace the bias."""
        self._b = value

    def forward(self, input):
        """
        Compute ``act(sum_i(mul(X_i, W_i)) + b)`` for the given input(s).

        Args:
            input: a tensor or a list of tensors; the i-th tensor is
                multiplied with the i-th weight.

        Returns:
            The output of ``append_activation`` applied to the biased sum.
        """
        mul_results = list()
        pairs = self._helper.iter_inputs_and_params(input, self._param_attr)
        for i, (inp, param) in enumerate(pairs):
            tmp = self._helper.create_variable_for_type_inference(self._dtype)
            self._helper.append_op(
                type="mul",
                inputs={"X": inp,
                        "Y": self.__w[i]},
                outputs={"Out": tmp},
                attrs={
                    "x_num_col_dims": self._num_flatten_dims,
                    "y_num_col_dims": 1
                })
            mul_results.append(tmp)

        if len(mul_results) == 1:
            pre_bias = mul_results[0]
        else:
            # Several inputs: add the partial products together.
            pre_bias = self._helper.create_variable_for_type_inference(
                self._dtype)
            self._helper.append_op(
                type="sum",
                inputs={"X": mul_results},
                outputs={"Out": pre_bias},
                attrs={"use_mkldnn": False})

        if self._b is not None:
            pre_activation = self._helper.create_variable_for_type_inference(
                dtype=self._dtype)
            self._helper.append_op(
                type='elementwise_add',
                inputs={'X': [pre_bias],
                        'Y': [self._b]},
                outputs={'Out': [pre_activation]},
                attrs={'axis': self._num_flatten_dims})
        else:
            pre_activation = pre_bias
        # Currently, we don't support inplace in dygraph mode
        return self._helper.append_activation(pre_activation, act=self._act)
class HingeLoss(object):
    """
    Pairwise hinge loss: mean(max(0, neg - pos + margin)).
    """

    def __init__(self, conf_dict):
        """
        Read the margin from ``conf_dict["loss"]["margin"]``.
        """
        self.margin = conf_dict["loss"]["margin"]

    def compute(self, pos, neg):
        """
        Return the mean hinge loss for positive and negative scores.

        Args:
            pos: scores of the positive pairs.
            neg: scores of the negative pairs; also used as the shape
                reference for the constant tensors.
        """
        max_layer = ElementwiseMaxLayer()
        add_layer = ElementwiseAddLayer()
        sub_layer = ElementwiseSubLayer()
        const_layer = ConstantLayer()
        mean_layer = ReduceMeanLayer()

        zeros = const_layer.ops(neg, neg.shape, "float32", 0.0)
        gap = sub_layer.ops(neg, pos)
        margin = const_layer.ops(neg, neg.shape, "float32", self.margin)
        shifted_gap = add_layer.ops(gap, margin)
        hinge = max_layer.ops(zeros, shifted_gap)
        return mean_layer.ops(hinge)
class BOW(paddle.nn.Layer):
    """
    Bag-of-words text matching network.

    Both inputs are embedded, summed over the sequence axis and passed
    through softsign. In "pairwise" mode the two representations are
    projected and compared with cosine similarity; otherwise they are
    concatenated and classified by two fully connected layers.
    """

    def __init__(self, conf_dict):
        """
        Build the sub-layers from the configuration.

        Args:
            conf_dict: dict providing "dict_size", "task_mode", "seq_len"
                and "net" -> {"emb_dim", "bow_dim"}.
        """
        super(BOW, self).__init__()
        self.dict_size = conf_dict["dict_size"]
        self.task_mode = conf_dict["task_mode"]
        self.emb_dim = conf_dict["net"]["emb_dim"]
        self.bow_dim = conf_dict["net"]["bow_dim"]
        self.seq_len = conf_dict["seq_len"]
        self.emb_layer = EmbeddingLayer(self.dict_size, self.emb_dim,
                                        "emb").ops()
        # The projection consumes the summed embeddings, whose width is
        # emb_dim, and produces bow_dim features.
        self.bow_layer = paddle.nn.Linear(
            in_features=self.emb_dim, out_features=self.bow_dim)
        self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops()
        self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops()

    @paddle.jit.to_static
    def forward(self, left, right):
        """
        Forward network.

        Args:
            left: token ids of the left text.
            right: token ids of the right text.

        Returns:
            ``(left_bow, pred)`` in "pairwise" mode, where ``pred`` is the
            cosine similarity; ``(left_soft, pred)`` otherwise, where
            ``pred`` is the softmax output.
        """
        # embedding layer
        left_emb = self.emb_layer(left)
        right_emb = self.emb_layer(right)
        # The embedding's last axis is emb_dim wide, so reshape with
        # emb_dim; using another width would silently regroup samples.
        left_emb = paddle.reshape(
            left_emb, shape=[-1, self.seq_len, self.emb_dim])
        right_emb = paddle.reshape(
            right_emb, shape=[-1, self.seq_len, self.emb_dim])

        # Sum over the sequence axis to get the bag-of-words vector.
        bow_left = paddle.reduce_sum(left_emb, dim=1)
        bow_right = paddle.reduce_sum(right_emb, dim=1)
        softsign_layer = SoftsignLayer()
        left_soft = softsign_layer.ops(bow_left)
        right_soft = softsign_layer.ops(bow_right)

        # matching layer
        if self.task_mode == "pairwise":
            left_bow = self.bow_layer(left_soft)
            right_bow = self.bow_layer(right_soft)
            cos_sim_layer = CosSimLayer()
            pred = cos_sim_layer.ops(left_bow, right_bow)
            return left_bow, pred
        else:
            concat_layer = ConcatLayer(1)
            concat = concat_layer.ops([left_soft, right_soft])
            concat_fc = self.bow_layer_po(concat)
            pred = self.softmax_layer(concat_fc)
            return left_soft, pred
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import numpy as np
import paddle
import random
import unittest
from simnet_dygraph_model_v2 import BOW, HingeLoss
# Fixed seed: applied to Python's `random` module here and reused by train()
# to seed Paddle.
SEED = 102
random.seed(SEED)
def create_conf_dict():
    """Return a fresh configuration dict for a pairwise SimNet run."""
    return {
        "task_mode": "pairwise",
        "net": {"emb_dim": 128, "bow_dim": 128, "hidden_dim": 128},
        "loss": {"margin": 0.1},
    }
def parse_args():
    """
    Return the default run options as an ``argparse.Namespace``.

    An empty argument list is parsed, so the real command line is ignored
    and every option keeps its default value.
    """
    # (flag, default, help text); every option is an int.
    option_table = (
        ("--batch_size", 32, "Total examples' number in batch for training."),
        ("--seq_len", 32, "The length of each sentence."),
        ("--epoch", 1, "The number of training epoch."),
        ("--fake_sample_size", 128, "The number of samples of fake data."),
    )
    parser = argparse.ArgumentParser()
    for flag, default_value, help_text in option_table:
        parser.add_argument(
            flag, type=int, default=default_value, help=help_text)
    return parser.parse_args([])
# Module-level run options; parse_args() parses an empty list, so these are
# always the defaults.
args = parse_args()
def fake_vocabulary():
    """
    Return a toy vocabulary: "<unk>" -> 0 and 'a'..'z' -> 1..26.
    """
    table = {"<unk>": 0}
    first_letter = ord('a')
    table.update(
        (chr(first_letter + offset), offset + 1) for offset in range(26))
    return table
# Toy vocabulary; train() uses its length as the embedding dictionary size.
vocab = fake_vocabulary()
class FakeReaderProcessor(object):
    """
    In-memory generator of fake (query, positive, negative) samples.

    Each sample is an int64 array of shape (3, seq_len): row 0 is a random
    query with ids in [0, 26], row 1 is a copy of the query and row 2 is
    ``26 - query``.
    """

    def __init__(self, args, vocab):
        """
        Pre-generate ``args.fake_sample_size`` samples of length
        ``args.seq_len`` using Python's global ``random`` state.
        """
        self.vocab = vocab
        self.seq_len = args.seq_len
        self.sample_size = args.fake_sample_size
        self.data_samples = []
        for _ in range(self.sample_size):
            query = [random.randint(0, 26) for _ in range(self.seq_len)]
            triple = [query, list(query), [26 - token for token in query]]
            self.data_samples.append(np.array(triple).astype(np.int64))

    def get_reader(self, mode, epoch=0):
        """
        Return a generator function over the stored samples.

        The generator yields every sample once when ``mode`` is "train"
        and nothing otherwise. ``epoch`` is accepted but not used.
        """

        def reader_with_pairwise():
            if mode != "train":
                return
            for index in range(self.sample_size):
                yield self.data_samples[index]

        return reader_with_pairwise
# Shared fake-data source; train() takes its "train" reader from it.
simnet_process = FakeReaderProcessor(args, vocab)
def train(conf_dict, to_static):
    """
    Train the BOW network on the fake data and return the per-batch losses.

    Args:
        conf_dict: configuration dict; it is modified in place: the keys
            'dict_size' and 'seq_len' are set before the network is built.
        to_static: passed to ``ProgramTranslator.enable``; selects whether
            the ``to_static``-decorated forward is translated.

    Returns:
        A list with one mean loss value per processed batch.
    """
    program_translator = paddle.jit.ProgramTranslator()
    program_translator.enable(to_static)

    # Get device
    if paddle.is_compiled_with_cuda():
        place = paddle.CUDAPlace(0)
    else:
        place = paddle.CPUPlace()

    paddle.disable_static(place)
    try:
        # Seed before any parameter is created so that separate calls start
        # from the same initial state.
        paddle.manual_seed(SEED)
        paddle.framework.random._manual_program_seed(SEED)

        conf_dict['dict_size'] = len(vocab)
        conf_dict['seq_len'] = args.seq_len

        net = BOW(conf_dict)
        loss = HingeLoss(conf_dict)
        optimizer = paddle.optimizer.Adam(
            learning_rate=0.001,
            beta1=0.9,
            beta2=0.999,
            epsilon=1e-08,
            parameters=net.parameters())

        losses = []

        train_loader = paddle.io.DataLoader.from_generator(
            capacity=16,
            return_list=True,
            iterable=True,
            use_double_buffer=True)
        get_train_examples = simnet_process.get_reader(
            "train", epoch=args.epoch)
        train_loader.set_sample_list_generator(
            paddle.batch(
                get_train_examples, batch_size=args.batch_size), place)

        for left, pos_right, neg_right in train_loader():
            # Flatten every batch to a column of token ids.
            left = paddle.reshape(left, shape=[-1, 1])
            pos_right = paddle.reshape(pos_right, shape=[-1, 1])
            neg_right = paddle.reshape(neg_right, shape=[-1, 1])
            net.train()
            _, pos_score = net(left, pos_right)
            _, neg_score = net(left, neg_right)
            avg_cost = loss.compute(pos_score, neg_score)
            losses.append(np.mean(avg_cost.numpy()))
            avg_cost.backward()
            optimizer.minimize(avg_cost)
            net.clear_gradients()
    finally:
        # Always switch back to static mode, even when training fails, so
        # the mode change does not leak out of this function.
        paddle.enable_static()
    return losses
class TestSimnet(unittest.TestCase):
    """Checks that dygraph and to_static training give the same losses."""

    def test_dygraph_static_same_loss(self):
        """
        Train once with to_static disabled and once enabled, then compare
        the per-batch losses.
        """
        if paddle.is_compiled_with_cuda():
            paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True})

        conf_dict = create_conf_dict()
        dygraph_loss = train(conf_dict, to_static=False)
        static_loss = train(conf_dict, to_static=True)

        # Lengths are checked first, so zip() below drops nothing.
        self.assertEqual(len(dygraph_loss), len(static_loss))
        for eager_value, static_value in zip(dygraph_loss, static_loss):
            self.assertAlmostEqual(eager_value, static_value)
# Run the test case when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册