Commit 49d05197 authored by Z zhhsplendid

Split PR, test=develop

Parent f4f5f3f2
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import reduce
import paddle
class EmbeddingLayer(object):
"""
Embedding Layer class
"""
def __init__(self, dict_size, emb_dim, name="emb", padding_idx=None):
"""
initialize
"""
self.dict_size = dict_size
self.emb_dim = emb_dim
self.name = name
self.padding_idx = padding_idx
def ops(self):
"""
operation
"""
        # TODO(huihuangzheng): The original code set is_sparse=True, but it
        # causes a crash in dy2stat. Set it to True after fixing it.
emb = paddle.fluid.dygraph.Embedding(
size=[self.dict_size, self.emb_dim],
is_sparse=True,
padding_idx=self.padding_idx,
param_attr=paddle.ParamAttr(
name=self.name, initializer=paddle.nn.initializer.Xavier()))
return emb
class FCLayer(object):
"""
    Fully Connected Layer class
"""
def __init__(self, fc_dim, act, name="fc"):
"""
initialize
"""
self.fc_dim = fc_dim
self.act = act
self.name = name
def ops(self):
"""
operation
"""
fc = FC(size=self.fc_dim,
param_attr=paddle.ParamAttr(name="%s.w" % self.name),
bias_attr=paddle.ParamAttr(name="%s.b" % self.name),
act=self.act)
return fc
class ConcatLayer(object):
"""
    Concat Layer class
"""
def __init__(self, axis):
"""
initialize
"""
self.axis = axis
def ops(self, inputs):
"""
operation
"""
concat = paddle.concat(x=inputs, axis=self.axis)
return concat
class ReduceMeanLayer(object):
"""
Reduce Mean Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, input):
"""
operation
"""
mean = paddle.reduce_mean(input)
return mean
class CosSimLayer(object):
"""
    Cosine Similarity Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, x, y):
"""
operation
"""
sim = paddle.metric.cos_sim(x, y)
return sim
class ElementwiseMaxLayer(object):
"""
Elementwise Max Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, x, y):
"""
operation
"""
max = paddle.maximum(x=x, y=y)
return max
class ElementwiseAddLayer(object):
"""
Elementwise Add Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, x, y):
"""
operation
"""
add = paddle.add(x=x, y=y)
return add
class ElementwiseSubLayer(object):
"""
    Elementwise Sub Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, x, y):
"""
operation
"""
sub = paddle.fluid.layers.elementwise_sub(x, y)
return sub
class ConstantLayer(object):
"""
    Constant Layer class: generates a constant tensor
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, input, shape, dtype, value):
"""
operation
"""
shape = list(shape)
input_shape = paddle.shape(input)
shape[0] = input_shape[0]
constant = paddle.fill_constant(shape, dtype, value)
return constant
class SoftsignLayer(object):
"""
Softsign Layer class
"""
def __init__(self):
"""
initialize
"""
pass
def ops(self, input):
"""
operation
"""
softsign = paddle.nn.functional.softsign(input)
return softsign
class FC(paddle.nn.Layer):
"""
This interface is used to construct a callable object of the ``FC`` class.
For more details, refer to code examples.
It creates a fully connected layer in the network. It can take
one or multiple ``Tensor`` as its inputs. It creates a Variable called weights for each input tensor,
which represents a fully connected weight matrix from each input unit to
each output unit. The fully connected layer multiplies each input tensor
with its corresponding weight to produce an output Tensor with shape [N, `size`],
where N is batch size. If multiple input tensors are given, the results of
multiple output tensors with shape [N, `size`] will be summed up. If ``bias_attr``
is not None, a bias variable will be created and added to the output.
Finally, if ``act`` is not None, it will be applied to the output as well.
    When the input is a single ``Tensor`` :
.. math::
Out = Act({XW + b})
    When the input is a list of ``Tensor`` :
.. math::
Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
In the above equation:
    * :math:`N`: Number of the inputs. N equals len(input) if input is a list of ``Tensor`` .
* :math:`X_i`: The i-th input ``Tensor`` .
    * :math:`W_i`: The i-th weight matrix corresponding to the i-th input tensor.
* :math:`b`: The bias parameter created by this layer (if needed).
* :math:`Act`: The activation function.
* :math:`Out`: The output ``Tensor`` .
See below for an example.
.. code-block:: text
Given:
data_1.data = [[[0.1, 0.2]]]
data_1.shape = (1, 1, 2) # 1 is batch_size
data_2.data = [[[0.1, 0.2, 0.3]]]
data_2.shape = (1, 1, 3) # 1 is batch_size
fc = FC("fc", 2, num_flatten_dims=2)
out = fc(input=[data_1, data_2])
Then:
out.data = [[[0.182996 -0.474117]]]
out.shape = (1, 1, 2)
Parameters:
size(int): The number of output units in this layer.
num_flatten_dims (int, optional): The fc layer can accept an input tensor with more than
two dimensions. If this happens, the multi-dimension tensor will first be flattened
into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
            dimensions will be flattened to form the first dimension of the final matrix (height of
the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
form the second dimension of the final matrix (width of the matrix). For example, suppose
`X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30]. Default: 1
param_attr (ParamAttr or list of ParamAttr, optional): The parameter attribute for learnable
weights(Parameter) of this layer. Default: None.
bias_attr (ParamAttr or list of ParamAttr, optional): The attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act (str, optional): Activation to be applied to the output of this layer. Default: None.
is_test(bool, optional): A flag indicating whether execution is in test phase. Default: False.
dtype(str, optional): Dtype used for weight, it can be "float32" or "float64". Default: "float32".
Attribute:
**weight** (list of Parameter): the learnable weights of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Returns:
None
Examples:
.. code-block:: python
from paddle.fluid.dygraph.base import to_variable
import paddle.fluid as fluid
from paddle.fluid.dygraph import FC
import numpy as np
data = np.random.uniform(-1, 1, [30, 10, 32]).astype('float32')
with fluid.dygraph.guard():
fc = FC("fc", 64, num_flatten_dims=2)
data = to_variable(data)
conv = fc(data)
"""
def __init__(self,
size,
num_flatten_dims=1,
param_attr=None,
bias_attr=None,
act=None,
is_test=False,
dtype="float32"):
super(FC, self).__init__(dtype)
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self.__w = list()
def _build_once(self, input):
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
input_shape = inp.shape
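            # Create one weight per input tensor, shaped
            # [prod(input_shape[num_flatten_dims:]), size].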
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:],
1)
] + [self._size]
self.__w.append(
self.add_parameter(
'_w%d' % i,
self.create_parameter(
attr=param,
shape=param_shape,
dtype=self._dtype,
is_bias=False)))
i += 1
size = list([self._size])
self._b = self.create_parameter(
attr=self._bias_attr, shape=size, dtype=self._dtype, is_bias=True)
# TODO(songyouwei): We should remove _w property
@property
def _w(self, i=0):
return self.__w[i]
@_w.setter
def _w(self, value, i=0):
        assert isinstance(self.__w[i], paddle.fluid.framework.Variable)
self.__w[i].set_value(value)
@property
def weight(self):
if len(self.__w) > 1:
return self.__w
else:
return self.__w[0]
@weight.setter
def weight(self, value):
if len(self.__w) == 1:
self.__w[0] = value
@property
def bias(self):
return self._b
@bias.setter
def bias(self, value):
self._b = value
def forward(self, input):
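        # Flatten each input to 2-D and multiply it by its weight via a
        # "mul" op; the partial results are summed, the bias is added, and
        # the activation (if any) is applied last.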
mul_results = list()
i = 0
for inp, param in self._helper.iter_inputs_and_params(input,
self._param_attr):
tmp = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": inp,
"Y": self.__w[i]},
outputs={"Out": tmp},
attrs={
"x_num_col_dims": self._num_flatten_dims,
"y_num_col_dims": 1
})
i += 1
mul_results.append(tmp)
if len(mul_results) == 1:
pre_bias = mul_results[0]
else:
pre_bias = self._helper.create_variable_for_type_inference(
self._dtype)
self._helper.append_op(
type="sum",
inputs={"X": mul_results},
outputs={"Out": pre_bias},
attrs={"use_mkldnn": False})
if self._b is not None:
pre_activation = self._helper.create_variable_for_type_inference(
dtype=self._dtype)
self._helper.append_op(
type='elementwise_add',
inputs={'X': [pre_bias],
'Y': [self._b]},
outputs={'Out': [pre_activation]},
attrs={'axis': self._num_flatten_dims})
else:
pre_activation = pre_bias
# Currently, we don't support inplace in dygraph mode
return self._helper.append_activation(pre_activation, act=self._act)
class HingeLoss(object):
"""
    Hinge Loss class
"""
def __init__(self, conf_dict):
"""
initialize
"""
self.margin = conf_dict["loss"]["margin"]
def compute(self, pos, neg):
"""
compute loss
"""
elementwise_max = ElementwiseMaxLayer()
elementwise_add = ElementwiseAddLayer()
elementwise_sub = ElementwiseSubLayer()
constant = ConstantLayer()
reduce_mean = ReduceMeanLayer()
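        # Pairwise hinge loss: mean(max(0, margin + neg - pos)), which
        # pushes the positive score above the negative one by `margin`.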
loss = reduce_mean.ops(
elementwise_max.ops(
constant.ops(neg, neg.shape, "float32", 0.0),
elementwise_add.ops(
elementwise_sub.ops(neg, pos),
constant.ops(neg, neg.shape, "float32", self.margin))))
return loss
class BOW(paddle.nn.Layer):
"""
    Bag-of-Words (BOW) matching network
"""
def __init__(self, conf_dict):
"""
initialize
"""
super(BOW, self).__init__()
self.dict_size = conf_dict["dict_size"]
self.task_mode = conf_dict["task_mode"]
self.emb_dim = conf_dict["net"]["emb_dim"]
self.bow_dim = conf_dict["net"]["bow_dim"]
self.seq_len = conf_dict["seq_len"]
self.emb_layer = EmbeddingLayer(self.dict_size, self.emb_dim,
"emb").ops()
self.bow_layer = paddle.nn.Linear(
in_features=self.bow_dim, out_features=self.bow_dim)
self.bow_layer_po = FCLayer(self.bow_dim, None, "fc").ops()
self.softmax_layer = FCLayer(2, "softmax", "cos_sim").ops()
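    # declarative (dy2stat) converts this dygraph forward into a static
    # graph program when the ProgramTranslator is enabled.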
@paddle.fluid.dygraph.declarative
def forward(self, left, right):
"""
Forward network
"""
# embedding layer
left_emb = self.emb_layer(left)
right_emb = self.emb_layer(right)
left_emb = paddle.fluid.layers.reshape(
left_emb, shape=[-1, self.seq_len, self.bow_dim])
right_emb = paddle.fluid.layers.reshape(
right_emb, shape=[-1, self.seq_len, self.bow_dim])
bow_left = paddle.reduce_sum(left_emb, dim=1)
bow_right = paddle.reduce_sum(right_emb, dim=1)
softsign_layer = SoftsignLayer()
left_soft = softsign_layer.ops(bow_left)
right_soft = softsign_layer.ops(bow_right)
# matching layer
if self.task_mode == "pairwise":
left_bow = self.bow_layer(left_soft)
right_bow = self.bow_layer(right_soft)
cos_sim_layer = CosSimLayer()
pred = cos_sim_layer.ops(left_bow, right_bow)
return left_bow, pred
else:
concat_layer = ConcatLayer(1)
concat = concat_layer.ops([left_soft, right_soft])
concat_fc = self.bow_layer_po(concat)
pred = self.softmax_layer(concat_fc)
return left_soft, pred
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
import time
import unittest
import numpy as np
import paddle
from predictor_utils import PredictorTools
SEED = 2020
IMAGENET1000 = 1281167
base_lr = 0.001
momentum_rate = 0.9
l2_decay = 1e-4
# NOTE: Reduce batch_size from 8 to 2 to avoid unittest timeout.
batch_size = 2
epoch_num = 1
place = paddle.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda() \
else paddle.CPUPlace()
MODEL_SAVE_PATH = "./resnet_v2.inference.model"
DY_STATE_DICT_SAVE_PATH = "./resnet_v2.dygraph"
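# ProgramTranslator.enable() toggles dynamic-to-static conversion globally
# for functions decorated with `declarative`.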
program_translator = paddle.jit.ProgramTranslator()
if paddle.fluid.is_compiled_with_cuda():
paddle.fluid.set_flags({'FLAGS_cudnn_deterministic': True})
def optimizer_setting(parameter_list=None):
optimizer = paddle.optimizer.Momentum(
learning_rate=base_lr,
momentum=momentum_rate,
weight_decay=paddle.fluid.regularizer.L2Decay(l2_decay),
parameters=parameter_list)
return optimizer
class ConvBNLayer(paddle.nn.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = paddle.nn.Conv2d(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
bias_attr=False)
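        # (filter_size - 1) // 2 padding keeps the spatial size unchanged
        # at stride 1 for odd filter sizes.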
self._batch_norm = paddle.nn.BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(paddle.nn.Layer):
def __init__(self, num_channels, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
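        # If the residual input does not match conv2's output in channels
        # or stride, project it with a 1x1 ConvBNLayer before the addition.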
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = paddle.add(x=short, y=conv2)
layer_helper = paddle.fluid.layer_helper.LayerHelper(
self.full_name(), act='relu')
return layer_helper.append_activation(y)
class ResNet(paddle.nn.Layer):
def __init__(self, layers=50, class_dim=102):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [50, 101, 152]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
self.pool2d_max = paddle.fluid.dygraph.nn.Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut))
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = paddle.fluid.dygraph.nn.Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
self.pool2d_avg_output = num_filters[len(num_filters) - 1] * 4 * 1 * 1
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = paddle.nn.Linear(
in_features=self.pool2d_avg_output,
out_features=class_dim,
weight_attr=paddle.ParamAttr(
initializer=paddle.nn.initializer.Uniform(-stdv, stdv)))
@paddle.fluid.dygraph.declarative
def forward(self, inputs):
y = self.conv(inputs)
y = self.pool2d_max(y)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = paddle.fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
pred = self.out(y)
pred = paddle.nn.functional.softmax(pred)
return pred
def reader_decorator(reader):
def __reader__():
for item in reader():
img = np.array(item[0]).astype('float32').reshape(3, 224, 224)
label = np.array(item[1]).astype('int64').reshape(1)
yield img, label
return __reader__
def train(to_static):
"""
    Tests the model decorated by `paddle.fluid.dygraph.declarative`. For users, the
    model is defined in dygraph mode and trained in static graph mode.
"""
paddle.disable_static(place)
np.random.seed(SEED)
paddle.manual_seed(SEED)
paddle.framework.random._manual_program_seed(SEED)
train_reader = paddle.batch(
reader_decorator(paddle.dataset.flowers.train(use_xmap=False)),
batch_size=batch_size,
drop_last=True)
data_loader = paddle.io.DataLoader.from_generator(capacity=5, iterable=True)
data_loader.set_sample_list_generator(train_reader)
resnet = ResNet()
optimizer = optimizer_setting(parameter_list=resnet.parameters())
for epoch in range(epoch_num):
total_loss = 0.0
total_acc1 = 0.0
total_acc5 = 0.0
total_sample = 0
for batch_id, data in enumerate(data_loader()):
start_time = time.time()
img, label = data
pred = resnet(img)
loss = paddle.fluid.layers.cross_entropy(input=pred, label=label)
avg_loss = paddle.mean(x=loss)
acc_top1 = paddle.metric.accuracy(input=pred, label=label, k=1)
acc_top5 = paddle.metric.accuracy(input=pred, label=label, k=5)
avg_loss.backward()
optimizer.minimize(avg_loss)
resnet.clear_gradients()
total_loss += avg_loss
total_acc1 += acc_top1
total_acc5 += acc_top5
total_sample += 1
end_time = time.time()
if batch_id % 2 == 0:
print( "epoch %d | batch step %d, loss %0.3f, acc1 %0.3f, acc5 %0.3f, time %f" % \
( epoch, batch_id, total_loss.numpy() / total_sample, \
total_acc1.numpy() / total_sample, total_acc5.numpy() / total_sample, end_time-start_time))
if batch_id == 10:
if to_static:
paddle.fluid.dygraph.jit.save(resnet, MODEL_SAVE_PATH)
else:
paddle.fluid.dygraph.save_dygraph(resnet.state_dict(),
DY_STATE_DICT_SAVE_PATH)
                # avoid the dataloader throwing an abort signal
data_loader._reset()
break
paddle.enable_static()
return total_loss.numpy()
def predict_dygraph(data):
program_translator.enable(False)
paddle.disable_static(place)
resnet = ResNet()
model_dict, _ = paddle.fluid.dygraph.load_dygraph(DY_STATE_DICT_SAVE_PATH)
resnet.set_dict(model_dict)
resnet.eval()
pred_res = resnet(
paddle.to_tensor(
data=data, dtype=None, place=None, stop_gradient=True))
ret = pred_res.numpy()
paddle.enable_static()
return ret
def predict_static(data):
exe = paddle.static.Executor(place)
[inference_program, feed_target_names,
fetch_targets] = paddle.io.load_inference_model(
MODEL_SAVE_PATH,
executor=exe,
params_filename=paddle.fluid.dygraph.io.VARIABLE_FILENAME)
pred_res = exe.run(inference_program,
feed={feed_target_names[0]: data},
fetch_list=fetch_targets)
return pred_res[0]
def predict_dygraph_jit(data):
paddle.disable_static(place)
resnet = paddle.fluid.dygraph.jit.load(MODEL_SAVE_PATH)
resnet.eval()
pred_res = resnet(data)
ret = pred_res.numpy()
paddle.enable_static()
return ret
def predict_analysis_inference(data):
output = PredictorTools(MODEL_SAVE_PATH,
paddle.fluid.dygraph.io.VARIABLE_FILENAME, [data])
out = output()
return out
class TestResnet(unittest.TestCase):
def train(self, to_static):
program_translator.enable(to_static)
return train(to_static)
def verify_predict(self):
image = np.random.random([1, 3, 224, 224]).astype('float32')
dy_pre = predict_dygraph(image)
st_pre = predict_static(image)
dy_jit_pre = predict_dygraph_jit(image)
predictor_pre = predict_analysis_inference(image)
self.assertTrue(
np.allclose(dy_pre, st_pre),
msg="dy_pre:\n {}\n, st_pre: \n{}.".format(dy_pre, st_pre))
self.assertTrue(
np.allclose(dy_jit_pre, st_pre),
msg="dy_jit_pre:\n {}\n, st_pre: \n{}.".format(dy_jit_pre, st_pre))
self.assertTrue(
np.allclose(predictor_pre, st_pre),
msg="predictor_pre:\n {}\n, st_pre: \n{}.".format(predictor_pre,
st_pre))
def test_resnet(self):
static_loss = self.train(to_static=True)
dygraph_loss = self.train(to_static=False)
self.assertTrue(
np.allclose(static_loss, dygraph_loss),
msg="static_loss: {} \n dygraph_loss: {}".format(static_loss,
dygraph_loss))
self.verify_predict()
def test_in_static_mode_mkldnn(self):
paddle.fluid.set_flags({'FLAGS_use_mkldnn': True})
try:
train(to_static=True)
finally:
paddle.fluid.set_flags({'FLAGS_use_mkldnn': False})
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import numpy as np
import paddle
import random
import unittest
from simnet_dygraph_model_v2 import BOW, HingeLoss
SEED = 102
random.seed(SEED)
def create_conf_dict():
conf_dict = {}
conf_dict["task_mode"] = "pairwise"
conf_dict["net"] = {"emb_dim": 128, "bow_dim": 128, "hidden_dim": 128}
conf_dict["loss"] = {"margin": 0.1}
return conf_dict
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument(
"--batch_size",
type=int,
default=32,
help="Total examples' number in batch for training.")
parser.add_argument(
"--seq_len", type=int, default=32, help="The length of each sentence.")
parser.add_argument(
"--epoch", type=int, default=1, help="The number of training epoch.")
parser.add_argument(
"--fake_sample_size",
type=int,
default=128,
help="The number of samples of fake data.")
args = parser.parse_args([])
return args
args = parse_args()
def fake_vocabulary():
vocab = {}
vocab["<unk>"] = 0
for i in range(26):
c = chr(ord('a') + i)
vocab[c] = i + 1
return vocab
vocab = fake_vocabulary()
class FakeReaderProcessor(object):
def __init__(self, args, vocab):
self.vocab = vocab
self.seq_len = args.seq_len
self.sample_size = args.fake_sample_size
self.data_samples = []
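        # Fake pairwise samples: pos_title duplicates the query, while
        # neg_title maps every token q to 26 - q.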
for i in range(self.sample_size):
query = [random.randint(0, 26) for i in range(self.seq_len)]
pos_title = query[:]
neg_title = [26 - q for q in query]
self.data_samples.append(
np.array([query, pos_title, neg_title]).astype(np.int64))
def get_reader(self, mode, epoch=0):
def reader_with_pairwise():
if mode == "train":
for i in range(self.sample_size):
yield self.data_samples[i]
return reader_with_pairwise
simnet_process = FakeReaderProcessor(args, vocab)
def train(conf_dict, to_static):
"""
train process
"""
program_translator = paddle.jit.ProgramTranslator()
program_translator.enable(to_static)
# Get device
if paddle.fluid.is_compiled_with_cuda():
place = paddle.CUDAPlace(0)
else:
place = paddle.CPUPlace()
paddle.disable_static(place)
paddle.manual_seed(SEED)
paddle.framework.random._manual_program_seed(SEED)
conf_dict['dict_size'] = len(vocab)
conf_dict['seq_len'] = args.seq_len
net = BOW(conf_dict)
loss = HingeLoss(conf_dict)
optimizer = paddle.optimizer.Adam(
learning_rate=0.001,
beta1=0.9,
beta2=0.999,
epsilon=1e-08,
parameters=net.parameters())
metric = paddle.fluid.metrics.Auc(name="auc")
global_step = 0
losses = []
train_loader = paddle.io.DataLoader.from_generator(
capacity=16, return_list=True, iterable=True, use_double_buffer=True)
get_train_examples = simnet_process.get_reader("train", epoch=args.epoch)
train_loader.set_sample_list_generator(
paddle.batch(
get_train_examples, batch_size=args.batch_size), place)
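    # Pairwise training loop: score (query, pos) and (query, neg) pairs and
    # minimize the hinge loss between the two scores.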
for left, pos_right, neg_right in train_loader():
left = paddle.fluid.layers.reshape(left, shape=[-1, 1])
pos_right = paddle.fluid.layers.reshape(pos_right, shape=[-1, 1])
neg_right = paddle.fluid.layers.reshape(neg_right, shape=[-1, 1])
net.train()
global_step += 1
left_feat, pos_score = net(left, pos_right)
pred = pos_score
_, neg_score = net(left, neg_right)
avg_cost = loss.compute(pos_score, neg_score)
losses.append(np.mean(avg_cost.numpy()))
avg_cost.backward()
optimizer.minimize(avg_cost)
net.clear_gradients()
paddle.enable_static()
return losses
class TestSimnet(unittest.TestCase):
def test_dygraph_static_same_loss(self):
if paddle.fluid.is_compiled_with_cuda():
paddle.fluid.set_flags({"FLAGS_cudnn_deterministic": True})
conf_dict = create_conf_dict()
dygraph_loss = train(conf_dict, to_static=False)
static_loss = train(conf_dict, to_static=True)
self.assertEqual(len(dygraph_loss), len(static_loss))
for i in range(len(dygraph_loss)):
self.assertAlmostEqual(dygraph_loss[i], static_loss[i])
if __name__ == '__main__':
unittest.main()