Unverified · Commit 0d9d25d4 · Authored by: J Jiabin Yang · Committed by: GitHub

Feature/refactor layers to Layers (#16337)

* test=develop, add some Layers and tests

* test=develop, add more layers

* test=develop, add more layers

* test=develop, add force cpu option

* Update test_layers.py

remove pdb

* test=develop, refine code
Parent 850b7371
...
@@ -33,7 +33,7 @@ class LookupTableOp : public framework::OperatorWithKernel {
     auto table_dims = ctx->GetInputDim("W");
     auto ids_dims = ctx->GetInputDim("Ids");
     int ids_rank = ids_dims.size();
+    VLOG(5) << "ids rank is " << ids_rank << std::endl;
     PADDLE_ENFORCE_EQ(table_dims.size(), 2);
     PADDLE_ENFORCE_EQ(ids_dims[ids_rank - 1], 1,
                       "The last dimension of the 'Ids' tensor must be 1.");
...
...
@@ -15,6 +15,7 @@
 from __future__ import print_function
 from six.moves import reduce
+import numpy as np
 from .. import core
 from ..layers import utils
@@ -22,9 +23,11 @@ from . import layers
 from ..framework import Variable, OpProtoHolder
 from ..layers import layer_function_generator
 from ..param_attr import ParamAttr
-from ..initializer import Normal, Constant
+from ..initializer import Normal, Constant, NumpyArrayInitializer

 __all__ = [
-    'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm'
+    'Conv2D', 'Pool2D', 'FC', 'BatchNorm', 'Embedding', 'GRUUnit', 'LayerNorm',
+    'NCE', 'PRelu', 'BilinearTensorProduct', 'Conv2DTranspose', 'SequenceConv'
 ]
@@ -729,3 +732,668 @@ class GRUUnit(layers.Layer):
         })
         return updated_hidden, reset_hidden_pre, gate
class NCE(layers.Layer):
"""
${comment}
Args:
input (Variable): input variable.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
sample is 1.0.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of nce. If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, nce
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
num_neg_samples (int): ${num_neg_samples_comment}
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
sampler (str): The sampler used to sample classes from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
Default: 'uniform'.
custom_dist (float[]): A float[] with size=num_total_classes.
It is used when sampler is set to 'custom_dist'.
custom_dist[i] is the probability of the i-th class being sampled.
Default: None.
seed (int): The seed used in the sampler. Default: 0.
is_sparse (bool): The flag indicating whether to use sparse update; if True, weight@GRAD and bias@GRAD will be changed to SelectedRows. Default: False.
Returns:
Variable: The output nce loss.
Examples:
.. code-block:: python
window_size = 5
words = []
for i in range(window_size):
words.append(layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
dict_size = 10000
label_word = int(window_size / 2) + 1
embs = []
for i in range(window_size):
if i == label_word:
continue
emb = layers.embedding(input=words[i], size=[dict_size, 32],
param_attr='emb.w', is_sparse=True)
embs.append(emb)
embs = layers.concat(input=embs, axis=1)
loss = layers.nce(input=embs, label=words[label_word],
num_total_classes=dict_size, param_attr='nce.w',
bias_attr='nce.b')
#or use custom distribution
dist = fluid.layers.assign(input=np.array([0.05,0.5,0.1,0.3,0.05]).astype("float32"))
loss = layers.nce(input=embs, label=words[label_word],
num_total_classes=5, param_attr='nce.w',
bias_attr='nce.b',
num_neg_samples=3,
sampler="custom_dist",
custom_dist=dist)
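# Or, as a sketch (not in the original docstring), use the NCE Layer
# class from this commit directly; note the class expects custom_dist
# as a Python list rather than a Variable:
nce = NCE('nce',
          num_total_classes=5,
          num_neg_samples=3,
          sampler="custom_dist",
          custom_dist=[0.05, 0.5, 0.1, 0.3, 0.05],
          param_attr='nce.w',
          bias_attr='nce.b')
loss = nce(embs, words[label_word])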
"""
def __init__(self,
name_scope,
num_total_classes,
param_attr=None,
bias_attr=None,
num_neg_samples=None,
sampler="uniform",
custom_dist=None,
seed=0,
is_sparse=False):
super(NCE, self).__init__(name_scope)
self._param_attr = param_attr
self._bias_attr = bias_attr
self._num_total_classes = num_total_classes
self._inputs = dict()
if sampler == "uniform":
sampler = 0
elif sampler == "log_uniform":
sampler = 1
elif sampler == "custom_dist":
assert custom_dist is not None
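# Editor's note (explanatory, not in the original): the block below
# builds Walker's alias table for O(1) sampling from custom_dist.
# Probabilities are scaled by the number of classes; classes whose
# scaled mass exceeds 1 ("bigs") donate their excess to classes below
# 1 ("littles") until every bucket i holds a residual probability
# alias_probs_[i] plus a fallback class alias_[i] (-1 if unused).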
# assert isinstance(custom_dist, Variable)
custom_dist_len = len(custom_dist)
alias_probs_ = [0] * custom_dist_len
alias_ = [0] * custom_dist_len
bigs = []
littles = []
for i in range(custom_dist_len):
normal_prob = custom_dist[i] * custom_dist_len
if normal_prob - 1.0 > 0:
bigs.append((i, normal_prob))
elif 1.0 - normal_prob > 0:
littles.append((i, normal_prob))
else:
alias_probs_[i] = normal_prob
alias_[i] = -1
while len(bigs) and len(littles):
big = bigs.pop(0)
little = littles.pop(0)
big_idx = big[0]
big_prob = big[1]
alias_probs_[little[0]] = little[1]
alias_[little[0]] = big_idx
big_left = big[1] + little[1] - 1
if big_left - 1.0 > 0:
bigs.append((big_idx, big_left))
elif 1.0 - big_left > 0:
littles.append((big_idx, big_left))
else:
alias_probs_[big_idx] = big_left
alias_[big_idx] = -1
if len(bigs):
big = bigs.pop(0)
alias_probs_[big[0]] = 1.0
alias_[big[0]] = -1
if len(littles):
little = littles.pop(0)
alias_probs_[little[0]] = 1.0
alias_[little[0]] = -1
def _init_by_numpy_array(numpy_array):
ret = self.create_parameter(
attr=ParamAttr(),
shape=numpy_array.shape,
dtype=numpy_array.dtype,
default_initializer=NumpyArrayInitializer(numpy_array))
ret.stop_gradient = True
return ret
self._inputs['CustomDistProbs'] = _init_by_numpy_array(
np.array(custom_dist).astype('float32'))
self._inputs['CustomDistAlias'] = _init_by_numpy_array(
np.array(alias_).astype('int32'))
self._inputs['CustomDistAliasProbs'] = _init_by_numpy_array(
np.array(alias_probs_).astype('float32'))
sampler = 2
else:
raise Exception("Unsupported sampler type.")
if num_neg_samples is None:
num_neg_samples = 10
else:
num_neg_samples = int(num_neg_samples)
self._num_neg_samples = num_neg_samples
remote_prefetch = is_sparse
print("With sparse mode, if your model has only small parameters, prefetch may cause a slowdown.")
self._attrs = {
'num_total_classes': int(num_total_classes),
'num_neg_samples': num_neg_samples,
'seed': seed,
'sampler': sampler,
'is_sparse': is_sparse,
'remote_prefetch': remote_prefetch
}
def _build_once(self, input, label, sample_weight=None):
assert isinstance(input, Variable)
assert isinstance(label, Variable)
dim = input.shape[1]
num_true_class = label.shape[1]
self._w = self.create_parameter(
attr=self._param_attr,
shape=[self._num_total_classes, dim],
is_bias=False,
dtype=input.dtype)
if self._bias_attr:
self._b = self.create_parameter(
attr=self._bias_attr,
shape=[self._num_total_classes, 1],
is_bias=True,
dtype=input.dtype)
self._inputs['Bias'] = self._b
self._inputs['Weight'] = self._w
def forward(self, input, label, sample_weight=None):
assert isinstance(input, Variable)
assert isinstance(label, Variable)
self._inputs['Input'] = input
self._inputs['Label'] = label
self._inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
cost = self._helper.create_variable_for_type_inference(
dtype=input.dtype)
sample_logits = self._helper.create_variable_for_type_inference(
dtype=input.dtype)
sample_labels = self._helper.create_variable_for_type_inference(
dtype=label.dtype)
self._helper.append_op(
type='nce',
inputs=self._inputs,
outputs={
'Cost': cost,
'SampleLogits': sample_logits,
'SampleLabels': sample_labels
},
attrs=self._attrs)
return cost / (self._num_neg_samples + 1)
class PRelu(layers.Layer):
"""
Equation:
.. math::
y = \max(0, x) + \\alpha * \min(0, x)
Args:
x (Variable): The input tensor.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight (alpha).
mode (string): The mode for weight sharing. It supports 'all', 'channel'
and 'element'. 'all': all elements share the same weight;
'channel': elements in a channel share the same weight;
'element': each element has its own weight.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The output tensor with the same shape as input.
Examples:
.. code-block:: python
x = fluid.layers.data(name="x", shape=[10,10], dtype="float32")
mode = 'channel'
output = fluid.layers.prelu(x,mode)
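# Or, a sketch of the Layer-class form exercised by this commit's
# unit test (imperative mode; 'base' refers to fluid.imperative.base,
# as in the tests, and inp_np is any float32 NCHW numpy array):
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
with fluid.imperative.guard():
    prelu = PRelu('prelu', mode='channel',
                  param_attr=ParamAttr(initializer=Constant(1.0)))
    out = prelu(base.to_variable(inp_np))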
"""
def __init__(self, name_scope, mode, param_attr=None):
super(PRelu, self).__init__(name_scope)
self._mode = mode
self._param_attr = param_attr
if self._mode not in ['all', 'channel', 'element']:
raise ValueError('mode should be one of all, channel, element.')
self._alpha_shape = [1]
def _build_once(self, input):
if self._mode == 'channel':
self._alpha_shape = [1, input.shape[1], 1, 1]
elif self._mode == 'element':
self._alpha_shape = input.shape
self._dtype = self._helper.input_dtype(input)
self._alpha = self.create_parameter(
attr=self._param_attr,
shape=self._alpha_shape,
dtype='float32',
is_bias=False,
default_initializer=Constant(1.0))
def forward(self, input):
out = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type="prelu",
inputs={"X": input,
'Alpha': self._alpha},
attrs={"mode": self._mode},
outputs={"Out": out})
return out
class BilinearTensorProduct(layers.Layer):
"""
**Add Bilinear Tensor Product Layer**
This layer performs bilinear tensor product on two inputs.
For example:
.. math::
out_{i} = x * W_{i} * {y^\mathrm{T}}, i=0,1,...,size-1
In this formula:
- :math:`x`: the first input contains M elements, shape is [batch_size, M].
- :math:`y`: the second input contains N elements, shape is [batch_size, N].
- :math:`W_{i}`: the i-th learned weight, shape is [M, N]
- :math:`out_{i}`: the i-th element of out, shape is [batch_size, size].
- :math:`y^\mathrm{T}`: the transpose of :math:`y`.
Args:
x (Variable): 2-D input tensor with shape [batch_size, M]
y (Variable): 2-D input tensor with shape [batch_size, N]
size (int): The dimension of this layer.
act (str, default None): Activation to be applied to the output of this layer.
name (str, default None): The name of this layer.
param_attr (ParamAttr, default None): The parameter attribute for the learnable w.
parameters/weights of this layer.
bias_attr (ParamAttr, default None): The parameter attribute for the bias
of this layer. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
Returns:
Variable: A 2-D Tensor of shape [batch_size, size].
Examples:
.. code-block:: python
tensor = bilinear_tensor_product(x=layer1, y=layer2, size=1000)
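# Or, a sketch of the equivalent Layer-class form added in this commit:
btp = BilinearTensorProduct('btp', size=1000)
tensor = btp(layer1, layer2)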
"""
def __init__(self,
name_scope,
size,
name=None,
act=None,
param_attr=None,
bias_attr=None):
super(BilinearTensorProduct, self).__init__(name_scope)
self._param_attr = param_attr
self._bias_attr = bias_attr
self._act = act
self._size = size
self._name = name
self._inputs = dict()
def _build_once(self, x, y):
self._dtype = self._helper.input_dtype(x)
param_shape = [self._size, x.shape[1], y.shape[1]]
self._w = self.create_parameter(
attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=False)
if self._bias_attr:
bias_size = [1, self._size]
bias = self.create_parameter(
attr=self._bias_attr,
shape=bias_size,
dtype=self._dtype,
is_bias=True)
self._inputs["Bias"] = bias
def forward(self, x, y):
self._inputs = {"X": x, "Y": y, "Weight": self._w}
if self._name is not None:
out = self._helper.create_variable(
name=".".join([self.full_name(), self._name]),
dtype=self._dtype,
persistable=False)
else:
out = self._helper.create_variable(
dtype=self._dtype, persistable=False)
self._helper.append_op(
type="bilinear_tensor_product",
inputs=self._inputs,
outputs={"Out": out})
# add activation
return self._helper.append_activation(out)
class Conv2DTranspose(layers.Layer):
"""
**Convolution2D transpose layer**
The convolution2D transpose layer calculates the output based on the input,
filter, and dilations, strides, paddings. Input(Input) and output(Output)
are in NCHW format. Where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Parameters (dilations, strides, paddings) are two-element lists; the two
elements represent height and width, respectively. For details of the
convolution transpose layer, please refer to the explanation and references
`therein <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_.
If bias attribution and activation type are provided, bias is added to
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a tensor with NCHW format.
* :math:`W`: Filter value, a tensor with MCHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
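For example (a worked case matching this commit's unit test): with
:math:`H_{in} = W_{in} = 2`, stride 1, padding 0, dilation 1 and a
:math:`27 \\times 27` filter,
:math:`H^\prime_{out} = (2 - 1) * 1 - 2 * 0 + 1 * (27 - 1) + 1 = 28`.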
Args:
input(Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the number of
channels in the output image.
output_size(int|tuple|None): The output image size. If output_size is a
tuple, it must contain two integers, (image_H, image_W). If None,
filter_size, padding, and stride are used to calculate output_size.
If output_size and filter_size are specified at the same time, they
should follow the formula above.
filter_size(int|tuple|None): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. If None, the output size is
used to calculate filter_size.
padding(int|tuple): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: padding = 0.
stride(int|tuple): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: stride = 1.
dilation(int|tuple): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: dilation = 1.
groups(int): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups = 1.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
use_cudnn(bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True.
act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
name(str|None): A name for this layer (optional). If set to None, the layer
will be named automatically. Default: None.
Returns:
Variable: The tensor variable storing the convolution transpose result.
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
conv2d_transpose = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3)
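# Or, a sketch of the equivalent Layer-class form added in this commit
# (reusing 'data' from the line above):
conv2d_transpose = Conv2DTranspose(
    'conv2d_transpose', num_filters=2, filter_size=3)
out = conv2d_transpose(data)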
"""
def __init__(self,
name_scope,
num_filters,
output_size=None,
filter_size=None,
padding=0,
stride=1,
dilation=1,
groups=None,
param_attr=None,
bias_attr=None,
use_cudnn=True,
act=None):
super(Conv2DTranspose, self).__init__(name_scope)
assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
self._param_attr = param_attr
self._bias_attr = bias_attr
self._groups = groups
self._num_filters = num_filters
self._use_cudnn = use_cudnn
self._padding = padding
self._stride = stride
self._dilation = dilation
self._filter_size = filter_size
self._output_size = output_size
self._op_type = 'conv2d_transpose'
def _build_once(self, input):
input_channel = input.shape[1]
if (input_channel == self._groups and
self._num_filters == input_channel and not self._use_cudnn):
self._op_type = 'depthwise_conv2d_transpose'
if not isinstance(input, Variable):
raise TypeError("Input of conv2d_transpose must be Variable")
self._padding = utils.convert_to_list(self._padding, 2, 'padding')
self._stride = utils.convert_to_list(self._stride, 2, 'stride')
self._dilation = utils.convert_to_list(self._dilation, 2, 'dilation')
if not isinstance(self._use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
if self._filter_size is None:
if self._output_size is None:
raise ValueError(
"output_size must be set when filter_size is None")
if isinstance(self._output_size, int):
self._output_size = [self._output_size, self._output_size]
h_in = input.shape[2]
w_in = input.shape[3]
filter_size_h = (self._output_size[0] -
(h_in - 1) * self._stride[0] + 2 * self._padding[0]
- 1) // self._dilation[0] + 1
filter_size_w = (self._output_size[1] -
(w_in - 1) * self._stride[1] + 2 * self._padding[1]
- 1) // self._dilation[1] + 1
self._filter_size = [filter_size_h, filter_size_w]
else:
self._filter_size = utils.convert_to_list(
self._filter_size, 2, 'conv2d_transpose.filter_size')
if self._output_size is None:
self._output_size = []
elif isinstance(self._output_size, list) or isinstance(
self._output_size, int):
self._output_size = utils.convert_to_list(self._output_size, 2,
'output_size')
else:
raise ValueError("output_size should be list or int")
self._padding = utils.convert_to_list(self._padding, 2, 'padding')
self._groups = 1 if self._groups is None else self._groups
filter_shape = [input_channel, self._num_filters // self._groups
] + self._filter_size
self._img_filter = self.create_parameter(
dtype=input.dtype, shape=filter_shape, attr=self._param_attr)
def forward(self, input):
pre_bias = self._helper.create_variable_for_type_inference(
dtype=input.dtype)
self._helper.append_op(
type=self._op_type,
inputs={'Input': [input],
'Filter': [self._img_filter]},
outputs={'Output': pre_bias},
attrs={
'output_size': self._output_size,
'strides': self._stride,
'paddings': self._padding,
'dilations': self._dilation,
'groups': self._groups,
'use_cudnn': self._use_cudnn
})
pre_act = self._helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
out = self._helper.append_activation(pre_act)
return out
class SequenceConv(layers.Layer):
"""
This layer creates the op for sequence_conv, using the inputs and
other convolutional configurations for the filters and stride, as given
in the input parameters.
Args:
input (Variable): ${x_comment}
num_filters (int): number of filters.
filter_size (int): the filter size (H and W).
filter_stride (int): stride of the filter.
padding (bool): if True, add paddings.
bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of sequence_conv.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
of sequence_conv. If it is set to None or one attribute of ParamAttr, sequence_conv
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
act (str): Activation type, if it is set to None, activation is not appended.
Default: None.
name (str|None): A name for this layer(optional). If set None, the layer
will be named automatically. Default: None.
Returns:
Variable: output of sequence_conv
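Examples:
.. code-block:: python

# A sketch based on this commit's unit test (the original docstring
# had no example); the input must carry LoD information.
seq = fluid.layers.data(name='seq_in', shape=[3, 4], dtype='float32',
                        lod_level=1, append_batch_size=False)
seq_conv = SequenceConv('seq_conv', num_filters=2)
out = seq_conv(seq)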
"""
def __init__(self,
name_scope,
num_filters,
filter_size=3,
filter_stride=1,
padding=None,
bias_attr=None,
param_attr=None,
act=None):
super(SequenceConv, self).__init__(name_scope)
self._num_filters = num_filters
self._filter_size = filter_size
self._filter_stride = filter_stride
self._padding = padding
self._bias_attr = bias_attr
self._param_attr = param_attr
def _build_once(self, input):
self._dtype = self._helper.input_dtype(input)
filter_shape = [self._filter_size * input.shape[1], self._num_filters]
self._filter_param = self.create_parameter(
attr=self._param_attr, shape=filter_shape, dtype=self._dtype)
def forward(self, input):
pre_bias = self._helper.create_variable_for_type_inference(self._dtype)
self._helper.append_op(
type='sequence_conv',
inputs={
'X': [input],
'Filter': [self._filter_param],
},
outputs={"Out": pre_bias},
attrs={
'contextStride': self._filter_stride,
'contextStart': -int(self._filter_size // 2),
'contextLength': self._filter_size
})
pre_act = self._helper.append_bias_op(pre_bias)
return self._helper.append_activation(pre_act)
...
@@ -42,10 +42,14 @@ class LayerTest(unittest.TestCase):
     def tearDownClass(cls):
         pass

-    def _get_place(self):
-        if core.is_compiled_with_cuda():
-            return core.CUDAPlace(0)
-        return core.CPUPlace()
+    def _get_place(self, force_to_use_cpu=False):
+        # this option is for ops that only have a CPU kernel
+        if force_to_use_cpu:
+            return core.CPUPlace()
+        else:
+            if core.is_compiled_with_cuda():
+                return core.CUDAPlace(0)
+            return core.CPUPlace()

     @contextlib.contextmanager
     def static_graph(self):
@@ -54,16 +58,18 @@ class LayerTest(unittest.TestCase):
         fluid.default_main_program().random_seed = self.seed
         yield

-    def get_static_graph_result(self, feed, fetch_list):
+    def get_static_graph_result(self, feed, fetch_list, with_lod=False):
         exe = fluid.Executor(self._get_place())
         exe.run(fluid.default_startup_program())
         return exe.run(fluid.default_main_program(),
                        feed=feed,
-                       fetch_list=fetch_list)
+                       fetch_list=fetch_list,
+                       return_numpy=(not with_lod))

     @contextlib.contextmanager
-    def dynamic_graph(self):
-        with fluid.imperative.guard(self._get_place()):
+    def dynamic_graph(self, force_to_use_cpu=False):
+        with fluid.imperative.guard(
+                self._get_place(force_to_use_cpu=force_to_use_cpu)):
             fluid.default_startup_program().random_seed = self.seed
             fluid.default_main_program().random_seed = self.seed
             yield
@@ -256,6 +262,304 @@ class TestLayer(LayerTest):
         self.assertTrue(np.allclose(n, min_ret._numpy()))
         self.assertTrue(np.allclose(n2, max_ret._numpy()))
def test_sequence_conv(self):
inp_np = np.arange(12).reshape([3, 4]).astype('float32')
if core.is_compiled_with_cuda():
place = core.CUDAPlace(0)
else:
place = core.CPUPlace()
with self.static_graph():
seq = layers.data(
name='seq_in',
shape=[3, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
out = layers.sequence_conv(seq, 2)
static_rlt = self.get_static_graph_result(
feed={
"seq_in": fluid.create_lod_tensor(
data=inp_np,
recursive_seq_lens=[[1, 1, 1]],
place=place)
},
fetch_list=[out],
with_lod=True)[0]
with self.static_graph():
seq = layers.data(
name='seq_in',
shape=[3, 4],
dtype='float32',
lod_level=1,
append_batch_size=False)
seq_conv = nn.SequenceConv('seq_conv', num_filters=2)
out = seq_conv(seq)
static_rlt2 = self.get_static_graph_result(
feed={
"seq_in": fluid.create_lod_tensor(
data=inp_np,
recursive_seq_lens=[[1, 1, 1]],
place=place)
},
fetch_list=[out],
with_lod=True)[0]
self.assertTrue(
np.allclose(np.array(static_rlt), np.array(static_rlt2)))
def test_conv2d_transpose(self):
inp_np = np.arange(0, 24).reshape([2, 3, 2, 2]).astype('float32')
with self.static_graph():
img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
out = layers.conv2d_transpose(
input=img, num_filters=10, output_size=28)
static_rlt = self.get_static_graph_result(
feed={'pixel': inp_np}, fetch_list=[out])[0]
with self.static_graph():
img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
conv2d_transpose = nn.Conv2DTranspose(
'conv2d_transpose', num_filters=10, output_size=28)
out = conv2d_transpose(img)
static_rlt2 = self.get_static_graph_result(
feed={'pixel': inp_np}, fetch_list=[out])[0]
with self.dynamic_graph():
conv2d_transpose = nn.Conv2DTranspose(
'conv2d_transpose', num_filters=10, output_size=28)
dy_rlt = conv2d_transpose(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
def test_bilinear_tensor_product(self):
inp_np_x = np.array([[1, 2, 3]]).astype('float32')
inp_np_y = np.array([[4, 5, 6]]).astype('float32')
with self.static_graph():
data_x = layers.data(
name='x',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
data_y = layers.data(
name='y',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
out = layers.bilinear_tensor_product(data_x, data_y, 6)
static_rlt = self.get_static_graph_result(
feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out])[0]
with self.static_graph():
data_x = layers.data(
name='x',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
data_y = layers.data(
name='y',
shape=[1, 3],
dtype="float32",
append_batch_size=False)
btp = nn.BilinearTensorProduct('btp', 6)
out = btp(data_x, data_y)
static_rlt2 = self.get_static_graph_result(
feed={'x': inp_np_x,
'y': inp_np_y}, fetch_list=[out])[0]
with self.dynamic_graph():
btp = nn.BilinearTensorProduct('btp', 6)
dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
def test_prelu(self):
inp_np = np.ones([5, 200, 100, 100]).astype('float32')
with self.static_graph():
data_t = layers.data(
name="input",
shape=[5, 200, 100, 100],
dtype="float32",
append_batch_size=False)
mode = 'channel'
out = layers.prelu(
data_t, mode, param_attr=ParamAttr(initializer=Constant(1.0)))
static_rlt = self.get_static_graph_result(
feed={"input": inp_np}, fetch_list=[out])[0]
with self.static_graph():
data_t = layers.data(
name="input",
shape=[5, 200, 100, 100],
dtype="float32",
append_batch_size=False)
mode = 'channel'
prelu = nn.PRelu(
'prelu',
mode=mode,
param_attr=ParamAttr(initializer=Constant(1.0)))
out = prelu(data_t)
static_rlt2 = self.get_static_graph_result(
feed={"input": inp_np}, fetch_list=[out])[0]
with self.dynamic_graph():
mode = 'channel'
prelu = nn.PRelu(
'prelu',
mode=mode,
param_attr=ParamAttr(initializer=Constant(1.0)))
dy_rlt = prelu(base.to_variable(inp_np))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(dy_rlt._numpy(), static_rlt))
def test_embeding(self):
inp_word = np.array([[[1]]]).astype('int64')
dict_size = 20
with self.static_graph():
data_t = layers.data(name='word', shape=[1], dtype='int64')
emb = layers.embedding(
input=data_t,
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
static_rlt = self.get_static_graph_result(
feed={'word': inp_word}, fetch_list=[emb])[0]
with self.static_graph():
data_t = layers.data(name='word', shape=[1], dtype='int64')
emb2 = nn.Embedding(
name_scope='embedding',
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
emb_rlt = emb2(data_t)
static_rlt2 = self.get_static_graph_result(
feed={'word': inp_word}, fetch_list=[emb_rlt])[0]
with self.dynamic_graph():
emb2 = nn.Embedding(
name_scope='embedding',
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
static_rlt3 = emb2(base.to_variable(inp_word))
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(static_rlt3._numpy(), static_rlt))
def test_nce(self):
window_size = 5
dict_size = 20
label_word = int(window_size // 2) + 1
inp_word = np.array([[[1]], [[2]], [[3]], [[4]], [[5]]]).astype('int64')
nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
seed = 1
with self.static_graph():
words = []
for i in range(window_size):
words.append(
layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
embs = []
for i in range(window_size):
if i == label_word:
continue
emb = layers.embedding(
input=words[i],
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
embs.append(emb)
embs = layers.concat(input=embs, axis=1)
nce_loss = layers.nce(input=embs,
label=words[label_word],
num_total_classes=dict_size,
num_neg_samples=2,
sampler="custom_dist",
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
feed_dict = dict()
for i in range(window_size):
feed_dict['word_{0}'.format(i)] = inp_word[i]
static_rlt = self.get_static_graph_result(
feed=feed_dict, fetch_list=[nce_loss])[0]
with self.static_graph():
words = []
for i in range(window_size):
words.append(
layers.data(
name='word_{0}'.format(i), shape=[1], dtype='int64'))
emb = nn.Embedding(
'embedding',
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
embs2 = []
for i in range(window_size):
if i == label_word:
continue
emb_rlt = emb(words[i])
embs2.append(emb_rlt)
embs2 = layers.concat(input=embs2, axis=1)
nce = nn.NCE('nce',
num_total_classes=dict_size,
num_neg_samples=2,
sampler="custom_dist",
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
nce_loss2 = nce(embs2, words[label_word])
feed_dict = dict()
for i in range(len(words)):
feed_dict['word_{0}'.format(i)] = inp_word[i]
static_rlt2 = self.get_static_graph_result(
feed=feed_dict, fetch_list=[nce_loss2])[0]
with self.dynamic_graph(force_to_use_cpu=True):
words = []
for i in range(window_size):
words.append(base.to_variable(inp_word[i]))
emb = nn.Embedding(
'embedding',
size=[dict_size, 32],
param_attr='emb.w',
is_sparse=False)
embs3 = []
for i in range(window_size):
if i == label_word:
continue
emb_rlt = emb(words[i])
embs3.append(emb_rlt)
embs3 = layers.concat(input=embs3, axis=1)
nce = nn.NCE('nce',
num_total_classes=dict_size,
num_neg_samples=2,
sampler="custom_dist",
custom_dist=nid_freq_arr.tolist(),
seed=seed,
param_attr='nce.w',
bias_attr='nce.b')
nce_loss3 = nce(embs3, words[label_word])
self.assertTrue(np.allclose(static_rlt2, static_rlt))
self.assertTrue(np.allclose(nce_loss3._numpy(), static_rlt))
class TestBook(unittest.TestCase):
    def test_fit_a_line(self):
...