Commit f6144d84 authored by Youwei Song, committed by hong

remove build_once & name_scope (#21131)

* remove build_once & name_scope (Conv2D)
test=develop

* fix unittest
test=develop

* Conv2DTranspose

* Conv3D & Conv3DTranspose
test=develop

* Pool2D & BatchNorm

* Embedding

* LayerNorm

* GRUUnit & NCE

* PRelu

* BilinearTensorProduct

* GroupNorm & SpectralNorm

* TreeConv
test=develop

* fix LayerNorm in transformer unittest
test=develop

* disable LayerNorm or BatchNorm in multicard
test=develop

* refine Layer.create_parameter api
test=develop

* refine LayerNorm, remove begin_norm_axis param, add normed shape check
test=develop

* LayerNorm bug fix
test=develop
Parent 0fe16539
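Migration note: this change removes the leading name_scope argument from the dygraph layers and drops the lazy _build_once hook, so parameters are created eagerly in __init__ from explicitly passed sizes (num_channels, dim, normalized_shape, ...). A minimal before/after sketch, assuming the Paddle 1.6-era fluid.dygraph API (shapes are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        data = fluid.dygraph.to_variable(
            np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32'))

        # Before: conv2d = fluid.dygraph.Conv2D("conv2d", num_filters=2, filter_size=3)
        # After: input channels are passed explicitly, so the weights exist
        # immediately instead of being created on the first forward call.
        conv2d = fluid.dygraph.Conv2D(num_channels=3, num_filters=2, filter_size=3)
        conv = conv2d(data)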
@@ -104,12 +104,12 @@ class BasicGRUUnit(Layer):
             dtype=self._dtype)
         self._gate_bias = self.create_parameter(
-            self._bias_attr,
+            attr=self._bias_attr,
             shape=[2 * self._hiden_size],
             dtype=self._dtype,
             is_bias=True)
         self._candidate_bias = self.create_parameter(
-            self._bias_attr,
+            attr=self._bias_attr,
             shape=[self._hiden_size],
             dtype=self._dtype,
             is_bias=True)
...
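Call sites like the ones above switch to the attr= keyword because Layer.create_parameter (see the Layer hunk below) now takes shape first, with attr and dtype becoming optional keywords. A minimal sketch of the new calling convention (MyBias is a hypothetical example layer):

    import paddle.fluid as fluid

    class MyBias(fluid.dygraph.Layer):  # hypothetical example
        def __init__(self, hidden_size, bias_attr=None):
            super(MyBias, self).__init__()
            # New order: shape first; attr, dtype, is_bias are keywords.
            self._bias = self.create_parameter(
                shape=[hidden_size], attr=bias_attr, dtype='float32',
                is_bias=True)

        def forward(self, x):
            return x + self._bias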
@@ -48,7 +48,7 @@ def save_dygraph(state_dict, model_path):
             import paddle.fluid as fluid
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+                emb = fluid.dygraph.Embedding([10, 10])
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
...
@@ -91,7 +91,7 @@ def load_dygraph(model_path):
             import paddle.fluid as fluid
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+                emb = fluid.dygraph.Embedding([10, 10])
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
...
@@ -33,10 +33,11 @@ class Layer(core.Layer):
     """Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.

     Parameters:
-        name_scope (str): prefix name used by the layer to name parameters.
-            If prefix is "my_model/layer_1", parameter name in MyLayer
-            can be "my_model/layer_1/MyLayer/w_n", where w is the parameter
+        name_scope (str, optional): prefix name used by the layer to name parameters.
+            If prefix is "my_layer", parameter name in MyLayer
+            can be "mylayer_0.w_n", where w is the parameter
             base name and n is an unique suffix auto-generated.
+            If None, prefix name will be lower cased class name. Default: None.
         dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
             If set str, it can be "bool", "float16", "float32", "float64",
             "int8", "int16", "int32", "int64", "uint8" or "uint16".
@@ -46,17 +47,22 @@ class Layer(core.Layer):
         None
     """

-    def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32):
-        self._full_name = unique_name.generate(name_scope + "/" +
-                                               self.__class__.__name__)
+    def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
+        if name_scope is None:
+            name_scope = self.__class__.__name__.lower()
+            self._full_name = unique_name.generate(name_scope)
+        else:
+            # TODO: remove name_scope parameter and all hard-coded usages
+            self._full_name = unique_name.generate(name_scope + "/" +
+                                                   self.__class__.__name__)
+        self._helper = LayerObjectHelper(self._full_name)
         self._built = False
         self._dtype = dtype
         self._parameters = collections.OrderedDict()
         self._sub_layers = collections.OrderedDict()
         self._loaddict_holder = collections.OrderedDict()
-
-        self._helper = LayerObjectHelper(self._full_name)

     def train(self):
         framework._dygraph_tracer().train_mode()
@@ -72,23 +78,23 @@ class Layer(core.Layer):
         return self._full_name

     def create_parameter(self,
-                         attr,
                          shape,
-                         dtype,
+                         attr=None,
+                         dtype='float32',
                          is_bias=False,
                          default_initializer=None):
         """Create parameters for this layer.

         Parameters:
-            attr(ParamAttr): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`
-            shape(list): shape of the parameter
-            dtype(str or core.VarDesc.VarType): data type of this parameter.
+            shape(list): Shape of the parameter.
+            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
+            dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
                 If set str, it can be "bool", "float16", "float32", "float64",
-                "int8", "int16", "int32", "int64", "uint8" or "uint16".
-            is_bias(bool, optional): if this is a bias parameter. Default: False
+                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
+            is_bias(bool, optional): if this is a bias parameter. Default: False.
             default_initializer(Initializer, optional): the default initializer for this parameter.
                 If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
-                for non-bias and bias parameter, respectively. Default: None
+                for non-bias and bias parameter, respectively. Default: None.

         Returns:
             :ref:`api_guide_Variable_en` : created parameter.
@@ -294,7 +300,7 @@ class Layer(core.Layer):
             import paddle.fluid as fluid
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+                emb = fluid.dygraph.Embedding([10, 10])
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
@@ -332,7 +338,7 @@ class Layer(core.Layer):
             import paddle.fluid as fluid
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+                emb = fluid.dygraph.Embedding([10, 10])
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
@@ -361,7 +367,7 @@ class Layer(core.Layer):
             import paddle.fluid as fluid
             with fluid.dygraph.guard():
-                emb = fluid.dygraph.Embedding( "emb", [10, 10])
+                emb = fluid.dygraph.Embedding([10, 10])
                 state_dict = emb.state_dict()
                 fluid.save_dygraph( state_dict, "paddle_dy")
...
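With name_scope gone from the examples above, a layer's parameter prefix falls back to the lower-cased class name plus a unique suffix. A hedged sketch of the full save/load round trip built from the docstring examples (the printed name is indicative, not guaranteed):

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])  # no name_scope argument
        print(emb.full_name())                   # e.g. "embedding_0"

        fluid.save_dygraph(emb.state_dict(), "paddle_dy")
        state_dict, _ = fluid.load_dygraph("paddle_dy")
        emb.set_dict(state_dict)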
@@ -23,6 +23,7 @@ from ..framework import Variable, in_dygraph_mode, OpProtoHolder, Parameter
 from ..param_attr import ParamAttr
 from ..initializer import Normal, Constant, NumpyArrayInitializer
 import numpy as np
+import numbers
 import logging

 __all__ = [
@@ -86,7 +87,7 @@ class Conv2D(layers.Layer):
         W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (W_f - 1) + 1))}{strides[1]} + 1

     Parameters:
-        name_scope(str): The name for this class.
+        num_channels(int): The number of channels in the input image.
         num_filters(int): The number of filter. It is as same as the output
             feature map.
         filter_size (int or tuple): The filter size. If filter_size is a tuple,
@@ -143,14 +144,14 @@ class Conv2D(layers.Layer):
          data = np.random.uniform(-1, 1, [10, 3, 32, 32]).astype('float32')
          with fluid.dygraph.guard():
-             conv2d = Conv2D("conv2d", 2, 3)
+             conv2d = Conv2D(3, 2, 3)
              data = to_variable(data)
              conv = conv2d(data)
     """

     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  stride=1,
@@ -163,7 +164,8 @@ class Conv2D(layers.Layer):
                  act=None,
                  dtype='float32'):
         assert param_attr is not False, "param_attr should not be False here."
-        super(Conv2D, self).__init__(name_scope, dtype)
+        super(Conv2D, self).__init__()
+        self._num_channels = num_channels
         self._groups = groups
         self._stride = utils.convert_to_list(stride, 2, 'stride')
         self._padding = utils.convert_to_list(padding, 2, 'padding')
@@ -177,16 +179,13 @@ class Conv2D(layers.Layer):
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._dtype = dtype
-        # if (self._num_channels == self._groups and
-        #         num_filters % self._num_channels == 0 and not self._use_cudnn):
-        #     self._l_type = 'depthwise_conv2d'
-        # else:
-        # TODO(jiabin): recover the usage of depthwise_conv2d when it's
-        #  kernel fixed https://github.com/PaddlePaddle/Paddle/issues/17275
-        self._l_type = 'conv2d'
+        if (self._num_channels == self._groups and
+                num_filters % self._num_channels == 0 and not self._use_cudnn):
+            self._l_type = 'depthwise_conv2d'
+        else:
+            self._l_type = 'conv2d'

-    def _build_once(self, input):
-        self._num_channels = input.shape[1]
+        self._num_channels = num_channels
         if self._groups is None:
             num_filter_channels = self._num_channels
         else:
@@ -194,8 +193,7 @@ class Conv2D(layers.Layer):
             raise ValueError("num_channels must be divisible by groups.")
         num_filter_channels = self._num_channels // self._groups
         filter_size = utils.convert_to_list(self._filter_size, 2, 'filter_size')
-        filter_shape = [self._num_filters, int(num_filter_channels)
-                        ] + filter_size
+        filter_shape = [self._num_filters, num_filter_channels] + filter_size

         def _get_default_param_initializer():
             filter_elem_num = filter_size[0] * filter_size[
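Because num_channels is now known at construction time, the previously commented-out depthwise branch can run in __init__ instead of waiting for the first input. A hedged illustration of when 'depthwise_conv2d' is selected (channel counts are arbitrary):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(
            np.random.uniform(-1, 1, [4, 8, 16, 16]).astype('float32'))
        # num_channels == groups and num_filters % num_channels == 0,
        # so with use_cudnn=False this picks the 'depthwise_conv2d' kernel.
        dw = fluid.dygraph.Conv2D(num_channels=8, num_filters=8,
                                  filter_size=3, groups=8, use_cudnn=False)
        y = dw(x)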
@@ -316,7 +314,7 @@ class Conv3D(layers.Layer):
         W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1

     Parameters:
-        name_scope(str) : The name for this class.
+        num_channels(int): The number of channels in the input image.
         num_filters(int): The number of filter. It is as same as the output image channel.
         filter_size (int|tuple, optional): The filter size. If filter_size is a tuple,
             it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
@@ -350,6 +348,7 @@ class Conv3D(layers.Layer):
             library is installed. The default value is True.
         act (str, optional): Activation type, if it is set to None, activation is not appended.
             The default value is None.
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Attribute:
         **weight** (Parameter): the learnable weights of filters of this layer.
@@ -372,13 +371,13 @@ class Conv3D(layers.Layer):
          with fluid.dygraph.guard():
              data = numpy.random.random((5, 3, 12, 32, 32)).astype('float32')
              conv3d = fluid.dygraph.nn.Conv3D(
-                  'Conv3D', num_filters=2, filter_size=3, act="relu")
+                  num_channels=3, num_filters=2, filter_size=3, act="relu")
              ret = conv3d(fluid.dygraph.base.to_variable(data))
     """

     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  stride=1,
@@ -388,40 +387,36 @@ class Conv3D(layers.Layer):
                  param_attr=None,
                  bias_attr=None,
                  use_cudnn=True,
-                 act=None):
+                 act=None,
+                 dtype='float32'):
         assert param_attr is not False, "param_attr should not be False here."
-        super(Conv3D, self).__init__(name_scope)
+        super(Conv3D, self).__init__()
+        self._num_channels = num_channels
         self._groups = groups
         self._stride = utils.convert_to_list(stride, 3, 'stride')
         self._padding = utils.convert_to_list(padding, 3, 'padding')
         self._dilation = utils.convert_to_list(dilation, 3, 'dilation')
         self._act = act
-        if not isinstance(use_cudnn, bool):
-            raise ValueError("use_cudnn should be True or False")
         self._use_cudnn = use_cudnn
         self._filter_size = filter_size
         self._num_filters = num_filters
         self._param_attr = param_attr
         self._bias_attr = bias_attr
+        self._dtype = dtype

-    def _build_once(self, input):
-        num_channels = input.shape[1]
-        self._dtype = self._helper.input_dtype(input)
-
         if self._groups is None:
-            num_filter_channels = num_channels
+            num_filter_channels = self._num_channels
         else:
-            if num_channels % self._groups != 0:
+            if self._num_channels % self._groups != 0:
                 raise ValueError("num_channels must be divisible by groups.")
-            num_filter_channels = num_channels // self._groups
+            num_filter_channels = self._num_channels // self._groups

         filter_size = utils.convert_to_list(self._filter_size, 3, 'filter_size')
         filter_shape = [self._num_filters, num_filter_channels] + filter_size

         def _get_default_param_initializer():
             filter_elem_num = filter_size[0] * filter_size[1] * filter_size[
-                2] * num_channels
+                2] * self._num_channels
             std = (2.0 / filter_elem_num)**0.5
             return Normal(0.0, std, 0)
@@ -556,18 +551,12 @@ class Conv3DTranspose(layers.Layer):
     Parameters:
-        name_scope(str) : The name for this class.
+        num_channels(int): The number of channels in the input image.
         num_filters(int): The number of the filter. It is as same as the output
             image channel.
-        output_size(int|tuple, optional): The output image size. If output size is a
-            tuple, it must contain three integers, (image_depth, image_height, image_width). This
-            parameter only works when filter_size is None. If output_size and filter_size are
-            specified at the same time, They should follow the formula above. The default value is None.
-            Output_size and filter_size should not be None at the same time.
-        filter_size(int|tuple, optional): The filter size. If filter_size is a tuple,
+        filter_size(int|tuple): The filter size. If filter_size is a tuple,
             it must contain three integers, (filter_size_D, filter_size_H, filter_size_W).
-            Otherwise, the filter will be a square. None if use output size to
-            calculate filter_size. The default value is None.
+            Otherwise, the filter will be a square.
         padding(int|tuple, optional): The padding size. The padding argument effectively
             adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string,
             either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding`
@@ -627,9 +616,8 @@ class Conv3DTranspose(layers.Layer):
          with fluid.dygraph.guard():
              data = numpy.random.random((5, 3, 12, 32, 32)).astype('float32')
              conv3dTranspose = fluid.dygraph.nn.Conv3DTranspose(
-                    'Conv3DTranspose',
+                    num_channels=3,
                     num_filters=12,
                     filter_size=12,
                     use_cudnn=False)
@@ -638,10 +626,9 @@ class Conv3DTranspose(layers.Layer):
     """

     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
-                 output_size=None,
-                 filter_size=None,
+                 filter_size,
                  padding=0,
                  stride=1,
                  dilation=1,
@@ -650,8 +637,8 @@ class Conv3DTranspose(layers.Layer):
                  bias_attr=None,
                  use_cudnn=True,
                  act=None,
-                 name=None):
-        super(Conv3DTranspose, self).__init__(name_scope)
+                 dtype='float32'):
+        super(Conv3DTranspose, self).__init__()
         if not isinstance(use_cudnn, bool):
             raise ValueError("use_cudnn should be True or False")
         assert param_attr is not False, "param_attr should not be False in conv3d_transpose."
@@ -659,46 +646,20 @@ class Conv3DTranspose(layers.Layer):
         self._stride = utils.convert_to_list(stride, 3, 'stride')
         self._dilation = utils.convert_to_list(dilation, 3, 'dilation')
         self._param_attr = param_attr
+        self._num_channels = num_channels
         self._filter_size = filter_size
-        self._output_size = output_size
         self._groups = 1 if groups is None else groups
         self._num_filters = num_filters
         self._use_cudnn = use_cudnn
         self._bias_attr = bias_attr
         self._act = act
+        self._dtype = dtype

-    def _build_once(self, input):
-        self._dtype = self._helper.input_dtype(input)
-        self._input_channel = input.shape[1]
-
-        if self._filter_size is None:
-            if self._output_size is None:
-                raise ValueError(
-                    "output_size must be set when filter_size is None")
-            if isinstance(self._output_size, int):
-                self._output_size = [self._output_size, self._output_size]
-
-            d_in = input.shape[2]
-            h_in = input.shape[3]
-            w_in = input.shape[4]
-
-            filter_size_d = (self._output_size[0] -
-                             (d_in - 1) * self._stride[0] + 2 * self._padding[0]
-                             - 1) // self._dilation[0] + 1
-            filter_size_h = (self._output_size[1] -
-                             (h_in - 1) * self._stride[1] + 2 * self._padding[1]
-                             - 1) // self._dilation[1] + 1
-            filter_size_w = (self._output_size[2] -
-                             (w_in - 1) * self._stride[2] + 2 * self._padding[2]
-                             - 1) // self._dilation[2] + 1
-            self._filter_size = [filter_size_d, filter_size_h, filter_size_w]
-        else:
-            self._filter_size = utils.convert_to_list(
-                self._filter_size, 3, 'conv3d_transpose.filter_size')
+        self._filter_size = utils.convert_to_list(
+            self._filter_size, 3, 'conv3d_transpose.filter_size')

-        filter_shape = [
-            self._input_channel, self._num_filters // self._groups
-        ] + self._filter_size
+        filter_shape = [self._num_channels, self._num_filters // self._groups
+                        ] + self._filter_size
         self._img_filter = self.create_parameter(
             dtype=self._dtype, shape=filter_shape, attr=self._param_attr)
         if self._bias_attr:
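filter_size is now required: the _build_once branch that inferred it from output_size is deleted. Callers that only know the desired output extent can reproduce the removed inference per spatial dimension; a hedged sketch of that arithmetic (infer_filter_size is an illustrative helper, not a Paddle API):

    # The removed code computed, per dimension:
    #   filter_size = (out - (in - 1) * stride + 2 * padding - 1) // dilation + 1
    def infer_filter_size(out_dim, in_dim, stride=1, padding=0, dilation=1):
        return (out_dim - (in_dim - 1) * stride + 2 * padding - 1) // dilation + 1

    # e.g. mapping depth 12 -> 12 with stride 1 and padding 1 needs a kernel of 3
    assert infer_filter_size(12, 12, stride=1, padding=1) == 3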
@@ -811,7 +772,6 @@ class Pool2D(layers.Layer):
         Output(i ,j) & = \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}

     Parameters:
-        name_scope(str) : The name of this class.
         pool_size (int or list or tuple, optional): The pool kernel size. If pool kernel size is a tuple or list,
             it must contain two integers, (pool_size_Height, pool_size_Width).
             Otherwise, the pool kernel size will be a square of an int. Default: -1.
@@ -830,7 +790,6 @@ class Pool2D(layers.Layer):
         ceil_mode (bool, optional): Whether to use the ceil function to calculate output height and width.
             False is the default. If it is set to False, the floor function will be used. Default: False.
         exclusive (bool, optional): Whether to exclude padding points in average pooling mode. Default: True.
-        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Returns:
         None
@@ -850,7 +809,7 @@ class Pool2D(layers.Layer):
          with fluid.dygraph.guard():
             data = numpy.random.random((3, 32, 32, 5)).astype('float32')
-            pool2d = fluid.dygraph.Pool2D("pool2d",pool_size=2,
+            pool2d = fluid.dygraph.Pool2D(pool_size=2,
                           pool_type='max',
                           pool_stride=1,
                           global_pooling=False)
@@ -859,7 +818,6 @@ class Pool2D(layers.Layer):
     """

     def __init__(self,
-                 name_scope,
                  pool_size=-1,
                  pool_type="max",
                  pool_stride=1,
@@ -867,8 +825,7 @@ class Pool2D(layers.Layer):
                  global_pooling=False,
                  use_cudnn=True,
                  ceil_mode=False,
-                 exclusive=True,
-                 dtype=core.VarDesc.VarType.FP32):
+                 exclusive=True):
         if pool_type not in ["max", "avg"]:
             raise ValueError(
                 "Unknown pool_type: '%s'. It can only be 'max' or 'avg'.",
@@ -882,7 +839,7 @@ class Pool2D(layers.Layer):
         if not isinstance(use_cudnn, bool):
             raise ValueError("use_cudnn should be True or False")

-        super(Pool2D, self).__init__(name_scope, dtype=dtype)
+        super(Pool2D, self).__init__()

         self._pool_type = pool_type
         self._pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
@@ -1178,7 +1135,6 @@ class BatchNorm(layers.Layer):
     - :math:`\\beta` : trainable deviation parameter

     Parameters:
-        name_scope(str): The name of this class.
         num_channels(int): Indicate the number of channels of the input ``Tensor``.
         act(str, optional): Activation to be applied to the output of batch normalizaiton. Default: None.
         is_test (bool, optional): A flag indicating whether it is in test phrase or not. Default: False.
@@ -1222,12 +1178,11 @@ class BatchNorm(layers.Layer):
          x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
          with fluid.dygraph.guard():
              x = to_variable(x)
-             batch_norm = fluid.BatchNorm("batch_norm", 10)
+             batch_norm = fluid.BatchNorm(10)
              hidden1 = batch_norm(x)
     """

     def __init__(self,
-                 name_scope,
                  num_channels,
                  act=None,
                  is_test=False,
@@ -1243,7 +1198,7 @@ class BatchNorm(layers.Layer):
                  do_model_average_for_mean_and_var=True,
                  use_global_stats=False,
                  trainable_statistics=False):
-        super(BatchNorm, self).__init__(name_scope, dtype)
+        super(BatchNorm, self).__init__()
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._act = act
@@ -1303,9 +1258,6 @@ class BatchNorm(layers.Layer):
         self._use_global_stats = use_global_stats
         self._trainable_statistics = trainable_statistics

-    def _build_once(self, input):
-        pass
-
     def forward(self, input):
         # create output
         # mean and mean_out share the same memory
@@ -1389,7 +1341,6 @@ class Embedding(layers.Layer):
     It will pad all-zero data when ids is 127.

     Parameters:
-        name_scope(str): The name of this class.
         size(tuple|list): The shape of the look up table parameter. It should have two elements which indicate the size
             of the dictionary of embeddings and the size of each embedding vector respectively.
         is_sparse(bool): The flag indicating whether to use sparse update. This parameter only
@@ -1435,7 +1386,6 @@ class Embedding(layers.Layer):
          dict_size = 20
          with fluid.dygraph.guard():
              emb = fluid.dygraph.Embedding(
-                 name_scope='embedding',
                  size=[dict_size, 32],
                  param_attr='emb.w',
                  is_sparse=False)
@@ -1451,7 +1401,6 @@ class Embedding(layers.Layer):
                  trainable=True)
          with fluid.dygraph.guard():
              emb = fluid.dygraph.Embedding(
-                 name_scope='embedding',
                  size=[128, 100],
                  param_attr= w_param_attrs,
                  is_sparse=False)
@@ -1459,14 +1408,13 @@ class Embedding(layers.Layer):
     """

     def __init__(self,
-                 name_scope,
                  size,
                  is_sparse=False,
                  is_distributed=False,
                  padding_idx=None,
                  param_attr=None,
                  dtype='float32'):
-        super(Embedding, self).__init__(name_scope, dtype)
+        super(Embedding, self).__init__()
         self._size = size
         self._is_sparse = is_sparse
         self._is_distributed = is_distributed
@@ -1534,14 +1482,14 @@ class LayerNorm(layers.Layer):
     - :math:`b`: the trainable bias parameter.

     Parameters:
-        name_scope(str): The name of this class.
+        normalized_shape(int or list or tuple): Input shape from an expected input of
+            size :math:`[*, normalized_shape[0], normalized_shape[1], ..., normalized_shape[-1]]`.
+            If it is a single integer, this module will normalize over the last dimension
+            which is expected to be of that specific size.
         scale(bool, optional): Whether to learn the adaptive gain :math:`g` after
             normalization. Default: True.
         shift(bool, optional): Whether to learn the adaptive bias :math:`b` after
             normalization. Default: True.
-        begin_norm_axis(int, optional): The normalization will be performed along
-            dimensions from :attr:`begin_norm_axis` to :attr:`rank(input)`.
-            Default: 1.
         epsilon(float, optional): The small value added to the variance to prevent
             division by zero. Default: 1e-05.
         param_attr(ParamAttr, optional): The parameter attribute for the learnable
@@ -1556,6 +1504,8 @@ class LayerNorm(layers.Layer):
             :attr:`bias_attr` is initialized as 0 if it is added. Default: None.
         act(str, optional): Activation to be applied to the output of layer normalizaiton.
             Default: None.
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Returns:
         None
@@ -1570,35 +1520,32 @@ class LayerNorm(layers.Layer):
          x = numpy.random.random((3, 32, 32)).astype('float32')
          with fluid.dygraph.guard():
              x = to_variable(x)
-             layerNorm = fluid.LayerNorm('LayerNorm', begin_norm_axis=1)
+             layerNorm = fluid.LayerNorm([32, 32])
              ret = layerNorm(x)
     """

     def __init__(self,
-                 name_scope,
+                 normalized_shape,
                  scale=True,
                  shift=True,
-                 begin_norm_axis=1,
                  epsilon=1e-05,
                  param_attr=None,
                  bias_attr=None,
-                 act=None):
-        super(LayerNorm, self).__init__(name_scope)
+                 act=None,
+                 dtype='float32'):
+        super(LayerNorm, self).__init__()
+        if isinstance(normalized_shape, numbers.Integral):
+            normalized_shape = [normalized_shape]
+        self._normalized_shape = list(normalized_shape)
         self._scale = scale
         self._shift = shift
-        self._begin_norm_axis = begin_norm_axis
         self._epsilon = epsilon
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._act = act
+        self._dtype = dtype

-    def _build_once(self, input):
-        self._dtype = self._helper.input_dtype(input)
-        input_shape = input.shape
-        param_shape = [
-            reduce(lambda x, y: x * y, input_shape[self._begin_norm_axis:])
-        ]
+        param_shape = [np.prod(self._normalized_shape)]
         if self._scale:
             self._scale_w = self.create_parameter(
                 attr=self._param_attr,
@@ -1621,6 +1568,17 @@ class LayerNorm(layers.Layer):
             logging.warn("bias_attr are only avaliable with shift is True")

     def forward(self, input):
+        input_shape = list(input.shape)
+        input_ndim = len(input_shape)
+        normalized_ndim = len(self._normalized_shape)
+        self._begin_norm_axis = input_ndim - normalized_ndim
+        if input_ndim < normalized_ndim or input_shape[
+                self._begin_norm_axis:] != self._normalized_shape:
+            str_normalized_shape = str(self._normalized_shape)
+            raise ValueError(
+                'Given normalized_shape is ' + str_normalized_shape +
+                ', expected input with shape [*, ' + str_normalized_shape[
+                    1:] + ', but got input shape ' + str(input_shape))
+
         inputs = dict()
         inputs['X'] = input
         if self._scale:
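LayerNorm now takes the normalized shape up front, and forward() checks that the input's trailing dimensions match it before building the op. A hedged sketch of both outcomes (shapes follow the docstring example):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(
            np.random.random((3, 32, 32)).astype('float32'))

        layer_norm = fluid.LayerNorm([32, 32])  # normalize the last two dims
        ret = layer_norm(x)                     # OK: trailing dims are (32, 32)

        bad = fluid.LayerNorm([16, 16])
        try:
            bad(x)                              # trailing dims do not match
        except ValueError as e:
            print(e)                            # "Given normalized_shape is [16, 16], ..."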
@@ -1696,7 +1654,6 @@ class GRUUnit(layers.Layer):
     and concatenation of :math:`u_t`, :math:`r_t` and :math:`m_t`.

     Parameters:
-        name_scope(str): The name of this class.
         size (int): The input dimension value.
         param_attr(ParamAttr, optional): The parameter attribute for the learnable
             hidden-hidden weight matrix.
@@ -1755,14 +1712,13 @@ class GRUUnit(layers.Layer):
          hidden_input = numpy.random.rand(T, D).astype('float32')
          with fluid.dygraph.guard():
              x = numpy.random.random((3, 32, 32)).astype('float32')
-             gru = fluid.dygraph.GRUUnit('gru', size=D * 3)
+             gru = fluid.dygraph.GRUUnit(size=D * 3)
              dy_ret = gru(
                  base.to_variable(input), base.to_variable(hidden_input))
     """

     def __init__(self,
-                 name_scope,
                  size,
                  param_attr=None,
                  bias_attr=None,
@@ -1770,9 +1726,8 @@ class GRUUnit(layers.Layer):
                  gate_activation='sigmoid',
                  origin_mode=False,
                  dtype='float32'):
-        super(GRUUnit, self).__init__(name_scope, dtype)
+        super(GRUUnit, self).__init__()
+        self._bias_attr = bias_attr

         activation_dict = dict(
             identity=0,
             sigmoid=1,
@@ -1845,8 +1800,8 @@ class NCE(layers.Layer):
     `Noise-contrastive estimation: A new estimation principle for unnormalized statistical models <http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_ .

     Parameters:
-        name_scope(str): The name of this class.
-        num_total_classes (int): Total number of classes in all samples
+        num_total_classes (int): Total number of classes in all samples.
+        dim (int): Dimension of input (possibly embedding dim).
         param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
             of nce. If it is set to None or one attribute of ParamAttr, nce
             will create ParamAttr as param_attr. If the Initializer of the param_attr
@@ -1866,6 +1821,7 @@ class NCE(layers.Layer):
             Default: None.
         seed (int, optional): The seed used in sampler. Default: 0.
         is_sparse(bool, optional): The flag indicating whether to use sparse update. If is_sparse is True, the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default: False.
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Attribute:
         **weight** (Parameter): the learnable weights of this layer.
@@ -1893,7 +1849,6 @@ class NCE(layers.Layer):
                  words.append(fluid.dygraph.base.to_variable(inp_word[i]))

                  emb = fluid.Embedding(
-                     'embedding',
                      size=[dict_size, 32],
                      param_attr='emb.w',
                      is_sparse=False)
@@ -1907,8 +1862,9 @@ class NCE(layers.Layer):
                  embs3.append(emb_rlt)

                  embs3 = fluid.layers.concat(input=embs3, axis=1)
-                 nce = fluid.NCE('nce',
+                 nce = fluid.NCE(
                               num_total_classes=dict_size,
+                              dim=embs3.shape[1],
                               num_neg_samples=2,
                               sampler="custom_dist",
                               custom_dist=nid_freq_arr.tolist(),
@@ -1922,8 +1878,8 @@ class NCE(layers.Layer):
     """

     def __init__(self,
-                 name_scope,
                  num_total_classes,
+                 dim,
                  sample_weight=None,
                  param_attr=None,
                  bias_attr=None,
@@ -1931,12 +1887,13 @@ class NCE(layers.Layer):
                  sampler="uniform",
                  custom_dist=None,
                  seed=0,
-                 is_sparse=False):
-        super(NCE, self).__init__(name_scope)
+                 is_sparse=False,
+                 dtype='float32'):
+        super(NCE, self).__init__()
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._num_total_classes = num_total_classes
+        self._dtype = dtype
         self._inputs = dict()
         self._inputs['SampleWeight'] = sample_weight if sample_weight is not None else []
         if sampler == "uniform":
@@ -2026,23 +1983,17 @@ class NCE(layers.Layer):
             'remote_prefetch': remote_prefetch
         }

-    def _build_once(self, input, label, sample_weight=None):
-        assert isinstance(input, Variable)
-        assert isinstance(label, Variable)
-
-        dim = input.shape[1]
-        num_true_class = label.shape[1]
         self._w = self.create_parameter(
             attr=self._param_attr,
             shape=[self._num_total_classes, dim],
             is_bias=False,
-            dtype=input.dtype)
+            dtype=self._dtype)
         if self._bias_attr:
             self._b = self.create_parameter(
                 attr=self._bias_attr,
                 shape=[self._num_total_classes, 1],
                 is_bias=True,
-                dtype=input.dtype)
+                dtype=self._dtype)
             self._inputs['Bias'] = self._b
         self._inputs['Weight'] = self._w
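NCE now takes dim explicitly (the old _build_once read it from input.shape[1]), so the [num_total_classes, dim] weight and the optional [num_total_classes, 1] bias exist right after construction. In the docstring example this is wired as dim=embs3.shape[1]; a minimal hedged sketch:

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # dim must match the feature size of the input later passed to forward().
        nce = fluid.NCE(num_total_classes=100, dim=32, num_neg_samples=2)
        print(nce.weight.shape)  # expected: [100, 32]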
@@ -2101,13 +2052,15 @@ class PRelu(layers.Layer):
         y = \max(0, x) + \\alpha * \min(0, x)

     Parameters:
-        name_scope(str): The name of this class.
         mode (str): The mode for weight sharing. It supports all, channel
             and element. all: all elements share same weight
             channel:elements in a channel share same weight
             element:each element has a weight
+        input_shape (list or tuple, optional): The shape of input.
+            This parameter is required when mode is not "all". Default: None.
         param_attr(ParamAttr, optional): The parameter attribute for the learnable
             weight (alpha). Default: None.
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Attribute:
         **weight** (Parameter): the learnable weights of this layer.
@@ -2128,28 +2081,29 @@ class PRelu(layers.Layer):
              inp_np = to_variable(inp_np)
              mode = 'channel'
              prelu = fluid.PRelu(
-                 'prelu',
                  mode=mode,
+                 input_shape=inp_np.shape,
                  param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
              dy_rlt = prelu(inp_np)
     """

-    def __init__(self, name_scope, mode, param_attr=None):
-        super(PRelu, self).__init__(name_scope)
+    def __init__(self, mode, input_shape=None, param_attr=None,
+                 dtype='float32'):
+        super(PRelu, self).__init__()
         self._mode = mode
         self._param_attr = param_attr
         if self._mode not in ['all', 'channel', 'element']:
             raise ValueError('mode should be one of all, channel, element.')
+        self._dtype = dtype
         self._alpha_shape = [1]
-
-    def _build_once(self, input):
-        if self._mode == 'channel':
-            self._alpha_shape = [1, input.shape[1], 1, 1]
-        elif self._mode == 'element':
-            self._alpha_shape = input.shape
-        self._dtype = self._helper.input_dtype(input)
+        if mode is not 'all':
+            assert input_shape is not None
+            input_shape = list(input_shape)
+            if self._mode == 'channel':
+                self._alpha_shape = [1, input_shape[1], 1, 1]
+            elif self._mode == 'element':
+                self._alpha_shape = input_shape
         self._alpha = self.create_parameter(
             attr=self._param_attr,
             shape=self._alpha_shape,
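PRelu's alpha shape depends on the input for the 'channel' and 'element' modes, so those modes now require input_shape at construction; 'all' mode still needs no shape. A hedged sketch (the shapes are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        inp = fluid.dygraph.to_variable(
            np.random.random((2, 5, 10, 10)).astype('float32'))

        # 'channel': alpha has shape [1, C, 1, 1], so C must be known up front.
        prelu = fluid.PRelu(mode='channel', input_shape=inp.shape)
        out = prelu(inp)

        # 'all': one shared alpha, no input_shape needed.
        prelu_all = fluid.PRelu(mode='all')
        out_all = prelu_all(inp)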
@@ -2195,16 +2149,18 @@ class BilinearTensorProduct(layers.Layer):
     - :math:`y^\mathrm{T}`: the transpose of :math:`y`.

     Parameters:
-        name_scope(str): The name of this class.
-        size (int): The dimension of this layer.
-        name (str): The default value is None. Normally there is no need for user
-            to set this property. For more information, please refer to :ref:`api_guide_Name`.
+        input1_dim (int): The dimension of each first input.
+        input2_dim (int): The dimension of each second input.
+        output_dim (int): The dimension of output of this layer.
+        name (str, optional): The default value is None. Normally there is no need for user
+            to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None.
         act (str, optional): Activation to be applied to the output of this layer. The default value is None.
         param_attr (ParamAttr, optional): The parameter attribute for the learnable w, parameters/weights of
             this layer. The default value is None.
         bias_attr (ParamAttr, optional): The parameter attribute for the bias
             of this layer. If it is set to False, no bias will be added to the output units.
             If it is set to None, the bias is initialized zero. The default value is None.
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".

     Attribute:
         **weight** (Parameter): the learnable weights of this layer.
@@ -2224,38 +2180,38 @@ class BilinearTensorProduct(layers.Layer):
          layer1 = numpy.random.random((5, 5)).astype('float32')
          layer2 = numpy.random.random((5, 4)).astype('float32')
          bilinearTensorProduct = fluid.dygraph.nn.BilinearTensorProduct(
-             'BilinearTensorProduct', size=1000)
+             input1_dim=5, input2_dim=4, output_dim=1000)
          ret = bilinearTensorProduct(fluid.dygraph.base.to_variable(layer1),
                                      fluid.dygraph.base.to_variable(layer2))
     """

     def __init__(self,
-                 name_scope,
-                 size,
+                 input1_dim,
+                 input2_dim,
+                 output_dim,
                  name=None,
                  act=None,
                  param_attr=None,
-                 bias_attr=None):
-        super(BilinearTensorProduct, self).__init__(name_scope)
+                 bias_attr=None,
+                 dtype='float32'):
+        super(BilinearTensorProduct, self).__init__()
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._act = act
-        self._size = size
         self._name = name
+        self._input1_dim = input1_dim
+        self._input2_dim = input2_dim
+        self._output_dim = output_dim
         self._inputs = dict()
+        self._dtype = dtype

-    def _build_once(self, x, y):
-        self._dtype = self._helper.input_dtype(x)
-
-        param_shape = [self._size, x.shape[1], y.shape[1]]
+        param_shape = [self._output_dim, self._input1_dim, self._input2_dim]
         self._w = self.create_parameter(
             attr=self._param_attr,
             shape=param_shape,
             dtype=self._dtype,
             is_bias=False)
-
-        bias_size = [1, self._size]
+        bias_size = [1, self._output_dim]
         self._bias_param = self.create_parameter(
             attr=self._bias_attr,
             shape=bias_size,
...@@ -2354,18 +2310,17 @@ class Conv2DTranspose(layers.Layer): ...@@ -2354,18 +2310,17 @@ class Conv2DTranspose(layers.Layer):
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] ) W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Parameters: Parameters:
name_scope(str): The name of this class. num_channels(int): The number of channels in the input image.
num_filters(int): The number of the filter. It is as same as the output num_filters(int): The number of the filter. It is as same as the output
feature map. feature map.
filter_size(int or tuple): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square.
output_size(int or tuple, optional): The output image size. If output size is a output_size(int or tuple, optional): The output image size. If output size is a
tuple, it must contain two integers, (image_H, image_W). None if use tuple, it must contain two integers, (image_H, image_W). None if use
filter_size, padding, and stride to calculate output_size. filter_size, padding, and stride to calculate output_size.
if output_size and filter_size are specified at the same time, They if output_size and filter_size are specified at the same time, They
should follow the formula above. Default: None. should follow the formula above. Default: None.
filter_size(int or tuple, optional): The filter size. If filter_size is a tuple,
it must contain two integers, (filter_size_H, filter_size_W).
Otherwise, the filter will be a square. None if use output size to
calculate filter_size. Default: None.
padding(int or tuple, optional): The padding size. If padding is a tuple, it must padding(int or tuple, optional): The padding size. If padding is a tuple, it must
contain two integers, (padding_H, padding_W). Otherwise, the contain two integers, (padding_H, padding_W). Otherwise, the
padding_H = padding_W = padding. Default: 0. padding_H = padding_W = padding. Default: 0.
...@@ -2394,6 +2349,7 @@ class Conv2DTranspose(layers.Layer): ...@@ -2394,6 +2349,7 @@ class Conv2DTranspose(layers.Layer):
library is installed. Default: True. library is installed. Default: True.
act (str, optional): Activation type, if it is set to None, activation is not appended. act (str, optional): Activation type, if it is set to None, activation is not appended.
Default: None. Default: None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
Attribute: Attribute:
**weight** (Parameter): the learnable weights of filters of this layer. **weight** (Parameter): the learnable weights of filters of this layer.
...@@ -2412,16 +2368,16 @@ class Conv2DTranspose(layers.Layer): ...@@ -2412,16 +2368,16 @@ class Conv2DTranspose(layers.Layer):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
data = np.random.random((3, 32, 32, 5)).astype('float32') data = np.random.random((3, 32, 32, 5)).astype('float32')
conv2DTranspose = fluid.dygraph.nn.Conv2DTranspose( conv2DTranspose = fluid.dygraph.nn.Conv2DTranspose(
'Conv2DTranspose', num_filters=2, filter_size=3) num_channels=32, num_filters=2, filter_size=3)
ret = conv2DTranspose(fluid.dygraph.base.to_variable(data)) ret = conv2DTranspose(fluid.dygraph.base.to_variable(data))
""" """
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size,
output_size=None, output_size=None,
filter_size=None,
padding=0, padding=0,
stride=1, stride=1,
dilation=1, dilation=1,
...@@ -2429,13 +2385,15 @@ class Conv2DTranspose(layers.Layer): ...@@ -2429,13 +2385,15 @@ class Conv2DTranspose(layers.Layer):
param_attr=None, param_attr=None,
bias_attr=None, bias_attr=None,
                  use_cudnn=True,
-                 act=None):
-        super(Conv2DTranspose, self).__init__(name_scope)
+                 act=None,
+                 dtype='float32'):
+        super(Conv2DTranspose, self).__init__()
         assert param_attr is not False, "param_attr should not be False in conv2d_transpose."
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._act = act
         self._groups = groups
+        self._num_channels = num_channels
         self._num_filters = num_filters
         self._use_cudnn = use_cudnn
         self._padding = padding
@@ -2443,44 +2401,21 @@ class Conv2DTranspose(layers.Layer):
         self._dilation = dilation
         self._filter_size = filter_size
         self._output_size = output_size
-        self._op_type = 'conv2d_transpose'
-
-    def _build_once(self, input):
-        input_channel = input.shape[1]
-        if (input_channel == self._groups and
-                self._num_filters == input_channel and not self._use_cudnn):
+        self._dtype = dtype
+
+        if (self._num_channels == self._groups and
+                self._num_filters == self._num_channels and
+                not self._use_cudnn):
             self._op_type = 'depthwise_conv2d_transpose'
-
-        if not isinstance(input, Variable):
-            raise TypeError("Input of conv2d_transpose must be Variable")
+        else:
+            self._op_type = 'conv2d_transpose'
 
         self._padding = utils.convert_to_list(self._padding, 2, 'padding')
         self._stride = utils.convert_to_list(self._stride, 2, 'stride')
         self._dilation = utils.convert_to_list(self._dilation, 2, 'dilation')
 
-        if not isinstance(self._use_cudnn, bool):
-            raise ValueError("use_cudnn should be True or False")
-
-        if self._filter_size is None:
-            if self._output_size is None:
-                raise ValueError(
-                    "output_size must be set when filter_size is None")
-            if isinstance(self._output_size, int):
-                self._output_size = [self._output_size, self._output_size]
-
-            h_in = input.shape[2]
-            w_in = input.shape[3]
-
-            filter_size_h = (self._output_size[0] -
-                             (h_in - 1) * self._stride[0] + 2 * self._padding[0]
-                             - 1) // self._dilation[0] + 1
-            filter_size_w = (self._output_size[1] -
-                             (w_in - 1) * self._stride[1] + 2 * self._padding[1]
-                             - 1) // self._dilation[1] + 1
-            self._filter_size = [filter_size_h, filter_size_w]
-        else:
-            self._filter_size = utils.convert_to_list(
-                self._filter_size, 2, 'conv2d_transpose.filter_size')
+        self._filter_size = utils.convert_to_list(
+            self._filter_size, 2, 'conv2d_transpose.filter_size')
 
         if self._output_size is None:
             self._output_size = []
@@ -2492,11 +2427,11 @@ class Conv2DTranspose(layers.Layer):
             raise ValueError("output_size should be list or int")
         self._padding = utils.convert_to_list(self._padding, 2, 'padding')
         self._groups = 1 if self._groups is None else self._groups
-        filter_shape = [input_channel, self._num_filters // self._groups
+        filter_shape = [self._num_channels, self._num_filters // self._groups
                         ] + self._filter_size
 
         self._img_filter = self.create_parameter(
-            dtype=input.dtype, shape=filter_shape, attr=self._param_attr)
+            dtype=self._dtype, shape=filter_shape, attr=self._param_attr)
 
         self._bias_param = self.create_parameter(
             attr=self._bias_attr,
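Note: with _build_once gone, everything above runs in __init__, so the layer's weights exist as soon as it is constructed. A minimal usage sketch of the reworked constructor (the input sizes below are illustrative assumptions, not from this commit):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        data = np.random.random((2, 3, 12, 12)).astype('float32')
        # num_channels must now match the input's channel axis at construction time
        conv_t = fluid.dygraph.nn.Conv2DTranspose(
            num_channels=3, num_filters=8, filter_size=4, stride=2, padding=1)
        out = conv_t(fluid.dygraph.base.to_variable(data))  # shape [2, 8, 24, 24]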
@@ -2734,7 +2669,7 @@ class GroupNorm(layers.Layer):
     Refer to `Group Normalization <https://arxiv.org/abs/1803.08494>`_ .
 
     Parameters:
-        name_scope(str): The name of this class.
+        channels(int): The number of channels of input.
         groups(int): The number of groups that divided from channels.
         epsilon(float, optional): The small value added to the variance to prevent
             division by zero. Default: 1e-05.
@@ -2758,31 +2693,32 @@ class GroupNorm(layers.Layer):
           with fluid.dygraph.guard():
               x = np.random.random((8, 32, 32)).astype('float32')
-              groupNorm = fluid.dygraph.nn.GroupNorm('GroupNorm', groups=4)
+              groupNorm = fluid.dygraph.nn.GroupNorm(channels=32, groups=4)
               ret = groupNorm(fluid.dygraph.base.to_variable(x))
 
     """
 
     def __init__(self,
-                 name_scope,
+                 channels,
                  groups,
                  epsilon=1e-05,
                  param_attr=None,
                  bias_attr=None,
                  act=None,
-                 data_layout='NCHW'):
-        super(GroupNorm, self).__init__(name_scope)
+                 data_layout='NCHW',
+                 dtype='float32'):
+        super(GroupNorm, self).__init__()
         self._param_attr = param_attr
         self._bias_attr = bias_attr
         self._epsilon = epsilon
+        self._channels = channels
         self._groups = groups
         self._act = act
+        self._dtype = dtype
         if data_layout != 'NCHW':
             raise ValueError("unsupported data layout:" + data_layout)
 
-    def _build_once(self, input):
-        self._dtype = self._helper.input_dtype(input)
-        param_shape = [input.shape[1]]
+        param_shape = [self._channels]
         if self._bias_attr:
             self._bias = self.create_parameter(
                 attr=self._bias_attr,
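Note: since the parameter shape now comes from the channels argument rather than from the first input, GroupNorm's scale and bias are created eagerly. A small sketch (explicit attrs are passed here only so that both parameters are created):

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        gn = fluid.dygraph.nn.GroupNorm(
            channels=32, groups=4,
            param_attr=fluid.ParamAttr(), bias_attr=fluid.ParamAttr())
        # both parameters have shape [32] right after construction,
        # with no forward pass needed to trigger a _build_once
        for p in gn.parameters():
            print(p.name, p.shape)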
@@ -2862,11 +2798,12 @@ class SpectralNorm(layers.Layer):
     Refer to `Spectral Normalization <https://arxiv.org/abs/1802.05957>`_ .
 
     Parameters:
-        name_scope(str): The name of this class.
+        weight_shape(list or tuple): The shape of weight parameter.
         dim(int, optional): The index of dimension which should be permuted to the first before reshaping Input(Weight) to matrix, it should be set as 0 if Input(Weight) is the weight of fc layer, and should be set as 1 if Input(Weight) is the weight of conv layer. Default: 0.
         power_iters(int, optional): The number of power iterations to calculate spectral norm. Default: 1.
         eps(float, optional): The epsilon for numerical stability in calculating norms. Default: 1e-12.
         name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
 
     Returns:
         None
@@ -2878,23 +2815,27 @@ class SpectralNorm(layers.Layer):
           import numpy as np
 
           with fluid.dygraph.guard():
-              x = np.random.random((2, 8, 32, 32)).astype('float32')
-              spectralNorm = fluid.dygraph.nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
-              ret = spectralNorm(fluid.dygraph.base.to_variable(x))
+              weight = np.random.random((2, 8, 32, 32)).astype('float32')
+              spectralNorm = fluid.dygraph.nn.SpectralNorm(weight.shape, dim=1, power_iters=2)
+              ret = spectralNorm(fluid.dygraph.base.to_variable(weight))
 
     """
 
-    def __init__(self, name_scope, dim=0, power_iters=1, eps=1e-12, name=None):
-        super(SpectralNorm, self).__init__(name_scope)
+    def __init__(self,
+                 weight_shape,
+                 dim=0,
+                 power_iters=1,
+                 eps=1e-12,
+                 dtype='float32'):
+        super(SpectralNorm, self).__init__()
         self._power_iters = power_iters
         self._eps = eps
         self._dim = dim
-
-    def _build_once(self, weight):
-        self._dtype = self._helper.input_dtype(weight)
-        input_shape = weight.shape
-        h = input_shape[self._dim]
-        w = np.prod(input_shape) // h
+        self._dtype = dtype
+        self._weight_shape = list(weight_shape)
+        h = self._weight_shape[self._dim]
+        w = np.prod(self._weight_shape) // h
 
         self.u = self.create_parameter(
             attr=ParamAttr(),
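Note: passing weight_shape up front lets the u vector (and the rest of the power-iteration state) be allocated in __init__. A runnable sketch with an assumed 4-D conv-style weight:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        w = np.random.random((4, 8, 3, 3)).astype('float32')
        # dim=1 treats axis 1 as the "height" of the reshaped weight matrix
        sn = fluid.dygraph.nn.SpectralNorm(w.shape, dim=1, power_iters=2)
        out = sn(fluid.dygraph.base.to_variable(w))  # same shape as w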
@@ -2938,7 +2879,7 @@ class TreeConv(layers.Layer):
     The paper of Tree-Based Convolution Operator is here: `tree-based convolution <https://arxiv.org/abs/1409.5718v1/>`_ .
 
     Parameters:
-        name_scope(str): The name of this class.
+        feature_size(int): last dimension of nodes_vector.
         output_size(int): output feature width.
         num_filters(int, optional): number of filters, Default: 1.
         max_depth(int, optional): max depth of filters, Default: 2.
@@ -2946,6 +2887,7 @@ class TreeConv(layers.Layer):
         param_attr(ParamAttr, optional): the parameter attribute for the filters, Default: None.
         bias_attr(ParamAttr, optional): the parameter attribute for the bias of this layer, Default: None.
         name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` .
+        dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
 
     Attribute:
         **weight** (Parameter): the learnable weights of filters of this layer.
@@ -2966,35 +2908,31 @@ class TreeConv(layers.Layer):
               nodes_vector = numpy.random.random((1, 10, 5)).astype('float32')
               edge_set = numpy.random.random((1, 9, 2)).astype('int32')
               treeConv = fluid.dygraph.nn.TreeConv(
-                  'TreeConv', output_size=6, num_filters=1, max_depth=2)
+                  feature_size=5, output_size=6, num_filters=1, max_depth=2)
               ret = treeConv(fluid.dygraph.base.to_variable(nodes_vector), fluid.dygraph.base.to_variable(edge_set))
     """
 
     def __init__(self,
-                 name_scope,
+                 feature_size,
                  output_size,
                  num_filters=1,
                  max_depth=2,
                  act='tanh',
                  param_attr=None,
                  bias_attr=None,
-                 name=None):
-        super(TreeConv, self).__init__(name_scope)
+                 name=None,
+                 dtype='float32'):
+        super(TreeConv, self).__init__()
         self._name = name
+        self._feature_size = feature_size
         self._output_size = output_size
         self._act = act
         self._max_depth = max_depth
         self._num_filters = num_filters
         self._bias_attr = bias_attr
         self._param_attr = param_attr
-
-    def _build_once(self, nodes_vector, edge_set):
-        assert isinstance(nodes_vector, Variable)
-        assert isinstance(edge_set, Variable)
-        self._dtype = self._helper.input_dtype(nodes_vector)
-
-        feature_size = nodes_vector.shape[2]
-        w_shape = [feature_size, 3, self._output_size, self._num_filters]
+        self._dtype = dtype
+        w_shape = [self._feature_size, 3, self._output_size, self._num_filters]
         if self._bias_attr:
             self._bias_param = self.create_parameter(
                 attr=self._bias_attr,
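Note: TreeConv follows the same pattern, with feature_size fixing the filter shape [feature_size, 3, output_size, num_filters] at construction. A sketch with assumed tensor sizes:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        nodes = np.random.random((2, 12, 16)).astype('float32')  # feature_size == 16
        edges = np.random.random((2, 11, 2)).astype('int32')
        tc = fluid.dygraph.nn.TreeConv(feature_size=16, output_size=4,
                                       num_filters=1, max_depth=2)
        out = tc(fluid.dygraph.base.to_variable(nodes),
                 fluid.dygraph.base.to_variable(edges))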
......
@@ -34,7 +34,6 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  num_channels,
                  num_filters,
                  filter_size,
@@ -51,10 +50,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
                  use_cudnn=False,
                  param_attr=None,
                  bias_attr=None):
-        super(SimpleImgConvPool, self).__init__(name_scope)
+        super(SimpleImgConvPool, self).__init__()
 
         self._conv2d = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=conv_stride,
@@ -66,7 +65,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
             use_cudnn=use_cudnn)
 
         self._pool2d = Pool2D(
-            self.full_name(),
             pool_size=pool_size,
             pool_type=pool_type,
             pool_stride=pool_stride,
@@ -85,10 +83,10 @@ class MNIST(fluid.dygraph.Layer):
         super(MNIST, self).__init__(name_scope)
 
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            self.full_name(), 1, 20, 5, 2, 2, act="relu")
+            1, 20, 5, 2, 2, act="relu")
 
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            self.full_name(), 20, 50, 5, 2, 2, act="relu")
+            20, 50, 5, 2, 2, act="relu")
 
         pool_2_shape = 50 * 4 * 4
         SIZE = 10
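Note: the same mechanical migration repeats across the test files below: drop the name_scope positional argument (and the self.full_name() prefix) and state the input channel count explicitly. A minimal sketch of the pattern (class name and sizes are illustrative):

    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import Conv2D, Pool2D

    class ConvPool(fluid.dygraph.Layer):
        def __init__(self, num_channels, num_filters):
            super(ConvPool, self).__init__()  # no name_scope argument any more
            self._conv = Conv2D(num_channels=num_channels,
                                num_filters=num_filters, filter_size=3)
            self._pool = Pool2D(pool_size=2, pool_type='max', pool_stride=2)

        def forward(self, x):
            return self._pool(self._conv(x))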
......
@@ -27,7 +27,7 @@ import paddle.fluid as fluid
 import paddle.fluid.dygraph as dygraph
 from paddle.fluid import core
 from paddle.fluid.optimizer import SGDOptimizer
-from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, LayerNorm
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
 from paddle.fluid.dygraph.base import to_variable
 from paddle.fluid.layer_helper import LayerHelper
 import math
@@ -77,16 +77,16 @@ def optimizer_setting(params):
 class ConvBNLayer(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None):
-        super(ConvBNLayer, self).__init__(name_scope)
+        super(ConvBNLayer, self).__init__()
 
         self._conv = Conv2D(
-            "conv2d",
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
@@ -96,11 +96,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
             bias_attr=False,
             param_attr=fluid.ParamAttr(name="weights"))
 
-        self._layer_norm = LayerNorm(self.full_name(), begin_norm_axis=1)
+        # disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape
+        # self._batch_norm = BatchNorm(num_filters, act=act)
 
     def forward(self, inputs):
         y = self._conv(inputs)
-        y = self._layer_norm(y)
+        # y = self._batch_norm(y)
         return y
 
@@ -109,8 +110,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
     def __init__(self, name_scope, num_channels, reduction_ratio):
         super(SqueezeExcitation, self).__init__(name_scope)
-        self._pool = Pool2D(
-            self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
+        self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(num_channels * 1.0)
         self._squeeze = FC(
             self.full_name(),
@@ -136,29 +136,28 @@ class BottleneckBlock(fluid.dygraph.Layer):
 class BottleneckBlock(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  num_channels,
                  num_filters,
                  stride,
                  cardinality,
                  reduction_ratio,
                  shortcut=True):
-        super(BottleneckBlock, self).__init__(name_scope)
+        super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_channels,
            num_filters=num_filters,
             filter_size=1,
             act="relu")
         self.conv1 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters,
             filter_size=3,
             stride=stride,
             groups=cardinality,
             act="relu")
         self.conv2 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters * 2,
             filter_size=1,
             act=None)
@@ -170,7 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
         if not shortcut:
             self.short = ConvBNLayer(
-                self.full_name(),
+                num_channels=num_channels,
                 num_filters=num_filters * 2,
                 filter_size=1,
                 stride=stride)
@@ -209,63 +208,51 @@ class SeResNeXt(fluid.dygraph.Layer):
             depth = [3, 4, 6, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=64,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 101:
             cardinality = 32
             reduction_ratio = 16
             depth = [3, 4, 23, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=64,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 152:
             cardinality = 64
             reduction_ratio = 16
             depth = [3, 8, 36, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=64,
                 filter_size=3,
                 stride=2,
                 act='relu')
             self.conv1 = ConvBNLayer(
-                self.full_name(),
+                num_channels=64,
                 num_filters=64,
                 filter_size=3,
                 stride=1,
                 act='relu')
             self.conv2 = ConvBNLayer(
-                self.full_name(),
+                num_channels=64,
                 num_filters=128,
                 filter_size=3,
                 stride=1,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
         self.bottleneck_block_list = []
         num_channels = 64
@@ -275,7 +262,6 @@ class SeResNeXt(fluid.dygraph.Layer):
                 bottleneck_block = self.add_sublayer(
                     'bb_%d_%d' % (block, i),
                     BottleneckBlock(
-                        self.full_name(),
                         num_channels=num_channels,
                         num_filters=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
@@ -287,7 +273,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 shortcut = True
 
         self.pool2d_avg = Pool2D(
-            self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
+            pool_size=7, pool_type='avg', global_pooling=True)
         stdv = 1.0 / math.sqrt(2048 * 1.0)
 
         self.out = FC(self.full_name(),
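Note: for reference, the reworked BatchNorm that the commented-out lines above refer to takes the channel count as its first argument. A runnable sketch outside the multi-card setting (shapes are assumptions):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import BatchNorm

    with fluid.dygraph.guard():
        x = np.random.random((4, 16, 8, 8)).astype('float32')
        bn = BatchNorm(16, act='relu')  # 16 == channel axis of x
        y = bn(fluid.dygraph.base.to_variable(x))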
......
@@ -23,7 +23,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  pool_size,
@@ -40,10 +40,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
                  dtype='float32',
                  param_attr=None,
                  bias_attr=None):
-        super(SimpleImgConvPool, self).__init__(name_scope)
+        super(SimpleImgConvPool, self).__init__()
 
         self._conv2d = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=conv_stride,
@@ -57,7 +57,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
             act=act)
 
         self._pool2d = Pool2D(
-            self.full_name(),
             pool_size=pool_size,
             pool_type=pool_type,
             pool_stride=pool_stride,
@@ -76,7 +75,7 @@ class MNIST(fluid.dygraph.Layer):
         super(MNIST, self).__init__(name_scope)
 
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            self.full_name(),
+            num_channels=3,
             num_filters=20,
             filter_size=5,
             pool_size=2,
@@ -86,7 +85,7 @@ class MNIST(fluid.dygraph.Layer):
             use_cudnn=True)
 
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            self.full_name(),
+            num_channels=20,
             num_filters=50,
             filter_size=5,
             pool_size=2,
......
@@ -30,7 +30,6 @@ from test_imperative_base import new_program_scope
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  num_channels,
                  num_filters,
                  filter_size,
@@ -47,10 +46,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
                  use_cudnn=False,
                  param_attr=None,
                  bias_attr=None):
-        super(SimpleImgConvPool, self).__init__(name_scope)
+        super(SimpleImgConvPool, self).__init__()
 
         self._conv2d = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=conv_stride,
@@ -62,7 +61,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
             use_cudnn=use_cudnn)
 
         self._pool2d = Pool2D(
-            self.full_name(),
             pool_size=pool_size,
             pool_type=pool_type,
             pool_stride=pool_stride,
@@ -81,10 +79,10 @@ class MNIST(fluid.dygraph.Layer):
         super(MNIST, self).__init__(name_scope)
 
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            self.full_name(), 1, 20, 5, 2, 2, act="relu")
+            1, 20, 5, 2, 2, act="relu")
 
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            self.full_name(), 20, 50, 5, 2, 2, act="relu")
+            20, 50, 5, 2, 2, act="relu")
 
         pool_2_shape = 50 * 4 * 4
         SIZE = 10
......
@@ -100,8 +100,8 @@ class AutoPruneLayer3(fluid.Layer):
 class MyLayer(fluid.Layer):
     def __init__(self, name_scope, vocab_size, size, dtype="float32"):
         super(MyLayer, self).__init__(name_scope, dtype)
-        self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
+        self.embed0 = fluid.Embedding(size=(vocab_size, size))
+        self.embed1 = fluid.Embedding(size=(vocab_size, size))
         self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
         self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
@@ -122,8 +122,8 @@ class MyLayer(fluid.Layer):
 class MyLayer2(fluid.Layer):
     def __init__(self, name_scope, vocab_size, size, dtype="float32"):
         super(MyLayer2, self).__init__(name_scope, dtype)
-        self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
-        self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
+        self.embed0 = fluid.Embedding(size=(vocab_size, size))
+        self.embed1 = fluid.Embedding(size=(vocab_size, size))
         self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
         self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
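Note: Embedding likewise loses the scope argument, keeping only its real configuration. A minimal sketch (vocabulary and width are arbitrary):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.Embedding(size=(100, 32))  # no self.full_name() prefix
        ids = fluid.dygraph.base.to_variable(
            np.array([[1], [2], [3]]).astype('int64'))
        vecs = emb(ids)  # looks up rows of the 100 x 32 table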
......
@@ -90,9 +90,9 @@ class DeepCF(fluid.Layer):
         self._num_users = num_users
         self._num_items = num_items
         self._rating_matrix = self.create_parameter(
-            fluid.ParamAttr(trainable=False),
-            matrix.shape,
-            matrix.dtype,
+            attr=fluid.ParamAttr(trainable=False),
+            shape=matrix.shape,
+            dtype=matrix.dtype,
             is_bias=False,
             default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
         self._rating_matrix.stop_gradient = True
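Note: this hunk reflects the refined Layer.create_parameter API from this change, which is keyword-based (attr, shape, dtype, is_bias, default_initializer). A sketch of a custom layer using it (class name and shapes are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    class Scale(fluid.dygraph.Layer):
        def __init__(self, shape):
            super(Scale, self).__init__()
            self.w = self.create_parameter(
                attr=fluid.ParamAttr(),
                shape=shape,
                dtype='float32',
                default_initializer=fluid.initializer.ConstantInitializer(1.0))

        def forward(self, x):
            return x * self.w

    with fluid.dygraph.guard():
        out = Scale([4])(fluid.dygraph.base.to_variable(
            np.ones([2, 4], dtype='float32')))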
......
@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.jit import TracedLayer
 class SimpleImgConvPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  pool_size,
@@ -48,10 +48,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
                  use_cudnn=False,
                  param_attr=None,
                  bias_attr=None):
-        super(SimpleImgConvPool, self).__init__(name_scope)
+        super(SimpleImgConvPool, self).__init__()
 
         self._conv2d = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=conv_stride,
@@ -63,7 +63,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
             use_cudnn=use_cudnn)
 
         self._pool2d = Pool2D(
-            self.full_name(),
             pool_size=pool_size,
             pool_type=pool_type,
             pool_stride=pool_stride,
@@ -82,10 +81,10 @@ class MNIST(fluid.dygraph.Layer):
         super(MNIST, self).__init__(name_scope)
 
         self._simple_img_conv_pool_1 = SimpleImgConvPool(
-            self.full_name(), 20, 5, 2, 2, act="relu")
+            1, 20, 5, 2, 2, act="relu")
 
         self._simple_img_conv_pool_2 = SimpleImgConvPool(
-            self.full_name(), 50, 5, 2, 2, act="relu")
+            20, 50, 5, 2, 2, act="relu")
 
         pool_2_shape = 50 * 4 * 4
         SIZE = 10
......
@@ -57,7 +57,6 @@ class Config(object):
 class ConvBNPool(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  group,
                  out_ch,
                  channels,
@@ -65,7 +64,7 @@ class ConvBNPool(fluid.dygraph.Layer):
                  is_test=False,
                  pool=True,
                  use_cudnn=True):
-        super(ConvBNPool, self).__init__(name_scope)
+        super(ConvBNPool, self).__init__()
         self.group = group
         self.pool = pool
@@ -79,7 +78,7 @@ class ConvBNPool(fluid.dygraph.Layer):
             initializer=fluid.initializer.Normal(0.0, conv_std_1))
 
         self.conv_0_layer = Conv2D(
-            self.full_name(),
+            channels[0],
             out_ch[0],
             3,
             padding=1,
@@ -87,10 +86,9 @@ class ConvBNPool(fluid.dygraph.Layer):
             bias_attr=False,
             act=None,
             use_cudnn=use_cudnn)
-        self.bn_0_layer = BatchNorm(
-            self.full_name(), out_ch[0], act=act, is_test=is_test)
+        self.bn_0_layer = BatchNorm(out_ch[0], act=act, is_test=is_test)
         self.conv_1_layer = Conv2D(
-            self.full_name(),
+            out_ch[0],
             num_filters=out_ch[1],
             filter_size=3,
             padding=1,
@@ -98,12 +96,10 @@ class ConvBNPool(fluid.dygraph.Layer):
             bias_attr=False,
             act=None,
             use_cudnn=use_cudnn)
-        self.bn_1_layer = BatchNorm(
-            self.full_name(), out_ch[1], act=act, is_test=is_test)
+        self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test)
 
         if self.pool:
             self.pool_layer = Pool2D(
-                self.full_name(),
                 pool_size=2,
                 pool_type='max',
                 pool_stride=2,
@@ -125,22 +121,12 @@ class OCRConv(fluid.dygraph.Layer):
     def __init__(self, name_scope, is_test=False, use_cudnn=True):
         super(OCRConv, self).__init__(name_scope)
         self.conv_bn_pool_1 = ConvBNPool(
-            self.full_name(),
-            2, [16, 16], [1, 16],
-            is_test=is_test,
-            use_cudnn=use_cudnn)
+            2, [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
         self.conv_bn_pool_2 = ConvBNPool(
-            self.full_name(),
-            2, [32, 32], [16, 32],
-            is_test=is_test,
-            use_cudnn=use_cudnn)
+            2, [32, 32], [16, 32], is_test=is_test, use_cudnn=use_cudnn)
         self.conv_bn_pool_3 = ConvBNPool(
-            self.full_name(),
-            2, [64, 64], [32, 64],
-            is_test=is_test,
-            use_cudnn=use_cudnn)
+            2, [64, 64], [32, 64], is_test=is_test, use_cudnn=use_cudnn)
         self.conv_bn_pool_4 = ConvBNPool(
-            self.full_name(),
             2, [128, 128], [64, 128],
             is_test=is_test,
             pool=False,
@@ -169,7 +155,6 @@ class DynamicGRU(fluid.dygraph.Layer):
         super(DynamicGRU, self).__init__(scope_name)
 
         self.gru_unit = GRUUnit(
-            self.full_name(),
             size * 3,
             param_attr=param_attr,
             bias_attr=bias_attr,
@@ -337,10 +322,7 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
             size=decoder_size * 3,
             bias_attr=False)
         self.gru_unit = GRUUnit(
-            self.full_name(),
-            size=decoder_size * 3,
-            param_attr=None,
-            bias_attr=None)
+            size=decoder_size * 3, param_attr=None, bias_attr=None)
         self.out_layer = FC(self.full_name(),
                             size=num_classes + 2,
                             bias_attr=None,
@@ -383,8 +365,7 @@ class OCRAttention(fluid.dygraph.Layer):
             bias_attr=False,
             act='relu')
         self.embedding = Embedding(
-            self.full_name(), [Config.num_classes + 2, Config.word_vector_dim],
-            dtype='float32')
+            [Config.num_classes + 2, Config.word_vector_dim], dtype='float32')
         self.gru_decoder_with_attention = GRUDecoderWithAttention(
             self.full_name(), Config.decoder_size, Config.num_classes)
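Note: GRUUnit now takes size as its leading argument. A runnable sketch of a single step (batch size and hidden width D are arbitrary):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import GRUUnit

    D = 8
    with fluid.dygraph.guard():
        gru = GRUUnit(size=D * 3)  # no name_scope argument
        x = fluid.dygraph.base.to_variable(
            np.random.random((4, D * 3)).astype('float32'))
        pre_hidden = fluid.dygraph.base.to_variable(
            np.random.random((4, D)).astype('float32'))
        hidden, reset_hidden_pre, gate = gru(x, pre_hidden)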
......
@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
             init_scale=init_scale,
             dropout=dropout)
         self.embedding = Embedding(
-            self.full_name(),
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=is_sparse,
......
@@ -72,16 +72,16 @@ def optimizer_setting(params):
 class ConvBNLayer(fluid.Layer):
     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None):
-        super(ConvBNLayer, self).__init__(name_scope)
+        super(ConvBNLayer, self).__init__()
 
         self._conv = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
@@ -91,7 +91,7 @@ class ConvBNLayer(fluid.Layer):
             bias_attr=None,
             use_cudnn=False)
 
-        self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act)
+        self._batch_norm = BatchNorm(num_filters, act=act)
 
     def forward(self, inputs):
         y = self._conv(inputs)
@@ -101,29 +101,29 @@ class ConvBNLayer(fluid.Layer):
 class BottleneckBlock(fluid.Layer):
-    def __init__(self, name_scope, num_filters, stride, shortcut=True):
-        super(BottleneckBlock, self).__init__(name_scope)
+    def __init__(self, num_channels, num_filters, stride, shortcut=True):
+        super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=1,
             act='relu')
         self.conv1 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters,
             filter_size=3,
             stride=stride,
             act='relu')
         self.conv2 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters * 4,
             filter_size=1,
             act=None)
 
         if not shortcut:
             self.short = ConvBNLayer(
-                self.full_name(),
+                num_channels=num_channels,
                 num_filters=num_filters * 4,
                 filter_size=1,
                 stride=stride)
@@ -161,20 +161,13 @@ class ResNet(fluid.Layer):
             depth = [3, 4, 23, 3]
         elif layers == 152:
             depth = [3, 8, 36, 3]
+        num_channels = [64, 256, 512, 1024]
         num_filters = [64, 128, 256, 512]
 
         self.conv = ConvBNLayer(
-            self.full_name(),
-            num_filters=64,
-            filter_size=7,
-            stride=2,
-            act='relu')
+            num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
         self.pool2d_max = Pool2D(
-            self.full_name(),
-            pool_size=3,
-            pool_stride=2,
-            pool_padding=1,
-            pool_type='max')
+            pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
         self.bottleneck_block_list = []
         for block in range(len(depth)):
@@ -183,7 +176,8 @@ class ResNet(fluid.Layer):
                 bottleneck_block = self.add_sublayer(
                     'bb_%d_%d' % (block, i),
                     BottleneckBlock(
-                        self.full_name(),
+                        num_channels=num_channels[block]
+                        if i == 0 else num_filters[block] * 4,
                         num_filters=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
                         shortcut=shortcut))
@@ -191,7 +185,7 @@ class ResNet(fluid.Layer):
                 shortcut = True
 
         self.pool2d_avg = Pool2D(
-            self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
+            pool_size=7, pool_type='avg', global_pooling=True)
 
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)
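Note: with num_channels explicit, the caller must track each block's input width itself: the first block of a stage consumes num_channels[block], every later block consumes num_filters[block] * 4. A tiny sketch of that bookkeeping (stage widths copied from the test above):

    depth = [3, 4, 6, 3]                  # ResNet-50 stage depths
    num_channels = [64, 256, 512, 1024]   # input width entering each stage
    num_filters = [64, 128, 256, 512]

    for block in range(len(depth)):
        for i in range(depth[block]):
            in_ch = num_channels[block] if i == 0 else num_filters[block] * 4
            print('bb_%d_%d: %d -> %d' % (block, i, in_ch, num_filters[block] * 4))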
......
@@ -156,7 +156,6 @@ class PtbModel(fluid.Layer):
             init_scale=init_scale,
             dropout=dropout)
         self.embedding = Embedding(
-            self.full_name(),
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=False,
@@ -882,7 +881,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
     def testOnlyLoadParams(self):
         with fluid.dygraph.guard():
-            emb = fluid.dygraph.Embedding("emb", [10, 10])
+            emb = fluid.dygraph.Embedding([10, 10])
             state_dict = emb.state_dict()
 
             fluid.save_dygraph(state_dict, "emb_dy")
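Note: the matching load half of this round trip, as a sketch (assuming the 1.x API where load_dygraph returns a parameter dict and an optimizer dict, the latter None when only parameters were saved):

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        emb = fluid.dygraph.Embedding([10, 10])
        para_state_dict, opti_state_dict = fluid.load_dygraph("emb_dy")
        emb.set_dict(para_state_dict)  # opti_state_dict is None here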
......
@@ -63,16 +63,16 @@ def optimizer_setting(params):
 class ConvBNLayer(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
+                 num_channels,
                  num_filters,
                  filter_size,
                  stride=1,
                  groups=1,
                  act=None):
-        super(ConvBNLayer, self).__init__(name_scope)
+        super(ConvBNLayer, self).__init__()
 
         self._conv = Conv2D(
-            self.full_name(),
+            num_channels=num_channels,
             num_filters=num_filters,
             filter_size=filter_size,
             stride=stride,
@@ -81,7 +81,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
             act=None,
             bias_attr=None)
 
-        self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act)
+        self._batch_norm = BatchNorm(num_filters, act=act)
 
     def forward(self, inputs):
         y = self._conv(inputs)
@@ -94,8 +94,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
     def __init__(self, name_scope, num_channels, reduction_ratio):
         super(SqueezeExcitation, self).__init__(name_scope)
-        self._pool = Pool2D(
-            self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
+        self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
         self._squeeze = FC(
             self.full_name(),
             size=num_channels // reduction_ratio,
@@ -119,25 +118,24 @@ class BottleneckBlock(fluid.dygraph.Layer):
 class BottleneckBlock(fluid.dygraph.Layer):
     def __init__(self,
-                 name_scope,
                  num_channels,
                  num_filters,
                  stride,
                  cardinality,
                  reduction_ratio,
                  shortcut=True):
-        super(BottleneckBlock, self).__init__(name_scope)
+        super(BottleneckBlock, self).__init__()
 
         self.conv0 = ConvBNLayer(
-            self.full_name(), num_filters=num_filters, filter_size=1)
+            num_channels=num_channels, num_filters=num_filters, filter_size=1)
         self.conv1 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters,
             filter_size=3,
             stride=stride,
             groups=cardinality)
         self.conv2 = ConvBNLayer(
-            self.full_name(),
+            num_channels=num_filters,
             num_filters=num_filters * 4,
             filter_size=1,
             act='relu')
@@ -149,7 +147,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
         if not shortcut:
             self.short = ConvBNLayer(
-                self.full_name(),
+                num_channels=num_channels,
                 num_filters=num_filters * 4,
                 filter_size=1,
                 stride=stride)
@@ -191,63 +189,51 @@ class SeResNeXt(fluid.dygraph.Layer):
             depth = [3, 4, 6, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=64,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 101:
             cardinality = 32
             reduction_ratio = 16
             depth = [3, 4, 23, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=3,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
         elif layers == 152:
             cardinality = 64
             reduction_ratio = 16
             depth = [3, 8, 36, 3]
             num_filters = [128, 256, 512, 1024]
             self.conv0 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=3,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.conv1 = ConvBNLayer(
-                self.full_name(),
+                num_channels=3,
                 num_filters=3,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.conv2 = ConvBNLayer(
-                self.full_name(),
+                num_channels=7,
                 num_filters=3,
                 filter_size=7,
                 stride=2,
                 act='relu')
             self.pool = Pool2D(
-                self.full_name(),
-                pool_size=3,
-                pool_stride=2,
-                pool_padding=1,
-                pool_type='max')
+                pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
 
         self.bottleneck_block_list = []
         num_channels = 64
@@ -257,7 +243,6 @@ class SeResNeXt(fluid.dygraph.Layer):
                 bottleneck_block = self.add_sublayer(
                     'bb_%d_%d' % (block, i),
                     BottleneckBlock(
-                        self.full_name(),
                         num_channels=num_channels,
                         num_filters=num_filters[block],
                         stride=2 if i == 0 and block != 0 else 1,
@@ -269,7 +254,7 @@ class SeResNeXt(fluid.dygraph.Layer):
                 shortcut = True
 
         self.pool2d_avg = Pool2D(
-            self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
+            pool_size=7, pool_type='avg', global_pooling=True)
         import math
         stdv = 1.0 / math.sqrt(2048 * 1.0)
......
@@ -350,13 +350,12 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length,
 class PrePostProcessLayer(Layer):
-    def __init__(self, name_scope, process_cmd, shape_len=None):
-        super(PrePostProcessLayer, self).__init__(name_scope)
+    def __init__(self, d_model, process_cmd, shape_len=None):
+        super(PrePostProcessLayer, self).__init__()
         for cmd in process_cmd:
             if cmd == "n":
                 self._layer_norm = LayerNorm(
-                    name_scope=self.full_name(),
-                    begin_norm_axis=shape_len - 1,
+                    normalized_shape=d_model,
                     param_attr=fluid.ParamAttr(
                         initializer=fluid.initializer.Constant(1.)),
                     bias_attr=fluid.ParamAttr(
@@ -508,19 +507,19 @@ class EncoderSubLayer(Layer):
         self._postprocess_cmd = postprocess_cmd
         self._prepostprocess_dropout = prepostprocess_dropout
-        self._preprocess_layer = PrePostProcessLayer(self.full_name(),
+        self._preprocess_layer = PrePostProcessLayer(d_model,
                                                      self._preprocess_cmd, 3)
         self._multihead_attention_layer = MultiHeadAttentionLayer(
             self.full_name(), d_key, d_value, d_model, n_head,
             attention_dropout)
         self._postprocess_layer = PrePostProcessLayer(
-            self.full_name(), self._postprocess_cmd, None)
-        self._preprocess_layer2 = PrePostProcessLayer(self.full_name(),
+            d_model, self._postprocess_cmd, None)
+        self._preprocess_layer2 = PrePostProcessLayer(d_model,
                                                       self._preprocess_cmd, 3)
         self._positionwise_feed_forward = PositionwiseFeedForwardLayer(
             self.full_name(), d_inner_hid, d_model, relu_dropout)
         self._postprocess_layer2 = PrePostProcessLayer(
-            self.full_name(), self._postprocess_cmd, None)
+            d_model, self._postprocess_cmd, None)
 
     def forward(self, enc_input, attn_bias):
         pre_process_multihead = self._preprocess_layer(
@@ -559,7 +558,7 @@ class EncoderLayer(Layer):
         self._encoder_sublayers = list()
         self._prepostprocess_dropout = prepostprocess_dropout
         self._n_layer = n_layer
-        self._preprocess_layer = PrePostProcessLayer(self.full_name(),
+        self._preprocess_layer = PrePostProcessLayer(d_model,
                                                      self._preprocess_cmd, 3)
         for i in range(n_layer):
             self._encoder_sublayers.append(
@@ -595,7 +594,6 @@ class PrepareEncoderDecoderLayer(Layer):
         self._src_vocab_size = src_vocab_size
         self._dropout_rate = dropout_rate
         self._input_emb = Embedding(
-            name_scope=self.full_name(),
             size=[src_vocab_size, src_emb_dim],
             is_sparse=is_sparse,
             padding_idx=0,
@@ -608,7 +606,6 @@ class PrepareEncoderDecoderLayer(Layer):
         else:
             pos_inp = pos_inp2
         self._pos_emb = Embedding(
-            name_scope=self.full_name(),
             size=[self._src_max_len, src_emb_dim],
             is_sparse=is_sparse,
             param_attr=fluid.ParamAttr(
@@ -698,8 +695,8 @@ class DecoderSubLayer(Layer):
         self._postprocess_cmd = postprocess_cmd
         self._preprocess_cmd = preprocess_cmd
         self._prepostprcess_dropout = prepostprocess_dropout
-        self._pre_process_layer = PrePostProcessLayer(self.full_name(),
-                                                      preprocess_cmd, 3)
+        self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
+                                                      3)
         self._multihead_attention_layer = MultiHeadAttentionLayer(
             self.full_name(),
             d_key,
@@ -709,10 +706,10 @@ class DecoderSubLayer(Layer):
             attention_dropout,
             cache=cache,
             gather_idx=gather_idx)
-        self._post_process_layer = PrePostProcessLayer(self.full_name(),
-                                                       postprocess_cmd, None)
-        self._pre_process_layer2 = PrePostProcessLayer(self.full_name(),
-                                                       preprocess_cmd, 3)
+        self._post_process_layer = PrePostProcessLayer(d_model, postprocess_cmd,
+                                                       None)
+        self._pre_process_layer2 = PrePostProcessLayer(d_model, preprocess_cmd,
+                                                       3)
         self._multihead_attention_layer2 = MultiHeadAttentionLayer(
             self.full_name(),
             d_key,
@@ -723,13 +720,13 @@ class DecoderSubLayer(Layer):
             cache=cache,
             gather_idx=gather_idx,
             static_kv=True)
-        self._post_process_layer2 = PrePostProcessLayer(self.full_name(),
-                                                        postprocess_cmd, None)
-        self._pre_process_layer3 = PrePostProcessLayer(self.full_name(),
-                                                       preprocess_cmd, 3)
+        self._post_process_layer2 = PrePostProcessLayer(d_model,
+                                                        postprocess_cmd, None)
+        self._pre_process_layer3 = PrePostProcessLayer(d_model, preprocess_cmd,
+                                                       3)
         self._positionwise_feed_forward_layer = PositionwiseFeedForwardLayer(
             self.full_name(), d_inner_hid, d_model, relu_dropout)
-        self._post_process_layer3 = PrePostProcessLayer(self.full_name(),
-                                                        postprocess_cmd, None)
+        self._post_process_layer3 = PrePostProcessLayer(d_model,
+                                                        postprocess_cmd, None)
 
     def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias):
@@ -775,8 +772,8 @@ class DecoderLayer(Layer):
                  caches=None,
                  gather_idx=None):
         super(DecoderLayer, self).__init__(name_scope)
-        self._pre_process_layer = PrePostProcessLayer(self.full_name(),
-                                                      preprocess_cmd, 3)
+        self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
+                                                      3)
         self._decoder_sub_layers = list()
         self._n_layer = n_layer
         self._preprocess_cmd = preprocess_cmd
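Note: because LayerNorm now takes normalized_shape instead of begin_norm_axis, the transformer layers above can simply pass d_model. A minimal sketch of the new construction (the d_model value is arbitrary):

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.dygraph.nn import LayerNorm

    d_model = 16
    with fluid.dygraph.guard():
        ln = LayerNorm(normalized_shape=d_model)  # normalizes over the last dim
        x = fluid.dygraph.base.to_variable(
            np.random.random((4, 10, d_model)).astype('float32'))
        y = ln(x)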
......
...@@ -222,7 +222,7 @@ class TestLayer(LayerTest): ...@@ -222,7 +222,7 @@ class TestLayer(LayerTest):
dtype='float32', dtype='float32',
append_batch_size=False) append_batch_size=False)
lm = nn.LayerNorm( lm = nn.LayerNorm(
'layer_norm', normalized_shape=[32, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1), bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid') act='sigmoid')
ret = lm(t) ret = lm(t)
...@@ -230,14 +230,14 @@ class TestLayer(LayerTest): ...@@ -230,14 +230,14 @@ class TestLayer(LayerTest):
feed={'data': inp}, fetch_list=[ret])[0] feed={'data': inp}, fetch_list=[ret])[0]
with self.dynamic_graph(): with self.dynamic_graph():
lm = nn.LayerNorm( lm = nn.LayerNorm(
'layer_norm', normalized_shape=[32, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1), bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid') act='sigmoid')
dy_ret = lm(base.to_variable(inp)) dy_ret = lm(base.to_variable(inp))
dy_ret_value = dy_ret.numpy() dy_ret_value = dy_ret.numpy()
with self.dynamic_graph(): with self.dynamic_graph():
lm = nn.LayerNorm( lm = nn.LayerNorm(
'layer_norm', normalized_shape=[32, 32],
shift=False, shift=False,
scale=False, scale=False,
param_attr=fluid.initializer.ConstantInitializer(value=1), param_attr=fluid.initializer.ConstantInitializer(value=1),
...@@ -251,6 +251,14 @@ class TestLayer(LayerTest): ...@@ -251,6 +251,14 @@ class TestLayer(LayerTest):
self.assertTrue(np.array_equal(static_ret, static_ret2)) self.assertTrue(np.array_equal(static_ret, static_ret2))
self.assertTrue(np.array_equal(dy_ret_value, static_ret2)) self.assertTrue(np.array_equal(dy_ret_value, static_ret2))
with self.dynamic_graph():
lm = nn.LayerNorm(
normalized_shape=[16, 32],
bias_attr=fluid.initializer.ConstantInitializer(value=1),
act='sigmoid')
with self.assertRaises(ValueError):
lm(base.to_variable(inp))
def test_relu(self): def test_relu(self):
with self.static_graph(): with self.static_graph():
t = layers.data(name='t', shape=[3, 3], dtype='float32') t = layers.data(name='t', shape=[3, 3], dtype='float32')
...@@ -299,7 +307,8 @@ class TestLayer(LayerTest): ...@@ -299,7 +307,8 @@ class TestLayer(LayerTest):
with self.static_graph(): with self.static_graph():
images = layers.data(name='pixel', shape=[3, 5, 5], dtype='float32') images = layers.data(name='pixel', shape=[3, 5, 5], dtype='float32')
conv2d = nn.Conv2D('conv2d', num_filters=3, filter_size=[2, 2]) conv2d = nn.Conv2D(
num_channels=3, num_filters=3, filter_size=[2, 2])
ret = conv2d(images) ret = conv2d(images)
static_ret2 = self.get_static_graph_result( static_ret2 = self.get_static_graph_result(
feed={'pixel': np.ones( feed={'pixel': np.ones(
...@@ -308,14 +317,18 @@ class TestLayer(LayerTest): ...@@ -308,14 +317,18 @@ class TestLayer(LayerTest):
with self.dynamic_graph(): with self.dynamic_graph():
images = np.ones([2, 3, 5, 5], dtype='float32') images = np.ones([2, 3, 5, 5], dtype='float32')
conv2d = nn.Conv2D('conv2d', num_filters=3, filter_size=[2, 2]) conv2d = nn.Conv2D(
num_channels=3, num_filters=3, filter_size=[2, 2])
dy_ret = conv2d(base.to_variable(images)) dy_ret = conv2d(base.to_variable(images))
dy_ret_value = dy_ret.numpy() dy_ret_value = dy_ret.numpy()
with self.dynamic_graph(): with self.dynamic_graph():
images = np.ones([2, 3, 5, 5], dtype='float32') images = np.ones([2, 3, 5, 5], dtype='float32')
conv2d = nn.Conv2D( conv2d = nn.Conv2D(
'conv2d', num_filters=3, filter_size=[2, 2], bias_attr=False) num_channels=3,
num_filters=3,
filter_size=[2, 2],
bias_attr=False)
dy_ret = conv2d(base.to_variable(images)) dy_ret = conv2d(base.to_variable(images))
self.assertTrue(conv2d._bias_param is None) self.assertTrue(conv2d._bias_param is None)
...@@ -328,9 +341,10 @@ class TestLayer(LayerTest): ...@@ -328,9 +341,10 @@ class TestLayer(LayerTest):
weight_attr = fluid.ParamAttr( weight_attr = fluid.ParamAttr(
initializer=fluid.initializer.NumpyArrayInitializer( initializer=fluid.initializer.NumpyArrayInitializer(
custom_weight)) custom_weight))
conv2d1 = nn.Conv2D('conv2d1', num_filters=3, filter_size=[2, 2]) conv2d1 = nn.Conv2D(
num_channels=3, num_filters=3, filter_size=[2, 2])
conv2d2 = nn.Conv2D( conv2d2 = nn.Conv2D(
'conv2d2', num_channels=3,
num_filters=3, num_filters=3,
filter_size=[2, 2], filter_size=[2, 2],
param_attr=weight_attr) param_attr=weight_attr)
@@ -381,7 +395,7 @@ class TestLayer(LayerTest):
             hidden = layers.data(name='hidden', shape=[-1, D], dtype='float32')
             updated_hidden, reset_hidden_pre, gate = layers.gru_unit(
                 input=x, hidden=hidden, size=D * 3)
-            gru = nn.GRUUnit('gru', size=D * 3)
+            gru = nn.GRUUnit(size=D * 3)
             updated_hidden, reset_hidden_pre, gate = gru(x, hidden)
             static_ret2 = self.get_static_graph_result(
@@ -390,7 +404,7 @@ class TestLayer(LayerTest):
                 fetch_list=[updated_hidden, reset_hidden_pre, gate])
         with self.dynamic_graph():
-            gru = nn.GRUUnit('gru', size=D * 3)
+            gru = nn.GRUUnit(size=D * 3)
             dy_ret = gru(
                 base.to_variable(input), base.to_variable(hidden_input))
             dy_ret_value = []
@@ -406,8 +420,8 @@ class TestLayer(LayerTest):
             weight_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
-            gru1 = nn.GRUUnit('gru1', size=D * 3)
-            gru2 = nn.GRUUnit('gru2', size=D * 3, param_attr=weight_attr)
+            gru1 = nn.GRUUnit(size=D * 3)
+            gru2 = nn.GRUUnit(size=D * 3, param_attr=weight_attr)
             dy_ret1 = gru1(
                 base.to_variable(input), base.to_variable(hidden_input))
             dy_ret2 = gru2(
@@ -539,7 +553,7 @@ class TestLayer(LayerTest):
             out = layers.conv2d_transpose(
                 input=img,
                 num_filters=10,
-                output_size=28,
+                filter_size=27,
                 act='sigmoid',
                 bias_attr=fluid.initializer.ConstantInitializer(value=1))
             static_rlt = self.get_static_graph_result(
@@ -547,9 +561,9 @@ class TestLayer(LayerTest):
         with self.static_graph():
             img = layers.data(name='pixel', shape=[3, 2, 2], dtype='float32')
             conv2d_transpose = nn.Conv2DTranspose(
-                'conv2d_transpose',
+                num_channels=3,
                 num_filters=10,
-                output_size=28,
+                filter_size=27,
                 act='sigmoid',
                 bias_attr=fluid.initializer.ConstantInitializer(value=1))
             out = conv2d_transpose(img)
@@ -557,9 +571,9 @@ class TestLayer(LayerTest):
                 feed={'pixel': inp_np}, fetch_list=[out])[0]
         with self.dynamic_graph():
             conv2d_transpose = nn.Conv2DTranspose(
-                'conv2d_transpose',
+                num_channels=3,
                 num_filters=10,
-                output_size=28,
+                filter_size=27,
                 act='sigmoid',
                 bias_attr=fluid.initializer.ConstantInitializer(value=1))
             dy_rlt = conv2d_transpose(base.to_variable(inp_np))
@@ -574,9 +588,9 @@ class TestLayer(LayerTest):
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
             conv2d1 = nn.Conv2DTranspose(
-                'conv2d1', num_filters=3, filter_size=[2, 2])
+                num_channels=3, num_filters=3, filter_size=[2, 2])
             conv2d2 = nn.Conv2DTranspose(
-                'conv2d2',
+                num_channels=3,
                 num_filters=3,
                 filter_size=[2, 2],
                 param_attr=weight_attr)
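Note: besides num_channels, the test now fixes the kernel with filter_size=27 instead of inferring it from output_size=28; for a 2x2 input with stride 1 and no padding, a 27-wide kernel still yields a 28x28 output. A sketch:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        img = fluid.dygraph.to_variable(np.random.rand(1, 3, 2, 2).astype('float32'))
        deconv = fluid.dygraph.Conv2DTranspose(
            num_channels=3, num_filters=10, filter_size=27, act='sigmoid')
        out = deconv(img)  # output spatial size: (2 - 1) * 1 + 27 = 28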
@@ -641,7 +655,8 @@ class TestLayer(LayerTest):
                 dtype="float32",
                 append_batch_size=False)
             btp = nn.BilinearTensorProduct(
-                'btp',
+                3,
+                3,
                 6,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
                 act='sigmoid')
@@ -651,14 +666,15 @@ class TestLayer(LayerTest):
                        'y': inp_np_y}, fetch_list=[out])[0]
         with self.dynamic_graph():
             btp = nn.BilinearTensorProduct(
-                'btp',
+                3,
+                3,
                 6,
                 bias_attr=fluid.initializer.ConstantInitializer(value=1),
                 act='sigmoid')
             dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y))
             dy_rlt_value = dy_rlt.numpy()
         with self.dynamic_graph():
-            btp2 = nn.BilinearTensorProduct('btp', 6, act='sigmoid')
+            btp2 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
             dy_rlt2 = btp2(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y))
             dy_rlt2_value = dy_rlt2.numpy()
@@ -689,9 +705,9 @@ class TestLayer(LayerTest):
             weight_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
-            btp1 = nn.BilinearTensorProduct('btp1', 6, act='sigmoid')
+            btp1 = nn.BilinearTensorProduct(3, 3, 6, act='sigmoid')
             btp2 = nn.BilinearTensorProduct(
-                'btp2', 6, act='sigmoid', param_attr=weight_attr)
+                3, 3, 6, act='sigmoid', param_attr=weight_attr)
             dy_rlt1 = btp1(
                 base.to_variable(inp_np_x), base.to_variable(inp_np_y))
             dy_rlt2 = btp2(
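Note: BilinearTensorProduct now takes the two input widths and the output width positionally (here 3, 3, 6) in place of a name scope and a single size. A sketch with single-row inputs:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(np.random.rand(1, 3).astype('float32'))
        y = fluid.dygraph.to_variable(np.random.rand(1, 3).astype('float32'))
        btp = fluid.dygraph.BilinearTensorProduct(3, 3, 6, act='sigmoid')
        out = btp(x, y)  # shape [1, 6]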
@@ -732,8 +748,8 @@ class TestLayer(LayerTest):
                 dtype="float32",
                 append_batch_size=False)
             prelu = nn.PRelu(
-                'prelu',
                 mode=mode,
+                input_shape=data_t.shape,
                 param_attr=ParamAttr(initializer=Constant(1.0)))
             out = prelu(data_t)
             static_rlt2 = self.get_static_graph_result(
@@ -741,8 +757,8 @@ class TestLayer(LayerTest):
         with self.dynamic_graph():
             prelu = nn.PRelu(
-                'prelu',
                 mode=mode,
+                input_shape=inp_np.shape,
                 param_attr=ParamAttr(initializer=Constant(1.0)))
             dy_rlt = prelu(base.to_variable(inp_np))
             dy_rlt_value = dy_rlt.numpy()
@@ -754,12 +770,12 @@ class TestLayer(LayerTest):
             inp_np = np.random.randn(5, 200, 100, 100).astype("float32")
             inp = base.to_variable(inp_np)
             prelu1 = nn.PRelu(
-                'prelu1',
                 mode=mode,
+                input_shape=inp_np.shape,
                 param_attr=ParamAttr(initializer=Constant(2.0)))
             prelu2 = nn.PRelu(
-                'prelu2',
                 mode=mode,
+                input_shape=inp_np.shape,
                 param_attr=ParamAttr(initializer=Constant(1.0)))
             dy_rlt1 = prelu1(inp)
             dy_rlt2 = prelu2(inp)
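Note: PRelu now receives input_shape at construction so its weight can be sized eagerly (information build_once used to read from the first input). A sketch mirroring the test's pattern, with mode='all' and an illustrative shape:

    import numpy as np
    import paddle.fluid as fluid
    from paddle.fluid.param_attr import ParamAttr
    from paddle.fluid.initializer import Constant

    with fluid.dygraph.guard():
        inp_np = np.random.randn(2, 4, 8, 8).astype('float32')
        prelu = fluid.dygraph.PRelu(
            mode='all',
            input_shape=inp_np.shape,
            param_attr=ParamAttr(initializer=Constant(1.0)))
        out = prelu(fluid.dygraph.to_variable(inp_np))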
@@ -795,19 +811,13 @@ class TestLayer(LayerTest):
         with self.static_graph():
             data_t = layers.data(name='word', shape=[1], dtype='int64')
             emb2 = nn.Embedding(
-                name_scope='embedding',
-                size=[dict_size, 32],
-                param_attr='emb.w',
-                is_sparse=False)
+                size=[dict_size, 32], param_attr='emb.w', is_sparse=False)
             emb_rlt = emb2(data_t)
             static_rlt2 = self.get_static_graph_result(
                 feed={'word': inp_word}, fetch_list=[emb_rlt])[0]
         with self.dynamic_graph():
             emb2 = nn.Embedding(
-                name_scope='embedding',
-                size=[dict_size, 32],
-                param_attr='emb.w',
-                is_sparse=False)
+                size=[dict_size, 32], param_attr='emb.w', is_sparse=False)
             dy_rlt = emb2(base.to_variable(inp_word))
             dy_rlt_value = dy_rlt.numpy()
@@ -819,13 +829,9 @@ class TestLayer(LayerTest):
             weight_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
-            emb1 = nn.Embedding(
-                name_scope='embedding', size=[dict_size, 32], is_sparse=False)
+            emb1 = nn.Embedding(size=[dict_size, 32], is_sparse=False)
             emb2 = nn.Embedding(
-                name_scope='embedding',
-                size=[dict_size, 32],
-                param_attr=weight_attr,
-                is_sparse=False)
+                size=[dict_size, 32], param_attr=weight_attr, is_sparse=False)
             rep1 = emb1(base.to_variable(inp_word))
             rep2 = emb2(base.to_variable(inp_word))
             self.assertFalse(np.array_equal(emb1.weight.numpy(), custom_weight))
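Note: Embedding drops name_scope entirely; only size, param_attr, dtype and is_sparse remain, and the lookup table is reachable as emb.weight (used by the assertions above). A sketch, assuming int64 ids with a trailing dim of 1:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        ids = fluid.dygraph.to_variable(np.array([[1], [3], [7]], dtype='int64'))
        emb = fluid.dygraph.Embedding(size=[20, 32], is_sparse=False)
        vecs = emb(ids)  # one 32-d row per id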
@@ -892,10 +898,7 @@ class TestLayer(LayerTest):
             sample_weights = layers.fill_constant(
                 shape=[5, 1], dtype='float32', value=1)
             emb = nn.Embedding(
-                'embedding',
-                size=[dict_size, 32],
-                param_attr='emb.w',
-                is_sparse=False)
+                size=[dict_size, 32], param_attr='emb.w', is_sparse=False)
             embs2 = []
             for i in range(window_size):
@@ -906,8 +909,8 @@ class TestLayer(LayerTest):
                 embs2.append(emb_rlt)
             embs2 = layers.concat(input=embs2, axis=1)
-            nce = nn.NCE('nce',
-                         num_total_classes=dict_size,
+            nce = nn.NCE(num_total_classes=dict_size,
+                         dim=embs2.shape[1],
                          num_neg_samples=2,
                          sampler="custom_dist",
                          custom_dist=nid_freq_arr.tolist(),
@@ -932,10 +935,7 @@ class TestLayer(LayerTest):
             sample_weights = layers.fill_constant(
                 shape=[5, 1], dtype='float32', value=1)
             emb = nn.Embedding(
-                'embedding',
-                size=[dict_size, 32],
-                param_attr='emb.w',
-                is_sparse=False)
+                size=[dict_size, 32], param_attr='emb.w', is_sparse=False)
             embs3 = []
             for i in range(window_size):
@@ -946,8 +946,8 @@ class TestLayer(LayerTest):
                 embs3.append(emb_rlt)
             embs3 = layers.concat(input=embs3, axis=1)
-            nce = nn.NCE('nce',
-                         num_total_classes=dict_size,
+            nce = nn.NCE(num_total_classes=dict_size,
+                         dim=embs3.shape[1],
                          num_neg_samples=2,
                          sampler="custom_dist",
                          custom_dist=nid_freq_arr.tolist(),
@@ -974,10 +974,7 @@ class TestLayer(LayerTest):
             sample_weights = layers.fill_constant(
                 shape=[5, 1], dtype='float32', value=1)
             emb = nn.Embedding(
-                'embedding',
-                size=[dict_size, 32],
-                param_attr='emb.w',
-                is_sparse=False)
+                size=[dict_size, 32], param_attr='emb.w', is_sparse=False)
             embs3 = []
             for i in range(window_size):
@@ -988,8 +985,8 @@ class TestLayer(LayerTest):
                 embs3.append(emb_rlt)
             embs3 = layers.concat(input=embs3, axis=1)
-            nce1 = nn.NCE('nce1',
-                          num_total_classes=dict_size,
+            nce1 = nn.NCE(num_total_classes=dict_size,
+                          dim=embs3.shape[1],
                           num_neg_samples=2,
                           sampler="custom_dist",
                           custom_dist=nid_freq_arr.tolist(),
@@ -998,13 +995,13 @@ class TestLayer(LayerTest):
                           bias_attr='nce1.b',
                           sample_weight=sample_weights)
-            nce2 = nn.NCE('nce2',
-                          param_attr=weight_attr,
-                          num_total_classes=dict_size,
+            nce2 = nn.NCE(num_total_classes=dict_size,
+                          dim=embs3.shape[1],
                           num_neg_samples=2,
                           sampler="custom_dist",
                           custom_dist=nid_freq_arr.tolist(),
                           seed=seed,
+                          param_attr=weight_attr,
                           bias_attr='nce2.b',
                           sample_weight=sample_weights)
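Note: NCE now requires dim, the feature width of its input, up front; previously build_once inferred it from the first forward call. A sketch with the default uniform sampler and hypothetical sizes:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        feat = fluid.dygraph.to_variable(np.random.rand(5, 128).astype('float32'))
        label = fluid.dygraph.to_variable(np.arange(5, dtype='int64').reshape(5, 1))
        nce = fluid.dygraph.NCE(num_total_classes=20, dim=128, num_neg_samples=2)
        loss = nce(feat, label)  # dim must equal feat.shape[1]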
@@ -1040,7 +1037,7 @@ class TestLayer(LayerTest):
         with self.static_graph():
             images = layers.data(
                 name='pixel', shape=[3, 6, 6, 6], dtype='float32')
-            conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
+            conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2)
             ret = conv3d(images)
             static_ret2 = self.get_static_graph_result(
                 feed={'pixel': np.ones(
@@ -1049,7 +1046,7 @@ class TestLayer(LayerTest):
         with self.dynamic_graph():
             images = np.ones([2, 3, 6, 6, 6], dtype='float32')
-            conv3d = nn.Conv3D('conv3d', num_filters=3, filter_size=2)
+            conv3d = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2)
             dy_ret = conv3d(base.to_variable(images))
             dy_rlt_value = dy_ret.numpy()
@@ -1062,9 +1059,12 @@ class TestLayer(LayerTest):
             weight_attr = fluid.ParamAttr(
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
-            conv3d1 = nn.Conv3D('conv3d1', num_filters=3, filter_size=2)
+            conv3d1 = nn.Conv3D(num_channels=3, num_filters=3, filter_size=2)
             conv3d2 = nn.Conv3D(
-                'conv3d2', num_filters=3, filter_size=2, param_attr=weight_attr)
+                num_channels=3,
+                num_filters=3,
+                filter_size=2,
+                param_attr=weight_attr)
             dy_ret1 = conv3d1(base.to_variable(images))
             dy_ret2 = conv3d2(base.to_variable(images))
             self.assertFalse(np.array_equal(dy_ret1.numpy(), dy_ret2.numpy()))
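Note: Conv3D follows the same pattern as Conv2D, with num_channels matching the channel dim of the NCDHW input. A sketch:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        vol = fluid.dygraph.to_variable(np.ones([2, 3, 6, 6, 6], dtype='float32'))
        conv3d = fluid.dygraph.Conv3D(num_channels=3, num_filters=3, filter_size=2)
        out = conv3d(vol)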
@@ -1165,7 +1165,7 @@ class TestLayer(LayerTest):
                 dtype='float32',
                 lod_level=1,
                 append_batch_size=False)
-            groupNorm = nn.GroupNorm('GroupNorm', groups=2)
+            groupNorm = nn.GroupNorm(channels=shape[1], groups=2)
             ret = groupNorm(X)
             static_ret2 = self.get_static_graph_result(
                 feed={
@@ -1176,7 +1176,7 @@ class TestLayer(LayerTest):
                 with_lod=True)[0]
         with self.dynamic_graph():
-            groupNorm = nn.GroupNorm('GroupNorm', groups=2)
+            groupNorm = nn.GroupNorm(channels=shape[1], groups=2)
             dy_ret = groupNorm(base.to_variable(input))
             dy_rlt_value = dy_ret.numpy()
@@ -1216,7 +1216,7 @@ class TestLayer(LayerTest):
                 dtype='float32',
                 lod_level=1,
                 append_batch_size=False)
-            spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
+            spectralNorm = nn.SpectralNorm(shape, dim=1, power_iters=2)
             ret = spectralNorm(Weight)
             static_ret2 = self.get_static_graph_result(
                 feed={
@@ -1227,7 +1227,7 @@ class TestLayer(LayerTest):
                 with_lod=True)[0]
         with self.dynamic_graph():
-            spectralNorm = nn.SpectralNorm('SpectralNorm', dim=1, power_iters=2)
+            spectralNorm = nn.SpectralNorm(shape, dim=1, power_iters=2)
             dy_ret = spectralNorm(base.to_variable(input))
             dy_rlt_value = dy_ret.numpy()
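Note: GroupNorm now takes channels and SpectralNorm the full weight_shape, both of which build_once used to read from the first input. A sketch, assuming a [2, 4, 3, 3] tensor:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        w = np.random.rand(2, 4, 3, 3).astype('float32')
        gn = fluid.dygraph.GroupNorm(channels=4, groups=2)
        sn = fluid.dygraph.SpectralNorm(w.shape, dim=1, power_iters=2)
        normed = gn(fluid.dygraph.to_variable(w))    # channels = input dim 1
        normed_w = sn(fluid.dygraph.to_variable(w))  # spectral norm of the weight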
@@ -1286,7 +1286,7 @@ class TestLayer(LayerTest):
                 lod_level=1,
                 append_batch_size=False)
             treeConv = nn.TreeConv(
-                'TreeConv', output_size=6, num_filters=1, max_depth=2)
+                feature_size=5, output_size=6, num_filters=1, max_depth=2)
             ret = treeConv(NodesVector, EdgeSet)
             static_ret2 = self.get_static_graph_result(
                 feed={
@@ -1300,7 +1300,7 @@ class TestLayer(LayerTest):
         with self.dynamic_graph():
             treeConv = nn.TreeConv(
-                'SpectralNorm', output_size=6, num_filters=1, max_depth=2)
+                feature_size=5, output_size=6, num_filters=1, max_depth=2)
             dy_ret = treeConv(base.to_variable(vectors), base.to_variable(adj))
             dy_rlt_value = dy_ret.numpy()
@@ -1313,13 +1313,13 @@ class TestLayer(LayerTest):
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
             treeConv1 = nn.TreeConv(
-                'SpectralNorm1',
+                feature_size=5,
                 output_size=6,
                 num_filters=1,
                 max_depth=2,
                 bias_attr='tc1_b')
             treeConv2 = nn.TreeConv(
-                'SpectralNorm2',
+                feature_size=5,
                 output_size=6,
                 num_filters=1,
                 max_depth=2,
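Note: TreeConv now takes feature_size (the per-node feature width) at construction; the old code also reused stale 'SpectralNorm' name scopes, which this change makes irrelevant. A constructor-only sketch, since a valid nodes/edge-set pair is test-specific:

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        # weights are created here, sized from feature_size, not on first call
        tree_conv = fluid.dygraph.TreeConv(
            feature_size=5, output_size=6, num_filters=1, max_depth=2)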
@@ -1359,19 +1359,13 @@ class TestLayer(LayerTest):
         with self.static_graph():
             img = layers.data(name='pixel', shape=[3, 2, 2, 2], dtype='float32')
             conv3d_transpose = nn.Conv3DTranspose(
-                'Conv3DTranspose',
-                num_filters=12,
-                filter_size=12,
-                use_cudnn=False)
+                num_channels=3, num_filters=12, filter_size=12, use_cudnn=False)
             out = conv3d_transpose(img)
             static_rlt2 = self.get_static_graph_result(
                 feed={'pixel': input_array}, fetch_list=[out])[0]
         with self.dynamic_graph():
             conv3d_transpose = nn.Conv3DTranspose(
-                'Conv3DTranspose',
-                num_filters=12,
-                filter_size=12,
-                use_cudnn=False)
+                num_channels=3, num_filters=12, filter_size=12, use_cudnn=False)
             dy_rlt = conv3d_transpose(base.to_variable(input_array))
             dy_rlt_value = dy_rlt.numpy()
         self.assertTrue(np.allclose(static_rlt2, static_rlt))
@@ -1384,13 +1378,13 @@ class TestLayer(LayerTest):
                 initializer=fluid.initializer.NumpyArrayInitializer(
                     custom_weight))
             conv3d1 = nn.Conv3DTranspose(
-                'conv3d1',
+                num_channels=3,
                 num_filters=3,
                 filter_size=2,
                 bias_attr='conv3d1_b',
                 use_cudnn=False)
             conv3d2 = nn.Conv3DTranspose(
-                'conv3d2',
+                num_channels=3,
                 num_filters=3,
                 filter_size=2,
                 param_attr=weight_attr,
......
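Note: Conv3DTranspose gets the same num_channels treatment as the 2-D variant. A sketch with a small illustrative volume:

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        vol = fluid.dygraph.to_variable(
            np.random.rand(1, 3, 2, 2, 2).astype('float32'))
        deconv3d = fluid.dygraph.Conv3DTranspose(
            num_channels=3, num_filters=12, filter_size=2, use_cudnn=False)
        out = deconv3d(vol)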
@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
             init_scale=init_scale,
             dropout=dropout)
         self.embedding = Embedding(
-            self.full_name(),
             size=[vocab_size, hidden_size],
             dtype='float32',
             is_sparse=False,
......
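Note: inside a custom Layer, sub-layers no longer receive self.full_name(); parameter names are auto-generated from the lower-cased class name. A minimal sketch of the pattern PtbModel now follows, with TinyModel as a hypothetical stand-in:

    import paddle.fluid as fluid
    from paddle.fluid.dygraph import Embedding

    class TinyModel(fluid.dygraph.Layer):
        def __init__(self, vocab_size=100, hidden_size=16):
            super(TinyModel, self).__init__()  # name_scope is optional now
            self.embedding = Embedding(
                size=[vocab_size, hidden_size], dtype='float32', is_sparse=False)

        def forward(self, ids):
            return self.embedding(ids)

    with fluid.dygraph.guard():
        model = TinyModel()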