提交 f6144d84 编写于 作者: Y Youwei Song 提交者: hong

remove build_once & name_scope (#21131)

* remove build_once & name_scope (Conv2D)
test=develop

* fix unittest
test=develop

* Conv2DTranspose

* Conv3D & Conv3DTranspose
test=develop

* Pool2D & BatchNorm

* Embedding

* LayerNorm

* GRUUnit & NCE

* PRelu

* BilinearTensorProduct

* GroupNorm & SpectralNorm

* TreeConv
test=develop

* fix LayerNorm in transformer unittest
test=develop

* disable LayerNorm or BatchNorm in multicard
test=develop

* refine Layer.create_parameter api
test=develop

* refine LayerNorm, remove begin_norm_axis param, add normed shape check
test=develop

* LayerNorm bug fix
test=develop
上级 0fe16539
...@@ -104,12 +104,12 @@ class BasicGRUUnit(Layer): ...@@ -104,12 +104,12 @@ class BasicGRUUnit(Layer):
dtype=self._dtype) dtype=self._dtype)
self._gate_bias = self.create_parameter( self._gate_bias = self.create_parameter(
self._bias_attr, attr=self._bias_attr,
shape=[2 * self._hiden_size], shape=[2 * self._hiden_size],
dtype=self._dtype, dtype=self._dtype,
is_bias=True) is_bias=True)
self._candidate_bias = self.create_parameter( self._candidate_bias = self.create_parameter(
self._bias_attr, attr=self._bias_attr,
shape=[self._hiden_size], shape=[self._hiden_size],
dtype=self._dtype, dtype=self._dtype,
is_bias=True) is_bias=True)
......
...@@ -48,7 +48,7 @@ def save_dygraph(state_dict, model_path): ...@@ -48,7 +48,7 @@ def save_dygraph(state_dict, model_path):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
...@@ -91,7 +91,7 @@ def load_dygraph(model_path): ...@@ -91,7 +91,7 @@ def load_dygraph(model_path):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
......
...@@ -33,10 +33,11 @@ class Layer(core.Layer): ...@@ -33,10 +33,11 @@ class Layer(core.Layer):
"""Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on. """Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.
Parameters: Parameters:
name_scope (str): prefix name used by the layer to name parameters. name_scope (str, optional): prefix name used by the layer to name parameters.
If prefix is "my_model/layer_1", parameter name in MyLayer If prefix is "my_layer", parameter name in MyLayer
can be "my_model/layer_1/MyLayer/w_n", where w is the parameter can be "mylayer_0.w_n", where w is the parameter
base name and n is an unique suffix auto-generated. base name and n is an unique suffix auto-generated.
If None, prefix name will be lower cased class name. Default: None.
dtype(str or core.VarDesc.VarType, optional): data type of this parameter. dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
If set str, it can be "bool", "float16", "float32", "float64", If set str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16". "int8", "int16", "int32", "int64", "uint8" or "uint16".
...@@ -46,17 +47,22 @@ class Layer(core.Layer): ...@@ -46,17 +47,22 @@ class Layer(core.Layer):
None None
""" """
def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32): def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
self._full_name = unique_name.generate(name_scope + "/" + if name_scope is None:
self.__class__.__name__) name_scope = self.__class__.__name__.lower()
self._full_name = unique_name.generate(name_scope)
else:
# TODO: remove name_scope parameter and all hard-coded usages
self._full_name = unique_name.generate(name_scope + "/" +
self.__class__.__name__)
self._helper = LayerObjectHelper(self._full_name)
self._built = False self._built = False
self._dtype = dtype self._dtype = dtype
self._parameters = collections.OrderedDict() self._parameters = collections.OrderedDict()
self._sub_layers = collections.OrderedDict() self._sub_layers = collections.OrderedDict()
self._loaddict_holder = collections.OrderedDict() self._loaddict_holder = collections.OrderedDict()
self._helper = LayerObjectHelper(self._full_name)
def train(self): def train(self):
framework._dygraph_tracer().train_mode() framework._dygraph_tracer().train_mode()
...@@ -72,23 +78,23 @@ class Layer(core.Layer): ...@@ -72,23 +78,23 @@ class Layer(core.Layer):
return self._full_name return self._full_name
def create_parameter(self, def create_parameter(self,
attr,
shape, shape,
dtype, attr=None,
dtype='float32',
is_bias=False, is_bias=False,
default_initializer=None): default_initializer=None):
"""Create parameters for this layer. """Create parameters for this layer.
Parameters: Parameters:
attr(ParamAttr): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr` shape(list): Shape of the parameter.
shape(list): shape of the parameter attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
dtype(str or core.VarDesc.VarType): data type of this parameter. dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
If set str, it can be "bool", "float16", "float32", "float64", If set str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16". "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
is_bias(bool, optional): if this is a bias parameter. Default: False is_bias(bool, optional): if this is a bias parameter. Default: False.
default_initializer(Initializer, optional): the default initializer for this parameter. default_initializer(Initializer, optional): the default initializer for this parameter.
If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer` If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
for non-bias and bias parameter, respectively. Default: None for non-bias and bias parameter, respectively. Default: None.
Returns: Returns:
:ref:`api_guide_Variable_en` : created parameter. :ref:`api_guide_Variable_en` : created parameter.
...@@ -294,7 +300,7 @@ class Layer(core.Layer): ...@@ -294,7 +300,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
...@@ -332,7 +338,7 @@ class Layer(core.Layer): ...@@ -332,7 +338,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
...@@ -361,7 +367,7 @@ class Layer(core.Layer): ...@@ -361,7 +367,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid import paddle.fluid as fluid
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy") fluid.save_dygraph( state_dict, "paddle_dy")
......
此差异已折叠。
...@@ -34,7 +34,6 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase ...@@ -34,7 +34,6 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
filter_size, filter_size,
...@@ -51,10 +50,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -51,10 +50,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False, use_cudnn=False,
param_attr=None, param_attr=None,
bias_attr=None): bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope) super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D( self._conv2d = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=conv_stride, stride=conv_stride,
...@@ -66,7 +65,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -66,7 +65,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn) use_cudnn=use_cudnn)
self._pool2d = Pool2D( self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
...@@ -85,10 +83,10 @@ class MNIST(fluid.dygraph.Layer): ...@@ -85,10 +83,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu") 1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool( self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu") 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4 pool_2_shape = 50 * 4 * 4
SIZE = 10 SIZE = 10
......
...@@ -27,7 +27,7 @@ import paddle.fluid as fluid ...@@ -27,7 +27,7 @@ import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph import paddle.fluid.dygraph as dygraph
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, LayerNorm from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
from paddle.fluid.dygraph.base import to_variable from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layer_helper import LayerHelper
import math import math
...@@ -77,16 +77,16 @@ def optimizer_setting(params): ...@@ -77,16 +77,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size, filter_size,
stride=1, stride=1,
groups=1, groups=1,
act=None): act=None):
super(ConvBNLayer, self).__init__(name_scope) super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
"conv2d", num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -96,11 +96,12 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -96,11 +96,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False, bias_attr=False,
param_attr=fluid.ParamAttr(name="weights")) param_attr=fluid.ParamAttr(name="weights"))
self._layer_norm = LayerNorm(self.full_name(), begin_norm_axis=1) # disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape
# self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs): def forward(self, inputs):
y = self._conv(inputs) y = self._conv(inputs)
y = self._layer_norm(y) # y = self._batch_norm(y)
return y return y
...@@ -109,8 +110,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -109,8 +110,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, name_scope, num_channels, reduction_ratio): def __init__(self, name_scope, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__(name_scope) super(SqueezeExcitation, self).__init__(name_scope)
self._pool = Pool2D( self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(num_channels * 1.0) stdv = 1.0 / math.sqrt(num_channels * 1.0)
self._squeeze = FC( self._squeeze = FC(
self.full_name(), self.full_name(),
...@@ -136,29 +136,28 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -136,29 +136,28 @@ class SqueezeExcitation(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
stride, stride,
cardinality, cardinality,
reduction_ratio, reduction_ratio,
shortcut=True): shortcut=True):
super(BottleneckBlock, self).__init__(name_scope) super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
act="relu") act="relu")
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
stride=stride, stride=stride,
groups=cardinality, groups=cardinality,
act="relu") act="relu")
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters * 2, num_filters=num_filters * 2,
filter_size=1, filter_size=1,
act=None) act=None)
...@@ -170,7 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -170,7 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
self.full_name(), num_channels=num_channels,
num_filters=num_filters * 2, num_filters=num_filters * 2,
filter_size=1, filter_size=1,
stride=stride) stride=stride)
...@@ -209,63 +208,51 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -209,63 +208,51 @@ class SeResNeXt(fluid.dygraph.Layer):
depth = [3, 4, 6, 3] depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=64, num_filters=64,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101: elif layers == 101:
cardinality = 32 cardinality = 32
reduction_ratio = 16 reduction_ratio = 16
depth = [3, 4, 23, 3] depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=64, num_filters=64,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152: elif layers == 152:
cardinality = 64 cardinality = 64
reduction_ratio = 16 reduction_ratio = 16
depth = [3, 8, 36, 3] depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=64, num_filters=64,
filter_size=3, filter_size=3,
stride=2, stride=2,
act='relu') act='relu')
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(), num_channels=64,
num_filters=64, num_filters=64,
filter_size=3, filter_size=3,
stride=1, stride=1,
act='relu') act='relu')
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(), num_channels=64,
num_filters=128, num_filters=128,
filter_size=3, filter_size=3,
stride=1, stride=1,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = [] self.bottleneck_block_list = []
num_channels = 64 num_channels = 64
...@@ -275,7 +262,6 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -275,7 +262,6 @@ class SeResNeXt(fluid.dygraph.Layer):
bottleneck_block = self.add_sublayer( bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i), 'bb_%d_%d' % (block, i),
BottleneckBlock( BottleneckBlock(
self.full_name(),
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters[block], num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
...@@ -287,7 +273,7 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -287,7 +273,7 @@ class SeResNeXt(fluid.dygraph.Layer):
shortcut = True shortcut = True
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True) pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(2048 * 1.0) stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = FC(self.full_name(), self.out = FC(self.full_name(),
......
...@@ -23,7 +23,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC ...@@ -23,7 +23,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size, filter_size,
pool_size, pool_size,
...@@ -40,10 +40,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -40,10 +40,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
dtype='float32', dtype='float32',
param_attr=None, param_attr=None,
bias_attr=None): bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope) super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D( self._conv2d = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=conv_stride, stride=conv_stride,
...@@ -57,7 +57,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -57,7 +57,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
act=act) act=act)
self._pool2d = Pool2D( self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
...@@ -76,7 +75,7 @@ class MNIST(fluid.dygraph.Layer): ...@@ -76,7 +75,7 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), num_channels=3,
num_filters=20, num_filters=20,
filter_size=5, filter_size=5,
pool_size=2, pool_size=2,
...@@ -86,7 +85,7 @@ class MNIST(fluid.dygraph.Layer): ...@@ -86,7 +85,7 @@ class MNIST(fluid.dygraph.Layer):
use_cudnn=True) use_cudnn=True)
self._simple_img_conv_pool_2 = SimpleImgConvPool( self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), num_channels=20,
num_filters=50, num_filters=50,
filter_size=5, filter_size=5,
pool_size=2, pool_size=2,
......
...@@ -30,7 +30,6 @@ from test_imperative_base import new_program_scope ...@@ -30,7 +30,6 @@ from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
filter_size, filter_size,
...@@ -47,10 +46,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -47,10 +46,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False, use_cudnn=False,
param_attr=None, param_attr=None,
bias_attr=None): bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope) super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D( self._conv2d = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=conv_stride, stride=conv_stride,
...@@ -62,7 +61,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -62,7 +61,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn) use_cudnn=use_cudnn)
self._pool2d = Pool2D( self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
...@@ -81,10 +79,10 @@ class MNIST(fluid.dygraph.Layer): ...@@ -81,10 +79,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu") 1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool( self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu") 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4 pool_2_shape = 50 * 4 * 4
SIZE = 10 SIZE = 10
......
...@@ -100,8 +100,8 @@ class AutoPruneLayer3(fluid.Layer): ...@@ -100,8 +100,8 @@ class AutoPruneLayer3(fluid.Layer):
class MyLayer(fluid.Layer): class MyLayer(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"): def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer, self).__init__(name_scope, dtype) super(MyLayer, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size)) self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
...@@ -122,8 +122,8 @@ class MyLayer(fluid.Layer): ...@@ -122,8 +122,8 @@ class MyLayer(fluid.Layer):
class MyLayer2(fluid.Layer): class MyLayer2(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"): def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer2, self).__init__(name_scope, dtype) super(MyLayer2, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size)) self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size)) self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype) self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype) self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
......
...@@ -90,9 +90,9 @@ class DeepCF(fluid.Layer): ...@@ -90,9 +90,9 @@ class DeepCF(fluid.Layer):
self._num_users = num_users self._num_users = num_users
self._num_items = num_items self._num_items = num_items
self._rating_matrix = self.create_parameter( self._rating_matrix = self.create_parameter(
fluid.ParamAttr(trainable=False), attr=fluid.ParamAttr(trainable=False),
matrix.shape, shape=matrix.shape,
matrix.dtype, dtype=matrix.dtype,
is_bias=False, is_bias=False,
default_initializer=fluid.initializer.NumpyArrayInitializer(matrix)) default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
self._rating_matrix.stop_gradient = True self._rating_matrix.stop_gradient = True
......
...@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.jit import TracedLayer ...@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.jit import TracedLayer
class SimpleImgConvPool(fluid.dygraph.Layer): class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size, filter_size,
pool_size, pool_size,
...@@ -48,10 +48,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -48,10 +48,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False, use_cudnn=False,
param_attr=None, param_attr=None,
bias_attr=None): bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope) super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D( self._conv2d = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=conv_stride, stride=conv_stride,
...@@ -63,7 +63,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer): ...@@ -63,7 +63,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn) use_cudnn=use_cudnn)
self._pool2d = Pool2D( self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size, pool_size=pool_size,
pool_type=pool_type, pool_type=pool_type,
pool_stride=pool_stride, pool_stride=pool_stride,
...@@ -82,10 +81,10 @@ class MNIST(fluid.dygraph.Layer): ...@@ -82,10 +81,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope) super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool( self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 20, 5, 2, 2, act="relu") 1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool( self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 50, 5, 2, 2, act="relu") 20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4 pool_2_shape = 50 * 4 * 4
SIZE = 10 SIZE = 10
......
...@@ -57,7 +57,6 @@ class Config(object): ...@@ -57,7 +57,6 @@ class Config(object):
class ConvBNPool(fluid.dygraph.Layer): class ConvBNPool(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
group, group,
out_ch, out_ch,
channels, channels,
...@@ -65,7 +64,7 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -65,7 +64,7 @@ class ConvBNPool(fluid.dygraph.Layer):
is_test=False, is_test=False,
pool=True, pool=True,
use_cudnn=True): use_cudnn=True):
super(ConvBNPool, self).__init__(name_scope) super(ConvBNPool, self).__init__()
self.group = group self.group = group
self.pool = pool self.pool = pool
...@@ -79,7 +78,7 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -79,7 +78,7 @@ class ConvBNPool(fluid.dygraph.Layer):
initializer=fluid.initializer.Normal(0.0, conv_std_1)) initializer=fluid.initializer.Normal(0.0, conv_std_1))
self.conv_0_layer = Conv2D( self.conv_0_layer = Conv2D(
self.full_name(), channels[0],
out_ch[0], out_ch[0],
3, 3,
padding=1, padding=1,
...@@ -87,10 +86,9 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -87,10 +86,9 @@ class ConvBNPool(fluid.dygraph.Layer):
bias_attr=False, bias_attr=False,
act=None, act=None,
use_cudnn=use_cudnn) use_cudnn=use_cudnn)
self.bn_0_layer = BatchNorm( self.bn_0_layer = BatchNorm(out_ch[0], act=act, is_test=is_test)
self.full_name(), out_ch[0], act=act, is_test=is_test)
self.conv_1_layer = Conv2D( self.conv_1_layer = Conv2D(
self.full_name(), out_ch[0],
num_filters=out_ch[1], num_filters=out_ch[1],
filter_size=3, filter_size=3,
padding=1, padding=1,
...@@ -98,12 +96,10 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -98,12 +96,10 @@ class ConvBNPool(fluid.dygraph.Layer):
bias_attr=False, bias_attr=False,
act=None, act=None,
use_cudnn=use_cudnn) use_cudnn=use_cudnn)
self.bn_1_layer = BatchNorm( self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test)
self.full_name(), out_ch[1], act=act, is_test=is_test)
if self.pool: if self.pool:
self.pool_layer = Pool2D( self.pool_layer = Pool2D(
self.full_name(),
pool_size=2, pool_size=2,
pool_type='max', pool_type='max',
pool_stride=2, pool_stride=2,
...@@ -125,22 +121,12 @@ class OCRConv(fluid.dygraph.Layer): ...@@ -125,22 +121,12 @@ class OCRConv(fluid.dygraph.Layer):
def __init__(self, name_scope, is_test=False, use_cudnn=True): def __init__(self, name_scope, is_test=False, use_cudnn=True):
super(OCRConv, self).__init__(name_scope) super(OCRConv, self).__init__(name_scope)
self.conv_bn_pool_1 = ConvBNPool( self.conv_bn_pool_1 = ConvBNPool(
self.full_name(), 2, [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
2, [16, 16], [1, 16],
is_test=is_test,
use_cudnn=use_cudnn)
self.conv_bn_pool_2 = ConvBNPool( self.conv_bn_pool_2 = ConvBNPool(
self.full_name(), 2, [32, 32], [16, 32], is_test=is_test, use_cudnn=use_cudnn)
2, [32, 32], [16, 32],
is_test=is_test,
use_cudnn=use_cudnn)
self.conv_bn_pool_3 = ConvBNPool( self.conv_bn_pool_3 = ConvBNPool(
self.full_name(), 2, [64, 64], [32, 64], is_test=is_test, use_cudnn=use_cudnn)
2, [64, 64], [32, 64],
is_test=is_test,
use_cudnn=use_cudnn)
self.conv_bn_pool_4 = ConvBNPool( self.conv_bn_pool_4 = ConvBNPool(
self.full_name(),
2, [128, 128], [64, 128], 2, [128, 128], [64, 128],
is_test=is_test, is_test=is_test,
pool=False, pool=False,
...@@ -169,7 +155,6 @@ class DynamicGRU(fluid.dygraph.Layer): ...@@ -169,7 +155,6 @@ class DynamicGRU(fluid.dygraph.Layer):
super(DynamicGRU, self).__init__(scope_name) super(DynamicGRU, self).__init__(scope_name)
self.gru_unit = GRUUnit( self.gru_unit = GRUUnit(
self.full_name(),
size * 3, size * 3,
param_attr=param_attr, param_attr=param_attr,
bias_attr=bias_attr, bias_attr=bias_attr,
...@@ -337,10 +322,7 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer): ...@@ -337,10 +322,7 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
size=decoder_size * 3, size=decoder_size * 3,
bias_attr=False) bias_attr=False)
self.gru_unit = GRUUnit( self.gru_unit = GRUUnit(
self.full_name(), size=decoder_size * 3, param_attr=None, bias_attr=None)
size=decoder_size * 3,
param_attr=None,
bias_attr=None)
self.out_layer = FC(self.full_name(), self.out_layer = FC(self.full_name(),
size=num_classes + 2, size=num_classes + 2,
bias_attr=None, bias_attr=None,
...@@ -383,8 +365,7 @@ class OCRAttention(fluid.dygraph.Layer): ...@@ -383,8 +365,7 @@ class OCRAttention(fluid.dygraph.Layer):
bias_attr=False, bias_attr=False,
act='relu') act='relu')
self.embedding = Embedding( self.embedding = Embedding(
self.full_name(), [Config.num_classes + 2, Config.word_vector_dim], [Config.num_classes + 2, Config.word_vector_dim], dtype='float32')
dtype='float32')
self.gru_decoder_with_attention = GRUDecoderWithAttention( self.gru_decoder_with_attention = GRUDecoderWithAttention(
self.full_name(), Config.decoder_size, Config.num_classes) self.full_name(), Config.decoder_size, Config.num_classes)
......
...@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer): ...@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale, init_scale=init_scale,
dropout=dropout) dropout=dropout)
self.embedding = Embedding( self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size], size=[vocab_size, hidden_size],
dtype='float32', dtype='float32',
is_sparse=is_sparse, is_sparse=is_sparse,
......
...@@ -72,16 +72,16 @@ def optimizer_setting(params): ...@@ -72,16 +72,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.Layer): class ConvBNLayer(fluid.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size, filter_size,
stride=1, stride=1,
groups=1, groups=1,
act=None): act=None):
super(ConvBNLayer, self).__init__(name_scope) super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -91,7 +91,7 @@ class ConvBNLayer(fluid.Layer): ...@@ -91,7 +91,7 @@ class ConvBNLayer(fluid.Layer):
bias_attr=None, bias_attr=None,
use_cudnn=False) use_cudnn=False)
self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act) self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs): def forward(self, inputs):
y = self._conv(inputs) y = self._conv(inputs)
...@@ -101,29 +101,29 @@ class ConvBNLayer(fluid.Layer): ...@@ -101,29 +101,29 @@ class ConvBNLayer(fluid.Layer):
class BottleneckBlock(fluid.Layer): class BottleneckBlock(fluid.Layer):
def __init__(self, name_scope, num_filters, stride, shortcut=True): def __init__(self, num_channels, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__(name_scope) super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=1, filter_size=1,
act='relu') act='relu')
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
stride=stride, stride=stride,
act='relu') act='relu')
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
act=None) act=None)
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
self.full_name(), num_channels=num_channels,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
stride=stride) stride=stride)
...@@ -161,20 +161,13 @@ class ResNet(fluid.Layer): ...@@ -161,20 +161,13 @@ class ResNet(fluid.Layer):
depth = [3, 4, 23, 3] depth = [3, 4, 23, 3]
elif layers == 152: elif layers == 152:
depth = [3, 8, 36, 3] depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512] num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer( self.conv = ConvBNLayer(
self.full_name(), num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool2d_max = Pool2D( self.pool2d_max = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = [] self.bottleneck_block_list = []
for block in range(len(depth)): for block in range(len(depth)):
...@@ -183,7 +176,8 @@ class ResNet(fluid.Layer): ...@@ -183,7 +176,8 @@ class ResNet(fluid.Layer):
bottleneck_block = self.add_sublayer( bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i), 'bb_%d_%d' % (block, i),
BottleneckBlock( BottleneckBlock(
self.full_name(), num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block], num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut)) shortcut=shortcut))
...@@ -191,7 +185,7 @@ class ResNet(fluid.Layer): ...@@ -191,7 +185,7 @@ class ResNet(fluid.Layer):
shortcut = True shortcut = True
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True) pool_size=7, pool_type='avg', global_pooling=True)
import math import math
stdv = 1.0 / math.sqrt(2048 * 1.0) stdv = 1.0 / math.sqrt(2048 * 1.0)
......
...@@ -156,7 +156,6 @@ class PtbModel(fluid.Layer): ...@@ -156,7 +156,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale, init_scale=init_scale,
dropout=dropout) dropout=dropout)
self.embedding = Embedding( self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size], size=[vocab_size, hidden_size],
dtype='float32', dtype='float32',
is_sparse=False, is_sparse=False,
...@@ -882,7 +881,7 @@ class TestDygraphPtbRnn(unittest.TestCase): ...@@ -882,7 +881,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def testOnlyLoadParams(self): def testOnlyLoadParams(self):
with fluid.dygraph.guard(): with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding("emb", [10, 10]) emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict() state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "emb_dy") fluid.save_dygraph(state_dict, "emb_dy")
......
...@@ -63,16 +63,16 @@ def optimizer_setting(params): ...@@ -63,16 +63,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.dygraph.Layer): class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope, num_channels,
num_filters, num_filters,
filter_size, filter_size,
stride=1, stride=1,
groups=1, groups=1,
act=None): act=None):
super(ConvBNLayer, self).__init__(name_scope) super(ConvBNLayer, self).__init__()
self._conv = Conv2D( self._conv = Conv2D(
self.full_name(), num_channels=num_channels,
num_filters=num_filters, num_filters=num_filters,
filter_size=filter_size, filter_size=filter_size,
stride=stride, stride=stride,
...@@ -81,7 +81,7 @@ class ConvBNLayer(fluid.dygraph.Layer): ...@@ -81,7 +81,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
act=None, act=None,
bias_attr=None) bias_attr=None)
self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act) self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs): def forward(self, inputs):
y = self._conv(inputs) y = self._conv(inputs)
...@@ -94,8 +94,7 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -94,8 +94,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, name_scope, num_channels, reduction_ratio): def __init__(self, name_scope, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__(name_scope) super(SqueezeExcitation, self).__init__(name_scope)
self._pool = Pool2D( self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
self._squeeze = FC( self._squeeze = FC(
self.full_name(), self.full_name(),
size=num_channels // reduction_ratio, size=num_channels // reduction_ratio,
...@@ -119,25 +118,24 @@ class SqueezeExcitation(fluid.dygraph.Layer): ...@@ -119,25 +118,24 @@ class SqueezeExcitation(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer): class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self, def __init__(self,
name_scope,
num_channels, num_channels,
num_filters, num_filters,
stride, stride,
cardinality, cardinality,
reduction_ratio, reduction_ratio,
shortcut=True): shortcut=True):
super(BottleneckBlock, self).__init__(name_scope) super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_filters=num_filters, filter_size=1) num_channels=num_channels, num_filters=num_filters, filter_size=1)
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters, num_filters=num_filters,
filter_size=3, filter_size=3,
stride=stride, stride=stride,
groups=cardinality) groups=cardinality)
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(), num_channels=num_filters,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
act='relu') act='relu')
...@@ -149,7 +147,7 @@ class BottleneckBlock(fluid.dygraph.Layer): ...@@ -149,7 +147,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut: if not shortcut:
self.short = ConvBNLayer( self.short = ConvBNLayer(
self.full_name(), num_channels=num_channels,
num_filters=num_filters * 4, num_filters=num_filters * 4,
filter_size=1, filter_size=1,
stride=stride) stride=stride)
...@@ -191,63 +189,51 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -191,63 +189,51 @@ class SeResNeXt(fluid.dygraph.Layer):
depth = [3, 4, 6, 3] depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=64, num_filters=64,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 101: elif layers == 101:
cardinality = 32 cardinality = 32
reduction_ratio = 16 reduction_ratio = 16
depth = [3, 4, 23, 3] depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=3, num_filters=3,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
elif layers == 152: elif layers == 152:
cardinality = 64 cardinality = 64
reduction_ratio = 16 reduction_ratio = 16
depth = [3, 8, 36, 3] depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024] num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer( self.conv0 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=3, num_filters=3,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.conv1 = ConvBNLayer( self.conv1 = ConvBNLayer(
self.full_name(), num_channels=3,
num_filters=3, num_filters=3,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.conv2 = ConvBNLayer( self.conv2 = ConvBNLayer(
self.full_name(), num_channels=7,
num_filters=3, num_filters=3,
filter_size=7, filter_size=7,
stride=2, stride=2,
act='relu') act='relu')
self.pool = Pool2D( self.pool = Pool2D(
self.full_name(), pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
self.bottleneck_block_list = [] self.bottleneck_block_list = []
num_channels = 64 num_channels = 64
...@@ -257,7 +243,6 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -257,7 +243,6 @@ class SeResNeXt(fluid.dygraph.Layer):
bottleneck_block = self.add_sublayer( bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i), 'bb_%d_%d' % (block, i),
BottleneckBlock( BottleneckBlock(
self.full_name(),
num_channels=num_channels, num_channels=num_channels,
num_filters=num_filters[block], num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1, stride=2 if i == 0 and block != 0 else 1,
...@@ -269,7 +254,7 @@ class SeResNeXt(fluid.dygraph.Layer): ...@@ -269,7 +254,7 @@ class SeResNeXt(fluid.dygraph.Layer):
shortcut = True shortcut = True
self.pool2d_avg = Pool2D( self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True) pool_size=7, pool_type='avg', global_pooling=True)
import math import math
stdv = 1.0 / math.sqrt(2048 * 1.0) stdv = 1.0 / math.sqrt(2048 * 1.0)
......
...@@ -350,13 +350,12 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length, ...@@ -350,13 +350,12 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length,
class PrePostProcessLayer(Layer): class PrePostProcessLayer(Layer):
def __init__(self, name_scope, process_cmd, shape_len=None): def __init__(self, d_model, process_cmd, shape_len=None):
super(PrePostProcessLayer, self).__init__(name_scope) super(PrePostProcessLayer, self).__init__()
for cmd in process_cmd: for cmd in process_cmd:
if cmd == "n": if cmd == "n":
self._layer_norm = LayerNorm( self._layer_norm = LayerNorm(
name_scope=self.full_name(), normalized_shape=d_model,
begin_norm_axis=shape_len - 1,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.)), initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
...@@ -508,19 +507,19 @@ class EncoderSubLayer(Layer): ...@@ -508,19 +507,19 @@ class EncoderSubLayer(Layer):
self._postprocess_cmd = postprocess_cmd self._postprocess_cmd = postprocess_cmd
self._prepostprocess_dropout = prepostprocess_dropout self._prepostprocess_dropout = prepostprocess_dropout
self._preprocess_layer = PrePostProcessLayer(self.full_name(), self._preprocess_layer = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3) self._preprocess_cmd, 3)
self._multihead_attention_layer = MultiHeadAttentionLayer( self._multihead_attention_layer = MultiHeadAttentionLayer(
self.full_name(), d_key, d_value, d_model, n_head, self.full_name(), d_key, d_value, d_model, n_head,
attention_dropout) attention_dropout)
self._postprocess_layer = PrePostProcessLayer( self._postprocess_layer = PrePostProcessLayer(
self.full_name(), self._postprocess_cmd, None) d_model, self._postprocess_cmd, None)
self._preprocess_layer2 = PrePostProcessLayer(self.full_name(), self._preprocess_layer2 = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3) self._preprocess_cmd, 3)
self._positionwise_feed_forward = PositionwiseFeedForwardLayer( self._positionwise_feed_forward = PositionwiseFeedForwardLayer(
self.full_name(), d_inner_hid, d_model, relu_dropout) self.full_name(), d_inner_hid, d_model, relu_dropout)
self._postprocess_layer2 = PrePostProcessLayer( self._postprocess_layer2 = PrePostProcessLayer(
self.full_name(), self._postprocess_cmd, None) d_model, self._postprocess_cmd, None)
def forward(self, enc_input, attn_bias): def forward(self, enc_input, attn_bias):
pre_process_multihead = self._preprocess_layer( pre_process_multihead = self._preprocess_layer(
...@@ -559,7 +558,7 @@ class EncoderLayer(Layer): ...@@ -559,7 +558,7 @@ class EncoderLayer(Layer):
self._encoder_sublayers = list() self._encoder_sublayers = list()
self._prepostprocess_dropout = prepostprocess_dropout self._prepostprocess_dropout = prepostprocess_dropout
self._n_layer = n_layer self._n_layer = n_layer
self._preprocess_layer = PrePostProcessLayer(self.full_name(), self._preprocess_layer = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3) self._preprocess_cmd, 3)
for i in range(n_layer): for i in range(n_layer):
self._encoder_sublayers.append( self._encoder_sublayers.append(
...@@ -595,7 +594,6 @@ class PrepareEncoderDecoderLayer(Layer): ...@@ -595,7 +594,6 @@ class PrepareEncoderDecoderLayer(Layer):
self._src_vocab_size = src_vocab_size self._src_vocab_size = src_vocab_size
self._dropout_rate = dropout_rate self._dropout_rate = dropout_rate
self._input_emb = Embedding( self._input_emb = Embedding(
name_scope=self.full_name(),
size=[src_vocab_size, src_emb_dim], size=[src_vocab_size, src_emb_dim],
is_sparse=is_sparse, is_sparse=is_sparse,
padding_idx=0, padding_idx=0,
...@@ -608,7 +606,6 @@ class PrepareEncoderDecoderLayer(Layer): ...@@ -608,7 +606,6 @@ class PrepareEncoderDecoderLayer(Layer):
else: else:
pos_inp = pos_inp2 pos_inp = pos_inp2
self._pos_emb = Embedding( self._pos_emb = Embedding(
name_scope=self.full_name(),
size=[self._src_max_len, src_emb_dim], size=[self._src_max_len, src_emb_dim],
is_sparse=is_sparse, is_sparse=is_sparse,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
...@@ -698,8 +695,8 @@ class DecoderSubLayer(Layer): ...@@ -698,8 +695,8 @@ class DecoderSubLayer(Layer):
self._postprocess_cmd = postprocess_cmd self._postprocess_cmd = postprocess_cmd
self._preprocess_cmd = preprocess_cmd self._preprocess_cmd = preprocess_cmd
self._prepostprcess_dropout = prepostprocess_dropout self._prepostprcess_dropout = prepostprocess_dropout
self._pre_process_layer = PrePostProcessLayer(self.full_name(), self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
preprocess_cmd, 3) 3)
self._multihead_attention_layer = MultiHeadAttentionLayer( self._multihead_attention_layer = MultiHeadAttentionLayer(
self.full_name(), self.full_name(),
d_key, d_key,
...@@ -709,10 +706,10 @@ class DecoderSubLayer(Layer): ...@@ -709,10 +706,10 @@ class DecoderSubLayer(Layer):
attention_dropout, attention_dropout,
cache=cache, cache=cache,
gather_idx=gather_idx) gather_idx=gather_idx)
self._post_process_layer = PrePostProcessLayer(self.full_name(), self._post_process_layer = PrePostProcessLayer(d_model, postprocess_cmd,
postprocess_cmd, None) None)
self._pre_process_layer2 = PrePostProcessLayer(self.full_name(), self._pre_process_layer2 = PrePostProcessLayer(d_model, preprocess_cmd,
preprocess_cmd, 3) 3)
self._multihead_attention_layer2 = MultiHeadAttentionLayer( self._multihead_attention_layer2 = MultiHeadAttentionLayer(
self.full_name(), self.full_name(),
d_key, d_key,
...@@ -723,13 +720,13 @@ class DecoderSubLayer(Layer): ...@@ -723,13 +720,13 @@ class DecoderSubLayer(Layer):
cache=cache, cache=cache,
gather_idx=gather_idx, gather_idx=gather_idx,
static_kv=True) static_kv=True)
self._post_process_layer2 = PrePostProcessLayer(self.full_name(), self._post_process_layer2 = PrePostProcessLayer(d_model,
postprocess_cmd, None) postprocess_cmd, None)
self._pre_process_layer3 = PrePostProcessLayer(self.full_name(), self._pre_process_layer3 = PrePostProcessLayer(d_model, preprocess_cmd,
preprocess_cmd, 3) 3)
self._positionwise_feed_forward_layer = PositionwiseFeedForwardLayer( self._positionwise_feed_forward_layer = PositionwiseFeedForwardLayer(
self.full_name(), d_inner_hid, d_model, relu_dropout) self.full_name(), d_inner_hid, d_model, relu_dropout)
self._post_process_layer3 = PrePostProcessLayer(self.full_name(), self._post_process_layer3 = PrePostProcessLayer(d_model,
postprocess_cmd, None) postprocess_cmd, None)
def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias): def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias):
...@@ -775,8 +772,8 @@ class DecoderLayer(Layer): ...@@ -775,8 +772,8 @@ class DecoderLayer(Layer):
caches=None, caches=None,
gather_idx=None): gather_idx=None):
super(DecoderLayer, self).__init__(name_scope) super(DecoderLayer, self).__init__(name_scope)
self._pre_process_layer = PrePostProcessLayer(self.full_name(), self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
preprocess_cmd, 3) 3)
self._decoder_sub_layers = list() self._decoder_sub_layers = list()
self._n_layer = n_layer self._n_layer = n_layer
self._preprocess_cmd = preprocess_cmd self._preprocess_cmd = preprocess_cmd
......
...@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer): ...@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale, init_scale=init_scale,
dropout=dropout) dropout=dropout)
self.embedding = Embedding( self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size], size=[vocab_size, hidden_size],
dtype='float32', dtype='float32',
is_sparse=False, is_sparse=False,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册