提交 f6144d84 编写于 作者: Y Youwei Song 提交者: hong

remove build_once & name_scope (#21131)

* remove build_once & name_scope (Conv2D)
test=develop

* fix unittest
test=develop

* Conv2DTranspose

* Conv3D & Conv3DTranspose
test=develop

* Pool2D & BatchNorm

* Embedding

* LayerNorm

* GRUUnit & NCE

* PRelu

* BilinearTensorProduct

* GroupNorm & SpectralNorm

* TreeConv
test=develop

* fix LayerNorm in transformer unittest
test=develop

* disable LayerNorm or BatchNorm in multicard
test=develop

* refine Layer.create_parameter api
test=develop

* refine LayerNorm, remove begin_norm_axis param, add normed shape check
test=develop

* LayerNorm bug fix
test=develop
上级 0fe16539
......@@ -104,12 +104,12 @@ class BasicGRUUnit(Layer):
dtype=self._dtype)
self._gate_bias = self.create_parameter(
self._bias_attr,
attr=self._bias_attr,
shape=[2 * self._hiden_size],
dtype=self._dtype,
is_bias=True)
self._candidate_bias = self.create_parameter(
self._bias_attr,
attr=self._bias_attr,
shape=[self._hiden_size],
dtype=self._dtype,
is_bias=True)
......
......@@ -48,7 +48,7 @@ def save_dygraph(state_dict, model_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......@@ -91,7 +91,7 @@ def load_dygraph(model_path):
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......
......@@ -33,10 +33,11 @@ class Layer(core.Layer):
"""Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.
Parameters:
name_scope (str): prefix name used by the layer to name parameters.
If prefix is "my_model/layer_1", parameter name in MyLayer
can be "my_model/layer_1/MyLayer/w_n", where w is the parameter
name_scope (str, optional): prefix name used by the layer to name parameters.
If prefix is "my_layer", parameter name in MyLayer
can be "mylayer_0.w_n", where w is the parameter
base name and n is an unique suffix auto-generated.
If None, prefix name will be lower cased class name. Default: None.
dtype(str or core.VarDesc.VarType, optional): data type of this parameter.
If set str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16".
......@@ -46,17 +47,22 @@ class Layer(core.Layer):
None
"""
def __init__(self, name_scope, dtype=core.VarDesc.VarType.FP32):
self._full_name = unique_name.generate(name_scope + "/" +
self.__class__.__name__)
def __init__(self, name_scope=None, dtype=core.VarDesc.VarType.FP32):
if name_scope is None:
name_scope = self.__class__.__name__.lower()
self._full_name = unique_name.generate(name_scope)
else:
# TODO: remove name_scope parameter and all hard-coded usages
self._full_name = unique_name.generate(name_scope + "/" +
self.__class__.__name__)
self._helper = LayerObjectHelper(self._full_name)
self._built = False
self._dtype = dtype
self._parameters = collections.OrderedDict()
self._sub_layers = collections.OrderedDict()
self._loaddict_holder = collections.OrderedDict()
self._helper = LayerObjectHelper(self._full_name)
def train(self):
framework._dygraph_tracer().train_mode()
......@@ -72,23 +78,23 @@ class Layer(core.Layer):
return self._full_name
def create_parameter(self,
attr,
shape,
dtype,
attr=None,
dtype='float32',
is_bias=False,
default_initializer=None):
"""Create parameters for this layer.
Parameters:
attr(ParamAttr): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`
shape(list): shape of the parameter
dtype(str or core.VarDesc.VarType): data type of this parameter.
shape(list): Shape of the parameter.
attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_fluid_ParamAttr`. Default: None.
dtype(str or core.VarDesc.VarType, optional): Data type of this parameter.
If set str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16".
is_bias(bool, optional): if this is a bias parameter. Default: False
"int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
is_bias(bool, optional): if this is a bias parameter. Default: False.
default_initializer(Initializer, optional): the default initializer for this parameter.
If set None, default initializer will be set to :ref:`api_fluid_initializer_XavierInitializer` and :ref:`api_fluid_initializer_ConstantInitializer`
for non-bias and bias parameter, respectively. Default: None
for non-bias and bias parameter, respectively. Default: None.
Returns:
:ref:`api_guide_Variable_en` : created parameter.
......@@ -294,7 +300,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......@@ -332,7 +338,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......@@ -361,7 +367,7 @@ class Layer(core.Layer):
import paddle.fluid as fluid
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding( "emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph( state_dict, "paddle_dy")
......
此差异已折叠。
......@@ -34,7 +34,6 @@ from test_dist_base import runtime_main, TestParallelDyGraphRunnerBase
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
......@@ -51,10 +50,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
......@@ -66,7 +65,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
......@@ -85,10 +83,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu")
1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu")
20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
......
......@@ -27,7 +27,7 @@ import paddle.fluid as fluid
import paddle.fluid.dygraph as dygraph
from paddle.fluid import core
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, LayerNorm
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC, BatchNorm
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.layer_helper import LayerHelper
import math
......@@ -77,16 +77,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__(name_scope)
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
"conv2d",
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -96,11 +96,12 @@ class ConvBNLayer(fluid.dygraph.Layer):
bias_attr=False,
param_attr=fluid.ParamAttr(name="weights"))
self._layer_norm = LayerNorm(self.full_name(), begin_norm_axis=1)
# disable BatchNorm in multi-card. disable LayerNorm because of complex input_shape
# self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._layer_norm(y)
# y = self._batch_norm(y)
return y
......@@ -109,8 +110,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, name_scope, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__(name_scope)
self._pool = Pool2D(
self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self._squeeze = FC(
self.full_name(),
......@@ -136,29 +136,28 @@ class SqueezeExcitation(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True):
super(BottleneckBlock, self).__init__(name_scope)
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act="relu")
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality,
act="relu")
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters * 2,
filter_size=1,
act=None)
......@@ -170,7 +169,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut:
self.short = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters * 2,
filter_size=1,
stride=stride)
......@@ -209,63 +208,51 @@ class SeResNeXt(fluid.dygraph.Layer):
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=64,
filter_size=3,
stride=2,
act='relu')
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=64,
num_filters=64,
filter_size=3,
stride=1,
act='relu')
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=64,
num_filters=128,
filter_size=3,
stride=1,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = []
num_channels = 64
......@@ -275,7 +262,6 @@ class SeResNeXt(fluid.dygraph.Layer):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
......@@ -287,7 +273,7 @@ class SeResNeXt(fluid.dygraph.Layer):
shortcut = True
self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
pool_size=7, pool_type='avg', global_pooling=True)
stdv = 1.0 / math.sqrt(2048 * 1.0)
self.out = FC(self.full_name(),
......
......@@ -23,7 +23,7 @@ from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
pool_size,
......@@ -40,10 +40,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
dtype='float32',
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
......@@ -57,7 +57,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
act=act)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
......@@ -76,7 +75,7 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(),
num_channels=3,
num_filters=20,
filter_size=5,
pool_size=2,
......@@ -86,7 +85,7 @@ class MNIST(fluid.dygraph.Layer):
use_cudnn=True)
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(),
num_channels=20,
num_filters=50,
filter_size=5,
pool_size=2,
......
......@@ -30,7 +30,6 @@ from test_imperative_base import new_program_scope
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
......@@ -47,10 +46,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
......@@ -62,7 +61,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
......@@ -81,10 +79,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 1, 20, 5, 2, 2, act="relu")
1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 20, 50, 5, 2, 2, act="relu")
20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
......
......@@ -100,8 +100,8 @@ class AutoPruneLayer3(fluid.Layer):
class MyLayer(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
......@@ -122,8 +122,8 @@ class MyLayer(fluid.Layer):
class MyLayer2(fluid.Layer):
def __init__(self, name_scope, vocab_size, size, dtype="float32"):
super(MyLayer2, self).__init__(name_scope, dtype)
self.embed0 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed1 = fluid.Embedding(self.full_name(), size=(vocab_size, size))
self.embed0 = fluid.Embedding(size=(vocab_size, size))
self.embed1 = fluid.Embedding(size=(vocab_size, size))
self.fc0 = fluid.FC(self.full_name(), size=size, dtype=dtype)
self.fc1 = fluid.FC(self.full_name(), size=size, dtype=dtype)
......
......@@ -90,9 +90,9 @@ class DeepCF(fluid.Layer):
self._num_users = num_users
self._num_items = num_items
self._rating_matrix = self.create_parameter(
fluid.ParamAttr(trainable=False),
matrix.shape,
matrix.dtype,
attr=fluid.ParamAttr(trainable=False),
shape=matrix.shape,
dtype=matrix.dtype,
is_bias=False,
default_initializer=fluid.initializer.NumpyArrayInitializer(matrix))
self._rating_matrix.stop_gradient = True
......
......@@ -32,7 +32,7 @@ from paddle.fluid.dygraph.jit import TracedLayer
class SimpleImgConvPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
pool_size,
......@@ -48,10 +48,10 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=False,
param_attr=None,
bias_attr=None):
super(SimpleImgConvPool, self).__init__(name_scope)
super(SimpleImgConvPool, self).__init__()
self._conv2d = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=conv_stride,
......@@ -63,7 +63,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
use_cudnn=use_cudnn)
self._pool2d = Pool2D(
self.full_name(),
pool_size=pool_size,
pool_type=pool_type,
pool_stride=pool_stride,
......@@ -82,10 +81,10 @@ class MNIST(fluid.dygraph.Layer):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
self.full_name(), 20, 5, 2, 2, act="relu")
1, 20, 5, 2, 2, act="relu")
self._simple_img_conv_pool_2 = SimpleImgConvPool(
self.full_name(), 50, 5, 2, 2, act="relu")
20, 50, 5, 2, 2, act="relu")
pool_2_shape = 50 * 4 * 4
SIZE = 10
......
......@@ -57,7 +57,6 @@ class Config(object):
class ConvBNPool(fluid.dygraph.Layer):
def __init__(self,
name_scope,
group,
out_ch,
channels,
......@@ -65,7 +64,7 @@ class ConvBNPool(fluid.dygraph.Layer):
is_test=False,
pool=True,
use_cudnn=True):
super(ConvBNPool, self).__init__(name_scope)
super(ConvBNPool, self).__init__()
self.group = group
self.pool = pool
......@@ -79,7 +78,7 @@ class ConvBNPool(fluid.dygraph.Layer):
initializer=fluid.initializer.Normal(0.0, conv_std_1))
self.conv_0_layer = Conv2D(
self.full_name(),
channels[0],
out_ch[0],
3,
padding=1,
......@@ -87,10 +86,9 @@ class ConvBNPool(fluid.dygraph.Layer):
bias_attr=False,
act=None,
use_cudnn=use_cudnn)
self.bn_0_layer = BatchNorm(
self.full_name(), out_ch[0], act=act, is_test=is_test)
self.bn_0_layer = BatchNorm(out_ch[0], act=act, is_test=is_test)
self.conv_1_layer = Conv2D(
self.full_name(),
out_ch[0],
num_filters=out_ch[1],
filter_size=3,
padding=1,
......@@ -98,12 +96,10 @@ class ConvBNPool(fluid.dygraph.Layer):
bias_attr=False,
act=None,
use_cudnn=use_cudnn)
self.bn_1_layer = BatchNorm(
self.full_name(), out_ch[1], act=act, is_test=is_test)
self.bn_1_layer = BatchNorm(out_ch[1], act=act, is_test=is_test)
if self.pool:
self.pool_layer = Pool2D(
self.full_name(),
pool_size=2,
pool_type='max',
pool_stride=2,
......@@ -125,22 +121,12 @@ class OCRConv(fluid.dygraph.Layer):
def __init__(self, name_scope, is_test=False, use_cudnn=True):
super(OCRConv, self).__init__(name_scope)
self.conv_bn_pool_1 = ConvBNPool(
self.full_name(),
2, [16, 16], [1, 16],
is_test=is_test,
use_cudnn=use_cudnn)
2, [16, 16], [1, 16], is_test=is_test, use_cudnn=use_cudnn)
self.conv_bn_pool_2 = ConvBNPool(
self.full_name(),
2, [32, 32], [16, 32],
is_test=is_test,
use_cudnn=use_cudnn)
2, [32, 32], [16, 32], is_test=is_test, use_cudnn=use_cudnn)
self.conv_bn_pool_3 = ConvBNPool(
self.full_name(),
2, [64, 64], [32, 64],
is_test=is_test,
use_cudnn=use_cudnn)
2, [64, 64], [32, 64], is_test=is_test, use_cudnn=use_cudnn)
self.conv_bn_pool_4 = ConvBNPool(
self.full_name(),
2, [128, 128], [64, 128],
is_test=is_test,
pool=False,
......@@ -169,7 +155,6 @@ class DynamicGRU(fluid.dygraph.Layer):
super(DynamicGRU, self).__init__(scope_name)
self.gru_unit = GRUUnit(
self.full_name(),
size * 3,
param_attr=param_attr,
bias_attr=bias_attr,
......@@ -337,10 +322,7 @@ class GRUDecoderWithAttention(fluid.dygraph.Layer):
size=decoder_size * 3,
bias_attr=False)
self.gru_unit = GRUUnit(
self.full_name(),
size=decoder_size * 3,
param_attr=None,
bias_attr=None)
size=decoder_size * 3, param_attr=None, bias_attr=None)
self.out_layer = FC(self.full_name(),
size=num_classes + 2,
bias_attr=None,
......@@ -383,8 +365,7 @@ class OCRAttention(fluid.dygraph.Layer):
bias_attr=False,
act='relu')
self.embedding = Embedding(
self.full_name(), [Config.num_classes + 2, Config.word_vector_dim],
dtype='float32')
[Config.num_classes + 2, Config.word_vector_dim], dtype='float32')
self.gru_decoder_with_attention = GRUDecoderWithAttention(
self.full_name(), Config.decoder_size, Config.num_classes)
......
......@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale,
dropout=dropout)
self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=is_sparse,
......
......@@ -72,16 +72,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__(name_scope)
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -91,7 +91,7 @@ class ConvBNLayer(fluid.Layer):
bias_attr=None,
use_cudnn=False)
self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
......@@ -101,29 +101,29 @@ class ConvBNLayer(fluid.Layer):
class BottleneckBlock(fluid.Layer):
def __init__(self, name_scope, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__(name_scope)
def __init__(self, num_channels, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if not shortcut:
self.short = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
......@@ -161,20 +161,13 @@ class ResNet(fluid.Layer):
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
self.full_name(),
num_filters=64,
filter_size=7,
stride=2,
act='relu')
num_channels=3, num_filters=64, filter_size=7, stride=2, act='relu')
self.pool2d_max = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = []
for block in range(len(depth)):
......@@ -183,7 +176,8 @@ class ResNet(fluid.Layer):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
self.full_name(),
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut))
......@@ -191,7 +185,7 @@ class ResNet(fluid.Layer):
shortcut = True
self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
pool_size=7, pool_type='avg', global_pooling=True)
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
......
......@@ -156,7 +156,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale,
dropout=dropout)
self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
......@@ -882,7 +881,7 @@ class TestDygraphPtbRnn(unittest.TestCase):
def testOnlyLoadParams(self):
with fluid.dygraph.guard():
emb = fluid.dygraph.Embedding("emb", [10, 10])
emb = fluid.dygraph.Embedding([10, 10])
state_dict = emb.state_dict()
fluid.save_dygraph(state_dict, "emb_dy")
......
......@@ -63,16 +63,16 @@ def optimizer_setting(params):
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__(name_scope)
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
......@@ -81,7 +81,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
act=None,
bias_attr=None)
self._batch_norm = BatchNorm(self.full_name(), num_filters, act=act)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
......@@ -94,8 +94,7 @@ class SqueezeExcitation(fluid.dygraph.Layer):
def __init__(self, name_scope, num_channels, reduction_ratio):
super(SqueezeExcitation, self).__init__(name_scope)
self._pool = Pool2D(
self.full_name(), pool_size=0, pool_type='avg', global_pooling=True)
self._pool = Pool2D(pool_size=0, pool_type='avg', global_pooling=True)
self._squeeze = FC(
self.full_name(),
size=num_channels // reduction_ratio,
......@@ -119,25 +118,24 @@ class SqueezeExcitation(fluid.dygraph.Layer):
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self,
name_scope,
num_channels,
num_filters,
stride,
cardinality,
reduction_ratio,
shortcut=True):
super(BottleneckBlock, self).__init__(name_scope)
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
self.full_name(), num_filters=num_filters, filter_size=1)
num_channels=num_channels, num_filters=num_filters, filter_size=1)
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
groups=cardinality)
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act='relu')
......@@ -149,7 +147,7 @@ class BottleneckBlock(fluid.dygraph.Layer):
if not shortcut:
self.short = ConvBNLayer(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
......@@ -191,63 +189,51 @@ class SeResNeXt(fluid.dygraph.Layer):
depth = [3, 4, 6, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=64,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
elif layers == 101:
cardinality = 32
reduction_ratio = 16
depth = [3, 4, 23, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=3,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
elif layers == 152:
cardinality = 64
reduction_ratio = 16
depth = [3, 8, 36, 3]
num_filters = [128, 256, 512, 1024]
self.conv0 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=3,
filter_size=7,
stride=2,
act='relu')
self.conv1 = ConvBNLayer(
self.full_name(),
num_channels=3,
num_filters=3,
filter_size=7,
stride=2,
act='relu')
self.conv2 = ConvBNLayer(
self.full_name(),
num_channels=7,
num_filters=3,
filter_size=7,
stride=2,
act='relu')
self.pool = Pool2D(
self.full_name(),
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max')
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = []
num_channels = 64
......@@ -257,7 +243,6 @@ class SeResNeXt(fluid.dygraph.Layer):
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
self.full_name(),
num_channels=num_channels,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
......@@ -269,7 +254,7 @@ class SeResNeXt(fluid.dygraph.Layer):
shortcut = True
self.pool2d_avg = Pool2D(
self.full_name(), pool_size=7, pool_type='avg', global_pooling=True)
pool_size=7, pool_type='avg', global_pooling=True)
import math
stdv = 1.0 / math.sqrt(2048 * 1.0)
......
......@@ -350,13 +350,12 @@ pos_inp2 = position_encoding_init(ModelHyperParams.max_length,
class PrePostProcessLayer(Layer):
def __init__(self, name_scope, process_cmd, shape_len=None):
super(PrePostProcessLayer, self).__init__(name_scope)
def __init__(self, d_model, process_cmd, shape_len=None):
super(PrePostProcessLayer, self).__init__()
for cmd in process_cmd:
if cmd == "n":
self._layer_norm = LayerNorm(
name_scope=self.full_name(),
begin_norm_axis=shape_len - 1,
normalized_shape=d_model,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.)),
bias_attr=fluid.ParamAttr(
......@@ -508,19 +507,19 @@ class EncoderSubLayer(Layer):
self._postprocess_cmd = postprocess_cmd
self._prepostprocess_dropout = prepostprocess_dropout
self._preprocess_layer = PrePostProcessLayer(self.full_name(),
self._preprocess_layer = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3)
self._multihead_attention_layer = MultiHeadAttentionLayer(
self.full_name(), d_key, d_value, d_model, n_head,
attention_dropout)
self._postprocess_layer = PrePostProcessLayer(
self.full_name(), self._postprocess_cmd, None)
self._preprocess_layer2 = PrePostProcessLayer(self.full_name(),
d_model, self._postprocess_cmd, None)
self._preprocess_layer2 = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3)
self._positionwise_feed_forward = PositionwiseFeedForwardLayer(
self.full_name(), d_inner_hid, d_model, relu_dropout)
self._postprocess_layer2 = PrePostProcessLayer(
self.full_name(), self._postprocess_cmd, None)
d_model, self._postprocess_cmd, None)
def forward(self, enc_input, attn_bias):
pre_process_multihead = self._preprocess_layer(
......@@ -559,7 +558,7 @@ class EncoderLayer(Layer):
self._encoder_sublayers = list()
self._prepostprocess_dropout = prepostprocess_dropout
self._n_layer = n_layer
self._preprocess_layer = PrePostProcessLayer(self.full_name(),
self._preprocess_layer = PrePostProcessLayer(d_model,
self._preprocess_cmd, 3)
for i in range(n_layer):
self._encoder_sublayers.append(
......@@ -595,7 +594,6 @@ class PrepareEncoderDecoderLayer(Layer):
self._src_vocab_size = src_vocab_size
self._dropout_rate = dropout_rate
self._input_emb = Embedding(
name_scope=self.full_name(),
size=[src_vocab_size, src_emb_dim],
is_sparse=is_sparse,
padding_idx=0,
......@@ -608,7 +606,6 @@ class PrepareEncoderDecoderLayer(Layer):
else:
pos_inp = pos_inp2
self._pos_emb = Embedding(
name_scope=self.full_name(),
size=[self._src_max_len, src_emb_dim],
is_sparse=is_sparse,
param_attr=fluid.ParamAttr(
......@@ -698,8 +695,8 @@ class DecoderSubLayer(Layer):
self._postprocess_cmd = postprocess_cmd
self._preprocess_cmd = preprocess_cmd
self._prepostprcess_dropout = prepostprocess_dropout
self._pre_process_layer = PrePostProcessLayer(self.full_name(),
preprocess_cmd, 3)
self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
3)
self._multihead_attention_layer = MultiHeadAttentionLayer(
self.full_name(),
d_key,
......@@ -709,10 +706,10 @@ class DecoderSubLayer(Layer):
attention_dropout,
cache=cache,
gather_idx=gather_idx)
self._post_process_layer = PrePostProcessLayer(self.full_name(),
postprocess_cmd, None)
self._pre_process_layer2 = PrePostProcessLayer(self.full_name(),
preprocess_cmd, 3)
self._post_process_layer = PrePostProcessLayer(d_model, postprocess_cmd,
None)
self._pre_process_layer2 = PrePostProcessLayer(d_model, preprocess_cmd,
3)
self._multihead_attention_layer2 = MultiHeadAttentionLayer(
self.full_name(),
d_key,
......@@ -723,13 +720,13 @@ class DecoderSubLayer(Layer):
cache=cache,
gather_idx=gather_idx,
static_kv=True)
self._post_process_layer2 = PrePostProcessLayer(self.full_name(),
self._post_process_layer2 = PrePostProcessLayer(d_model,
postprocess_cmd, None)
self._pre_process_layer3 = PrePostProcessLayer(self.full_name(),
preprocess_cmd, 3)
self._pre_process_layer3 = PrePostProcessLayer(d_model, preprocess_cmd,
3)
self._positionwise_feed_forward_layer = PositionwiseFeedForwardLayer(
self.full_name(), d_inner_hid, d_model, relu_dropout)
self._post_process_layer3 = PrePostProcessLayer(self.full_name(),
self._post_process_layer3 = PrePostProcessLayer(d_model,
postprocess_cmd, None)
def forward(self, dec_input, enc_output, slf_attn_bias, dec_enc_attn_bias):
......@@ -775,8 +772,8 @@ class DecoderLayer(Layer):
caches=None,
gather_idx=None):
super(DecoderLayer, self).__init__(name_scope)
self._pre_process_layer = PrePostProcessLayer(self.full_name(),
preprocess_cmd, 3)
self._pre_process_layer = PrePostProcessLayer(d_model, preprocess_cmd,
3)
self._decoder_sub_layers = list()
self._n_layer = n_layer
self._preprocess_cmd = preprocess_cmd
......
......@@ -158,7 +158,6 @@ class PtbModel(fluid.Layer):
init_scale=init_scale,
dropout=dropout)
self.embedding = Embedding(
self.full_name(),
size=[vocab_size, hidden_size],
dtype='float32',
is_sparse=False,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册