Commit ab19d92e authored by Jiabin Yang, committed by ceci3

test=develop, reconstruct layer helper to fit imperative usage (#15938)

* test=develop, reconstruct layer helper to fit imperative usage

* test=develop, fix import error on py35

* test=develop, fix rnn gradient error

* test=develop, delete test use code

* test=develop, remove helper from imperative usage

* test=develop, fix test_base_layer using new helper

* test=develop, reconstruct layerhelper for imperative mode

* test=develop, reconstruct layerhelper for imperative mode

* test=develop, fix bug

* test=develop, fix test failed bug

* test=develop, fix test failed bug

* test=develop, fix test failed bug

* test=develop, fix bug

* test=develop, polish code
Parent eb367f99
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import copy
import six
from ..framework import Parameter, _in_imperative_mode
from ..param_attr import ParamAttr
from .. import core
from six.moves import zip
from ..layer_helper_base import LayerHelperBase
class LayerObjectHelper(LayerHelperBase):
def __init__(self, name):
super(LayerObjectHelper, self).__init__(name, layer_type=name)
def append_op(self,
type=None,
inputs=None,
outputs=None,
attrs=None,
stop_gradient=None):
"""append an operator for this layer object.
Args:
type: operator type
inputs: input variable of the operator
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
Returns created parameter Variable.
"""
return self.main_program.current_block().append_op(
type=type,
inputs=inputs,
outputs=outputs,
attrs=attrs,
stop_gradient=stop_gradient)
def _multiple_input(self, inputs_in):
inputs = inputs_in
ret = []
if isinstance(inputs, (list, tuple)):
for inp in inputs:
ret.append(self.to_variable(inp))
else:
ret.append(self.to_variable(inputs))
return ret
# TODO: make it public when we need it
def _input(self, inputs_in):
inputs = self._multiple_input(inputs_in)
if len(inputs) != 1:
raise "{0} layer only takes one input".format(self.layer_type)
return inputs[0]
def _multiple_param_attr(self, length, param_attr_in=None):
param_attr = param_attr_in if param_attr_in is not None else ParamAttr()
if isinstance(param_attr, ParamAttr):
param_attr = [param_attr]
if len(param_attr) != 1 and len(param_attr) != length:
raise ValueError("parameter number mismatch")
elif len(param_attr) == 1 and length != 1:
tmp = [None] * length
for i in six.moves.range(length):
tmp[i] = copy.deepcopy(param_attr[0])
param_attr = tmp
return param_attr
def iter_inputs_and_params(self, inputs_in, param_attr_in=None):
"""Access all inputs and params one by one
Args:
inputs_in: inputs to be iter
param_attr_in: param_attr to be iter
Returns input, param_attr
"""
inputs = inputs_in if (inputs_in is not None) else []
inputs = self._multiple_input(inputs)
param_attrs = self._multiple_param_attr(len(inputs), param_attr_in)
for ipt, param_attr in zip(inputs, param_attrs):
yield ipt, param_attr
def input_dtype(self, inputs_in):
"""Get input data type
Args:
inputs_in: inputs wanted know the data type
Returns dtype of the input
"""
inputs = self._multiple_input(inputs_in)
dtype = None
for each in inputs:
if dtype is None:
dtype = each.dtype
elif dtype != each.dtype:
raise ValueError("Data Type mismatch: %d to %d" %
(dtype, each.dtype))
return dtype
def get_parameter(self, name):
"""Get parameter specifically
Args:
name: parameter's name
Returns target parameter
"""
param = self.main_program.global_block().var(name)
if not isinstance(param, Parameter):
raise ValueError("no Parameter name %s found" % name)
return param
def append_bias_op(self,
input_var,
dim_start=1,
dim_end=None,
bias_attr=None):
"""Append bias operator and return its output. If the user does not set bias_attr, append_bias_op will return input_var
Args:
input_var: the input variable. The len(input_var.shape) is
larger or equal than 2.
dim_start:
dim_end: the shape of the bias will be
bias_attr: the bias_attr of it
Return the Variable of after append bias op
"""
size = list(input_var.shape[dim_start:dim_end])
bias_attr = bias_attr
if not bias_attr:
return input_var
b = self.create_parameter(
attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True)
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
self.append_op(
type='elementwise_add',
inputs={'X': [input_var],
'Y': [b]},
outputs={'Out': [tmp]},
attrs={'axis': dim_start})
return tmp
# TODO: this should not be called anymore after all activation func move to Layers
def append_activation(self,
input_var,
act=None,
use_cudnn=None,
use_mkl_dnn=None):
"""Append activation
Args:
input_var: the input variable. The len(input_var.shape) is
larger or equal than 2.
act: activation type
use_mkl_dnn: if use mkldnn
use_cudnn: if use cudnn
Return the Variable of after append activation
"""
act = act
if act is None:
return input_var
if isinstance(act, six.string_types):
act = {'type': act}
else:
raise TypeError(str(act) + " should be unicode or str")
if (use_cudnn is not None) and use_cudnn:
act['use_cudnn'] = use_cudnn
if (use_mkl_dnn is not None) and use_mkl_dnn:
act['use_mkldnn'] = use_mkl_dnn
act_type = act.pop('type')
tmp = input_var
# NOTE(dzhwinter): some activation support inplace compution.
# NOTE(minqiyang): currently, we don't support inplace in imperative mode
if not _in_imperative_mode() and core.IsInplace(act_type):
tmp = input_var
else:
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
self.append_op(
type=act_type,
inputs={"X": [input_var]},
outputs={"Out": [tmp]},
attrs=act)
return tmp
def is_instance(self, param, cls):
"""Check that the input parameter is an instance of the given class.
Args:
param: parameter to be checked
cls: expected class of the parameter
Raises TypeError if the check fails.
"""
if not isinstance(param, cls):
raise TypeError(
"The input {0} parameter of method {1} must be {2}".format(
param, self.layer_type, cls.__name__))
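The forward passes touched by this commit now go through the layer's own LayerObjectHelper: ops are appended via self._helper.append_op and the activation is passed explicitly to append_activation instead of living in helper kwargs. A minimal sketch of that pattern, assuming this commit's API; the TinyMul layer, its shapes, and the 'tanh' choice are hypothetical illustrations, not part of the commit:

```python
import paddle.fluid as fluid
from paddle.fluid import core


class TinyMul(fluid.imperative.Layer):
    """Hypothetical layer: a mul op followed by an explicit activation."""

    def __init__(self, name_scope):
        super(TinyMul, self).__init__(name_scope)
        self._dtype = core.VarDesc.VarType.FP32
        # create_parameter is forwarded by Layer to the helper (see layers.py below)
        self._w = self.create_parameter(
            attr=fluid.ParamAttr(), shape=[4, 4], dtype='float32',
            is_bias=False)

    def forward(self, x):
        # self._helper is the LayerObjectHelper built in Layer.__init__
        tmp = self._helper.create_variable_for_type_inference(self._dtype)
        self._helper.append_op(
            type="mul",
            inputs={"X": x,
                    "Y": self._w},
            outputs={"Out": tmp},
            attrs={"x_num_col_dims": 1,
                   "y_num_col_dims": 1})
        # the activation is now passed per call rather than stored on the helper
        return self._helper.append_activation(tmp, act='tanh')
```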
@@ -19,8 +19,8 @@ import numpy as np
import collections
from .. import unique_name
from paddle.fluid import core
+from .layer_object_helper import LayerObjectHelper
from paddle.fluid import framework
-from paddle.fluid.imperative import base
__all__ = ['Layer', 'PyLayer']
@@ -44,6 +44,8 @@ class Layer(core.Layer):
self._parameters = collections.OrderedDict()
self._sub_layers = collections.OrderedDict()
+self._helper = LayerObjectHelper(self._full_name)
def full_name(self):
"""Full name for this layers.
@@ -53,6 +55,51 @@ class Layer(core.Layer):
"""
return self._full_name
def create_parameter(self,
attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
"""Create parameters for this layers.
Args:
attr: [ParamAttr] should be the parameter attribute for this parameter
shape: shape of the paramter
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
Returns created parameter Variable.
"""
return self._helper.create_parameter(attr, shape, dtype, is_bias,
default_initializer)
# TODO: Add more parameter list when we need them
def create_variable(self,
name=None,
persistable=None,
dtype=None,
type=core.VarDesc.VarType.LOD_TENSOR):
"""Create Variable for this layers.
Args:
name: name of the variable
persistable: if set this variable persistable
dtype: data type of data in the variable
type: type of the variable
Returns created Variable.
"""
if name is not None:
var_name = ".".join([self._full_name, name])
else:
var_name = unique_name.generate(".".join(
[self._full_name, "_generated_var"]))
return self._helper.main_program.current_block().create_var(
name=var_name, persistable=persistable, dtype=dtype, type=type)
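With create_parameter and create_variable now exposed directly on Layer, a subclass owns its parameters without constructing a LayerHelper, which is exactly what the reworked tests below do. A minimal end-to-end sketch under those assumptions (the MyScale layer, its shapes and constant initializer are hypothetical):

```python
import numpy as np
import paddle.fluid as fluid


class MyScale(fluid.imperative.Layer):
    """Hypothetical layer: adds a learnable 2x2 offset to its input."""

    def __init__(self, name_scope):
        super(MyScale, self).__init__(name_scope)
        # in imperative mode the parameter is created and initialized immediately
        self._offset = self.create_parameter(
            attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=0.5)),
            shape=[2, 2],
            dtype='float32',
            is_bias=False)

    def forward(self, inputs):
        return fluid.layers.elementwise_add(inputs, self._offset)


with fluid.imperative.guard():
    x = fluid.imperative.base.to_variable(np.ones([2, 2], dtype='float32'))
    y = MyScale('my_scale')(x)
    print(y._numpy())  # 1.5 everywhere: input 1.0 plus the 0.5 offset
```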
def parameters(self, include_sublayers=True):
"""Returns a list of Parameters from current and sub-layers.
...
@@ -41,21 +41,12 @@ class Conv2D(layers.Layer):
bias_attr=None,
dtype=core.VarDesc.VarType.FP32):
assert param_attr is not False, "param_attr should not be False here."
-super(Conv2D, self).__init__(name_scope, dtype=dtype)
+super(Conv2D, self).__init__(name_scope)
-# TODO(minqiyang): Move this to the top.
-from ..layer_helper import LayerHelper
-self._helper = LayerHelper(
-self.full_name(),
-param_attr=param_attr,
-bias_attr=bias_attr,
-dtype=dtype,
-act=act)
self._groups = groups
self._stride = utils.convert_to_list(stride, 2, 'stride')
self._padding = utils.convert_to_list(padding, 2, 'padding')
self._dilation = utils.convert_to_list(dilation, 2, 'dilation')
+self._act = act
if not isinstance(use_cudnn, bool):
raise ValueError("use_cudnn should be True or False")
self._use_cudnn = use_cudnn
@@ -80,28 +71,28 @@ class Conv2D(layers.Layer):
std = (2.0 / filter_elem_num)**0.5
return Normal(0.0, std, 0)
-self._filter_param = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._filter_param = self.create_parameter(
+attr=param_attr,
shape=filter_shape,
dtype=self._dtype,
default_initializer=_get_default_param_initializer())
if self._use_cudnn:
-self._helper.create_variable(
+self.create_variable(
name="kCUDNNFwdAlgoCache",
persistable=True,
type=core.VarDesc.VarType.RAW)
-self._helper.create_variable(
+self.create_variable(
name="kCUDNNBwdDataAlgoCache",
persistable=True,
type=core.VarDesc.VarType.RAW)
-self._helper.create_variable(
+self.create_variable(
name="kCUDNNBwdFilterAlgoCache",
persistable=True,
type=core.VarDesc.VarType.RAW)
-self._bias_param = self._helper.create_parameter(
-attr=self._helper.bias_attr,
+self._bias_param = self.create_parameter(
+attr=bias_attr,
shape=[num_filters],
dtype=self._dtype,
is_bias=True)
@@ -137,7 +128,7 @@ class Conv2D(layers.Layer):
attrs={'axis': 1})
# Currently, we don't support inplace in imperative mode
-return self._helper.append_activation(pre_act)
+return self._helper.append_activation(pre_act, act=self._act)
class Pool2D(layers.Layer):
@@ -167,9 +158,6 @@ class Pool2D(layers.Layer):
super(Pool2D, self).__init__(name_scope, dtype=dtype)
-from ..layer_helper import LayerHelper
-self._helper = LayerHelper(self.full_name(), dtype=dtype)
self._pool_type = pool_type
self._pool_size = utils.convert_to_list(pool_size, 2, 'pool_size')
self._pool_padding = utils.convert_to_list(pool_padding, 2,
@@ -216,28 +204,25 @@ class FC(layers.Layer):
self._size = size
self._num_flatten_dims = num_flatten_dims
self._dtype = dtype
-from ..layer_helper import LayerHelper
-self._helper = LayerHelper(
-self.full_name(),
-param_attr=param_attr,
-bias_attr=bias_attr,
-act=act)
+self._param_attr = param_attr
+self._bias_attr = param_attr
+self._act = act
def _build_once(self, input):
input_shape = input.shape
param_shape = [
reduce(lambda a, b: a * b, input_shape[self._num_flatten_dims:], 1)
] + [self._size]
-self._w = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._w = self.create_parameter(
+attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=False)
-if self._helper.bias_attr:
+if self._param_attr:
size = list([self._size])
-self._b = self._helper.create_parameter(
-attr=self._helper.bias_attr,
+self._b = self.create_parameter(
+attr=self._param_attr,
shape=size,
dtype=self._dtype,
is_bias=True)
@@ -275,7 +260,7 @@ class FC(layers.Layer):
else:
pre_activation = pre_bias
# Currently, we don't support inplace in imperative mode
-return self._helper.append_activation(pre_activation)
+return self._helper.append_activation(pre_activation, act=self._act)
class BatchNorm(layers.Layer):
@@ -297,16 +282,12 @@ class BatchNorm(layers.Layer):
fuse_with_relu=False,
use_global_stats=False):
super(BatchNorm, self).__init__(name_scope)
+self._param_attr = param_attr
+self._param_attr = bias_attr
+self._act = act
assert bias_attr is not False, "bias_attr should not be False in batch_norm."
-from ..layer_helper import LayerHelper
-self._helper = LayerHelper(
-self.full_name(),
-param_attr=param_attr,
-bias_attr=bias_attr,
-act=act)
if dtype == core.VarDesc.VarType.FP16:
self._dtype = core.VarDesc.VarType.FP32
else:
@@ -315,23 +296,23 @@ class BatchNorm(layers.Layer):
param_shape = [num_channels]
# create parameter
-self._scale = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._scale = self.create_parameter(
+attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
default_initializer=Constant(1.0))
-if use_global_stats and self._helper.param_attr.learning_rate == 0.:
+if use_global_stats and self._param_attr.learning_rate == 0.:
self._scale._stop_gradient = True
-self._bias = self._helper.create_parameter(
-attr=self._helper.bias_attr,
+self._bias = self.create_parameter(
+attr=self._param_attr,
shape=param_shape,
dtype=self._dtype,
is_bias=True)
-if use_global_stats and self._helper.bias_attr.learning_rate == 0.:
+if use_global_stats and self._param_attr.learning_rate == 0.:
self._bias._stop_gradient = True
-self._mean = self._helper.create_parameter(
+self._mean = self.create_parameter(
attr=ParamAttr(
name=moving_mean_name,
initializer=Constant(0.0),
@@ -341,7 +322,7 @@ class BatchNorm(layers.Layer):
dtype=self._dtype)
self._mean._stop_gradient = True
-self._variance = self._helper.create_parameter(
+self._variance = self.create_parameter(
attr=ParamAttr(
name=moving_variance_name,
initializer=Constant(1.0),
@@ -401,7 +382,7 @@ class BatchNorm(layers.Layer):
})
# Currently, we don't support inplace in imperative mode
-return self._helper.append_activation(batch_norm_out)
+return self._helper.append_activation(batch_norm_out, self._act)
class Embedding(layers.Layer):
@@ -466,9 +447,7 @@ class Embedding(layers.Layer):
if self._remote_prefetch:
assert self._is_sparse is True and self._is_distributed is False
-from ..layer_helper import LayerHelper
-self._helper = LayerHelper(self.full_name(), param_attr=param_attr)
-self._w = self._helper.create_parameter(
+self._w = self.create_parameter(
attr=self._param_attr,
shape=self._size,
dtype=self._dtype,
...
@@ -19,7 +19,6 @@ import numpy as np
from .wrapped_decorator import signature_safe_contextmanager
from .core import VarDesc
from . import unique_name
-from .imperative import base as imperative_base
__all__ = [
'Constant', 'Uniform', 'Normal', 'TruncatedNormal', 'Xavier', 'Bilinear',
@@ -166,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu()
},
stop_gradient=True)
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -246,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -325,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -404,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -510,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -611,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -710,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape),
value_name: values
})
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
@@ -769,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values
},
stop_gradient=True)
-if not imperative_base.enabled():
+if not framework._in_imperative_mode():
var.op = op
return op
...
@@ -15,45 +15,29 @@
from __future__ import print_function
import copy
-import itertools
import six
-import sys
-import numpy as np
-from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating, _in_imperative_mode
+from .framework import Parameter, dtype_is_floating, _in_imperative_mode
from . import unique_name
-from paddle.fluid.imperative import base as imperative_base
from paddle.fluid.initializer import Constant, Xavier
-from .param_attr import ParamAttr, WeightNormParamAttr
+from .param_attr import ParamAttr
from . import core
from six.moves import zip
+from .layer_helper_base import LayerHelperBase
-class LayerHelper(object):
+class LayerHelper(LayerHelperBase):
def __init__(self, layer_type, **kwargs):
self.kwargs = kwargs
-self.layer_type = layer_type
name = self.kwargs.get('name', None)
# TODO(panyx0718, minqiyang): imperative mode
# can not use both `layer_type` and `name`. Deprecate LayerHelper
# and write a Helper for imperative mode.
if name is None:
-self.kwargs['name'] = unique_name.generate(self.layer_type)
+self.kwargs['name'] = unique_name.generate(layer_type)
+super(LayerHelper, self).__init__(
+self.kwargs['name'], layer_type=layer_type)
-@property
-def name(self):
-return self.kwargs['name']
-@property
-def main_program(self):
-return default_main_program()
-@property
-def startup_program(self):
-return default_startup_program()
-def to_variable(self, x):
-return imperative_base.to_variable(x, self.main_program.current_block())
def append_op(self, *args, **kwargs):
return self.main_program.current_block().append_op(*args, **kwargs)
@@ -82,6 +66,7 @@ class LayerHelper(object):
def bias_attr(self):
return ParamAttr._to_attr(self.kwargs.get('bias_attr', None))
+#TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of param_attr
def multiple_param_attr(self, length):
param_attr = self.param_attr
if isinstance(param_attr, ParamAttr):
@@ -113,297 +98,13 @@ class LayerHelper(object):
(dtype, each.dtype))
return dtype
def _create_weight_normalize(self, attr, shape, dtype):
from .layers import elementwise_mul, elementwise_div, reshape
# Remove these ops when LayerHelper and layers support indicating
# program and block.
def __norm_op(x,
out=None,
p=2,
dim=None,
keep_dim=False,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
abs_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_abs'])),
dtype=dtype,
persistable=False)
block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_pow'])),
dtype=dtype,
persistable=False)
block.append_op(
type='pow',
inputs={'X': abs_out},
outputs={'Out': pow_out},
attrs={'factor': float(p)})
sum_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_sum'])),
dtype=dtype,
persistable=False)
block.append_op(
type='reduce_sum',
inputs={'X': pow_out},
outputs={'Out': sum_out},
attrs={
'dim': dim,
'keep_dim': keep_dim,
'reduce_all': True if dim is None else False
})
block.append_op(
type='pow',
inputs={'X': sum_out},
outputs={'Out': out},
attrs={'factor': 1. / p})
return out
def __reshape_op(x,
shape,
out=None,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_reshape'])),
dtype=dtype,
persistable=False)
block.append_op(
type='reshape',
inputs={'X': x},
outputs={'Out': out},
attrs={'shape': shape})
return out
def __transpose_op(x,
axis,
out=None,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_transpose'])),
dtype=dtype,
persistable=False)
block.append_op(
type='transpose',
inputs={'X': x},
outputs={'Out': out},
attrs={'axis': axis})
return out
def __norm_except_dim(x,
out=None,
dim=None,
block=self.startup_program.global_block()):
"""Computes the norm over all dimensions except dim"""
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
if dim is None:
__norm_op(x, out, dim=dim, block=block)
elif dim == 0:
out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
norm = __norm_op(reshape, dim=1, block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
elif dim == len(x.shape) - 1:
out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
norm = __norm_op(reshape, dim=0, block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
else:
perm = list(range(len(x.shape)))
perm[0], perm[dim] = dim, 0
transpose = __transpose_op(x, perm, block=block)
norm = __norm_op(transpose, dim=0, block=block)
__transpose_op(norm, perm, out=out, block=block)
return out
def __weight_normalize(g, v, dim):
"""Calculations for weight normalization"""
norm = __norm_except_dim(
v, dim=dim, block=self.main_program.current_block())
scale = elementwise_div(
x=g, y=norm) # The shapes of g and norm are the same.
# Currently, elementwise_mul only support broadcast when the shape
# of y is a subset of the shape of x. Thus, we reshape y to squeeze
# to achive the subset.
w = elementwise_mul(
x=v,
y=scale if dim is None else reshape(
x=scale, shape=[v.shape[dim]]),
axis=-1 if dim is None else dim)
# To serialize the original parameter for inference, maybe a
# parameter rather than a variable should be returned.
return w
g_param_attr = copy.deepcopy(attr)
g_param_attr.name = attr.name + '_g'
g_param_shape = [1] * len(shape)
if attr.dim is not None:
g_param_shape[attr.dim] = shape[attr.dim]
v_param_attr = copy.deepcopy(attr)
v_param_attr.name = attr.name + '_v'
v_param_shape = shape
# Add to startup_program to initialize g and v.
# Try to reconstruct the initializer of w by initializing g and v.
# Set the initializers of g and v as below, then the distribution
# of w is the same as initializing w with the given initializer.
# For Data-Dependent Initialization, please compute the init-values
# of g and v in external and then feed the values to g and v by
# executing an extra program.
g_param = self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=g_param_shape,
**g_param_attr._to_kwargs(with_initializer=False))
v_param = self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=v_param_shape,
**v_param_attr._to_kwargs(with_initializer=True))
__norm_except_dim(
x=v_param,
out=g_param,
dim=attr.dim,
block=self.startup_program.global_block())
# Add weight normalization to main_program
g_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
v_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs())
w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
return w_param
def create_parameter(self,
attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
# Deepcopy the attr so that parameters can be shared in program
attr = copy.deepcopy(attr)
assert isinstance(attr, ParamAttr)
suffix = 'b' if is_bias else 'w'
if attr.name is None:
attr.name = unique_name.generate(".".join([self.name, suffix]))
if default_initializer is None and attr.initializer is None:
if isinstance(dtype, core.VarDesc.VarType):
if dtype != core.VarDesc.VarType.FP32 and \
dtype != core.VarDesc.VarType.FP64 and \
dtype != core.VarDesc.VarType.FP16:
raise TypeError(
"Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
)
else:
if not (dtype.startswith("float") or dtype == "double"):
raise TypeError(
"Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
)
if is_bias:
attr._set_default_bias_initializer()
else:
attr._set_default_param_initializer()
else:
attr._set_default_initializer(default_initializer)
# If weight normalization is set, insert extra parameters and ops.
# Refer to https://arxiv.org/pdf/1602.07868.pdf
if isinstance(attr, WeightNormParamAttr):
param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param)
return param
if _in_imperative_mode():
# In imperative mode, we want the returned parameter to be
# initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True))
else:
self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True))
return self.main_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr._to_kwargs())
def get_parameter(self, name):
param = self.main_program.global_block().var(name)
if not isinstance(param, Parameter):
raise ValueError("no Parameter name %s found" % name)
return param
+#TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of bias_attr
-def create_variable_for_type_inference(self, dtype, stop_gradient=False):
"""Create a temporary variable that should be type inferred layer.
Note:
The default type will be set to LOD_TENSOR. However, when
the var is used as operator output, its type will be updated
based on operator's `VarTypeInference` implementation in
infer_var_type.
"""
return self.main_program.current_block().create_var(
name=unique_name.generate(".".join([self.name, 'tmp'])),
dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=stop_gradient)
def create_variable(self, *args, **kwargs):
return self.main_program.current_block().create_var(*args, **kwargs)
def create_global_variable(self, persistable=False, *args, **kwargs):
"""
create global variable, note that there is no initializer for this global variable.
Args:
persistable(bool): True if it is a checkpoint value.
*args: See create_var's documentation
**kwargs: See create_var's documentation
Returns(Variable): the created variable.
"""
return self.main_program.global_block().create_var(
*args, persistable=persistable, **kwargs)
def create_or_get_global_variable(self, name, *args, **kwargs):
"""
Creates a global variable if not exists and returns the variable and
a boolean flag which is true when it is a new variable.
"""
if self.main_program.global_block().has_var(name):
return self.main_program.global_block().var(name), False
else:
return self.create_global_variable(name=name, *args, **kwargs), True
def set_variable_initializer(self, var, initializer):
assert isinstance(var, Variable)
if imperative_base.enabled():
initializer(var, var.block)
else:
self.startup_program.global_block().create_var(
name=var.name,
type=var.type,
dtype=var.dtype,
shape=var.shape,
persistable=True,
initializer=initializer)
def append_bias_op(self, input_var, dim_start=1, dim_end=None):
"""
Append bias operator and return its output. If the user does not set
@@ -434,6 +135,7 @@ class LayerHelper(object):
attrs={'axis': dim_start})
return tmp
+#TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of act
def append_activation(self, input_var):
act = self.kwargs.get('act', None)
if act is None:
@@ -448,10 +150,11 @@ class LayerHelper(object):
if 'use_mkldnn' in self.kwargs:
act['use_mkldnn'] = self.kwargs.get('use_mkldnn')
act_type = act.pop('type')
tmp = input_var
# NOTE(dzhwinter): some activation support inplace compution.
# NOTE(minqiyang): currently, we don't support inplace in imperative mode
-if not imperative_base.enabled() and core.IsInplace(act_type):
+if not _in_imperative_mode() and core.IsInplace(act_type):
tmp = input_var
else:
tmp = self.create_variable_for_type_inference(dtype=input_var.dtype)
@@ -462,6 +165,7 @@ class LayerHelper(object):
attrs=act)
return tmp
+#TODO (jiabin): should we remove this since it has never be used
def _get_default_initializer(self, dtype):
if dtype is None or dtype_is_floating(dtype) is True:
return Xavier()
@@ -469,6 +173,7 @@ class LayerHelper(object):
# For integer and boolean types, initialize with all zeros
return Constant()
+#TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs
def is_instance(self, param_name, cls):
param = self.kwargs.get(param_name, None)
if not isinstance(param, cls):
...
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import copy
import numpy as np
from .framework import Variable, default_main_program, default_startup_program, _in_imperative_mode, _current_expected_place
from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core
class LayerHelperBase(object):
def __init__(self, name, layer_type):
self._layer_type = layer_type
self._name = name
@property
def name(self):
return self._name
@property
def layer_type(self):
return self._layer_type
@property
def main_program(self):
return default_main_program()
@property
def startup_program(self):
return default_startup_program()
def to_variable(self, value, block=None):
"""convert value to variable
Args:
value: value to be convert
block: the block of the variable
Return Variable construct from value
"""
if isinstance(value, np.ndarray):
assert _in_imperative_mode(
), "to_variable could only be called in imperative mode"
if not block:
block = default_main_program().current_block()
py_var = Variable(
block,
type=core.VarDesc.VarType.LOD_TENSOR,
name=None,
shape=value.shape,
dtype=value.dtype)
var = py_var._ivar.value()
tensor = var.get_tensor()
tensor.set(value, _current_expected_place())
return py_var
elif isinstance(value, Variable):
return value
def _create_weight_normalize(self, attr, shape, dtype):
from .layers import elementwise_mul, elementwise_div, reshape
# Remove these ops when LayerHelper and layers support indicating
# program and block.
def __norm_op(x,
out=None,
p=2,
dim=None,
keep_dim=False,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
abs_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_abs'])),
dtype=dtype,
persistable=False)
block.append_op(
type='abs', inputs={'X': x}, outputs={'Out': abs_out})
pow_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_pow'])),
dtype=dtype,
persistable=False)
block.append_op(
type='pow',
inputs={'X': abs_out},
outputs={'Out': pow_out},
attrs={'factor': float(p)})
sum_out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_sum'])),
dtype=dtype,
persistable=False)
block.append_op(
type='reduce_sum',
inputs={'X': pow_out},
outputs={'Out': sum_out},
attrs={
'dim': dim,
'keep_dim': keep_dim,
'reduce_all': True if dim is None else False
})
block.append_op(
type='pow',
inputs={'X': sum_out},
outputs={'Out': out},
attrs={'factor': 1. / p})
return out
def __reshape_op(x,
shape,
out=None,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_reshape'])),
dtype=dtype,
persistable=False)
block.append_op(
type='reshape',
inputs={'X': x},
outputs={'Out': out},
attrs={'shape': shape})
return out
def __transpose_op(x,
axis,
out=None,
block=self.startup_program.global_block()):
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_transpose'])),
dtype=dtype,
persistable=False)
block.append_op(
type='transpose',
inputs={'X': x},
outputs={'Out': out},
attrs={'axis': axis})
return out
def __norm_except_dim(x,
out=None,
dim=None,
block=self.startup_program.global_block()):
"""Computes the norm over all dimensions except dim"""
if out is None:
out = block.create_var(
name=unique_name.generate(".".join(
[self.name, 'weight_norm_norm'])),
dtype=dtype,
persistable=False)
if dim is None:
__norm_op(x, out, dim=dim, block=block)
elif dim == 0:
out_shape = [x.shape[0]] + [1] * (len(x.shape) - 1)
reshape = __reshape_op(x, shape=[x.shape[0], -1], block=block)
norm = __norm_op(reshape, dim=1, block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
elif dim == len(x.shape) - 1:
out_shape = [1] * (len(x.shape) - 1) + [x.shape[-1]]
reshape = __reshape_op(x, shape=[-1, x.shape[-1]], block=block)
norm = __norm_op(reshape, dim=0, block=block)
__reshape_op(norm, out=out, shape=out_shape, block=block)
else:
perm = list(range(len(x.shape)))
perm[0], perm[dim] = dim, 0
transpose = __transpose_op(x, perm, block=block)
norm = __norm_op(transpose, dim=0, block=block)
__transpose_op(norm, perm, out=out, block=block)
return out
def __weight_normalize(g, v, dim):
"""Calculations for weight normalization"""
norm = __norm_except_dim(
v, dim=dim, block=self.main_program.current_block())
scale = elementwise_div(
x=g, y=norm) # The shapes of g and norm are the same.
# Currently, elementwise_mul only support broadcast when the shape
# of y is a subset of the shape of x. Thus, we reshape y to squeeze
# to achive the subset.
w = elementwise_mul(
x=v,
y=scale if dim is None else reshape(
x=scale, shape=[v.shape[dim]]),
axis=-1 if dim is None else dim)
# To serialize the original parameter for inference, maybe a
# parameter rather than a variable should be returned.
return w
g_param_attr = copy.deepcopy(attr)
g_param_attr.name = attr.name + '_g'
g_param_shape = [1] * len(shape)
if attr.dim is not None:
g_param_shape[attr.dim] = shape[attr.dim]
v_param_attr = copy.deepcopy(attr)
v_param_attr.name = attr.name + '_v'
v_param_shape = shape
# Add to startup_program to initialize g and v.
# Try to reconstruct the initializer of w by initializing g and v.
# Set the initializers of g and v as below, then the distribution
# of w is the same as initializing w with the given initializer.
# For Data-Dependent Initialization, please compute the init-values
# of g and v in external and then feed the values to g and v by
# executing an extra program.
g_param = self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=g_param_shape,
**g_param_attr._to_kwargs(with_initializer=False))
v_param = self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=v_param_shape,
**v_param_attr._to_kwargs(with_initializer=True))
__norm_except_dim(
x=v_param,
out=g_param,
dim=attr.dim,
block=self.startup_program.global_block())
# Add weight normalization to main_program
g_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=g_param_shape, **g_param_attr._to_kwargs())
v_param = self.main_program.global_block().create_parameter(
dtype=dtype, shape=v_param_shape, **v_param_attr._to_kwargs())
w_param = __weight_normalize(g_param, v_param, dim=attr.dim)
return w_param
# TODO: hide the func after we move the layers to Layers
def create_parameter(self,
attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
"""Create parameters for this layers.
Args:
attr: [ParamAttr] should be the parameter attribute for this parameter
shape: shape of the paramter
dtype: data type of this parameter
is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter
Returns created parameter Variable.
"""
# Deepcopy the attr so that parameters can be shared in program
attr = copy.deepcopy(attr)
if attr is None:
attr = ParamAttr._to_attr(attr)
assert isinstance(attr, ParamAttr)
suffix = 'b' if is_bias else 'w'
if attr.name is None:
attr.name = unique_name.generate(".".join([self.name, suffix]))
if default_initializer is None and attr.initializer is None:
if isinstance(dtype, core.VarDesc.VarType):
if dtype != core.VarDesc.VarType.FP32 and \
dtype != core.VarDesc.VarType.FP64 and \
dtype != core.VarDesc.VarType.FP16:
raise TypeError(
"Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
)
else:
if not (dtype.startswith("float") or dtype == "double"):
raise TypeError(
"Can not create parameter with default initializer when dtype is not float type. Set default_initializer to fit the parameter dtype!"
)
if is_bias:
attr._set_default_bias_initializer()
else:
attr._set_default_param_initializer()
else:
attr._set_default_initializer(default_initializer)
# If weight normalization is set, insert extra parameters and ops.
# Refer to https://arxiv.org/pdf/1602.07868.pdf
if isinstance(attr, WeightNormParamAttr):
param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param)
return param
if _in_imperative_mode():
# In imperative mode, we want the returned parameter to be
# initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True))
else:
self.startup_program.global_block().create_parameter(
dtype=dtype,
shape=shape,
**attr._to_kwargs(with_initializer=True))
return self.main_program.global_block().create_parameter(
dtype=dtype, shape=shape, **attr._to_kwargs())
def create_variable_for_type_inference(self, dtype, stop_gradient=False):
"""Create a temporary variable that should be type inferred layer.
Note:
The default type will be set to LOD_TENSOR. However, when
the var is used as operator output, its type will be updated
based on operator's `VarTypeInference` implementation in
infer_var_type.
"""
return self.main_program.current_block().create_var(
name=unique_name.generate(".".join([self.name, 'tmp'])),
dtype=dtype,
type=core.VarDesc.VarType.LOD_TENSOR,
persistable=False,
stop_gradient=stop_gradient)
def create_variable(self, *args, **kwargs):
"""Create Variable for this layers.
Returns created Variable.
"""
return self.main_program.current_block().create_var(*args, **kwargs)
def create_global_variable(self, persistable=False, *args, **kwargs):
"""
create global variable, note that there is no initializer for this global variable.
Args:
persistable(bool): True if it is a checkpoint value.
*args: See create_var's documentation
**kwargs: See create_var's documentation
Returns(Variable): the created variable.
"""
return self.main_program.global_block().create_var(
*args, persistable=persistable, **kwargs)
def create_or_get_global_variable(self, name, *args, **kwargs):
"""
Creates a global variable if not exists and returns the variable and
a boolean flag which is true when it is a new variable.
"""
if self.main_program.global_block().has_var(name):
return self.main_program.global_block().var(name), False
else:
return self.create_global_variable(name=name, *args, **kwargs), True
def set_variable_initializer(self, var, initializer):
"""Set target Variable's initializer
Args:
var: target Variable
initializer: initializer to use
"""
assert isinstance(var, Variable)
if _in_imperative_mode():
initializer(var, var.block)
else:
self.startup_program.global_block().create_var(
name=var.name,
type=var.type,
dtype=var.dtype,
shape=var.shape,
persistable=True,
initializer=initializer)
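LayerHelperBase is the new shared base of LayerHelper (static graph) and LayerObjectHelper (imperative). A minimal sketch of its to_variable behavior, assuming the module path paddle.fluid.layer_helper_base implied by the relative imports above; the stand-alone "demo" helper is hypothetical and only used to exercise the base class:

```python
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.layer_helper_base import LayerHelperBase

with fluid.imperative.guard():
    helper = LayerHelperBase("demo", layer_type="demo")  # hypothetical helper

    # a numpy.ndarray is wrapped into an imperative Variable ...
    x = helper.to_variable(np.ones([2, 3], dtype='float32'))
    print(x.shape)

    # ... while an existing Variable is passed through unchanged
    assert helper.to_variable(x) is x
```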
@@ -379,7 +379,7 @@ class Optimizer(object):
self._dtype = loss.dtype
program = loss.block.program
optimize_ops = []
-if imperative_base.enabled():
+if framework._in_imperative_mode():
if parameter_list is not None:
parameters = parameter_list
else:
...
@@ -16,27 +16,17 @@ import unittest
import numpy as np
import paddle.fluid as fluid
-from paddle.fluid.layer_helper import LayerHelper
class L1(fluid.imperative.Layer):
def __init__(self, prefix):
super(L1, self).__init__(prefix)
-self._helper = LayerHelper(
-self.full_name(),
-param_attr=fluid.ParamAttr(
-initializer=fluid.initializer.Constant(value=0.1)))
-self.w1 = self._helper.create_parameter(
-attr=self._helper.param_attr,
-shape=[2, 2],
-dtype='float32',
-is_bias=False)
-self.w2 = self._helper.create_parameter(
-attr=self._helper.param_attr,
-shape=[2, 2],
-dtype='float32',
-is_bias=False)
+self._param_attr = fluid.ParamAttr(
+initializer=fluid.initializer.Constant(value=0.1))
+self.w1 = self.create_parameter(
+attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False)
+self.w2 = self.create_parameter(
+attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False)
def forward(self):
return self.w1 + self.w2
@@ -67,8 +57,8 @@ class TestBaseLayer(unittest.TestCase):
with fluid.imperative.guard():
l = L1('test_one_level')
ret = l()
-self.assertEqual(l.w1.name, "test_one_level/L1_0_0.w_0")
-self.assertEqual(l.w2.name, "test_one_level/L1_0_0.w_1")
+self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
+self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self):
@@ -76,12 +66,12 @@ class TestBaseLayer(unittest.TestCase):
l = L3('test_three_level')
names = [p.name for p in l.parameters()]
ret = l()
-self.assertEqual(names[0], "test_three_level/L3_0/L2_0/L1_0_0.w_0")
-self.assertEqual(names[1], "test_three_level/L3_0/L2_0/L1_0_0.w_1")
-self.assertEqual(names[2], "test_three_level/L3_0/L2_0/L1_1_0.w_0")
-self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1_0.w_1")
-self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0_0.w_0")
-self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0_0.w_1")
+self.assertEqual(names[0], "test_three_level/L3_0/L2_0/L1_0.w_0")
+self.assertEqual(names[1], "test_three_level/L3_0/L2_0/L1_0.w_1")
+self.assertEqual(names[2], "test_three_level/L3_0/L2_0/L1_1.w_0")
+self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
+self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
+self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2])))
...
@@ -53,11 +53,15 @@ class MLP(fluid.imperative.Layer):
super(MLP, self).__init__(name_scope)
self._fc1 = FC(self.full_name(),
3,
-fluid.ParamAttr(
+param_attr=fluid.ParamAttr(
+initializer=fluid.initializer.Constant(value=0.1)),
+bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
self._fc2 = FC(self.full_name(),
4,
-fluid.ParamAttr(
+param_attr=fluid.ParamAttr(
+initializer=fluid.initializer.Constant(value=0.1)),
+bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)))
def forward(self, inputs):
@@ -74,41 +78,37 @@ class SimpleRNNCell(fluid.imperative.Layer):
self.step_input_size = step_input_size
self.hidden_size = hidden_size
self.output_size = output_size
-self._dype = core.VarDesc.VarType.FP32
-from paddle.fluid.layer_helper import LayerHelper
-self._helper = LayerHelper(
-'SimpleRNNCell', act="tanh", param_attr=param_attr)
+self._dtype = core.VarDesc.VarType.FP32
+self.param_attr = param_attr
def _build_once(self, inputs, pre_hidden):
i2h_param_shape = [self.step_input_size, self.hidden_size]
h2h_param_shape = [self.hidden_size, self.hidden_size]
h2o_param_shape = [self.output_size, self.hidden_size]
-self._i2h_w = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._i2h_w = self.create_parameter(
+attr=self.param_attr,
shape=i2h_param_shape,
dtype=self._dtype,
is_bias=False)
-self._h2h_w = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._h2h_w = self.create_parameter(
+attr=self.param_attr,
shape=h2h_param_shape,
dtype=self._dtype,
is_bias=False)
-self._h2o_w = self._helper.create_parameter(
-attr=self._helper.param_attr,
+self._h2o_w = self.create_parameter(
+attr=self.param_attr,
shape=h2o_param_shape,
dtype=self._dtype,
is_bias=False)
def forward(self, input, pre_hidden):
-tmp_i2h = self._helper.create_variable_for_type_inference(self._dtype)
-tmp_h2h = self._helper.create_variable_for_type_inference(self._dtype)
-hidden = self._helper.create_variable_for_type_inference(self._dype)
-out = self._helper.create_variable_for_type_inference(self._dype)
-softmax_out = self._helper.create_variable_for_type_inference(
-self._dtype)
-reduce_out = self._helper.create_variable_for_type_inference(
-self._dtype)
+tmp_i2h = self.create_variable(dtype=self._dtype)
+tmp_h2h = self.create_variable(dtype=self._dtype)
+hidden = self.create_variable(dtype=self._dtype)
+out = self.create_variable(dtype=self._dtype)
+softmax_out = self.create_variable(dtype=self._dtype)
+reduce_out = self.create_variable(dtype=self._dtype)
self._helper.append_op(
type="mul",
inputs={"X": input,
@@ -132,7 +132,7 @@ class SimpleRNNCell(fluid.imperative.Layer):
outputs={'Out': hidden},
attrs={'axis': -1,
'use_mkldnn': False})
-hidden = self._helper.append_activation(hidden)
+hidden = self._helper.append_activation(hidden, act='tanh')
self._helper.append_op(
type="mul",
@@ -174,7 +174,7 @@ class SimpleRNN(fluid.imperative.Layer):
outs = list()
pre_hiddens = list()
-init_hidden = fluid.layers.tensor.create_parameter(
+init_hidden = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.1)),
shape=[1, 3],
@@ -337,10 +337,10 @@ class TestImperative(unittest.TestCase):
self.assertTrue(np.allclose(dy_grad, static_grad))
params = mlp.parameters(True)
-self.assertEqual("mlp/MLP_0/FC_0_0.w_0", params[0].name)
-self.assertEqual("mlp/MLP_0/FC_0_0.b_0", params[1].name)
-self.assertEqual("mlp/MLP_0/FC_1_0.w_0", params[2].name)
-self.assertEqual("mlp/MLP_0/FC_1_0.b_0", params[3].name)
+self.assertEqual("mlp/MLP_0/FC_0.w_0", params[0].name)
+self.assertEqual("mlp/MLP_0/FC_0.b_0", params[1].name)
+self.assertEqual("mlp/MLP_0/FC_1.w_0", params[2].name)
+self.assertEqual("mlp/MLP_0/FC_1.b_0", params[3].name)
self.assertEqual(len(params), 4)
sublayers = mlp.sublayers(True)
...
@@ -78,7 +78,7 @@ class SimpleImgConvPool(fluid.imperative.Layer):
class MNIST(fluid.imperative.Layer):
-def __init__(self, name_scope, param_attr=None, bias_attr=None):
+def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
self._simple_img_conv_pool_1 = SimpleImgConvPool(
...
@@ -41,19 +41,17 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
self._dropout = dropout
self._input = None
self._num_steps = num_steps
-from paddle.fluid.layer_helper import LayerHelper
-self._helper = LayerHelper('SimpleLSTMRNN', act="tanh")
+self.cell_array = []
+self.hidden_array = []
def _build_once(self, input_embedding, init_hidden=None, init_cell=None):
self.weight_1_arr = []
self.weight_2_arr = []
self.bias_arr = []
-self.hidden_array = []
-self.cell_array = []
self.mask_array = []
for i in range(self._num_layers):
-weight_1 = self._helper.create_parameter(
+weight_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
@@ -62,7 +60,7 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
default_initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale))
self.weight_1_arr.append(weight_1)
-bias_1 = self._helper.create_parameter(
+bias_1 = self.create_parameter(
attr=fluid.ParamAttr(
initializer=fluid.initializer.UniformInitializer(
low=-self._init_scale, high=self._init_scale)),
@@ -71,6 +69,11 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
default_initializer=fluid.initializer.Constant(0.0))
self.bias_arr.append(bias_1)
+def forward(self, input_embedding, init_hidden=None, init_cell=None):
+self.cell_array = []
+self.hidden_array = []
+for i in range(self._num_layers):
pre_hidden = fluid.layers.slice(
init_hidden, axes=[0], starts=[i], ends=[i + 1])
pre_cell = fluid.layers.slice(
@@ -82,7 +85,6 @@ class SimpleLSTMRNN(fluid.imperative.Layer):
self.hidden_array.append(pre_hidden)
self.cell_array.append(pre_cell)
-def forward(self, input_embedding, init_hidden=None, init_cell=None):
res = []
for index in range(self._num_steps):
self._input = fluid.layers.slice(
@@ -145,8 +147,6 @@ class PtbModel(fluid.imperative.Layer):
self.num_layers = num_layers
self.num_steps = num_steps
self.dropout = dropout
-from paddle.fluid.layer_helper import LayerHelper
-self._helper = LayerHelper('PtbModel', act="tanh")
self.simple_lstm_rnn = SimpleLSTMRNN(
self.full_name(),
hidden_size,
@@ -163,13 +163,13 @@ class PtbModel(fluid.imperative.Layer):
name='embedding_para',
initializer=fluid.initializer.UniformInitializer(
low=-init_scale, high=init_scale)))
-self.softmax_weight = self._helper.create_parameter(
+self.softmax_weight = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.hidden_size, self.vocab_size],
dtype="float32",
default_initializer=fluid.initializer.UniformInitializer(
low=-self.init_scale, high=self.init_scale))
-self.softmax_bias = self._helper.create_parameter(
+self.softmax_bias = self.create_parameter(
attr=fluid.ParamAttr(),
shape=[self.vocab_size],
dtype="float32",
@@ -180,7 +180,6 @@ class PtbModel(fluid.imperative.Layer):
pass
def forward(self, input, label, init_hidden, init_cell):
init_h = fluid.layers.reshape(
init_hidden, shape=[self.num_layers, -1, self.hidden_size])
...