From b61d8f779c5ec50d33a7d0759cce95f3e307d34a Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Thu, 11 Aug 2022 11:01:11 +0800
Subject: [PATCH] [Eager] use final_state_full / *full_ instead fill_constant under eager mode (#45044)

* [Eager] use final_state_fill_constant_

* fill_constant use str_value

* add fill_constant_ to no_amp_list

* use float(value) as input

* support final state full_ same as fill_constant

* [Eager] use final_state_full / *full_ instead fill_constant under eager

* polish code

* fix mistakes
---
 python/paddle/fluid/initializer.py    |  2 +
 python/paddle/fluid/layers/tensor.py  | 68 +++++++++++++++------------
 python/paddle/fluid/optimizer.py      | 10 +++-
 python/paddle/nn/functional/loss.py   | 16 +++++--
 python/paddle/nn/initializer/dirac.py |  8 ++--
 python/paddle/optimizer/optimizer.py  |  9 +++-
 6 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index b4c99a7af49..6381cb59f20 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -140,6 +140,8 @@ class ConstantInitializer(Initializer):
 
         if in_dygraph_mode():
             place = _current_expected_place()
+            if self._force_cpu:
+                place = core.CPUPlace()
             _C_ops.final_state_full_(var, var.shape, str(float(self._value)),
                                      var.dtype, place)
             return None
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index dbbc207fba4..0974aecba0c 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -774,44 +774,50 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
         attrs['str_value'] = str(float(value))
         attrs['value'] = float(value)
 
-    if _non_static_mode():
-        if out is None and in_dygraph_mode():
-            #Currently, final state mode don't support out is None.
-            place = _current_expected_place()
-            if force_cpu:
-                place = core.CPUPlace()
-            if isinstance(shape, (list, tuple)):
-                for item in shape:
-                    if not isinstance(item, Variable):
-                        shape = list(
-                            map(
-                                lambda x: x.numpy().flat[0]
-                                if isinstance(x, Variable) else x, shape))
-                        break
-
-            if not isinstance(dtype, core.VarDesc.VarType):
-                dtype = convert_np_dtype_to_dtype_(dtype)
+    if in_dygraph_mode():
+        place = _current_expected_place()
+        if force_cpu:
+            place = core.CPUPlace()
+        if isinstance(shape, (list, tuple)):
+            for item in shape:
+                if not isinstance(item, Variable):
+                    shape = list(
+                        map(
+                            lambda x: x.numpy().flat[0]
+                            if isinstance(x, Variable) else x, shape))
+                    break
+
+        if not isinstance(dtype, core.VarDesc.VarType):
+            dtype = convert_np_dtype_to_dtype_(dtype)
+
+        if out is None:
             out = _C_ops.final_state_full(shape, float(value), dtype, place)
             out.stop_gradient = True
             return out
-        else:
-            shape = utils.convert_shape_to_list(shape)
-            if out is None:
-                out = _varbase_creator(dtype=dtype)
-
-            if isinstance(value, Variable):
-                if dtype in ['uint8', 'int16', 'int32', 'int64']:
-                    attrs['str_value'] = str(int(value.numpy().item(0)))
-                else:
-                    attrs['str_value'] = str(float(value.numpy().item(0)))
-
-            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu',
-                                 force_cpu, 'dtype', out.dtype, 'str_value',
-                                 attrs['str_value'], 'shape', shape)
+        if out is not None:
+            # Final state mode supports a non-None out.
+            _C_ops.final_state_full_(out, shape, float(value), dtype, place)
             out.stop_gradient = True
             return out
 
+    if _in_legacy_dygraph():
+        shape = utils.convert_shape_to_list(shape)
+        if out is None:
+            out = _varbase_creator(dtype=dtype)
+
+        if isinstance(value, Variable):
+            if dtype in ['uint8', 'int16', 'int32', 'int64']:
+                attrs['str_value'] = str(int(value.numpy().item(0)))
+            else:
+                attrs['str_value'] = str(float(value.numpy().item(0)))
+
+        _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', force_cpu,
+                             'dtype', out.dtype, 'str_value',
+                             attrs['str_value'], 'shape', shape)
+        out.stop_gradient = True
+        return out
+
     helper = LayerHelper("fill_constant", **locals())
     inputs = {}
     if isinstance(value, Variable):
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index a3c68099089..5ecc1aec759 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -44,7 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
 from .. import compat as cpt
 import warnings
 from paddle import _C_ops
-from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _current_expected_place
 
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
@@ -443,7 +443,13 @@ class Optimizer(object):
             self._learning_rate = value
             current_lr = self._global_learning_rate()
             if current_lr is not None:
-                if framework._non_static_mode():
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                             float(value), current_lr.dtype,
+                                             place)
+
+                elif _in_legacy_dygraph():
                     _C_ops.fill_constant(current_lr, 'value', float(value),
                                          'dtype', current_lr.dtype, 'shape',
                                          list(current_lr.shape))
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 7e73f3ce8ac..0fe3a000ade 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -2611,15 +2611,21 @@ def sigmoid_focal_loss(logit,
                 .format(normalizer_dims))
 
     if _non_static_mode():
-        one = _varbase_creator(dtype=logit.dtype)
-        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
-                             'dtype', one.dtype, 'str_value', '1.0', 'shape',
-                             logit.shape)
         if in_dygraph_mode():
+            place = _current_expected_place()
+            one = _C_ops.final_state_full(logit.shape, float(1.0), logit.dtype,
+                                          place)
+
             loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
                 logit, label, False, -100)
-        else:
+
+        elif _in_legacy_dygraph():
+            one = _varbase_creator(dtype=logit.dtype)
+            _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
+                                 'dtype', one.dtype, 'str_value', '1.0',
+                                 'shape', logit.shape)
             loss = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
+
         pred = _C_ops.sigmoid(logit)
         p_t = _C_ops.elementwise_add(
             _C_ops.elementwise_mul(pred, label),
diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py
index 1b5697ede40..b56f49e461a 100644
--- a/python/paddle/nn/initializer/dirac.py
+++ b/python/paddle/nn/initializer/dirac.py
@@ -16,6 +16,7 @@ from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
 from ...fluid import framework
+from ...fluid.framework import _current_expected_place
 from paddle import in_dynamic_mode
 from paddle.utils import unique_name
 from paddle import _C_ops
@@ -130,9 +131,10 @@ class Dirac(Initializer):
         op = None
         if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
-                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
-                                     False, 'dtype', out_var.dtype, 'str_value',
-                                     str(float(0)), 'shape', out_var.shape)
+                place = _current_expected_place()
+                _C_ops.final_state_full_(out_var, out_var.shape, str(float(0)),
+                                         out_var.dtype, place)
+
         else:
             block.append_op(type='fill_constant',
                             inputs={},
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index dccbb21f5d2..9997aba7e31 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -42,7 +42,7 @@ from .. import compat as cpt
 from .lr import LRScheduler
 import copy
 from paddle import _C_ops
-from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check
+from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check, _current_expected_place, in_dygraph_mode
 
 __all__ = []
 
@@ -473,7 +473,12 @@ class Optimizer(object):
             self._learning_rate = float(value)
             current_lr = self._global_learning_rate()
             if current_lr is not None:
-                if framework._non_static_mode():
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                             float(value), current_lr.dtype, place)
+
+                elif _in_legacy_dygraph():
                     _C_ops.fill_constant(current_lr, 'value', float(value),
                                          'dtype', current_lr.dtype, 'shape',
                                          list(current_lr.shape))
-- 
GitLab
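
A minimal usage sketch follows (not part of the patch; it assumes a Paddle build that already contains this change, where eager mode is the default). It illustrates the dispatch the patch introduces in fill_constant: with out=None the call routes to _C_ops.final_state_full, with an out tensor it routes to the in-place _C_ops.final_state_full_, and legacy dygraph still goes through the old fill_constant op. The paddle.zeros call is only used here to create a destination tensor.

    import paddle
    from paddle.fluid.layers import fill_constant

    paddle.disable_static()  # dygraph; eager (final state) mode on builds with this patch

    # out is None -> dispatches to _C_ops.final_state_full(shape, value, dtype, place)
    x = fill_constant(shape=[2, 3], dtype='float32', value=1.5)

    # out is provided -> dispatches to the in-place _C_ops.final_state_full_(out, ...)
    y = paddle.zeros([2, 3], dtype='float32')
    fill_constant(shape=[2, 3], dtype='float32', value=2.0, out=y)

    print(x.numpy())  # all 1.5
    print(y.numpy())  # all 2.0

In both branches the result keeps stop_gradient = True, matching the behaviour of the legacy fill_constant path.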