Unverified commit b61d8f77, authored by Weilong Wu, committed by GitHub

[Eager] use final_state_full / *full_ instead of fill_constant under eager mode (#45044)

* [Eager] use final_state_fill_constant_

* fill_constant use str_value

* add fill_constant_ to no_amp_list

* use float(value) as input

* support final-state full_ in the same way as fill_constant

* [Eager] use final_state_full / *full_ instead of fill_constant under eager mode

* polish code

* fix mistakes
Parent commit: 4805da50
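For context, the dygraph fast path rewritten by this PR sits behind ordinary tensor-creation APIs. A minimal usage sketch, assuming a build of this branch with eager mode enabled (the default); `paddle.full` is a public entry point that routes through `fill_constant`:

```python
import paddle

# In eager (dygraph) mode, fill_constant-backed APIs such as paddle.full
# now dispatch to _C_ops.final_state_full instead of the legacy
# fill_constant op.
x = paddle.full(shape=[2, 3], fill_value=1.5, dtype='float32')
print(x)                # 2x3 tensor filled with 1.5
print(x.stop_gradient)  # True, set by fill_constant
```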
@@ -140,6 +140,8 @@ class ConstantInitializer(Initializer):
         if in_dygraph_mode():
             place = _current_expected_place()
+            if self._force_cpu:
+                place = core.CPUPlace()
             _C_ops.final_state_full_(var, var.shape, str(float(self._value)),
                                      var.dtype, place)
             return None
...
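The branch above runs whenever a constant initializer is evaluated in dygraph mode; the added `force_cpu` case only matters when the underlying fluid `ConstantInitializer` is constructed with `force_cpu=True`. A minimal sketch through the public API (layer shape and value are illustrative):

```python
import paddle

# Constant weight init goes through ConstantInitializer.__call__, which now
# fills the parameter in place with _C_ops.final_state_full_.
init = paddle.nn.initializer.Constant(value=2.0)
linear = paddle.nn.Linear(
    4, 4, weight_attr=paddle.ParamAttr(initializer=init))
print(linear.weight)  # every entry is 2.0
```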
@@ -774,44 +774,50 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
         attrs['str_value'] = str(float(value))
         attrs['value'] = float(value)
 
-    if _non_static_mode():
-        if out is None and in_dygraph_mode():
-            #Currently, final state mode don't support out is None.
-            place = _current_expected_place()
-            if force_cpu:
-                place = core.CPUPlace()
-            if isinstance(shape, (list, tuple)):
-                for item in shape:
-                    if not isinstance(item, Variable):
-                        shape = list(
-                            map(
-                                lambda x: x.numpy().flat[0]
-                                if isinstance(x, Variable) else x, shape))
-                        break
-
-            if not isinstance(dtype, core.VarDesc.VarType):
-                dtype = convert_np_dtype_to_dtype_(dtype)
-            out = _C_ops.final_state_full(shape, float(value), dtype, place)
-            out.stop_gradient = True
-            return out
-        else:
-            shape = utils.convert_shape_to_list(shape)
-            if out is None:
-                out = _varbase_creator(dtype=dtype)
-
-            if isinstance(value, Variable):
-                if dtype in ['uint8', 'int16', 'int32', 'int64']:
-                    attrs['str_value'] = str(int(value.numpy().item(0)))
-                else:
-                    attrs['str_value'] = str(float(value.numpy().item(0)))
-
-            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu',
-                                 force_cpu, 'dtype', out.dtype, 'str_value',
-                                 attrs['str_value'], 'shape', shape)
-            out.stop_gradient = True
-            return out
+    if in_dygraph_mode():
+        place = _current_expected_place()
+        if force_cpu:
+            place = core.CPUPlace()
+        if isinstance(shape, (list, tuple)):
+            for item in shape:
+                if not isinstance(item, Variable):
+                    shape = list(
+                        map(
+                            lambda x: x.numpy().flat[0]
+                            if isinstance(x, Variable) else x, shape))
+                    break
+
+        if not isinstance(dtype, core.VarDesc.VarType):
+            dtype = convert_np_dtype_to_dtype_(dtype)
+
+        if out is None:
+            out = _C_ops.final_state_full(shape, float(value), dtype, place)
+            out.stop_gradient = True
+            return out
+
+        if out is not None:
+            # final state mode is support out is not None.
+            _C_ops.final_state_full_(out, shape, float(value), dtype, place)
+            out.stop_gradient = True
+            return out
+
+    if _in_legacy_dygraph():
+        shape = utils.convert_shape_to_list(shape)
+        if out is None:
+            out = _varbase_creator(dtype=dtype)
+
+        if isinstance(value, Variable):
+            if dtype in ['uint8', 'int16', 'int32', 'int64']:
+                attrs['str_value'] = str(int(value.numpy().item(0)))
+            else:
+                attrs['str_value'] = str(float(value.numpy().item(0)))
+
+        _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', force_cpu,
+                             'dtype', out.dtype, 'str_value',
+                             attrs['str_value'], 'shape', shape)
+        out.stop_gradient = True
+        return out
 
     helper = LayerHelper("fill_constant", **locals())
     inputs = {}
     if isinstance(value, Variable):
...
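Both eager branches above can be exercised through the fluid-level API: with `out=None` a fresh tensor comes back from `final_state_full`, while a preallocated `out` is now overwritten in place by `final_state_full_` (previously this fell back to the legacy `fill_constant` op). A rough sketch, values illustrative:

```python
import paddle
from paddle.fluid.layers import fill_constant

# out is None: a new tensor is created via final_state_full.
a = fill_constant(shape=[2, 2], dtype='float32', value=3.0)

# out is given: the existing tensor is refilled via final_state_full_.
fill_constant(shape=[2, 2], dtype='float32', value=7.0, out=a)
print(a)  # all entries are now 7.0
```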
@@ -44,7 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
 from .. import compat as cpt
 import warnings
 from paddle import _C_ops
-from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _current_expected_place
 
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
...
@@ -443,7 +443,13 @@ class Optimizer(object):
         self._learning_rate = value
         current_lr = self._global_learning_rate()
         if current_lr is not None:
-            if framework._non_static_mode():
+            if in_dygraph_mode():
+                place = _current_expected_place()
+                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                         float(value), current_lr.dtype,
+                                         place)
+            elif _in_legacy_dygraph():
                 _C_ops.fill_constant(current_lr, 'value', float(value),
                                      'dtype', current_lr.dtype, 'shape',
                                      list(current_lr.shape))
...
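This is the `set_lr` path of the legacy fluid `Optimizer`: once the global learning-rate tensor exists, eager mode now refills it in place with `final_state_full_` rather than the old `fill_constant` op. A rough sketch using the fluid dygraph API (model, values, and the `current_step_lr` check are illustrative):

```python
import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = paddle.nn.Linear(2, 2)
    sgd = fluid.optimizer.SGD(learning_rate=0.1,
                              parameter_list=linear.parameters())

    loss = linear(paddle.rand([4, 2])).mean()
    loss.backward()
    sgd.minimize(loss)       # materializes the global learning-rate tensor

    sgd.set_lr(0.01)         # eager mode: refilled in place by final_state_full_
    print(sgd.current_step_lr())  # 0.01
```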
@@ -2611,15 +2611,21 @@ def sigmoid_focal_loss(logit,
                 .format(normalizer_dims))
 
     if _non_static_mode():
-        one = _varbase_creator(dtype=logit.dtype)
-        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
-                             'dtype', one.dtype, 'str_value', '1.0', 'shape',
-                             logit.shape)
         if in_dygraph_mode():
+            place = _current_expected_place()
+            one = _C_ops.final_state_full(logit.shape, float(1.0), logit.dtype,
+                                          place)
             loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
                 logit, label, False, -100)
-        else:
+        elif _in_legacy_dygraph():
+            one = _varbase_creator(dtype=logit.dtype)
+            _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
+                                 'dtype', one.dtype, 'str_value', '1.0',
+                                 'shape', logit.shape)
             loss = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
         pred = _C_ops.sigmoid(logit)
         p_t = _C_ops.elementwise_add(
             _C_ops.elementwise_mul(pred, label),
...
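In eager mode the all-ones helper tensor `one` is now created directly by `final_state_full` instead of filling a pre-created VarBase; the path is reached through the public functional API. Roughly (shapes and inputs are illustrative):

```python
import paddle
import paddle.nn.functional as F

logit = paddle.rand([8, 5])
label = paddle.randint(0, 2, [8, 5]).astype('float32')

# Dygraph path: the internal `one` tensor comes from final_state_full.
loss = F.sigmoid_focal_loss(logit, label, reduction='mean')
print(loss)
```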
@@ -16,6 +16,7 @@ from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
 from ...fluid import framework
+from ...fluid.framework import _current_expected_place
 from paddle import in_dynamic_mode
 from paddle.utils import unique_name
 from paddle import _C_ops
...
@@ -130,9 +131,10 @@ class Dirac(Initializer):
         op = None
         if framework.in_dygraph_mode():
             with fluid.dygraph.no_grad():
-                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
-                                     False, 'dtype', out_var.dtype, 'str_value',
-                                     str(float(0)), 'shape', out_var.shape)
+                place = _current_expected_place()
+                _C_ops.final_state_full_(out_var, out_var.shape, str(float(0)),
+                                         out_var.dtype, place)
         else:
             block.append_op(type='fill_constant',
                             inputs={},
...
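The Dirac initializer first zero-fills the weight in dygraph (the call changed above) and then writes the identity-preserving entries. A minimal sketch via the public initializer (conv shape chosen so the identity property is easy to check):

```python
import paddle

# 3x3 Dirac kernel, no bias: the conv reproduces the interior of its input.
conv = paddle.nn.Conv2D(
    in_channels=3, out_channels=3, kernel_size=3, bias_attr=False,
    weight_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Dirac()))

x = paddle.rand([1, 3, 8, 8])
y = conv(x)
print(paddle.allclose(y, x[:, :, 1:-1, 1:-1]))  # True
```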
@@ -42,7 +42,7 @@ from .. import compat as cpt
 from .lr import LRScheduler
 import copy
 from paddle import _C_ops
-from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check
+from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check, _current_expected_place, in_dygraph_mode
 
 __all__ = []
...
@@ -473,7 +473,12 @@ class Optimizer(object):
         self._learning_rate = float(value)
         current_lr = self._global_learning_rate()
         if current_lr is not None:
-            if framework._non_static_mode():
+            if in_dygraph_mode():
+                place = _current_expected_place()
+                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                         float(value), current_lr.dtype, place)
+            elif _in_legacy_dygraph():
                 _C_ops.fill_constant(current_lr, 'value', float(value), 'dtype',
                                      current_lr.dtype, 'shape',
                                      list(current_lr.shape))
...
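Same change as in the fluid optimizer above, but for `paddle.optimizer.Optimizer.set_lr`. Roughly (optimizer and values are illustrative):

```python
import paddle

linear = paddle.nn.Linear(2, 2)
adam = paddle.optimizer.Adam(learning_rate=0.1,
                             parameters=linear.parameters())

linear(paddle.rand([4, 2])).mean().backward()
adam.step()           # creates the global learning-rate tensor

adam.set_lr(0.001)    # eager mode: overwritten in place by final_state_full_
print(adam.get_lr())  # 0.001
```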