From b61d8f779c5ec50d33a7d0759cce95f3e307d34a Mon Sep 17 00:00:00 2001
From: Weilong Wu
Date: Thu, 11 Aug 2022 11:01:11 +0800
Subject: [PATCH] [Eager] use final_state_full / *full_ instead fill_constant under eager mode (#45044)

* [Eager] use final_state_fill_constant_

* fill_constant use str_value

* add fill_constant_ to no_amp_list

* use float(value) as input

* support final state full_ same as fill_constant

* [Eager] use final_state_full / *full_ instead fill_constant under eager

* polish code

* fix mistakes
---
 python/paddle/fluid/initializer.py    |  2 +
 python/paddle/fluid/layers/tensor.py  | 68 +++++++++++++++------------
 python/paddle/fluid/optimizer.py      | 10 +++-
 python/paddle/nn/functional/loss.py   | 16 +++++--
 python/paddle/nn/initializer/dirac.py |  8 ++--
 python/paddle/optimizer/optimizer.py  |  9 +++-
 6 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py
index b4c99a7af49..6381cb59f20 100644
--- a/python/paddle/fluid/initializer.py
+++ b/python/paddle/fluid/initializer.py
@@ -140,6 +140,8 @@ class ConstantInitializer(Initializer):
 
         if in_dygraph_mode():
             place = _current_expected_place()
+            if self._force_cpu:
+                place = core.CPUPlace()
             _C_ops.final_state_full_(var, var.shape, str(float(self._value)),
                                      var.dtype, place)
             return None
diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py
index dbbc207fba4..0974aecba0c 100644
--- a/python/paddle/fluid/layers/tensor.py
+++ b/python/paddle/fluid/layers/tensor.py
@@ -774,44 +774,50 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
         attrs['str_value'] = str(float(value))
         attrs['value'] = float(value)
 
-    if _non_static_mode():
-        if out is None and in_dygraph_mode():
-            #Currently, final state mode don't support out is None.
-            place = _current_expected_place()
-            if force_cpu:
-                place = core.CPUPlace()
-            if isinstance(shape, (list, tuple)):
-                for item in shape:
-                    if not isinstance(item, Variable):
-                        shape = list(
-                            map(
-                                lambda x: x.numpy().flat[0]
-                                if isinstance(x, Variable) else x, shape))
-                        break
-
-            if not isinstance(dtype, core.VarDesc.VarType):
-                dtype = convert_np_dtype_to_dtype_(dtype)
+    if in_dygraph_mode():
+        place = _current_expected_place()
+        if force_cpu:
+            place = core.CPUPlace()
+        if isinstance(shape, (list, tuple)):
+            for item in shape:
+                if not isinstance(item, Variable):
+                    shape = list(
+                        map(
+                            lambda x: x.numpy().flat[0]
+                            if isinstance(x, Variable) else x, shape))
+                    break
+
+        if not isinstance(dtype, core.VarDesc.VarType):
+            dtype = convert_np_dtype_to_dtype_(dtype)
+
+        if out is None:
             out = _C_ops.final_state_full(shape, float(value), dtype, place)
             out.stop_gradient = True
             return out
-        else:
-            shape = utils.convert_shape_to_list(shape)
-            if out is None:
-                out = _varbase_creator(dtype=dtype)
-
-            if isinstance(value, Variable):
-                if dtype in ['uint8', 'int16', 'int32', 'int64']:
-                    attrs['str_value'] = str(int(value.numpy().item(0)))
-                else:
-                    attrs['str_value'] = str(float(value.numpy().item(0)))
-
-            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu',
-                                 force_cpu, 'dtype', out.dtype, 'str_value',
-                                 attrs['str_value'], 'shape', shape)
+        if out is not None:
+            # Final state mode supports a non-None out.
+            _C_ops.final_state_full_(out, shape, float(value), dtype, place)
             out.stop_gradient = True
             return out
 
+    if _in_legacy_dygraph():
+        shape = utils.convert_shape_to_list(shape)
+        if out is None:
+            out = _varbase_creator(dtype=dtype)
+
+        if isinstance(value, Variable):
+            if dtype in ['uint8', 'int16', 'int32', 'int64']:
+                attrs['str_value'] = str(int(value.numpy().item(0)))
+            else:
+                attrs['str_value'] = str(float(value.numpy().item(0)))
+
+        _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', force_cpu,
+                             'dtype', out.dtype, 'str_value',
+                             attrs['str_value'], 'shape', shape)
+        out.stop_gradient = True
+        return out
+
     helper = LayerHelper("fill_constant", **locals())
     inputs = {}
     if isinstance(value, Variable):
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index a3c68099089..5ecc1aec759 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -44,7 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
 from .. import compat as cpt
 import warnings
 from paddle import _C_ops
-from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _current_expected_place
 
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
@@ -443,7 +443,13 @@ class Optimizer(object):
             self._learning_rate = value
             current_lr = self._global_learning_rate()
             if current_lr is not None:
-                if framework._non_static_mode():
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                             float(value), current_lr.dtype,
+                                             place)
+
+                elif _in_legacy_dygraph():
                     _C_ops.fill_constant(current_lr, 'value', float(value),
                                          'dtype', current_lr.dtype, 'shape',
                                          list(current_lr.shape))
diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py
index 7e73f3ce8ac..0fe3a000ade 100755
--- a/python/paddle/nn/functional/loss.py
+++ b/python/paddle/nn/functional/loss.py
@@ -2611,15 +2611,21 @@ def sigmoid_focal_loss(logit,
                 .format(normalizer_dims))
 
     if _non_static_mode():
-        one = _varbase_creator(dtype=logit.dtype)
-        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
-                             'dtype', one.dtype, 'str_value', '1.0', 'shape',
-                             logit.shape)
         if in_dygraph_mode():
+            place = _current_expected_place()
+            one = _C_ops.final_state_full(logit.shape, float(1.0), logit.dtype,
+                                          place)
+
             loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
                 logit, label, False, -100)
-        else:
+
+        elif _in_legacy_dygraph():
+            one = _varbase_creator(dtype=logit.dtype)
+            _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
+                                 'dtype', one.dtype, 'str_value', '1.0',
+                                 'shape', logit.shape)
             loss = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
+
         pred = _C_ops.sigmoid(logit)
         p_t = _C_ops.elementwise_add(
             _C_ops.elementwise_mul(pred, label),
diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py
index 1b5697ede40..b56f49e461a 100644
--- a/python/paddle/nn/initializer/dirac.py
+++ b/python/paddle/nn/initializer/dirac.py
@@ -16,6 +16,7 @@ from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
 from ...fluid import framework
+from ...fluid.framework import _current_expected_place
 from paddle import in_dynamic_mode
 from paddle.utils import unique_name
 from paddle import _C_ops
@@ -130,9 +131,10 @@ class Dirac(Initializer):
         op = None
         if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
-                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
-                                     False, 'dtype', out_var.dtype, 'str_value',
-                                     str(float(0)), 'shape', out_var.shape)
+                place = _current_expected_place()
+                _C_ops.final_state_full_(out_var, out_var.shape, str(float(0)),
+                                         out_var.dtype, place)
+
         else:
             block.append_op(type='fill_constant',
                             inputs={},
diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py
index dccbb21f5d2..9997aba7e31 100644
--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -42,7 +42,7 @@ from .. import compat as cpt
 from .lr import LRScheduler
 import copy
 from paddle import _C_ops
-from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check
+from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check, _current_expected_place, in_dygraph_mode
 
 __all__ = []
 
@@ -473,7 +473,12 @@ class Optimizer(object):
             self._learning_rate = float(value)
             current_lr = self._global_learning_rate()
             if current_lr is not None:
-                if framework._non_static_mode():
+                if in_dygraph_mode():
+                    place = _current_expected_place()
+                    _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                             float(value), current_lr.dtype, place)
+
+                elif _in_legacy_dygraph():
                     _C_ops.fill_constant(current_lr, 'value', float(value),
                                          'dtype', current_lr.dtype, 'shape',
                                          list(current_lr.shape))
-- 
GitLab
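
A minimal usage sketch follows (not part of the patch; it assumes a Paddle build that already contains this change, where eager mode is the default). It illustrates the dispatch the patch introduces in fill_constant: with out=None the call routes to _C_ops.final_state_full, with an out tensor it routes to the in-place _C_ops.final_state_full_, and legacy dygraph still goes through the old fill_constant op. The paddle.zeros call is only used here to create a destination tensor.

    import paddle
    from paddle.fluid.layers import fill_constant

    paddle.disable_static()  # dygraph; eager (final state) mode on builds with this patch

    # out is None -> dispatches to _C_ops.final_state_full(shape, value, dtype, place)
    x = fill_constant(shape=[2, 3], dtype='float32', value=1.5)

    # out is provided -> dispatches to the in-place _C_ops.final_state_full_(out, ...)
    y = paddle.zeros([2, 3], dtype='float32')
    fill_constant(shape=[2, 3], dtype='float32', value=2.0, out=y)

    print(x.numpy())  # all 1.5
    print(y.numpy())  # all 2.0

In both branches the result keeps stop_gradient = True, matching the behaviour of the legacy fill_constant path.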