Unverified commit b61d8f77, authored by Weilong Wu, committed by GitHub

[Eager] use final_state_full / final_state_full_ instead of fill_constant under eager mode (#45044)

* [Eager] use final_state_fill_constant_

* fill_constant use str_value

* add fill_constant_ to no_amp_list

* use float(value) as input

* support final state full_ the same way as fill_constant

* [Eager] use final_state_full / final_state_full_ instead of fill_constant under eager mode

* polish code

* fix mistakes
Parent 4805da50
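The hunks below swap the legacy `fill_constant` C op for the final-state `full` / `full_` kernels whenever eager mode is active; the user-facing behaviour is meant to stay unchanged. A minimal sketch of that unchanged surface, assuming the public Paddle 2.x API (names not taken from this diff):

```python
import paddle

# Eager (dynamic-graph) mode is the default in Paddle 2.x.
# Constant-tensor creation like this is what now routes to the
# final-state full kernel instead of the legacy fill_constant op.
x = paddle.full(shape=[2, 3], fill_value=1.5, dtype='float32')
print(x)                # Tensor filled with 1.5
print(x.stop_gradient)  # True, same as before the refactor
```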
......@@ -140,6 +140,8 @@ class ConstantInitializer(Initializer):
        if in_dygraph_mode():
            place = _current_expected_place()
            if self._force_cpu:
                place = core.CPUPlace()
            _C_ops.final_state_full_(var, var.shape, str(float(self._value)),
                                     var.dtype, place)
            return None
......
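In `ConstantInitializer`, the eager branch now fills the parameter in place with `final_state_full_`, passing the value as a string the way the old `str_value` attribute did. A hedged usage sketch that exercises this path (layer sizes and the 0.5 value are illustrative only):

```python
import paddle

# Constant initializer applied to a Linear weight under eager mode.
linear = paddle.nn.Linear(
    4, 2,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Constant(value=0.5)))
print(linear.weight)  # every entry is 0.5
```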
......@@ -774,9 +774,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
    attrs['str_value'] = str(float(value))
    attrs['value'] = float(value)
    if _non_static_mode():
        if out is None and in_dygraph_mode():
            #Currently, final state mode don't support out is None.
        if in_dygraph_mode():
            place = _current_expected_place()
            if force_cpu:
                place = core.CPUPlace()
......@@ -791,11 +789,19 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
            if not isinstance(dtype, core.VarDesc.VarType):
                dtype = convert_np_dtype_to_dtype_(dtype)
            if out is None:
                out = _C_ops.final_state_full(shape, float(value), dtype, place)
                out.stop_gradient = True
                return out
        else:
            if out is not None:
                # final state mode is support out is not None.
                _C_ops.final_state_full_(out, shape, float(value), dtype, place)
                out.stop_gradient = True
                return out
        if _in_legacy_dygraph():
            shape = utils.convert_shape_to_list(shape)
            if out is None:
                out = _varbase_creator(dtype=dtype)
......@@ -806,8 +812,8 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
            else:
                attrs['str_value'] = str(float(value.numpy().item(0)))
            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu',
                                 force_cpu, 'dtype', out.dtype, 'str_value',
            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', force_cpu,
                                 'dtype', out.dtype, 'str_value',
                                 attrs['str_value'], 'shape', shape)
            out.stop_gradient = True
            return out
......
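The rewritten eager branch of `fill_constant` now covers both cases: with `out=None` it allocates a fresh tensor through `final_state_full`, and with a caller-supplied `out` it overwrites that tensor through the in-place `final_state_full_`. A small sketch under the assumption that eager mode is enabled (shapes and values are made up):

```python
import paddle
from paddle.fluid.layers import fill_constant

# out is None -> a new tensor is allocated and returned.
a = fill_constant(shape=[2, 2], dtype='float32', value=3.0)

# out is given -> the existing tensor is filled in place.
b = paddle.zeros([2, 2], dtype='float32')
fill_constant(shape=[2, 2], dtype='float32', value=3.0, out=b)
print(a)
print(b)  # b is now all 3.0
```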
......@@ -44,7 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
from .. import compat as cpt
import warnings
from paddle import _C_ops
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _current_expected_place
__all__ = [
'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
......@@ -443,7 +443,13 @@ class Optimizer(object):
        self._learning_rate = value
        current_lr = self._global_learning_rate()
        if current_lr is not None:
            if framework._non_static_mode():
            if in_dygraph_mode():
                place = _current_expected_place()
                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
                                         float(value), current_lr.dtype,
                                         place)
            elif _in_legacy_dygraph():
                _C_ops.fill_constant(current_lr, 'value', float(value),
                                     'dtype', current_lr.dtype, 'shape',
                                     list(current_lr.shape))
......
......@@ -2611,15 +2611,21 @@ def sigmoid_focal_loss(logit,
                             .format(normalizer_dims))
    if _non_static_mode():
        one = _varbase_creator(dtype=logit.dtype)
        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                             'dtype', one.dtype, 'str_value', '1.0', 'shape',
                             logit.shape)
        if in_dygraph_mode():
            place = _current_expected_place()
            one = _C_ops.final_state_full(logit.shape, float(1.0), logit.dtype,
                                          place)
            loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
                logit, label, False, -100)
        else:
        elif _in_legacy_dygraph():
            one = _varbase_creator(dtype=logit.dtype)
            _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
                                 'dtype', one.dtype, 'str_value', '1.0',
                                 'shape', logit.shape)
            loss = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
        pred = _C_ops.sigmoid(logit)
        p_t = _C_ops.elementwise_add(
            _C_ops.elementwise_mul(pred, label),
......
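The focal-loss change keeps the legacy path behind `_in_legacy_dygraph()` and, in eager mode, builds the all-ones tensor directly with `final_state_full`. A minimal call that would go through this branch (logit and label values are placeholders):

```python
import paddle
import paddle.nn.functional as F

logit = paddle.to_tensor([[0.97, 0.91, 0.03],
                          [0.55, 0.43, 0.71]], dtype='float32')
label = paddle.to_tensor([[1.0, 0.0, 0.0],
                          [0.0, 1.0, 0.0]], dtype='float32')
loss = F.sigmoid_focal_loss(logit, label, reduction='mean')
print(loss)
```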
......@@ -16,6 +16,7 @@ from ...fluid.initializer import Initializer
from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.core import VarDesc
from ...fluid import framework
from ...fluid.framework import _current_expected_place
from paddle import in_dynamic_mode
from paddle.utils import unique_name
from paddle import _C_ops
......@@ -130,9 +131,10 @@ class Dirac(Initializer):
        op = None
        if framework.in_dygraph_mode():
            with fluid.dygraph.no_grad():
                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
                                     False, 'dtype', out_var.dtype, 'str_value',
                                     str(float(0)), 'shape', out_var.shape)
                place = _current_expected_place()
                _C_ops.final_state_full_(out_var, out_var.shape, str(float(0)),
                                         out_var.dtype, place)
        else:
            block.append_op(type='fill_constant',
                            inputs={},
......
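For the `Dirac` initializer, the eager-mode zero-fill of the output variable now also goes through the in-place `final_state_full_`. An illustrative sketch, assuming the documented identity-preserving behaviour of Dirac-initialized convolutions (channel counts and input shape are arbitrary):

```python
import paddle

# Dirac-initialized conv with matching in/out channels and 'same' padding
# should pass the input through (bias disabled so only the weights matter).
conv = paddle.nn.Conv2D(
    3, 3, kernel_size=3, padding=1,
    weight_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Dirac()),
    bias_attr=False)
x = paddle.rand([1, 3, 8, 8])
y = conv(x)
print(paddle.allclose(x, y))  # expected: True
```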
......@@ -42,7 +42,7 @@ from .. import compat as cpt
from .lr import LRScheduler
import copy
from paddle import _C_ops
from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check
from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check, _current_expected_place, in_dygraph_mode
__all__ = []
......@@ -473,7 +473,12 @@ class Optimizer(object):
        self._learning_rate = float(value)
        current_lr = self._global_learning_rate()
        if current_lr is not None:
            if framework._non_static_mode():
            if in_dygraph_mode():
                place = _current_expected_place()
                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
                                         float(value), current_lr.dtype, place)
            elif _in_legacy_dygraph():
                _C_ops.fill_constant(current_lr, 'value', float(value), 'dtype',
                                     current_lr.dtype, 'shape',
                                     list(current_lr.shape))
......
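Both optimizer hunks change `Optimizer.set_lr` the same way: under eager mode the stored learning-rate tensor is overwritten in place with `final_state_full_` instead of the legacy `fill_constant` op. A hedged sketch of the public API that reaches this code (model and learning-rate values are arbitrary):

```python
import paddle

linear = paddle.nn.Linear(10, 10)
adam = paddle.optimizer.Adam(learning_rate=0.1,
                             parameters=linear.parameters())
adam.set_lr(0.01)      # fills the LR tensor in place in eager mode
print(adam.get_lr())   # 0.01
```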