Unverified commit b61d8f77, authored by Weilong Wu, committed by GitHub

[Eager] use final_state_full / *full_ instead of fill_constant under eager mode (#45044)

* [Eager] use final_state_fill_constant_

* fill_constant use str_value

* add fill_constant_ to no_amp_list

* use float(value) as input

* support final-state full_ in the same way as fill_constant

* [Eager] use final_state_full / *full_ instead of fill_constant under eager mode

* polish code

* fix mistakes
Parent commit: 4805da50
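For context, the dygraph fast path rewritten by this PR sits behind ordinary tensor-creation APIs. A minimal usage sketch, assuming a build of this branch with eager mode enabled (the default); `paddle.full` is a public entry point that routes through `fill_constant`:

```python
import paddle

# In eager (dygraph) mode, fill_constant-backed APIs such as paddle.full
# now dispatch to _C_ops.final_state_full instead of the legacy
# fill_constant op.
x = paddle.full(shape=[2, 3], fill_value=1.5, dtype='float32')
print(x)                # 2x3 tensor filled with 1.5
print(x.stop_gradient)  # True, set by fill_constant
```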
@@ -140,6 +140,8 @@ class ConstantInitializer(Initializer):
         if in_dygraph_mode():
             place = _current_expected_place()
+            if self._force_cpu:
+                place = core.CPUPlace()
             _C_ops.final_state_full_(var, var.shape, str(float(self._value)),
                                      var.dtype, place)
             return None
...
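The branch above runs whenever a constant initializer is evaluated in dygraph mode; the added `force_cpu` case only matters when the underlying fluid `ConstantInitializer` is constructed with `force_cpu=True`. A minimal sketch through the public API (layer shape and value are illustrative):

```python
import paddle

# Constant weight init goes through ConstantInitializer.__call__, which now
# fills the parameter in place with _C_ops.final_state_full_.
init = paddle.nn.initializer.Constant(value=2.0)
linear = paddle.nn.Linear(
    4, 4, weight_attr=paddle.ParamAttr(initializer=init))
print(linear.weight)  # every entry is 2.0
```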
@@ -774,44 +774,50 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None):
         attrs['str_value'] = str(float(value))
         attrs['value'] = float(value)
 
-    if _non_static_mode():
-        if out is None and in_dygraph_mode():
-            #Currently, final state mode don't support out is None.
-            place = _current_expected_place()
-            if force_cpu:
-                place = core.CPUPlace()
-            if isinstance(shape, (list, tuple)):
-                for item in shape:
-                    if not isinstance(item, Variable):
-                        shape = list(
-                            map(
-                                lambda x: x.numpy().flat[0]
-                                if isinstance(x, Variable) else x, shape))
-                        break
-
-            if not isinstance(dtype, core.VarDesc.VarType):
-                dtype = convert_np_dtype_to_dtype_(dtype)
-            out = _C_ops.final_state_full(shape, float(value), dtype, place)
-            out.stop_gradient = True
-            return out
-        else:
-            shape = utils.convert_shape_to_list(shape)
-            if out is None:
-                out = _varbase_creator(dtype=dtype)
-
-            if isinstance(value, Variable):
-                if dtype in ['uint8', 'int16', 'int32', 'int64']:
-                    attrs['str_value'] = str(int(value.numpy().item(0)))
-                else:
-                    attrs['str_value'] = str(float(value.numpy().item(0)))
-
-            _C_ops.fill_constant(out, 'value', float(value), 'force_cpu',
-                                 force_cpu, 'dtype', out.dtype, 'str_value',
-                                 attrs['str_value'], 'shape', shape)
-            out.stop_gradient = True
-            return out
+    if in_dygraph_mode():
+        place = _current_expected_place()
+        if force_cpu:
+            place = core.CPUPlace()
+        if isinstance(shape, (list, tuple)):
+            for item in shape:
+                if not isinstance(item, Variable):
+                    shape = list(
+                        map(
+                            lambda x: x.numpy().flat[0]
+                            if isinstance(x, Variable) else x, shape))
+                    break
+
+        if not isinstance(dtype, core.VarDesc.VarType):
+            dtype = convert_np_dtype_to_dtype_(dtype)
+
+        if out is None:
+            out = _C_ops.final_state_full(shape, float(value), dtype, place)
+            out.stop_gradient = True
+            return out
+
+        if out is not None:
+            # final state mode is support out is not None.
+            _C_ops.final_state_full_(out, shape, float(value), dtype, place)
+            out.stop_gradient = True
+            return out
+
+    if _in_legacy_dygraph():
+        shape = utils.convert_shape_to_list(shape)
+        if out is None:
+            out = _varbase_creator(dtype=dtype)
+
+        if isinstance(value, Variable):
+            if dtype in ['uint8', 'int16', 'int32', 'int64']:
+                attrs['str_value'] = str(int(value.numpy().item(0)))
+            else:
+                attrs['str_value'] = str(float(value.numpy().item(0)))
+
+        _C_ops.fill_constant(out, 'value', float(value), 'force_cpu', force_cpu,
+                             'dtype', out.dtype, 'str_value',
+                             attrs['str_value'], 'shape', shape)
+        out.stop_gradient = True
+        return out
 
     helper = LayerHelper("fill_constant", **locals())
     inputs = {}
     if isinstance(value, Variable):
...
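Both eager branches above can be exercised through the fluid-level API: with `out=None` a fresh tensor comes back from `final_state_full`, while a preallocated `out` is now overwritten in place by `final_state_full_` (previously this fell back to the legacy `fill_constant` op). A rough sketch, values illustrative:

```python
import paddle
from paddle.fluid.layers import fill_constant

# out is None: a new tensor is created via final_state_full.
a = fill_constant(shape=[2, 2], dtype='float32', value=3.0)

# out is given: the existing tensor is refilled via final_state_full_.
fill_constant(shape=[2, 2], dtype='float32', value=7.0, out=a)
print(a)  # all entries are now 7.0
```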
@@ -44,7 +44,7 @@ from .wrapped_decorator import signature_safe_contextmanager
 from .. import compat as cpt
 import warnings
 from paddle import _C_ops
-from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode
+from ..fluid.framework import _in_legacy_dygraph, in_dygraph_mode, _current_expected_place
 
 __all__ = [
     'SGD', 'Momentum', 'Adagrad', 'Adam', 'Adamax', 'Dpsgd', 'DecayedAdagrad',
...
@@ -443,7 +443,13 @@ class Optimizer(object):
         self._learning_rate = value
         current_lr = self._global_learning_rate()
         if current_lr is not None:
-            if framework._non_static_mode():
+            if in_dygraph_mode():
+                place = _current_expected_place()
+                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                         float(value), current_lr.dtype,
+                                         place)
+            elif _in_legacy_dygraph():
                 _C_ops.fill_constant(current_lr, 'value', float(value),
                                      'dtype', current_lr.dtype, 'shape',
                                      list(current_lr.shape))
...
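This is the `set_lr` path of the legacy fluid `Optimizer`: once the global learning-rate tensor exists, eager mode now refills it in place with `final_state_full_` rather than the old `fill_constant` op. A rough sketch using the fluid dygraph API (model, values, and the `current_step_lr` check are illustrative):

```python
import paddle
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = paddle.nn.Linear(2, 2)
    sgd = fluid.optimizer.SGD(learning_rate=0.1,
                              parameter_list=linear.parameters())

    loss = linear(paddle.rand([4, 2])).mean()
    loss.backward()
    sgd.minimize(loss)       # materializes the global learning-rate tensor

    sgd.set_lr(0.01)         # eager mode: refilled in place by final_state_full_
    print(sgd.current_step_lr())  # 0.01
```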
@@ -2611,15 +2611,21 @@ def sigmoid_focal_loss(logit,
                 .format(normalizer_dims))
 
     if _non_static_mode():
-        one = _varbase_creator(dtype=logit.dtype)
-        _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
-                             'dtype', one.dtype, 'str_value', '1.0', 'shape',
-                             logit.shape)
         if in_dygraph_mode():
+            place = _current_expected_place()
+            one = _C_ops.final_state_full(logit.shape, float(1.0), logit.dtype,
+                                          place)
             loss = _C_ops.final_state_sigmoid_cross_entropy_with_logits(
                 logit, label, False, -100)
-        else:
+        elif _in_legacy_dygraph():
+            one = _varbase_creator(dtype=logit.dtype)
+            _C_ops.fill_constant(one, 'value', float(1.0), 'force_cpu', False,
+                                 'dtype', one.dtype, 'str_value', '1.0',
+                                 'shape', logit.shape)
             loss = _C_ops.sigmoid_cross_entropy_with_logits(logit, label)
         pred = _C_ops.sigmoid(logit)
         p_t = _C_ops.elementwise_add(
             _C_ops.elementwise_mul(pred, label),
...
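In eager mode the all-ones helper tensor `one` is now created directly by `final_state_full` instead of filling a pre-created VarBase; the path is reached through the public functional API. Roughly (shapes and inputs are illustrative):

```python
import paddle
import paddle.nn.functional as F

logit = paddle.rand([8, 5])
label = paddle.randint(0, 2, [8, 5]).astype('float32')

# Dygraph path: the internal `one` tensor comes from final_state_full.
loss = F.sigmoid_focal_loss(logit, label, reduction='mean')
print(loss)
```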
@@ -16,6 +16,7 @@ from ...fluid.initializer import Initializer
 from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid.core import VarDesc
 from ...fluid import framework
+from ...fluid.framework import _current_expected_place
 from paddle import in_dynamic_mode
 from paddle.utils import unique_name
 from paddle import _C_ops
...
@@ -130,9 +131,10 @@ class Dirac(Initializer):
         op = None
         if framework.in_dygraph_mode():
             with fluid.dygraph.no_grad():
-                _C_ops.fill_constant(out_var, 'value', float(0), 'force_cpu',
-                                     False, 'dtype', out_var.dtype, 'str_value',
-                                     str(float(0)), 'shape', out_var.shape)
+                place = _current_expected_place()
+                _C_ops.final_state_full_(out_var, out_var.shape, str(float(0)),
+                                         out_var.dtype, place)
         else:
             block.append_op(type='fill_constant',
                             inputs={},
...
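The Dirac initializer first zero-fills the weight in dygraph (the call changed above) and then writes the identity-preserving entries. A minimal sketch via the public initializer (conv shape chosen so the identity property is easy to check):

```python
import paddle

# 3x3 Dirac kernel, no bias: the conv reproduces the interior of its input.
conv = paddle.nn.Conv2D(
    in_channels=3, out_channels=3, kernel_size=3, bias_attr=False,
    weight_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Dirac()))

x = paddle.rand([1, 3, 8, 8])
y = conv(x)
print(paddle.allclose(y, x[:, :, 1:-1, 1:-1]))  # True
```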
@@ -42,7 +42,7 @@ from .. import compat as cpt
 from .lr import LRScheduler
 import copy
 from paddle import _C_ops
-from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check
+from paddle.fluid.framework import _in_legacy_dygraph, _in_eager_without_dygraph_check, _current_expected_place, in_dygraph_mode
 
 __all__ = []
...
@@ -473,7 +473,12 @@ class Optimizer(object):
         self._learning_rate = float(value)
         current_lr = self._global_learning_rate()
         if current_lr is not None:
-            if framework._non_static_mode():
+            if in_dygraph_mode():
+                place = _current_expected_place()
+                _C_ops.final_state_full_(current_lr, list(current_lr.shape),
+                                         float(value), current_lr.dtype, place)
+            elif _in_legacy_dygraph():
                 _C_ops.fill_constant(current_lr, 'value', float(value), 'dtype',
                                      current_lr.dtype, 'shape',
                                      list(current_lr.shape))
...
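Same change as in the fluid optimizer above, but for `paddle.optimizer.Optimizer.set_lr`. Roughly (optimizer and values are illustrative):

```python
import paddle

linear = paddle.nn.Linear(2, 2)
adam = paddle.optimizer.Adam(learning_rate=0.1,
                             parameters=linear.parameters())

linear(paddle.rand([4, 2])).mean().backward()
adam.step()           # creates the global learning-rate tensor

adam.set_lr(0.001)    # eager mode: overwritten in place by final_state_full_
print(adam.get_lr())  # 0.001
```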