Unverified · Commit 8100c16a authored by chentianyu03 and committed by GitHub

[cherry-pick]layer.to api support numpy.dtype and paddle.dtype (#38108)

Issue #37932 reported that layer.to does not support dtype arguments of type paddle.dtype; this change adds support for that type. See #38018 for details.
It also cherry-picks the previously missed commit from PR #36779.

Changes:
Cherry-pick #36779
Cherry-pick #38018
Parent 81469615
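For context, a minimal usage sketch of what this cherry-pick enables. This is a hedged illustration, not part of the patch: the printed dtype representation can vary between Paddle releases, and paddle.float64 / np.float32 are just examples of the newly accepted dtype objects. Note that to() now also returns the layer itself, so calls can be chained.

import numpy as np
import paddle

linear = paddle.nn.Linear(2, 2)

# Previously only strings such as "float64" were accepted for dtype.
linear.to(dtype=paddle.float64)   # paddle.dtype, newly supported
print(linear.weight.dtype)        # expected: FP64 / paddle.float64

linear.to(dtype=np.float32)       # numpy dtype, newly supported
print(linear.weight.dtype)        # expected: FP32 / paddle.float32

linear.to(dtype="float64")        # string form still works as before

The diff below covers both the docstring/API change in Layer.to and the new unit tests.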
@@ -37,6 +37,7 @@ from ..param_attr import ParamAttr
from paddle.fluid.executor import Executor, global_scope
from paddle.fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
from paddle.fluid.framework import _current_expected_place as _get_device
+from paddle.fluid.core import VarDesc
from paddle.fluid.dygraph import no_grad
import paddle.utils.deprecated as deprecated

@@ -92,7 +93,7 @@ class Layer(core.Layer):
If set str, it can be "bool", "float16", "float32", "float64",
"int8", "int16", "int32", "int64", "uint8" or "uint16".
Default: "float32"
Returns:
    None
"""

@@ -275,7 +276,7 @@ class Layer(core.Layer):
It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.
hook(Layer, input, output) -> None or modified output
Parameters:

@@ -321,9 +322,9 @@ class Layer(core.Layer):
def register_forward_pre_hook(self, hook):
"""Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.
It should have the following form, `input` of the `hook` is `input` of the `Layer`,
hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
a single value is returned(unless that value is already a tuple).
User can use forward pre-hook to change the input of the Layer or perform information statistics tasks on the Layer.

@@ -379,7 +380,7 @@ class Layer(core.Layer):
is_bias=False,
default_initializer=None):
"""Create parameters for this layer.
Parameters:
shape(list): Shape of the parameter.
attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_paddle_ParamAttr`. Default: None.

@@ -450,13 +451,13 @@ class Layer(core.Layer):
out_features):
super(MyLinear, self).__init__()
self.linear = paddle.nn.Linear( 10, 10)
self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)
def forward(self, input):
out = self.linear(input)
paddle.assign( out, self.back_var)
return out
"""

@@ -500,13 +501,13 @@ class Layer(core.Layer):
out_features):
super(MyLinear, self).__init__()
self.linear = paddle.nn.Linear( 10, 10)
self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)
def forward(self, input):
out = self.linear(input)
paddle.assign( out, self.back_var)
return out
"""

@@ -726,7 +727,7 @@ class Layer(core.Layer):
Returns:
    None
Examples:
.. code-block:: python

@@ -853,10 +854,10 @@ class Layer(core.Layer):
def clear_gradients(self):
"""
Clear the gradients of all parameters for this layer.
Returns:
    None
Examples:
.. code-block:: python

@@ -898,8 +899,8 @@ class Layer(core.Layer):
with program_desc_tracing_guard(False):
self._build_once(*inputs, **kwargs)
# TODO(liuyuhui) Only xpu broadcast parameters here.
# The other device is to call _sync_params_buffers in DataParallel
# to realize the parameter synchronization among multiply cards.
if parallel_helper._is_data_parallel_mode(
) and paddle.is_compiled_with_xpu():

@@ -941,7 +942,7 @@ class Layer(core.Layer):
sublayer(Layer): an instance of Layer.
Returns:
    Layer: the sublayer passed in.
Examples:
.. code-block:: python

@@ -1164,7 +1165,7 @@ class Layer(core.Layer):
self._non_persistable_buffer_names_set.add(name)
_buffers[name] = value
elif _buffers is not None and name in _buffers:
# Note(Aurelius84): In Dy2stat, the value of the Buffer may be modified in
# decorated function, such as `self.buffer = new_tensor`. So we update its
# value via `assign`.
if type(value) == framework.Variable:

@@ -1323,7 +1324,7 @@ class Layer(core.Layer):
Parameters:
destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
Retruns:
    dict: a dict contains all the parameters and persistable buffers.

@@ -1354,7 +1355,7 @@ class Layer(core.Layer):
Parameters:
destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
Retruns:
    dict: a dict contains all the parameters and persistable buffers.

@@ -1382,7 +1383,7 @@ class Layer(core.Layer):
Parameters:
state_dict(dict) : Dict contains all the parameters and persistable buffers.
use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
    Default: True
Returns:
    None

@@ -1484,21 +1485,22 @@ class Layer(core.Layer):
Cast the parameters and buffers of Layer by the give device, dtype and blocking.
Parameters:
device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
    If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
    index of the GPUs or XPUs. Default: None.
-dtype(str|core.VarDesc.VarType|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.
+dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
    asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
Returns:
-    None
+    self
Examples:
.. code-block:: python
+# required: skip
import paddle
linear=paddle.nn.Linear(2, 2)

@@ -1524,12 +1526,12 @@ class Layer(core.Layer):
#Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False,
#       [[-0.04989364, -0.56889004],
#        [ 0.33960250,  0.96878713]])
'''
if device is None and dtype is None and blocking is None:
-    return
+    return self
if device is not None:
    if isinstance(device, str):

@@ -1555,7 +1557,7 @@ class Layer(core.Layer):
if dtype is None:
    dtype = t.dtype
-if type(dtype) is str:
+if type(dtype) is not VarDesc.VarType:
    dtype = convert_np_dtype_to_dtype_(dtype)
# 1. gpu place need to determine whether the memory is sufficient for allocation:

@@ -1604,6 +1606,7 @@ class Layer(core.Layer):
self._apply(transform, device, dtype, blocking)
self._dtype = dtype
+return self
# [aliases] Compatible with old method names
set_dict = set_state_dict
...
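The essential change in to() is the dtype normalization above: the old code only converted strings, while the new check routes anything that is not already a core.VarDesc.VarType through convert_np_dtype_to_dtype_, which is what makes numpy.dtype and paddle.dtype arguments work. Below is a standalone sketch of that logic; the helper name normalize_dtype is hypothetical and not part of the Paddle API, and it assumes a Paddle build where paddle.float64 is itself a VarDesc.VarType value.

import numpy as np
import paddle
from paddle.fluid.core import VarDesc
from paddle.fluid.framework import convert_np_dtype_to_dtype_

def normalize_dtype(dtype):
    # Hypothetical helper mirroring the check added in Layer.to():
    # a value that is already a VarDesc.VarType (e.g. paddle.float64)
    # passes through unchanged; strings and numpy dtypes are converted.
    if type(dtype) is not VarDesc.VarType:
        dtype = convert_np_dtype_to_dtype_(dtype)
    return dtype

# All three forms should map to the same framework dtype (VarDesc.VarType.FP64):
print(normalize_dtype("float64"))
print(normalize_dtype(np.float64))
print(normalize_dtype(paddle.float64))

The new unit tests below check exactly this mapping after calling to(dtype=paddle.float64) and to(dtype=np.float64).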
@@ -403,6 +403,52 @@ class TestLayerTo(unittest.TestCase):
self.assertRaises(AssertionError, self.linear.to, blocking=1)

+def test_to_api_paddle_dtype(self):
+    self.linear.to(dtype=paddle.float64)
+    self.assertEqual(self.linear.weight.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertEqual(self.linear.buf_name.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertTrue(
+        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
+    self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+
+    self.linear.to()
+    self.assertEqual(self.linear.weight.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertEqual(self.linear.buf_name.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertTrue(
+        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
+    self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    for p in self.linear.parameters():
+        self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))
+
+def test_to_api_numpy_dtype(self):
+    self.linear.to(dtype=np.float64)
+    self.assertEqual(self.linear.weight.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertEqual(self.linear.buf_name.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertTrue(
+        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
+    self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+
+    self.linear.to()
+    self.assertEqual(self.linear.weight.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertEqual(self.linear.buf_name.dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    self.assertTrue(
+        np.allclose(self.linear.weight.grad.numpy(), self.new_grad))
+    self.assertEqual(self.linear.weight._grad_ivar().dtype,
+                     paddle.fluid.core.VarDesc.VarType.FP64)
+    for p in self.linear.parameters():
+        self.assertTrue(isinstance(p, paddle.fluid.framework.ParamBase))

if __name__ == '__main__':
    unittest.main()