Clear the gradients of all parameters for this layer.
Returns:
None
Examples:
.. code-block:: python
...
...
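An illustrative usage sketch (the ``paddle.nn.Linear`` / ``paddle.optimizer.Adam`` setup below is assumed for demonstration, not part of this hunk):

.. code-block:: python

    import numpy as np
    import paddle

    x = paddle.to_tensor(np.random.rand(2, 13).astype("float32"))
    linear = paddle.nn.Linear(13, 5)
    adam = paddle.optimizer.Adam(learning_rate=0.01,
                                 parameters=linear.parameters())
    out = linear(x)
    out.backward()
    adam.step()
    # drop the accumulated gradients before the next iteration
    linear.clear_gradients()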
@@ -901,8 +901,8 @@ class Layer(core.Layer):
with program_desc_tracing_guard(False):
    self._build_once(*inputs, **kwargs)
    # TODO(liuyuhui) Only xpu broadcasts parameters here.
    # Other devices call _sync_params_buffers in DataParallel
    # to realize parameter synchronization among multiple cards.
    if parallel_helper._is_data_parallel_mode(
    ) and paddle.is_compiled_with_xpu():
...
...
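For context, a minimal sketch of the multi-card setup this comment refers to; it assumes the process is launched with ``paddle.distributed.launch`` (or ``spawn``), which is not shown here:

.. code-block:: python

    import paddle
    import paddle.distributed as dist

    dist.init_parallel_env()
    linear = paddle.nn.Linear(10, 10)
    # On XPU the parameter broadcast happens inside Layer.__call__ (above);
    # on other devices DataParallel calls _sync_params_buffers to sync
    # parameters and buffers across cards.
    dp_linear = paddle.DataParallel(linear)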
@@ -944,7 +944,7 @@ class Layer(core.Layer):
sublayer(Layer): an instance of Layer.
Returns:
Layer: the sublayer passed in.
Examples:
.. code-block:: python
...
...
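A minimal sketch of ``add_sublayer`` in use (the ``MySequential`` container below is made up for illustration):

.. code-block:: python

    import paddle

    class MySequential(paddle.nn.Layer):
        def __init__(self, *layers):
            super().__init__()
            # add_sublayer registers each child so its parameters are
            # tracked, and returns the sublayer that was passed in
            self._stages = [
                self.add_sublayer(str(i), layer)
                for i, layer in enumerate(layers)
            ]

        def forward(self, x):
            for stage in self._stages:
                x = stage(x)
            return x

    model = MySequential(paddle.nn.Linear(10, 10), paddle.nn.Linear(10, 3))
    out = model(paddle.rand([4, 10]))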
@@ -1167,7 +1167,7 @@ class Layer(core.Layer):
    self._non_persistable_buffer_names_set.add(name)
    _buffers[name] = value
elif _buffers is not None and name in _buffers:
    # Note(Aurelius84): In Dy2stat, the value of the Buffer may be modified
    # in a decorated function, such as `self.buffer = new_tensor`. So we
    # update its value via `assign`.
    if type(value) == framework.Variable:
...
...
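A small sketch of the buffer path handled here: assigning a new tensor to an attribute that is already registered as a buffer goes through this branch (the ``Counter`` layer is hypothetical):

.. code-block:: python

    import paddle

    class Counter(paddle.nn.Layer):
        def __init__(self):
            super().__init__()
            # a persistable buffer, stored in the layer's _buffers dict
            self.register_buffer("step", paddle.zeros([1], dtype="int64"))

        def forward(self, x):
            # plain attribute assignment to an existing buffer is routed
            # through __setattr__; under Dy2stat the note above means the
            # value is updated via `assign`
            self.step = self.step + 1
            return x

    layer = Counter()
    layer(paddle.rand([2, 2]))
    print(layer.step)  # [1]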
@@ -1326,7 +1326,7 @@ class Layer(core.Layer):
Parameters:
destination(dict, optional) : If provided, all the parameters and persistable buffers will be collected into this dict. Default: None
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
Returns:
dict: a dict containing all the parameters and persistable buffers.
...
...
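An illustrative sketch (``paddle.nn.Sequential`` and ``BatchNorm1D`` are assumed here just to show that both parameters and persistable buffers end up in the dict):

.. code-block:: python

    import paddle

    net = paddle.nn.Sequential(
        paddle.nn.Linear(10, 10),
        paddle.nn.BatchNorm1D(10),
    )
    state = net.state_dict()
    # weights/biases and persistable buffers (e.g. BatchNorm running
    # statistics) all appear as entries of the returned dict
    print(list(state.keys()))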
@@ -1357,7 +1357,7 @@ class Layer(core.Layer):
Parameters:
destination(dict, optional) : If provided, all the parameters and persistable buffers will be collected into this dict. Default: None
include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
Returns:
dict: a dict containing all the parameters and persistable buffers.
...
...
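A minimal checkpointing sketch built on this API (``paddle.save`` is assumed for persistence; the file name is arbitrary):

.. code-block:: python

    import paddle

    emb = paddle.nn.Embedding(10, 10)
    state = emb.state_dict()
    # the returned dict can be handed straight to paddle.save
    paddle.save(state, "emb.pdparams")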
@@ -1385,7 +1385,7 @@ class Layer(core.Layer):
Parameters:
state_dict(dict) : A dict containing all the parameters and persistable buffers.
use_structured_name(bool, optional) : If true, use structured names as keys; otherwise, use parameter or buffer names as keys.
Default: True
Returns:
None
...
...
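A round-trip sketch pairing this with ``state_dict`` and ``paddle.save`` / ``paddle.load`` (file name is arbitrary):

.. code-block:: python

    import paddle

    emb = paddle.nn.Embedding(10, 10)
    paddle.save(emb.state_dict(), "emb.pdparams")

    # later: restore the parameters and persistable buffers from disk
    loaded = paddle.load("emb.pdparams")
    emb.set_state_dict(loaded)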
@@ -1502,21 +1502,22 @@ class Layer(core.Layer):
Cast the parameters and buffers of the Layer to the given device, dtype and blocking.
Parameters:
device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device on which the Layer should be stored.
If None, the device is the same as the original Tensor's. If device is a string, it can be ``cpu``, ``gpu:x`` or ``xpu:x``, where ``x`` is the
index of the GPU or XPU. Default: None.
dtype(str|core.VarDesc.VarType|None, optional): The data type. If None, the dtype is the same as the original Tensor's. Default: None.
blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
asynchronous with respect to the host. Otherwise, the argument has no effect. If None, blocking is set to True. Default: None.
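A short sketch of the cast described above (the CPU target and float64 dtype are just example choices):

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(2, 2)
    # cast parameters and buffers to float64, keeping the current device
    linear.to(dtype='float64')
    # move to CPU explicitly; 'gpu:0' or 'xpu:0' would target an accelerator
    linear.to(device='cpu')
    print(linear.weight.dtype)  # paddle.float64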