Commit 6d04a9cf authored by Tink_Y, committed by Cheerego

fix api format and example (#14686)

* fix api format and examples

test=develop

* Update executor.py

test=develop

* Update nn.py

* Update nn.py

test=develop

* Update nn.py

test=develop
Parent ff423730
@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
     Examples:
         .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+                                      initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
                                       learning_rate=1.0,
-                                      regularizer=L1Decay(1.0),
+                                      regularizer=fluid.regularizer.L1Decay(1.0),
                                       trainable=True,
-                                      clip=GradientClipByValue(-1.0, 1.0))
+                                      clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
             y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
     """
@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
     Examples:
         .. code-block:: python

-            w_param_attrs = ParamAttr(name=None,
-                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+            w_param_attrs = fluid.ParamAttr(name=None,
+                                      initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
                                       learning_rate=1.0,
-                                      regularizer=L1Decay(1.0),
+                                      regularizer=fluid.regularizer.L1Decay(1.0),
                                       trainable=True,
-                                      clip=GradientClipByNorm(clip_norm=2.0))
+                                      clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
             y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
     """
...
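Both clip docstrings attach the clip attribute per parameter through ParamAttr. An alternative, program-wide form is sketched below; this is a minimal sketch assuming the fluid 1.x `fluid.clip.set_gradient_clip` API, with a toy regression network standing in for a real model:

```python
import paddle.fluid as fluid

# Toy network; any differentiable loss works here.
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(
    fluid.layers.square_error_cost(input=y_predict, label=y))

# Register a global clip attribute instead of a per-parameter one,
# mirroring the GradientClipByValue(-1.0, 1.0) call in the docstring.
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(-1.0, 1.0))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)
```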
@@ -20,7 +20,7 @@ import six
 from .framework import Program, default_main_program, Variable
 from . import core

-__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
+__all__ = ['Executor', 'global_scope', 'scope_guard']

 g_scope = core.Scope()
@@ -407,16 +407,17 @@ class Executor(object):
     Examples:

-        >>> data = layers.data(name='X', shape=[1], dtype='float32')
-        >>> hidden = layers.fc(input=data, size=10)
-        >>> layers.assign(hidden, out)
-        >>> loss = layers.mean(out)
+        >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
+        >>> out = fluid.layers.create_tensor(dtype='float32')
+        >>> hidden = fluid.layers.fc(input=data, size=10)
+        >>> fluid.layers.assign(hidden, out)
+        >>> loss = fluid.layers.mean(out)
         >>> adam = fluid.optimizer.Adam()
         >>> adam.minimize(loss)
         >>> cpu = core.CPUPlace()
-        >>> exe = Executor(cpu)
-        >>> exe.run(default_startup_program())
+        >>> exe = fluid.Executor(cpu)
+        >>> exe.run(fluid.default_startup_program())
         >>> x = numpy.random.random(size=(10, 1)).astype('float32')
         >>> outs = exe.run(
...
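The corrected doctest maps onto the end-to-end script below; everything up to `exe.run(` comes straight from the hunk, and only the feed/fetch tail is an assumed completion of the truncated call:

```python
import numpy
import paddle.fluid as fluid

data = fluid.layers.data(name='X', shape=[1], dtype='float32')
out = fluid.layers.create_tensor(dtype='float32')
hidden = fluid.layers.fc(input=data, size=10)
fluid.layers.assign(hidden, out)
loss = fluid.layers.mean(out)
adam = fluid.optimizer.Adam()
adam.minimize(loss)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
x = numpy.random.random(size=(10, 1)).astype('float32')
outs = exe.run(feed={'X': x},           # assumed completion of the
               fetch_list=[loss.name])  # truncated call above
```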
@@ -89,6 +89,7 @@ def name_scope(prefix=None):
     Examples:
         .. code-block:: python
+
           with name_scope("encoder"):
              ...
           with name_scope("decoder"):
...
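The example only shows the `with` blocks; a self-contained sketch, assuming `fluid.name_scope` is the public entry point for the `name_scope` defined here:

```python
import paddle.fluid as fluid

# name_scope only prefixes operator names (useful when visualizing or
# debugging a program); it does not change what the program computes.
with fluid.name_scope("encoder"):
    x = fluid.layers.data(name="x", shape=[8], dtype="float32")
    hidden = fluid.layers.fc(input=x, size=16)
with fluid.name_scope("decoder"):
    out = fluid.layers.fc(input=hidden, size=8)
```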
@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
 def shuffle(reader, buffer_size):
     """
-    Shuffle the reader.
+    Creates a data reader whose data output is shuffled.
+
+    Output from the iterator created by the original reader is buffered
+    into a shuffle buffer and then shuffled. The size of the shuffle
+    buffer is determined by the argument buffer_size.
+
+    Args:
+        reader (callable): the original reader whose output will be shuffled.
+        buffer_size (int): the size of the shuffle buffer.
+
+    Returns:
+        callable: the new reader whose output is shuffled.
     """
     return __create_unshared_decorated_reader__(
         'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
...
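A hedged usage sketch for the new docstring; the `open_files` call, its arguments, and the file name are assumptions about the surrounding fluid 1.x io API rather than part of this diff:

```python
import paddle.fluid as fluid

# Assumed file-backed reader (hypothetical recordio file and shapes).
reader = fluid.layers.open_files(
    filenames=['./mnist.recordio'],
    shapes=[(-1, 784), (-1, 1)],
    lod_levels=[0, 0],
    dtypes=['float32', 'int64'])
# Decorate it: batches are now drawn from an 8192-element shuffle buffer.
reader = fluid.layers.shuffle(reader, buffer_size=8192)
img, label = fluid.layers.read_file(reader)
```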
@@ -308,14 +308,10 @@ def piecewise_decay(boundaries, values):
 def append_LARS(params_grads, learning_rate, weight_decay):
-    """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
+    """
+    Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
     each layer.
-
-    ```python
-    learning_rate *= local_gw_ratio * sqrt(sumsq(param))
-                     / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
-    ```

     Args:
         learning_rate: A learning rate Variable. This
             is the global learning rate for LARS.
@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):
     Returns:
         The decayed learning rate
+
+    Examples:
+        .. code-block:: python
+
+            learning_rate *= local_gw_ratio * sqrt(sumsq(param))
+                             / (sqrt(sumsq(gradient)) + weight_decay * sqrt(sumsq(param)))
     """

 def _balanced_weight(param_norm, grad_norm):
...
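The formula moved into the Examples block can be checked numerically; a standalone numpy sketch, with made-up values for `local_gw_ratio` and `weight_decay`:

```python
import numpy as np

param = np.random.rand(100).astype('float32')
grad = np.random.rand(100).astype('float32')
base_lr, local_gw_ratio, weight_decay = 0.1, 0.001, 1e-4

param_norm = np.sqrt(np.sum(np.square(param)))  # sqrt(sumsq(param))
grad_norm = np.sqrt(np.sum(np.square(grad)))    # sqrt(sumsq(gradient))

# Layer-wise scaled learning rate, per the docstring formula.
lars_lr = base_lr * local_gw_ratio * param_norm / (
    grad_norm + weight_decay * param_norm)
```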
@@ -928,7 +928,7 @@ def dynamic_gru(input,
             emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
             hidden_dim = 512
             x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
-            hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
+            hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
     """
     helper = LayerHelper('gru', **locals())
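Note on the fix above: dynamic_gru has no `dim` parameter; the hidden width is passed as `size`, and the fc layer feeding it must emit `hidden_dim * 3` features, which the GRU splits into the update gate, reset gate, and candidate hidden state.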
@@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
     Examples:
         .. code-block:: python
+
             # Suppose `ids` and `scores` are LodTensorArray variables reserving
             # the selected ids and scores of all steps
             finished_ids, finished_scores = layers.beam_search_decode(
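For reference, a hedged completion of the truncated call, using only the parameters visible in the hunk header (`beam_search_decode(ids, scores, beam_size, end_id, name=None)`); `beam_size=5` and `end_id=0` are illustrative values:

```python
# `ids` and `scores` are the LoDTensorArray variables described above.
finished_ids, finished_scores = fluid.layers.beam_search_decode(
    ids, scores, beam_size=5, end_id=0)
```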
@@ -5870,7 +5871,6 @@ def pad_constant_like(x, y, pad_value=0., name=None):
                   [[38, 39, 40]],
                   [[41, 42, 43]]]]
             Y.shape = (1, 3, 1, 3)
-
         And
             pad_value = -1,
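The worked example above pads Y up to X's shape; a short sketch of the corresponding call, reusing the docstring's shapes (`append_batch_size=False` is assumed so the declared shapes are literal):

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2, 3, 2, 3],
                      dtype='float32', append_batch_size=False)
y = fluid.layers.data(name='y', shape=[1, 3, 1, 3],
                      dtype='float32', append_batch_size=False)
# Pad y with pad_value until it matches x's shape, as in the example.
out = fluid.layers.pad_constant_like(x=x, y=y, pad_value=-1.)
# out has shape [2, 3, 2, 3]
```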
@@ -6126,6 +6126,7 @@ def image_resize(input,
     Supporting resample methods:
         'BILINEAR' : Bilinear interpolation
         'NEAREST' : Nearest neighbor interpolation
+
     Args:
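A hedged sketch exercising both resample modes; the `out_shape` argument and the NCHW input layout are assumptions based on the usual `image_resize` signature:

```python
import paddle.fluid as fluid

img = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
# Upsample 32x32 feature maps to 64x64 with each supported method.
up_bi = fluid.layers.image_resize(img, out_shape=[64, 64], resample='BILINEAR')
up_nn = fluid.layers.image_resize(img, out_shape=[64, 64], resample='NEAREST')
```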
@@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None):
             # or
             z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
-            crop = fluid.layers.crop(z, shape=[2, 3])
+            crop = fluid.layers.crop(z, shape=[-1, 2, 3])
     """
     helper = LayerHelper('crop', **locals())
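Note on the fix above: fluid.layers.data prepends an implicit batch dimension, so z actually has shape [-1, 3, 5]; the corrected shape=[-1, 2, 3] presumably matches that rank, with the leading -1 leaving the batch dimension untouched while the last two dimensions are cropped to 2 x 3.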
@@ -7062,6 +7063,7 @@ def pad2d(input,
           than height-1. And the width dimension has the same condition.

     Example:
+        .. code-block:: text

         Given that X is a channel of image from input:
...
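A minimal call matching that Example block; the paddings order `[top, bottom, left, right]` and the `'reflect'` mode are assumptions from the usual fluid 1.x `pad2d` signature:

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='img', shape=[3, 32, 32], dtype='float32')
# Pad 1 row on top/bottom and 2 columns on left/right of each channel.
out = fluid.layers.pad2d(x, paddings=[1, 1, 2, 2], mode='reflect')
```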
@@ -451,6 +451,7 @@ class EditDistance(MetricBase):
         In the above example:
         'distance' is the average of the edit distance in a pass.
         'instance_error' is the instance error rate in a pass.
+
     """
...
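A hedged sketch of the per-pass loop that the 'distance'/'instance_error' description implies; the `update`/`eval` method names and argument shapes are assumptions about the MetricBase-style API:

```python
import numpy as np
import paddle.fluid as fluid

distance_evaluator = fluid.metrics.EditDistance(name='edit_distance')
distance_evaluator.reset()
# Per batch: `distances` would come from the edit_distance op;
# the values here are illustrative only.
distances = np.array([[1.0], [3.0]], dtype='float32')
distance_evaluator.update(distances, seq_num=2)
distance, instance_error = distance_evaluator.eval()
```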
@@ -50,8 +50,9 @@ class ParamAttr(object):
             w_param_attrs = fluid.ParamAttr(name="fc_weight",
                                             learning_rate=0.5,
-                                            regularizer=fluid.L2Decay(1.0),
+                                            regularizer=fluid.regularizer.L2Decay(1.0),
                                             trainable=True)
+            x = fluid.layers.data(name='X', shape=[1], dtype='float32')
             y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
     """
...
@@ -125,6 +125,7 @@ def slice_variable(var_list, slice_count, min_block_size):
 class DistributeTranspilerConfig(object):
     """
+    Args:
         slice_var_up (bool): Do Tensor slice for pservers, default is True.
         split_method (PSDispatcher): RoundRobin or HashName can be used
             try to choose the best method to balance loads for pservers.
...
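A brief sketch of how these config fields are consumed, assuming the standard fluid 1.x transpiler entry points; the endpoints and trainer count are placeholders:

```python
import paddle.fluid as fluid

config = fluid.DistributeTranspilerConfig()
config.slice_var_up = True  # slice large tensors across pservers

t = fluid.DistributeTranspiler(config=config)
t.transpile(trainer_id=0,
            pservers="127.0.0.1:6170,127.0.0.1:6171",
            trainers=2)
```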