提交 6d04a9cf 编写于 作者: T Tink_Y 提交者: Cheerego

fix api format and example (#14686)

* fix api format and examples

test=develop

* Update executor.py

test=develop

* Update nn.py

* Update nn.py

test=develop

* Update nn.py

test=develop
上级 ff423730
......@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
w_param_attrs = fluid.ParamAttr(name=None,
initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
regularizer=fluid.regularizer.L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(-1.0, 1.0))
clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
......@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
w_param_attrs = fluid.ParamAttr(name=None,
initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
regularizer=fluid.regularizer.L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
......
......@@ -20,7 +20,7 @@ import six
from .framework import Program, default_main_program, Variable
from . import core
__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
__all__ = ['Executor', 'global_scope', 'scope_guard']
g_scope = core.Scope()
......@@ -407,16 +407,17 @@ class Executor(object):
Examples:
>>> data = layers.data(name='X', shape=[1], dtype='float32')
>>> hidden = layers.fc(input=data, size=10)
>>> layers.assign(hidden, out)
>>> loss = layers.mean(out)
>>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
>>> out = fluid.layers.create_tensor(dtype='float32')
>>> hidden = fluid.layers.fc(input=data, size=10)
>>> fluid.layers.assign(hidden,out)
>>> loss = fluid.layers.mean(out)
>>> adam = fluid.optimizer.Adam()
>>> adam.minimize(loss)
>>> cpu = fluid.CPUPlace()
>>> exe = Executor(cpu)
>>> exe.run(default_startup_program())
>>> exe = fluid.Executor(cpu)
>>> exe.run(fluid.default_startup_program())
>>> x = numpy.random.random(size=(10, 1)).astype('float32')
>>> outs = exe.run(
......
......@@ -89,6 +89,7 @@ def name_scope(prefix=None):
Examples:
.. code-block:: python
with name_scope("encoder"):
...
with name_scope("decoder"):
......
......@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
def shuffle(reader, buffer_size):
"""
Shuffle the reader.
Creates a data reader whose data output is shuffled.
Output from the iterator created by the original reader will be
buffered into a shuffle buffer and then shuffled. The size of the shuffle buffer
is determined by the argument buffer_size.
Args:
param reader: the original reader whose output will be shuffled.
type reader: callable
param buffer_size: shuffle buffer size.
type buffer_size: int
return: the new reader whose output is shuffled.
rtype: callable
"""
return __create_unshared_decorated_reader__(
'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
......
......@@ -308,14 +308,10 @@ def piecewise_decay(boundaries, values):
def append_LARS(params_grads, learning_rate, weight_decay):
"""Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
```python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
```
Args:
learning_rate: A learning rate Variable. This
is the global learning rate for LARS.
......@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):
Returns:
The decayed learning rate
Examples:
.. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
def _balanced_weight(param_norm, grad_norm):
......
......@@ -928,7 +928,7 @@ def dynamic_gru(input,
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
"""
helper = LayerHelper('gru', **locals())
......@@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
Examples:
.. code-block:: python
# Suppose `ids` and `scores` are LodTensorArray variables reserving
# the selected ids and scores of all steps
finished_ids, finished_scores = layers.beam_search_decode(
......@@ -5870,7 +5871,6 @@ def pad_constant_like(x, y, pad_value=0., name=None):
[[38, 39, 40]],
[[41, 42, 43]]]]
Y.shape = (1, 3, 1, 3)
And
pad_value = -1,
......@@ -6126,6 +6126,7 @@ def image_resize(input,
Supporting resample methods:
'BILINEAR' : Bilinear interpolation
'NEAREST' : Nearest neighbor interpolation
Args:
......@@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None):
# or
z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
crop = fluid.layers.crop(z, shape=[2, 3])
crop = fluid.layers.crop(z, shape=[-1, 2, 3])
"""
helper = LayerHelper('crop', **locals())
......@@ -7062,6 +7063,7 @@ def pad2d(input,
than height-1. And the width dimension has the same condition.
Example:
.. code-block:: text
Given that X is a channel of image from input:
......
......@@ -451,6 +451,7 @@ class EditDistance(MetricBase):
In the above example:
'distance' is the average of the edit distance in a pass.
'instance_error' is the instance error rate in a pass.
"""
......
......@@ -50,8 +50,9 @@ class ParamAttr(object):
w_param_attrs = fluid.ParamAttr(name="fc_weight",
learning_rate=0.5,
regularizer=fluid.L2Decay(1.0),
regularizer=fluid.regularizer.L2Decay(1.0),
trainable=True)
x = fluid.layers.data(name='X', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
"""
......
......@@ -125,6 +125,7 @@ def slice_variable(var_list, slice_count, min_block_size):
class DistributeTranspilerConfig(object):
"""
Args:
slice_var_up (bool): Do Tensor slice for pservers, default is True.
split_method (PSDispatcher): RoundRobin or HashName can be used.
Try to choose the best method to balance loads for pservers.
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册