提交 6d04a9cf 编写于 作者: T Tink_Y 提交者: Cheerego

fix api format and example (#14686)

* fix api format and examples

test=develop

* Update executor.py

test=develop

* Update nn.py

* Update nn.py

test=develop

* Update nn.py

test=develop
上级 ff423730
......@@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr):
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
w_param_attrs = fluid.ParamAttr(name=None,
initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
regularizer=fluid.regularizer.L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(-1.0, 1.0))
clip=fluid.clip.GradientClipByValue(-1.0, 1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
......@@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr):
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
w_param_attrs = flui.ParamAttr(name=None,
initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
regularizer=fluid.regularizer.L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
clip=fluid.clip.GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
......
......@@ -20,7 +20,7 @@ import six
from .framework import Program, default_main_program, Variable
from . import core
__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope']
__all__ = ['Executor', 'global_scope', 'scope_guard']
g_scope = core.Scope()
......@@ -407,16 +407,17 @@ class Executor(object):
Examples:
>>> data = layers.data(name='X', shape=[1], dtype='float32')
>>> hidden = layers.fc(input=data, size=10)
>>> layers.assign(hidden, out)
>>> loss = layers.mean(out)
>>> data = fluid.layers.data(name='X', shape=[1], dtype='float32')
>>> out = fluid.layers.create_tensor(dtype='float32')
>>> hidden = fluid.layers.fc(input=data, size=10)
>>> fluid.layers.assign(hidden,out)
>>> loss = fluid.layers.mean(out)
>>> adam = fluid.optimizer.Adam()
>>> adam.minimize(loss)
>>> adam.minimize(loss)
>>> cpu = core.CPUPlace()
>>> exe = Executor(cpu)
>>> exe.run(default_startup_program())
>>> exe = fluid.Executor(cpu)
>>> exe.run(fluid.default_startup_program())
>>> x = numpy.random.random(size=(10, 1)).astype('float32')
>>> outs = exe.run(
......
......@@ -89,12 +89,13 @@ def name_scope(prefix=None):
Examples:
.. code-block:: python
with name_scope("encoder"):
...
with name_scope("decoder"):
...
with name_scope("attention"):
...
with name_scope("attention"):
...
"""
# TODO(panyx0718): Only [0-9a-z].
assert prefix, "namescope prefix cannot be empty."
......
......@@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None):
def shuffle(reader, buffer_size):
"""
Shuffle the reader.
Creates a data reader whose data output is shuffled.
Output from the iterator that created by original reader will be
buffered into shuffle buffer, and then shuffled. The size of shuffle buffer
is determined by argument buf_size.
Args:
param reader: the original reader whose output will be shuffled.
type reader: callable
param buf_size: shuffle buffer size.
type buf_size: int
return: the new reader whose output is shuffled.
rtype: callable
"""
return __create_unshared_decorated_reader__(
'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)})
......
......@@ -308,13 +308,9 @@ def piecewise_decay(boundaries, values):
def append_LARS(params_grads, learning_rate, weight_decay):
"""Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
```python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
```
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
Args:
learning_rate: A learning rate Variable. This
......@@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay):
Returns:
The decayed learning rate
Examples:
.. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
def _balanced_weight(param_norm, grad_norm):
......
......@@ -928,7 +928,7 @@ def dynamic_gru(input,
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
hidden_dim = 512
x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim)
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
"""
helper = LayerHelper('gru', **locals())
......@@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None):
Examples:
.. code-block:: python
# Suppose `ids` and `scores` are LodTensorArray variables reserving
# the selected ids and scores of all steps
finished_ids, finished_scores = layers.beam_search_decode(
......@@ -5083,7 +5084,7 @@ def im2sequence(input,
output.lod = [[4, 4]]
Examples:
Examples:
.. code-block:: python
......@@ -5870,24 +5871,23 @@ def pad_constant_like(x, y, pad_value=0., name=None):
[[38, 39, 40]],
[[41, 42, 43]]]]
Y.shape = (1, 3, 1, 3)
And
pad_value = -1,
And
pad_value = -1,
Return:
Out = [[[[35, 36, 37],
[-1, -1, -1]],
[[38, 39, 40],
[-1, -1, -1]],
[[41, 42, 43],
[-1, -1, -1]]],
[[[-1, -1, -1],
[-1, -1, -1]],
[[-1, -1, -1],
[-1, -1, -1]],
[[-1, -1, -1],
[-1, -1, -1]]]]
Out.shape = (2, 3, 2, 3)
Return:
Out = [[[[35, 36, 37],
[-1, -1, -1]],
[[38, 39, 40],
[-1, -1, -1]],
[[41, 42, 43],
[-1, -1, -1]]],
[[[-1, -1, -1],
[-1, -1, -1]],
[[-1, -1, -1],
[-1, -1, -1]],
[[-1, -1, -1],
[-1, -1, -1]]]]
Out.shape = (2, 3, 2, 3)
Args:
x (Variable): The input tensor variable.
......@@ -6126,6 +6126,7 @@ def image_resize(input,
Supporting resample methods:
'BILINEAR' : Bilinear interpolation
'NEAREST' : Nearest neighbor interpolation
Args:
......@@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None):
# or
z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
crop = fluid.layers.crop(z, shape=[2, 3])
crop = fluid.layers.crop(z, shape=[-1, 2, 3])
"""
helper = LayerHelper('crop', **locals())
......@@ -7062,39 +7063,40 @@ def pad2d(input,
than height-1. And the width dimension has the same condition.
Example:
.. code-block:: text
Given that X is a channel of image from input:
Given that X is a channel of image from input:
X = [[1, 2, 3],
[4, 5, 6]]
X = [[1, 2, 3],
[4, 5, 6]]
Case 0:
Case 0:
paddings = [0, 1, 2, 3],
mode = 'constant'
pad_value = 0
paddings = [0, 1, 2, 3],
mode = 'constant'
pad_value = 0
Out = [[0, 0, 1, 2, 3, 0, 0, 0]
[0, 0, 4, 5, 6, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]]
Out = [[0, 0, 1, 2, 3, 0, 0, 0]
[0, 0, 4, 5, 6, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0]]
Case 1:
Case 1:
paddings = [0, 1, 2, 1],
mode = 'reflect'
paddings = [0, 1, 2, 1],
mode = 'reflect'
Out = [[3, 2, 1, 2, 3, 2]
[6, 5, 4, 5, 6, 5]
[3, 2, 1, 2, 3, 2]]
Out = [[3, 2, 1, 2, 3, 2]
[6, 5, 4, 5, 6, 5]
[3, 2, 1, 2, 3, 2]]
Case 2:
Case 2:
paddings = [0, 1, 2, 1],
mode = 'edge'
paddings = [0, 1, 2, 1],
mode = 'edge'
Out = [[1, 1, 1, 2, 3, 3]
[4, 4, 4, 5, 6, 6]
[4, 4, 4, 5, 6, 6]]
Out = [[1, 1, 1, 2, 3, 3]
[4, 4, 4, 5, 6, 6]
[4, 4, 4, 5, 6, 6]]
Args:
......@@ -7332,13 +7334,13 @@ def prelu(x, mode, param_attr=None, name=None):
Args:
x (Variable): The input tensor.
param_attr(ParamAttr|None): The parameter attribute for the learnable
weight (alpha).
weight (alpha).
mode (string): The mode for weight sharing. It supports all, channel
and element. all: all elements share same weight
channel:elements in a channel share same weight
element:each element has a weight
and element. all: all elements share same weight
channel:elements in a channel share same weight
element:each element has a weight
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
Returns:
Variable: The output tensor with the same shape as input.
......
......@@ -222,13 +222,13 @@ class Precision(MetricBase):
Examples:
.. code-block:: python
metric = fluid.metrics.Precision()
for pass in range(PASSES):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
numpy_precision = metric.eval()
metric = fluid.metrics.Precision()
for pass in range(PASSES):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
numpy_precision = metric.eval()
"""
def __init__(self, name=None):
......@@ -267,13 +267,13 @@ class Recall(MetricBase):
Examples:
.. code-block:: python
metric = fluid.metrics.Recall()
for pass in range(PASSES):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
numpy_recall = metric.eval()
metric = fluid.metrics.Recall()
for pass in range(PASSES):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
numpy_recall = metric.eval()
"""
def __init__(self, name=None):
......@@ -449,8 +449,9 @@ class EditDistance(MetricBase):
distance_evaluator.update(distances, seq_num)
distance, instance_error = distance_evaluator.eval()
In the above example:
In the above example:
'distance' is the average of the edit distance in a pass.
'instance_error' is the instance error rate in a pass.
"""
......
......@@ -50,8 +50,9 @@ class ParamAttr(object):
w_param_attrs = fluid.ParamAttr(name="fc_weight",
learning_rate=0.5,
regularizer=fluid.L2Decay(1.0),
regularizer=fluid.regularizer.L2Decay(1.0),
trainable=True)
x = fluid.layers.data(name='X', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
"""
......
......@@ -125,13 +125,14 @@ def slice_variable(var_list, slice_count, min_block_size):
class DistributeTranspilerConfig(object):
"""
slice_var_up (bool): Do Tensor slice for pservers, default is True.
split_method (PSDispatcher): RoundRobin or HashName can be used
try to choose the best method to balance loads for pservers.
min_block_size (int): Minimum splitted element number in block.
According:https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
We can use bandwidth effiently when data size is larger than 2MB.If you
want to change it, please be sure you see the slice_variable function.
Args:
slice_var_up (bool): Do Tensor slice for pservers, default is True.
split_method (PSDispatcher): RoundRobin or HashName can be used
try to choose the best method to balance loads for pservers.
min_block_size (int): Minimum splitted element number in block.
According:https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
We can use bandwidth effiently when data size is larger than 2MB.If you
want to change it, please be sure you see the slice_variable function.
"""
slice_var_up = True
......@@ -163,35 +164,35 @@ class DistributeTranspiler(object):
Examples:
.. code-block:: python
# for pserver mode
pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
current_endpoint = "192.168.0.1:6174"
trainer_id = 0
trainers = 4
role = os.getenv("PADDLE_TRAINING_ROLE")
t = fluid.DistributeTranspiler()
t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers)
if role == "PSERVER":
pserver_program = t.get_pserver_program(current_endpoint)
pserver_startup_program = t.get_startup_program(current_endpoint,
# for pserver mode
pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
current_endpoint = "192.168.0.1:6174"
trainer_id = 0
trainers = 4
role = os.getenv("PADDLE_TRAINING_ROLE")
t = fluid.DistributeTranspiler()
t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers)
if role == "PSERVER":
pserver_program = t.get_pserver_program(current_endpoint)
pserver_startup_program = t.get_startup_program(current_endpoint,
pserver_program)
elif role == "TRAINER":
trainer_program = t.get_trainer_program()
# for nccl2 mode
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
exe = fluid.ParallelExecutor(
use_cuda,
loss_name=loss_var.name,
num_trainers=len(trainers.split(",)),
trainer_id=trainer_id
)
elif role == "TRAINER":
trainer_program = t.get_trainer_program()
# for nccl2 mode
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep)
exe = fluid.ParallelExecutor(
use_cuda,
loss_name=loss_var.name,
num_trainers=len(trainers.split(",)),
trainer_id=trainer_id
)
"""
def __init__(self, config=None):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册