diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 1738afe93e99f1de28bec2fb23be8e1a309d9288..5b8ff0514e7c87df786e71b5cea2d6ebfbaad6d4 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -134,12 +134,12 @@ class GradientClipByValue(BaseGradientClipAttr): Examples: .. code-block:: python - w_param_attrs = ParamAttr(name=None, - initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + w_param_attrs = fluid.ParamAttr(name=None, + initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0), learning_rate=1.0, - regularizer=L1Decay(1.0), + regularizer=fluid.regularizer.L1Decay(1.0), trainable=True, - clip=GradientClipByValue(-1.0, 1.0)) + clip=fluid.clip.GradientClipByValue(-1.0, 1.0)) y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) """ @@ -185,12 +185,12 @@ class GradientClipByNorm(BaseGradientClipAttr): Examples: .. code-block:: python - w_param_attrs = ParamAttr(name=None, - initializer=UniformInitializer(low=-1.0, high=1.0, seed=0), + w_param_attrs = flui.ParamAttr(name=None, + initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0), learning_rate=1.0, - regularizer=L1Decay(1.0), + regularizer=fluid.regularizer.L1Decay(1.0), trainable=True, - clip=GradientClipByNorm(clip_norm=2.0)) + clip=fluid.clip.GradientClipByNorm(clip_norm=2.0)) y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs) """ diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 42c2484b284844a1f1acf53f79296e13da72676a..f2886090d75f87654b33cf7aa6f98ebf6f2e27d1 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -20,7 +20,7 @@ import six from .framework import Program, default_main_program, Variable from . import core -__all__ = ['Executor', 'global_scope', 'scope_guard', '_switch_scope'] +__all__ = ['Executor', 'global_scope', 'scope_guard'] g_scope = core.Scope() @@ -407,16 +407,17 @@ class Executor(object): Examples: - >>> data = layers.data(name='X', shape=[1], dtype='float32') - >>> hidden = layers.fc(input=data, size=10) - >>> layers.assign(hidden, out) - >>> loss = layers.mean(out) + >>> data = fluid.layers.data(name='X', shape=[1], dtype='float32') + >>> out = fluid.layers.create_tensor(dtype='float32') + >>> hidden = fluid.layers.fc(input=data, size=10) + >>> fluid.layers.assign(hidden,out) + >>> loss = fluid.layers.mean(out) >>> adam = fluid.optimizer.Adam() - >>> adam.minimize(loss) + >>> adam.minimize(loss) >>> cpu = core.CPUPlace() - >>> exe = Executor(cpu) - >>> exe.run(default_startup_program()) + >>> exe = fluid.Executor(cpu) + >>> exe.run(fluid.default_startup_program()) >>> x = numpy.random.random(size=(10, 1)).astype('float32') >>> outs = exe.run( diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index b991187d424108db176ebd6996d7d161f11dcd3d..b156db53d2928daefed0959fc3e0731709855343 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -89,12 +89,13 @@ def name_scope(prefix=None): Examples: .. code-block:: python + with name_scope("encoder"): ... with name_scope("decoder"): ... - with name_scope("attention"): - ... + with name_scope("attention"): + ... """ # TODO(panyx0718): Only [0-9a-z]. assert prefix, "namescope prefix cannot be empty." diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 3f47053961bcc41b82f1b6776e9365166e78ddbf..42f4959a83fe113d6cbbe0db355249a9c203d602 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -943,7 +943,18 @@ def __create_unshared_decorated_reader__(op_type, reader, attrs, name=None): def shuffle(reader, buffer_size): """ - Shuffle the reader. + Creates a data reader whose data output is shuffled. + Output from the iterator that created by original reader will be + buffered into shuffle buffer, and then shuffled. The size of shuffle buffer + is determined by argument buf_size. + + Args: + param reader: the original reader whose output will be shuffled. + type reader: callable + param buf_size: shuffle buffer size. + type buf_size: int + return: the new reader whose output is shuffled. + rtype: callable """ return __create_unshared_decorated_reader__( 'create_shuffle_reader', reader, {'buffer_size': int(buffer_size)}) diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index 149224bb68ac869dec14ac9f953f0072bd24c7e2..dde05189722fef77e03a1c2d8f3cbae44a3e8245 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -308,13 +308,9 @@ def piecewise_decay(boundaries, values): def append_LARS(params_grads, learning_rate, weight_decay): - """Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for - each layer. - - ```python - learning_rate *= local_gw_ratio * sqrt(sumsq(param)) - / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) - ``` + """ + Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for + each layer. Args: learning_rate: A learning rate Variable. This @@ -323,6 +319,11 @@ def append_LARS(params_grads, learning_rate, weight_decay): Returns: The decayed learning rate + Examples: + .. code-block:: python + + learning_rate *= local_gw_ratio * sqrt(sumsq(param)) + / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) """ def _balanced_weight(param_norm, grad_norm): diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index ac1c8657759cad33f939c64413e669ba9ae2aad3..28b8ae895afb57e0b3461829e6aea175c7c90e9d 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -928,7 +928,7 @@ def dynamic_gru(input, emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim]) hidden_dim = 512 x = fluid.layers.fc(input=emb, size=hidden_dim * 3) - hidden = fluid.layers.dynamic_gru(input=x, dim=hidden_dim) + hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim) """ helper = LayerHelper('gru', **locals()) @@ -3586,6 +3586,7 @@ def beam_search_decode(ids, scores, beam_size, end_id, name=None): Examples: .. code-block:: python + # Suppose `ids` and `scores` are LodTensorArray variables reserving # the selected ids and scores of all steps finished_ids, finished_scores = layers.beam_search_decode( @@ -5083,7 +5084,7 @@ def im2sequence(input, output.lod = [[4, 4]] - Examples: + Examples: .. code-block:: python @@ -5870,24 +5871,23 @@ def pad_constant_like(x, y, pad_value=0., name=None): [[38, 39, 40]], [[41, 42, 43]]]] Y.shape = (1, 3, 1, 3) + And + pad_value = -1, - And - pad_value = -1, - - Return: - Out = [[[[35, 36, 37], - [-1, -1, -1]], - [[38, 39, 40], - [-1, -1, -1]], - [[41, 42, 43], - [-1, -1, -1]]], - [[[-1, -1, -1], - [-1, -1, -1]], - [[-1, -1, -1], - [-1, -1, -1]], - [[-1, -1, -1], - [-1, -1, -1]]]] - Out.shape = (2, 3, 2, 3) + Return: + Out = [[[[35, 36, 37], + [-1, -1, -1]], + [[38, 39, 40], + [-1, -1, -1]], + [[41, 42, 43], + [-1, -1, -1]]], + [[[-1, -1, -1], + [-1, -1, -1]], + [[-1, -1, -1], + [-1, -1, -1]], + [[-1, -1, -1], + [-1, -1, -1]]]] + Out.shape = (2, 3, 2, 3) Args: x (Variable): The input tensor variable. @@ -6126,6 +6126,7 @@ def image_resize(input, Supporting resample methods: 'BILINEAR' : Bilinear interpolation + 'NEAREST' : Nearest neighbor interpolation Args: @@ -6781,7 +6782,7 @@ def crop(x, shape=None, offsets=None, name=None): # or z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32") - crop = fluid.layers.crop(z, shape=[2, 3]) + crop = fluid.layers.crop(z, shape=[-1, 2, 3]) """ helper = LayerHelper('crop', **locals()) @@ -7062,39 +7063,40 @@ def pad2d(input, than height-1. And the width dimension has the same condition. Example: + .. code-block:: text - Given that X is a channel of image from input: + Given that X is a channel of image from input: - X = [[1, 2, 3], - [4, 5, 6]] + X = [[1, 2, 3], + [4, 5, 6]] - Case 0: + Case 0: - paddings = [0, 1, 2, 3], - mode = 'constant' - pad_value = 0 + paddings = [0, 1, 2, 3], + mode = 'constant' + pad_value = 0 - Out = [[0, 0, 1, 2, 3, 0, 0, 0] - [0, 0, 4, 5, 6, 0, 0, 0] - [0, 0, 0, 0, 0, 0, 0, 0]] + Out = [[0, 0, 1, 2, 3, 0, 0, 0] + [0, 0, 4, 5, 6, 0, 0, 0] + [0, 0, 0, 0, 0, 0, 0, 0]] - Case 1: + Case 1: - paddings = [0, 1, 2, 1], - mode = 'reflect' + paddings = [0, 1, 2, 1], + mode = 'reflect' - Out = [[3, 2, 1, 2, 3, 2] - [6, 5, 4, 5, 6, 5] - [3, 2, 1, 2, 3, 2]] + Out = [[3, 2, 1, 2, 3, 2] + [6, 5, 4, 5, 6, 5] + [3, 2, 1, 2, 3, 2]] - Case 2: + Case 2: - paddings = [0, 1, 2, 1], - mode = 'edge' + paddings = [0, 1, 2, 1], + mode = 'edge' - Out = [[1, 1, 1, 2, 3, 3] - [4, 4, 4, 5, 6, 6] - [4, 4, 4, 5, 6, 6]] + Out = [[1, 1, 1, 2, 3, 3] + [4, 4, 4, 5, 6, 6] + [4, 4, 4, 5, 6, 6]] Args: @@ -7332,13 +7334,13 @@ def prelu(x, mode, param_attr=None, name=None): Args: x (Variable): The input tensor. param_attr(ParamAttr|None): The parameter attribute for the learnable - weight (alpha). + weight (alpha). mode (string): The mode for weight sharing. It supports all, channel - and element. all: all elements share same weight - channel:elements in a channel share same weight - element:each element has a weight + and element. all: all elements share same weight + channel:elements in a channel share same weight + element:each element has a weight name(str|None): A name for this layer(optional). If set None, the layer - will be named automatically. + will be named automatically. Returns: Variable: The output tensor with the same shape as input. diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 829154f1b23d6e49bf963762be6b6488c98ec94a..85af8fea13d5b9a1e22014fbd727e1baed3247be 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -222,13 +222,13 @@ class Precision(MetricBase): Examples: .. code-block:: python - metric = fluid.metrics.Precision() - for pass in range(PASSES): - metric.reset() - for data in train_reader(): - loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) - metric.update(preds=preds, labels=labels) - numpy_precision = metric.eval() + metric = fluid.metrics.Precision() + for pass in range(PASSES): + metric.reset() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds=preds, labels=labels) + numpy_precision = metric.eval() """ def __init__(self, name=None): @@ -267,13 +267,13 @@ class Recall(MetricBase): Examples: .. code-block:: python - metric = fluid.metrics.Recall() - for pass in range(PASSES): - metric.reset() - for data in train_reader(): - loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) - metric.update(preds=preds, labels=labels) - numpy_recall = metric.eval() + metric = fluid.metrics.Recall() + for pass in range(PASSES): + metric.reset() + for data in train_reader(): + loss, preds, labels = exe.run(fetch_list=[cost, preds, labels]) + metric.update(preds=preds, labels=labels) + numpy_recall = metric.eval() """ def __init__(self, name=None): @@ -449,8 +449,9 @@ class EditDistance(MetricBase): distance_evaluator.update(distances, seq_num) distance, instance_error = distance_evaluator.eval() - In the above example: + In the above example: 'distance' is the average of the edit distance in a pass. + 'instance_error' is the instance error rate in a pass. """ diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index a51607bfdb1dde3d25f490770cc2ba368ceb27ff..38ddf93198d7c58382e36a5b7af488f56e6f9878 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -50,8 +50,9 @@ class ParamAttr(object): w_param_attrs = fluid.ParamAttr(name="fc_weight", learning_rate=0.5, - regularizer=fluid.L2Decay(1.0), + regularizer=fluid.regularizer.L2Decay(1.0), trainable=True) + x = fluid.layers.data(name='X', shape=[1], dtype='float32') y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs) """ diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 5d348f0995fbff7bbefa3324caffb448c98f552f..1d867d9194347cf55fd1bd8b1962856d599be7ec 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -125,13 +125,14 @@ def slice_variable(var_list, slice_count, min_block_size): class DistributeTranspilerConfig(object): """ - slice_var_up (bool): Do Tensor slice for pservers, default is True. - split_method (PSDispatcher): RoundRobin or HashName can be used - try to choose the best method to balance loads for pservers. - min_block_size (int): Minimum splitted element number in block. - According:https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156 - We can use bandwidth effiently when data size is larger than 2MB.If you - want to change it, please be sure you see the slice_variable function. + Args: + slice_var_up (bool): Do Tensor slice for pservers, default is True. + split_method (PSDispatcher): RoundRobin or HashName can be used + try to choose the best method to balance loads for pservers. + min_block_size (int): Minimum splitted element number in block. + According:https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156 + We can use bandwidth effiently when data size is larger than 2MB.If you + want to change it, please be sure you see the slice_variable function. """ slice_var_up = True @@ -163,35 +164,35 @@ class DistributeTranspiler(object): Examples: .. code-block:: python - # for pserver mode - pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174" - trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174" - current_endpoint = "192.168.0.1:6174" - trainer_id = 0 - trainers = 4 - role = os.getenv("PADDLE_TRAINING_ROLE") - - t = fluid.DistributeTranspiler() - t.transpile( - trainer_id, pservers=pserver_endpoints, trainers=trainers) - if role == "PSERVER": - pserver_program = t.get_pserver_program(current_endpoint) - pserver_startup_program = t.get_startup_program(current_endpoint, + # for pserver mode + pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174" + trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174" + current_endpoint = "192.168.0.1:6174" + trainer_id = 0 + trainers = 4 + role = os.getenv("PADDLE_TRAINING_ROLE") + + t = fluid.DistributeTranspiler() + t.transpile( + trainer_id, pservers=pserver_endpoints, trainers=trainers) + if role == "PSERVER": + pserver_program = t.get_pserver_program(current_endpoint) + pserver_startup_program = t.get_startup_program(current_endpoint, pserver_program) - elif role == "TRAINER": - trainer_program = t.get_trainer_program() - - # for nccl2 mode - config = fluid.DistributeTranspilerConfig() - config.mode = "nccl2" - t = fluid.DistributeTranspiler(config=config) - t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep) - exe = fluid.ParallelExecutor( - use_cuda, - loss_name=loss_var.name, - num_trainers=len(trainers.split(",)), - trainer_id=trainer_id - ) + elif role == "TRAINER": + trainer_program = t.get_trainer_program() + + # for nccl2 mode + config = fluid.DistributeTranspilerConfig() + config.mode = "nccl2" + t = fluid.DistributeTranspiler(config=config) + t.transpile(trainer_id, workers=workers, current_endpoint=curr_ep) + exe = fluid.ParallelExecutor( + use_cuda, + loss_name=loss_var.name, + num_trainers=len(trainers.split(",)), + trainer_id=trainer_id + ) """ def __init__(self, config=None):