diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 12006eb5145964df755727d5592e82c078e62c33..44c067912bbdeb20f9bf8590c9d40c7e61101831 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -296,7 +296,7 @@ paddle.fluid.layers.deformable_conv (ArgSpec(args=['input', 'offset', 'mask', 'n paddle.fluid.layers.unfold (ArgSpec(args=['x', 'kernel_sizes', 'strides', 'paddings', 'dilations', 'name'], varargs=None, keywords=None, defaults=(1, 0, 1, None)), ('document', '3f884662ad443d9ecc2b3734b4f61ad6')) paddle.fluid.layers.deformable_roi_pooling (ArgSpec(args=['input', 'rois', 'trans', 'no_trans', 'spatial_scale', 'group_size', 'pooled_height', 'pooled_width', 'part_size', 'sample_per_part', 'trans_std', 'position_sensitive', 'name'], varargs=None, keywords=None, defaults=(False, 1.0, [1, 1], 1, 1, None, 1, 0.1, False, None)), ('document', '99c03e3f249e36854f87dedaa17c8f35')) paddle.fluid.layers.filter_by_instag (ArgSpec(args=['ins', 'ins_tag', 'filter_tag', 'is_lod'], varargs=None, keywords=None, defaults=None), ('document', '7703a2088af8de4128b143ff1164ca4a')) -paddle.fluid.layers.shard_index (ArgSpec(args=['input', 'index_num', 'nshards', 'shard_id', 'ignore_value'], varargs=None, keywords=None, defaults=(-1,)), ('document', '5786fdbba6753ecd6cbce5e6b0889924')) +paddle.fluid.layers.shard_index (ArgSpec(args=['input', 'index_num', 'nshards', 'shard_id', 'ignore_value'], varargs=None, keywords=None, defaults=(-1,)), ('document', 'c4969dd6bf164f9e6a90414ea4f4e5ad')) paddle.fluid.layers.hard_swish (ArgSpec(args=['x', 'threshold', 'scale', 'offset', 'name'], varargs=None, keywords=None, defaults=(6.0, 6.0, 3.0, None)), ('document', '6a5152a7015c62cb8278fc24cb456459')) paddle.fluid.layers.mse_loss (ArgSpec(args=['input', 'label'], varargs=None, keywords=None, defaults=None), ('document', 'd9ede6469288636e1b3233b461a165c9')) paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], 
varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '9d7806e31bdf727c1a23b8782a09b545')) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index eeba89956479077619c128fca9a383048f02009a..d430d091f209fa46d9c4c2c63859106673fd4af7 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -14122,52 +14122,46 @@ def deformable_roi_pooling(input, def shard_index(input, index_num, nshards, shard_id, ignore_value=-1): """ - This layer creates the sharded index for input. This layers is used in - model- and data- parallel mixed training generally, in which the index - data (usually the label) should be recaculated in each trainer according - to - - .. math:: + This function recomputes the `input` indices according to the offset of the + shard. The length of the indices is evenly divided into N shards, and if + the `shard_id` matches the shard with the input index inside, the index is + recomputed on the basis of the shard offset, elsewise it is set to + `ignore_value`. The detail is as follows: + :: - assert index_num % nshards == 0 - - shard_size = index_num / nshards - - y = x % shard_size if x / shard_size == shard_id else ignore_value + shard_size = (index_num + nshards - 1) // nshards + y = x % shard_size if x // shard_size == shard_id else ignore_value - We take the distributed one-hot representation to show what this layer is - used for. The distributed one-hot representation is seperated into multiple - shards, and each shard is filling zeros except the one with the index - inside. In order to create these sharded representation in each trainer, - the original index should be recalculated (i.e. sharded) before. 
+ NOTE: If the length of indices cannot be evenly divided by the shard number, + the size of the last shard will be less than the calculated `shard_size`. Examples: + :: - X is a Tensor of integer values: + Input: X.shape = [4, 1] X.data = [[1], [6], [12], [19]] + index_num = 20 + nshards = 2 + ignore_value = -1 - suppose index_num = 20 and nshards = 2, then we get shard_size = 10 - - if shard_id == 0, we get the Out: + if shard_id == 0, we get: Out.shape = [4, 1] Out.data = [[1], [6], [-1], [-1]] - if shard_id == 1, we get the Out: + if shard_id == 1, we get: Out.shape = [4, 1] Out.data = [[-1], [-1], [2], [9]] - the default `ignore_value` -1 is used in this example. - Args: - input(Variable): Input indices, last dimension must be 1. - index_num(scalar): An interger defining the range of the index. - nshards(scalar): The number of shards - shard_id(scalar): The index of the current shard - ignore_value(scalar): An ingeter value out of sharded index range + - **input** (Variable): Input indices, last dimension must be 1. + - **index_num** (scalar): An integer defining the range of the index. + - **nshards** (scalar): The number of shards + - **shard_id** (scalar): The index of the current shard + - **ignore_value** (scalar): An integer value out of sharded index range Returns: - Variable: The shard index of input. + Variable: The sharded index of input. Examples: .. code-block:: python