Fix problems in en docs (#20451)

test=release/1.6, test=document_fix

Fix problems in en docs (#20451)
test=release/1.6, test=document_fix
8f480ca9 · Yibing Liu · GitHub · dc206128 · 8f480ca9 · 8f480ca9
6 changed file
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -128,7 +128,7 @@ paddle.fluid.layers.dynamic_lstmp (ArgSpec(args=['input', 'size', 'proj_size', '
 paddle.fluid.layers.dynamic_gru (ArgSpec(args=['input', 'size', 'param_attr', 'bias_attr', 'is_reverse', 'gate_activation', 'candidate_activation', 'h_0', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, False, 'sigmoid', 'tanh', None, False)), ('document', '83617c165827e030636c80486d5de6f3'))
 paddle.fluid.layers.gru_unit (ArgSpec(args=['input', 'hidden', 'size', 'param_attr', 'bias_attr', 'activation', 'gate_activation', 'origin_mode'], varargs=None, keywords=None, defaults=(None, None, 'tanh', 'sigmoid', False)), ('document', '33974b9bfa69f2f1eb85e6f956dff04e'))
 paddle.fluid.layers.linear_chain_crf (ArgSpec(args=['input', 'label', 'param_attr', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b28bdb43160e9667be2a3457d19d9f5b'))
-paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '933b7e268c4ffa3d5c3ef953a5ee9f0b'))
+paddle.fluid.layers.crf_decoding (ArgSpec(args=['input', 'param_attr', 'label', 'length'], varargs=None, keywords=None, defaults=(None, None)), ('document', '708ce0348b74d3e0c7885c2c524b7fa7'))
 paddle.fluid.layers.cos_sim (ArgSpec(args=['X', 'Y'], varargs=None, keywords=None, defaults=None), ('document', '48ec1ba2d75c4e2faf8d9a47350462ae'))
 paddle.fluid.layers.cross_entropy (ArgSpec(args=['input', 'label', 'soft_label', 'ignore_index'], varargs=None, keywords=None, defaults=(False, -100)), ('document', 'd1985a930a59c3bd41a7c1d72594f5b9'))
 paddle.fluid.layers.bpr_loss (ArgSpec(args=['input', 'label', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ae57e6e5136dade436f0df1f11770afa'))
@@ -176,7 +176,7 @@ paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_t
 paddle.fluid.layers.sequence_reshape (ArgSpec(args=['input', 'new_dim'], varargs=None, keywords=None, defaults=None), ('document', 'eeb1591cfc854c6ffdac77b376313c44'))
 paddle.fluid.layers.transpose (ArgSpec(args=['x', 'perm', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8e72db173d4c082e27cb11f31d8c9bfa'))
 paddle.fluid.layers.im2sequence (ArgSpec(args=['input', 'filter_size', 'stride', 'padding', 'input_image_size', 'out_stride', 'name'], varargs=None, keywords=None, defaults=(1, 1, 0, None, 1, None)), ('document', 'fe352915a543cec434f74e9b32ac49da'))
-paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '83d4ca6dfb957912807f535756e76992'))
+paddle.fluid.layers.nce (ArgSpec(args=['input', 'label', 'num_total_classes', 'sample_weight', 'param_attr', 'bias_attr', 'num_neg_samples', 'name', 'sampler', 'custom_dist', 'seed', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, 'uniform', None, 0, False)), ('document', '38297567127888e01542857839058d52'))
 paddle.fluid.layers.sampled_softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'num_samples', 'num_true', 'remove_accidental_hits', 'use_customized_samples', 'customized_samples', 'customized_probabilities', 'seed'], varargs=None, keywords=None, defaults=(1, True, False, None, None, 0)), ('document', 'd4435a63d34203339831ee6a86ef9242'))
 paddle.fluid.layers.hsigmoid (ArgSpec(args=['input', 'label', 'num_classes', 'param_attr', 'bias_attr', 'name', 'path_table', 'path_code', 'is_custom', 'is_sparse'], varargs=None, keywords=None, defaults=(None, None, None, None, None, False, False)), ('document', 'b83e7dfa81059b39bb137922dc914f50'))
 paddle.fluid.layers.beam_search (ArgSpec(args=['pre_ids', 'pre_scores', 'ids', 'scores', 'beam_size', 'end_id', 'level', 'is_accumulated', 'name', 'return_parent_idx'], varargs=None, keywords=None, defaults=(0, True, None, False)), ('document', '1270395ce97a4e1b556104abbb14f096'))
@@ -188,7 +188,7 @@ paddle.fluid.layers.spectral_norm (ArgSpec(args=['weight', 'dim', 'power_iters',
 paddle.fluid.layers.softmax_with_cross_entropy (ArgSpec(args=['logits', 'label', 'soft_label', 'ignore_index', 'numeric_stable_mode', 'return_softmax', 'axis'], varargs=None, keywords=None, defaults=(False, -100, True, False, -1)), ('document', '54e1675aa0364f4a78fa72804ec0f413'))
 paddle.fluid.layers.smooth_l1 (ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'ecb75c1b00c4c76c98b482f633b7a10c'))
 paddle.fluid.layers.one_hot (ArgSpec(args=['input', 'depth', 'allow_out_of_range'], varargs=None, keywords=None, defaults=(False,)), ('document', 'cdf5dc2078f1e20dc61dd0bec7e28a29'))
-paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', '98e7927f09ee2270535b29f048e481ec'))
+paddle.fluid.layers.autoincreased_step_counter (ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1)), ('document', 'd016c137beb9a4528b7378b437d00151'))
 paddle.fluid.layers.reshape (ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None)), ('document', 'd7a6d59e464a7ef1184eb6caefeb49f1'))
 paddle.fluid.layers.squeeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbac07662a6e22e8e299ced880c7775'))
 paddle.fluid.layers.unsqueeze (ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b9bd3129d36a70e7c4385df51ff71c62'))
@@ -206,7 +206,7 @@ paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len',
 paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCHW')), ('document', '44da7890c8a362a83a1c0902a1dc1e4d'))
 paddle.fluid.layers.resize_trilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1, 'NCDHW')), ('document', '5b4d0f823f94c260fe5e6f7eec60a797'))
 paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'data_format'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 'NCHW')), ('document', '0107a5cbae1aef3f381d3d769a6068eb'))
-paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', 'f985c9b66e3aec96fa753a8eb44c991c'))
+paddle.fluid.layers.gather (ArgSpec(args=['input', 'index', 'overwrite'], varargs=None, keywords=None, defaults=(True,)), ('document', '5c52e8512f97a84608bc3b8b3250fc70'))
 paddle.fluid.layers.gather_nd (ArgSpec(args=['input', 'index', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'a7d625028525167b138106f574dffdf9'))
 paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name', 'overwrite'], varargs=None, keywords=None, defaults=(None, True)), ('document', '69b22affd4a6326502af166f04c095ab'))
 paddle.fluid.layers.scatter_nd_add (ArgSpec(args=['ref', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2607b5c9369fbc52f208de066a80fc25'))
@@ -220,7 +220,7 @@ paddle.fluid.layers.log (ArgSpec(args=['x', 'name'], varargs=None, keywords=None
 paddle.fluid.layers.crop (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '32196a194f757b4da114a595a5bc6414'))
 paddle.fluid.layers.crop_tensor (ArgSpec(args=['x', 'shape', 'offsets', 'name'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', 'd460aaf35afbbeb9beea4789aa6e4343'))
 paddle.fluid.layers.rank_loss (ArgSpec(args=['label', 'left', 'right', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '8eb36596bb43d7a907d3397c7aedbdb3'))
-paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '6fc86ed23b420c8a0f6c043563cf3937'))
+paddle.fluid.layers.margin_rank_loss (ArgSpec(args=['label', 'left', 'right', 'margin', 'name'], varargs=None, keywords=None, defaults=(0.1, None)), ('document', '1a177f30e5013fae7ee6c45860cf4946'))
 paddle.fluid.layers.elu (ArgSpec(args=['x', 'alpha', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '9af1926c06711eacef9e82d7a9e4d308'))
 paddle.fluid.layers.relu6 (ArgSpec(args=['x', 'threshold', 'name'], varargs=None, keywords=None, defaults=(6.0, None)), ('document', '538fc860b2a1734e118b94e4a1a3ee67'))
 paddle.fluid.layers.pow (ArgSpec(args=['x', 'factor', 'name'], varargs=None, keywords=None, defaults=(1.0, None)), ('document', '00d437d1e0d9450ea75a0495b93b54a7'))
@@ -254,7 +254,7 @@ paddle.fluid.layers.elementwise_floordiv (ArgSpec(args=['x', 'y', 'axis', 'act',
 paddle.fluid.layers.uniform_random_batch_size_like (ArgSpec(args=['input', 'shape', 'dtype', 'input_dim_idx', 'output_dim_idx', 'min', 'max', 'seed'], varargs=None, keywords=None, defaults=('float32', 0, 0, -1.0, 1.0, 0)), ('document', 'cfa120e583cd4a5bfa120c8a26f98a28'))
 paddle.fluid.layers.gaussian_random (ArgSpec(args=['shape', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', 'ebbf399d4e03190ce5dc9488f05c92f4'))
 paddle.fluid.layers.sampling_id (ArgSpec(args=['x', 'min', 'max', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0.0, 1.0, 0, 'float32')), ('document', 'c39b647b6cf08e058d96ee503d5284fe'))
-paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', 'b24d0b21361c4bb8ef2cec8c26fb12b2'))
+paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'shape', 'input_dim_idx', 'output_dim_idx', 'mean', 'std', 'seed', 'dtype'], varargs=None, keywords=None, defaults=(0, 0, 0.0, 1.0, 0, 'float32')), ('document', '2aed0f546f220364fb1da724a3176f74'))
 paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'f4b60847cb0f1ae00823ba6fb1b11310'))
 paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '8c622791994a0d657d8c6c9cefa5bf34'))
 paddle.fluid.layers.strided_slice (ArgSpec(args=['input', 'axes', 'starts', 'ends', 'strides'], varargs=None, keywords=None, defaults=None), ('document', '340d8d656272ea396b441aab848429a2'))
@@ -275,12 +275,12 @@ paddle.fluid.layers.space_to_depth (ArgSpec(args=['x', 'blocksize', 'name'], var
 paddle.fluid.layers.affine_grid (ArgSpec(args=['theta', 'out_shape', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '315b50c1cbd9569375b098c56f1e91c9'))
 paddle.fluid.layers.sequence_reverse (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '5b32ed21ab89140a8e758002923a0da3'))
 paddle.fluid.layers.affine_channel (ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name', 'act'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None, None)), ('document', 'ecc4b1323028bde0518d666882d03515'))
-paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '18ec2e3afeb90e70c8b73d2b71c40fdb'))
+paddle.fluid.layers.similarity_focus (ArgSpec(args=['input', 'axis', 'indexes', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '57256fcb1119dc35ae031889fa601d61'))
 paddle.fluid.layers.hash (ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None)), ('document', 'a0b73c21be618cec0281e7903039e5e3'))
 paddle.fluid.layers.grid_sampler (ArgSpec(args=['x', 'grid', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '90c74742f48c70b103f1fbb9eb129066'))
-paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'e3993a477c94729526040ff65d95728e'))
+paddle.fluid.layers.log_loss (ArgSpec(args=['input', 'label', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(0.0001, None)), ('document', 'ef1701e11d60508fe8f02dd2a8c60bdf'))
 paddle.fluid.layers.add_position_encoding (ArgSpec(args=['input', 'alpha', 'beta', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'e399f9436fed5f7ff480d8532e42c937'))
-paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '45fc3652a8e1aeffbe4eba371c54f756'))
+paddle.fluid.layers.bilinear_tensor_product (ArgSpec(args=['x', 'y', 'size', 'act', 'name', 'param_attr', 'bias_attr'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '6755168c4b2308e1e4f54cb56fa7dcb2'))
 paddle.fluid.layers.merge_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'b2b0e5d5c155ce24bafc38b78cd0b164'))
 paddle.fluid.layers.get_tensor_from_selected_rows (ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '2c568321feb4d16c41a83df43f95089d'))
 paddle.fluid.layers.lstm (ArgSpec(args=['input', 'init_h', 'init_c', 'max_len', 'hidden_size', 'num_layers', 'dropout_prob', 'is_bidirec', 'is_test', 'name', 'default_initializer', 'seed'], varargs=None, keywords=None, defaults=(0.0, False, False, None, None, -1)), ('document', '5193cf1113f9d8d8f682ee5a5fc8b391'))
@@ -900,7 +900,7 @@ paddle.fluid.transpiler.DistributeTranspilerConfig ('paddle.fluid.transpiler.dis
 paddle.fluid.transpiler.DistributeTranspilerConfig.__init__ (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.nets.simple_img_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'pool_size', 'pool_stride', 'pool_padding', 'pool_type', 'global_pooling', 'conv_stride', 'conv_padding', 'conv_dilation', 'conv_groups', 'param_attr', 'bias_attr', 'act', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, 'max', False, 1, 0, 1, 1, None, None, None, True)), ('document', '5e89c978199c4ecce2b26d5fed1ec52b'))
 paddle.fluid.nets.sequence_conv_pool (ArgSpec(args=['input', 'num_filters', 'filter_size', 'param_attr', 'act', 'pool_type', 'bias_attr'], varargs=None, keywords=None, defaults=(None, 'sigmoid', 'max', None)), ('document', 'b2d435f782ac8ea3ca480b8d24e7f5b4'))
-paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', 'b87bacfc70dd3477ed25ef14aa01389a'))
+paddle.fluid.nets.glu (ArgSpec(args=['input', 'dim'], varargs=None, keywords=None, defaults=(-1,)), ('document', '3efe197c8e3e75f84a4c464d8b74e943'))
 paddle.fluid.nets.scaled_dot_product_attention (ArgSpec(args=['queries', 'keys', 'values', 'num_heads', 'dropout_rate'], varargs=None, keywords=None, defaults=(1, 0.0)), ('document', 'b1a07a0000eb9103e3a143ca8c13de5b'))
 paddle.fluid.nets.img_conv_group (ArgSpec(args=['input', 'conv_num_filter', 'pool_size', 'conv_padding', 'conv_filter_size', 'conv_act', 'param_attr', 'conv_with_batchnorm', 'conv_batchnorm_drop_rate', 'pool_stride', 'pool_type', 'use_cudnn'], varargs=None, keywords=None, defaults=(1, 3, None, None, False, 0.0, 1, 'max', True)), ('document', '6033b78da39b8b0ed302fbb0f67da502'))
 paddle.fluid.optimizer.SGDOptimizer ('paddle.fluid.optimizer.SGDOptimizer', ('document', 'c3c8dd3193d991adf8bda505560371d6'))
@@ -1009,7 +1009,7 @@ paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'los
 paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
-paddle.fluid.optimizer.LambOptimizer ('paddle.fluid.optimizer.LambOptimizer', ('document', '7dd8b270156a52f1f6b4663336960893'))
+paddle.fluid.optimizer.LambOptimizer ('paddle.fluid.optimizer.LambOptimizer', ('document', '56b5b21dc8fb01174c3bdd0b24f8be4b'))
 paddle.fluid.optimizer.LambOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'lamb_weight_decay', 'beta1', 'beta2', 'epsilon', 'regularization', 'exclude_from_weight_decay_fn', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.01, 0.9, 0.999, 1e-06, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.LambOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '80ea99c9af7ef5fac7e57fb302103610'))
 paddle.fluid.optimizer.LambOptimizer.apply_optimize (ArgSpec(args=['self', 'loss', 'startup_program', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', '5c46d1926a40f1f873ffe9f37ac89dae'))
@@ -1017,9 +1017,9 @@ paddle.fluid.optimizer.LambOptimizer.backward (ArgSpec(args=['self', 'loss', 'st
 paddle.fluid.optimizer.LambOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
 paddle.fluid.optimizer.LambOptimizer.load (ArgSpec(args=['self', 'stat_dict'], varargs=None, keywords=None, defaults=None), ('document', '649a92cf7f1ea28666fd00c4ea01acde'))
 paddle.fluid.optimizer.LambOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'grad_clip'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', '8387af01322a6defc92c1832faccd304'))
-paddle.fluid.optimizer.ExponentialMovingAverage ('paddle.fluid.optimizer.ExponentialMovingAverage', ('document', 'a38b7d5b9f17a295ed15d4c1b9ab4cd0'))
+paddle.fluid.optimizer.ExponentialMovingAverage ('paddle.fluid.optimizer.ExponentialMovingAverage', ('document', 'c93a5e29890877d94c3180b3d5f2d464'))
 paddle.fluid.optimizer.ExponentialMovingAverage.__init__ (ArgSpec(args=['self', 'decay', 'thres_steps', 'name'], varargs=None, keywords=None, defaults=(0.999, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
-paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '30f494752ac8921dc5835a63637f453a'))
+paddle.fluid.optimizer.ExponentialMovingAverage.apply (ArgSpec(args=['self', 'executor', 'need_restore'], varargs=None, keywords=None, defaults=(True,)), ('document', '178fa374c299906eb88c0df8ed21a337'))
 paddle.fluid.optimizer.ExponentialMovingAverage.restore (ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None), ('document', '8c8a1791608b02a1ede53d6dd3a4fcec'))
 paddle.fluid.optimizer.ExponentialMovingAverage.update (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'ea10f08af6d7aac3b7974aa976e4085f'))
 paddle.fluid.optimizer.PipelineOptimizer ('paddle.fluid.optimizer.PipelineOptimizer', ('document', '6f85382abedb922387b08d98e8d0b69c'))

--- a/paddle/fluid/operators/crf_decoding_op.cc
+++ b/paddle/fluid/operators/crf_decoding_op.cc
@@ -21,45 +21,47 @@ class CRFDecodingOpMaker : public framework::OpProtoAndCheckerMaker {
  void Make() override {
    AddInput(
        "Emission",
-        "(Tensor<float>/LoDTensor<float>). For a LoDTensor input, its "
+        "(Tensor/LoDTensor). For a LoDTensor input, its shape is [N x D] "
-        "shape is [N x D] where N is the total sequence length of the "
+        "where N is the total sequence length of the mini-batch and D is "
-        "mini-batch and D is the total tag number. While for a tensor "
+        "the total tag number. While for a tensor input, its shape is "
-        "input, its shape is [B X S X D] with B the batch size and S the "
+        "[B X S X D] with B the batch size and S the sequence length of each "
-        "sequence length of each sample after padding. This input is the "
+        "sample after padding. This input is the unscaled emission weight "
-        "unscaled emission weight matrix of the linear_chain_crf operator.");
+        "matrix of the linear_chain_crf operator. The data type is float32 "
+        "or float64.");
    AddInput(
        "Transition",
-        "(Tensor<float>). A Tensor with shape [(D + 2) x D]. "
+        "(Tensor). A Tensor with shape [(D + 2) x D]. "
        "This input is the transition weights learned by the linear_chain_crf "
        "operator, denoted as w. The 1st row of w are transition weights for "
        "the start mask. The 2nd row of w are transition weights for the end "
        "mask. Transition weights between other tags begin from the 3rd row of "
-        "w. See more details in comments of the linear_chain_crf operator.");
+        "w. See more details in comments of the linear_chain_crf operator. "
+        "The data type is the same as Input(Emission).");
    AddInput(
        "Label",
-        "(Tensor<int64_t>/LoDTensor<int64_t>). The ground truth with shape "
+        "(Tensor/LoDTensor). The ground truth with shape "
        "[N x 1] (for LoDTensor) or [B x S] (for Tensor). This input is "
-        "optional. See more details in the operator's comments.")
+        "optional. See more details in the operator's comments. The data type "
+        "is int64.")
        .AsDispensable();
    AddOutput(
        "ViterbiPath",
-        "(Tensor<int64_t>/LoDTensor<int64_t>). The decoding results. What to "
+        "(Tensor/LoDTensor). The decoding results. What to "
        "return changes depending on whether the Input(Label) (the ground "
-        "truth) is given. See more details in the operator's comment.");
+        "truth) is given. See more details in the operator's comment. "
+        "The data type is int64.");
    AddInput("Length",
-             "(Tensor<int64_t>). The actual length of each sample before "
+             "(Tensor). The actual length of each sample before "
             "padding with shape [B x 1]. It means the Input(Emission), "
-             "Input(Label) "
+             "Input(Label) and Output(ViterbiPath) are common tensors with "
-             "and Output(ViterbiPath) are common tensors with padding when "
+             "padding when this input is given. The data type is int64.")
-             "this input "
-             "is given.")
        .AsDispensable();
    AddComment(R"DOC(
 The crf_decoding operator reads the emission feature weights and the transition
-feature weights learned by the linear_chain_crf operator. It implements the
+feature weights learned by the linear_chain_crf operator and performs decoding. 
-Viterbi algorithm which is a dynamic programming algorithm for finding the most
+It implements the Viterbi algorithm which is a dynamic programming algorithm 
-likely sequence of hidden states, called the Viterbi path, that results in a
+for finding the most likely sequence of hidden states, called the Viterbi path, 
-sequence of observed tags.
+that results in a sequence of observed tags.
 The output of this operator changes according to whether Input(Label) is given:

--- a/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+++ b/paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
@@ -46,7 +46,7 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
    AddAttr<int>("seed",
                 "(int, default 0) "
                 "Random seed of generator."
-                 "0 means use system wide seed."
+                 " 0 means don't specify random seed. "
                 "Note that if seed is not 0, this operator will always "
                 "generate the same random numbers every time.")
        .SetDefault(0);
@@ -58,9 +58,9 @@ class GaussianRandomBatchSizeLikeOpMaker : public BatchSizeLikeOpMaker {
    AddComment(R"DOC(
 Used to initialize tensors with gaussian random generator.
-The default mean of the distribution is 0. and default standard
+The default mean of the distribution is 0, and default standard
-deviation (std) of the distribution is 1.. Uers can set mean and std
+deviation (std) of the distribution is 1.0. Users can set mean and std
-by input arguments.
+via input arguments.
 )DOC");
  }
 };

--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -1610,11 +1610,13 @@ def crf_decoding(input, param_attr, label=None, length=None):
    Args:
        input(${emission_type}): ${emission_comment}
-        param_attr(ParamAttr): The parameter attribute for training.
+        param_attr (ParamAttr|None): To specify the weight parameter attribute. 
+            Default: None, which means the default weight parameter property is 
+            used. See usage for details in :ref:`api_fluid_ParamAttr` .
-        label(${label_type}): ${label_comment}
+        label(${label_type}, optional): ${label_comment}
-        label(${length_type}): ${length_comment}
+        length(${length_type}, optional): ${length_comment}
    Returns:
        Variable: ${viterbi_path_comment}
@@ -1626,8 +1628,8 @@ def crf_decoding(input, param_attr, label=None, length=None):
           # LoDTensor-based example
           num_labels = 10
-           feature = fluid.layers.data(name='word_emb', shape=[784], dtype='float32', lod_level=1)
+           feature = fluid.data(name='word_emb', shape=[-1, 784], dtype='float32', lod_level=1)
-           label = fluid.layers.data(name='label', shape=[1], dtype='int64', lod_level=1)
+           label = fluid.data(name='label', shape=[-1, 1], dtype='int64', lod_level=1)
           emission = fluid.layers.fc(input=feature, size=num_labels)
           crf_cost = fluid.layers.linear_chain_crf(input=emission, label=label, 
@@ -1637,9 +1639,9 @@ def crf_decoding(input, param_attr, label=None, length=None):
           # Common tensor example
           num_labels, max_len = 10, 20
-           feature = fluid.layers.data(name='word_emb_pad', shape=[max_len, 784], dtype='float32')
+           feature = fluid.data(name='word_emb_pad', shape=[-1, max_len, 784], dtype='float32')
-           label = fluid.layers.data(name='label_pad', shape=[max_len, 1], dtype='int64')
+           label = fluid.data(name='label_pad', shape=[-1, max_len, 1], dtype='int64')
-           length = fluid.layers.data(name='length', shape=[1], dtype='int64')
+           length = fluid.data(name='length', shape=[-1, 1], dtype='int64')
           emission = fluid.layers.fc(input=feature, size=num_labels,
                                      num_flatten_dims=2)
@@ -7078,33 +7080,33 @@ def nce(input,
    ${comment}
    Args:
-        input (Variable): input variable.
+        input (Variable): Input variable, 2-D tensor with shape [batch_size, dim], 
-        label (Variable): label.
+            and data type is float32 or float64.
-        num_total_classes (int):${num_total_classes_comment}
+        label (Variable): Input label, 2-D tensor with shape [batch_size, num_true_class],
+            and data type is int64.
+        num_total_classes (int):${num_total_classes_comment}.
        sample_weight (Variable|None): A Variable of shape [batch_size, 1]
            storing a weight for each sample. The default weight for each
            sample is 1.0.
-        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+        param_attr (ParamAttr|None): To specify the weight parameter attribute. 
-             of nce. If it is set to None or one attribute of ParamAttr, nce
+            Default: None, which means the default weight parameter property is 
-             will create ParamAttr as param_attr. If the Initializer of the param_attr
+            used. See usage for details in :ref:`api_fluid_ParamAttr` .
-             is not set, the parameter is initialized with Xavier. Default: None.
+        bias_attr (ParamAttr|None): To specify the bias parameter attribute. 
-        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of nce.
+            Default: None, which means the default bias parameter property is 
-             If it is set to False, no bias will be added to the output units.
+            used. See usage for details in :ref:`api_fluid_ParamAttr` .
-             If it is set to None or one attribute of ParamAttr, nce
+        num_neg_samples (int): ${num_neg_samples_comment}.
-             will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+        name(str|None): For detailed information, please refer to 
-             is not set, the bias is initialized zero. Default: None.
+            :ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
-        num_neg_samples (int): ${num_neg_samples_comment}
+        sampler (str, optional): The sampler used to sample class from negative classes.
-        name (str|None): A name for this layer(optional). If set None, the layer
-             will be named automatically. Default: None.
-        sampler (str): The sampler used to sample class from negtive classes.
                       It can be 'uniform', 'log_uniform' or 'custom_dist'.
                       default: 'uniform'.
-        custom_dist (float[]): A float[] with size=num_total_classes.
+        custom_dist (numpy.ndarray|None): A numpy ndarray with size=num_total_classes.
                       It is used when sampler is set to 'custom_dist'.
                       custom_dist[i] is the probsbility of i-th class to be sampled.
                       default: None.
-        seed (int): The seed used in sampler. default: 0.
+        seed (int, optional): The seed used in sampler. Default 0, means no random seed.
-        is_sparse(bool): The flag indicating whether to use sparse update, the weight@GRAD and bias@GRAD will be changed to SelectedRows.
+        is_sparse(bool, optional): The flag indicating whether to use sparse update, 
+            the weight@GRAD and bias@GRAD will be changed to SelectedRows. Default False.
    Returns:
        Variable: The output nce loss.
@@ -7119,8 +7121,8 @@ def nce(input,
            window_size = 5
            words = []
            for i in xrange(window_size):
-                words.append(fluid.layers.data(
+                words.append(fluid.data(
-                    name='word_{0}'.format(i), shape=[1], dtype='int64'))
+                    name='word_{0}'.format(i), shape=[-1, 1], dtype='int64'))
            dict_size = 10000
            label_word = int(window_size / 2) + 1
@@ -8182,17 +8184,17 @@ def one_hot(input, depth, allow_out_of_range=False):
 def autoincreased_step_counter(counter_name=None, begin=1, step=1):
    """
-    Create an auto-increase variable
+    Create an auto-increase variable, which will be automatically increased 
-    which will be automatically increased by 1 every mini-batch
+    by 1 in every iteration. By default, the first return of this counter is 1, 
-    Return the run counter of the main program, default is started from 1.
+    and the step size is 1.
    Args:
-        counter_name(str): The counter name, default is '@STEP_COUNTER@'.
+        counter_name(str, optional): The counter name. Default '@STEP_COUNTER@'.
-        begin(int): The first value of this counter.
+        begin(int, optional): The first return value of this counter. Default 1.
-        step(int): The increment step between each execution.
+        step(int, optional): The step size. Default 1.
    Returns:
-        Variable: The global run counter.
+        Variable: The auto-increased Variable with data type int64.
    Examples:
        .. code-block:: python
@@ -9976,9 +9978,11 @@ def gather(input, index, overwrite=True):
                       [5, 6]]
    Args:
-        input (Variable): The source input with rank>=1.
+        input (Variable): The source input tensor with rank>=1. Supported data type is 
-        index (Variable): The index input with rank=1.
+            int32, int64, float32, float64 and uint8 (only for CPU), 
-        overwrite (bool): The mode that updating the grad when has same index.
+            float16 (only for GPU).
+        index (Variable): The index input tensor with rank=1. Data type is int32 or int64.
+        overwrite (bool, optional): The mode used to update the grad when the same index appears more than once.
            If True, use the overwrite mode to update the grad of the same index,
 	    if False, use the accumulate mode to update the grad of the same index. 
 	    Default value is True.
@@ -9993,8 +9997,8 @@ def gather(input, index, overwrite=True):
        .. code-block:: python
            import paddle.fluid as fluid
-            x = fluid.layers.data(name='x', shape=[-1, 5], dtype='float32')
+            x = fluid.data(name='x', shape=[-1, 5], dtype='float32')
-            index = fluid.layers.data(name='index', shape=[-1, 1], dtype='int32')
+            index = fluid.data(name='index', shape=[-1, 1], dtype='int32')
            output = fluid.layers.gather(x, index)
    """
    helper = LayerHelper('gather', **locals())
@@ -10996,11 +11000,12 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
    Args:
       label (Variable): Indicates whether the left is ranked higher than the right or not.
-       left (Variable): Ranking score for left.
+           Data type is float32.
-       right (Variable): Ranking score for right.
+       left (Variable): Ranking score for left. Data type float32.
+       right (Variable): Ranking score for right. Data type float32.
       margin (float): Indicates the given margin.
-       name (str|None): A name for this layer (optional). If set None, the layer
+       name(str|None): For detailed information, please refer to 
-                       will be named automatically.
+           :ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
    Returns:
       Variable: The ranking loss.
@@ -11013,9 +11018,9 @@ def margin_rank_loss(label, left, right, margin=0.1, name=None):
        .. code-block:: python
           import paddle.fluid as fluid
-           label = fluid.layers.data(name="label", shape=[-1, 1], dtype="float32")
+           label = fluid.data(name="label", shape=[-1, 1], dtype="float32")
-           left = fluid.layers.data(name="left", shape=[-1, 1], dtype="float32")
+           left = fluid.data(name="left", shape=[-1, 1], dtype="float32")
-           right = fluid.layers.data(name="right", shape=[-1, 1], dtype="float32")
+           right = fluid.data(name="right", shape=[-1, 1], dtype="float32")
           out = fluid.layers.margin_rank_loss(label, left, right)
    """
    helper = LayerHelper('margin_rank_loss', **locals())
@@ -12186,12 +12191,12 @@ def gaussian_random_batch_size_like(input,
    Args:
        input (Variable): ${input_comment}
        shape (tuple|list): ${shape_comment}
-        input_dim_idx (Int): ${input_dim_idx_comment}
+        input_dim_idx (int): ${input_dim_idx_comment}
-        output_dim_idx (Int): ${output_dim_idx_comment}
+        output_dim_idx (int): ${output_dim_idx_comment}
-        mean (Float): ${mean_comment}
+        mean (float): ${mean_comment}
-        std (Float): ${std_comment}
+        std (float): ${std_comment}
-        seed (Int): ${seed_comment}
+        seed (int): ${seed_comment}
-        dtype(np.dtype|core.VarDesc.VarType|str): The type of output data : float32, float_16, int etc
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of output data, float32 or float64.
    Returns:
        out (Variable): ${out_comment}
@@ -12200,7 +12205,7 @@ def gaussian_random_batch_size_like(input,
        .. code-block:: python
            import paddle.fluid as fluid
-            input = fluid.layers.data(name="input", shape=[13, 11], dtype='float32')
+            input = fluid.data(name="input", shape=[13, 11], dtype='float32')
            out = fluid.layers.gaussian_random_batch_size_like(
                input, shape=[-1, 11], mean=1.0, std=2.0)
@@ -14000,7 +14005,8 @@ def similarity_focus(input, axis, indexes, name=None):
    Args:
        input(Variable): The input tensor variable(default float). It should
-            be a 4-D tensor with shape [BatchSize, A, B, C].
+            be a 4-D tensor with shape [BatchSize, A, B, C]. Data type is 
+            float32 or float64.
        axis(int): Indicating the dimension to be selected. It can only be
            1, 2 or 3.
        indexes(list): Indicating the indexes of the selected dimension.
@@ -14013,7 +14019,7 @@ def similarity_focus(input, axis, indexes, name=None):
        .. code-block:: python
            import paddle.fluid as fluid
-            data = fluid.layers.data(
+            data = fluid.data(
                name='data', shape=[-1, 3, 2, 2], dtype='float32')
            fluid.layers.similarity_focus(input=data, axis=1, indexes=[0])
    """
@@ -14226,13 +14232,15 @@ def log_loss(input, label, epsilon=1e-4, name=None):
              - (1 - label) * \\log{(1 - input + \\epsilon)}
    Args:
-        input (Variable|list):  a 2-D tensor with shape [N x 1], where N is the
+        input (Variable|list):  A 2-D tensor with shape [N x 1], where N is the
                                batch size. This input is a probability computed
-                                by the previous operator.
+                                by the previous operator. Data type float32.
-        label (Variable|list):  the ground truth which is a 2-D tensor with
+        label (Variable|list):  The ground truth which is a 2-D tensor with
-                                shape [N x 1], where N is the batch size.
+                                shape [N x 1], where N is the batch size. 
-        epsilon (float): epsilon
+                                Data type float32.
-        name (string): the name of log_loss
+        epsilon (float, optional): A small number for numerical stability. Default 1e-4.
+        name(str|None): For detailed information, please refer to 
+            :ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
    Returns:
        Variable: A 2-D tensor with shape [N x 1], the negative log loss.
@@ -14241,8 +14249,8 @@ def log_loss(input, label, epsilon=1e-4, name=None):
        .. code-block:: python
          import paddle.fluid as fluid
-          label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+          label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
-          prob = fluid.layers.data(name='prob', shape=[10], dtype='float32')
+          prob = fluid.data(name='prob', shape=[-1, 10], dtype='float32')
          cost = fluid.layers.log_loss(input=prob, label=label)
    """
    helper = LayerHelper('log_loss', **locals())
@@ -14380,7 +14388,7 @@ def bilinear_tensor_product(x,
                            param_attr=None,
                            bias_attr=None):
    """
-    **Add Bilinear Tensor Product Layer**
+    **Bilinear Tensor Product Layer**
    This layer performs bilinear tensor product on two inputs.
    For example:
@@ -14391,31 +14399,34 @@ def bilinear_tensor_product(x,
    In this formula:
      - :math:`x`: the first input contains M elements, shape is [batch_size, M].
      - :math:`y`: the second input contains N elements, shape is [batch_size, N].
-      - :math:`W_{i}`: the i-th learned weight, shape is [M, N]
+      - :math:`W_{i}`: the i-th learned weight, shape is [M, N].
      - :math:`out_{i}`: the i-th element of out, shape is [batch_size, size].
      - :math:`y^\mathrm{T}`: the transpose of :math:`y_{2}`.
    Args:
-        x (Variable): 2-D input tensor with shape [batch_size, M]
+        x (Variable): 2-D input tensor with shape [batch_size, M]. Data type 
-        y (Variable): 2-D input tensor with shape [batch_size, N]
+            is float32 or float64.
+        y (Variable): 2-D input tensor with shape [batch_size, N]. Data type 
+            should be same as **x**.
        size (int): The dimension of this layer.
-        act (str, default None): Activation to be applied to the output of this layer.
+        act (str|None): Activation to be applied to the output of this layer. Default None.
-        name (str, default None): The name of this layer.
+        name(str|None): For detailed information, please refer to 
-        param_attr (ParamAttr, default None): The parameter attribute for the learnable w.
+            :ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
-            parameters/weights of this layer.
+        param_attr (ParamAttr|None): To specify the weight parameter attribute. 
-        bias_attr (ParamAttr, default None): The parameter attribute for the bias
+            Default: None, which means the default weight parameter property is 
-            of this layer. If it is set to False, no bias will be added to the output units.
+            used. See usage for details in :ref:`api_fluid_ParamAttr` .
-            If it is set to None, the bias is initialized zero. Default: None.
+        bias_attr (ParamAttr|None): To specify the bias parameter attribute. 
+            Default: None, which means the default bias parameter property is 
+            used. See usage for details in :ref:`api_fluid_ParamAttr` .
    Returns:
-        Variable: A 2-D Tensor of shape [batch_size, size].
+        Variable: A 2-D Tensor of shape [batch_size, size]. Data type is the same as input **x**.
    Examples:
        .. code-block:: python
          import paddle.fluid as fluid
-          layer1 = fluid.layers.data("t1", shape=[-1, 5], dtype="float32")
+          layer1 = fluid.data("t1", shape=[-1, 5], dtype="float32")
-          layer2 = fluid.layers.data("t2", shape=[-1, 4], dtype="float32")
+          layer2 = fluid.data("t2", shape=[-1, 4], dtype="float32")
          tensor = fluid.layers.bilinear_tensor_product(x=layer1, y=layer2, size=1000)
    """
    helper = LayerHelper('bilinear_tensor_product', **locals())

--- a/python/paddle/fluid/nets.py
+++ b/python/paddle/fluid/nets.py
@@ -318,8 +318,9 @@ def sequence_conv_pool(input,
 def glu(input, dim=-1):
    """
-    The Gated Linear Units(GLU) composed by split, sigmoid activation and element-wise
+    The Gated Linear Units(GLU) is composed of :ref:`api_fluid_layers_split` , 
-    multiplication. Specifically, Split the input into two equal sized parts,
+    :ref:`api_fluid_layers_sigmoid`  and :ref:`api_fluid_layers_elementwise_mul` . 
+    Specifically, GLU will split the input into two equal-sized parts,
    :math:`a` and :math:`b`, along the given dimension and then compute as
    following:
@@ -331,18 +332,20 @@ def glu(input, dim=-1):
    <https://arxiv.org/pdf/1612.08083.pdf>`_.
    Args:
-        input (Variable): The input variable which is a Tensor or LoDTensor.
+        input (Variable): The input variable which is a Tensor or LoDTensor. 
-        dim (int): The dimension along which to split. If :math:`dim < 0`, the
+                          The supported data types include float32, float64 
+                          and float16 (only for GPU).
+        dim (int, optional): The dimension along which to split. If :math:`dim < 0`, the
            dimension to split along is :math:`rank(input) + dim`. Default -1.
    Returns:
-        Variable: Variable with half the size of input.
+        Variable: Variable with half the size of input and the same data type as input.
    Examples:
        .. code-block:: python
            import paddle.fluid as fluid
-            data = fluid.layers.data(
+            data = fluid.data(
                name="words", shape=[-1, 6, 3, 9], dtype="float32")
            # shape of output: [-1, 3, 3, 9]
            output = fluid.nets.glu(input=data, dim=1)

--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -2254,11 +2254,11 @@ class LambOptimizer(AdamOptimizer):
    ..  math::
-        m_t &= \\beta_1 m_{t - 1}+ (1 - \\beta_1)g_t \\
+        m_t &= \\beta_1 m_{t - 1}+ (1 - \\beta_1)g_t 
-        v_t &= \\beta_2 v_{t - 1}  + (1 - \\beta_2)g_t^2 \\
+        v_t &= \\beta_2 v_{t - 1}  + (1 - \\beta_2)g_t^2
-        r_t &= \\frac{m_t}{\\sqrt{v_t}+\\epsilon} \\
+        r_t &= \\frac{m_t}{\\sqrt{v_t}+\\epsilon}
        w_t &= w_{t-1} -\\eta_t \\frac{\\left \| w_{t-1}\\right \|}{\\left \| r_t + \\lambda w_{t-1}\\right \|} (r_t + \\lambda w_{t-1})
@@ -2267,25 +2267,28 @@ class LambOptimizer(AdamOptimizer):
    learning rate, :math:`\\lambda` the LAMB weight decay rate.
    Args:
-        learning_rate (float|Variable): the learning rate used to update parameters. \
+        learning_rate (float|Variable, optional): the learning rate used to update parameters. \
-                                        Can be a float value or a Variable with one \
+            Can be a float value or a Variable with data type float32. Default 0.001.
-                                        float value as data element.
+        lamb_weight_decay (float, optional): The LAMB weight decay rate. Default 0.01.
-        lamb_weight_decay (float): The LAMB weight decay rate.
+        beta1 (float, optional): The exponential decay rate for the 1st moment estimates.
-        beta1 (float): The exponential decay rate for the 1st moment estimates.
+            Default 0.9.
-        beta2 (float): The exponential decay rate for the 2nd moment estimates.
+        beta2 (float, optional): The exponential decay rate for the 2nd moment estimates.
-        epsilon (float): A small float value for numerical stability.
+            Default 0.999.
-        regularization (Regularizer): A Regularizer, such as
+        epsilon (float, optional): A small float value for numerical stability. Default 1e-6.
-                        fluid.regularizer.L1DecayRegularizer.
+        regularization (Regularizer|None): A Regularizer, such as
-        exclude_from_weight_decay_fn (function): Exclude a parameter from weight 
+           fluid.regularizer.L1DecayRegularizer. Default None.
-            decay when **exclude_from_weight_decay_fn(parameter)** returns true.
+        exclude_from_weight_decay_fn (function|None): Exclude a parameter from weight 
-        name (str|None): An optional name prefix.
+            decay when **exclude_from_weight_decay_fn(parameter)** returns true. 
+            Default None.
+        name(str|None): For detailed information, please refer to 
+            :ref:`api_guide_Name` . Usually there is no need to set name; it is None by default.
    Examples:
        .. code-block:: python
            import paddle.fluid as fluid 
-            data = fluid.layers.data(name='x', shape=[5], dtype='float32')
+            data = fluid.data(name='x', shape=[-1, 5], dtype='float32')
            hidden = fluid.layers.fc(input=data, size=10)
            cost = fluid.layers.mean(hidden)
@@ -2738,10 +2741,13 @@ class ExponentialMovingAverage(object):
    Args:
-	decay (float): The exponential decay rate, usually close to 1, such as 
+	decay (float, optional): The exponential decay rate, usually close to 1, such as 
-                       0.999, 0.9999, ... .
+            0.999, 0.9999, ... . Default 0.999.
-        thres_steps (Variable|None): If not `None`, schedule the decay rate.
+        thres_steps (Variable|None): If not `None`, schedule the decay rate. 
-	name (str|None): An optional name prefix.
+            Default None.
+        name (str|None): For detailed information, please refer to 
+            :ref:`api_guide_Name`. Usually there is no need to set name; it is 
+            None by default.
    Examples:
@@ -2752,7 +2758,7 @@ class ExponentialMovingAverage(object):
 	    import paddle
 	    import paddle.fluid as fluid
-	    data = fluid.layers.data(name='x', shape=[5], dtype='float32')
+	    data = fluid.data(name='x', shape=[-1, 5], dtype='float32')
 	    hidden = fluid.layers.fc(input=data, size=10)
 	    cost = fluid.layers.mean(hidden)
@@ -2910,7 +2916,8 @@ class ExponentialMovingAverage(object):
        Args:
            executor (Executor): The Executor to execute applying.
-            need_restore (bool): Whether to restore parameters after applying.
+            need_restore (bool, optional): Whether to restore parameters after 
+                applying. Default True.
        """
        executor.run(self.apply_program)
        try: