modify alias display

0ffc4e5a · chentianyu03 · f8c89a84 · 0ffc4e5a · 0ffc4e5a · 0ffc4e5a
482 changed file
--- a/doc/paddle/api/api_label
+++ b/doc/paddle/api/api_label
-l1_loss	.. _api_paddle_nn_functional_loss_l1_loss:
+train	.. _api_paddle_dataset_wmt14_train:
-elementwise_mod	.. _api_paddle_elementwise_mod:
+roi_pool	.. _api_paddle_fluid_layers_roi_pool:
-ParallelEnv	.. _api_paddle_ParallelEnv:
+expand	.. _api_paddle_fluid_layers_expand:
-CompiledProgram	.. _api_paddle_static_CompiledProgram:
+ReLU6	.. _api_paddle_nn_layer_activation_ReLU6:
-HSigmoid	.. _api_paddle_nn_layer_activation_HSigmoid:
+pixel_shuffle	.. _api_paddle_fluid_layers_pixel_shuffle:
-LookaheadOptimizer	.. _api_paddle_optimizer_LookaheadOptimizer:
+Conv3DTranspose	.. _api_paddle_fluid_dygraph_Conv3DTranspose:
-generate_mask_labels	.. _api_paddle_nn_functional_generate_mask_labels:
+cos_sim	.. _api_paddle_metric_cos_sim:
-test100	.. _api_paddle_dataset_cifar_test100:
+Tanh	.. _api_paddle_nn_layer_activation_Tanh:
-box_clip	.. _api_paddle_nn_functional_box_clip:
+dynamic_lstm	.. _api_paddle_fluid_layers_dynamic_lstm:
-load	.. _api_paddle_static_load:
+DatasetFactory	.. _api_paddle_distributed_fleet_DatasetFactory:
-TranslatedLayer	.. _api_paddle_jit_TranslatedLayer:
+multi_box_head	.. _api_paddle_fluid_layers_multi_box_head:
-chunk_eval	.. _api_paddle_metric_chunk_eval:
+LSTMCell	.. _api_paddle_fluid_dygraph_LSTMCell:
+test	.. _api_paddle_dataset_imikolov_test:
+LogSoftmax	.. _api_paddle_nn_layer_activation_LogSoftmax:
+basic_gru	.. _api_paddle_fluid_contrib_basic_gru:
+cross_entropy	.. _api_paddle_fluid_layers_cross_entropy:
+kldiv_loss	.. _api_paddle_fluid_layers_kldiv_loss:
+FSShellCmdAborted	.. _api_paddle_distributed_fleet_utils_FSShellCmdAborted:
+CUDAPinnedPlace	.. _api_paddle_framework_CUDAPinnedPlace:
+read_file	.. _api_paddle_fluid_layers_read_file:
+zeros_like	.. _api_paddle_tensor_creation_zeros_like:
+GraphExecutionOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_GraphExecutionOptimizer:
+log1p	.. _api_paddle_tensor_math_log1p:
+Mode	.. _api_paddle_fluid_incubate_fleet_base_mode_Mode:
+less_equal	.. _api_paddle_fluid_layers_less_equal:
+reverse	.. _api_paddle_fluid_layers_reverse:
+valid	.. _api_paddle_dataset_flowers_valid:
+sqrt	.. _api_paddle_fluid_layers_sqrt:
 bmm	.. _api_paddle_tensor_linalg_bmm:
-UpSample	.. _api_paddle_nn_layer_common_UpSample:
+sequence_conv	.. _api_paddle_fluid_layers_sequence_conv:
-gather_nd	.. _api_paddle_gather_nd:
+resize_bilinear	.. _api_paddle_fluid_layers_resize_bilinear:
-log_softmax	.. _api_paddle_nn_functional_activation_log_softmax:
+logical_not	.. _api_paddle_fluid_layers_logical_not:
-PipelineOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_PipelineOptimizer:
+AsyncGraphExecutionOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AsyncGraphExecutionOptimizer:
-load_and_transform	.. _api_paddle_dataset_image_load_and_transform:
+SyncBatchNorm	.. _api_paddle_nn_SyncBatchNorm:
-test	.. _api_paddle_dataset_wmt16_test:
+extend_with_decoupled_weight_decay	.. _api_paddle_fluid_contrib_extend_with_decoupled_weight_decay:
-default_main_program	.. _api_paddle_static_default_main_program:
+uniform_random_batch_size_like	.. _api_paddle_fluid_layers_uniform_random_batch_size_like:
-test	.. _api_paddle_dataset_sentiment_test:
+fsp_matrix	.. _api_paddle_fluid_layers_fsp_matrix:
-ReflectionPad2d	.. _api_paddle_nn_ReflectionPad2d:
+BilinearTensorProduct	.. _api_paddle_fluid_dygraph_BilinearTensorProduct:
-affine_grid	.. _api_paddle_nn_functional_affine_grid:
+huber_loss	.. _api_paddle_fluid_layers_huber_loss:
-hsigmoid	.. _api_paddle_nn_functional_activation_hsigmoid:
+RoleMakerBase	.. _api_paddle_distributed_fleet_base_role_maker_RoleMakerBase:
-multiplex	.. _api_paddle_multiplex:
+Hardtanh	.. _api_paddle_nn_layer_activation_Hardtanh:
-GradientMergeOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_GradientMergeOptimizer:
+DistributedAdam	.. _api_paddle_fluid_incubate_fleet_parameter_server_pslib_optimizer_factory_DistributedAdam:
-RMSPropOptimizer	.. _api_paddle_optimizer_RMSPropOptimizer:
+batch	.. _api_paddle_io_batch:
+run_check	.. _api_paddle_fluid_install_check_run_check:
+linspace	.. _api_paddle_fluid_layers_linspace:
+cluster_files_reader	.. _api_paddle_dataset_common_cluster_files_reader:
+LogSigmoid	.. _api_paddle_nn_layer_activation_LogSigmoid:
+cuda_places	.. _api_paddle_fluid_cuda_places:
+unbind	.. _api_paddle_fluid_layers_unbind:
+log	.. _api_paddle_fluid_layers_log:
+reduce_mean	.. _api_paddle_fluid_layers_reduce_mean:
+guard	.. _api_paddle_fluid_unique_name_guard:
+Layer	.. _api_paddle_fluid_dygraph_layers_Layer:
+embedding	.. _api_paddle_fluid_layers_embedding:
+chain	.. _api_paddle_io_chain:
+BuildStrategy	.. _api_paddle_fluid_compiler_BuildStrategy:
+rand	.. _api_paddle_tensor_random_rand:
+enable_dygraph	.. _api_paddle_fluid_dygraph_base_enable_dygraph:
+Dpsgd	.. _api_paddle_optimizer_Dpsgd:
+not_equal	.. _api_paddle_tensor_logic_not_equal:
+size	.. _api_paddle_fluid_layers_size:
+md5file	.. _api_paddle_dataset_common_md5file:
+movie_categories	.. _api_paddle_dataset_movielens_movie_categories:
+Quant2Int8MkldnnPass	.. _api_paddle_fluid_contrib_slim_quantization_quant2_int8_mkldnn_pass_Quant2Int8MkldnnPass:
+cosine_decay	.. _api_paddle_fluid_layers_cosine_decay:
+KVHTTPServer	.. _api_paddle_distributed_fleet_utils_KVHTTPServer:
+gelu	.. _api_paddle_fluid_layers_gelu:
+triu	.. _api_paddle_tensor_creation_triu:
+KLDivLoss	.. _api_paddle_nn_layer_loss_KLDivLoss:
+roi_align	.. _api_paddle_fluid_layers_roi_align:
+search_pyramid_hash	.. _api_paddle_fluid_contrib_search_pyramid_hash:
+QuantizationTransformPass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_QuantizationTransformPass:
 LeakyReLU	.. _api_paddle_nn_layer_activation_LeakyReLU:
-is_empty	.. _api_paddle_is_empty:
+Tanhshrink	.. _api_paddle_nn_layer_activation_Tanhshrink:
-mnist	.. _api_paddle_dataset_mnist:
+tree_conv	.. _api_paddle_fluid_contrib_tree_conv:
-adaptive_pool2d	.. _api_paddle_nn_functional_adaptive_pool2d:
+tanh	.. _api_paddle_tensor_math_tanh:
-logical_and	.. _api_paddle_logical_and:
+equal_all	.. _api_paddle_tensor_logic_equal_all:
-reshape	.. _api_paddle_reshape:
+FtrlOptimizer	.. _api_paddle_optimizer_FtrlOptimizer:
-reduce_min	.. _api_paddle_reduce_min:
+get_default_dtype	.. _api_paddle_framework_get_default_dtype:
-compose	.. _api_paddle_io_compose:
+ConvTranspose3d	.. _api_paddle_nn_layer_conv_ConvTranspose3d:
-scope_guard	.. _api_paddle_static_scope_guard:
+dropout	.. _api_paddle_fluid_layers_dropout:
-log_loss	.. _api_paddle_nn_functional_log_loss:
+unsqueeze	.. _api_paddle_fluid_layers_unsqueeze:
-get_exception_message	.. _api_paddle_compat_get_exception_message:
-FSFileNotExistsError	.. _api_paddle_distributed_fleet_utils_FSFileNotExistsError:
-to_static	.. _api_paddle_jit_to_static:
-less_equal	.. _api_paddle_tensor_logic_less_equal:
-conv2d_transpose	.. _api_paddle_nn_functional_conv_conv2d_transpose:
-uci_housing	.. _api_paddle_dataset_uci_housing:
-val	.. _api_paddle_dataset_voc2012_val:
-test10	.. _api_paddle_dataset_cifar_test10:
-GradientClipByGlobalNorm	.. _api_paddle_nn_GradientClipByGlobalNorm:
-cross_entropy	.. _api_paddle_nn_functional_cross_entropy:
-GraphExecutionOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_GraphExecutionOptimizer:
-trace	.. _api_paddle_tensor_math_trace:
-argmax	.. _api_paddle_tensor_search_argmax:
-cifar	.. _api_paddle_dataset_cifar:
-SGDOptimizer	.. _api_paddle_optimizer_SGDOptimizer:
-addmm	.. _api_paddle_tensor_math_addmm:
-multi_box_head	.. _api_paddle_static_nn_multi_box_head:
-imdb	.. _api_paddle_dataset_imdb:
-nonzero	.. _api_paddle_tensor_search_nonzero:
-broadcast_to	.. _api_paddle_tensor_manipulation_broadcast_to:
-pad_constant_like	.. _api_paddle_nn_functional_pad_constant_like:
-inverse_time_decay	.. _api_paddle_nn_functional_inverse_time_decay:
-index_select	.. _api_paddle_tensor_search_index_select:
-scatter	.. _api_paddle_scatter:
-SGD	.. _api_paddle_optimizer_SGD:
-image_resize_short	.. _api_paddle_nn_functional_image_resize_short:
-meshgrid	.. _api_paddle_tensor_creation_meshgrid:
-multiprocess_reader	.. _api_paddle_reader_multiprocess_reader:
-ParameterList	.. _api_paddle_nn_ParameterList:
-linspace	.. _api_paddle_linspace:
-CrossEntropyLoss	.. _api_paddle_nn_layer_loss_CrossEntropyLoss:
-square	.. _api_paddle_square:
-equal	.. _api_paddle_tensor_logic_equal:
-soft_relu	.. _api_paddle_nn_functional_soft_relu:
-Normal	.. _api_paddle_nn_initializer_Normal:
-DecayedAdagrad	.. _api_paddle_optimizer_DecayedAdagrad:
-firstn	.. _api_paddle_reader_firstn:
-randperm	.. _api_paddle_tensor_random_randperm:
-UserDefinedRoleMaker	.. _api_paddle_distributed_fleet_UserDefinedRoleMaker:
-reciprocal	.. _api_paddle_reciprocal:
-normalize	.. _api_paddle_nn_functional_norm_normalize:
-QueueDataset	.. _api_paddle_distributed_fleet_QueueDataset:
-Auc	.. _api_paddle_metric_Auc:
-dropout	.. _api_paddle_nn_functional_dropout:
 nll_loss	.. _api_paddle_nn_functional_loss_nll_loss:
-reduce_any	.. _api_paddle_reduce_any:
+PyReader	.. _api_paddle_fluid_reader_PyReader:
-crf_decoding	.. _api_paddle_static_nn_crf_decoding:
+Dropout	.. _api_paddle_nn_Dropout:
-train10	.. _api_paddle_dataset_cifar_train10:
+FetchHandlerMonitor	.. _api_paddle_fluid_trainer_factory_FetchHandlerMonitor:
-Pool2D	.. _api_paddle_nn_Pool2D:
+meshgrid	.. _api_paddle_tensor_creation_meshgrid:
-voc2012	.. _api_paddle_dataset_voc2012:
+sort	.. _api_paddle_tensor_search_sort:
-logsumexp	.. _api_paddle_tensor_math_logsumexp:
+prroi_pool	.. _api_paddle_fluid_layers_prroi_pool:
-Print	.. _api_paddle_static_Print:
+GradientClipByValue	.. _api_paddle_fluid_clip_GradientClipByValue:
-square_error_cost	.. _api_paddle_nn_functional_square_error_cost:
+create_parameter	.. _api_paddle_fluid_layers_tensor_create_parameter:
-density_prior_box	.. _api_paddle_nn_functional_density_prior_box:
+random_crop	.. _api_paddle_dataset_image_random_crop:
-NaturalExpDecay	.. _api_paddle_NaturalExpDecay:
+DownpourSGDOPT	.. _api_paddle_fluid_device_worker_DownpourSGDOPT:
-margin_ranking_loss	.. _api_paddle_nn_functional_loss_margin_ranking_loss:
+assign	.. _api_paddle_fluid_layers_assign:
-temporal_shift	.. _api_paddle_nn_functional_temporal_shift:
+test	.. _api_paddle_dataset_movielens_test:
-RowConv	.. _api_paddle_nn_layer_extension_RowConv:
+LoDTensorArray	.. _api_paddle_fluid_LoDTensorArray:
-gather	.. _api_paddle_tensor_manipulation_gather:
+Conv3D	.. _api_paddle_fluid_dygraph_Conv3D:
-smooth_l1	.. _api_paddle_nn_functional_smooth_l1:
+full	.. _api_paddle_tensor_creation_full:
-ExponentialMovingAverage	.. _api_paddle_optimizer_ExponentialMovingAverage:
+Assert	.. _api_paddle_fluid_layers_Assert:
-enable_static	.. _api_paddle_enable_static:
+dropout3d	.. _api_paddle_nn_functional_dropout3d:
-test	.. _api_paddle_dataset_mnist_test:
+mish	.. _api_paddle_fluid_layers_mish:
-split	.. _api_paddle_tensor_manipulation_split:
+TrainerFactory	.. _api_paddle_fluid_trainer_factory_TrainerFactory:
-global_scope	.. _api_paddle_static_global_scope:
+cosine_similarity	.. _api_paddle_nn_functional_cosine_similarity:
-Adam	.. _api_paddle_optimizer_Adam:
+ImperativeQuantAware	.. _api_paddle_fluid_contrib_slim_quantization_imperative_ImperativeQuantAware:
-movie_info	.. _api_paddle_dataset_movielens_movie_info:
+NCE	.. _api_paddle_fluid_dygraph_NCE:
+FSFileExistsError	.. _api_paddle_distributed_fleet_utils_FSFileExistsError:
+AmpScaler	.. _api_paddle_fluid_dygraph_AmpScaler:
+ctr_metric_bundle	.. _api_paddle_fluid_contrib_ctr_metric_bundle:
+softmax	.. _api_paddle_nn_functional_activation_softmax:
+reduce_min	.. _api_paddle_fluid_layers_reduce_min:
+TracedLayer	.. _api_paddle_fluid_dygraph_jit_TracedLayer:
 CPUPlace	.. _api_paddle_framework_CPUPlace:
-get_dict	.. _api_paddle_dataset_wmt14_get_dict:
+is_compiled_with_cuda	.. _api_paddle_fluid_is_compiled_with_cuda:
-data_norm	.. _api_paddle_static_nn_data_norm:
+iou_similarity	.. _api_paddle_fluid_layers_iou_similarity:
-floor	.. _api_paddle_floor:
+psroi_pool	.. _api_paddle_fluid_layers_psroi_pool:
-generate_proposals	.. _api_paddle_nn_functional_generate_proposals:
+sequence_first_step	.. _api_paddle_fluid_layers_sequence_first_step:
-bipartite_match	.. _api_paddle_nn_functional_bipartite_match:
+mean_iou	.. _api_paddle_metric_mean_iou:
-WeightNormParamAttr	.. _api_paddle_static_WeightNormParamAttr:
+AutoMixedPrecisionLists	.. _api_paddle_fluid_contrib_mixed_precision_AutoMixedPrecisionLists:
-train	.. _api_paddle_dataset_wmt16_train:
+simple_transform	.. _api_paddle_dataset_image_simple_transform:
-ones	.. _api_paddle_tensor_creation_ones:
+distributed_batch_reader	.. _api_paddle_fluid_contrib_distributed_batch_reader:
-CompositeMetric	.. _api_paddle_metric_CompositeMetric:
-train	.. _api_paddle_dataset_wmt14_train:
-resize_trilinear	.. _api_paddle_nn_functional_resize_trilinear:
-flowers	.. _api_paddle_dataset_flowers:
-rand	.. _api_paddle_tensor_random_rand:
-GradientClipByValue	.. _api_paddle_nn_GradientClipByValue:
-relu	.. _api_paddle_nn_functional_activation_relu:
-cosine_decay	.. _api_paddle_nn_functional_cosine_decay:
-SequenceSampler	.. _api_paddle_io_SequenceSampler:
-elementwise_add	.. _api_paddle_elementwise_add:
-hash	.. _api_paddle_nn_functional_hash:
-mse_loss	.. _api_paddle_nn_functional_loss_mse_loss:
-BeamSearchDecoder	.. _api_paddle_fluid_layers_BeamSearchDecoder:
-PolynomialDecay	.. _api_paddle_PolynomialDecay:
-cache	.. _api_paddle_io_cache:
-sinh	.. _api_paddle_sinh:
-teacher_student_sigmoid_loss	.. _api_paddle_nn_functional_teacher_student_sigmoid_loss:
-cos	.. _api_paddle_cos:
-conv3d_transpose	.. _api_paddle_nn_functional_conv_conv3d_transpose:
-MSRA	.. _api_paddle_nn_initializer_MSRA:
-embedding	.. _api_paddle_static_nn_embedding:
-data	.. _api_paddle_nn_data:
-rank	.. _api_paddle_rank:
-save	.. _api_paddle_save:
-HDFSClient	.. _api_paddle_distributed_fleet_utils_HDFSClient:
-get_default_dtype	.. _api_paddle_framework_get_default_dtype:
 FS	.. _api_paddle_distributed_fleet_utils_FS:
-get_worker_info	.. _api_paddle_io_get_worker_info:
+clip_by_norm	.. _api_paddle_fluid_layers_clip_by_norm:
-ELU	.. _api_paddle_nn_layer_activation_ELU:
+warpctc	.. _api_paddle_fluid_layers_warpctc:
-default_startup_program	.. _api_paddle_static_default_startup_program:
-sums	.. _api_paddle_sums:
-polygon_box_transform	.. _api_paddle_nn_functional_polygon_box_transform:
-logical_not	.. _api_paddle_logical_not:
-acos	.. _api_paddle_acos:
-InverseTimeDecay	.. _api_paddle_InverseTimeDecay:
-in_dynamic_mode	.. _api_paddle_in_dynamic_mode:
-reduce_all	.. _api_paddle_reduce_all:
-mse_loss	.. _api_paddle_nn_functional_mse_loss:
-distribute_fpn_proposals	.. _api_paddle_nn_functional_distribute_fpn_proposals:
-Precision	.. _api_paddle_metric_Precision:
-conv2d_transpose	.. _api_paddle_static_nn_conv2d_transpose:
-PaddleCloudRoleMaker	.. _api_paddle_distributed_fleet_PaddleCloudRoleMaker:
-bpr_loss	.. _api_paddle_nn_functional_bpr_loss:
-set_program_state	.. _api_paddle_io_set_program_state:
-DistributedStrategy	.. _api_paddle_distributed_fleet_DistributedStrategy:
-train	.. _api_paddle_dataset_mnist_train:
-RoleMakerBase	.. _api_paddle_distributed_fleet_base_role_maker_RoleMakerBase:
-expand	.. _api_paddle_expand:
-valid	.. _api_paddle_dataset_flowers_valid:
 expand	.. _api_paddle_tensor_manipulation_expand:
-equal_all	.. _api_paddle_tensor_logic_equal_all:
+GradientClipByNorm	.. _api_paddle_fluid_clip_GradientClipByNorm:
-softsign	.. _api_paddle_nn_functional_softsign:
+NoamDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_NoamDecay:
-var	.. _api_paddle_tensor_stat_var:
+EditDistance	.. _api_paddle_fluid_evaluator_EditDistance:
-ZeroPad2d	.. _api_paddle_nn_ZeroPad2d:
+acos	.. _api_paddle_fluid_layers_acos:
-log1p	.. _api_paddle_tensor_math_log1p:
+resize_linear	.. _api_paddle_fluid_layers_resize_linear:
-test	.. _api_paddle_dataset_movielens_test:
+crf_decoding	.. _api_paddle_fluid_layers_crf_decoding:
-DetectionMAP	.. _api_paddle_metric_DetectionMAP:
+margin_ranking_loss	.. _api_paddle_nn_functional_loss_margin_ranking_loss:
-train	.. _api_paddle_dataset_flowers_train:
+add	.. _api_paddle_tensor_math_add:
-resize_short	.. _api_paddle_dataset_image_resize_short:
+Executor	.. _api_paddle_fluid_executor_Executor:
-ChunkEvaluator	.. _api_paddle_metric_ChunkEvaluator:
+firstn	.. _api_paddle_reader_firstn:
-CosineDecay	.. _api_paddle_CosineDecay:
+get_program_persistable_vars	.. _api_paddle_fluid_io_get_program_persistable_vars:
-create_tensor	.. _api_paddle_create_tensor:
+allclose	.. _api_paddle_tensor_logic_allclose:
-rank_loss	.. _api_paddle_nn_functional_rank_loss:
+elementwise_pow	.. _api_paddle_fluid_layers_elementwise_pow:
-get_include	.. _api_paddle_sysconfig_get_include:
+SampleEmbeddingHelper	.. _api_paddle_fluid_layers_SampleEmbeddingHelper:
-greater_equal	.. _api_paddle_tensor_logic_greater_equal:
+LinearLrWarmup	.. _api_paddle_fluid_dygraph_LinearLrWarmup:
-assign	.. _api_paddle_nn_functional_assign:
+load_params	.. _api_paddle_fluid_io_load_params:
-abs	.. _api_paddle_abs:
+elementwise_floordiv	.. _api_paddle_fluid_layers_elementwise_floordiv:
-cumsum	.. _api_paddle_tensor_math_cumsum:
+release_memory	.. _api_paddle_fluid_release_memory:
-elu	.. _api_paddle_nn_functional_elu:
+batch_norm	.. _api_paddle_fluid_layers_batch_norm:
-AdagradOptimizer	.. _api_paddle_optimizer_AdagradOptimizer:
-tile	.. _api_paddle_tensor_manipulation_tile:
-round	.. _api_paddle_round:
-Constant	.. _api_paddle_nn_initializer_Constant:
-FSShellCmdAborted	.. _api_paddle_distributed_fleet_utils_FSShellCmdAborted:
-not_equal	.. _api_paddle_tensor_logic_not_equal:
-sqrt	.. _api_paddle_sqrt:
-less_than	.. _api_paddle_tensor_logic_less_than:
-BilinearTensorProduct	.. _api_paddle_nn_BilinearTensorProduct:
-MSELoss	.. _api_paddle_nn_layer_loss_MSELoss:
-MetaOptimizerBase	.. _api_paddle_distributed_fleet_meta_optimizers_meta_optimizer_base_MetaOptimizerBase:
-RandomSampler	.. _api_paddle_io_RandomSampler:
-center_crop	.. _api_paddle_dataset_image_center_crop:
-load_inference_model	.. _api_paddle_io_load_inference_model:
-max	.. _api_paddle_tensor_math_max:
-py_func	.. _api_paddle_static_py_func:
-build_dict	.. _api_paddle_dataset_imdb_build_dict:
 long_type	.. _api_paddle_compat_long_type:
-AsyncGraphExecutionOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AsyncGraphExecutionOptimizer:
+continuous_value_model	.. _api_paddle_fluid_layers_continuous_value_model:
-Adadelta	.. _api_paddle_optimizer_Adadelta:
+autodoc	.. _api_paddle_fluid_layers_layer_function_generator_autodoc:
-prepare_context	.. _api_paddle_prepare_context:
+BackwardStrategy	.. _api_paddle_framework_BackwardStrategy:
+conv_transpose1d	.. _api_paddle_nn_functional_conv_transpose1d:
+Adam	.. _api_paddle_optimizer_Adam:
+data	.. _api_paddle_fluid_data:
+L1Decay	.. _api_paddle_fluid_regularizer_L1Decay:
+dynamic_gru	.. _api_paddle_fluid_layers_dynamic_gru:
+min	.. _api_paddle_tensor_math_min:
+FakeQuantMovingAverage	.. _api_paddle_fluid_contrib_slim_quantization_imperative_FakeQuantMovingAverage:
+to_text	.. _api_paddle_compat_to_text:
+RoundRobin	.. _api_paddle_fluid_transpiler_RoundRobin:
+LarsOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LarsOptimizer:
+binary_cross_entropy_with_logits	.. _api_paddle_nn_functional_binary_cross_entropy_with_logits:
+generate_layer_fn	.. _api_paddle_fluid_layers_layer_function_generator_generate_layer_fn:
+Distribution	.. _api_paddle_distribution_Distribution:
+ReplicationPad1d	.. _api_paddle_nn_ReplicationPad1d:
+cache	.. _api_paddle_reader_cache:
+QuantizeTranspiler	.. _api_paddle_fluid_contrib_QuantizeTranspiler:
+data	.. _api_paddle_fluid_layers_data:
+isfinite	.. _api_paddle_tensor_math_isfinite:
+Dropout3D	.. _api_paddle_nn_Dropout3D:
+max_user_id	.. _api_paddle_dataset_movielens_max_user_id:
+unbind	.. _api_paddle_tensor_manipulation_unbind:
+row_conv	.. _api_paddle_nn_functional_extension_row_conv:
+prelu	.. _api_paddle_fluid_layers_prelu:
+elu	.. _api_paddle_fluid_layers_elu:
+BasicDecoder	.. _api_paddle_fluid_layers_BasicDecoder:
+create_tensor	.. _api_paddle_fluid_layers_create_tensor:
+ModelAverage	.. _api_paddle_optimizer_ModelAverage:
+double_buffer	.. _api_paddle_fluid_layers_double_buffer:
+transpose	.. _api_paddle_fluid_layers_transpose:
+density_prior_box	.. _api_paddle_fluid_layers_density_prior_box:
+elementwise_min	.. _api_paddle_fluid_layers_elementwise_min:
+gather_nd	.. _api_paddle_fluid_layers_gather_nd:
+map_readers	.. _api_paddle_io_map_readers:
+load_persistables	.. _api_paddle_fluid_io_load_persistables:
+load_image_bytes	.. _api_paddle_dataset_image_load_image_bytes:
+guard	.. _api_paddle_fluid_dygraph_guard:
+train	.. _api_paddle_dataset_voc2012_train:
+equal	.. _api_paddle_tensor_logic_equal:
+sequence_enumerate	.. _api_paddle_fluid_layers_sequence_enumerate:
+ReLU	.. _api_paddle_nn_layer_activation_ReLU:
+ParamAttr	.. _api_paddle_fluid_param_attr_ParamAttr:
+PipelineOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_PipelineOptimizer:
+scatter	.. _api_paddle_tensor_manipulation_scatter:
+greater_than	.. _api_paddle_fluid_layers_greater_than:
+alpha_dropout	.. _api_paddle_nn_functional_alpha_dropout:
 conv3d	.. _api_paddle_nn_functional_conv_conv3d:
-unstack	.. _api_paddle_unstack:
+TrainerDesc	.. _api_paddle_fluid_trainer_desc_TrainerDesc:
-Embedding	.. _api_paddle_nn_Embedding:
+NLLLoss	.. _api_paddle_nn_layer_loss_NLLLoss:
-cond	.. _api_paddle_nn_cond:
+image_resize	.. _api_paddle_fluid_layers_image_resize:
-test	.. _api_paddle_dataset_wmt14_test:
+CUDAPinnedPlace	.. _api_paddle_fluid_CUDAPinnedPlace:
-npair_loss	.. _api_paddle_nn_functional_npair_loss:
+save_params	.. _api_paddle_fluid_io_save_params:
-fsp_matrix	.. _api_paddle_nn_functional_fsp_matrix:
+brelu	.. _api_paddle_fluid_layers_brelu:
-linear_lr_warmup	.. _api_paddle_nn_functional_linear_lr_warmup:
+data	.. _api_paddle_static_data:
-chain	.. _api_paddle_io_chain:
+DecayedAdagrad	.. _api_paddle_optimizer_DecayedAdagrad:
-tanh	.. _api_paddle_tanh:
+multiclass_nms	.. _api_paddle_fluid_layers_multiclass_nms:
-Ftrl	.. _api_paddle_optimizer_Ftrl:
+square_error_cost	.. _api_paddle_fluid_layers_square_error_cost:
-dot	.. _api_paddle_tensor_linalg_dot:
-Sampler	.. _api_paddle_io_Sampler:
-BatchSampler	.. _api_paddle_io_BatchSampler:
-PairwiseDistance	.. _api_paddle_nn_layer_distance_PairwiseDistance:
-get_embedding	.. _api_paddle_dataset_conll05_get_embedding:
-generate_proposal_labels	.. _api_paddle_nn_functional_generate_proposal_labels:
-TruncatedNormal	.. _api_paddle_nn_initializer_TruncatedNormal:
-get_movie_title_dict	.. _api_paddle_dataset_movielens_get_movie_title_dict:
-maxout	.. _api_paddle_nn_functional_maxout:
-stanh	.. _api_paddle_stanh:
-clip	.. _api_paddle_nn_clip:
-TracedLayer	.. _api_paddle_jit_TracedLayer:
-polynomial_decay	.. _api_paddle_nn_functional_polynomial_decay:
-batch	.. _api_paddle_io_batch:
-ConstantPad1d	.. _api_paddle_nn_ConstantPad1d:
-LayerNorm	.. _api_paddle_nn_LayerNorm:
-elementwise_sum	.. _api_paddle_tensor_math_elementwise_sum:
-AdadeltaOptimizer	.. _api_paddle_optimizer_AdadeltaOptimizer:
-concat	.. _api_paddle_tensor_manipulation_concat:
 Accuracy	.. _api_paddle_metric_Accuracy:
-atan	.. _api_paddle_atan:
+_pull_box_extended_sparse	.. _api_paddle_fluid_contrib__pull_box_extended_sparse:
-mean	.. _api_paddle_tensor_stat_mean:
+shuffle_channel	.. _api_paddle_fluid_layers_shuffle_channel:
-elementwise_mul	.. _api_paddle_tensor_math_elementwise_mul:
+mod	.. _api_paddle_tensor_math_mod:
-anchor_generator	.. _api_paddle_nn_functional_anchor_generator:
+argmax	.. _api_paddle_tensor_search_argmax:
-randn	.. _api_paddle_tensor_random_randn:
+MultiStepDecay	.. _api_paddle_fluid_dygraph_MultiStepDecay:
-ProgramTranslator	.. _api_paddle_jit_ProgramTranslator:
-unique	.. _api_paddle_unique:
-group_norm	.. _api_paddle_static_nn_group_norm:
-left_right_flip	.. _api_paddle_dataset_image_left_right_flip:
-firstn	.. _api_paddle_io_firstn:
 flip	.. _api_paddle_tensor_manipulation_flip:
-retinanet_detection_output	.. _api_paddle_nn_functional_retinanet_detection_output:
+tanh_shrink	.. _api_paddle_fluid_layers_tanh_shrink:
-sentiment	.. _api_paddle_dataset_sentiment:
+elementwise_mod	.. _api_paddle_fluid_layers_elementwise_mod:
-crop_tensor	.. _api_paddle_crop_tensor:
+eye	.. _api_paddle_tensor_creation_eye:
-user_info	.. _api_paddle_dataset_movielens_user_info:
+less_than	.. _api_paddle_tensor_logic_less_than:
-logical_or	.. _api_paddle_logical_or:
+ReduceLROnPlateau	.. _api_paddle_fluid_dygraph_ReduceLROnPlateau:
-histogram	.. _api_paddle_tensor_linalg_histogram:
+box_clip	.. _api_paddle_fluid_layers_box_clip:
-has_nan	.. _api_paddle_has_nan:
+load_persistables_for_increment	.. _api_paddle_fluid_contrib_load_persistables_for_increment:
-transpose	.. _api_paddle_transpose:
+load_dygraph	.. _api_paddle_fluid_dygraph_checkpoint_load_dygraph:
-disable_static	.. _api_paddle_disable_static:
+LayerHelperBase	.. _api_paddle_fluid_layer_helper_base_LayerHelperBase:
-resize_bilinear	.. _api_paddle_nn_functional_resize_bilinear:
+CompiledProgram	.. _api_paddle_fluid_compiler_CompiledProgram:
-CUDAPinnedPlace	.. _api_paddle_fluid_CUDAPinnedPlace:
+thresholded_relu	.. _api_paddle_fluid_layers_thresholded_relu:
-conv2d	.. _api_paddle_nn_functional_conv_conv2d:
-flatten	.. _api_paddle_tensor_manipulation_flatten:
-lrn	.. _api_paddle_nn_functional_lrn:
-gradients	.. _api_paddle_static_gradients:
-ExecuteError	.. _api_paddle_distributed_fleet_utils_ExecuteError:
-label_smooth	.. _api_paddle_nn_functional_label_smooth:
-minimum	.. _api_paddle_tensor_math_minimum:
-floor_division	.. _api_paddle_compat_floor_division:
-slice	.. _api_paddle_slice:
-program_guard	.. _api_paddle_static_program_guard:
-mean_iou	.. _api_paddle_metric_mean_iou:
-train	.. _api_paddle_dataset_imdb_train:
-softmax_with_cross_entropy	.. _api_paddle_nn_functional_softmax_with_cross_entropy:
-LayerList	.. _api_paddle_nn_LayerList:
-validation	.. _api_paddle_dataset_wmt16_validation:
-softplus	.. _api_paddle_nn_functional_softplus:
-thresholded_relu	.. _api_paddle_nn_functional_thresholded_relu:
-save	.. _api_paddle_jit_save:
-resize_nearest	.. _api_paddle_nn_functional_resize_nearest:
-KVServer	.. _api_paddle_distributed_fleet_utils_KVServer:
-unfold	.. _api_paddle_nn_functional_unfold:
-multiply	.. _api_paddle_tensor_math_multiply:
-movielens	.. _api_paddle_dataset_movielens:
-roi_align	.. _api_paddle_nn_functional_roi_align:
-conv3d_transpose	.. _api_paddle_static_nn_conv3d_transpose:
-BackwardStrategy	.. _api_paddle_framework_BackwardStrategy:
-train100	.. _api_paddle_dataset_cifar_train100:
-set_default_dtype	.. _api_paddle_framework_set_default_dtype:
-reduce_max	.. _api_paddle_reduce_max:
-logsigmoid	.. _api_paddle_nn_functional_logsigmoid:
-Sequential	.. _api_paddle_nn_Sequential:
-diag_embed	.. _api_paddle_nn_functional_extension_diag_embed:
-cast	.. _api_paddle_cast:
-Program	.. _api_paddle_static_Program:
-continuous_value_model	.. _api_paddle_nn_functional_continuous_value_model:
-case	.. _api_paddle_nn_case:
-max_movie_id	.. _api_paddle_dataset_movielens_max_movie_id:
-gelu	.. _api_paddle_nn_functional_gelu:
-Dataset	.. _api_paddle_io_Dataset:
-one_hot	.. _api_paddle_nn_functional_one_hot:
-create_global_var	.. _api_paddle_create_global_var:
-CPUPlace	.. _api_paddle_fluid_CPUPlace:
-detection_output	.. _api_paddle_nn_functional_detection_output:
-asin	.. _api_paddle_asin:
-warpctc	.. _api_paddle_nn_functional_warpctc:
-PiecewiseDecay	.. _api_paddle_PiecewiseDecay:
-row_conv	.. _api_paddle_static_nn_row_conv:
 PipelineOptimizer	.. _api_paddle_optimizer_PipelineOptimizer:
-softmax	.. _api_paddle_nn_functional_activation_softmax:
+clip	.. _api_paddle_tensor_math_clip:
-kldiv_loss	.. _api_paddle_nn_functional_kldiv_loss:
+squeeze	.. _api_paddle_fluid_layers_squeeze:
-multiclass_nms	.. _api_paddle_nn_functional_multiclass_nms:
+yolo_box	.. _api_paddle_fluid_layers_yolo_box:
-yolov3_loss	.. _api_paddle_nn_functional_yolov3_loss:
+beam_search_decode	.. _api_paddle_fluid_layers_beam_search_decode:
-greater_than	.. _api_paddle_tensor_logic_greater_than:
+crop	.. _api_paddle_fluid_layers_crop:
-train	.. _api_paddle_dataset_voc2012_train:
+load_inference_model	.. _api_paddle_io_load_inference_model:
-CUDAPlace	.. _api_paddle_framework_CUDAPlace:
+bilinear_tensor_product	.. _api_paddle_fluid_layers_bilinear_tensor_product:
-grid_sampler	.. _api_paddle_nn_functional_grid_sampler:
+disable_dygraph	.. _api_paddle_fluid_dygraph_base_disable_dygraph:
-SaveLoadConfig	.. _api_paddle_jit_SaveLoadConfig:
+isfinite	.. _api_paddle_fluid_layers_isfinite:
-stack	.. _api_paddle_tensor_manipulation_stack:
+maxout	.. _api_paddle_fluid_layers_maxout:
-shard_index	.. _api_paddle_shard_index:
+Uniform	.. _api_paddle_fluid_initializer_Uniform:
-create_parameter	.. _api_paddle_static_nn_create_parameter:
+leaky_relu	.. _api_paddle_nn_functional_leaky_relu:
-ExecutionStrategy	.. _api_paddle_static_ExecutionStrategy:
+MPISymetricRoleMaker	.. _api_paddle_fluid_incubate_fleet_base_role_maker_MPISymetricRoleMaker:
-Conv3D	.. _api_paddle_nn_layer_conv_Conv3D:
+temporal_shift	.. _api_paddle_fluid_layers_temporal_shift:
-triu	.. _api_paddle_tensor_creation_triu:
+ComplexVariable	.. _api_paddle_fluid_ComplexVariable:
+DataLoader	.. _api_paddle_io_DataLoader:
+Uniform	.. _api_paddle_distribution_Uniform:
+PaddleCloudRoleMaker	.. _api_paddle_fluid_incubate_fleet_base_role_maker_PaddleCloudRoleMaker:
+ParallelEnv	.. _api_paddle_fluid_dygraph_parallel_ParallelEnv:
+beam_search	.. _api_paddle_fluid_layers_beam_search:
 age_table	.. _api_paddle_dataset_movielens_age_table:
-ReflectionPad1d	.. _api_paddle_nn_ReflectionPad1d:
+ErrorClipByValue	.. _api_paddle_fluid_clip_ErrorClipByValue:
-expand_as	.. _api_paddle_expand_as:
+ExecutionStrategy	.. _api_paddle_fluid_compiler_ExecutionStrategy:
-cholesky	.. _api_paddle_tensor_linalg_cholesky:
+conv2d_transpose	.. _api_paddle_fluid_layers_conv2d_transpose:
-Bilinear	.. _api_paddle_nn_initializer_Bilinear:
+gaussian_random	.. _api_paddle_fluid_layers_gaussian_random:
-ConstantPad3d	.. _api_paddle_nn_ConstantPad3d:
+exp	.. _api_paddle_fluid_layers_exp:
-DatasetFactory	.. _api_paddle_distributed_fleet_DatasetFactory:
+relu6	.. _api_paddle_nn_functional_relu6:
-mm	.. _api_paddle_tensor_math_mm:
+get_worker_info	.. _api_paddle_io_get_worker_info:
-shuffle	.. _api_paddle_shuffle:
+ctc_loss	.. _api_paddle_nn_functional_ctc_loss:
-FSFileExistsError	.. _api_paddle_distributed_fleet_utils_FSFileExistsError:
+Conv2DTranspose	.. _api_paddle_fluid_dygraph_Conv2DTranspose:
-gather_tree	.. _api_paddle_nn_gather_tree:
+is_empty	.. _api_paddle_fluid_layers_is_empty:
-hsigmoid	.. _api_paddle_static_nn_hsigmoid:
+DistMultiTrainer	.. _api_paddle_fluid_trainer_desc_DistMultiTrainer:
-compose	.. _api_paddle_reader_compose:
+logical_xor	.. _api_paddle_fluid_layers_logical_xor:
-pow	.. _api_paddle_tensor_math_pow:
+natural_exp_decay	.. _api_paddle_fluid_layers_natural_exp_decay:
-sin	.. _api_paddle_sin:
+maximum	.. _api_paddle_tensor_math_maximum:
-switch_case	.. _api_paddle_nn_switch_case:
+DygraphToStaticAst	.. _api_paddle_fluid_dygraph_dygraph_to_static_DygraphToStaticAst:
-cluster_files_reader	.. _api_paddle_dataset_common_cluster_files_reader:
+sum	.. _api_paddle_tensor_math_sum:
-zeros_like	.. _api_paddle_tensor_creation_zeros_like:
-full	.. _api_paddle_tensor_creation_full:
-smooth_l1_loss	.. _api_paddle_nn_functional_loss_smooth_l1_loss:
-no_grad	.. _api_paddle_no_grad:
-min	.. _api_paddle_tensor_math_min:
-DATA_HOME	.. _api_paddle_dataset_common_DATA_HOME:
-hard_swish	.. _api_paddle_nn_functional_hard_swish:
-MomentumOptimizer	.. _api_paddle_optimizer_MomentumOptimizer:
-std	.. _api_paddle_tensor_stat_std:
-arange	.. _api_paddle_tensor_creation_arange:
-add	.. _api_paddle_tensor_math_add:
-DataParallel	.. _api_paddle_DataParallel:
-sigmoid_cross_entropy_with_logits	.. _api_paddle_nn_functional_sigmoid_cross_entropy_with_logits:
-while_loop	.. _api_paddle_nn_while_loop:
-clamp	.. _api_paddle_tensor_math_clamp:
-AsyncMetaOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AsyncMetaOptimizer:
-argsort	.. _api_paddle_tensor_search_argsort:
-Momentum	.. _api_paddle_optimizer_Momentum:
 L1Loss	.. _api_paddle_nn_layer_loss_L1Loss:
-l2_normalize	.. _api_paddle_nn_functional_l2_normalize:
+shuffle_batch	.. _api_paddle_fluid_contrib_shuffle_batch:
-Hardshrink	.. _api_paddle_nn_layer_activation_Hardshrink:
+bpr_loss	.. _api_paddle_fluid_layers_bpr_loss:
-diag	.. _api_paddle_diag:
+hsigmoid	.. _api_paddle_nn_functional_activation_hsigmoid:
-Pad2D	.. _api_paddle_nn_layer_common_Pad2D:
+adaptive_avg_pool3d	.. _api_paddle_nn_functional_pooling_adaptive_avg_pool3d:
-Layer	.. _api_paddle_nn_Layer:
+lod_append	.. _api_paddle_fluid_layers_lod_append:
-exp	.. _api_paddle_exp:
+rank_loss	.. _api_paddle_fluid_layers_rank_loss:
-deformable_roi_pooling	.. _api_paddle_nn_functional_deformable_roi_pooling:
+concat	.. _api_paddle_fluid_layers_concat:
-cross	.. _api_paddle_tensor_linalg_cross:
+LayerList	.. _api_paddle_fluid_dygraph_container_LayerList:
-max_user_id	.. _api_paddle_dataset_movielens_max_user_id:
+declarative	.. _api_paddle_fluid_dygraph_jit_declarative:
-LocalSGDOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LocalSGDOptimizer:
+unsqueeze	.. _api_paddle_tensor_manipulation_unsqueeze:
-cosh	.. _api_paddle_cosh:
+Momentum	.. _api_paddle_optimizer_Momentum:
-fc	.. _api_paddle_static_nn_fc:
+dropout2d	.. _api_paddle_nn_functional_dropout2d:
-manual_seed	.. _api_paddle_framework_random_manual_seed:
+kl_div	.. _api_paddle_nn_functional_kl_div:
-Linear	.. _api_paddle_nn_Linear:
+ConvTranspose2d	.. _api_paddle_nn_layer_conv_ConvTranspose2d:
-row_conv	.. _api_paddle_nn_functional_extension_row_conv:
+get_word_dict	.. _api_paddle_dataset_sentiment_get_word_dict:
-Flatten	.. _api_paddle_nn_Flatten:
+binary_cross_entropy	.. _api_paddle_nn_functional_binary_cross_entropy:
-unique_with_counts	.. _api_paddle_unique_with_counts:
+dropout	.. _api_paddle_nn_functional_dropout:
-append_backward	.. _api_paddle_static_append_backward:
+hard_shrink	.. _api_paddle_fluid_layers_hard_shrink:
-scatter_nd	.. _api_paddle_scatter_nd:
+compose	.. _api_paddle_io_compose:
-to_variable	.. _api_paddle_to_variable:
+generate_proposal_labels	.. _api_paddle_fluid_layers_generate_proposal_labels:
-instance_norm	.. _api_paddle_static_nn_instance_norm:
+LarsMomentumOptimizer	.. _api_paddle_optimizer_LarsMomentumOptimizer:
-train	.. _api_paddle_dataset_sentiment_train:
+scaled_dot_product_attention	.. _api_paddle_fluid_nets_scaled_dot_product_attention:
-huber_loss	.. _api_paddle_nn_functional_huber_loss:
+DecayedAdagradOptimizer	.. _api_paddle_optimizer_DecayedAdagradOptimizer:
+InMemoryDataset	.. _api_paddle_fluid_dataset_InMemoryDataset:
+is_compiled_with_xpu	.. _api_paddle_fluid_is_compiled_with_xpu:
+Conv3d	.. _api_paddle_nn_layer_conv_Conv3d:
+sampling_id	.. _api_paddle_fluid_layers_sampling_id:
+tile	.. _api_paddle_tensor_manipulation_tile:
+argsort	.. _api_paddle_fluid_layers_argsort:
+save_persistables	.. _api_paddle_fluid_io_save_persistables:
+MetaOptimizerBase	.. _api_paddle_distributed_fleet_meta_optimizers_meta_optimizer_base_MetaOptimizerBase:
 full_like	.. _api_paddle_tensor_creation_full_like:
-ceil	.. _api_paddle_ceil:
+tdm_sampler	.. _api_paddle_fluid_contrib_tdm_sampler:
-KVHandler	.. _api_paddle_distributed_fleet_utils_KVHandler:
+distribute_fpn_proposals	.. _api_paddle_fluid_layers_distribute_fpn_proposals:
-BCELoss	.. _api_paddle_nn_layer_loss_BCELoss:
+isinf	.. _api_paddle_tensor_math_isinf:
-Executor	.. _api_paddle_static_Executor:
+arange	.. _api_paddle_tensor_creation_arange:
-ComposeNotAligned	.. _api_paddle_reader_ComposeNotAligned:
+stack	.. _api_paddle_fluid_layers_stack:
-similarity_focus	.. _api_paddle_nn_functional_similarity_focus:
+l1_loss	.. _api_paddle_nn_functional_loss_l1_loss:
-Conv2D	.. _api_paddle_nn_layer_conv_Conv2D:
+memory_usage	.. _api_paddle_fluid_contrib_memory_usage:
-topk	.. _api_paddle_topk:
+StateCell	.. _api_paddle_fluid_contrib_StateCell:
-ConstantPad2d	.. _api_paddle_nn_ConstantPad2d:
+create_array	.. _api_paddle_fluid_layers_create_array:
-Fleet	.. _api_paddle_distributed_fleet_base_fleet_base_Fleet:
+GreedyEmbeddingHelper	.. _api_paddle_fluid_layers_GreedyEmbeddingHelper:
-pad	.. _api_paddle_nn_functional_pad:
+ProbabilityEntry	.. _api_paddle_fluid_entry_attr_ProbabilityEntry:
-target_assign	.. _api_paddle_nn_functional_target_assign:
+LambdaDecay	.. _api_paddle_fluid_dygraph_LambdaDecay:
-beam_search	.. _api_paddle_nn_beam_search:
+stop_profiler	.. _api_paddle_fluid_profiler_stop_profiler:
-ParamAttr	.. _api_paddle_ParamAttr:
+convert_call	.. _api_paddle_fluid_dygraph_dygraph_to_static_convert_call:
-expand_as	.. _api_paddle_tensor_manipulation_expand_as:
+PyReader	.. _api_paddle_fluid_io_PyReader:
-MetaOptimizerFactory	.. _api_paddle_distributed_fleet_base_meta_optimizer_factory_MetaOptimizerFactory:
+device_guard	.. _api_paddle_fluid_device_guard:
+Precision	.. _api_paddle_metric_Precision:
+set_program_state	.. _api_paddle_io_set_program_state:
+RMSProp	.. _api_paddle_optimizer_RMSProp:
+ones	.. _api_paddle_tensor_creation_ones:
+GRUCell	.. _api_paddle_fluid_dygraph_GRUCell:
+WeightQuantization	.. _api_paddle_fluid_contrib_slim_quantization_post_training_quantization_WeightQuantization:
+rank	.. _api_paddle_fluid_layers_rank:
+TrainingHelper	.. _api_paddle_fluid_layers_TrainingHelper:
+yolov3_loss	.. _api_paddle_fluid_layers_yolov3_loss:
+generate	.. _api_paddle_fluid_unique_name_generate:
+Auc	.. _api_paddle_metric_Auc:
+Normal	.. _api_paddle_distribution_Normal:
+var	.. _api_paddle_tensor_stat_var:
+cuda_pinned_places	.. _api_paddle_fluid_cuda_pinned_places:
+label_smooth	.. _api_paddle_fluid_layers_label_smooth:
+sequence_topk_avg_pooling	.. _api_paddle_fluid_contrib_sequence_topk_avg_pooling:
+AdaptiveAvgPool3d	.. _api_paddle_nn_layer_pooling_AdaptiveAvgPool3d:
+DataParallel	.. _api_paddle_fluid_dygraph_parallel_DataParallel:
+StaticAnalysisVisitor	.. _api_paddle_fluid_dygraph_dygraph_to_static_StaticAnalysisVisitor:
+batch_fc	.. _api_paddle_fluid_contrib_batch_fc:
+one_hot	.. _api_paddle_nn_functional_one_hot:
+IfElse	.. _api_paddle_fluid_layers_IfElse:
 MarginRankingLoss	.. _api_paddle_nn_layer_loss_MarginRankingLoss:
-Dpsgd	.. _api_paddle_optimizer_Dpsgd:
+train	.. _api_paddle_dataset_imdb_train:
-elementwise_div	.. _api_paddle_elementwise_div:
+movie_info	.. _api_paddle_dataset_movielens_movie_info:
-sigmoid	.. _api_paddle_nn_functional_activation_sigmoid:
+image_resize_short	.. _api_paddle_fluid_layers_image_resize_short:
-reduce_prod	.. _api_paddle_reduce_prod:
+normalize	.. _api_paddle_nn_functional_norm_normalize:
-auc	.. _api_paddle_metric_auc:
+pad	.. _api_paddle_nn_functional_pad:
-load	.. _api_paddle_jit_load:
+elementwise_div	.. _api_paddle_fluid_layers_elementwise_div:
-has_inf	.. _api_paddle_has_inf:
+CosineDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_CosineDecay:
-AdamOptimizer	.. _api_paddle_optimizer_AdamOptimizer:
+GradientMergeOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_GradientMergeOptimizer:
-space_to_depth	.. _api_paddle_nn_functional_space_to_depth:
+global_scope	.. _api_paddle_fluid_executor_global_scope:
-hard_sigmoid	.. _api_paddle_nn_functional_hard_sigmoid:
+Program	.. _api_paddle_fluid_framework_Program:
-DGCMomentumOptimizer	.. _api_paddle_optimizer_DGCMomentumOptimizer:
+Adam	.. _api_paddle_fluid_optimizer_Adam:
-SyncBatchNorm	.. _api_paddle_nn_SyncBatchNorm:
+has_nan	.. _api_paddle_fluid_layers_has_nan:
-roi_perspective_transform	.. _api_paddle_nn_functional_roi_perspective_transform:
+FLEET_GLOBAL_DICT	.. _api_paddle_fluid_incubate_fleet_parameter_server_pslib_optimizer_factory_FLEET_GLOBAL_DICT:
-to_chw	.. _api_paddle_dataset_image_to_chw:
+PaddleCloudRoleMaker	.. _api_paddle_distributed_fleet_PaddleCloudRoleMaker:
+amp_guard	.. _api_paddle_fluid_dygraph_amp_guard:
+elementwise_mul	.. _api_paddle_fluid_layers_elementwise_mul:
+hard_swish	.. _api_paddle_fluid_layers_hard_swish:
+deformable_roi_pooling	.. _api_paddle_fluid_layers_deformable_roi_pooling:
+center_loss	.. _api_paddle_fluid_layers_center_loss:
+unique_with_counts	.. _api_paddle_fluid_layers_unique_with_counts:
+pow	.. _api_paddle_fluid_layers_pow:
+get_movie_title_dict	.. _api_paddle_dataset_movielens_get_movie_title_dict:
+conv_transpose2d	.. _api_paddle_nn_functional_conv_transpose2d:
+StepDecay	.. _api_paddle_fluid_dygraph_StepDecay:
+pow	.. _api_paddle_tensor_math_pow:
+LoopTransformer	.. _api_paddle_fluid_dygraph_dygraph_to_static_LoopTransformer:
+UtilBase	.. _api_paddle_distributed_fleet_UtilBase:
+softsign	.. _api_paddle_nn_functional_softsign:
+L1DecayRegularizer	.. _api_paddle_fluid_regularizer_L1DecayRegularizer:
+Adamax	.. _api_paddle_optimizer_Adamax:
+Optimizer	.. _api_paddle_optimizer_Optimizer:
+noam_decay	.. _api_paddle_fluid_layers_noam_decay:
+im2sequence	.. _api_paddle_fluid_layers_im2sequence:
 hardshrink	.. _api_paddle_nn_functional_activation_hardshrink:
-conv2d	.. _api_paddle_static_nn_conv2d:
+zeros_like	.. _api_paddle_fluid_layers_zeros_like:
-train	.. _api_paddle_dataset_movielens_train:
+QuantizationFreezePass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_QuantizationFreezePass:
-test	.. _api_paddle_dataset_voc2012_test:
+AsyncMetaOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AsyncMetaOptimizer:
-random_crop	.. _api_paddle_nn_functional_random_crop:
+atan	.. _api_paddle_fluid_layers_atan:
-retinanet_target_assign	.. _api_paddle_nn_functional_retinanet_target_assign:
+tdm_child	.. _api_paddle_fluid_contrib_tdm_child:
-nce	.. _api_paddle_static_nn_nce:
+dygraph_to_static_func	.. _api_paddle_fluid_dygraph_dygraph_to_static_func:
-pad2d	.. _api_paddle_nn_functional_pad2d:
+get_lib	.. _api_paddle_sysconfig_get_lib:
-download	.. _api_paddle_dataset_common_download:
+Flatten	.. _api_paddle_fluid_dygraph_Flatten:
-interpolate	.. _api_paddle_nn_functional_common_interpolate:
+resize_trilinear	.. _api_paddle_fluid_layers_resize_trilinear:
-prior_box	.. _api_paddle_nn_functional_prior_box:
+DataFeedDesc	.. _api_paddle_fluid_DataFeedDesc:
-collect_fpn_proposals	.. _api_paddle_nn_functional_collect_fpn_proposals:
+NodeVarType	.. _api_paddle_fluid_dygraph_dygraph_to_static_NodeVarType:
-AdamaxOptimizer	.. _api_paddle_optimizer_AdamaxOptimizer:
+Softshrink	.. _api_paddle_nn_layer_activation_Softshrink:
-ssd_loss	.. _api_paddle_nn_functional_ssd_loss:
+PostTrainingQuantization	.. _api_paddle_fluid_contrib_slim_quantization_post_training_quantization_PostTrainingQuantization:
-to_bytes	.. _api_paddle_compat_to_bytes:
+test10	.. _api_paddle_dataset_cifar_test10:
-affine_channel	.. _api_paddle_nn_functional_affine_channel:
+SGD	.. _api_paddle_optimizer_SGD:
-elementwise_floordiv	.. _api_paddle_elementwise_floordiv:
+retinanet_target_assign	.. _api_paddle_fluid_layers_retinanet_target_assign:
-tanh_shrink	.. _api_paddle_nn_functional_tanh_shrink:
+less_than	.. _api_paddle_fluid_layers_less_than:
-round	.. _api_paddle_compat_round:
+gather	.. _api_paddle_tensor_manipulation_gather:
-cos_sim	.. _api_paddle_metric_cos_sim:
+DecodeHelper	.. _api_paddle_fluid_layers_DecodeHelper:
-argmin	.. _api_paddle_argmin:
+greater_equal	.. _api_paddle_tensor_logic_greater_equal:
-shape	.. _api_paddle_shape:
+firstn	.. _api_paddle_io_firstn:
-swish	.. _api_paddle_nn_functional_swish:
+weight_norm	.. _api_paddle_nn_utils_weight_norm_hook_weight_norm:
-train	.. _api_paddle_dataset_uci_housing_train:
+train	.. _api_paddle_dataset_flowers_train:
-LogSoftmax	.. _api_paddle_nn_layer_activation_LogSoftmax:
+cosh	.. _api_paddle_fluid_layers_cosh:
-scale	.. _api_paddle_scale:
+XPUPlace	.. _api_paddle_fluid_XPUPlace:
-conv3d	.. _api_paddle_static_nn_conv3d:
+merge_selected_rows	.. _api_paddle_fluid_layers_merge_selected_rows:
-increment	.. _api_paddle_increment:
+smooth_l1_loss	.. _api_paddle_nn_functional_smooth_l1_loss:
-DatasetBase	.. _api_paddle_distributed_fleet_DatasetBase:
+t	.. _api_paddle_tensor_linalg_t:
-logical_xor	.. _api_paddle_logical_xor:
+HashName	.. _api_paddle_fluid_transpiler_HashName:
-LarsMomentum	.. _api_paddle_optimizer_LarsMomentum:
+CPUPlace	.. _api_paddle_fluid_CPUPlace:
-SmoothL1Loss	.. _api_paddle_nn_layer_loss_SmoothL1Loss:
+Decoder	.. _api_paddle_fluid_layers_Decoder:
-mq2007	.. _api_paddle_dataset_mq2007:
+array_read	.. _api_paddle_fluid_layers_array_read:
-GradientClipByNorm	.. _api_paddle_nn_GradientClipByNorm:
+floor_divide	.. _api_paddle_tensor_math_floor_divide:
-load_image	.. _api_paddle_dataset_image_load_image:
+floor_mod	.. _api_paddle_tensor_math_floor_mod:
-get_word_dict	.. _api_paddle_dataset_sentiment_get_word_dict:
+logsigmoid	.. _api_paddle_nn_functional_logsigmoid:
-buffered	.. _api_paddle_io_buffered:
+generate_mask_labels	.. _api_paddle_fluid_layers_generate_mask_labels:
-ExponentialDecay	.. _api_paddle_ExponentialDecay:
+square	.. _api_paddle_fluid_layers_square:
-ReLU	.. _api_paddle_nn_layer_activation_ReLU:
+reset_profiler	.. _api_paddle_fluid_profiler_reset_profiler:
-cache	.. _api_paddle_reader_cache:
+MSRA	.. _api_paddle_fluid_initializer_MSRA:
-imikolov	.. _api_paddle_dataset_imikolov:
+Print	.. _api_paddle_fluid_layers_control_flow_Print:
-SpectralNorm	.. _api_paddle_nn_SpectralNorm:
+unique	.. _api_paddle_fluid_layers_unique:
-batch_images_from_tar	.. _api_paddle_dataset_image_batch_images_from_tar:
-reduce_sum	.. _api_paddle_reduce_sum:
-beam_search_decode	.. _api_paddle_nn_beam_search_decode:
-movie_categories	.. _api_paddle_dataset_movielens_movie_categories:
-kl_div	.. _api_paddle_nn_functional_loss_kl_div:
 max_job_id	.. _api_paddle_dataset_movielens_max_job_id:
-KLDivLoss	.. _api_paddle_nn_layer_loss_KLDivLoss:
+grad	.. _api_paddle_fluid_dygraph_base_grad:
-deformable_conv	.. _api_paddle_static_nn_deformable_conv:
+KVHandler	.. _api_paddle_distributed_fleet_utils_KVHandler:
-md5file	.. _api_paddle_dataset_common_md5file:
+Switch	.. _api_paddle_fluid_layers_Switch:
-cosine_similarity	.. _api_paddle_nn_functional_cosine_similarity:
+selu	.. _api_paddle_nn_functional_selu:
+gradients	.. _api_paddle_fluid_backward_gradients:
+MSELoss	.. _api_paddle_nn_layer_loss_MSELoss:
+Pad2D	.. _api_paddle_nn_layer_common_Pad2D:
+sequence_concat	.. _api_paddle_fluid_layers_sequence_concat:
+sequence_expand_as	.. _api_paddle_fluid_layers_sequence_expand_as:
+DistributeTranspilerConfig	.. _api_paddle_fluid_DistributeTranspilerConfig:
+hsigmoid	.. _api_paddle_fluid_layers_hsigmoid:
+LambOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LambOptimizer:
+sequence_conv_pool	.. _api_paddle_fluid_nets_sequence_conv_pool:
+test100	.. _api_paddle_dataset_cifar_test100:
+Softsign	.. _api_paddle_nn_layer_activation_Softsign:
+max	.. _api_paddle_tensor_math_max:
+ctc_greedy_decoder	.. _api_paddle_fluid_layers_ctc_greedy_decoder:
+multiprocess_reader	.. _api_paddle_reader_multiprocess_reader:
+StaticRNN	.. _api_paddle_fluid_layers_StaticRNN:
+multiply	.. _api_paddle_tensor_math_multiply:
 IterableDataset	.. _api_paddle_io_IterableDataset:
+elementwise_sub	.. _api_paddle_fluid_layers_elementwise_sub:
+gather	.. _api_paddle_fluid_layers_gather:
+RandomSampler	.. _api_paddle_io_RandomSampler:
+name_scope	.. _api_paddle_fluid_framework_name_scope:
+UserDefinedRoleMaker	.. _api_paddle_fluid_incubate_fleet_base_role_maker_UserDefinedRoleMaker:
+L2DecayRegularizer	.. _api_paddle_fluid_regularizer_L2DecayRegularizer:
+AdamaxOptimizer	.. _api_paddle_fluid_optimizer_AdamaxOptimizer:
+SELU	.. _api_paddle_nn_layer_activation_SELU:
+gather_tree	.. _api_paddle_fluid_layers_gather_tree:
+Uniform	.. _api_paddle_fluid_layers_Uniform:
+remove_weight_norm	.. _api_paddle_nn_utils_weight_norm_hook_remove_weight_norm:
+DownpourSGD	.. _api_paddle_fluid_device_worker_DownpourSGD:
+sequence_softmax	.. _api_paddle_fluid_layers_sequence_softmax:
+HDFSClient	.. _api_paddle_fluid_contrib_HDFSClient:
+abs	.. _api_paddle_fluid_layers_abs:
+NameVisitor	.. _api_paddle_fluid_dygraph_dygraph_to_static_NameVisitor:
+chunk_eval	.. _api_paddle_metric_chunk_eval:
+affine_grid	.. _api_paddle_fluid_layers_affine_grid:
+fused_elemwise_activation	.. _api_paddle_fluid_contrib_fused_elemwise_activation:
 norm	.. _api_paddle_tensor_linalg_norm:
-BuildStrategy	.. _api_paddle_static_BuildStrategy:
+Normal	.. _api_paddle_fluid_layers_Normal:
-elementwise_pow	.. _api_paddle_elementwise_pow:
+ConstantPad2d	.. _api_paddle_nn_ConstantPad2d:
-box_decoder_and_assign	.. _api_paddle_nn_functional_box_decoder_and_assign:
+load_program_state	.. _api_paddle_io_load_program_state:
-build_dict	.. _api_paddle_dataset_imikolov_build_dict:
+resize_nearest	.. _api_paddle_fluid_layers_resize_nearest:
-div	.. _api_paddle_tensor_math_div:
+mse_loss	.. _api_paddle_nn_functional_mse_loss:
-prelu	.. _api_paddle_static_nn_prelu:
+conv_transpose3d	.. _api_paddle_nn_functional_conv_transpose3d:
-AMPOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AMPOptimizer:
+set_device	.. _api_paddle_device_set_device:
-buffered	.. _api_paddle_reader_buffered:
+where	.. _api_paddle_fluid_layers_where:
-save	.. _api_paddle_static_save:
+PipelineTrainer	.. _api_paddle_fluid_trainer_desc_PipelineTrainer:
-image	.. _api_paddle_dataset_image:
+anchor_generator	.. _api_paddle_fluid_layers_anchor_generator:
-RecomputeOptimizer	.. _api_paddle_optimizer_RecomputeOptimizer:
+train	.. _api_paddle_dataset_wmt16_train:
-sigmoid_focal_loss	.. _api_paddle_nn_functional_sigmoid_focal_loss:
+concat	.. _api_paddle_tensor_manipulation_concat:
-log	.. _api_paddle_log:
+to_variable	.. _api_paddle_fluid_dygraph_base_to_variable:
-GELU	.. _api_paddle_nn_layer_activation_GELU:
+sequence_reshape	.. _api_paddle_fluid_layers_sequence_reshape:
-grad	.. _api_paddle_grad:
+hash	.. _api_paddle_fluid_layers_hash:
-clip_by_norm	.. _api_paddle_nn_clip_by_norm:
+lod_reset	.. _api_paddle_fluid_layers_lod_reset:
-Conv3DTranspose	.. _api_paddle_nn_layer_conv_Conv3DTranspose:
+simple_img_conv_pool	.. _api_paddle_fluid_nets_simple_img_conv_pool:
-randint	.. _api_paddle_tensor_random_randint:
+get_device	.. _api_paddle_device_get_device:
-FtrlOptimizer	.. _api_paddle_optimizer_FtrlOptimizer:
+rsqrt	.. _api_paddle_fluid_layers_rsqrt:
-edit_distance	.. _api_paddle_nn_functional_edit_distance:
+to_static_variable_gast_node	.. _api_paddle_fluid_dygraph_dygraph_to_static_to_static_variable_gast_node:
-where	.. _api_paddle_tensor_search_where:
+matrix_nms	.. _api_paddle_fluid_layers_matrix_nms:
-Adagrad	.. _api_paddle_optimizer_Adagrad:
+user_info	.. _api_paddle_dataset_movielens_user_info:
-NLLLoss	.. _api_paddle_nn_layer_loss_NLLLoss:
+gaussian_random_batch_size_like	.. _api_paddle_fluid_layers_gaussian_random_batch_size_like:
+MomentumOptimizer	.. _api_paddle_optimizer_MomentumOptimizer:
+DistributedOptimizer	.. _api_paddle_fluid_incubate_fleet_base_fleet_base_DistributedOptimizer:
+Fleet	.. _api_paddle_distributed_fleet_base_fleet_base_Fleet:
+ProgramTranslator	.. _api_paddle_fluid_dygraph_ProgramTranslator:
+set_default_dtype	.. _api_paddle_framework_set_default_dtype:
+lstm	.. _api_paddle_fluid_layers_lstm:
+var_conv_2d	.. _api_paddle_fluid_contrib_var_conv_2d:
+Variable	.. _api_paddle_fluid_framework_Variable:
+ChunkEvaluator	.. _api_paddle_metric_ChunkEvaluator:
+cumsum	.. _api_paddle_tensor_math_cumsum:
+TruncatedNormal	.. _api_paddle_fluid_initializer_TruncatedNormal:
+sigmoid_focal_loss	.. _api_paddle_fluid_layers_sigmoid_focal_loss:
+LayerNorm	.. _api_paddle_fluid_dygraph_LayerNorm:
+MultiTrainer	.. _api_paddle_fluid_trainer_desc_MultiTrainer:
+xmap_readers	.. _api_paddle_io_xmap_readers:
+greater_than	.. _api_paddle_tensor_logic_greater_than:
+InitState	.. _api_paddle_fluid_contrib_InitState:
+sequence_pad	.. _api_paddle_fluid_layers_sequence_pad:
+linear_lr_warmup	.. _api_paddle_fluid_layers_linear_lr_warmup:
+NumpyArrayInitializer	.. _api_paddle_fluid_initializer_NumpyArrayInitializer:
+npair_loss	.. _api_paddle_fluid_layers_npair_loss:
+ZeroPad2d	.. _api_paddle_nn_ZeroPad2d:
+AdadeltaOptimizer	.. _api_paddle_optimizer_AdadeltaOptimizer:
+chain	.. _api_paddle_reader_chain:
+partial_sum	.. _api_paddle_fluid_contrib_partial_sum:
+logsumexp	.. _api_paddle_tensor_math_logsumexp:
+scale	.. _api_paddle_fluid_layers_scale:
+tanh	.. _api_paddle_nn_functional_tanh:
+bipartite_match	.. _api_paddle_fluid_layers_bipartite_match:
 InstanceNorm	.. _api_paddle_nn_layer_norm_InstanceNorm:
-get_lib	.. _api_paddle_sysconfig_get_lib:
+ExponentialMovingAverage	.. _api_paddle_optimizer_ExponentialMovingAverage:
-strided_slice	.. _api_paddle_strided_slice:
+AdagradOptimizer	.. _api_paddle_optimizer_AdagradOptimizer:
-split	.. _api_paddle_dataset_common_split:
+expand_as	.. _api_paddle_fluid_layers_expand_as:
-addcmul	.. _api_paddle_tensor_math_addcmul:
+masked_select	.. _api_paddle_tensor_search_masked_select:
-random_crop	.. _api_paddle_dataset_image_random_crop:
+RETURN_NO_VALUE_VAR_NAME	.. _api_paddle_fluid_dygraph_dygraph_to_static_return_transformer_RETURN_NO_VALUE_VAR_NAME:
-eye	.. _api_paddle_tensor_creation_eye:
+data_layer_not_check	.. _api_paddle_fluid_dygraph_dygraph_to_static_data_layer_not_check:
-center_loss	.. _api_paddle_nn_functional_center_loss:
+not_equal	.. _api_paddle_fluid_layers_not_equal:
+BatchNorm	.. _api_paddle_fluid_dygraph_BatchNorm:
+Hogwild	.. _api_paddle_fluid_device_worker_Hogwild:
+reshape	.. _api_paddle_tensor_manipulation_reshape:
+convert_to_static	.. _api_paddle_fluid_dygraph_dygraph_to_static_convert_to_static:
+UpSample	.. _api_paddle_nn_layer_common_UpSample:
+load	.. _api_paddle_fluid_io_load:
+DATA_HOME	.. _api_paddle_dataset_common_DATA_HOME:
+QuantizedConv2D	.. _api_paddle_fluid_contrib_slim_quantization_imperative_QuantizedConv2D:
+affine_channel	.. _api_paddle_fluid_layers_affine_channel:
+uniform	.. _api_paddle_tensor_random_uniform:
+KVServer	.. _api_paddle_distributed_fleet_utils_KVServer:
+sigmoid	.. _api_paddle_nn_functional_activation_sigmoid:
+wrap_decorator	.. _api_paddle_fluid_wrapped_decorator_wrap_decorator:
+default_collate_fn	.. _api_paddle_fluid_reader_default_collate_fn:
+filter_by_instag	.. _api_paddle_fluid_layers_filter_by_instag:
+std	.. _api_paddle_tensor_stat_std:
+TrainingDecoder	.. _api_paddle_fluid_contrib_TrainingDecoder:
+AdamOptimizer	.. _api_paddle_fluid_optimizer_AdamOptimizer:
+save_vars	.. _api_paddle_fluid_io_save_vars:
+op_freq_statistic	.. _api_paddle_fluid_contrib_op_freq_statistic:
+split	.. _api_paddle_fluid_layers_split:
+SGDOptimizer	.. _api_paddle_optimizer_SGDOptimizer:
+Conv1d	.. _api_paddle_nn_layer_conv_Conv1d:
+prelu	.. _api_paddle_nn_functional_prelu:
+unfold	.. _api_paddle_fluid_layers_unfold:
+smooth_l1	.. _api_paddle_fluid_layers_smooth_l1:
+logsigmoid	.. _api_paddle_fluid_layers_logsigmoid:
+glu	.. _api_paddle_fluid_nets_glu:
+bilateral_slice	.. _api_paddle_fluid_contrib_bilateral_slice:
+AdamW	.. _api_paddle_optimizer_AdamW:
+img_conv_group	.. _api_paddle_fluid_nets_img_conv_group:
+load_image	.. _api_paddle_dataset_image_load_image:
+AddQuantDequantPass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_AddQuantDequantPass:
+DeviceWorker	.. _api_paddle_fluid_device_worker_DeviceWorker:
+Pool2D	.. _api_paddle_fluid_dygraph_Pool2D:
+case	.. _api_paddle_fluid_layers_case:
+RETURN_NO_VALUE_MAGIC_NUM	.. _api_paddle_fluid_dygraph_dygraph_to_static_return_transformer_RETURN_NO_VALUE_MAGIC_NUM:
+tanh	.. _api_paddle_fluid_layers_tanh:
+reduce_prod	.. _api_paddle_fluid_layers_reduce_prod:
+DataFeeder	.. _api_paddle_fluid_DataFeeder:
+convert_dist_to_sparse_program	.. _api_paddle_fluid_contrib_convert_dist_to_sparse_program:
+cast	.. _api_paddle_fluid_layers_cast:
+cpu_places	.. _api_paddle_fluid_cpu_places:
+Mode	.. _api_paddle_fluid_incubate_fleet_base_fleet_base_Mode:
+accuracy	.. _api_paddle_metric_accuracy:
+topk	.. _api_paddle_fluid_layers_topk:
+TranslatedLayer	.. _api_paddle_fluid_dygraph_io_TranslatedLayer:
+Fleet	.. _api_paddle_fluid_incubate_fleet_base_fleet_base_Fleet:
+greater_equal	.. _api_paddle_fluid_layers_greater_equal:
+sign	.. _api_paddle_tensor_math_sign:
 EditDistance	.. _api_paddle_metric_EditDistance:
-fill_constant	.. _api_paddle_fill_constant:
+piecewise_decay	.. _api_paddle_fluid_layers_piecewise_decay:
-rpn_target_assign	.. _api_paddle_nn_functional_rpn_target_assign:
+set_global_initializer	.. _api_paddle_fluid_initializer_set_global_initializer:
-piecewise_decay	.. _api_paddle_nn_functional_piecewise_decay:
+LoDTensor	.. _api_paddle_fluid_LoDTensor:
-Variable	.. _api_paddle_Variable:
+templatedoc	.. _api_paddle_fluid_layers_layer_function_generator_templatedoc:
-rsqrt	.. _api_paddle_rsqrt:
+create_py_reader_by_data	.. _api_paddle_fluid_layers_create_py_reader_by_data:
-dist	.. _api_paddle_tensor_linalg_dist:
+buffered	.. _api_paddle_io_buffered:
-LarsMomentumOptimizer	.. _api_paddle_optimizer_LarsMomentumOptimizer:
+box_coder	.. _api_paddle_fluid_layers_box_coder:
-softshrink	.. _api_paddle_nn_functional_softshrink:
+sin	.. _api_paddle_fluid_layers_sin:
-Uniform	.. _api_paddle_nn_initializer_Uniform:
+similarity_focus	.. _api_paddle_fluid_layers_similarity_focus:
-spectral_norm	.. _api_paddle_static_nn_spectral_norm:
+Scope	.. _api_paddle_fluid_Scope:
-yolo_box	.. _api_paddle_nn_functional_yolo_box:
+add_position_encoding	.. _api_paddle_fluid_layers_add_position_encoding:
-xmap_readers	.. _api_paddle_io_xmap_readers:
+CTCLoss	.. _api_paddle_nn_layer_loss_CTCLoss:
-ModelAverage	.. _api_paddle_optimizer_ModelAverage:
+cuda_profiler	.. _api_paddle_fluid_profiler_cuda_profiler:
-inverse	.. _api_paddle_tensor_math_inverse:
+Conv2d	.. _api_paddle_nn_layer_conv_Conv2d:
+uniform_random	.. _api_paddle_fluid_layers_uniform_random:
+ChunkEvaluator	.. _api_paddle_fluid_evaluator_ChunkEvaluator:
+test	.. _api_paddle_dataset_wmt14_test:
+round	.. _api_paddle_compat_round:
+cond	.. _api_paddle_fluid_layers_cond:
+lstm_unit	.. _api_paddle_fluid_layers_lstm_unit:
+to_bytes	.. _api_paddle_compat_to_bytes:
 roll	.. _api_paddle_tensor_manipulation_roll:
-ParallelExecutor	.. _api_paddle_static_ParallelExecutor:
+divide	.. _api_paddle_tensor_math_divide:
-prroi_pool	.. _api_paddle_nn_functional_prroi_pool:
+DynamicRNN	.. _api_paddle_fluid_layers_DynamicRNN:
-selu	.. _api_paddle_nn_functional_selu:
+get_exception_message	.. _api_paddle_compat_get_exception_message:
-name_scope	.. _api_paddle_static_name_scope:
+map_readers	.. _api_paddle_reader_map_readers:
-image_resize	.. _api_paddle_nn_functional_image_resize:
+ConstantPad1d	.. _api_paddle_nn_ConstantPad1d:
-ones_like	.. _api_paddle_tensor_creation_ones_like:
+in_dygraph_mode	.. _api_paddle_fluid_framework_in_dygraph_mode:
-exponential_decay	.. _api_paddle_nn_functional_exponential_decay:
+Generator	.. _api_paddle_fluid_generator_Generator:
-GroupNorm	.. _api_paddle_nn_GroupNorm:
+tanhshrink	.. _api_paddle_nn_functional_tanhshrink:
-test	.. _api_paddle_dataset_imikolov_test:
+Adamax	.. _api_paddle_fluid_optimizer_Adamax:
-UtilBase	.. _api_paddle_distributed_fleet_UtilBase:
+slice	.. _api_paddle_fluid_layers_slice:
-set_device	.. _api_paddle_device_set_device:
+multi_download	.. _api_paddle_fluid_contrib_multi_download:
-kron	.. _api_paddle_tensor_math_kron:
+center_crop	.. _api_paddle_dataset_image_center_crop:
-ReplicationPad2d	.. _api_paddle_nn_ReplicationPad2d:
+train	.. _api_paddle_dataset_mnist_train:
-brelu	.. _api_paddle_nn_functional_brelu:
+l2_normalize	.. _api_paddle_fluid_layers_l2_normalize:
-psroi_pool	.. _api_paddle_nn_functional_psroi_pool:
+enabled	.. _api_paddle_fluid_dygraph_enabled:
+get_include	.. _api_paddle_sysconfig_get_include:
+MultivariateNormalDiag	.. _api_paddle_fluid_layers_MultivariateNormalDiag:
+inverse	.. _api_paddle_tensor_math_inverse:
+increment	.. _api_paddle_fluid_layers_increment:
+test	.. _api_paddle_dataset_voc2012_test:
+test	.. _api_paddle_dataset_mnist_test:
+Dataset	.. _api_paddle_io_Dataset:
+array_write	.. _api_paddle_fluid_layers_array_write:
+test	.. _api_paddle_dataset_sentiment_test:
+dynamic_lstmp	.. _api_paddle_fluid_layers_dynamic_lstmp:
+ssd_loss	.. _api_paddle_fluid_layers_ssd_loss:
+polygon_box_transform	.. _api_paddle_fluid_layers_polygon_box_transform:
+Tensor	.. _api_paddle_fluid_Tensor:
+py_reader	.. _api_paddle_fluid_layers_py_reader:
+QuantizedLinear	.. _api_paddle_fluid_contrib_slim_quantization_imperative_QuantizedLinear:
+train	.. _api_paddle_dataset_uci_housing_train:
+autoincreased_step_counter	.. _api_paddle_fluid_layers_autoincreased_step_counter:
+BCELoss	.. _api_paddle_nn_layer_loss_BCELoss:
+relu	.. _api_paddle_fluid_layers_relu:
+numel	.. _api_paddle_tensor_stat_numel:
+softplus	.. _api_paddle_fluid_layers_softplus:
+RoleMakerBase	.. _api_paddle_fluid_incubate_fleet_base_role_maker_RoleMakerBase:
+ParallelExecutor	.. _api_paddle_fluid_parallel_executor_ParallelExecutor:
+randperm	.. _api_paddle_tensor_random_randperm:
+relu6	.. _api_paddle_fluid_layers_relu6:
+fill_constant_batch_size_like	.. _api_paddle_fluid_layers_fill_constant_batch_size_like:
+argmin	.. _api_paddle_tensor_search_argmin:
+erf	.. _api_paddle_fluid_layers_erf:
+elementwise_sum	.. _api_paddle_tensor_math_elementwise_sum:
+RecomputeOptimizer	.. _api_paddle_optimizer_RecomputeOptimizer:
+memory_optimize	.. _api_paddle_fluid_memory_optimize:
+diag_embed	.. _api_paddle_nn_functional_extension_diag_embed:
+flatten	.. _api_paddle_fluid_layers_flatten:
+floor	.. _api_paddle_fluid_layers_floor:
+DetectionMAP	.. _api_paddle_metric_DetectionMAP:
+AlphaDropout	.. _api_paddle_nn_AlphaDropout:
+strided_slice	.. _api_paddle_fluid_layers_strided_slice:
+get_embedding	.. _api_paddle_dataset_conll05_get_embedding:
+log_loss	.. _api_paddle_fluid_layers_log_loss:
+DistributeTranspiler	.. _api_paddle_fluid_DistributeTranspiler:
+LocalSGD	.. _api_paddle_fluid_transpiler_collective_LocalSGD:
+batch_images_from_tar	.. _api_paddle_dataset_image_batch_images_from_tar:
+sequence_expand	.. _api_paddle_fluid_layers_sequence_expand:
+save_inference_model	.. _api_paddle_io_save_inference_model:
+addcmul	.. _api_paddle_tensor_math_addcmul:
+load	.. _api_paddle_jit_load:
+append_backward	.. _api_paddle_fluid_backward_append_backward:
+inplace_abn	.. _api_paddle_fluid_layers_inplace_abn:
+profiler	.. _api_paddle_fluid_profiler_profiler:
+train10	.. _api_paddle_dataset_cifar_train10:
+TreeConv	.. _api_paddle_fluid_dygraph_TreeConv:
+HeterXpuTrainer	.. _api_paddle_fluid_trainer_desc_HeterXpuTrainer:
+val	.. _api_paddle_dataset_voc2012_val:
+histogram	.. _api_paddle_tensor_linalg_histogram:
+mm	.. _api_paddle_tensor_math_mm:
+PReLU	.. _api_paddle_nn_layer_activation_PReLU:
+reduce_any	.. _api_paddle_fluid_layers_reduce_any:
+selu	.. _api_paddle_fluid_layers_selu:
+argmax	.. _api_paddle_fluid_layers_argmax:
+ExecuteError	.. _api_paddle_distributed_fleet_utils_ExecuteError:
+LSTMCell	.. _api_paddle_fluid_layers_LSTMCell:
+compose	.. _api_paddle_reader_compose:
+multiplex	.. _api_paddle_fluid_layers_multiplex:
+multi_upload	.. _api_paddle_fluid_contrib_multi_upload:
+QueueDataset	.. _api_paddle_distributed_fleet_QueueDataset:
+OutScaleForTrainingPass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_OutScaleForTrainingPass:
+eye	.. _api_paddle_fluid_layers_eye:
+equal	.. _api_paddle_fluid_layers_equal:
+GroupNorm	.. _api_paddle_fluid_dygraph_GroupNorm:
+sequence_scatter	.. _api_paddle_fluid_layers_sequence_scatter:
+create_random_int_lodtensor	.. _api_paddle_fluid_create_random_int_lodtensor:
+leaky_relu	.. _api_paddle_fluid_layers_leaky_relu:
+exponential_decay	.. _api_paddle_fluid_layers_exponential_decay:
+NaturalExpDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_NaturalExpDecay:
+softsign	.. _api_paddle_fluid_layers_softsign:
+GradAllReduce	.. _api_paddle_fluid_transpiler_collective_GradAllReduce:
+linear_chain_crf	.. _api_paddle_fluid_layers_linear_chain_crf:
+LookaheadOptimizer	.. _api_paddle_optimizer_LookaheadOptimizer:
+L2Decay	.. _api_paddle_fluid_regularizer_L2Decay:
+left_right_flip	.. _api_paddle_dataset_image_left_right_flip:
+Section	.. _api_paddle_fluid_device_worker_Section:
+save_dygraph	.. _api_paddle_fluid_dygraph_checkpoint_save_dygraph:
+sequence_reverse	.. _api_paddle_fluid_layers_sequence_reverse:
+isnan	.. _api_paddle_tensor_math_isnan:
+conv3d	.. _api_paddle_fluid_layers_conv3d:
 DGCOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_DGCOptimizer:
-Sigmoid	.. _api_paddle_nn_layer_activation_Sigmoid:
+data_norm	.. _api_paddle_fluid_layers_data_norm:
+conv3d_transpose	.. _api_paddle_fluid_layers_conv3d_transpose:
+sum	.. _api_paddle_fluid_layers_sum:
+asin	.. _api_paddle_fluid_layers_asin:
+auc	.. _api_paddle_metric_auc:
+zeros	.. _api_paddle_tensor_creation_zeros:
+TransformForMobilePass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_TransformForMobilePass:
+get_dict	.. _api_paddle_dataset_wmt14_get_dict:
+DetectionMAP	.. _api_paddle_fluid_evaluator_DetectionMAP:
+minimum	.. _api_paddle_tensor_math_minimum:
+prod	.. _api_paddle_tensor_math_prod:
+locality_aware_nms	.. _api_paddle_fluid_layers_locality_aware_nms:
+RNNCell	.. _api_paddle_fluid_layers_RNNCell:
+BCEWithLogitsLoss	.. _api_paddle_nn_layer_loss_BCEWithLogitsLoss:
+ConvertToInt8Pass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_ConvertToInt8Pass:
+gelu	.. _api_paddle_nn_functional_gelu:
+adaptive_pool2d	.. _api_paddle_fluid_layers_adaptive_pool2d:
+decorate	.. _api_paddle_fluid_contrib_mixed_precision_decorate:
+remainder	.. _api_paddle_tensor_math_remainder:
+clip	.. _api_paddle_fluid_layers_clip:
+AMPOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_AMPOptimizer:
+Xavier	.. _api_paddle_fluid_initializer_Xavier:
+sequence_unpad	.. _api_paddle_fluid_layers_sequence_unpad:
+Embedding	.. _api_paddle_fluid_dygraph_Embedding:
+UserDefinedRoleMaker	.. _api_paddle_distributed_fleet_UserDefinedRoleMaker:
+one_hot	.. _api_paddle_fluid_layers_one_hot:
+MultiSlotDataGenerator	.. _api_paddle_fluid_incubate_data_generator_MultiSlotDataGenerator:
+addmm	.. _api_paddle_tensor_math_addmm:
+GRUCell	.. _api_paddle_fluid_layers_GRUCell:
+get_program_parameter	.. _api_paddle_fluid_io_get_program_parameter:
+Recall	.. _api_paddle_metric_Recall:
+Hardshrink	.. _api_paddle_nn_layer_activation_Hardshrink:
+test	.. _api_paddle_dataset_wmt16_test:
+create_lod_tensor	.. _api_paddle_fluid_create_lod_tensor:
+generate_activation_fn	.. _api_paddle_fluid_layers_layer_function_generator_generate_activation_fn:
+space_to_depth	.. _api_paddle_fluid_layers_space_to_depth:
+swish	.. _api_paddle_fluid_layers_swish:
+QueueDataset	.. _api_paddle_fluid_dataset_QueueDataset:
+train	.. _api_paddle_dataset_movielens_train:
+reciprocal	.. _api_paddle_fluid_layers_reciprocal:
+SaveLoadConfig	.. _api_paddle_jit_SaveLoadConfig:
+MetricBase	.. _api_paddle_fluid_metrics_MetricBase:
+adaptive_avg_pool2d	.. _api_paddle_nn_functional_pooling_adaptive_avg_pool2d:
+polynomial_decay	.. _api_paddle_fluid_layers_polynomial_decay:
+py_func	.. _api_paddle_fluid_layers_nn_py_func:
+reshape	.. _api_paddle_fluid_layers_reshape:
+Bilinear	.. _api_paddle_fluid_initializer_Bilinear:
+get_flags	.. _api_paddle_fluid_get_flags:
+softshrink	.. _api_paddle_nn_functional_softshrink:
+OutScaleForInferencePass	.. _api_paddle_fluid_contrib_slim_quantization_quantization_pass_OutScaleForInferencePass:
+nonzero	.. _api_paddle_tensor_search_nonzero:
 LocalFS	.. _api_paddle_distributed_fleet_utils_LocalFS:
-unsqueeze	.. _api_paddle_tensor_manipulation_unsqueeze:
+ReturnTransformer	.. _api_paddle_fluid_dygraph_dygraph_to_static_return_transformer_ReturnTransformer:
-Xavier	.. _api_paddle_nn_initializer_Xavier:
+get_tensor_from_selected_rows	.. _api_paddle_fluid_layers_get_tensor_from_selected_rows:
-t	.. _api_paddle_tensor_linalg_t:
+dist	.. _api_paddle_tensor_linalg_dist:
-filter_by_instag	.. _api_paddle_nn_functional_filter_by_instag:
+matmul	.. _api_paddle_tensor_linalg_matmul:
-sign	.. _api_paddle_sign:
+DpsgdOptimizer	.. _api_paddle_optimizer_DpsgdOptimizer:
-test	.. _api_paddle_dataset_flowers_test:
+soft_relu	.. _api_paddle_fluid_layers_soft_relu:
-iou_similarity	.. _api_paddle_nn_functional_iou_similarity:
+no_grad	.. _api_paddle_fluid_dygraph_base_no_grad:
-Adamax	.. _api_paddle_optimizer_Adamax:
+dynamic_decode	.. _api_paddle_fluid_layers_dynamic_decode:
-add_position_encoding	.. _api_paddle_nn_functional_add_position_encoding:
+bernoulli	.. _api_paddle_tensor_random_bernoulli:
-to_tensor	.. _api_paddle_tensor_creation_to_tensor:
+ComposeNotAligned	.. _api_paddle_reader_ComposeNotAligned:
-erf	.. _api_paddle_erf:
+GRUUnit	.. _api_paddle_fluid_dygraph_GRUUnit:
-map_readers	.. _api_paddle_io_map_readers:
+ReplicationPad2d	.. _api_paddle_nn_ReplicationPad2d:
-layer_norm	.. _api_paddle_static_nn_layer_norm:
+detection_output	.. _api_paddle_fluid_layers_detection_output:
-roi_pool	.. _api_paddle_nn_functional_roi_pool:
+HSigmoid	.. _api_paddle_nn_layer_activation_HSigmoid:
-RecomputeOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_RecomputeOptimizer:
+elementwise_add	.. _api_paddle_fluid_layers_elementwise_add:
-map_readers	.. _api_paddle_reader_map_readers:
-get_device	.. _api_paddle_device_get_device:
 CosineSimilarity	.. _api_paddle_nn_CosineSimilarity:
-pool2d	.. _api_paddle_nn_functional_pool2d:
+argsort	.. _api_paddle_tensor_search_argsort:
-maximum	.. _api_paddle_tensor_math_maximum:
+prepare_context	.. _api_paddle_fluid_dygraph_parallel_prepare_context:
-allclose	.. _api_paddle_tensor_logic_allclose:
+test	.. _api_paddle_dataset_flowers_test:
-elementwise_sub	.. _api_paddle_elementwise_sub:
+sinh	.. _api_paddle_fluid_layers_sinh:
-create_parameter	.. _api_paddle_create_parameter:
+default_collate_fn	.. _api_paddle_fluid_io_default_collate_fn:
-load	.. _api_paddle_load:
+ones_like	.. _api_paddle_fluid_layers_ones_like:
-NoamDecay	.. _api_paddle_NoamDecay:
+partial_concat	.. _api_paddle_fluid_contrib_partial_concat:
-ReplicationPad1d	.. _api_paddle_nn_ReplicationPad1d:
+Adagrad	.. _api_paddle_optimizer_Adagrad:
-CUDAPinnedPlace	.. _api_paddle_framework_CUDAPinnedPlace:
+RMSPropOptimizer	.. _api_paddle_fluid_optimizer_RMSPropOptimizer:
+edit_distance	.. _api_paddle_fluid_layers_edit_distance:
+sums	.. _api_paddle_fluid_layers_sums:
+softmax_with_cross_entropy	.. _api_paddle_fluid_layers_softmax_with_cross_entropy:
 tril	.. _api_paddle_tensor_creation_tril:
-DataLoader	.. _api_paddle_io_DataLoader:
+shuffle	.. _api_paddle_fluid_io_shuffle:
-sum	.. _api_paddle_tensor_math_sum:
+layer_norm	.. _api_paddle_fluid_layers_layer_norm:
+softplus	.. _api_paddle_nn_functional_softplus:
+roi_perspective_transform	.. _api_paddle_fluid_layers_roi_perspective_transform:
+ReflectionPad1d	.. _api_paddle_nn_ReflectionPad1d:
+adaptive_pool3d	.. _api_paddle_fluid_layers_adaptive_pool3d:
+grid_sampler	.. _api_paddle_fluid_layers_grid_sampler:
+tensor_array_to_tensor	.. _api_paddle_fluid_layers_tensor_array_to_tensor:
+load_op_library	.. _api_paddle_fluid_load_op_library:
+max_movie_id	.. _api_paddle_dataset_movielens_max_movie_id:
+ExponentialDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_ExponentialDecay:
+log_softmax	.. _api_paddle_nn_functional_activation_log_softmax:
+require_version	.. _api_paddle_fluid_require_version:
+SequenceSampler	.. _api_paddle_io_SequenceSampler:
+Dropout2D	.. _api_paddle_nn_Dropout2D:
 train	.. _api_paddle_dataset_imikolov_train:
-zeros	.. _api_paddle_tensor_creation_zeros:
+elementwise_max	.. _api_paddle_fluid_layers_elementwise_max:
-natural_exp_decay	.. _api_paddle_nn_functional_natural_exp_decay:
+array_length	.. _api_paddle_fluid_layers_array_length:
-save_inference_model	.. _api_paddle_io_save_inference_model:
+sampled_softmax_with_cross_entropy	.. _api_paddle_fluid_layers_sampled_softmax_with_cross_entropy:
-sampled_softmax_with_cross_entropy	.. _api_paddle_nn_functional_sampled_softmax_with_cross_entropy:
+generate_proposals	.. _api_paddle_fluid_layers_generate_proposals:
-DpsgdOptimizer	.. _api_paddle_optimizer_DpsgdOptimizer:
+train	.. _api_paddle_dataset_sentiment_train:
-get_cudnn_version	.. _api_paddle_device_get_cudnn_version:
+build_dict	.. _api_paddle_dataset_imikolov_build_dict:
-sort	.. _api_paddle_tensor_search_sort:
+collect_fpn_proposals	.. _api_paddle_fluid_layers_collect_fpn_proposals:
-Conv2DTranspose	.. _api_paddle_nn_layer_conv_Conv2DTranspose:
+PolynomialDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_PolynomialDecay:
-dice_loss	.. _api_paddle_nn_functional_dice_loss:
+pool2d	.. _api_paddle_fluid_layers_pool2d:
-bilinear_tensor_product	.. _api_paddle_static_nn_bilinear_tensor_product:
+CountFilterEntry	.. _api_paddle_fluid_entry_attr_CountFilterEntry:
-box_coder	.. _api_paddle_nn_functional_box_coder:
+InputSpec	.. _api_paddle_static_InputSpec:
-leaky_relu	.. _api_paddle_nn_functional_leaky_relu:
+default_startup_program	.. _api_paddle_fluid_framework_default_startup_program:
-test	.. _api_paddle_dataset_uci_housing_test:
+index_select	.. _api_paddle_tensor_search_index_select:
-LogSigmoid	.. _api_paddle_nn_layer_activation_LogSigmoid:
+margin_rank_loss	.. _api_paddle_fluid_layers_margin_rank_loss:
-FSTimeOut	.. _api_paddle_distributed_fleet_utils_FSTimeOut:
+randint	.. _api_paddle_tensor_random_randint:
-simple_transform	.. _api_paddle_dataset_image_simple_transform:
+less_equal	.. _api_paddle_tensor_logic_less_equal:
-relu6	.. _api_paddle_nn_functional_relu6:
+sign	.. _api_paddle_fluid_layers_sign:
-load_program_state	.. _api_paddle_io_load_program_state:
+inverse_time_decay	.. _api_paddle_fluid_layers_inverse_time_decay:
-shuffle_channel	.. _api_paddle_nn_functional_shuffle_channel:
+MetaOptimizerFactory	.. _api_paddle_distributed_fleet_base_meta_optimizer_factory_MetaOptimizerFactory:
+conv1d	.. _api_paddle_nn_functional_conv1d:
+has_inf	.. _api_paddle_fluid_layers_has_inf:
+fused_embedding_seq_pool	.. _api_paddle_fluid_contrib_fused_embedding_seq_pool:
+logical_and	.. _api_paddle_fluid_layers_logical_and:
+BeamSearchDecoder	.. _api_paddle_fluid_contrib_BeamSearchDecoder:
+CUDAPlace	.. _api_paddle_framework_CUDAPlace:
+BreakContinueTransformer	.. _api_paddle_fluid_dygraph_dygraph_to_static_break_continue_transformer_BreakContinueTransformer:
+LocalSGDOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LocalSGDOptimizer:
+CrossEntropyLoss	.. _api_paddle_nn_layer_loss_CrossEntropyLoss:
+PairwiseDistance	.. _api_paddle_nn_layer_distance_PairwiseDistance:
+AstNodeWrapper	.. _api_paddle_fluid_dygraph_dygraph_to_static_AstNodeWrapper:
+Role	.. _api_paddle_fluid_incubate_fleet_base_role_maker_Role:
+cross	.. _api_paddle_tensor_linalg_cross:
 fetch	.. _api_paddle_dataset_wmt16_fetch:
-to_text	.. _api_paddle_compat_to_text:
+buffered	.. _api_paddle_reader_buffered:
-DecayedAdagradOptimizer	.. _api_paddle_optimizer_DecayedAdagradOptimizer:
+manual_seed	.. _api_paddle_framework_random_manual_seed:
-index_sample	.. _api_paddle_tensor_search_index_sample:
+QuantInt8MkldnnPass	.. _api_paddle_fluid_contrib_slim_quantization_quant_int8_mkldnn_pass_QuantInt8MkldnnPass:
-adaptive_pool3d	.. _api_paddle_nn_functional_adaptive_pool3d:
+load_and_transform	.. _api_paddle_dataset_image_load_and_transform:
-conll05	.. _api_paddle_dataset_conll05:
-batch_norm	.. _api_paddle_static_nn_batch_norm:
-LambOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LambOptimizer:
 CUDAPlace	.. _api_paddle_fluid_CUDAPlace:
-accuracy	.. _api_paddle_metric_accuracy:
+one_hot	.. _api_paddle_fluid_one_hot:
+broadcast_to	.. _api_paddle_tensor_manipulation_broadcast_to:
+get_cudnn_version	.. _api_paddle_device_get_cudnn_version:
+get_dict	.. _api_paddle_dataset_wmt16_get_dict:
+ones_like	.. _api_paddle_tensor_creation_ones_like:
+cross_entropy	.. _api_paddle_nn_functional_cross_entropy:
+random_crop	.. _api_paddle_fluid_layers_random_crop:
+cholesky	.. _api_paddle_tensor_linalg_cholesky:
+diag	.. _api_paddle_fluid_layers_diag:
+match_matrix_tensor	.. _api_paddle_fluid_contrib_match_matrix_tensor:
+GELU	.. _api_paddle_nn_layer_activation_GELU:
+load_persistables_for_inference	.. _api_paddle_fluid_contrib_load_persistables_for_inference:
+save	.. _api_paddle_jit_save:
+reorder_lod_tensor_by_rank	.. _api_paddle_fluid_layers_reorder_lod_tensor_by_rank:
+FakeQuantAbsMax	.. _api_paddle_fluid_contrib_slim_quantization_imperative_FakeQuantAbsMax:
+DatasetBase	.. _api_paddle_distributed_fleet_DatasetBase:
+floor_division	.. _api_paddle_compat_floor_division:
+while_loop	.. _api_paddle_fluid_layers_while_loop:
+DistributedStrategy	.. _api_paddle_distributed_fleet_DistributedStrategy:
+gru_unit	.. _api_paddle_fluid_layers_gru_unit:
+reduce_all	.. _api_paddle_fluid_layers_reduce_all:
+GradientClipByGlobalNorm	.. _api_paddle_fluid_clip_GradientClipByGlobalNorm:
+pad2d	.. _api_paddle_fluid_layers_pad2d:
+switch_case	.. _api_paddle_fluid_layers_switch_case:
+unstack	.. _api_paddle_fluid_layers_unstack:
+spectral_norm	.. _api_paddle_fluid_layers_spectral_norm:
+switch	.. _api_paddle_fluid_unique_name_switch:
+BeamSearchDecoder	.. _api_paddle_fluid_layers_BeamSearchDecoder:
+Softmax	.. _api_paddle_nn_layer_activation_Softmax:
+hardtanh	.. _api_paddle_nn_functional_hardtanh:
+DGCMomentumOptimizer	.. _api_paddle_optimizer_DGCMomentumOptimizer:
+split	.. _api_paddle_dataset_common_split:
+train100	.. _api_paddle_dataset_cifar_train100:
+box_decoder_and_assign	.. _api_paddle_fluid_layers_box_decoder_and_assign:
+gather_nd	.. _api_paddle_tensor_manipulation_gather_nd:
+MultiSlotStringDataGenerator	.. _api_paddle_fluid_incubate_data_generator_MultiSlotStringDataGenerator:
+range	.. _api_paddle_fluid_layers_range:
+to_tensor	.. _api_paddle_tensor_creation_to_tensor:
+rnn	.. _api_paddle_fluid_layers_rnn:
+start_profiler	.. _api_paddle_fluid_profiler_start_profiler:
+BatchSampler	.. _api_paddle_io_BatchSampler:
+SmoothL1Loss	.. _api_paddle_nn_layer_loss_SmoothL1Loss:
+DatasetFactory	.. _api_paddle_fluid_dataset_DatasetFactory:
+AdaptiveAvgPool2d	.. _api_paddle_nn_layer_pooling_AdaptiveAvgPool2d:
+sequence_slice	.. _api_paddle_fluid_layers_sequence_slice:
+SpectralNorm	.. _api_paddle_fluid_dygraph_SpectralNorm:
+WeightNormParamAttr	.. _api_paddle_fluid_param_attr_WeightNormParamAttr:
+test	.. _api_paddle_dataset_imdb_test:
+multiclass_nms2	.. _api_paddle_fluid_contrib_multiclass_nms2:
+ones	.. _api_paddle_fluid_layers_ones:
+Sequential	.. _api_paddle_fluid_dygraph_container_Sequential:
+reduce_max	.. _api_paddle_fluid_layers_reduce_max:
+ReflectionPad2d	.. _api_paddle_nn_ReflectionPad2d:
 xmap_readers	.. _api_paddle_reader_xmap_readers:
-scatter_nd_add	.. _api_paddle_scatter_nd_add:
+prior_box	.. _api_paddle_fluid_layers_prior_box:
-Recall	.. _api_paddle_metric_Recall:
+PiecewiseDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_PiecewiseDecay:
-LarsOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_LarsOptimizer:
+validation	.. _api_paddle_dataset_wmt16_validation:
-pixel_shuffle	.. _api_paddle_nn_functional_pixel_shuffle:
+conv2d	.. _api_paddle_nn_functional_conv_conv2d:
-isfinite	.. _api_paddle_isfinite:
+InMemoryDataset	.. _api_paddle_distributed_fleet_InMemoryDataset:
+index_sample	.. _api_paddle_tensor_search_index_sample:
+cumsum	.. _api_paddle_fluid_layers_cumsum:
+nce	.. _api_paddle_fluid_layers_nce:
+stack	.. _api_paddle_tensor_manipulation_stack:
+get_logger	.. _api_paddle_fluid_log_helper_get_logger:
+basic_lstm	.. _api_paddle_fluid_contrib_basic_lstm:
+create_global_var	.. _api_paddle_fluid_layers_tensor_create_global_var:
+argmin	.. _api_paddle_fluid_layers_argmin:
+load_vars	.. _api_paddle_fluid_io_load_vars:
+dot	.. _api_paddle_tensor_linalg_dot:
+build_dict	.. _api_paddle_dataset_imdb_build_dict:
+matmul	.. _api_paddle_fluid_layers_matmul:
+sparse_embedding	.. _api_paddle_fluid_contrib_sparse_embedding:
+elu	.. _api_paddle_nn_functional_elu:
+expand_as	.. _api_paddle_tensor_manipulation_expand_as:
+instance_norm	.. _api_paddle_fluid_layers_instance_norm:
+cos	.. _api_paddle_fluid_layers_cos:
+hard_sigmoid	.. _api_paddle_fluid_layers_hard_sigmoid:
+rank_attention	.. _api_paddle_fluid_contrib_rank_attention:
+While	.. _api_paddle_fluid_layers_While:
+sequence_last_step	.. _api_paddle_fluid_layers_sequence_last_step:
+softshrink	.. _api_paddle_fluid_layers_softshrink:
+BasicLSTMUnit	.. _api_paddle_fluid_contrib_BasicLSTMUnit:
+target_assign	.. _api_paddle_fluid_layers_target_assign:
+Constant	.. _api_paddle_fluid_initializer_Constant:
+create_static_variable_gast_node	.. _api_paddle_fluid_dygraph_dygraph_to_static_create_static_variable_gast_node:
+BasicGRUUnit	.. _api_paddle_fluid_contrib_BasicGRUUnit:
+sequence_pool	.. _api_paddle_fluid_layers_sequence_pool:
+RowConv	.. _api_paddle_nn_layer_extension_RowConv:
+scatter_nd_add	.. _api_paddle_fluid_layers_scatter_nd_add:
+ceil	.. _api_paddle_fluid_layers_ceil:
+save	.. _api_paddle_fluid_save:
+teacher_student_sigmoid_loss	.. _api_paddle_fluid_layers_teacher_student_sigmoid_loss:
+deformable_conv	.. _api_paddle_fluid_layers_deformable_conv:
+pad	.. _api_paddle_fluid_layers_pad:
+dice_loss	.. _api_paddle_fluid_layers_dice_loss:
+Linear	.. _api_paddle_fluid_dygraph_Linear:
+fill_constant	.. _api_paddle_fluid_layers_fill_constant:
+kron	.. _api_paddle_tensor_math_kron:
+embedding	.. _api_paddle_fluid_input_embedding:
+CompositeMetric	.. _api_paddle_metric_CompositeMetric:
+pad_constant_like	.. _api_paddle_fluid_layers_pad_constant_like:
+UserDefinedCollectiveRoleMaker	.. _api_paddle_fluid_incubate_fleet_base_role_maker_UserDefinedCollectiveRoleMaker:
+HDFSClient	.. _api_paddle_distributed_fleet_utils_HDFSClient:
+flatten	.. _api_paddle_tensor_manipulation_flatten:
+signature_safe_contextmanager	.. _api_paddle_fluid_wrapped_decorator_signature_safe_contextmanager:
+shape	.. _api_paddle_fluid_layers_shape:
+default_main_program	.. _api_paddle_fluid_framework_default_main_program:
+row_conv	.. _api_paddle_fluid_layers_row_conv:
+sigmoid_cross_entropy_with_logits	.. _api_paddle_fluid_layers_sigmoid_cross_entropy_with_logits:
+relu	.. _api_paddle_nn_functional_activation_relu:
+stanh	.. _api_paddle_fluid_layers_stanh:
+normal	.. _api_paddle_tensor_random_normal:
+set_flags	.. _api_paddle_fluid_set_flags:
+RecomputeOptimizer	.. _api_paddle_distributed_fleet_meta_optimizers_RecomputeOptimizer:
 LambOptimizer	.. _api_paddle_optimizer_LambOptimizer:
-noam_decay	.. _api_paddle_nn_functional_noam_decay:
+conv2d	.. _api_paddle_fluid_layers_conv2d:
-get_dict	.. _api_paddle_dataset_wmt16_get_dict:
+ELU	.. _api_paddle_nn_layer_activation_ELU:
-reduce_mean	.. _api_paddle_reduce_mean:
+scatter_nd	.. _api_paddle_fluid_layers_scatter_nd:
-KVHTTPServer	.. _api_paddle_distributed_fleet_utils_KVHTTPServer:
+to_chw	.. _api_paddle_dataset_image_to_chw:
+cache	.. _api_paddle_io_cache:
+rpn_target_assign	.. _api_paddle_fluid_layers_rpn_target_assign:
+logical_or	.. _api_paddle_fluid_layers_logical_or:
+mse_loss	.. _api_paddle_fluid_layers_mse_loss:
+sequence_mask	.. _api_paddle_fluid_layers_sequence_mask:
+mul	.. _api_paddle_fluid_layers_mul:
+group_norm	.. _api_paddle_fluid_layers_group_norm:
+test	.. _api_paddle_dataset_uci_housing_test:
+split	.. _api_paddle_tensor_manipulation_split:
+Conv2D	.. _api_paddle_fluid_dygraph_Conv2D:
+round	.. _api_paddle_fluid_layers_round:
+WeightedAverage	.. _api_paddle_fluid_average_WeightedAverage:
+mean	.. _api_paddle_tensor_stat_mean:
+diag	.. _api_paddle_tensor_creation_diag:
+mean	.. _api_paddle_fluid_layers_mean:
+GeneralRoleMaker	.. _api_paddle_fluid_incubate_fleet_base_role_maker_GeneralRoleMaker:
+Ftrl	.. _api_paddle_optimizer_Ftrl:
+softmax	.. _api_paddle_fluid_layers_softmax:
+pool3d	.. _api_paddle_fluid_layers_pool3d:
+FSFileNotExistsError	.. _api_paddle_distributed_fleet_utils_FSFileNotExistsError:
+Adadelta	.. _api_paddle_optimizer_Adadelta:
+Sigmoid	.. _api_paddle_nn_layer_activation_Sigmoid:
+interpolate	.. _api_paddle_nn_functional_common_interpolate:
+Categorical	.. _api_paddle_fluid_layers_Categorical:
+fc	.. _api_paddle_fluid_layers_fc:
 ReplicationPad3d	.. _api_paddle_nn_ReplicationPad3d:
-unbind	.. _api_paddle_tensor_manipulation_unbind:
+reduce_sum	.. _api_paddle_fluid_layers_reduce_sum:
-load_image_bytes	.. _api_paddle_dataset_image_load_image_bytes:
+InverseTimeDecay	.. _api_paddle_fluid_dygraph_learning_rate_scheduler_InverseTimeDecay:
-InMemoryDataset	.. _api_paddle_distributed_fleet_InMemoryDataset:
+randn	.. _api_paddle_tensor_random_randn:
-test	.. _api_paddle_dataset_imdb_test:
+Softplus	.. _api_paddle_nn_layer_activation_Softplus:
-chain	.. _api_paddle_reader_chain:
+ConvTranspose1d	.. _api_paddle_nn_layer_conv_ConvTranspose1d:
-matmul	.. _api_paddle_tensor_linalg_matmul:
+ParameterList	.. _api_paddle_fluid_dygraph_container_ParameterList:
-BatchNorm	.. _api_paddle_nn_BatchNorm:
+Dropout	.. _api_paddle_fluid_dygraph_Dropout:
-wmt16	.. _api_paddle_dataset_wmt16:
+zeros	.. _api_paddle_fluid_layers_zeros:
-wmt14	.. _api_paddle_dataset_wmt14:
+where	.. _api_paddle_tensor_search_where:
-pool3d	.. _api_paddle_nn_functional_pool3d:
+LarsMomentum	.. _api_paddle_optimizer_LarsMomentum:
+trace	.. _api_paddle_tensor_math_trace:
+set_gradient_clip	.. _api_paddle_fluid_clip_set_gradient_clip:
+FSTimeOut	.. _api_paddle_distributed_fleet_utils_FSTimeOut:
+crop_tensor	.. _api_paddle_fluid_layers_crop_tensor:
+load	.. _api_paddle_fluid_layers_load:
+program_guard	.. _api_paddle_fluid_framework_program_guard:
+retinanet_detection_output	.. _api_paddle_fluid_layers_retinanet_detection_output:
+Sampler	.. _api_paddle_io_Sampler:
+scope_guard	.. _api_paddle_fluid_executor_scope_guard:
+shard_index	.. _api_paddle_fluid_layers_shard_index:
+Normal	.. _api_paddle_fluid_initializer_Normal:
+download	.. _api_paddle_dataset_common_download:
+PRelu	.. _api_paddle_fluid_dygraph_PRelu:
+create_parameter	.. _api_paddle_fluid_layers_create_parameter:
+resize_short	.. _api_paddle_dataset_image_resize_short:
+ConstantPad3d	.. _api_paddle_nn_ConstantPad3d:
+lrn	.. _api_paddle_fluid_layers_lrn:
+scatter	.. _api_paddle_fluid_layers_scatter:
--- a/doc/paddle/api/gen_doc.py
+++ b/doc/paddle/api/gen_doc.py
@@ -93,20 +93,9 @@ def is_filter_api(api):
    if api in alias_api_map:
        return False
-    #check api start with paddle.fluid
-    #if has no alias, return True
-    #if has alias also in paddle.fluid, return True
-    #if has alias in other module, return False
    same_apis = same_api_map[id(eval(api))]
-    if api.startswith("paddle.fluid"):
-        all_fluid_flag = True
-        for x in same_apis:
-            if not x.startswith("paddle.fluid"):
-                all_fluid_flag = False
-        if all_fluid_flag:
-            return True
+    #api not in alias map
    #if the api in alias_map key, others api is alias api
    for x in same_apis:
        if x in alias_api_map:
@@ -127,14 +116,6 @@ def is_filter_api(api):
    return False
-def get_display_api(api):
-    # recomment alias api
-    if api.startswith("paddle.fluid") and api in alias_api_map:
-        return alias_api_map[api][0]
-    else:
-        return api
 def gen_en_files(root_path='paddle', api_label_file="api_label"):
    backup_path = root_path + "_" + str(int(time.time()))
    api_f = open(api_label_file, 'w')
@@ -142,11 +123,12 @@ def gen_en_files(root_path='paddle', api_label_file="api_label"):
    for api in api_set:
        if is_filter_api(api):
            continue
+        module_name = ".".join(api.split(".")[0:-1])
+        doc_file = api.split(".")[-1]
-        raw_api = api
+        if isinstance(eval(module_name + "." + doc_file), types.ModuleType):
-        api = get_display_api(api)
+            continue
-        doc_file = api.split(".")[-1]
        path = "/".join(api.split(".")[0:-1])
        if not os.path.exists(path):
            os.makedirs(path)
@@ -156,7 +138,7 @@ def gen_en_files(root_path='paddle', api_label_file="api_label"):
        os.mknod(f + en_suffix)
        gen = EnDocGenerator()
        with gen.guard(f + en_suffix):
-            gen.module_name = ".".join(raw_api.split(".")[0:-1])
+            gen.module_name = module_name
            gen.api = doc_file
            gen.print_header_reminder()
            gen.print_item()

--- a/doc/paddle/api/paddle/nn/initializer/Normal_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/Normal_cn.rst
--- a/doc/paddle/api/paddle/nn/initializer/Uniform_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/Uniform_cn.rst
--- a/doc/paddle/api/paddle/fluid/DataFeedDesc_cn.rst
+++ b/doc/paddle/api/paddle/fluid/DataFeedDesc_cn.rst
+.. _cn_api_fluid_DataFeedDesc:
+DataFeedDesc
+-------------------------------
+.. py:class:: paddle.fluid.DataFeedDesc(proto_file)
+:api_attr: 声明式编程模式（静态图）
+描述训练数据的格式。输入是一个文件路径名，其内容是protobuf message。
+可以参考 :code:`paddle/fluid/framework/data_feed.proto` 查看我们如何定义message。
+一段典型的message可能是这样的：
+.. code-block:: python
+    import paddle.fluid as fluid
+    f = open("data.proto", "w")
+    print >> f, 'name: "MultiSlotDataFeed"'
+    print >> f, 'batch_size: 2'
+    print >> f, 'multi_slot_desc {'
+    print >> f, '    slots {'
+    print >> f, '         name: "words"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '     }'
+    print >> f, '     slots {'
+    print >> f, '         name: "label"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '    }'
+    print >> f, '}'
+    f.close()
+    data_feed = fluid.DataFeedDesc('data.proto')
+用户需要了解DataFeedDesc中每个字段的含义，以便自定义字段的值。例如:
+.. code-block:: python
+    import paddle.fluid as fluid
+    data_feed = fluid.DataFeedDesc('data.proto')
+    data_feed.set_batch_size(128)
+    data_feed.set_dense_slots('words')  # 名为'words'的slot将被设置为密集的
+    data_feed.set_use_slots('words')    # 名为'words'的slot将被用于训练
+    # 最后，可以打印变量详细信息便于排查错误
+    print(data_feed.desc())
+参数：
+  - **proto_file** (string) : 包含数据描述的protobuf message的磁盘文件
+.. py:method:: set_batch_size(batch_size)
+该接口用于设置DataFeedDesc中的 :code:`batch_size` 。可以在训练期间调用修改 :code:`batch_size` 。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    f = open("data.proto", "w")
+    print >> f, 'name: "MultiSlotDataFeed"'
+    print >> f, 'batch_size: 2'
+    print >> f, 'multi_slot_desc {'
+    print >> f, '    slots {'
+    print >> f, '         name: "words"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '     }'
+    print >> f, '     slots {'
+    print >> f, '         name: "label"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '    }'
+    print >> f, '}'
+    f.close()
+    data_feed = fluid.DataFeedDesc('data.proto')
+    data_feed.set_batch_size(128)
+参数：
+  - **batch_size** (int) - 新的批尺寸。
+返回：无
+.. py:method:: set_dense_slots(dense_slots_name)
+将 :code:`dense_slots_name` 指定的slots设置为密集的slot。**注意：默认情况下，所有slots都是稀疏的。**
+密集slot的特征将被输入一个Tensor，而稀疏slot的特征将被输入一个LoDTensor。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    f = open("data.proto", "w")
+    print >> f, 'name: "MultiSlotDataFeed"'
+    print >> f, 'batch_size: 2'
+    print >> f, 'multi_slot_desc {'
+    print >> f, '    slots {'
+    print >> f, '         name: "words"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '     }'
+    print >> f, '     slots {'
+    print >> f, '         name: "label"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '    }'
+    print >> f, '}'
+    f.close()
+    data_feed = fluid.DataFeedDesc('data.proto')
+    data_feed.set_dense_slots(['words'])
+参数：
+  - **dense_slots_name** (list(str)) - slot名称的列表，这些slot将被设置为密集的。
+返回：无
+.. py:method:: set_use_slots(use_slots_name)
+设置一个特定的slot是否用于训练。一个数据集包含了很多特征，通过这个函数可以选择哪些特征将用于指定的模型。
+参数：
+  - **use_slots_name** (list) : 将在训练中使用的slot名列表，类型为list，其中每个元素为一个字符串
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    f = open("data.proto", "w")
+    print >> f, 'name: "MultiSlotDataFeed"'
+    print >> f, 'batch_size: 2'
+    print >> f, 'multi_slot_desc {'
+    print >> f, '    slots {'
+    print >> f, '         name: "words"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '     }'
+    print >> f, '     slots {'
+    print >> f, '         name: "label"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '    }'
+    print >> f, '}'
+    f.close()
+    data_feed = fluid.DataFeedDesc('data.proto')
+    data_feed.set_use_slots(['words'])
+.. note::
+  默认情况下，所有slot都不会被用于训练。
+.. py:method:: desc()
+返回此DataFeedDesc的protobuf message
+返回：一个protobuf message字符串
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    f = open("data.proto", "w")
+    print >> f, 'name: "MultiSlotDataFeed"'
+    print >> f, 'batch_size: 2'
+    print >> f, 'multi_slot_desc {'
+    print >> f, '    slots {'
+    print >> f, '         name: "words"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '     }'
+    print >> f, '     slots {'
+    print >> f, '         name: "label"'
+    print >> f, '         type: "uint64"'
+    print >> f, '         is_dense: false'
+    print >> f, '         is_used: true'
+    print >> f, '    }'
+    print >> f, '}'
+    f.close()
+    data_feed = fluid.DataFeedDesc('data.proto')
+    print(data_feed.desc())
--- a/doc/paddle/api/paddle/fluid/DataFeeder_cn.rst
+++ b/doc/paddle/api/paddle/fluid/DataFeeder_cn.rst
+.. _cn_api_fluid_DataFeeder:
+DataFeeder
+-------------------------------
+.. py:class:: paddle.fluid.DataFeeder(feed_list, place, program=None)
+:api_attr: 声明式编程模式（静态图）
+``DataFeeder`` 负责将reader(读取器)返回的数据转成一种特殊的数据结构，使它们可以输入到 ``Executor`` 和 ``ParallelExecutor`` 中。
+reader通常返回一个minibatch条目列表。在列表中每一条目都是一个样本（sample），它是由具有一至多个特征的列表或元组组成的。
+以下是简单用法：
+.. code-block:: python
+  import paddle.fluid as fluid
+  place = fluid.CPUPlace()
+  img = fluid.layers.data(name='image', shape=[1, 28, 28])
+  label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+  feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
+  result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
+在多GPU模型训练时，如果需要提前分别向各GPU输入数据，可以使用 ``decorate_reader`` 函数。
+.. code-block:: python
+  import paddle
+  import paddle.fluid as fluid
+  place=fluid.CUDAPlace(0)
+  data = fluid.layers.data(name='data', shape=[3, 224, 224], dtype='float32')
+  label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+  feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
+  reader = feeder.decorate_reader(
+        paddle.batch(paddle.dataset.flowers.train(), batch_size=16), multi_devices=False)
+参数：
+    - **feed_list** (list) – 向模型输入的变量表或者变量表名
+    - **place** (Place) – place表明是向GPU还是CPU中输入数据。如果想向GPU中输入数据, 请使用 ``fluid.CUDAPlace(i)`` (i 代表 GPU id)；如果向CPU中输入数据, 请使用  ``fluid.CPUPlace()``
+    - **program** (Program) – 需要向其中输入数据的Program。如果为None, 会默认使用 ``default_main_program()``。 缺省值为None
+抛出异常:
+  - ``ValueError``  – 如果一些变量不在此 Program 中
+**代码示例**
+.. code-block:: python
+  import numpy as np
+  import paddle
+  import paddle.fluid as fluid
+  place = fluid.CPUPlace()
+  def reader():
+      yield [np.random.random([4]).astype('float32'), np.random.random([3]).astype('float32')],
+  main_program = fluid.Program()
+  startup_program = fluid.Program()
+  with fluid.program_guard(main_program, startup_program):
+        data_1 = fluid.layers.data(name='data_1', shape=[1, 2, 2])
+        data_2 = fluid.layers.data(name='data_2', shape=[1, 1, 3])
+        out = fluid.layers.fc(input=[data_1, data_2], size=2)
+        # ...
+  feeder = fluid.DataFeeder([data_1, data_2], place)
+  exe = fluid.Executor(place)
+  exe.run(startup_program)
+  for data in reader():
+      outs = exe.run(program=main_program,
+                     feed=feeder.feed(data),
+                     fetch_list=[out])
+.. py:method:: feed(iterable)
+根据feed_list（数据输入表）和iterable（可遍历的数据）提供的信息，将输入数据转成一种特殊的数据结构，使它们可以输入到 ``Executor`` 和 ``ParallelExecutor`` 中。
+参数:
+  - **iterable** (list|tuple) – 要输入的数据
+返回：  转换结果
+返回类型: dict
+**代码示例**
+.. code-block:: python
+    import numpy.random as random
+    import paddle.fluid as fluid
+    def reader(limit=5):
+        for i in range(limit):
+            yield random.random([784]).astype('float32'), random.random([1]).astype('int64'), random.random([256]).astype('float32')
+    data_1 = fluid.layers.data(name='data_1', shape=[1, 28, 28])
+    data_2 = fluid.layers.data(name='data_2', shape=[1], dtype='int64')
+    data_3 = fluid.layers.data(name='data_3', shape=[16, 16], dtype='float32')
+    feeder = fluid.DataFeeder(['data_1','data_2', 'data_3'], fluid.CPUPlace())
+    result = feeder.feed(reader())
+.. py:method:: feed_parallel(iterable, num_places=None)
+该方法获取多个minibatch，并把每个minibatch提前输入进各个设备中。
+参数:
+    - **iterable** (list|tuple) – 要输入的数据
+    - **num_places** (int) – 设备数目。默认为None。
+返回: 转换结果
+返回类型: dict
+.. note::
+     设备（CPU或GPU）的数目必须等于minibatch的数目
+**代码示例**
+.. code-block:: python
+    import numpy.random as random
+    import paddle.fluid as fluid
+    def reader(limit=10):
+        for i in range(limit):
+            yield [random.random([784]).astype('float32'), random.random([1]).astype('float32')],
+    x = fluid.layers.data(name='x', shape=[1, 28, 28])
+    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    fluid.layers.elementwise_add(x, y)
+    feeder = fluid.DataFeeder(['x','y'], fluid.CPUPlace())
+    place_num = 2
+    places = [fluid.CPUPlace() for x in range(place_num)]
+    data = []
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    program = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(places=places)
+    for item in reader():
+        data.append(item)
+        if place_num == len(data):
+            exe.run(program=program, feed=list(feeder.feed_parallel(data, place_num)), fetch_list=[])
+            data = []
+.. py:method::  decorate_reader(reader, multi_devices, num_places=None, drop_last=True)
+将reader返回的输入数据batch转换为多个mini-batch，之后每个mini-batch都会被输入进各个设备（CPU或GPU）中。
+参数：
+        - **reader** (fun) – 该参数是一个可以生成数据的函数
+        - **multi_devices** (bool) – bool型，指明是否使用多个设备
+        - **num_places** (int) – 如果 ``multi_devices`` 为 ``True`` , 可以使用此参数来设置GPU数目。如果 ``num_places`` 为 ``None`` ，该函数默认使用当前训练机所有GPU设备。默认为None。
+        - **drop_last** (bool) – 如果最后一个batch的大小比 ``batch_size`` 要小，则可使用该参数来指明是否选择丢弃最后一个batch数据。 默认为 ``True``
+返回：转换结果
+返回类型: dict
+抛出异常： ``ValueError`` – 如果 ``drop_last`` 值为False并且data batch与设备不匹配时，产生此异常
+**代码示例**
+.. code-block:: python
+    import numpy.random as random
+    import paddle
+    import paddle.fluid as fluid
+    def reader(limit=5):
+        for i in range(limit):
+            yield (random.random([784]).astype('float32'), random.random([1]).astype('int64')),
+    place=fluid.CPUPlace()
+    data = fluid.layers.data(name='data', shape=[1, 28, 28], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
+    reader = feeder.decorate_reader(reader, multi_devices=False)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    for data in reader():
+        exe.run(feed=data)
--- a/doc/paddle/api/paddle/fluid/DistributeTranspilerConfig_cn.rst
+++ b/doc/paddle/api/paddle/fluid/DistributeTranspilerConfig_cn.rst
+.. _cn_api_fluid_DistributeTranspilerConfig:
+DistributeTranspilerConfig
+-------------------------------
+.. py:class:: paddle.fluid.DistributeTranspilerConfig
+.. py:attribute:: slice_var_up (bool)
+为多个Pserver（parameter server）将tensor切片, 默认为True。
+.. py:attribute:: split_method (PSDispatcher)
+可使用 RoundRobin 或者 HashName。
+注意: 尝试选择最佳方法来达到Pserver间负载均衡。
+.. py:attribute:: min_block_size (int)
+block中分割(split)出的元素个数的最小值。
+注意: 根据：`issuecomment-369912156 <https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156>`_ , 当数据块大小超过2MB时，我们可以有效地使用带宽。如果你想更改它，请详细查看 ``slice_variable`` 函数。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    config = fluid.DistributeTranspilerConfig()
+    config.slice_var_up = True
--- a/doc/paddle/api/paddle/fluid/DistributeTranspiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/DistributeTranspiler_cn.rst
+.. _cn_api_fluid_transpiler_DistributeTranspiler:
+DistributeTranspiler
+-------------------------------
+.. py:class:: paddle.fluid.DistributeTranspiler (config=None)
+该类可以把fluid program转变为分布式数据并行计算的program, 有PServer和NCCL2两种模式。
+在Pserver（全称：parameter server）模式下， 通过 ``transpile`` 将用于单机训练的 ``program``  转译为可用于parameter server的分布式架构(即PServer,参数服务器)来进行训练的program。
+在NCCL2模式下, 通过 ``transpile`` 将用于单机训练的 ``program``  转译为可用于NCCL2的分布式架构来进行训练的program。在NCCL2模式下，transpiler会在 ``startup_program`` 中附加一个 ``NCCL_ID`` 广播
+算子（broadcasting operators）来实现在该集群中所有工作结点共享 ``NCCL_ID`` 。 调用 ``transpile_nccl2`` 后， 你 **必须** 将 ``trainer_id`` , ``num_trainers`` 参数提供给 ``Executor`` 来启动NCCL2分布式模式。
+参数：
+        - **config** (DistributeTranspilerConfig) – DistributeTranspiler属性配置实例，定义了program转变所需要的属性, 请参考： ``DistributeTranspilerConfig`` 相关文档。
+返回：初始化后的DistributeTranspiler实例
+返回类型：实例（DistributeTranspiler）
+**代码示例**
+.. code-block:: python
+    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
+    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
+    y_predict = fluid.layers.fc(input=x, size=1, act=None)
+    cost = fluid.layers.square_error_cost(input=y_predict, label=y)
+    avg_loss = fluid.layers.mean(cost)
+    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+    sgd_optimizer.minimize(avg_loss)
+    # pserver 模式下
+    pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+    trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+    current_endpoint = "192.168.0.1:6174"
+    trainer_id = 0
+    trainers = 4
+    role = "PSERVER"
+    t = fluid.DistributeTranspiler()
+    t.transpile(
+         trainer_id, pservers=pserver_endpoints, trainers=trainers)
+    if role == "PSERVER":
+         pserver_program = t.get_pserver_program(current_endpoint)
+         pserver_startup_program = t.get_startup_program(current_endpoint,
+                                                        pserver_program)
+    elif role == "TRAINER":
+         trainer_program = t.get_trainer_program()
+    # nccl2 模式下
+    trainer_num = 2
+    trainer_id = 0
+    config = fluid.DistributeTranspilerConfig()
+    config.mode = "nccl2"
+    trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+    t = fluid.DistributeTranspiler(config=config)
+    t.transpile(trainer_id=trainer_id, trainers=trainer_endpoints, current_endpoint="192.168.0.1:6174")
+    exe = fluid.ParallelExecutor(
+        use_cuda=True,
+        loss_name=avg_loss.name,
+        num_trainers=trainer_num,
+        trainer_id=trainer_id
+    )
+.. py:method:: transpile(trainer_id, program=None, pservers='127.0.0.1:6174', trainers=1, sync_mode=True, startup_program=None, current_endpoint='127.0.0.1:6174')
+通过此方法，可根据用户配置将单机的program转换为当前节点可用的数据并行的分布式program。
+参数:
+    - **trainer_id** (int) – 当前Trainer worker的id, 如果有n个Trainer worker, id 取值范围为0 ~ n-1
+    - **program** (Program|None) – 待transpile（转译）的program, 缺省为 ``fluid.default_main_program()``
+    - **startup_program** (Program|None) - 要转译的 ``startup_program`` ,默认为 ``fluid.default_startup_program()``
+    - **pservers** (str) – 内容为Pserver列表的字符串，格式为：按逗号区分不同的Pserver，每个Pserver的格式为 *ip地址:端口号*
+    - **trainers** (int|str) – 在Pserver模式下，该参数指Trainer机的个数；在nccl2模式下，它是一个内容为Trainer终端列表的字符串
+    - **sync_mode** (bool) – 是否做同步训练(synchronous training), 默认为True
+    - **startup_program** (Program|None) – 待transpile（转译）的startup_program，默认为 ``fluid.default_startup_program()``
+    - **current_endpoint** (str) – 当需要把program转译（transpile）至NCCL2模式下时，需要将当前endpoint（终端）传入该参数。PServer模型下，当用户需要使用增量训练时，必须要指定该参数。
+返回：None
+**代码示例**
+.. code-block:: python
+    t = fluid.DistributeTranspiler()
+    t.transpile(
+        trainer_id=0,
+        pservers="127.0.0.1:7000,127.0.0.1:7001",
+        trainers=2,
+        sync_mode=False,
+        current_endpoint="127.0.0.1:7000")
+.. py:method:: get_trainer_program(wait_port=True)
+该方法可以得到Trainer侧的program。
+返回:    Trainer侧的program
+返回类型:    Program
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        # 这是一个示例，请根据你的情况更改endpoint
+        pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+        trainer_id = 0
+        trainers = 4
+        t = fluid.DistributeTranspiler()
+        t.transpile(trainer_id, trainers=trainers, pservers=pserver_endpoints)
+        trainer_program = t.get_trainer_program()
+.. py:method:: get_pserver_program(endpoint)
+该方法可以得到Pserver（参数服务器）侧的程序
+参数:
+    - **endpoint** (str) – 当前Pserver终端
+返回:    当前Pserver需要执行的program
+返回类型:    Program
+**代码示例**
+.. code-block:: python
+          import paddle.fluid as fluid
+          # 这是一个示例，请根据你的情况更改endpoint
+          pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+          current_endpoint = "192.168.0.1:6174"
+          trainer_id = 0
+          trainers = 4
+          t = fluid.DistributeTranspiler()
+          t.transpile(
+               trainer_id, pservers=pserver_endpoints, trainers=trainers)
+          pserver_program = t.get_pserver_program(current_endpoint)
+.. py:method:: get_pserver_programs(endpoint)
+该方法可以得到Pserver侧用于分布式训练的 ``main_program`` 和 ``startup_program`` 。
+参数:
+    - **endpoint** (str) – 当前Pserver终端
+返回:    (main_program, startup_program), “Program”类型的元组
+返回类型:    tuple
+**代码示例**
+.. code-block:: python
+          import paddle.fluid as fluid
+          # 这是一个示例，请根据你的情况更改endpoint
+          pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+          current_endpoint = "192.168.0.1:6174"
+          trainer_id = 0
+          trainers = 4
+          t = fluid.DistributeTranspiler()
+          t.transpile(
+               trainer_id, pservers=pserver_endpoints, trainers=trainers)
+          pserver_program, pserver_startup_program = t.get_pserver_programs(current_endpoint)
+.. py:method:: get_startup_program(endpoint, pserver_program=None, startup_program=None)
+**该函数已停止使用**
+获取当前Pserver的startup_program，如果有多个被分散到不同blocks的变量，则修改operator的输入变量。
+参数:
+    - **endpoint** (str) – 当前Pserver终端
+    - **pserver_program** (Program) – 已停止使用。 先调用get_pserver_program
+    - **startup_program** (Program) – 已停止使用。应在初始化时传入startup_program
+返回:    Pserver侧的startup_program
+返回类型:    Program
+**代码示例**
+.. code-block:: python
+          pserver_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+          trainer_endpoints = "192.168.0.1:6174,192.168.0.2:6174"
+          current_endpoint = "192.168.0.1:6174"
+          trainer_id = 0
+          trainers = 4
+          t = fluid.DistributeTranspiler()
+          t.transpile(trainer_id, pservers=pserver_endpoints, trainers=trainers)
+          pserver_program = t.get_pserver_program(current_endpoint)
+          pserver_startup_program = t.get_startup_program(current_endpoint,
+                                                          pserver_program)
--- a/doc/paddle/api/paddle/fluid/LoDTensorArray_cn.rst
+++ b/doc/paddle/api/paddle/fluid/LoDTensorArray_cn.rst
+.. _cn_api_fluid_LoDTensorArray:
+LoDTensorArray
+-------------------------------
+.. py:class:: paddle.fluid.LoDTensorArray
+LoDTensorArray是由LoDTensor组成的数组，支持"[]"运算符、len()函数和for迭代等。
+**示例代码**
+.. code-block:: python
+        import paddle.fluid as fluid
+        arr = fluid.LoDTensorArray()   
+.. py:method:: append(self: paddle.fluid.core_avx.LoDTensorArray, tensor: paddle.fluid.core.LoDTensor) → None
+该接口将LoDTensor追加到LoDTensorArray后。
+参数：
+  - **tensor** (LoDTensor) - 追加的LoDTensor。
+返回：无。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            arr = fluid.LoDTensorArray()
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            arr.append(t)
--- a/doc/paddle/api/paddle/fluid/LoDTensor_cn.rst
+++ b/doc/paddle/api/paddle/fluid/LoDTensor_cn.rst
+.. _cn_api_fluid_LoDTensor:
+LoDTensor
+-------------------------------
+.. py:class:: paddle.fluid.LoDTensor
+LoDTensor是一个具有LoD（Level of Details）信息的张量（Tensor），可用于表示变长序列，详见 :ref:`cn_user_guide_lod_tensor` 。
+LoDTensor可以通过 ``np.array(lod_tensor)`` 方法转换为numpy.ndarray。
+如果您不需要了解LoDTensor的细节，可以跳过以下的注解。
+下面以两个例子说明如何用LoDTensor表示变长序列。
+示例1：
+假设x为一个表示变长序列的LoDTensor，它包含2个逻辑子序列，第一个序列长度是2（样本数量为2），第二个序列长度是3，总序列长度为5。
+第一个序列的数据为[1, 2], [3, 4]，第二个序列的数据为[5, 6], [7, 8], [9, 10]，每个样本数据的维度均是2，该LoDTensor最终的shape为[5, 2]，其中5为总序列长度，2为每个样本数据的维度。
+在逻辑上，我们可以用两种方式表示该变长序列，一种是递归序列长度的形式，即x.recursive_sequence_length = [[2, 3]]；另一种是偏移量的形式，即x.lod = [[0, 2, 2+3]]。
+这两种表示方式是等价的，您可以通过LoDTensor的相应接口来设置和获取recursive_sequence_length或LoD。
+在实现上，为了获得更快的序列访问速度，Paddle采用了偏移量的形式来存储不同的序列长度。因此，对recursive_sequence_length的操作最终将转换为对LoD的操作。
+::
+  x.data = [[1, 2], [3, 4], 
+            [5, 6], [7, 8], [9, 10]]
+  x.shape = [5, 2]
+  x.recursive_sequence_length = [[2, 3]]
+  x.lod  =  [[0, 2, 5]] 
+示例2：
+LoD可以有多个level（例如，一个段落可以有多个句子，一个句子可以有多个单词）。假设y为LoDTensor ，lod_level为2。从level=0来看有2个逻辑序列，序列长度分别为2和1，表示第一个逻辑序列包含2个子序列，第二个逻辑序列包含1个子序列。从level=1来看，第一个逻辑序列包含的2个子序列长度分别为2和2，第二个逻辑序列包含的1个子序列长度为3。
+因此，该LoDTensor以递归序列长度形式表示为 y.recursive_sequence_length = [[2, 1], [2, 2, 3]]；相应地，以偏移量形式表示为 y.lod = [[0, 2, 3], [0, 2, 4, 7]]。
+::
+  y.data = [[1, 2], [3, 4], 
+            [5, 6], [7, 8], 
+            [9, 10], [11, 12], [13, 14]]
+  y.shape = [2+2+3, 2]
+  y.recursive_sequence_length = [[2, 1], [2, 2, 3]]
+  y.lod = [[0, 2, 3], [0, 2, 4, 7]]
+**示例代码**
+.. code-block:: python
+      import paddle.fluid as fluid
+      t = fluid.LoDTensor()
+.. py:method:: has_valid_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) → bool
+该接口检查LoDTensor的LoD的正确性。
+返回：   是否带有正确的LoD。
+返回类型：  bool。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            t.set_recursive_sequence_lengths([[2, 3]])
+            print(t.has_valid_recursive_sequence_lengths())  # True
+.. py:method::  lod(self: paddle.fluid.core_avx.LoDTensor) → List[List[int]]
+该接口返回LoDTensor的LoD。
+返回：LoDTensor的LoD。
+返回类型：List [List [int]]。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            t.set_lod([[0, 2, 5]])
+            print(t.lod()) # [[0, 2, 5]]
+.. py:method:: recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor) → List[List[int]]
+该接口返回与LoDTensor的LoD对应的递归序列长度。
+返回：LoDTensor的LoD对应的递归序列长度。
+返回类型：List [List [int]]。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            t.set_recursive_sequence_lengths([[2, 3]])
+            print(t.recursive_sequence_lengths())  # [[2, 3]]
+.. py:method::  set(*args, **kwargs)
+该接口根据输入的numpy array和设备place，设置LoDTensor的数据。
+重载函数：
+1. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float32], place: paddle::platform::CPUPlace) -> None
+2. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int32], place: paddle::platform::CPUPlace) -> None
+3. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float64], place: paddle::platform::CPUPlace) -> None
+4. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int64], place: paddle::platform::CPUPlace) -> None
+5. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[bool], place: paddle::platform::CPUPlace) -> None
+6. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint16], place: paddle::platform::CPUPlace) -> None
+7. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint8], place: paddle::platform::CPUPlace) -> None
+8. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int8], place: paddle::platform::CPUPlace) -> None
+9. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float32], place: paddle::platform::CUDAPlace) -> None
+10. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int32], place: paddle::platform::CUDAPlace) -> None
+11. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float64], place: paddle::platform::CUDAPlace) -> None
+12. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int64], place: paddle::platform::CUDAPlace) -> None
+13. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[bool], place: paddle::platform::CUDAPlace) -> None
+14. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint16], place: paddle::platform::CUDAPlace) -> None
+15. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint8], place: paddle::platform::CUDAPlace) -> None
+16. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int8], place: paddle::platform::CUDAPlace) -> None
+17. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float32], place: paddle::platform::CUDAPinnedPlace) -> None
+18. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int32], place: paddle::platform::CUDAPinnedPlace) -> None
+19. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[float64], place: paddle::platform::CUDAPinnedPlace) -> None
+20. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int64], place: paddle::platform::CUDAPinnedPlace) -> None
+21. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[bool], place: paddle::platform::CUDAPinnedPlace) -> None
+22. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint16], place: paddle::platform::CUDAPinnedPlace) -> None
+23. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[uint8], place: paddle::platform::CUDAPinnedPlace) -> None
+24. set(self: paddle.fluid.core_avx.Tensor, array: numpy.ndarray[int8], place: paddle::platform::CUDAPinnedPlace) -> None
+参数：
+    - **array** (numpy.ndarray) - 要设置的numpy array，支持的数据类型为bool, float32, float64, int8, int32, int64, uint8, uint16。
+    - **place** (CPUPlace|CUDAPlace|CUDAPinnedPlace) - 要设置的LoDTensor所在的设备。
+返回：无。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+.. py:method::  set_lod(self: paddle.fluid.core_avx.LoDTensor, lod: List[List[int]]) → None
+该接口设置LoDTensor的LoD。
+参数：
+    - **lod** （List [List [int]]） - 要设置的LoD。
+返回：无。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            t.set_lod([[0, 2, 5]])
+            print(t.lod())  # [[0, 2, 5]]
+.. py:method::  set_recursive_sequence_lengths(self: paddle.fluid.core_avx.LoDTensor, recursive_sequence_lengths: List[List[int]]) → None
+该接口根据递归序列长度 ``recursive_sequence_lengths`` 设置LoDTensor的LoD。
+例如，如果 ``recursive_sequence_lengths = [[2, 3]]``，意味着有两个长度分别为2和3的序列，相应的LoD是[[0, 2, 2 + 3]]，即[[0, 2, 5]]。
+参数：
+  - **recursive_sequence_lengths** (List [List [int]]) - 递归序列长度。
+返回：无。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            t.set_recursive_sequence_lengths([[2, 3]])
+            print(t.recursive_sequence_lengths())  # [[2, 3]]
+            print(t.lod())  # [[0, 2, 5]]
+.. py:method::  shape(self: paddle.fluid.core_avx.Tensor) → List[int]
+该接口返回LoDTensor的shape。
+返回：LoDTensor的shape。
+返回类型：List[int] 。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.LoDTensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            print(t.shape())  # [5, 30]
--- a/doc/paddle/api/paddle/fluid/Tensor_cn.rst
+++ b/doc/paddle/api/paddle/fluid/Tensor_cn.rst
+.. _cn_api_fluid_Tensor:
+Tensor
+-------------------------------
+.. py:class:: paddle.fluid.Tensor
+Tensor用于表示多维张量，可以通过 ``np.array(tensor)`` 方法转换为numpy.ndarray。
+**示例代码**
+.. code-block:: python
+      import paddle.fluid as fluid
+      t = fluid.Tensor()
+.. py:method::  set(array, place, zero_copy=False)
+该接口根据输入的numpy array和设备place，设置Tensor的数据。
+参数：
+    - **array** (numpy.ndarray) - 要设置的numpy array，支持的数据类型为bool, float32, float64, int8, int32, int64, uint8, uint16。
+    - **place** (CPUPlace|CUDAPlace|CUDAPinnedPlace) - 要设置的Tensor所在的设备。
+    - **zero_copy** (bool，可选) - 是否与输入的numpy数组共享内存。此参数仅适用于CPUPlace。默认值为False。
+返回：无。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.Tensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+.. py:method::  shape(self: paddle.fluid.core_avx.Tensor) → List[int]
+该接口返回Tensor的shape。
+返回：Tensor的shape。
+返回类型：List[int] 。
+**示例代码**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            t = fluid.Tensor()
+            t.set(np.ndarray([5, 30]), fluid.CPUPlace())
+            print(t.shape())  # [5, 30]
\ No newline at end of file
--- a/doc/paddle/api/paddle/static/append_backward_cn.rst
+++ b/doc/paddle/api/paddle/static/append_backward_cn.rst
--- a/doc/paddle/api/paddle/static/gradients_cn.rst
+++ b/doc/paddle/api/paddle/static/gradients_cn.rst
--- a/doc/paddle/api/paddle/fluid/clip/ErrorClipByValue_cn.rst
+++ b/doc/paddle/api/paddle/fluid/clip/ErrorClipByValue_cn.rst
+.. _cn_api_fluid_clip_ErrorClipByValue:
+ErrorClipByValue
+-------------------------------
+.. py:class:: paddle.fluid.clip.ErrorClipByValue(max, min=None)
+给定一个 Tensor  ``t`` （该 Tensor 传入方式见代码示例），将 Tensor 中超出区间 [min, max]（由给定的最大值 ``max`` 和最小值 ``min`` 确定）的元素，重设为其所超出的边界值。
+- 任何小于min（最小值）的值都被设置为 ``min``
+- 任何大于max（最大值）的值都被设置为 ``max``
+参数:
+ - **max** (float) - 要修剪的最大值。
+ - **min** (float) - 要修剪的最小值。如果用户没有设置，将被框架默认设置为 ``-max`` 。
+**代码示例**
+.. code-block:: python
+     import paddle.fluid as fluid
+     BATCH_SIZE = 128
+     CLIP_MAX = 2e-6
+     CLIP_MIN = -1e-6
+     prog = fluid.framework.Program()
+     with fluid.program_guard(main_program=prog):
+         image = fluid.layers.data(name='x', shape=[784], dtype='float32')
+         hidden1 = fluid.layers.fc(input=image, size=128, act='relu')
+         hidden2 = fluid.layers.fc(input=hidden1, size=64, act='relu')
+         predict = fluid.layers.fc(input=hidden2, size=10, act='softmax')
+         label = fluid.layers.data(name='y', shape=[1], dtype='int64')
+         cost = fluid.layers.cross_entropy(input=predict, label=label)
+         avg_cost = fluid.layers.mean(cost)
+     prog_clip = prog.clone()
+     prog_clip.block(0).var(hidden1.name)._set_error_clip(
+         fluid.clip.ErrorClipByValue(max=CLIP_MAX, min=CLIP_MIN))
--- a/doc/paddle/api/paddle/nn/GradientClipByGlobalNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/GradientClipByGlobalNorm_cn.rst
--- a/doc/paddle/api/paddle/nn/GradientClipByNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/GradientClipByNorm_cn.rst
--- a/doc/paddle/api/paddle/nn/GradientClipByValue_cn.rst
+++ b/doc/paddle/api/paddle/nn/GradientClipByValue_cn.rst
--- a/doc/paddle/api/paddle/fluid/clip/set_gradient_clip_cn.rst
+++ b/doc/paddle/api/paddle/fluid/clip/set_gradient_clip_cn.rst
+.. _cn_api_fluid_clip_set_gradient_clip:
+set_gradient_clip
+-------------------------------
+.. py:function:: paddle.fluid.clip.set_gradient_clip(clip, param_list=None, program=None)
+:api_attr: 声明式编程模式（静态图)
+.. warning::
+    此API对位置使用的要求较高，其必须位于组建网络之后， ``minimize`` 之前，因此在未来版本中可能被删除，故不推荐使用。推荐在 ``optimizer`` 初始化时设置梯度裁剪。
+    有三种裁剪策略： :ref:`cn_api_fluid_clip_GradientClipByGlobalNorm` 、 :ref:`cn_api_fluid_clip_GradientClipByNorm` 、 :ref:`cn_api_fluid_clip_GradientClipByValue` 。
+    如果在 ``optimizer`` 中设置过梯度裁剪，又使用了 ``set_gradient_clip`` ，``set_gradient_clip`` 将不会生效。
+给指定参数做梯度裁剪。
+参数:
+    - **clip** (GradientClipBase) - 梯度裁剪的策略，如 :ref:`cn_api_fluid_clip_GradientClipByGlobalNorm` 等，用于描述具体的裁剪方法和属性。
+    - **param_list** (list(Variable)，可选) - 需要裁剪的参数列表，可以是参数或参数名称列表。默认值为None，表示裁剪 ``program`` 中的所有参数。
+    - **program** (Program，可选) - 参数所在的Program。默认值为None，表示使用 :ref:`cn_api_fluid_default_main_program` 。
+返回: 无。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    def network():
+        image = fluid.layers.data(name='image', shape=[28], dtype='float32')
+        param_attr1 = fluid.ParamAttr("fc1_param")
+        fc1 = fluid.layers.fc(image, size=10, param_attr=param_attr1)
+        param_attr2 = fluid.ParamAttr("fc2_param")
+        fc2 = fluid.layers.fc(fc1, size=10, param_attr=param_attr2)
+        loss = fluid.layers.reduce_mean(fc2)
+        return loss
+    # network 1: clip all parameter gradient
+    with fluid.program_guard(fluid.Program(), fluid.Program()):
+        loss = network()
+        fluid.clip.set_gradient_clip(
+            fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+        sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+        sgd.minimize(loss)
+    # network 2: clip parameter gradient by name
+    with fluid.program_guard(fluid.Program(), fluid.Program()):
+        loss = network()
+        fluid.clip.set_gradient_clip(
+            fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+            param_list=["fc1_param", "fc2_param"])
+        sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+        sgd.minimize(loss)
+    # network 3: clip parameter gradient by var
+    with fluid.program_guard(fluid.Program(), fluid.Program()):
+        loss = network()
+        param_var1 = fluid.default_main_program().global_block().var("fc1_param")
+        param_var2 = fluid.default_main_program().global_block().var("fc2_param")
+        fluid.clip.set_gradient_clip(
+            fluid.clip.GradientClipByValue(min=-1.0, max=1.0),
+            param_list=[param_var1, param_var2])
+        sgd = fluid.optimizer.SGD(learning_rate=1e-3)
+        sgd.minimize(loss)
+    # network 4: use set_gradient_clip and minimize(grad_clip=clip) together
+    with fluid.program_guard(fluid.Program(), fluid.Program()):
+        loss = network()
+        param_var1 = fluid.default_main_program().global_block().var("fc1_param")
+        param_var2 = fluid.default_main_program().global_block().var("fc2_param")
+        clip1 = fluid.clip.GradientClipByValue(min=-1.0, max=1.0)
+        clip2 = fluid.clip.GradientClipByNorm(clip_norm=1.0)
+        # 设置梯度裁剪策略：clip1
+        fluid.clip.set_gradient_clip(clip1)
+        # 设置梯度裁剪策略：clip2
+        sgd = fluid.optimizer.SGD(learning_rate=1e-3, grad_clip=clip2)
+        sgd.minimize(loss)
+        # 有设置冲突时，set_gradient_clip将不会生效，将以clip2的策略进行梯度裁剪
--- a/doc/paddle/api/paddle/nn/clip_cn.rst
+++ b/doc/paddle/api/paddle/nn/clip_cn.rst
--- a/doc/paddle/api/paddle/static/BuildStrategy_cn.rst
+++ b/doc/paddle/api/paddle/static/BuildStrategy_cn.rst
--- a/doc/paddle/api/paddle/static/CompiledProgram_cn.rst
+++ b/doc/paddle/api/paddle/static/CompiledProgram_cn.rst
--- a/doc/paddle/api/paddle/static/ExecutionStrategy_cn.rst
+++ b/doc/paddle/api/paddle/static/ExecutionStrategy_cn.rst
--- a/doc/paddle/api/paddle/fluid/contrib/BeamSearchDecoder_cn.rst
+++ b/doc/paddle/api/paddle/fluid/contrib/BeamSearchDecoder_cn.rst
+.. _cn_api_fluid_layers_BeamSearchDecoder:
+BeamSearchDecoder
+-------------------------------
+.. py:class:: paddle.fluid.layers.BeamSearchDecoder(cell, start_token, end_token, beam_size, embedding_fn=None, output_fn=None)
+:api_attr: 声明式编程模式（静态图)
+带beam search解码策略的解码器。该接口包装一个cell来计算概率，然后执行一个beam search步骤计算得分，并为每个解码步骤选择候选输出。更多详细信息请参阅 `Beam search <https://en.wikipedia.org/wiki/Beam_search>`_
+**注意** 在使用beam search解码时，cell的输入和状态将被扩展到 :math:`beam\_size` ，得到 :math:`[batch\_size * beam\_size, ...]` 一样的形状，这个操作在BeamSearchDecoder中自动完成，因此，其他任何在 :code:`cell.call` 中使用的tensor，如果形状为  :math:`[batch\_size, ...]` ，都必须先手动使用 :code:`BeamSearchDecoder.tile_beam_merge_with_batch` 接口扩展。最常见的情况是带注意机制的编码器输出。
+参数：
+  - **cell** (RNNCell) - RNNCell的实例或者具有相同接口定义的对象。
+  - **start_token** (int) - 起始标记id。
+  - **end_token** (int) - 结束标记id。
+  - **beam_size** (int) - 在beam search中使用的beam宽度。
+  - **embedding_fn** (可选) - 处理选中的候选id的接口。通常，它是一个将词id转换为词嵌入的嵌入层，函数的返回值作为 :code:`cell.call` 接口的 :code:`input` 参数。如果 :code:`embedding_fn` 未提供，则必须在 :code:`cell.call` 中实现词嵌入转换。默认值None。
+  - **output_fn** (可选) - 处理cell输出的接口，在计算得分和选择候选标记id之前使用。默认值None。
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.layers import GRUCell, BeamSearchDecoder
+    trg_embeder = lambda x: fluid.embedding(
+        x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding"))
+    output_layer = lambda x: fluid.layers.fc(x,
+                                    size=10000,
+                                    num_flatten_dims=len(x.shape) - 1,
+                                    param_attr=fluid.ParamAttr(name=
+                                                                "output_w"),
+                                    bias_attr=False)
+    decoder_cell = GRUCell(hidden_size=128)
+    decoder = BeamSearchDecoder(decoder_cell,
+                                start_token=0,
+                                end_token=1,
+                                beam_size=4,
+                                embedding_fn=trg_embeder,
+                                output_fn=output_layer)
+.. py:method:: tile_beam_merge_with_batch(x, beam_size)
+扩展tensor的batch维度。此函数的输入是形状为 :math:`[batch\_size, s_0, s_1, ...]` 的tensor t，由minibatch中的样本 :math:`t[0], ..., t[batch\_size - 1]` 组成。将其扩展为形状是  :math:`[batch\_size * beam\_size, s_0, s_1, ...]` 的tensor，由 :math:`t[0], t[0], ..., t[1], t[1], ...` 组成, 每个minibatch中的样本重复 :math:`beam\_size` 次。
+参数：
+  - **x** (Variable) - 形状为 :math:`[batch\_size, ...]` 的tensor。数据类型应为float32，float64，int32，int64或bool。
+  - **beam_size** (int) - 在beam search中使用的beam宽度。
+返回：形状为 :math:`[batch\_size * beam\_size, ...]` 的tensor，其数据类型与 :code:`x` 相同。
+返回类型：Variable
+.. py:method:: _split_batch_beams(x)
+将形状为 :math:`[batch\_size * beam\_size, ...]` 的tensor变换为形状为 :math:`[batch\_size, beam\_size, ...]` 的新tensor。
+参数：
+  - **x** (Variable) - 形状为 :math:`[batch\_size * beam\_size, ...]` 的tensor。数据类型应为float32，float64，int32，int64或bool。
+返回：形状为 :math:`[batch\_size, beam\_size, ...]` 的tensor，其数据类型与 :code:`x` 相同。
+返回类型：Variable        
+.. py:method:: _merge_batch_beams(x)
+将形状为 :math:`[batch\_size, beam\_size, ...]` 的tensor变换为形状为 :math:`[batch\_size * beam\_size,...]` 的新tensor。
+参数：
+  - **x** (Variable) - 形状为 :math:`[batch\_size, beam\_size,...]` 的tensor。数据类型应为float32，float64，int32，int64或bool。
+返回：形状为 :math:`[batch\_size * beam\_size, ...]` 的tensor，其数据类型与 :code:`x` 相同。
+返回类型：Variable   
+.. py:method:: _expand_to_beam_size(x)
+此函数输入形状为 :math:`[batch\_size,s_0，s_1，...]` 的tensor t，由minibatch中的样本 :math:`t[0]，...，t[batch\_size-1]` 组成。将其扩展为形状 :math:`[ batch\_size,beam\_size,s_0，s_1，...]` 的tensor，由 :math:`t[0]，t[0]，...，t[1]，t[1]，...` 组成，其中每个minibatch中的样本重复 :math:`beam\_size` 次。
+参数：
+  - **x** (Variable) - 形状为 :math:`[batch\_size, ...]` 的tensor。数据类型应为float32，float64，int32，int64或bool。
+返回：形状为 :math:`[batch\_size, beam\_size, ...]` 的tensor，其数据类型与 :code:`x` 相同。
+返回类型：Variable   
+.. py:method:: _mask_probs(probs, finished)
+屏蔽对数概率。该函数使已完成的beam将所有概率质量分配给EOS标记，而未完成的beam保持不变。
+参数：
+  - **probs** (Variable) - 形状为 :math:`[batch\_size,beam\_size,vocab\_size]` 的tensor，表示对数概率。其数据类型应为float32。
+  - **finished** (Variable) - 形状为 :math:`[batch\_size,beam\_size]` 的tensor，表示所有beam的完成状态。其数据类型应为bool。
+返回：具有与 :code:`probs` 相同的形状和数据类型的tensor，其中未完成的beam保持不变，而已完成的beam被替换成特殊的tensor(tensor中所有概率质量被分配给EOS标记)。
+返回类型：Variable   
+.. py:method:: _gather(x, indices, batch_size)
+对tensor :code:`x` 根据索引 :code:`indices` 收集。
+参数：
+  - **x** (Variable) - 形状为 :math:`[batch\_size, beam\_size,...]` 的tensor。
+  - **indices** (Variable) - 一个形状为 :math:`[batch\_size, beam\_size]` 的int64 tensor，表示我们用来收集的索引。
+  - **batch_size** (Variable) - 形状为 :math:`[1]` 的tensor。其数据类型应为int32或int64。
+返回：具有与 :code:`x` 相同的形状和数据类型的tensor，表示收集后的tensor。
+返回类型：Variable   
+.. py:method:: initialize(initial_cell_states)
+初始化BeamSearchDecoder。
+参数：
+  - **initial_cell_states** (Variable) - 单个tensor变量或tensor变量组成的嵌套结构。调用者提供的参数。
+返回：一个元组 :code:`(initial_inputs, initial_states, finished)`。:code:`initial_inputs` 是一个tensor，当 :code:`embedding_fn` 为None时，由 :code:`start_token` 填充，形状为 :math:`[batch\_size,beam\_size,1]` ；否则使用 :code:`embedding_fn(t)` 返回的值。:code:`initial_states` 是tensor变量的嵌套结构(命名元组，字段包括 :code:`cell_states，log_probs，finished，lengths`)，其中 :code:`log_probs，finished，lengths` 都含有一个tensor，形状为 :math:`[batch\_size, beam\_size]`，数据类型为float32，bool，int64。:code:`cell_states` 具有与输入参数 :code:`initial_cell_states` 相同结构的值，但形状扩展为 :math:`[batch\_size,beam\_size,...]`。 :code:`finished` 是一个布尔型tensor，由False填充，形状为 :math:`[batch\_size,beam\_size]`。
+返回类型：tuple
+.. py:method:: _beam_search_step(time, logits, next_cell_states, beam_state)
+计算得分并选择候选id。
+参数：
+  - **time** (Variable) - 调用者提供的形状为[1]的tensor，表示当前解码的时间步长。其数据类型为int64。
+  - **logits** (Variable) - 形状为 :math:`[batch\_size,beam\_size,vocab\_size]` 的tensor，表示当前时间步的logits。其数据类型为float32。
+  - **next_cell_states** (Variable) - 单个tensor变量或tensor变量组成的嵌套结构。它的结构，形状和数据类型与 :code:`initialize()` 的返回值 :code:`initial_states` 中的 :code:`cell_states` 相同。它代表该cell的下一个状态。
+  - **beam_state** (Variable) - tensor变量的结构。在第一个解码步骤与 :code:`initialize()` 返回的 :code:`initial_states` 相同，其他步骤与 :code:`step()` 返回的 :code:`beam_search_state` 相同。
+返回：一个元组 :code:`(beam_search_output, beam_search_state)`。:code:`beam_search_output` 是tensor变量的命名元组，字段为 :code:`scores，predicted_ids，parent_ids`。其中 :code:`scores，predicted_ids，parent_ids` 都含有一个tensor，形状为 :math:`[batch\_size,beam\_size]`，数据类型为float32 ，int64，int64。:code:`beam_search_state` 具有与输入参数 :code:`beam_state` 相同的结构，形状和数据类型。
+返回类型：tuple
+.. py:method:: step(time, inputs, states, **kwargs)
+执行beam search解码步骤，该步骤使用 :code:`cell` 来计算概率，然后执行beam search步骤以计算得分并选择候选标记ID。
+参数：
+  - **time** (Variable) - 调用者提供的形状为[1]的int64tensor，表示当前解码的时间步长。
+  - **inputs** (Variable) - tensor变量。在第一个解码时间步时与由 :code:`initialize()` 返回的 :code:`initial_inputs` 相同，其他时间步与由 :code:`step()` 返回的 :code:`next_inputs` 相同。
+  - **states** (Variable) - tensor变量的结构。在第一个解码时间步时与 :code:`initialize()` 返回的 :code:`initial_states` 相同，其他时间步与由 :code:`step()` 返回的 :code:`beam_search_state` 相同。
+  - **kwargs** - 附加的关键字参数，由调用者提供。
+返回：一个元组 :code:`(beam_search_output，beam_search_state，next_inputs，finished)` 。:code:`beam_search_state` 和参数 :code:`states` 具有相同的结构，形状和数据类型。 :code:`next_inputs` 与输入参数 :code:`inputs` 具有相同的结构，形状和数据类型。 :code:`beam_search_output` 是tensor变量的命名元组(字段包括 :code:`scores，predicted_ids，parent_ids` )，其中 :code:`scores，predicted_ids，parent_ids` 都含有一个tensor，形状为 :math:`[batch\_size,beam\_size]`，数据类型为float32 ，int64，int64。:code:`finished` 是一个bool类型的tensor，形状为 :math:`[batch\_size,beam\_size]`。
+返回类型：tuple
+.. py:method:: finalize(outputs, final_states, sequence_lengths)
+使用 :code:`gather_tree` 沿beam search树回溯并构建完整的预测序列。
+参数：
+  - **outputs** (Variable) - tensor变量组成的结构(命名元组)，该结构和数据类型与 :code:`output_dtype` 相同。tensor将所有时间步的输出堆叠，因此具有形状 :math:`[time\_step，batch\_size,...]`。
+  - **final_states** (Variable) - tensor变量组成的结构(命名元组)。它是 :code:`decoder.step` 在最后一个解码步骤返回的 :code:`next_states`，因此具有与任何时间步的 :code:`state` 相同的结构、形状和数据类型。
+  - **sequence_lengths** (Variable) - tensor，形状为 :math:`[batch\_size,beam\_size]`，数据类型为int64。它包含解码期间确定的每个beam的序列长度。
+返回：一个元组 :code:`(predicted_ids, final_states)`。:code:`predicted_ids` 是一个tensor，形状为 :math:`[time\_step，batch\_size,beam\_size]`，数据类型为int64。:code:`final_states` 与输入参数 :code:`final_states` 相同。
+返回类型：tuple
+.. py:method:: output_dtype()
+用于beam search输出的数据类型的嵌套结构。它是一个命名元组，字段包括 :code:`scores, predicted_ids, parent_ids`。
+参数：无。
+返回：用于beam search输出的数据类型的命名元组。
--- a/doc/paddle/api/paddle/fluid/cpu_places_cn.rst
+++ b/doc/paddle/api/paddle/fluid/cpu_places_cn.rst
+.. _cn_api_fluid_cpu_places:
+cpu_places
+-------------------------------
+.. py:function:: paddle.fluid.cpu_places(device_count=None)
+该接口创建 ``device_count`` 个 ``fluid.CPUPlace`` 对象，并返回所创建的对象列表。
+如果 ``device_count`` 为 ``None``，则设备数目将由环境变量 ``CPU_NUM`` 确定。如果未设置 ``CPU_NUM`` 环境变量，则设备数目会默认设为1，也就是说， ``CPU_NUM=1``。
+``CPU_NUM`` 表示在当前任务中使用的设备数目。如果 ``CPU_NUM`` 与物理核心数相同，可以加速程序的运行。
+参数：
+  - **device_count** (int，可选) - 设备数目。默认值为 ``None``。
+返回: ``CPUPlace`` 的列表。
+返回类型：list[fluid.CPUPlace]
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      cpu_places = fluid.cpu_places()
--- a/doc/paddle/api/paddle/fluid/create_lod_tensor_cn.rst
+++ b/doc/paddle/api/paddle/fluid/create_lod_tensor_cn.rst
+.. _cn_api_fluid_create_lod_tensor:
+create_lod_tensor
+-------------------------------
+.. py:function:: paddle.fluid.create_lod_tensor(data, recursive_seq_lens, place)
+从一个numpy数组、list或LoDTensor创建一个新的LoDTensor。
+具体实现方法如下:
+1. 检查基于序列长度的LoD（length-based LoD），即参数中的 :code:`recursive_seq_lens` 是否正确。
+2. 将 :code:`recursive_seq_lens` 转换为基于偏移量的LoD（offset-based LoD）。
+3. 根据place参数，把所提供的 :code:`data` （numpy数组、list或LoDTensor）的数据复制到CPU或GPU上。
+4. 将基于偏移量的LoD设置到输出的LoDTensor中。
+假设我们想创建一个LoDTensor表示词的序列，其中每个词用一个整数id表示。若待创建的LoDTensor表示2个句子，其中一个句子包含2个单词，另一个句子包含3个单词。
+那么， :code:`data` 为一个维度为(5, 1)的numpy整数数组； :code:`recursive_seq_lens` 为[[2, 3]]，表示每个句子含的单词个数。在该接口内部，基于序列长度的
+:code:`recursive_seq_lens` [[2, 3]]会转换为基于偏移量的LoD [[0, 2, 5]]。
+请查阅 :ref:`cn_user_guide_lod_tensor` 了解更多关于LoD的介绍。
+参数:
+    - **data** (numpy.ndarray|list|LoDTensor) - 表示LoDTensor数据的numpy数组、list或LoDTensor。
+    - **recursive_seq_lens** (list[list[int]]) - 基于序列长度的LoD信息。
+    - **place** (CPUPlace|CUDAPlace) - 表示返回的LoDTensor存储在CPU或GPU place中。
+返回: 包含数据信息和序列长度信息的LoDTensor。
+返回类型: LoDTensor
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        t = fluid.create_lod_tensor(np.ndarray([5, 30]), [[2, 3]], fluid.CPUPlace())
--- a/doc/paddle/api/paddle/fluid/create_random_int_lodtensor_cn.rst
+++ b/doc/paddle/api/paddle/fluid/create_random_int_lodtensor_cn.rst
+.. _cn_api_fluid_create_random_int_lodtensor:
+create_random_int_lodtensor
+-------------------------------
+.. py:function:: paddle.fluid.create_random_int_lodtensor(recursive_seq_lens, base_shape, place, low, high)
+:api_attr: 声明式编程模式（静态图)
+创建一个包含随机整数的LoDTensor。
+具体实现方法如下：
+1. 基于序列长度 :code:`recursive_seq_lens` 和 :code:`base_shape` 产生返回值的维度。返回值的第一维等于序列总长度，其余维度为 :code:`base_shape` 。
+2. 创建一个包含随机整数的numpy数组，并作为 :code:`data` 参数传入 :ref:`cn_api_fluid_create_lod_tensor` 接口中创建LoDTensor返回。
+假设我们想创建一个LoDTensor表示序列信息，共包含2个序列，维度分别为[2, 30]和[3, 30]，那么序列长度 :code:`recursive_seq_lens` 传入[[2, 3]]，:code:`base_shape` 传入[30]（即除了序列长度以外的维度）。
+最后返回的LoDTensor的维度为[5, 30]，其中第一维5为序列总长度，其余维度为 :code:`base_shape` 。
+参数:
+    - **recursive_seq_lens** (list[list[int]]) - 基于序列长度的LoD信息。
+    - **base_shape** (list[int]) - 除第一维以外输出结果的维度信息。
+    - **place** (CPUPlace|CUDAPlace) - 表示返回的LoDTensor存储在CPU或GPU place中。
+    - **low** (int) - 随机整数的下限值。
+    - **high** (int) - 随机整数的上限值，必须大于或等于low。
+返回: 包含随机整数数据信息和序列长度信息的LoDTensor，数值范围在[low, high]之间。
+返回类型: LoDTensor
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        t = fluid.create_random_int_lodtensor(recursive_seq_lens=[[2, 3]],base_shape=[30], place=fluid.CPUPlace(), low=0, high=10)
+        print(t.shape()) # [5, 30]
--- a/doc/paddle/api/paddle/fluid/cuda_pinned_places_cn.rst
+++ b/doc/paddle/api/paddle/fluid/cuda_pinned_places_cn.rst
+.. _cn_api_fluid_cuda_pinned_places:
+cuda_pinned_places
+-------------------------------
+.. py:function:: paddle.fluid.cuda_pinned_places(device_count=None)
+该接口创建 ``device_count`` 个 ``fluid.CUDAPinnedPlace`` ( fluid. :ref:`cn_api_fluid_CUDAPinnedPlace` ) 对象，并返回所创建的对象列表。
+如果 ``device_count`` 为 ``None``，实际设备数目将由当前任务中使用的GPU设备数决定。用户可通过以下2种方式设置任务可用的GPU设备：
+- 设置环境变量 ``FLAGS_selected_gpus`` ，例如 ``export FLAGS_selected_gpus='0,1'``。
+- 设置环境变量 ``CUDA_VISIBLE_DEVICES`` ，例如 ``export CUDA_VISIBLE_DEVICES='0,1'``。
+关于如何设置任务中使用的GPU设备，具体请查看 fluid. :ref:`cn_api_fluid_cuda_places`  。
+参数：
+  - **device_count** (int，可选) - 设备数目。默认值为 ``None``。
+返回: ``fluid.CUDAPinnedPlace`` 对象列表。
+返回类型：list[fluid.CUDAPinnedPlace]
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    # 1）不设置任何环境变量，默认使用所有的GPU，8卡的机器则将创建8个CUDAPinnedPlace
+    # 2）export FLAGS_selected_gpus='0,1'，则创建2个CUDAPinnedPlace
+    cuda_pinned_places = fluid.cuda_pinned_places()
+    # 3）创建1个CUDAPinnedPlace
+    cuda_pinned_places = fluid.cuda_pinned_places(1)
--- a/doc/paddle/api/paddle/fluid/cuda_places_cn.rst
+++ b/doc/paddle/api/paddle/fluid/cuda_places_cn.rst
+.. _cn_api_fluid_cuda_places:
+cuda_places
+-------------------------------
+.. py:function:: paddle.fluid.cuda_places(device_ids=None)
+.. note::
+    多卡任务请先使用 FLAGS_selected_gpus 环境变量设置可见的GPU设备，下个版本将会修正 CUDA_VISIBLE_DEVICES 环境变量无效的问题。
+该接口根据 ``device_ids`` 创建一个或多个 ``fluid.CUDAPlace`` 对象，并返回所创建的对象列表。
+如果 ``device_ids`` 为 ``None``，则首先检查 ``FLAGS_selected_gpus`` 标志。
+例如： ``FLAGS_selected_gpus=0,1,2`` ，则返回的列表将为 ``[fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)]``。
+如果未设置标志 ``FLAGS_selected_gpus`` ，则根据 ``CUDA_VISIBLE_DEVICES`` 环境变量，返回所有可见的 GPU places。
+如果 ``device_ids`` 不是 ``None``，它应该是使用的GPU设备ID的列表或元组。
+例如： ``device_ids=[0,1,2]`` ，返回的列表将是 ``[fluid.CUDAPlace(0), fluid.CUDAPlace(1), fluid.CUDAPlace(2)]``。
+参数：
+  - **device_ids** (list(int)|tuple(int)，可选) - GPU的设备ID列表或元组。默认值为 ``None``。
+返回: 创建的 ``fluid.CUDAPlace`` 列表。
+返回类型：list[fluid.CUDAPlace]
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      cuda_places = fluid.cuda_places()
--- a/doc/paddle/api/paddle/nn/data_cn.rst
+++ b/doc/paddle/api/paddle/nn/data_cn.rst
--- a/doc/paddle/api/paddle/fluid/dataset/DatasetFactory_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dataset/DatasetFactory_cn.rst
+.. _cn_api_fluid_dataset_DatasetFactory:
+DatasetFactory
+-------------------------------
+.. py:class:: paddle.fluid.dataset.DatasetFactory
+DatasetFactory是一个按数据集名称创建数据集的 "工厂"，可以创建“QueueDataset”，“InMemoryDataset”或“FileInstantDataset”，默认为“QueueDataset”。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+.. py:method:: create_dataset(datafeed_class='QueueDataset')
+创建“QueueDataset”，“InMemoryDataset” 或 “FileInstantDataset”，默认为“QueueDataset”。
+参数：
+    - **datafeed_class** (str) – datafeed类名，为QueueDataset或InMemoryDataset。默认为QueueDataset。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
--- a/doc/paddle/api/paddle/fluid/dataset/InMemoryDataset_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dataset/InMemoryDataset_cn.rst
+.. _cn_api_fluid_dataset_InMemoryDataset:
+InMemoryDataset
+-------------------------------
+.. py:class:: paddle.fluid.dataset.InMemoryDataset
+InMemoryDataset会向内存中加载数据并在训练前缓冲数据。此类由DatasetFactory创建。
+**代码示例**:
+.. code-block:: python
+    dataset = paddle.fluid.DatasetFactory().create_dataset("InMemoryDataset")
+.. py:method:: set_queue_num(queue_num)
+设置 ``Dataset`` 输出队列数量，训练进程会从队列中获取数据。
+参数：
+    - **queue_num** (int) - dataset输出队列数量
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_queue_num(12)
+.. py:method:: set_fleet_send_batch_size(fleet_send_batch_size)
+设置发送batch的大小
+参数:
+    - **fleet_send_batch_size** (int) - 设置发送batch的大小。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_fleet_send_batch_size(800)
+.. py:method:: set_merge_by_lineid(var_list, erase_duplicate_feas=True, min_merge_size=2, keep_unmerged_ins=True)
+通过样本id来设置合并，具有相同样本id的实例将会在shuffle之后进行合并，你应该在data生成器里面解析样本id。
+参数:
+    - **var_list** (list) - 可以被合并的特征列表，其中的每一个元素都是一个 ``Variable`` 。一些类特征我们通常不把它们合并为同样的样本id，所以用户应当指定哪个类特征可以被合并。
+    - **erase_duplicate_feas** (bool) - 合并的时候是否删除重复的特征值。默认为True。
+    - **min_merge_size** (int) - 合并的最小数量。默认为2。
+    - **keep_unmerged_ins** (bool) - 是否保留没有合并的样本，比如有着独特id的样本，或者重复id的数量小于 ``min_merge_size`` 的样本。
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_merge_by_lineid()
+.. py:method:: load_into_memory()
+向内存中加载数据。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+.. py:method:: preload_into_memory()
+向内存中以异步模式加载数据。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.preload_into_memory()
+    dataset.wait_preload_done()
+.. py:method:: wait_preload_done()
+等待 ``preload_into_memory`` 完成。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.preload_into_memory()
+    dataset.wait_preload_done()
+.. py:method:: local_shuffle()
+局域shuffle。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+    dataset.local_shuffle()
+.. py:method:: global_shuffle(fleet=None)
+全局shuffle。
+只能用在分布式模式（单机多进程或多机多进程）中。您如果在分布式模式中运行，应当传递fleet而非None。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+    dataset.global_shuffle(fleet)
+参数：
+    - **fleet** (Fleet) – fleet单例。默认为None。
+.. py:method:: release_memory()
+当数据不再使用时，释放InMemoryDataset内存数据。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+    dataset.global_shuffle(fleet)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    exe.train_from_dataset(fluid.default_main_program(), dataset)
+    dataset.release_memory()
+.. py:method:: get_memory_data_size(fleet=None)
+用户可以调用此函数以了解加载进内存后所有workers中的样本数量。
+.. note::
+    该函数可能会导致性能不佳，因为它具有barrier。
+参数：
+    - **fleet** (Fleet) – fleet对象。
+返回：内存数据的大小。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+    print(dataset.get_memory_data_size(fleet))
+.. py:method:: get_shuffle_data_size(fleet=None)
+获取shuffle数据大小，用户可以调用此函数以了解局域/全局shuffle后所有workers中的样本数量。
+.. note::
+    该函数可能会导致局域shuffle性能不佳，因为它具有barrier。但其不影响全局shuffle。
+参数：
+    - **fleet** (Fleet) – fleet对象。
+返回：shuffle数据的大小。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    filelist = ["a.txt", "b.txt"]
+    dataset.set_filelist(filelist)
+    dataset.load_into_memory()
+    dataset.global_shuffle(fleet)
+    print(dataset.get_shuffle_data_size(fleet))
+.. py:method:: set_batch_size(batch_size)
+设置batch size。在训练期间生效。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_batch_size(128)
+参数：
+    - **batch_size** (int) - batch size
+.. py:method:: set_fea_eval(record_candidate_size, fea_eval=True)
+设置特征打乱特征验证模式，来修正特征level的重要性， 特征打乱需要 ``fea_eval`` 被设置为True。
+参数：
+    - **record_candidate_size** (int) - 打乱一个特征的候选实例大小
+    - **fea_eval** (bool) - 是否设置特征验证模式来打乱特征，默认为True。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_fea_eval(1000000, True)
+.. py:method:: desc()
+为 ``DataFeedDesc`` 返回一个缓存信息。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    print(dataset.desc())
+返回：一个字符串信息
+.. py:method:: set_filelist(filelist)
+在当前的worker中设置文件列表。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_filelist(["a.txt", "b.txt"])
+参数：
+    - **filelist** (list) - 文件列表
+.. py:method:: set_hdfs_config(fs_name, fs_ugi)
+设置hdfs配置：fs名称与ugi。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_hdfs_config("my_fs_name", "my_fs_ugi")
+参数：
+    - **fs_name** (str) - fs名称
+    - **fs_ugi** (str) - fs ugi
+.. py:method:: set_pipe_command(pipe_command)
+在当前的 ``dataset`` 中设置pipe命令。pipe命令只能使用UNIX的pipe命令
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_pipe_command("python my_script.py")
+参数：
+    - **pipe_command** (str) - pipe命令
+.. py:method:: set_thread(thread_num)
+设置线程数量，等于readers的数量。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_thread(12)
+参数：
+    - **thread_num** (int) - 线程数量
+.. py:method:: set_use_var(var_list)
+设置将要使用的 ``Variable`` 。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_use_var([data, label])
+参数：
+    - **var_list** (list) - variable 列表
+.. py:method:: slots_shuffle(slots)
+该方法是在特征层次上的一个打乱方法，经常被用在有着较大缩放率实例的稀疏矩阵上，为了比较metric，比如auc，在一个或者多个有着baseline的特征上做特征打乱来验证特征level的重要性。
+参数：
+    - **slots** (list[string]) - 要打乱特征的集合
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_merge_by_lineid()
+    #支持slot 0
+    dataset.slots_shuffle(['0'])
--- a/doc/paddle/api/paddle/fluid/dataset/QueueDataset_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dataset/QueueDataset_cn.rst
+.. _cn_api_fluid_dataset_QueueDataset:
+QueueDataset
+-------------------------------
+.. py:class:: paddle.fluid.dataset.QueueDataset
+流式处理数据。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
+.. py:method:: local_shuffle()
+局域shuffle数据
+QueueDataset中不支持局域shuffle，可能抛出NotImplementedError
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
+    dataset.local_shuffle()
+.. py:method:: global_shuffle(fleet=None)
+全局shuffle数据
+QueueDataset中不支持全局shuffle，可能抛出NotImplementedError
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+    dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
+    dataset.global_shuffle(fleet)
+.. py:method:: desc()
+为 ``DataFeedDesc`` 返回一个缓存信息。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    print(dataset.desc())
+返回：一个字符串信息
+.. py:method:: set_batch_size(batch_size)
+设置batch size。在训练期间生效。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_batch_size(128)
+参数：
+    - **batch_size** (int) - batch size
+.. py:method:: set_fea_eval(record_candidate_size,fea_eval)
+参数：
+    - **record_candidate_size** (int) - 打乱一个特征的候选实例大小
+    - **fea_eval** (bool) - 是否设置特征验证模式来打乱特征，默认为True。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_fea_eval(1000000, True)
+.. py:method:: set_filelist(filelist)
+在当前的worker中设置文件列表。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_filelist(["a.txt", "b.txt"])
+参数：
+    - **filelist** (list) - 文件列表
+.. py:method:: set_hdfs_config(fs_name, fs_ugi)
+设置hdfs配置：fs名称与ugi。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_hdfs_config("my_fs_name", "my_fs_ugi")
+参数：
+    - **fs_name** (str) - fs名称
+    - **fs_ugi** (str) - fs ugi
+.. py:method:: set_pipe_command(pipe_command)
+在当前的 ``dataset`` 中设置pipe命令。pipe命令只能使用UNIX的pipe命令
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_pipe_command("python my_script.py")
+参数：
+    - **pipe_command** (str) - pipe命令
+.. py:method:: set_thread(thread_num)
+设置线程数量，等于readers的数量。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_thread(12)
+参数：
+    - **thread_num** (int) - 线程数量
+.. py:method:: set_use_var(var_list)
+设置将要使用的 ``Variable`` 。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset()
+    dataset.set_use_var([data, label])
+参数：
+    - **var_list** (list) - variable 列表
+.. py:method:: slots_shuffle(slots)
+该方法是在特征层次上的一个打乱方法，经常被用在有着较大缩放率实例的稀疏矩阵上，为了比较metric，比如auc，在一个或者多个有着baseline的特征上做特征打乱来验证特征level的重要性。
+参数：
+    - **slots** (list[string]) - 要打乱特征的集合
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
+    dataset.set_merge_by_lineid()
+    #支持slot 0
+    dataset.slots_shuffle(['0'])
--- a/doc/paddle/api/paddle/fluid/device_guard_cn.rst
+++ b/doc/paddle/api/paddle/fluid/device_guard_cn.rst
+.. _cn_api_fluid_device_guard:
+device_guard
+-------------------------------
+**注意：该API仅支持【静态图】模式**
+.. py:function:: paddle.fluid.device_guard(device=None)
+一个用于指定OP运行设备的上下文管理器。
+参数：
+    - **device** (str|None) – 指定上下文中使用的设备。它可以是'cpu'或者'gpu'，当它被设置为'cpu'或者'gpu'时，创建在该上下文中的OP将被运行在CPUPlace或者CUDAPlace上。若设置为'gpu'，同时程序运行在单卡模式下，设备的索引将与执行器的设备索引保持一致。默认值：None，在该上下文中的OP将被自动地分配设备。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    support_gpu = fluid.is_compiled_with_cuda()
+    place = fluid.CPUPlace()
+    if support_gpu:
+        place = fluid.CUDAPlace(0)
+    # if GPU is supported, the three OPs below will be automatically assigned to CUDAPlace(0)
+    data1 = fluid.layers.fill_constant(shape=[1, 3, 8, 8], value=0.5, dtype='float32')
+    data2 = fluid.layers.fill_constant(shape=[1, 3, 5, 5], value=0.5, dtype='float32')
+    shape = fluid.layers.shape(data2)
+    with fluid.device_guard("cpu"):
+        # Ops created here will be placed on CPUPlace
+        shape = fluid.layers.slice(shape, axes=[0], starts=[0], ends=[4])
+    with fluid.device_guard('gpu'):
+        # if GPU is supported, OPs created here will be placed on CUDAPlace(0), otherwise on CPUPlace
+        out = fluid.layers.crop_tensor(data1, shape=shape)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    result = exe.run(fetch_list=[out])
--- a/doc/paddle/api/paddle/nn/BatchNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/BatchNorm_cn.rst
--- a/doc/paddle/api/paddle/nn/BilinearTensorProduct_cn.rst
+++ b/doc/paddle/api/paddle/nn/BilinearTensorProduct_cn.rst
--- a/doc/paddle/api/paddle/nn/layer/conv/Conv2DTranspose_cn.rst
+++ b/doc/paddle/api/paddle/nn/layer/conv/Conv2DTranspose_cn.rst
--- a/doc/paddle/api/paddle/nn/layer/conv/Conv2D_cn.rst
+++ b/doc/paddle/api/paddle/nn/layer/conv/Conv2D_cn.rst
--- a/doc/paddle/api/paddle/nn/layer/conv/Conv3DTranspose_cn.rst
+++ b/doc/paddle/api/paddle/nn/layer/conv/Conv3DTranspose_cn.rst
--- a/doc/paddle/api/paddle/nn/layer/conv/Conv3D_cn.rst
+++ b/doc/paddle/api/paddle/nn/layer/conv/Conv3D_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/Dropout_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/Dropout_cn.rst
+.. _cn_api_fluid_dygraph_Dropout:
+Dropout
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.Dropout(p=0.5, seed=None, dropout_implementation='downgrade_in_infer', is_test=False)
+丢弃或者保持输入的每个元素独立。Dropout是一种正则化手段，通过在训练过程中阻止神经元节点间的相关性来减少过拟合。根据给定的丢弃概率，dropout操作符按丢弃概率随机将一些神经元输出设置为0，其他的仍保持不变。
+Dropout层可以删除，提高执行效率。
+参数：
+    - **p** (float32，可选) - 输入单元的丢弃概率，即输入单元设置为0的概率。默认值：0.5
+    - **seed** (int，可选) - 整型数据，用于创建随机种子。如果该参数设为None，则使用随机种子。注：如果给定一个整型种子，始终丢弃相同的输出单元。训练过程中勿用固定不变的种子。默认值：None。
+    - **dropout_implementation** (str，可选) - 丢弃单元的方式，有'downgrade_in_infer'和'upscale_in_train'两种选择，默认：'downgrade_in_infer'。具体作用可以参考以下描述。
+      1. downgrade_in_infer(default), 在预测时减小输出结果
+         - train: out = input * mask
+         - inference: out = input * (1.0 - p)
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``p`` )
+      2. upscale_in_train, 增加训练时的结果
+         - train: out = input * mask / ( 1.0 - p )
+         - inference: out = input
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``p`` )
+    - **is_test** (bool，可选) - 标记是否是测试阶段。此标志仅对静态图模式有效。对于动态图模式，请使用 ``eval()`` 接口。默认：False。
+返回：无
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.dygraph.base import to_variable
+    import numpy as np
+    x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
+    with fluid.dygraph.guard():
+        x = to_variable(x)
+        m = fluid.dygraph.Dropout(p=0.5)
+        droped_train = m(x)
+        # 切换到 eval 模式
+        m.eval()
+        droped_eval = m(x)
--- a/doc/paddle/api/paddle/nn/Embedding_cn.rst
+++ b/doc/paddle/api/paddle/nn/Embedding_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/GRUCell_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/GRUCell_cn.rst
+.. _cn_api_fluid_layers_GRUCell:
+GRUCell
+-------------------------------
+.. py:class:: paddle.fluid.layers.GRUCell(hidden_size, param_attr=None, bias_attr=None, gate_activation=None, activation=None, dtype="float32", name="GRUCell")
+:api_attr: 声明式编程模式（静态图)
+门控循环单元（Gated Recurrent Unit）。通过对 :code:`fluid.contrib.layers.rnn_impl.BasicGRUUnit` 包装，来让它可以应用于RNNCell。
+公式如下：
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \tilde{h_t}
+更多细节可以参考 `Learning Phrase Representations using RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_
+参数：
+  - **hidden_size** (int) - GRUCell中的隐藏层大小。 
+  - **param_attr** (ParamAttr，可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr`。
+  - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。 
+  - **gate_activation** (function，可选) - :math:`act_g` 的激活函数。 默认值为 :code:`fluid.layers.sigmoid`。 
+  - **activation** (function，可选) - :math:`act_c` 的激活函数。 默认值为 :code:`fluid.layers.tanh` 
+  - **dtype** (string，可选) - 此cell中使用的数据类型。 默认为"float32"。 
+  - **name** (string，可选) - 用于标识参数和偏差的名称域。
+返回：GRUCell类的实例对象。
+**示例代码**
+..  code-block:: python 
+    import paddle.fluid.layers as layers
+    cell = layers.GRUCell(hidden_size=256)
+.. py:method:: call(inputs, states)
+执行GRU的计算。 
+参数：
+  - **inputs** (Variable) - 输入，形状为 :math:`[batch\_size，input\_size]` 的tensor，对应于公式中的 :math:`x_t` 。数据类型应为float32。 
+  - **states** (Variable) - 状态，形状为 :math:`[batch\_size，hidden\_size]` 的tensor。 对应于公式中的 :math:`h_{t-1}` 。数据类型应为float32。 
+返回：一个元组 :code:`(outputs, new_states)` ，其中 :code:`outputs` 和 :code:`new_states` 是同一个tensor，其形状为 :math:`[batch\_size，hidden\_size]`，数据类型和 :code:`states` 的数据类型相同，对应于公式中的 :math:`h_t`。
+返回类型：tuple
+.. py:method:: state_shape()
+GRUCell的 :code:`state_shape` 是形状 :math:`[hidden\_size]` （batch大小为-1，自动插入到形状中），对应于 :math:`h_{t-1}` 的形状。
+参数：无。
+返回：GRUCell的 :code:`state_shape`。
+返回类型：Variable
--- a/doc/paddle/api/paddle/fluid/dygraph/GRUUnit_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/GRUUnit_cn.rst
+.. _cn_api_fluid_dygraph_GRUUnit:
+GRUUnit
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.GRUUnit(name_scope, size, param_attr=None, bias_attr=None, activation='tanh', gate_activation='sigmoid', origin_mode=False, dtype='float32')
+该接口用于构建 ``GRU(Gated Recurrent Unit)`` 类的一个可调用对象，具体用法参照 ``代码示例`` 。其用于完成单个时间步内GRU的计算，支持以下两种计算方式：
+如果origin_mode为True，则使用的运算公式来自论文
+`Learning Phrase Representations using RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_ 。
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \tilde{h_t}
+如果origin_mode为False，则使用的运算公式来自论文
+`Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling  <https://arxiv.org/pdf/1412.3555.pdf>`_ 。
+公式如下:
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \tilde{h_t}
+其中， :math:`x_t` 为当前时间步的输入，:math:`h_{t-1}` 为前一时间步的隐状态 ``hidden``； :math:`u_t` 、 :math:`r_t` 、 :math:`\tilde{h_t}` 和 :math:`h_t` 分别代表了GRU单元中update gate（更新门）、reset gate（重置门）、candidate hidden（候选隐状态）和隐状态输出; :math:`\odot` 为逐个元素相乘；
+:math:`W_{uh}, b_u` 、 :math:`W_{rh}, b_r` 和 :math:`W_{ch}, b_c` 分别代表更新门、重置门和候选隐状态在计算时使用的权重矩阵和偏置。在实现上，三个权重矩阵合并为一个维度为 :math:`[D, D \times 3]` 的Tensor存放。
+参数:
+    - **size** (int) – 输入数据的维度大小。
+    - **param_attr** (ParamAttr，可选) – 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    **注意**
+      - 权重参数维度为 :math:`[D, 3×D]` ， :math:`D` 是隐藏状态的规模（hidden size）, 其值与输入size相关，计算方式为size除以3取整 。
+      - 权重参数矩阵所有元素由两部分组成， 一是update gate和reset gate的权重，维度为 :math:`[D, 2×D]` 的2D Tensor，数据类型可以为float32或float64；二是候选隐藏状态（candidate hidden state）的权重矩阵，维度为 :math:`[D, D]` 的2D Tensor，数据类型可以为float32或float64。
+    - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **activation** (str，可选) –  公式中 :math:`act_c` 激活函数的类型。可以为'identity'、'sigmoid'、'tanh'、'relu'四种激活函数设置值。默认值为'tanh'。
+    - **gate_activation** (str，可选) – 公式中 :math:`act_g` 激活函数的类型。可以为'identity'、'sigmoid'、'tanh'、'relu'四种激活函数设置值。默认值为'sigmoid'。
+    - **origin_mode** (bool) – 指明要使用的GRU计算方式，两种计算方式具体差异见公式描述。默认值为False。
+    - **dtype** (str，可选) – 该层的数据类型，可以为'float32', 'float64'。默认值为'float32'。
+返回： 
+    None.
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.dygraph.base as base
+    import numpy
+    lod = [[2, 4, 3]]
+    D = 5
+    T = sum(lod[0])
+    input = numpy.random.rand(T, 3 * D).astype('float32')
+    hidden_input = numpy.random.rand(T, D).astype('float32')
+    with fluid.dygraph.guard():
+        x = numpy.random.random((3, 32, 32)).astype('float32')
+        gru = fluid.dygraph.GRUUnit(size=D * 3)
+        dy_ret = gru(
+        base.to_variable(input), base.to_variable(hidden_input))
+属性
+::::::::::::
+.. py:attribute:: weight
+本层的可学习参数，类型为 ``Parameter``
+.. py:attribute:: bias
+本层的可学习偏置，类型为 ``Parameter``
--- a/doc/paddle/api/paddle/nn/GroupNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/GroupNorm_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/LSTMCell_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/LSTMCell_cn.rst
+.. _cn_api_fluid_layers_LSTMCell:
+LSTMCell
+-------------------------------
+.. py:class:: paddle.fluid.layers.LSTMCell(hidden_size, param_attr=None, bias_attr=None, gate_activation=None, activation=None, forget_bias=1.0, dtype="float32", name="LSTMCell")
+:api_attr: 声明式编程模式（静态图)
+长短期记忆单元（Long-Short Term Memory）。通过对 :code:`fluid.contrib.layers.rnn_impl.BasicLSTMUnit` 包装，来让它可以应用于RNNCell。    
+公式如下：
+.. math:: 
+    i_{t} &= act_g \left ( W_{x_{i}}x_{t}+W_{h_{i}}h_{t-1}+b_{i} \right ) \\
+    f_{t} &= act_g \left ( W_{x_{f}}x_{t}+W_{h_{f}}h_{t-1}+b_{f}+forget\_bias \right ) \\
+    c_{t} &= f_{t}c_{t-1}+i_{t}act_h\left ( W_{x_{c}}x_{t} +W_{h_{c}}h_{t-1}+b_{c}\right ) \\
+    o_{t} &= act_g\left ( W_{x_{o}}x_{t}+W_{h_{o}}h_{t-1}+b_{o} \right ) \\
+    h_{t} &= o_{t}act_h \left ( c_{t} \right )
+更多细节可以参考 `RECURRENT NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_  
+参数：
+  - **hidden_size** (int) - LSTMCell中的隐藏层大小。
+  - **param_attr** (ParamAttr，可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr`。
+  - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。 
+  - **gate_activation** (function，可选) - :math:`act_g` 的激活函数。 默认值为 :code:`fluid.layers.sigmoid`。 
+  - **activation** (function，可选) - :math:`act_h` 的激活函数。 默认值为 :code:`fluid.layers.tanh`。
+  - **forget_bias** (float，可选) - 计算遗忘门时使用的遗忘偏置。默认值为 1.0。
+  - **dtype** (string，可选) - 此Cell中使用的数据类型。 默认值为 `float32`。 
+  - **name** (string，可选) - 用于标识参数和偏差的名称域。
+返回：LSTMCell类的实例对象。
+**示例代码**
+.. code-block:: python
+    import paddle.fluid.layers as layers
+    cell = layers.LSTMCell(hidden_size=256)
+.. py:method:: call(inputs, states)
+执行LSTM的计算。
+参数：
+  - **inputs** (Variable) - 输入，形状为 :math:`[batch\_size，input\_size]` 的tensor，对应于公式中的 :math:`x_t`。数据类型应为float32。 
+  - **states** (Variable) - 状态，包含两个tensor的列表，每个tensor形状为 :math:`[batch\_size，hidden\_size]`。 对应于公式中的 :math:`h_{t-1}, c_{t-1}`。数据类型应为float32。 
+返回：一个元组 :code:`(outputs, new_states)`，其中 :code:`outputs` 是形状为 :math:`[batch\_size，hidden\_size]` 的tensor，对应于公式中的 :math:`h_{t}`；:code:`new_states` 是一个列表，包含形状为 :math:`[batch\_size，hidden\_size]` 的两个tensor变量，它们对应于公式中的 :math:`h_{t}, c_{t}`。这些tensor的数据类型都与 :code:`states` 的数据类型相同。
+返回类型：tuple
+.. py:method:: state_shape()
+LSTMCell的 :code:`state_shape` 是一个具有两个形状的列表：:math:`[[hidden\_size], [hidden\_size]]` （batch大小为-1，自动插入到形状中）。 这两个形状分别对应于公式中的 :math:`h_{t-1}` 和 :math:`c_{t-1}`。
+参数：无。
+返回：LSTMCell的 :code:`state_shape` 
+返回类型：list
--- a/doc/paddle/api/paddle/fluid/dygraph/LambdaDecay_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/LambdaDecay_cn.rst
+.. _cn_api_fluid_dygraph_LambdaDecay:
+LambdaDecay
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.LambdaDecay(learning_rate, lr_lambda)
+:api_attr: 命令式编程模式（动态图)
+该API提供 lambda函数 设置学习率的功能。 ``lr_lambda`` 为一个lambda函数，其通过 ``epoch`` 计算出一个因子，该因子会乘以初始学习率。
+算法可以描述为：
+.. code-block:: text
+    learning_rate = 0.5        # init learning_rate
+    lr_lambda = lambda epoch: 0.95 ** epoch
+    learning_rate = 0.5        # epoch 0
+    learning_rate = 0.475      # epoch 1
+    learning_rate = 0.45125    # epoch 2
+参数：
+    - **learning_rate** (float|int) - 初始化的学习率。可以是Python的float或int。
+    - **lr_lambda** (function) - ``lr_lambda`` 为一个lambda函数，其通过 ``epoch`` 计算出一个因子，该因子会乘以初始学习率。
+返回： 无
+**代码示例**：
+    .. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        with fluid.dygraph.guard():
+            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
+            linear = fluid.dygraph.Linear(10, 10)
+            input = fluid.dygraph.to_variable(x)
+            scheduler = fluid.dygraph.LambdaDecay(0.5, lr_lambda=lambda x: 0.95**x)
+            adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
+            for epoch in range(6):
+                for batch_id in range(5):
+                    out = linear(input)
+                    loss = fluid.layers.reduce_mean(out)
+                    adam.minimize(loss)
+                scheduler.epoch()
+                print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
+                # epoch:0, current lr is 0.5
+                # epoch:1, current lr is 0.475
+                # epoch:2, current lr is 0.45125
+.. py:method:: epoch(epoch=None)
+通过当前的 epoch 调整学习率，调整后的学习率将会在下一次调用 ``optimizer.minimize`` 时生效。
+参数：
+  - **epoch** (int|float，可选) - 类型：int或float。指定当前的epoch数。默认：无，此时将会自动累计epoch数。
+返回：
+    无
+**代码示例**:
+    参照上述示例代码。
--- a/doc/paddle/api/paddle/nn/LayerNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/LayerNorm_cn.rst
--- a/doc/paddle/api/paddle/nn/Linear_cn.rst
+++ b/doc/paddle/api/paddle/nn/Linear_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/MultiStepDecay_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/MultiStepDecay_cn.rst
+.. _cn_api_fluid_dygraph_MultiStepDecay:
+MultiStepDecay
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.MultiStepDecay(learning_rate, milestones, decay_rate=0.1)
+:api_attr: 命令式编程模式（动态图)
+该接口提供 ``MultiStep`` 衰减学习率的功能。
+算法可以描述为：
+.. code-block:: text
+    learning_rate = 0.5
+    milestones = [30, 50]
+    decay_rate = 0.1
+    if epoch < 30:
+        learning_rate = 0.5
+    elif epoch < 50:
+        learning_rate = 0.05
+    else:
+        learning_rate = 0.005
+参数：
+    - **learning_rate** (float|int) - 初始化的学习率。可以是Python的float或int。
+    - **milestones** (tuple|list) - 列表或元组。必须是递增的。
+    - **decay_rate** (float, optional) - 学习率的衰减率。 ``new_lr = origin_lr * decay_rate`` 。其值应该小于1.0。默认：0.1。
+返回： 无
+**代码示例**：
+    .. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        with fluid.dygraph.guard():
+            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
+            linear = fluid.dygraph.Linear(10, 10)
+            input = fluid.dygraph.to_variable(x)
+            scheduler = fluid.dygraph.MultiStepDecay(0.5, milestones=[3, 5])
+            adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
+            for epoch in range(6):
+                for batch_id in range(5):
+                    out = linear(input)
+                    loss = fluid.layers.reduce_mean(out)
+                    adam.minimize(loss)
+                scheduler.epoch()
+                print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
+                # epoch:0, current lr is 0.5
+                # epoch:1, current lr is 0.5
+                # epoch:2, current lr is 0.5
+                # epoch:3, current lr is 0.05
+                # epoch:4, current lr is 0.05
+                # epoch:5, current lr is 0.005
+.. py:method:: epoch(epoch=None)
+通过当前的 epoch 调整学习率，调整后的学习率将会在下一次调用 ``optimizer.minimize`` 时生效。
+参数：
+  - **epoch** (int|float，可选) - 类型：int或float。指定当前的epoch数。默认：无，此时将会自动累计epoch数。
+返回：
+    无
+**代码示例**:
+    参照上述示例代码。
--- a/doc/paddle/api/paddle/fluid/dygraph/NCE_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/NCE_cn.rst
+.. _cn_api_fluid_dygraph_NCE:
+NCE
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.NCE(num_total_classes, dim, sample_weight=None, param_attr=None, bias_attr=None, num_neg_samples=None, sampler='uniform', custom_dist=None, seed=0, is_sparse=False, dtype="float32")
+该接口用于构建 ``NCE`` 类的一个可调用对象，具体用法参照 ``代码示例`` 。其中实现了 ``NCE`` 损失函数的功能，其默认使用均匀分布进行抽样，计算并返回噪音对比估计（ noise-contrastive estimation training loss）。更多详情请参考：`Noise-contrastive estimation: A new estimation principle for unnormalized statistical models <http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_
+参数：
+    - **num_total_classes** (int) - 所有样本中的类别的总数。
+    - **dim** (int) - 输入的维度（一般为词嵌入的维度）。
+    - **sample_weight** (Variable, 可选) - 维度为\[batch_size, 1\]，存储每个样本的权重。每个样本的默认权重为1.0。默认值：None。
+    - **param_attr** (ParamAttr, 可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **bias_attr** (ParamAttr, 可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **num_neg_samples** (int, 可选) - 负样本的数量。默认值：10。
+    - **sampler** (str, 可选) – 指明采样器的类型，用于从负类别中进行采样。可以是 ``uniform`` 、 ``log_uniform`` 或 ``custom_dist`` 。 默认值： ``uniform`` 。
+    - **custom_dist** (float[], 可选) – float[] 类型的数据，并且它的长度为 ``num_total_classes`` 。如果采样器类别为 ``custom_dist`` ，则使用此参数。custom_dist\[i\]是第i个类别被取样的概率。默认值：None
+    - **seed** (int, 可选) – 采样器使用的随机种子。默认值：0。
+    - **is_sparse** (bool, 可选) – 指明是否使用稀疏更新，如果为True， :math:`weight@GRAD` 和 :math:`bias@GRAD` 会变为 SelectedRows。默认值：False。
+    - **dtype** (str, 可选) - 数据类型，可以为"float32"或"float64"。默认值："float32"。
+返回：无
+**代码示例**
+..  code-block:: python
+    import numpy as np
+    import paddle.fluid as fluid
+    window_size = 5
+    dict_size = 20
+    label_word = int(window_size // 2) + 1
+    inp_word = np.array([[1], [2], [3], [4], [5]]).astype('int64')
+    nid_freq_arr = np.random.dirichlet(np.ones(20) * 1000).astype('float32')
+    with fluid.dygraph.guard():
+        words = []
+        for i in range(window_size):
+            words.append(fluid.dygraph.base.to_variable(inp_word[i]))
+        emb = fluid.Embedding(
+            size=[dict_size, 32],
+            param_attr='emb.w',
+            is_sparse=False)
+        embs3 = []
+        for i in range(window_size):
+            if i == label_word:
+                continue
+            emb_rlt = emb(words[i])
+            embs3.append(emb_rlt)
+        embs3 = fluid.layers.concat(input=embs3, axis=1)
+        nce = fluid.NCE(
+                    num_total_classes=dict_size,
+                    dim=embs3.shape[1],
+                    num_neg_samples=2,
+                    sampler="custom_dist",
+                    custom_dist=nid_freq_arr.tolist(),
+                    seed=1,
+                    param_attr='nce.w',
+                    bias_attr='nce.b')
+        wl = fluid.layers.unsqueeze(words[label_word], axes=[0])
+        nce_loss3 = nce(embs3, wl)
+属性
+::::::::::::
+.. py:attribute:: weight
+本层的可学习参数，类型为 ``Parameter``
+.. py:attribute:: bias
+本层的可学习偏置，类型为 ``Parameter``
--- a/doc/paddle/api/paddle/fluid/dygraph/PRelu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/PRelu_cn.rst
+.. _cn_api_fluid_dygraph_PRelu:
+PRelu
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.PRelu(mode, channel=None, input_shape=None, param_attr=None, dtype="float32")
+该接口用于构建 ``PRelu`` 类的一个可调用对象，具体用法参照 ``代码示例`` 。其中实现了 ``PRelu`` 激活函数的三种激活方式。
+计算公式如下：
+.. math::
+    y = max(0, x) + \alpha min(0, x)
+参数：
+    - **mode** (str) - 权重共享模式。共提供三种激活方式：
+    .. code-block:: text
+        all：所有元素使用同一个alpha值
+        channel：在同一个通道中的元素使用同一个alpha值
+        element：每一个元素有一个独立的alpha值
+    - **channel** (int，可选) - 通道数。该参数在mode参数为"channel"时是必须的。默认为None。
+    - **input_shape** (int 或 list 或 tuple，可选) - 输入的维度。该参数在mode参数为"element"时是必须的。默认为None。
+    - **param_attr** (ParamAttr, 可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **dtype** (str, 可选) - 数据类型，可以为"float32"或"float64"。默认值："float32"。
+返回：无
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.dygraph.base import to_variable
+    import numpy as np
+    inp_np = np.ones([5, 200, 100, 100]).astype('float32')
+    with fluid.dygraph.guard():
+        inp_np = to_variable(inp_np)
+        prelu0 = fluid.PRelu(
+           mode='all',
+           param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
+        dy_rlt0 = prelu0(inp_np)
+        prelu1 = fluid.PRelu(
+           mode='channel',
+           channel=200,
+           param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
+        dy_rlt1 = prelu1(inp_np)
+        prelu2 = fluid.PRelu(
+           mode='element',
+           input_shape=inp_np.shape,
+           param_attr=fluid.ParamAttr(initializer=fluid.initializer.Constant(1.0)))
+        dy_rlt2 = prelu2(inp_np)
+属性
+::::::::::::
+.. py:attribute:: weight
+本层的可学习参数，类型为 ``Parameter``
--- a/doc/paddle/api/paddle/nn/Pool2D_cn.rst
+++ b/doc/paddle/api/paddle/nn/Pool2D_cn.rst
--- a/doc/paddle/api/paddle/jit/ProgramTranslator_cn.rst
+++ b/doc/paddle/api/paddle/jit/ProgramTranslator_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/ReduceLROnPlateau_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/ReduceLROnPlateau_cn.rst
+.. _cn_api_fluid_dygraph_ReduceLROnPlateau:
+ReduceLROnPlateau
+-------------------------------
+**注意：该API仅支持【动态图】模式**
+.. py:class:: paddle.fluid.dygraph.ReduceLROnPlateau(learning_rate, mode='min', decay_rate=0.1, patience=10, verbose=False, threshold=1e-4, threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-8, dtype='float32')
+该API为 ``loss`` 自适应的学习率衰减策略。默认情况下，当 ``loss`` 停止下降时，降低学习率（如果将 ``mode`` 设置为 `'max'` ，此时判断逻辑相反， ``loss`` 停止上升时降低学习率）。其思想是：一旦模型表现不再提升，将学习率降低2-10倍对模型的训练往往有益。
+``loss`` 是传入到该类方法 ``step`` 中的参数，其必须是shape为[1]的1-D Tensor。 如果 ``loss`` 停止下降（``mode`` 为 `min` 时）超过 ``patience`` 个epoch，学习率将会减小为
+`learning_rate * decay_rate` 。
+此外，每降低一次学习率后，将会进入一个时长为 ``cooldown`` 个epoch的冷静期，在冷静期内，将不会监控 ``loss`` 的变化情况，也不会衰减。
+在冷静期之后，会继续监控 ``loss`` 的上升或下降。
+参数：
+    - **learning_rate** (Variable|float|int) - 初始学习率。其类型可以是Python的float类型，如果输入int类型则会被转为float类型。其也可以是shape为[1]的
+      1-D Tensor，且相应数据类型必须为 "float32" 或 "float64" 。
+    - **mode** (str，可选) - `'min'` 和 `'max'` 之一。通常情况下，为 `'min'` ，此时当 ``loss`` 停止下降时学习率将减小。默认：`'min'` 。
+      （注意：仅在特殊用法时，可以将其设置为 `'max'` ，此时判断逻辑相反， ``loss`` 停止上升学习率才减小）
+    - **decay_rate** (float，可选) - 学习率衰减的比例。`new_lr = origin_lr * decay_rate` ，它是值小于1.0的float型数字，默认: 0.1。
+    - **patience** (int，可选) - 当 ``loss`` 连续 ``patience`` 个epoch没有下降(mode: 'min')或上升(mode: 'max')时，学习率才会减小。默认：10。
+    - **verbose** (bool，可选) - 如果为 ``True`` ， 会在每次更新optimizer中的learning_rate时，打印信息。默认：``False`` 。
+    - **threshold** (float，可选) - ``threshold`` 和 ``threshold_mode`` 两个参数将会决定 ``loss`` 最小变化的阈值。小于该阈值的变化
+      将会被忽视。默认：1e-4。
+    - **threshold_mode** (str，可选) - `'rel'` 和 `'abs'` 之一。在 `'rel'` 模式下， ``loss`` 最小变化的阈值是 `last_loss * threshold` ，
+      其中 ``last_loss`` 是 ``loss`` 在上个epoch的值。在 `'abs'` 模式下，``loss`` 最小变化的阈值是 `threshold` 。 默认：`'rel'`。
+    - **cooldown** (int，可选) - 在学习速率每次减小之后，会进入时长为 ``cooldown`` 个epoch的冷静期。默认：0。
+    - **min_lr** (float，可选) - 最小的学习率。减小后的学习率最低下界限。默认：0。
+    - **eps** (float，可选) - 如果新旧学习率间的差异小于 ``eps`` ，则不会更新。默认值:1e-8。
+    - **dtype** (str，可选) – 学习率值的数据类型，可以为"float32", "float64"。默认："float32"。
+返回： ``loss`` 自适应的学习率
+返回类型：Variable
+**代码示例**：
+    .. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        with fluid.dygraph.guard():
+            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
+            linear = fluid.dygraph.Linear(10, 10)
+            input = fluid.dygraph.to_variable(x)
+            reduce_lr = fluid.dygraph.ReduceLROnPlateau(
+                                    learning_rate = 1.0,
+                                    decay_rate = 0.5,
+                                    patience = 5,
+                                    verbose = True,
+                                    cooldown = 3)
+            adam = fluid.optimizer.Adam(
+                learning_rate = reduce_lr,
+                parameter_list = linear.parameters())
+            for epoch in range(10):
+                total_loss = 0
+                for bath_id in range(5):
+                    out = linear(input)
+                    loss = fluid.layers.reduce_mean(out)
+                    total_loss += loss
+                    adam.minimize(loss)
+                avg_loss = total_loss/5
+                # 根据传入的avg_loss，调整学习率
+                reduce_lr.step(avg_loss)
+                lr = adam.current_step_lr()
+                print("current avg_loss is %s, current lr is %s" % (avg_loss.numpy()[0], lr))
+.. py:method:: step(loss)
+需要在每个epoch调用该方法，其根据传入的 ``loss`` 调整optimizer中的学习率，调整后的学习率将会在下一次调用 ``optimizer.minimize`` 时生效。
+参数：
+  - **loss** (Variable) - 类型：Variable，shape为[1]的1-D Tensor。将被用来判断是否需要降低学习率。如果 ``loss`` 连续 ``patience`` 个epochs没有下降，
+    将会降低学习率。
+返回：
+    无
+**代码示例**:
+    参照其类中的说明。
--- a/doc/paddle/api/paddle/nn/SpectralNorm_cn.rst
+++ b/doc/paddle/api/paddle/nn/SpectralNorm_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/StepDecay_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/StepDecay_cn.rst
+.. _cn_api_fluid_dygraph_StepDecay:
+StepDecay
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.StepDecay(learning_rate, step_size, decay_rate=0.1)
+:api_attr: 命令式编程模式（动态图)
+该接口提供 ``step_size`` 衰减学习率的功能，每经过 ``step_size`` 个 ``epoch`` 时会通过 ``decay_rate`` 衰减一次学习率。
+算法可以描述为：
+.. code-block:: text
+    learning_rate = 0.5
+    step_size = 30
+    decay_rate = 0.1
+    learning_rate = 0.5     if epoch < 30
+    learning_rate = 0.05    if 30 <= epoch < 60
+    learning_rate = 0.005   if 60 <= epoch < 90
+    ...
+参数：
+    - **learning_rate** (float|int) - 初始化的学习率。可以是Python的float或int。
+    - **step_size** (int) - 学习率每衰减一次的间隔。
+    - **decay_rate** (float, optional) - 学习率的衰减率。 ``new_lr = origin_lr * decay_rate`` 。其值应该小于1.0。默认：0.1。
+返回： 无
+**代码示例**：
+    .. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        with fluid.dygraph.guard():
+            x = np.random.uniform(-1, 1, [10, 10]).astype("float32")
+            linear = fluid.dygraph.Linear(10, 10)
+            input = fluid.dygraph.to_variable(x)
+            scheduler = fluid.dygraph.StepDecay(0.5, step_size=3)
+            adam = fluid.optimizer.Adam(learning_rate = scheduler, parameter_list = linear.parameters())
+            for epoch in range(9):
+                for batch_id in range(5):
+                    out = linear(input)
+                    loss = fluid.layers.reduce_mean(out)
+                    adam.minimize(loss)  
+                scheduler.epoch()
+                print("epoch:{}, current lr is {}" .format(epoch, adam.current_step_lr()))
+                # epoch:0, current lr is 0.5
+                # epoch:1, current lr is 0.5
+                # epoch:2, current lr is 0.5
+                # epoch:3, current lr is 0.05
+                # epoch:4, current lr is 0.05
+                # epoch:5, current lr is 0.05
+                # epoch:6, current lr is 0.005
+                # epoch:7, current lr is 0.005
+                # epoch:8, current lr is 0.005
+.. py:method:: epoch(epoch=None)
+通过当前的 epoch 调整学习率，调整后的学习率将会在下一次调用 ``optimizer.minimize`` 时生效。
+参数：
+  - **epoch** (int|float，可选) - 类型：int或float。指定当前的epoch数。默认：无，此时将会自动累计epoch数。
+返回：
+    无
+**代码示例**:
+    参照上述示例代码。
--- a/doc/paddle/api/paddle/fluid/dygraph/TreeConv_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/TreeConv_cn.rst
+.. _cn_api_fluid_dygraph_TreeConv:
+TreeConv
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.TreeConv(feature_size, output_size, num_filters=1, max_depth=2, act='tanh', param_attr=None, bias_attr=None, name=None, dtype="float32")
+该接口用于构建 ``TreeConv`` 类的一个可调用对象，具体用法参照 ``代码示例`` 。其将在神经网络中构建一个基于树结构的卷积（Tree-Based Convolution）运算。基于树的卷积是基于树的卷积神经网络（TBCNN，Tree-Based Convolution Neural Network）的一部分，它用于对树结构进行分类，例如抽象语法树。 Tree-Based Convolution提出了一种称为连续二叉树的数据结构，它将多路（multiway）树视为二叉树。详情请参考： `基于树的卷积论文 <https://arxiv.org/abs/1409.5718v1>`_ 。
+参数：
+    - **feature_size**  (int) – nodes_vector的shape的最后一维的维度。
+    - **output_size**  (int) – 输出特征宽度。
+    - **num_filters**  (int, 可选) – 滤波器的数量，默认值为1。
+    - **max_depth**  (int, 可选) – 滤波器的最大深度，默认值为2。
+    - **act**  (str, 可选) – 应用于输出上的激活函数，如tanh、softmax、sigmoid，relu等，支持列表请参考 :ref:`api_guide_activations` ，默认值为'tanh'。
+    - **param_attr**  (ParamAttr, 可选) – 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **bias_attr**  (ParamAttr, 可选) – 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **name** (str, 可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+    - **dtype** (str, 可选) - 数据类型，可以为"float32"或"float64"。默认值为"float32"。
+返回：无
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    with fluid.dygraph.guard():
+        nodes_vector = numpy.random.random((1, 10, 5)).astype('float32')
+        edge_set = numpy.random.random((1, 9, 2)).astype('int32')
+        treeConv = fluid.dygraph.nn.TreeConv(
+          feature_size=5, output_size=6, num_filters=1, max_depth=2)
+        ret = treeConv(fluid.dygraph.base.to_variable(nodes_vector), fluid.dygraph.base.to_variable(edge_set))
+属性
+::::::::::::
+.. py:attribute:: weight
+本层的可学习参数，类型为 ``Parameter``
+.. py:attribute:: bias
+本层的可学习偏置，类型为 ``Parameter``
--- a/doc/paddle/api/paddle/fluid/dygraph/base/disable_dygraph_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/base/disable_dygraph_cn.rst
+.. _cn_api_fluid_disable_dygraph:
+disable_dygraph
+-------------------------------
+.. py:function:: paddle.fluid.disable_dygraph()
+该接口关闭动态图模式。
+返回：无
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    fluid.enable_dygraph()  # Now we are in dygraph mode
+    print(fluid.in_dygraph_mode())  # True
+    fluid.disable_dygraph()
+    print(fluid.in_dygraph_mode())  # False
--- a/doc/paddle/api/paddle/fluid/dygraph/base/enable_dygraph_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/base/enable_dygraph_cn.rst
+.. _cn_api_fluid_enable_dygraph:
+enable_dygraph
+-------------------------------
+.. py:function:: paddle.fluid.enable_dygraph(place=None)
+该接口打开动态图模式。
+参数：
+  - **place** (fluid.CPUPlace 或 fluid.CUDAPlace，可选) - 执行动态图的设备。若为None，则设备根据paddle的编译方式决定。默认值为 ``None``。
+返回：无
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    fluid.enable_dygraph()  # Now we are in dygraph mode
+    print(fluid.in_dygraph_mode())  # True
+    fluid.disable_dygraph()
+    print(fluid.in_dygraph_mode())  # False
--- a/doc/paddle/api/paddle/grad_cn.rst
+++ b/doc/paddle/api/paddle/grad_cn.rst
--- a/doc/paddle/api/paddle/no_grad_cn.rst
+++ b/doc/paddle/api/paddle/no_grad_cn.rst
--- a/doc/paddle/api/paddle/to_variable_cn.rst
+++ b/doc/paddle/api/paddle/to_variable_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/checkpoint/load_dygraph_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/checkpoint/load_dygraph_cn.rst
+.. _cn_api_fluid_dygraph_load_dygraph:
+load_dygraph
+-------------------------------
+.. py:function:: paddle.fluid.dygraph.load_dygraph(model_path)
+:api_attr: 命令式编程模式（动态图)
+该接口尝试从磁盘中加载参数或优化器的 ``dict`` 。
+该接口会同时加载 ``model_path + ".pdparams"`` 和 ``model_path + ".pdopt"`` 中的内容。
+参数:
+    - **model_path**  (str) – 保存state_dict的文件前缀。该路径不应该包括后缀 ``.pdparams`` 或 ``.pdopt``。
+返回: 两个 ``dict`` ，即从文件中恢复的参数 ``dict`` 和优化器 ``dict``
+- para_dict: 从文件中恢复的参数 ``dict``
+- opti_dict: 从文件中恢复的优化器 ``dict``
+返回类型: tuple(dict, dict)
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    with fluid.dygraph.guard():
+        emb = fluid.dygraph.Embedding([10, 10])
+        state_dict = emb.state_dict()
+        fluid.save_dygraph( state_dict, "paddle_dy")
+        adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000) ,
+                                     parameter_list = emb.parameters() )
+        state_dict = adam.state_dict()
+        fluid.save_dygraph( state_dict, "paddle_dy")
+        para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy")
--- a/doc/paddle/api/paddle/fluid/dygraph/checkpoint/save_dygraph_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/checkpoint/save_dygraph_cn.rst
+.. _cn_api_fluid_dygraph_save_dygraph:
+save_dygraph
+-------------------------------
+.. py:function:: paddle.fluid.dygraph.save_dygraph(state_dict, model_path)
+:api_attr: 命令式编程模式（动态图)
+该接口将传入的参数或优化器的 ``dict`` 保存到磁盘上。
+``state_dict`` 是通过 :ref:`cn_api_fluid_dygraph_Layer` 的 ``state_dict()`` 方法得到的。
+注： ``model_path`` 不可以是一个目录。
+该接口会根据 ``state_dict`` 的内容，自动给 ``model_path`` 添加 ``.pdparams`` 或者 ``.pdopt`` 后缀，
+生成 ``model_path + ".pdparams"`` 或者 ``model_path + ".pdopt"`` 文件。
+参数:
+ - **state_dict**  (dict of Parameters) – 要保存的模型参数的 ``dict`` 。
+ - **model_path**  (str) – 保存state_dict的文件前缀。格式为 ``目录名称/文件前缀``。如果文件前缀为空字符串，会引发异常。
+返回: 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    with fluid.dygraph.guard():
+        emb = fluid.dygraph.Embedding([10, 10])
+        state_dict = emb.state_dict()
+        fluid.save_dygraph( state_dict, "paddle_dy") # 会保存为 paddle_dy.pdparams
+        adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000),
+                                     parameter_list = emb.parameters() )
+        state_dict = adam.state_dict()
+        fluid.save_dygraph( state_dict, "paddle_dy") # 会保存为 paddle_dy.pdopt
\ No newline at end of file
--- a/doc/paddle/api/paddle/nn/LayerList_cn.rst
+++ b/doc/paddle/api/paddle/nn/LayerList_cn.rst
--- a/doc/paddle/api/paddle/nn/ParameterList_cn.rst
+++ b/doc/paddle/api/paddle/nn/ParameterList_cn.rst
--- a/doc/paddle/api/paddle/nn/Sequential_cn.rst
+++ b/doc/paddle/api/paddle/nn/Sequential_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/guard_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/guard_cn.rst
+.. _cn_api_fluid_dygraph_guard:
+guard
+-------------------------------
+.. py:function:: paddle.fluid.dygraph.guard(place=None)
+:api_attr: 命令式编程模式（动态图)
+通过with语句创建一个dygraph运行的context，执行context代码。
+参数：
+    - **place** (fluid.CPUPlace|fluid.CUDAPlace, 可选) –  动态图执行的设备，可以选择cpu，gpu，如果用户未指定，则根据用户paddle编译的方式来选择运行的设备，如果编译的cpu版本，则在cpu上运行，如果是编译的gpu版本，则在gpu上运行。默认值：None。
+返回： None
+**代码示例**
+.. code-block:: python
+    import numpy as np
+    import paddle.fluid as fluid
+    with fluid.dygraph.guard():
+        inp = np.ones([3, 1024], dtype='float32')
+        t = fluid.dygraph.base.to_variable(inp)
+        linear1 = fluid.Linear(1024, 4, bias_attr=False)
+        linear2 = fluid.Linear(4, 4)
+        ret = linear1(t)
+        dy_ret = linear2(ret)
--- a/doc/paddle/api/paddle/jit/TranslatedLayer_cn.rst
+++ b/doc/paddle/api/paddle/jit/TranslatedLayer_cn.rst
--- a/doc/paddle/api/paddle/jit/TracedLayer_cn.rst
+++ b/doc/paddle/api/paddle/jit/TracedLayer_cn.rst
--- a/doc/paddle/api/paddle/fluid/dygraph/jit/declarative_cn.rst
+++ b/doc/paddle/api/paddle/fluid/dygraph/jit/declarative_cn.rst
+.. _cn_api_fluid_dygraph_declarative:
+declarative
+-------------------------------
+.. py:decorator:: paddle.fluid.dygraph.jit.declarative
+本装饰器将函数内的动态图API转化为静态图API。此装饰器自动处理静态图模式下的
+Program和Executor，并将结果作为动态图VarBase返回。
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    from paddle.fluid.dygraph.jit import declarative
+    @declarative
+    def func(x):
+        x = fluid.dygraph.to_variable(x)
+        if fluid.layers.mean(x) < 0:
+            x_v = x - 1
+        else:
+            x_v = x + 1
+        return x_v
+    x = np.ones([1, 2])
+    x_v = func(x)
+    print(x_v.numpy()) # [[2. 2.]]
--- a/doc/paddle/api/paddle/nn/Layer_cn.rst
+++ b/doc/paddle/api/paddle/nn/Layer_cn.rst
--- a/doc/paddle/api/paddle/CosineDecay_cn.rst
+++ b/doc/paddle/api/paddle/CosineDecay_cn.rst
--- a/doc/paddle/api/paddle/ExponentialDecay_cn.rst
+++ b/doc/paddle/api/paddle/ExponentialDecay_cn.rst
--- a/doc/paddle/api/paddle/InverseTimeDecay_cn.rst
+++ b/doc/paddle/api/paddle/InverseTimeDecay_cn.rst
--- a/doc/paddle/api/paddle/NaturalExpDecay_cn.rst
+++ b/doc/paddle/api/paddle/NaturalExpDecay_cn.rst
--- a/doc/paddle/api/paddle/NoamDecay_cn.rst
+++ b/doc/paddle/api/paddle/NoamDecay_cn.rst
--- a/doc/paddle/api/paddle/PiecewiseDecay_cn.rst
+++ b/doc/paddle/api/paddle/PiecewiseDecay_cn.rst
--- a/doc/paddle/api/paddle/PolynomialDecay_cn.rst
+++ b/doc/paddle/api/paddle/PolynomialDecay_cn.rst
--- a/doc/paddle/api/paddle/ParallelEnv_cn.rst
+++ b/doc/paddle/api/paddle/ParallelEnv_cn.rst
--- a/doc/paddle/api/paddle/prepare_context_cn.rst
+++ b/doc/paddle/api/paddle/prepare_context_cn.rst
--- a/doc/paddle/api/paddle/fluid/evaluator/ChunkEvaluator_cn.rst
+++ b/doc/paddle/api/paddle/fluid/evaluator/ChunkEvaluator_cn.rst
+.. _cn_api_fluid_metrics_ChunkEvaluator:
+ChunkEvaluator
+-------------------------------
+.. py:class:: paddle.fluid.metrics.ChunkEvaluator(name=None)
+该接口使用mini-batch的chunk_eval累计的counter numbers，来计算准确率、召回率和F1值。ChunkEvaluator有三个状态num_infer_chunks，num_label_chunks和num_correct_chunks，分别对应语块数目、标签中的语块数目、正确识别的语块数目。对于chunking的基础知识，请参考 https://www.aclweb.org/anthology/N01-1025 。ChunkEvaluator计算块检测（chunk detection）的准确率，召回率和F1值，支持IOB, IOE, IOBES和IO标注方案。
+参数：
+    - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：初始化后的 ``ChunkEvaluator`` 对象
+返回类型：ChunkEvaluator
+**代码示例**：
+.. code-block:: python
+        import paddle.fluid as fluid
+        # 初始化chunk-level的评价管理。
+        metric = fluid.metrics.ChunkEvaluator()
+        # 假设模型预测10个chunks，其中8个为正确，且真值有9个chunks。
+        num_infer_chunks = 10
+        num_label_chunks = 9
+        num_correct_chunks = 8
+        metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
+        numpy_precision, numpy_recall, numpy_f1 = metric.eval()
+        print("precision: %.2f, recall: %.2f, f1: %.2f" % (numpy_precision, numpy_recall, numpy_f1))
+        # 下一个batch，完美地预测了3个正确的chunks。
+        num_infer_chunks = 3
+        num_label_chunks = 3
+        num_correct_chunks = 3
+        metric.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
+        numpy_precision, numpy_recall, numpy_f1 = metric.eval()
+        print("precision: %.2f, recall: %.2f, f1: %.2f" % (numpy_precision, numpy_recall, numpy_f1))
+.. py:method:: update(num_infer_chunks, num_label_chunks, num_correct_chunks)
+该函数使用输入的(num_infer_chunks, num_label_chunks, num_correct_chunks)来累计更新ChunkEvaluator对象的对应状态，更新方式如下：
+    .. math:: 
+                   \\ \begin{array}{l}{\text { self. num_infer_chunks }+=\text { num_infer_chunks }} \\ {\text { self. num_label_chunks }+=\text { num_label_chunks }} \\ {\text { self. num_correct_chunks }+=\text { num_correct_chunks }}\end{array} \\
+参数:
+    - **num_infer_chunks** (int|numpy.array) – 给定mini-batch的语块数目。
+    - **num_label_chunks** (int|numpy.array) - 给定mini-batch的标签中的语块数目。
+    - **num_correct_chunks** (int|numpy.array) – 给定mini-batch的正确识别的语块数目。
+返回：无
+.. py:method:: eval()
+该函数计算并返回准确率，召回率和F1值。
+返回：准确率，召回率和F1值
+返回类型：float
--- a/doc/paddle/api/paddle/fluid/evaluator/DetectionMAP_cn.rst
+++ b/doc/paddle/api/paddle/fluid/evaluator/DetectionMAP_cn.rst
+.. _cn_api_fluid_metrics_DetectionMAP:
+DetectionMAP
+-------------------------------
+.. py:class:: paddle.fluid.metrics.DetectionMAP(input, gt_label, gt_box, gt_difficult=None, class_num=None, background_label=0, overlap_threshold=0.5, evaluate_difficult=True, ap_version='integral')
+该OP用于计算检测网络的平均精度（mAP）。 mAP是衡量object detectors精度的指标，比如 Faster R-CNN,SSD等。它不同于召回率，它是最大精度的平均值。
+通常步骤如下：
+1. 根据检测器中的输入和label，计算True Positive(TP)真正例 和 False Positive(FP)假正例
+2. 计算map，支持 ``11 point`` 和 ``integral`` 模式
+请从以下文章中获取更多信息：
+    - https://sanchom.wordpress.com/tag/average-precision/
+    - https://arxiv.org/abs/1512.02325
+参数：
+    - **input** (Variable) – detection的输出结果，一个 shape=[M, 6] 的 LoDtensor。布局为[label, confidence, xmin, ymin, xmax, ymax],label为类别标签，confidence为置信度，xmin，ymin为检测框左上点坐标，xmax，ymax为检测框右下点坐标，数据类型为float32或float64。
+    - **gt_label** (Variable) – ground truth label 的索引，它是一个形状为[N, 1]的LoDtensor，数据类型为float32或float64。
+    - **gt_box** (Variable) – ground truth bounding box (bbox)，它是一个形状为[N, 4]的LoDTensor。布局是[xmin, ymin, xmax, ymax]，数据类型为float32或float64。
+    - **gt_difficult** (Variable|None, 可选) – 指定这个ground truth是否是一个difficult bounding bbox，它可以是一个 shape=[N, 1]的LoDTensor，也可以不被指定。默认设置为None，表示所有的ground truth标签都不是difficult bbox，数据类型为float32或float64。
+    - **class_num** (int) – 检测类别的数目。
+    - **background_label** (int) – 背景标签的索引，背景标签将被忽略。如果设置为-1，则所有类别将被考虑，默认为0。
+    - **overlap_threshold** (float) – 判断真假阳性的阈值，默认为0.5。
+    - **evaluate_difficult** (bool) – 是否考虑 difficult ground truth 进行评价，默认为 True。当 gt_difficult 为 None 时，这个参数不起作用。
+    - **ap_version** (str) – 平均精度的计算方法，必须是 "integral" 或 "11point"。详情请查看 https://sanchom.wordpress.com/tag/average-precision/。 其中，11point为：11-point 插值平均精度。积分: precision-recall曲线的自然积分。
+返回：变量(Variable) 计算mAP的结果，其中数据类型为float32或float64。
+返回类型：变量(Variable)
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        batch_size = -1 # 可以为任意大小
+        image_boxs_num = 10
+        bounding_bboxes_num = 21
+        pb = fluid.data(name='prior_box', shape=[image_boxs_num, 4],
+            dtype='float32')
+        pbv = fluid.data(name='prior_box_var', shape=[image_boxs_num, 4],
+            dtype='float32')
+        loc = fluid.data(name='target_box', shape=[batch_size, bounding_bboxes_num, 4],
+            dtype='float32')
+        scores = fluid.data(name='scores', shape=[batch_size, bounding_bboxes_num, image_boxs_num],
+            dtype='float32')
+        nmsed_outs = fluid.layers.detection_output(scores=scores,
+            loc=loc, prior_box=pb, prior_box_var=pbv)
+        gt_box = fluid.data(name="gt_box", shape=[batch_size, 4], dtype="float32")
+        gt_label = fluid.data(name="gt_label", shape=[batch_size, 1], dtype="float32")
+        difficult = fluid.data(name="difficult", shape=[batch_size, 1], dtype="float32")
+        exe = fluid.Executor(fluid.CUDAPlace(0))
+        map_evaluator = fluid.metrics.DetectionMAP(nmsed_outs, gt_label, gt_box, difficult, class_num = 3)
+        cur_map, accum_map = map_evaluator.get_map_var()
+.. py:method:: get_map_var()
+返回：当前 mini-batch 的 mAP 变量和不同 mini-batch 的 mAP 累加和
+.. py:method::  reset(executor, reset_program=None)
+在指定的 batch 结束或者用户指定的开始时重置度量状态。
+参数：
+    - **executor** (Executor) – 执行reset_program的执行程序
+    - **reset_program** (Program|None, 可选) – 单个program 的 reset 过程。如果设置为 None，将创建一个 program
--- a/doc/paddle/api/paddle/fluid/evaluator/EditDistance_cn.rst
+++ b/doc/paddle/api/paddle/fluid/evaluator/EditDistance_cn.rst
+.. _cn_api_fluid_metrics_EditDistance:
+EditDistance
+-------------------------------
+.. py:class:: paddle.fluid.metrics.EditDistance(name)
+用于管理字符串的编辑距离。编辑距离是通过计算将一个字符串转换为另一个字符串所需的最小编辑操作数（添加、删除或替换）来量化两个字符串（例如单词）彼此不相似程度的一种方法。 参考 https://en.wikipedia.org/wiki/Edit_distance。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    # 假设batch_size为128
+    batch_size = 128
+    # 初始化编辑距离管理器
+    distance_evaluator = fluid.metrics.EditDistance("EditDistance")
+    # 生成128个序列对间的编辑距离，此处的最大距离是10
+    edit_distances_batch0 = np.random.randint(low = 0, high = 10, size = (batch_size, 1))
+    seq_num_batch0 = batch_size
+    distance_evaluator.update(edit_distances_batch0, seq_num_batch0)
+    avg_distance, wrong_instance_ratio = distance_evaluator.eval()
+    print("the average edit distance for batch0 is %.2f and the wrong instance ratio is %.2f " % (avg_distance, wrong_instance_ratio))
+    edit_distances_batch1 = np.random.randint(low = 0, high = 10, size = (batch_size, 1))
+    seq_num_batch1 = batch_size
+    distance_evaluator.update(edit_distances_batch1, seq_num_batch1)
+    avg_distance, wrong_instance_ratio = distance_evaluator.eval()
+    print("the average edit distance for batch0 and batch1 is %.2f and the wrong instance ratio is %.2f " % (avg_distance, wrong_instance_ratio))
+.. py:method:: reset()
+清空存储结果。
+参数：无
+返回：无
+.. py:method:: update(distances, seq_num)
+更新存储结果
+参数：
+    - **distances** – 一个形状为(batch_size, 1)的numpy.array，每个元素代表两个序列间的距离。
+    - **seq_num** – 一个整型/浮点型值，代表序列对的数量。
+返回：无
+.. py:method:: eval()
+返回两个浮点数：
+avg_distance：使用更新函数更新的所有序列对的平均距离。
+avg_instance_error：编辑距离不为零的序列对的比例。
--- a/doc/paddle/api/paddle/static/Executor_cn.rst
+++ b/doc/paddle/api/paddle/static/Executor_cn.rst
--- a/doc/paddle/api/paddle/static/global_scope_cn.rst
+++ b/doc/paddle/api/paddle/static/global_scope_cn.rst
--- a/doc/paddle/api/paddle/static/scope_guard_cn.rst
+++ b/doc/paddle/api/paddle/static/scope_guard_cn.rst
--- a/doc/paddle/api/paddle/static/Program_cn.rst
+++ b/doc/paddle/api/paddle/static/Program_cn.rst
--- a/doc/paddle/api/paddle/Variable_cn.rst
+++ b/doc/paddle/api/paddle/Variable_cn.rst
--- a/doc/paddle/api/paddle/static/default_main_program_cn.rst
+++ b/doc/paddle/api/paddle/static/default_main_program_cn.rst
--- a/doc/paddle/api/paddle/static/default_startup_program_cn.rst
+++ b/doc/paddle/api/paddle/static/default_startup_program_cn.rst
--- a/doc/paddle/api/paddle/fluid/framework/in_dygraph_mode_cn.rst
+++ b/doc/paddle/api/paddle/fluid/framework/in_dygraph_mode_cn.rst
+.. _cn_api_fluid_in_dygraph_mode:
+in_dygraph_mode
+-------------------------------
+.. py:function:: paddle.fluid.in_dygraph_mode()
+该接口检查程序是否在动态图模式中运行。
+可以通过 ``fluid.dygraph.guard`` 接口开启动态图模式。
+返回：如果程序是在动态图模式下运行的，则返回 ``True``。
+返回类型：bool
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    fluid.enable_dygraph()          # 现在进入 dygraph 模式
+    print(fluid.in_dygraph_mode())  # True
+    fluid.disable_dygraph()
+    print(fluid.in_dygraph_mode())  # False
--- a/doc/paddle/api/paddle/static/name_scope_cn.rst
+++ b/doc/paddle/api/paddle/static/name_scope_cn.rst
--- a/doc/paddle/api/paddle/static/program_guard_cn.rst
+++ b/doc/paddle/api/paddle/static/program_guard_cn.rst
--- a/doc/paddle/api/paddle/fluid/get_flags_cn.rst
+++ b/doc/paddle/api/paddle/fluid/get_flags_cn.rst
+.. _cn_api_fluid_get_flags:
+get_flags
+-------------------------------
+.. py:function:: paddle.fluid.get_flags(flags)
+用于获取Paddle框架中环境变量FLAGS的当前值。
+参数：
+    - **flags** (list|tuple|str) - 需要获取的环境变量FLAGS的名称。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    flags = ['FLAGS_eager_delete_tensor_gb', 'FLAGS_check_nan_inf']
+    res = fluid.get_flags(flags)
+    print(res)
+    # {'FLAGS_eager_delete_tensor_gb': 0.0, 'FLAGS_check_nan_inf': False}
--- a/doc/paddle/api/paddle/nn/initializer/Bilinear_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/Bilinear_cn.rst
--- a/doc/paddle/api/paddle/nn/initializer/Constant_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/Constant_cn.rst
--- a/doc/paddle/api/paddle/nn/initializer/MSRA_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/MSRA_cn.rst
--- a/doc/paddle/api/paddle/fluid/initializer/Normal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/initializer/Normal_cn.rst
+.. _cn_api_fluid_initializer_Normal:
+Normal
+-------------------------------
+.. py:attribute:: paddle.fluid.initializer.Normal
+:alias_main: paddle.nn.initializer.Normal
+:alias: paddle.nn.initializer.Normal
+:old_api: paddle.fluid.initializer.Normal
+``NormalInitializer`` 的别名
--- a/doc/paddle/api/paddle/fluid/initializer/NumpyArrayInitializer_cn.rst
+++ b/doc/paddle/api/paddle/fluid/initializer/NumpyArrayInitializer_cn.rst
+.. _cn_api_fluid_initializer_NumpyArrayInitializer:
+NumpyArrayInitializer
+-------------------------------
+.. py:class:: paddle.fluid.initializer.NumpyArrayInitializer(value)
+该OP使用Numpy型数组来初始化参数变量。
+参数：
+        - **value** （numpy） - 用于初始化变量的一个Numpy型数组。
+返回：张量（Tensor）
+返回类型：变量（Variable）
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    x1 = fluid.data(name="x1", shape=[2, 1], dtype='float32')
+    fc = fluid.layers.fc(input=x1, size=10,
+        param_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2])))
--- a/doc/paddle/api/paddle/nn/initializer/TruncatedNormal_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/TruncatedNormal_cn.rst
--- a/doc/paddle/api/paddle/fluid/initializer/Uniform_cn.rst
+++ b/doc/paddle/api/paddle/fluid/initializer/Uniform_cn.rst
+.. _cn_api_fluid_initializer_Uniform:
+Uniform
+-------------------------------
+.. py:attribute:: paddle.fluid.initializer.Uniform
+:alias_main: paddle.nn.initializer.Uniform
+:alias: paddle.nn.initializer.Uniform
+:old_api: paddle.fluid.initializer.Uniform
+``UniformInitializer`` 的别名
--- a/doc/paddle/api/paddle/nn/initializer/Xavier_cn.rst
+++ b/doc/paddle/api/paddle/nn/initializer/Xavier_cn.rst
--- a/doc/paddle/api/paddle/fluid/initializer/set_global_initializer_cn.rst
+++ b/doc/paddle/api/paddle/fluid/initializer/set_global_initializer_cn.rst
+.. _cn_api_fluid_set_global_initializer:
+set_global_initializer
+-------------------------------
+.. py:function:: paddle.fluid.set_global_initializer(weight_init, bias_init=None)
+该API用于设置Paddle框架中全局的参数初始化方法。该API只对位于其后的代码生效。
+模型参数为模型中的weight和bias统称，在fluid中对应fluid.Parameter类，继承自fluid.Variable，是一种可持久化的variable。
+该API的设置仅对模型参数生效，对通过 :ref:`cn_api_fluid_layers_create_global_var` 、 :ref:`cn_api_fluid_layers_create_tensor` 等API创建的变量不会生效。
+如果创建网络层时还通过 ``param_attr`` 、 ``bias_attr`` 设置了初始化方式，这里的全局设置将不会生效，因为其优先级更低。
+参数：
+    - **weight_init** (Initializer) - 设置框架的全局的weight参数初始化方法。
+    - **bias_init** (Initializer，可选) - 设置框架的全局的bias参数初始化方法。默认：None。
+返回：无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    fluid.set_global_initializer(fluid.initializer.Uniform(), fluid.initializer.Constant())
+    x = fluid.data(name="x", shape=[1, 3, 32, 32])
+    # conv1的weight参数是通过Uniform来初始化
+    # conv1的bias参数是通过Constant来初始化
+    conv1 = fluid.layers.conv2d(x, 5, 3)
+    # 如果同时设置了param_attr/bias_attr, 全局初始化将不会生效
+    # conv2的weight参数是通过Xavier来初始化
+    # conv2的bias参数是通过Normal来初始化
+    conv2 = fluid.layers.conv2d(conv1, 5, 3, 
+        param_attr=fluid.initializer.Xavier(), 
+        bias_attr=fluid.initializer.Normal())
+    # 取消全局参数初始化的设置
+    fluid.set_global_initializer(None)
\ No newline at end of file
--- a/doc/paddle/api/paddle/static/nn/embedding_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/embedding_cn.rst
--- a/doc/paddle/api/paddle/fluid/io/PyReader_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/PyReader_cn.rst
+.. _cn_api_fluid_io_PyReader:
+PyReader
+-------------------------------
+.. py:class:: paddle.fluid.io.PyReader(feed_list=None, capacity=None, use_double_buffer=True, iterable=True, return_list=False)
+在python中为数据输入创建一个reader对象。将使用python线程预取数据，并将其异步插入队列。当调用Executor.run（…）时，将自动提取队列中的数据。 
+参数:
+    - **feed_list** (list(Variable)|tuple(Variable)) - feed变量列表，由 ``fluid.layers.data()`` 创建。
+    - **capacity** (int) - PyReader对象内部维护队列的容量大小。单位是batch数量。若reader读取速度较快，建议设置较大的capacity值。
+    - **use_double_buffer** (bool) - 是否使用 ``double_buffer_reader`` 。若use_double_buffer=True，PyReader会异步地预读取下一个batch的数据，可加速数据读取过程，但同时会占用少量的CPU/GPU存储，即一个batch输入数据的存储空间。
+    - **iterable** (bool) - 所创建的PyReader对象是否可迭代。
+    - **return_list** (bool) - 每个设备上的数据是否以list形式返回。仅在iterable = True模式下有效。若return_list = False，每个设备上的返回数据均是str -> LoDTensor的映射表，其中映射表的key是每个输入变量的名称。若return_list = True，则每个设备上的返回数据均是list(LoDTensor)。推荐在静态图模式下使用return_list = False，在动态图模式下使用return_list = True。
+返回: 被创建的reader对象
+返回类型： reader (Reader)
+**代码示例**
+1.如果iterable=False，则创建的PyReader对象几乎与 ``fluid.layers.py_reader()`` 相同。算子将被插入program中。用户应该在每个epoch之前调用 ``start()`` ，并在epoch结束时捕获 ``Executor.run()`` 抛出的 ``fluid.core.EOFException`` 。一旦捕获到异常，用户应该调用 ``reset()`` 手动重置reader。
+.. code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import numpy as np
+    EPOCH_NUM = 3
+    ITER_NUM = 5
+    BATCH_SIZE = 3
+    def network(image, label):
+        # 用户定义网络，此处以softmax回归为例
+        predict = fluid.layers.fc(input=image, size=10, act='softmax')
+        return fluid.layers.cross_entropy(input=predict, label=label) 
+    def reader_creator_random_image_and_label(height, width):
+        def reader():
+            for i in range(ITER_NUM):
+                fake_image = np.random.uniform(low=0,
+                                               high=255,
+                                               size=[height, width])
+                fake_label = np.ones([1])
+                yield fake_image, fake_label
+        return reader
+    image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    reader = fluid.io.PyReader(feed_list=[image, label],
+                               capacity=4,
+                               iterable=False)
+    user_defined_reader = reader_creator_random_image_and_label(784, 784)
+    reader.decorate_sample_list_generator(
+        paddle.batch(user_defined_reader, batch_size=BATCH_SIZE))
+    loss = network(image, label)
+    executor = fluid.Executor(fluid.CPUPlace())
+    executor.run(fluid.default_startup_program())
+    for i in range(EPOCH_NUM):
+        reader.start()
+        while True:
+            try:
+                executor.run(feed=None)
+            except fluid.core.EOFException:
+                reader.reset()
+                break
+2.如果iterable=True，则创建的PyReader对象与程序分离。程序中不会插入任何算子。在本例中，创建的reader是一个python生成器，它是可迭代的。用户应将从PyReader对象生成的数据输入 ``Executor.run(feed=...)`` 。
+.. code-block:: python
+   import paddle
+   import paddle.fluid as fluid
+   import numpy as np
+   EPOCH_NUM = 3
+   ITER_NUM = 5
+   BATCH_SIZE = 10
+   def network(image, label):
+        # 用户定义网络，此处以softmax回归为例
+        predict = fluid.layers.fc(input=image, size=10, act='softmax')
+        return fluid.layers.cross_entropy(input=predict, label=label)   
+   def reader_creator_random_image(height, width):
+       def reader():
+           for i in range(ITER_NUM):
+               fake_image = np.random.uniform(low=0, high=255, size=[height, width])
+               fake_label = np.ones([1])
+               yield fake_image, fake_label
+       return reader
+   image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+   label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+   reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True, return_list=False)
+   user_defined_reader = reader_creator_random_image(784, 784)
+   reader.decorate_sample_list_generator(
+       paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
+       fluid.core.CPUPlace())
+   loss = network(image, label)
+   executor = fluid.Executor(fluid.CPUPlace())
+   executor.run(fluid.default_startup_program())
+   for _ in range(EPOCH_NUM):
+       for data in reader():
+           executor.run(feed=data, fetch_list=[loss])
+3. return_list=True，返回值将用list表示而非dict，通常用于动态图模式中。
+.. code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import numpy as np
+    EPOCH_NUM = 3
+    ITER_NUM = 5
+    BATCH_SIZE = 10
+    def reader_creator_random_image(height, width):
+        def reader():
+            for i in range(ITER_NUM):
+                yield np.random.uniform(low=0, high=255, size=[height, width]), \
+                    np.random.random_integers(low=0, high=9, size=[1])
+        return reader
+    place = fluid.CPUPlace()
+    with fluid.dygraph.guard(place):
+        py_reader = fluid.io.PyReader(capacity=2, return_list=True)
+        user_defined_reader = reader_creator_random_image(784, 784)
+        py_reader.decorate_sample_list_generator(
+            paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
+            place)
+        for image, label in py_reader():
+            relu = fluid.layers.relu(image)
+.. py:method:: start()
+启动数据输入线程。只能在reader对象不可迭代时调用。
+**代码示例**
+.. code-block:: python
+  import paddle
+  import paddle.fluid as fluid
+  import numpy as np
+  BATCH_SIZE = 10
+  def generator():
+    for i in range(5):
+       yield np.random.uniform(low=0, high=255, size=[784, 784]),
+  image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+  reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
+  reader.decorate_sample_list_generator(
+    paddle.batch(generator, batch_size=BATCH_SIZE))
+  executor = fluid.Executor(fluid.CPUPlace())
+  executor.run(fluid.default_startup_program())
+  for i in range(3):
+    reader.start()
+    while True:
+        try:
+            executor.run(feed=None)
+        except fluid.core.EOFException:
+            reader.reset()
+            break
+.. py:method:: reset()
+当 ``fluid.core.EOFException`` 抛出时重置reader对象。只能在reader对象不可迭代时调用。
+**代码示例**
+.. code-block:: python
+            import paddle
+            import paddle.fluid as fluid
+            import numpy as np
+            BATCH_SIZE = 10
+            def generator():
+                for i in range(5):
+                    yield np.random.uniform(low=0, high=255, size=[784, 784]),
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
+            reader.decorate_sample_list_generator(
+                paddle.batch(generator, batch_size=BATCH_SIZE))
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for i in range(3):
+                reader.start()
+                while True:
+                    try:
+                        executor.run(feed=None)
+                    except fluid.core.EOFException:
+                        reader.reset()
+                        break
+.. py:method:: decorate_sample_generator(sample_generator, batch_size, drop_last=True, places=None)
+设置PyReader对象的数据源。
+提供的 ``sample_generator`` 应该是一个python生成器，它生成的数据类型应为list(numpy.ndarray)。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+如果所有的输入都没有LOD，这个方法比 ``decorate_sample_list_generator(paddle.batch(sample_generator, ...))`` 更快。
+参数:
+  - **sample_generator** (generator)  – Python生成器，yield 类型为list(numpy.ndarray)
+  - **batch_size** (int) – batch size，必须大于0
+  - **drop_last** (bool) – 当最后一个batch的样本数小于batch_size时，是否丢弃该batch
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)    
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        fake_image = np.random.uniform(low=0,
+                                                       high=255,
+                                                       size=[height, width])
+                        fake_label = np.array([1])
+                        yield fake_image, fake_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_sample_generator(user_defined_generator,
+                                             batch_size=BATCH_SIZE,
+                                             places=[fluid.CPUPlace()])
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: decorate_sample_list_generator(reader, places=None)
+设置PyReader对象的数据源。
+提供的 ``reader`` 应该是一个python生成器，它生成列表（numpy.ndarray）类型的批处理数据。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+参数:
+  - **reader** (generator)  – 返回列表（numpy.ndarray）类型的批处理数据的Python生成器
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        fake_image = np.random.uniform(low=0,
+                                                       high=255,
+                                                       size=[height, width])
+                        fake_label = np.ones([1])
+                        yield fake_image, fake_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_sample_list_generator(
+                paddle.batch(user_defined_generator, batch_size=BATCH_SIZE),
+                fluid.core.CPUPlace())
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.core.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: decorate_batch_generator(reader, places=None)
+设置PyReader对象的数据源。
+提供的 ``reader`` 应该是一个python生成器，它生成列表（numpy.ndarray）类型或LoDTensor类型的批处理数据。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+参数:
+  - **reader** (generator)  – 返回LoDTensor类型的批处理数据的Python生成器
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        batch_image = np.random.uniform(low=0,
+                                                        high=255,
+                                                        size=[BATCH_SIZE, height, width])
+                        batch_label = np.ones([BATCH_SIZE, 1])
+                        batch_image = batch_image.astype('float32')
+                        batch_label = batch_label.astype('int64')
+                        yield batch_image, batch_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_batch_generator(user_defined_generator, fluid.CPUPlace())
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: next()
+获取下一个数据。用户不应直接调用此方法。此方法用于PaddlePaddle框架内部实现Python 2.x的迭代器协议。
--- a/doc/paddle/api/paddle/fluid/io/get_program_parameter_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/get_program_parameter_cn.rst
+.. _cn_api_fluid_io_get_program_parameter:
+get_program_parameter
+-------------------------------
+.. py:function:: paddle.fluid.io.get_program_parameter(program)
+:api_attr: 声明式编程模式（静态图)
+该接口从Program中获取所有参数。
+参数:
+ - **program**  ( :ref:`cn_api_fluid_Program` ) – 从该Program中获取参数。
+返回: 包含此Program中所有参数的list
+返回类型: list
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.data(name="img", shape=[64, 784])
+    w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+    b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
+    list_para  = fluid.io.get_program_parameter(  fluid.default_main_program() )
--- a/doc/paddle/api/paddle/fluid/io/get_program_persistable_vars_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/get_program_persistable_vars_cn.rst
+.. _cn_api_fluid_io_get_program_persistable_vars:
+get_program_persistable_vars
+-------------------------------
+.. py:function:: paddle.fluid.io.get_program_persistable_vars(program)
+:api_attr: 声明式编程模式（静态图)
+该接口从Program中获取所有persistable的变量。
+参数:
+ - **program**  ( :ref:`cn_api_fluid_Program` ) – 从该Program中获取persistable的变量。
+返回: 包含此Program中所有persistable的变量
+返回类型: list
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.data(name="img", shape=[64, 784])
+    w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+    b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
+    list_para  = fluid.io.get_program_persistable_vars(  fluid.default_main_program() )
--- a/doc/paddle/api/paddle/load_cn.rst
+++ b/doc/paddle/api/paddle/load_cn.rst
--- a/doc/paddle/api/paddle/fluid/io/load_params_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/load_params_cn.rst
+.. _cn_api_fluid_io_load_params:
+load_params
+-------------------------------
+.. py:function:: paddle.fluid.io.load_params(executor, dirname, main_program=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该接口从指定的 ``main_program`` 中筛选出所有模型参数变量，并根据目录 ``dirname``  或 ``filename`` 提供的参数文件对这些模型参数进行赋值。
+使用 ``dirname`` 指定模型参数的存储路径。若模型参数变量以分离文件的形式存储在 ``dirname`` 指定的目录下，则设置 ``filename`` 值为None；若所有模型参数存储在一个单独的二进制文件中，则使用 ``filename`` 来指明这个二进制文件。
+注意：
+  - 有些变量不是参数，如学习率、全局训练步数（global step）等，但它们之于训练却是必要的。因此，调用 :ref:`cn_api_fluid_io_save_params` 和 :ref:`cn_api_fluid_io_load_params` 来保存和加载参数对于断点训练是不够的，这种情况下可以使用 :ref:`cn_api_fluid_io_save_persistables` 和 :ref:`cn_api_fluid_io_load_persistables` 来保存和加载训练过程的检查点（checkpoint）。
+  - 若希望同时加载预训练后的模型结构和模型参数以用于预测过程，则可使用 :ref:`cn_api_fluid_io_load_inference_model` 接口。更多细节请参考 :ref:`api_guide_model_save_reader` 。
+参数:
+    - **executor**  (Executor) – 加载模型参数的 ``executor`` （详见 :ref:`api_guide_executor` ） 。
+    - **dirname**  (str) – 模型参数的存储路径。
+    - **main_program**  (Program，可选) – 筛选模型参数变量所依据的 ``Program`` （详见 :ref:`api_guide_Program` ）。若为None, 则使用全局默认的  ``default_main_program`` 。默认值为None。
+    - **filename**  (str，可选) – 若模型参数是以若干文件形式存储在 ``dirname`` 指定的目录下，则设置 ``filename`` 值为None。反之，需要通过 ``filename`` 来指明单一模型参数存储文件的名称。 默认值为None。
+**返回:** 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    exe = fluid.Executor(fluid.CPUPlace())
+    param_path = "./my_paddle_model"
+    prog = fluid.default_main_program()
+    fluid.io.load_params(executor=exe, dirname=param_path,
+                        main_program=None)
--- a/doc/paddle/api/paddle/fluid/io/load_persistables_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/load_persistables_cn.rst
+.. _cn_api_fluid_io_load_persistables:
+load_persistables
+-------------------------------
+.. py:function:: paddle.fluid.io.load_persistables(executor, dirname, main_program=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该接口从给定的 ``main_program`` 中取出所有 ``persistable==True`` 的变量（即持久性变量，详见 :ref:`api_guide_model_save_reader` ），并根据目录 ``dirname``  或 ``filename`` 提供的参数文件对这些持久性变量进行赋值。
+使用 ``dirname`` 指定持久性变量的存储路径。若持久性变量以分离文件的形式保存在 ``dirname`` 指定的目录下，则设置 ``filename`` 值为None；若所有持久性变量保存在一个单独的二进制文件中，则使用 ``filename`` 来指明这个二进制文件。
+参数:
+    - **executor**  (Executor) – 加载持久性变量的 ``executor`` （详见 :ref:`api_guide_executor` ） 。
+    - **dirname**  (str) – 持久性变量的存储路径。
+    - **main_program**  (Program，可选) – 筛选模型中持久性变量所依据的 ``Program`` （详见 :ref:`api_guide_Program` ）。若为None, 则使用全局默认的  ``default_main_program`` 。默认值为None。
+    - **filename**  (str，可选) – 若模型中的持久性变量是以若干文件形式存储在 ``dirname`` 指定的目录下，则设置 ``filename`` 值为None。反之，需要通过 ``filename`` 来指明单一模型持久性变量存储文件的名称。 默认值为None。
+**返回：** 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    exe = fluid.Executor(fluid.CPUPlace())
+    param_path = "./my_paddle_model"
+    prog = fluid.default_main_program()
+    fluid.io.load_persistables(executor=exe, dirname=param_path,
+                               main_program=None)
--- a/doc/paddle/api/paddle/fluid/io/load_vars_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/load_vars_cn.rst
+.. _cn_api_fluid_io_load_vars:
+load_vars
+-------------------------------
+.. py:function:: paddle.fluid.io.load_vars(executor, dirname, main_program=None, vars=None, predicate=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该接口从文件中加载 ``Program`` 的变量。
+通过 ``vars`` 指定需要加载的变量，或者通过 ``predicate`` 筛选需要加载的变量， ``vars`` 和 ``predicate`` 不能同时为None。
+参数:
+ - **executor**  (Executor) – 运行的执行器，执行器的介绍请参考 :ref:`api_guide_executor` 。
+ - **dirname**  (str) – 加载变量所在的目录路径。
+ - **main_program**  (Program，可选) – 需要加载变量的 ``Program`` ， ``Program`` 的介绍请参考 :ref:`api_guide_Program` 。如果 ``main_program`` 为None，则使用默认的主程序。默认值为None。
+ - **vars**  (list[Variable]，可选) –  通过该列表指定需要加载的变量。默认值为None。
+ - **predicate**  (function，可选) – 通过该函数筛选 :math:`predicate(variable)== True` 的变量进行加载。如果通过 ``vars`` 指定了需要加载的变量，则该参数无效。默认值为None。
+ - **filename**  (str，可选) – 加载所有变量的文件。如果所有待加载变量是保存在一个文件中，则设置 ``filename`` 为该文件名；如果所有待加载变量是按照变量名称单独保存成文件，则设置 ``filename`` 为None。默认值为None。
+返回： 无
+抛出异常：
+  - ``TypeError`` - 如果main_program不是Program的实例，也不是None。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    main_prog = fluid.Program()
+    startup_prog = fluid.Program()
+    with fluid.program_guard(main_prog, startup_prog):
+        data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
+        w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+        b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
+        hidden_w = fluid.layers.matmul(x=data, y=w)
+        hidden_b = fluid.layers.elementwise_add(hidden_w, b)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(startup_prog)
+    # 示例一：用vars来指定加载变量。
+    path = "./my_paddle_vars"
+    var_list = [w, b]
+    fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
+                       filename="vars_file")
+    fluid.io.load_vars(executor=exe, dirname=path, vars=var_list,
+                       filename="vars_file")
+    # 加载w和b。它们被保存在'vars_file'的文件中，所在路径为 "./my_paddle_vars" 。
+    # 示例二：通过predicate来筛选加载变量。
+    def name_has_fc(var):
+        res = "fc" in var.name
+        return res
+    param_path = "./my_paddle_model"
+    fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate=name_has_fc)
+    fluid.io.load_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate=name_has_fc)
+    #加载 `main_program` 中变量名包含 ‘fc’ 的所有变量
+    #此前所有变量应该保存在不同文件中
--- a/doc/paddle/api/paddle/fluid/io/save_params_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/save_params_cn.rst
+.. _cn_api_fluid_io_save_params:
+save_params
+-------------------------------
+.. py:function:: paddle.fluid.io.save_params(executor, dirname, main_program=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该OP从 ``main_program`` 中取出所有参数，然后将它们保存到 ``dirname`` 目录下或名为 ``filename`` 的文件中。
+``dirname`` 用于指定保存参数的目标路径。若想将参数保存到多个独立文件中，设置 ``filename=None`` ； 若想将所有参数保存在单个文件中，请设置 ``filename`` 来指定该文件的名称。
+注意：
+   - 有些变量不是参数，如学习率，全局训练步数（global step）等，但它们对于训练却是必要的。因此，调用 :ref:`cn_api_fluid_io_save_params` 和 :ref:`cn_api_fluid_io_load_params` 来保存和加载参数对于断点训练是不够的，这种情况下可以使用 :ref:`cn_api_fluid_io_save_persistables` 和 :ref:`cn_api_fluid_io_load_persistables` 来保存和加载训练过程中的检查点（checkpoint）。
+   - 如果您想要储存您的模型用于预测，请使用 :ref:`cn_api_fluid_io_save_inference_model` 。更多细节请参考 :ref:`api_guide_model_save_reader` 
+参数:
+ - **executor**  (Executor) – 用于保存参数的 ``executor`` ，详见 :ref:`api_guide_executor` 。
+ - **dirname**  (str) – 指定保存参数的文件目录。
+ - **main_program**  (Program，可选) – 需要保存参数的Program（ ``Program`` 含义详见 :ref:`api_guide_Program` ）。如果为None，则使用default_main_program 。默认值为None。
+ - **filename**  (str，可选) – 保存参数的文件名称。若需要将参数保存到多个独立的文件中，请设置 ``filename=None`` 。默认值为None。
+返回: 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    params_path = "./my_paddle_model"
+    image = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
+    predict = fluid.layers.fc(input=image, size=10, act='softmax')
+    loss = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_loss = fluid.layers.mean(loss)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    fluid.io.save_params(executor=exe, dirname=params_path)
+    # 网络中fc层的参数weight和bias将会分别存储在"./my_paddle_model"路径下。                    
--- a/doc/paddle/api/paddle/fluid/io/save_persistables_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/save_persistables_cn.rst
+.. _cn_api_fluid_io_save_persistables:
+save_persistables
+-------------------------------
+.. py:function:: paddle.fluid.io.save_persistables(executor, dirname, main_program=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该OP从给定 ``main_program`` 中取出所有持久性变量（详见 :ref:`api_guide_model_save_reader` ），然后将它们保存到目录 ``dirname`` 中或 ``filename`` 指定的文件中。
+``dirname`` 用于指定保存持久性变量的目录。如果想将持久性变量保存到指定目录的若干文件中，请设置 ``filename=None`` ; 若想将所有持久性变量保存在同一个文件中，请设置 ``filename`` 来指定文件的名称。
+参数:
+ - **executor**  (Executor) – 用于保存持久性变量的 ``executor`` ，详见 :ref:`api_guide_executor` 。
+ - **dirname**  (str) – 用于储存持久性变量的文件目录。
+ - **main_program**  (Program，可选) – 需要保存持久性变量的Program（ ``Program`` 含义详见 :ref:`api_guide_Program` ）。如果为None，则使用default_main_program 。默认值为None。
+ - **filename**  (str，可选) – 保存持久性变量的文件名称。若想分开保存变量，设置 ``filename=None`` 。 默认值为None。
+返回: 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    dir_path = "./my_paddle_model"
+    file_name = "persistables"
+    image = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    feeder = fluid.DataFeeder(feed_list=[image, label], place=fluid.CPUPlace())
+    predict = fluid.layers.fc(input=image, size=10, act='softmax')
+    loss = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_loss = fluid.layers.mean(loss)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    fluid.io.save_persistables(executor=exe, dirname=dir_path, filename=file_name)
+    # 网络中fc层中的持久性变量weight和bias将会保存在路径“./my_paddle_model”下名为"persistables"的文件中。
--- a/doc/paddle/api/paddle/fluid/io/save_vars_cn.rst
+++ b/doc/paddle/api/paddle/fluid/io/save_vars_cn.rst
+.. _cn_api_fluid_io_save_vars:
+save_vars
+-------------------------------
+.. py:function:: paddle.fluid.io.save_vars(executor, dirname, main_program=None, vars=None, predicate=None, filename=None)
+:api_attr: 声明式编程模式（静态图)
+该接口将 ``Program`` 的变量保存到文件中。
+通过 ``vars`` 指定需要保存的变量，或者通过 ``predicate`` 筛选需要保存的变量， ``vars`` 和 ``predicate`` 不能同时为None。
+参数：
+      - **executor** （Executor）- 运行的执行器，执行器的介绍请参考 :ref:`api_guide_executor` 。
+      - **dirname** （str）- 保存变量的目录路径。
+      - **main_program** （Program，可选）- 需要保存变量的 ``Program`` ， ``Program`` 的介绍请参考 :ref:`api_guide_Program` 。如果 ``main_program`` 为None，则使用默认的主程序。默认值为None。
+      - **vars** （list [Variable]，可选）- 通过该列表指定需要保存的变量。默认值为None。
+      - **predicate** （function，可选）- 通过该函数筛选 :math:`predicate(variable)== True` 的变量进行保存。如果通过 ``vars`` 指定了需要保存的变量，则该参数无效。默认值为None。
+      - **filename** （str，可选）- 保存所有变量的文件。如果设置为None，所有变量会按照变量名称单独保存成文件；如果设置为非None，所有变量会保存成一个文件名为该设置值的文件。默认值为None。
+返回：无    
+抛出异常：
+    - ``TypeError`` - 如果main_program不是Program的实例，也不是None。
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      main_prog = fluid.Program()
+      startup_prog = fluid.Program()
+      with fluid.program_guard(main_prog, startup_prog):
+          data = fluid.layers.data(name="img", shape=[64, 784], append_batch_size=False)
+          w = fluid.layers.create_parameter(shape=[784, 200], dtype='float32', name='fc_w')
+          b = fluid.layers.create_parameter(shape=[200], dtype='float32', name='fc_b')
+          hidden_w = fluid.layers.matmul(x=data, y=w)
+          hidden_b = fluid.layers.elementwise_add(hidden_w, b)
+      place = fluid.CPUPlace()
+      exe = fluid.Executor(place)
+      exe.run(startup_prog)
+      # 示例一：用vars来指定变量。
+      var_list = [w, b]
+      path = "./my_paddle_vars"
+      fluid.io.save_vars(executor=exe, dirname=path, vars=var_list,
+                         filename="vars_file")
+      # w, b 将被保存，使用同一文件名“vars_file”，保存在路径“./my_paddle_vars”下。
+      # 示例二：通过predicate筛选变量。
+      def name_has_fc(var):
+          res = "fc" in var.name
+          return res
+      param_path = "./my_paddle_model"
+      fluid.io.save_vars(executor=exe, dirname=param_path, main_program=main_prog, vars=None, predicate = name_has_fc)
+      # 将main_program中变量名包含“fc”的所有变量保存。
+      # 变量将分开保存。
--- a/doc/paddle/api/paddle/shuffle_cn.rst
+++ b/doc/paddle/api/paddle/shuffle_cn.rst
--- a/doc/paddle/api/paddle/fluid/is_compiled_with_cuda_cn.rst
+++ b/doc/paddle/api/paddle/fluid/is_compiled_with_cuda_cn.rst
+.. _cn_api_fluid_is_compiled_with_cuda:
+is_compiled_with_cuda
+-------------------------------
+.. py:function:: paddle.fluid.is_compiled_with_cuda()
+检查 ``whl`` 包是否可以被用来在GPU上运行模型
+返回：支持gpu则为True,否则为False。
+返回类型：out(boolean)
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    support_gpu = fluid.is_compiled_with_cuda()
--- a/doc/paddle/api/paddle/fluid/layers/Categorical_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/Categorical_cn.rst
+.. _cn_api_fluid_layers_Categorical:
+Categorical
+-------------------------------
+.. py:class:: paddle.fluid.layers.Categorical(logits)
+类别分布是一种离散概率分布，其随机变量可以取K个相互独立类别的其中一个。
+概率质量函数（pmf）为：
+.. math::
+    pmf(k; p_i) =\prod_{i=1}^{k} p_i^{[x=i]}
+上面公式中:
+  - :math:`[x = i]` 表示：如果 :math:`x==i` ，则表达式取值为1，否则取值为0。
+参数：
+    - **logits** (list|numpy.ndarray|Variable) - 类别分布对应的logits。数据类型为float32。
+**代码示例**：
+.. code-block:: python
+    import numpy as np
+    from paddle.fluid import layers
+    from paddle.fluid.layers import Categorical
+    a_logits_npdata = np.array([-0.602,-0.602], dtype="float32")
+    a_logits_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(a_logits_npdata, a_logits_tensor)
+    b_logits_npdata = np.array([-0.102,-0.112], dtype="float32")
+    b_logits_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(b_logits_npdata, b_logits_tensor)
+    a = Categorical(a_logits_tensor)
+    b = Categorical(b_logits_tensor)
+    a.entropy()
+    # [0.6931472] with shape: [1]
+    b.entropy()
+    # [0.6931347] with shape: [1]
+    a.kl_divergence(b)
+    # [1.2516975e-05] with shape: [1]
+.. py:function:: kl_divergence(other)
+相对于另一个类别分布的KL散度
+参数：
+    - **other** (Categorical) - 输入的另一个类别分布。数据类型为float32。
+返回：相对于另一个类别分布的KL散度, 数据类型为float32
+返回类型：Variable
+.. py:function:: entropy()
+信息熵
+返回：类别分布的信息熵, 数据类型为float32
+返回类型：Variable
--- a/doc/paddle/api/paddle/fluid/layers/Decoder_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/Decoder_cn.rst
+.. _cn_api_fluid_layers_Decoder:
+Decoder
+-------------------------------
+.. py:class:: paddle.fluid.layers.Decoder()
+:api_attr: 声明式编程模式（静态图)
+Decoder是dynamic_decode中使用的任何decoder实例的基类。它提供了为每一个时间步生成输出的接口，可用于生成序列。
+Decoder提供的主要抽象为：
+1. :code:`(initial_input, initial_state, finished) = initialize(inits)`，
+为第一个解码步生成输入和状态，并给出指示batch中的每个序列是否结束的初始标识。
+2. :code:`(output, next_state, next_input, finished) = step(time, input, state)`，
+将输入和状态转换为输出和新的状态，为下一个解码步生成输入，并给出指示batch中的每个序列是否结束的标识。
+3. :code:`(final_outputs, final_state) = finalize(outputs, final_state, sequence_lengths)`，
+修改输出（所有时间步输出的堆叠）和最后的状态以做特殊用途。若无需修改堆叠得到的输出和来自最后一个时间步的状态，则无需实现。
+与RNNCell相比，Decoder更为通用，因为返回的 :code:`next_input` 和 :code:`finished` 使它可以自行决定输入以及结束时机。
+.. py:method:: initialize(inits)
+在解码迭代之前调用一次。
+参数：  
+  - **inits** - 调用方提供的参数。 
+返回：一个元组 :code:`(initial_inputs, initial_states, finished)` 。:code:`initial_inputs` 和 :code:`initial_states` 都是单个tensor变量或tensor变量组成的嵌套结构， :code:`finished` 是具有bool数据类型的tensor。
+返回类型：tuple
+.. py:method:: step(time, inputs, states)
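+在解码的每个时间步中被调用的接口
+参数：  
+  - **time** - 当前解码的时间步。
+  - **inputs** - 当前时间步的输入，单个tensor变量或tensor变量组成的嵌套结构。
+  - **states** - 当前时间步的状态，单个tensor变量或tensor变量组成的嵌套结构。
+返回：一个元组 :code:`(outputs, next_states, next_inputs, finished)` ，即当前时间步的输出、新的状态、下一个解码步的输入，以及指示batch中每个序列是否结束的标识。
+返回类型：tuple
+.. py:method:: finalize(outputs, final_states, sequence_lengths)
+在解码迭代结束后调用，用于修改输出（所有时间步输出的堆叠）和最后的状态。若无需修改，则无需实现。
+参数：  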
+  - **outputs** (Variable) - 单个tensor变量或tensor变量组成的嵌套结构。 结构和数据类型与 :code:`output_dtype` 相同。 tensor堆叠所有时间步长的输出从而具有shape :math:`[time\_step，batch\_size，...]` ，由调用者完成。 
+  - **final_states** (Variable) - 单个tensor变量或tensor变量组成的嵌套结构。 它是 :code:`decoder.step` 在最后一个解码步返回的 :code:`next_states`， 因此具有与任何时间步长的状态相同的结构，形状和数据类型。
+返回：一个元组 :code:`(final_outputs, final_states)` 。:code:`final_outputs` 和 :code:`final_states` 都是单个tensor变量或tensor变量组成的嵌套结构。
+返回类型：tuple
\ No newline at end of file
--- a/doc/paddle/api/paddle/fluid/layers/DynamicRNN_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/DynamicRNN_cn.rst
+.. _cn_api_fluid_layers_DynamicRNN:
+DynamicRNN
+===================
+.. py:class:: paddle.fluid.layers.DynamicRNN(name=None)
+:api_attr: 声明式编程模式（静态图)
+**注意：该类型的输入仅支持LoDTensor，如果您需要处理的输入数据是Tensor类型，
+请使用StaticRNN（ fluid.layers.** :ref:`cn_api_fluid_layers_StaticRNN` **)。**
+DynamicRNN可以处理一批序列数据，其中每个样本序列的长度可以不同，每个序列的长度信息记录在LoD里面。
+DynamicRNN会按照时间步 (time step) 将输入序列展开，用户可以在 :code:`block` 中定义每个时间步要进行的运算。
+由于每个输入样本的序列长度不相同，RNN执行的step数由最长的序列决定。
+DynamicRNN的实现采用非padding的方式，每个时间步都会对输入数据进行收缩处理，移除已经处理完的序列的信息。
+因此，随着时间步的增加，每个时间步处理的样本数（batch size）会逐渐减少。
+.. warning::
+  目前不支持在DynamicRNN的 :code:`block` 中任何层上配置 :code:`is_sparse = True` 。
+参数：
+    - **name** (str，可选) - 具体用法参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+成员函数列表：
+    - :ref:`cn_api_fluid_layers_DynamicRNN_step_input` ，设置输入变量
+    - :ref:`cn_api_fluid_layers_DynamicRNN_static_input` ，设置静态输入变量
+    - :ref:`cn_api_fluid_layers_DynamicRNN_block` ，定义每个时间步执行的运算
+    - :ref:`cn_api_fluid_layers_DynamicRNN_memory` ，创建用于在时间步之间传递信息的变量
+    - :ref:`cn_api_fluid_layers_DynamicRNN_update_memory` ，更新需要传递的时间步信息
+    - :ref:`cn_api_fluid_layers_DynamicRNN_output` ，设置时间步的输出变量
+    - :ref:`cn_api_fluid_layers_DynamicRNN_call` ，获取RNN的输出序列
+.. _cn_api_fluid_layers_DynamicRNN_step_input:
+成员函数 step_input
+---------------------------------
+.. py:method:: step_input(x, level=0)
+将序列x设置为DynamicRNN输入。输入序列中最长的序列长度，将决定RNN运算的长度。
+必须至少为DynamicRNN设置一个输入，也可以设置多个输入。
+如果多个输入x的 :code:`x.lod_level` 都为1，则要求多个输入LoDTensor携带完全相同的LoD信息。
+当输入x的 :code:`x.lod_level >= 2` 时，输入序列将按指定level进行展开，每个时间步携带 :code:`x.lod_level - level - 1` 层LoD信息，
+此时要求多个输入序列的LoD在指定level上的信息完全一样。
+- 示例1
+.. code-block:: text
+    # 输入，其中Si代表维度为[1, N]的数据
+    level = 0
+    x.lod = [[2, 1, 3]]
+    x.shape = [6, N]
+    x.data = [[S0],
+              [S0],
+              [S1],
+              [S2],
+              [S2],
+              [S2]]
+    # 输出
+    # step 0，持有3个序列的time step数据
+    out.lod = [[]]
+    out.shape = [3, N]
+    out.data = [[S2],
+                [S0],
+                [S1]]
+    # step 1，持有2个序列的time step数据
+    out.lod = [[]]
+    out.shape = [2, N]
+    out.data = [[S2],
+                [S0]]
+    # step 2，持有1个序列的time step数据
+    out.lod = [[]]
+    out.shape = [1, N]
+    out.data = [[S2]]
+参数：
+    - **x** (Variable) - 输入序列LoDTensor，代表由长度不同的多个序列组成的minibatch，要求 :code:`x.lod_level >= 1`。输入x第一个维度的值等于minibatch内所有序列的长度之和。RNN有多个输入序列时，多个输入LoDTensor的第一个维度必须相同，其它维度可以不同。支持的数据类型有：bool，float16，float32，float64，int8，int16，int32，int64，uint8。
+    - **level** (int，可选) - 用于拆分输入序列的LoD层级，取值范围是 :math:`[0, x.lod\_level)`，默认值是0。
+返回： 输入序列每个时间步的数据。执行第 :code:`step_idx` 个时间步时，若输入 :code:`x` 中有 :code:`num_sequences` 个长度不小于 :code:`step_idx` 的序列，则这个时间步返回值中只包含了这 :code:`num_sequences` 个序列第 :code:`step_idx` 时间步的数据。数据类型和输入一致。如果 :code:`x.lod_level == 1` ，返回值的维度是 :math:`\{num\_sequences, x.shape[1], ...\}`。否则，返回值也是一个变长的LoDTensor。
+返回类型：Variable
+抛出异常：
+    - :code:`ValueError` ：当 :code:`step_input()` 接口在RNN :code:`block()` 接口外面被调用时。
+    - :code:`TypeError`：当输入x类型不是Variable时。
+**代码示例**
+..  code-block:: python
+      import paddle.fluid as fluid
+      sentence = fluid.data(name='sentence', shape=[None, 1], dtype='int64', lod_level=1)
+      embedding = fluid.layers.embedding(input=sentence, size=[65536, 32], is_sparse=True)
+      drnn = fluid.layers.DynamicRNN()
+      with drnn.block():
+          # 将embedding标记为RNN的输入，每个时间步取句子中的一个字进行处理
+          word = drnn.step_input(embedding)
+          # 将memory初始化为一个值为0的常量Tensor，shape=[batch_size, 200]，其中batch_size由输入embedding决定
+          memory = drnn.memory(shape=[200])
+          hidden = fluid.layers.fc(input=[word, memory], size=200, act='relu')
+          # 用hidden更新memory
+          drnn.update_memory(ex_mem=memory, new_mem=hidden)
+          # 将hidden标记为RNN的输出
+          drnn.output(hidden)
+      # 获得RNN的计算结果
+      rnn_output = drnn()
+.. _cn_api_fluid_layers_DynamicRNN_static_input:
+成员函数 static_input
+---------------------------------
+.. py:method:: static_input(x)
+将变量设置为RNN的静态输入。
+- 示例1，静态输入携带LoD信息
+.. code-block:: text
+    # RNN的输入见step_input中的示例
+    # 静态输入，其中Si代表维度为[1, M]的数据
+    x.lod = [[3, 1, 2]]
+    x.shape = [6, M]
+    x.data = [[S0],
+              [S0],
+              [S0],
+              [S1],
+              [S2],
+              [S2]]
+    # step 0，持有3个序列对应的数据
+    out.lod = [[2, 3, 1]]
+    out.shape = [6, M]
+    out.data = [[S2],
+                [S2],
+                [S0],
+                [S0],
+                [S0],
+                [S1]]
+    # step 1，持有2个序列对应的数据
+    out.lod = [[2, 3]]
+    out.shape = [5, M]
+    out.data = [[S2],
+                [S2],
+                [S0],
+                [S0],
+                [S0]]
+    # step 2，持有1个序列对应的数据
+    out.lod = [[2]]
+    out.shape = [2, M]
+    out.data = [[S2],
+                [S2]]
+- 示例2，静态输入不携带LoD信息
+.. code-block:: text
+    # RNN的输入见step_input中的示例
+    # 静态输入，其中Si代表维度为[1, M]的数据
+    x.lod = [[]]
+    x.shape = [3, M]
+    x.data = [[S0],
+              [S1],
+              [S2]]
+    # step 0，持有3个序列对应的数据
+    out.lod = [[]]
+    out.shape = [3, M]
+    out.data = [[S2],
+                [S0],
+                [S1]]
+    # step 1，持有2个序列对应的数据
+    out.lod = [[]]
+    out.shape = [2, M]
+    out.data = [[S2],
+                [S0]]
+    # step 2，持有1个序列对应的数据
+    out.lod = [[]]
+    out.shape = [1, M]
+    out.data = [[S2]]
+参数:
+    - **x** (Variable) - 静态输入序列LoDTensor，要求持有与输入LoDTensor（通过 :code:`step_input` 设置的输入）相同的序列个数。如果输入x的LoD信息为空，则会被当成由 :code:`x.shape[0]` 个长度为1序列组成。支持的数据类型有：bool，float16，float32，float64，int8，int16，int32，int64，uint8。
+返回: 经过按照RNN输入LoD信息重排序、且收缩处理后的静态输入LoDTensor。执行第 :code:`step_idx` 个时间步时，如果输入序列中只有 :code:`num_sequences` 个长度不小于 :code:`step_idx` 的序列，静态输入也会进行收缩处理，只返回对应的 :code:`num_sequences` 个序列对应的数据。数据类型和输入一致。如果 :code:`x.lod == None` ，返回值的维度是 :math:`\{num\_sequences, x.shape[1], ...\}` 。否则，返回值是一个变长的LoDTensor。
+返回类型：Variable
+抛出异常：
+    - :code:`ValueError`：当 :code:`static_input()` 接口在RNN :code:`block()` 接口外面被调用时。
+    - :code:`TypeError`：当输入x类型不是Variable类型时。
+    - :code:`RuntimeError`：当 :code:`static_input()` 接口在 :code:`step_input()` 接口之前被调用时。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
+    encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1)
+    decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32')
+    drnn = fluid.layers.DynamicRNN()
+    with drnn.block():
+        # 将sentence标记为RNN的输入，每个时间步取句子中的一个字进行处理
+        current_word = drnn.step_input(sentence)
+        # 将encoder_proj标记为RNN的静态输入
+        encoder_word = drnn.static_input(encoder_proj)
+        # 使用decoder_boot初始化memory，并且需要依据输入序列进行重排序
+        memory = drnn.memory(init=decoder_boot, need_reorder=True)
+        fc_1 = fluid.layers.fc(input=encoder_word, size=30)
+        fc_2 = fluid.layers.fc(input=current_word, size=30)
+        decoder_inputs = fc_1 + fc_2
+        hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30)
+        # 用hidden更新memory
+        drnn.update_memory(ex_mem=memory, new_mem=hidden)
+        out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax')
+        # 将out标记为RNN的输出
+        drnn.output(out)
+    # 获得RNN的计算结果
+    rnn_output = drnn()
+.. _cn_api_fluid_layers_DynamicRNN_block:
+成员函数 block
+---------------------------------
+.. py:method:: block()
+定义每个时间步执行的操作。 :code:`block` 语句里面定义的算子序列，将会被执行 :code:`max_sequence_len` 次（ :code:`max_sequence_len` 是输入序列中最大的序列长度）。
+抛出异常：
+    - :code:`ValueError`：当RNN :code:`block()` 接口被多次调用时。
+.. _cn_api_fluid_layers_DynamicRNN_memory:
+成员函数 memory
+---------------------------------
+.. py:method:: memory(init=None, shape=None, value=0.0, need_reorder=False, dtype='float32')
+为RNN创建一个memory变量，用于在时间步之间传递信息。
+它可以用一个已有的Tensor来初始化，也可以初始化为一个特定维度的常量Tensor。
+参数：
+    - **init** (Variable，可选) – 设置memory初始值的LoDTensor。如果init不是None，将使用init来初始化memory，要求持有与输入LoDTensor（通过 :code:`step_input` 设置的输入）相同的序列个数。如果输入init的LoD信息为空，则会被当成由 :code:`init.shape[0]` 个长度为1序列组成。默认值是None。
+    - **shape** (list|tuple，可选) – 当init是None时，用来设置memory的维度。注意：shape中不包含batch_size。若设置 :math:`shape=\{D_1, D_2, ...\}`，memory Tensor的实际维度为 :math:`\{batch\_size, D_1, D_2, ...\}`，其中batch_size由输入序列决定。默认值是None。
+    - **value** (float，可选) – 当init是None时，用来设置memory的初始值。默认值是0.0。
+    - **need_reorder** (bool，可选) – 当init不是None时，用来决定init是否需要重新排序。动态RNN在计算时，会按照输入LoDTensor中序列的长度对输入进行排序，因此当init中的信息与输入序列样本紧密关联时，需要设置 :code:`need_reorder=True`。默认值是False。
+    - **dtype** (str|numpy.dtype，可选) – 当init是None时，初始化memory的数据类型。默认值是"float32"。可设置的字符串值有："float32"，"float64"，"int32"，"int64"。
+返回：经过收缩处理后的memory LoDTensor。执行第 :code:`step_idx` 个时间步时，如果输入序列中只有 :code:`num_sequences` 个长度不小于 :code:`step_idx` 的序列，memory也会进行收缩处理，只返回对应的 :code:`num_sequences` 个序列对应的数据。
+返回类型：Variable
+抛出异常：
+    - :code:`ValueError`：当 :code:`memory()` 接口在RNN :code:`block()` 接口外面被调用时。
+    - :code:`TypeError`：当init被设置了，但是不是Variable类型时。
+    - :code:`ValueError`：当 :code:`memory()` 接口在 :code:`step_input()` 接口之前被调用时。
+**代码示例一**
+..  code-block:: python
+    import paddle.fluid as fluid
+    sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
+    boot_memory = fluid.data(name='boot', shape=[None, 10], dtype='float32')
+    drnn = fluid.layers.DynamicRNN()
+    with drnn.block():
+        # 将sentence标记为RNN的输入，每个时间步取句子中的一个字进行处理
+        word = drnn.step_input(sentence)
+        # 使用boot_memory初始化memory，并且需要依据输入序列进行重排序
+        memory = drnn.memory(init=boot_memory, need_reorder=True)
+        hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh')
+        # 用hidden更新memory
+        drnn.update_memory(ex_mem=memory, new_mem=hidden)
+        # 将hidden标记为RNN的输出
+        drnn.output(hidden)
+    # 获得RNN的计算结果
+    rnn_output = drnn()
+**代码示例二**
+..  code-block:: python
+    import paddle.fluid as fluid
+    sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
+    drnn = fluid.layers.DynamicRNN()
+    with drnn.block():
+        # 将sentence标记为RNN的输入，每个时间步取句子中的一个字进行处理
+        word = drnn.step_input(sentence)
+        # 将memory初始化为一个值为0的常量Tensor，shape=[batch_size, 10]，其中batch_size由输入sentence决定
+        memory = drnn.memory(shape=[10], dtype='float32', value=0)
+        hidden = fluid.layers.fc(input=[word, memory], size=10, act='tanh')
+        # 用hidden更新memory
+        drnn.update_memory(ex_mem=memory, new_mem=hidden)
+        # 将hidden标记为RNN的输出
+        drnn.output(hidden)
+    # 获得RNN的计算结果
+    rnn_output = drnn()
+.. _cn_api_fluid_layers_DynamicRNN_update_memory:
+成员函数 update_memory
+---------------------------------
+.. py:method:: update_memory(ex_mem, new_mem)
+将需要在时间步之间传递的信息更新。
+参数：
+  - **ex_mem** (Variable) - 上一个时间步的信息。
+  - **new_mem** (Variable) - 新的时间步信息。:code:`new_mem` 的维度和数据类型必须与 :code:`ex_mem` 一致。
+返回：无
+抛出异常：
+    - :code:`ValueError`：当 :code:`update_memory()` 接口在RNN :code:`block()` 接口外面被调用时。
+    - :code:`TypeError`：当 :code:`ex_mem` 或 :code:`new_mem` 不是Variable类型时。
+    - :code:`ValueError`：当 :code:`ex_mem` 不是使用 :code:`memory()` 接口定义的memory时。
+    - :code:`ValueError`：当 :code:`update_memory()` 接口在 :code:`step_input()` 接口之前被调用时。
+.. _cn_api_fluid_layers_DynamicRNN_output:
+成员函数 output
+---------------------------------
+.. py:method:: output(*outputs)
+设置outputs为RNN每个时间步的输出变量。
+参数：
+    - **\*outputs** (Variable ...) - 输出Tensor，可同时将多个Variable标记为输出。
+返回：无
+抛出异常：
+    - :code:`ValueError`：当 :code:`output()` 接口在RNN :code:`block()` 接口外面被调用时。
+.. _cn_api_fluid_layers_DynamicRNN_call:
+成员函数 __call__
+---------------------------------
+.. py:method:: __call__()
+获取RNN计算的输出序列。
+若定义了 :code:`drnn = DynamicRNN()`，则可以调用 :code:`drnn()` 获得输出序列，该输出序列是通过将每一个时间步的output数据合并得到的一个LoDTensor。
+当RNN的输入x（通过 :code:`step_input()` 接口设置）的 :code:`x.lod_level` 为1时，该输出LoDTensor将会和输入x持有完全相同的LoD信息。
+通过 :code:`drnn()` 获取的RNN输出LoDTensor中包含了所有时间步的计算结果，可调用 :ref:`cn_api_fluid_layers_sequence_last_step` 获取最后一个时间步的计算结果。
+参数：
+    无
+返回：RNN的输出序列。
+返回类型：Variable或Variable list
+抛出异常：
+    - :code:`ValueError` ：当 :code:`__call__()` 接口在RNN :code:`block()` 定义之前被调用时。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    sentence = fluid.data(name='sentence', shape=[None, 32], dtype='float32', lod_level=1)
+    encoder_proj = fluid.data(name='encoder_proj', shape=[None, 32], dtype='float32', lod_level=1)
+    decoder_boot = fluid.data(name='boot', shape=[None, 10], dtype='float32')
+    drnn = fluid.layers.DynamicRNN()
+    with drnn.block():
+        # 将sentence标记为RNN的输入，每个时间步取句子中的一个字进行处理
+        current_word = drnn.step_input(sentence)
+        # 将encoder_proj标记为RNN的静态输入
+        encoder_word = drnn.static_input(encoder_proj)
+        # 使用decoder_boot初始化memory，并且需要依据输入序列进行重排序
+        memory = drnn.memory(init=decoder_boot, need_reorder=True)
+        fc_1 = fluid.layers.fc(input=encoder_word, size=30)
+        fc_2 = fluid.layers.fc(input=current_word, size=30)
+        decoder_inputs = fc_1 + fc_2
+        hidden, _, _ = fluid.layers.gru_unit(input=decoder_inputs, hidden=memory, size=30)
+        # 用hidden更新memory
+        drnn.update_memory(ex_mem=memory, new_mem=hidden)
+        out = fluid.layers.fc(input=hidden, size=10, bias_attr=True, act='softmax')
+        # 将hidden和out标记为RNN的输出
+        drnn.output(hidden, out)
+    # 获得RNN的计算结果
+    hidden, out = drnn()
+    # 提取RNN最后一个时间步的计算结果
+    last = fluid.layers.sequence_last_step(out)
--- a/doc/paddle/api/paddle/fluid/layers/GRUCell_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/GRUCell_cn.rst
+.. _cn_api_fluid_layers_GRUCell:
+GRUCell
+-------------------------------
+.. py:class:: paddle.fluid.layers.GRUCell(hidden_size, param_attr=None, bias_attr=None, gate_activation=None, activation=None, dtype="float32", name="GRUCell")
+:api_attr: 声明式编程模式（静态图)
+门控循环单元（Gated Recurrent Unit）。通过对 :code:`fluid.contrib.layers.rnn_impl.BasicGRUUnit` 包装，来让它可以应用于RNNCell。
+公式如下：
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \tilde{h_t}
+更多细节可以参考 `Learning Phrase Representations using RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_
+参数：
+  - **hidden_size** (int) - GRUCell中的隐藏层大小。 
+  - **param_attr** (ParamAttr，可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr`。
+  - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。 
+  - **gate_activation** (function，可选) - :math:`act_g` 的激活函数。 默认值为 :code:`fluid.layers.sigmoid`。 
+  - **activation** (function，可选) - :math:`act_c` 的激活函数。 默认值为 :code:`fluid.layers.tanh` 
+  - **dtype** (string，可选) - 此cell中使用的数据类型。 默认为"float32"。 
+  - **name** (string，可选) - 用于标识参数和偏差的名称域。
+返回：GRUCell类的实例对象。
+**示例代码**
+..  code-block:: python 
+    import paddle.fluid.layers as layers
+    cell = layers.GRUCell(hidden_size=256)
+.. py:method:: call(inputs, states)
+执行GRU的计算。 
+参数：
+  - **inputs** (Variable) - 输入，形状为 :math:`[batch\_size，input\_size]` 的tensor，对应于公式中的 :math:`x_t` 。数据类型应为float32。 
+  - **states** (Variable) - 状态，形状为 :math:`[batch\_size，hidden\_size]` 的tensor。 对应于公式中的 :math:`h_{t-1}` 。数据类型应为float32。 
+返回：一个元组 :code:`(outputs, new_states)` ，其中 :code:`outputs` 和 :code:`new_states` 是同一个tensor，其形状为 :math:`[batch\_size，hidden\_size]`，数据类型和 :code:`states` 的数据类型相同，对应于公式中的 :math:`h_t`。
+返回类型：tuple
+.. py:method:: state_shape()
+GRUCell的 :code:`state_shape` 是形状 :math:`[hidden\_size]` （batch大小为-1，自动插入到形状中），对应于 :math:`h_{t-1}` 的形状。
+参数：无。
+返回：GRUCell的 :code:`state_shape`。
+返回类型：Variable
--- a/doc/paddle/api/paddle/fluid/layers/IfElse_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/IfElse_cn.rst
+.. _cn_api_fluid_layers_IfElse:
+IfElse
+-------------------------------
+.. py:class:: paddle.fluid.layers.IfElse(cond, name=None)
+:api_attr: 声明式编程模式（静态图)
+该类用于实现IfElse分支控制功能， IfElse包含两个Block，true_block，false_block，IfElse会将满足True或False条件的数据分别放入不同的block运行。    
+cond是一个shape为[N, 1]、数据类型为bool的2-D tensor，表示输入数据对应部分的执行条件。
+.. note::
+    如果参数 ``cond`` 的形状为[1]，强烈建议您使用新的OP :ref:`cn_api_fluid_layers_cond` 而不是 ``IfElse``。
+    OP :ref:`cn_api_fluid_layers_cond` 的使用方式更简单，并且调用该OP所用的代码更少且功能与 ``IfElse`` 一样。
+IfElse OP同其他的OP在使用上有一定的区别，可能会对一些用户造成一定的困惑，以下展示了一个
+简单的样例对该OP进行说明。
+.. code-block:: python
+        # 以下代码完成的功能：对x中大于0的数据减去10，对x中小于0的数据加上10，并将所有的数据求和
+        import numpy as np
+        import paddle.fluid as fluid
+        x = fluid.layers.data(name='x', shape=[4, 1], dtype='float32', append_batch_size=False)
+        y = fluid.layers.data(name='y', shape=[4, 1], dtype='float32', append_batch_size=False)
+        x_d = np.array([[3], [1], [-2], [-3]]).astype(np.float32)
+        y_d = np.zeros((4, 1)).astype(np.float32)
+        # 比较x, y对应元素的大小，输出cond, cond是shape为[4, 1]，数据类型为bool的2-D tensor。
+        # 根据输入数据x_d, y_d，可以推断出cond中的数据为[[true], [true], [false], [false]]
+        cond = fluid.layers.greater_than(x, y)
+        # 同其他常见OP不同的是，该OP返回的ie是一个IfElse OP的对象
+        ie = fluid.layers.IfElse(cond)
+        with ie.true_block():
+            # 在这个block中，根据cond条件，获取x中对应条件为true维度的数据，并减去10
+            out_1 = ie.input(x)
+            out_1 = out_1 - 10
+            ie.output(out_1)
+        with ie.false_block():
+            # 在这个block中，根据cond条件，获取x中对应条件为false维度的数据，并加上10
+            out_1 = ie.input(x)
+            out_1 = out_1 + 10
+            ie.output(out_1)
+        # 根据cond条件将两个block中处理后的数据进行合并，此处的output为输出，类型为List，List中的元素类型为Variable。
+        output = ie() #  [array([[-7.], [-9.], [ 8.], [ 7.]], dtype=float32)] 
+        # 将输出List中的第一个Variable获取出来，并计算所有元素和
+        out = fluid.layers.reduce_sum(output[0])
+        exe = fluid.Executor(fluid.CPUPlace())
+        exe.run(fluid.default_startup_program())
+        res = exe.run(fluid.default_main_program(), feed={"x":x_d, "y":y_d}, fetch_list=[out])
+        print(res)
+        # [array([-1.], dtype=float32)] 
+参数：
+    - **cond** (Variable)- cond是一个shape为[N, 1]、数据类型为bool的2-D tensor，表示N个输入数据的对应的执行条件。数据类型为bool。
+    - **name** (str，可选)- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+**返回：**    
+同其他常见OP不同的是，该OP调用返回一个IfElse OP对象(如例子中的 ie)，通过调用对象内部函数 ``true_block()`` ， ``false_block()`` ， ``input()`` ， ``output()`` 对输入数据进行分支处理，
+通过调用内部的 ``__call__()`` 函数，将不同分支处理的数据进行整合，作为整体的输出，输出类型为列表，列表中每个元素的类型为Variable。
+**内部函数：**
+- 通过调用对象中的 ``with ie.true_block()`` 函数构建block，将条件为true下的计算逻辑放入此block中。如果没有构建相应的block，则对应条件维度下的输入数据不做改变。            
+- 通过调用对象中的 ``with ie.false_block()`` 函数构建block，将条件为false下的计算逻辑放入此block中。如果没有构建相应的block，则对应条件维度下的输入数据不做改变。                 
+- ``out = ie.input(x)`` 会将x中对应条件维度的数据获取出来放入到out中，支持block内部处理多个输入。                   
+- ``ie.output(out)`` 会将结果写入对应条件的输出中。                    
+- 对象内部有 ``__call__()`` 函数，即通过对 ``output = ie()`` 的调用，将条件分别为True，False的block内部所有的输出进行融合作为整体的输出，输出的类型为列表，列表中每个元素的类型为Variable。                
--- a/doc/paddle/api/paddle/fluid/layers/LSTMCell_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/LSTMCell_cn.rst
+.. _cn_api_fluid_layers_LSTMCell:
+LSTMCell
+-------------------------------
+.. py:class:: paddle.fluid.layers.LSTMCell(hidden_size, param_attr=None, bias_attr=None, gate_activation=None, activation=None, forget_bias=1.0, dtype="float32", name="LSTMCell")
+:api_attr: 声明式编程模式（静态图)
+长短期记忆单元（Long-Short Term Memory）。通过对 :code:`fluid.contrib.layers.rnn_impl.BasicLSTMUnit` 包装，来让它可以应用于RNNCell。    
+公式如下：
+.. math:: 
+    i_{t} &= act_g \left ( W_{x_{i}}x_{t}+W_{h_{i}}h_{t-1}+b_{i} \right ) \\
+    f_{t} &= act_g \left ( W_{x_{f}}x_{t}+W_{h_{f}}h_{t-1}+b_{f}+forget\_bias \right ) \\
+    c_{t} &= f_{t}c_{t-1}+i_{t}act_h\left ( W_{x_{c}}x_{t} +W_{h_{c}}h_{t-1}+b_{c}\right ) \\
+    o_{t} &= act_g\left ( W_{x_{o}}x_{t}+W_{h_{o}}h_{t-1}+b_{o} \right ) \\
+    h_{t} &= o_{t}act_h \left ( c_{t} \right )
+更多细节可以参考 `RECURRENT NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_  
+参数：
+  - **hidden_size** (int) - LSTMCell中的隐藏层大小。
+  - **param_attr** (ParamAttr，可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr`。
+  - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。 
+  - **gate_activation** (function，可选) - :math:`act_g` 的激活函数。 默认值为 :code:`fluid.layers.sigmoid`。 
+  - **activation** (function，可选) - :math:`act_h` 的激活函数。 默认值为 :code:`fluid.layers.tanh`。
+  - **forget_bias** (float，可选) - 计算遗忘门时使用的遗忘偏置。默认值为 1.0。
+  - **dtype** (string，可选) - 此Cell中使用的数据类型。 默认值为 `float32`。 
+  - **name** (string，可选) - 用于标识参数和偏差的名称域。
+返回：LSTMCell类的实例对象。
+**示例代码**
+.. code-block:: python
+    import paddle.fluid.layers as layers
+    cell = layers.LSTMCell(hidden_size=256)
+.. py:method:: call(inputs, states)
+执行LSTM的计算。 
+参数：
+  - **inputs** (Variable) - 输入，形状为 :math:`[batch\_size，input\_size]` 的tensor，对应于公式中的 :math:`x_t`。数据类型应为float32。 
+  - **states** (Variable) - 状态，包含两个tensor的列表，每个tensor形状为 :math:`[batch\_size，hidden\_size]`。 对应于公式中的 :math:`h_{t-1}, c_{t-1}`。数据类型应为float32。 
+返回：一个元组 :code:`(outputs, new_states)`，其中 :code:`outputs` 是形状为 :math:`[batch\_size，hidden\_size]` 的tensor，对应于公式中的 :math:`h_{t}`；:code:`new_states` 是一个列表，包含形状为 :math:`[batch\_size，hidden\_size]` 的两个tensor变量，它们对应于公式中的 :math:`h_{t}, c_{t}`。这些tensor的数据类型都与 :code:`states` 的数据类型相同。
+返回类型：tuple
+.. py:method:: state_shape()
+LSTMCell的 :code:`state_shape` 是一个具有两个形状的列表：:math:`[[hidden\_size], [hidden\_size]]` （batch大小为-1，自动插入到形状中）。 这两个形状分别对应于公式中的 :math:`h_{t-1}` 和 :math:`c_{t-1}`。
+参数：无。
+返回：LSTMCell的 :code:`state_shape` 
+返回类型：list
--- a/doc/paddle/api/paddle/fluid/layers/MultivariateNormalDiag_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/MultivariateNormalDiag_cn.rst
+.. _cn_api_fluid_layers_MultivariateNormalDiag:
+MultivariateNormalDiag
+-------------------------------
+.. py:class:: paddle.fluid.layers.MultivariateNormalDiag(loc, scale)
+多元高斯分布
+概率密度函数（pdf）为：
+.. math::
+    pdf(x; loc, scale) = \frac{e^{-\frac{||y||^2}{2}}}{Z}
+    y = inv(scale) @ (x - loc)
+    Z = (2\pi )^{0.5k} |det(scale)|
+上面公式中:
+  - :math:`inv` 表示： 对矩阵求逆
+  - :math:`@` 表示：矩阵相乘
+  - :math:`det` 表示：求行列式的值
+参数：
+    - **loc** (list|numpy.ndarray|Variable) - 形状为 :math:`[k]` 的多元高斯分布的均值列表。数据类型为float32。
+    - **scale** (list|numpy.ndarray|Variable) - 形状为 :math:`[k, k]` 的多元高斯分布的对角协方差矩阵，且除对角元素外，其他元素取值均为0。数据类型为float32。
+**代码示例**：
+.. code-block:: python
+    import numpy as np
+    from paddle.fluid import layers
+    from paddle.fluid.layers import MultivariateNormalDiag
+    a_loc_npdata = np.array([0.3,0.5],dtype="float32")
+    a_loc_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(a_loc_npdata, a_loc_tensor)
+    a_scale_npdata = np.array([[0.4,0],[0,0.5]],dtype="float32")
+    a_scale_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(a_scale_npdata, a_scale_tensor)
+    b_loc_npdata = np.array([0.2,0.4],dtype="float32")
+    b_loc_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(b_loc_npdata, b_loc_tensor)
+    b_scale_npdata = np.array([[0.3,0],[0,0.4]],dtype="float32")
+    b_scale_tensor = layers.create_tensor(dtype="float32")
+    layers.assign(b_scale_npdata, b_scale_tensor)
+    a = MultivariateNormalDiag(a_loc_tensor, a_scale_tensor)
+    b = MultivariateNormalDiag(b_loc_tensor, b_scale_tensor)
+    a.entropy()
+    # [2.033158] with shape: [1]
+    b.entropy()
+    # [1.7777451] with shape: [1]
+    a.kl_divergence(b)
+    # [0.06542051] with shape: [1]
+.. py:function:: kl_divergence(other)
+计算相对于另一个多元高斯分布的KL散度
+参数：
+    - **other** (MultivariateNormalDiag) - 输入的另一个多元高斯分布。数据类型为float32。
+返回：相对于另一个多元高斯分布的KL散度，数据类型为float32
+返回类型：Variable
+.. py:function:: entropy()
+信息熵
+返回：多元高斯分布的信息熵，数据类型为float32
+返回类型：Variable
--- a/doc/paddle/api/paddle/fluid/layers/Normal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/Normal_cn.rst
+.. _cn_api_fluid_initializer_Normal:
+Normal
+-------------------------------
+.. py:attribute:: paddle.fluid.initializer.Normal
+:alias_main: paddle.nn.initializer.Normal
+:alias: paddle.nn.initializer.Normal
+:old_api: paddle.fluid.initializer.Normal
+``NormalInitializer`` 的别名
--- a/doc/paddle/api/paddle/fluid/layers/RNNCell_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/RNNCell_cn.rst
+.. _cn_api_fluid_layers_RNNCell:
+RNNCell
+-------------------------------
+.. py:class:: paddle.fluid.layers.RNNCell(name=None)
+:api_attr: 声明式编程模式（静态图)
+RNNCell是抽象的基类，代表将输入和状态映射到输出和新状态的计算，主要用于RNN。
+.. py:method:: call(inputs, states, **kwargs)
+每个cell都必须实现此接口，将（输入和状态）映射到（输出和新状态）。为了更灵活，输入和状态都可以是单个tensor变量或嵌套结构的tensor变量（列表 | 元组 | 命名元组 | 字典）。
+参数：
+  - **inputs** - 输入，为单个tensor变量或tensor变量组成的嵌套结构。
+  - **states** - 状态，单个tensor变量或tensor变量组成的嵌套结构。
+  - **kwargs** - 附加的关键字参数，由调用者提供。
+返回：输出和新状态。输出和新状态都可以是嵌套的tensor变量。新状态必须具有与状态相同的结构。
+返回类型：tuple
+.. py:method:: get_initial_states(batch_ref, shape=None, dtype=None, init_value=0)
+该接口根据提供的形状，数据类型和初始值来初始化状态。
+参数：
+  - **batch_ref** - 单个tensor变量或tensor组成的嵌套结构。 tensor的第一维将用作初始化状态的batch大小。 
+  - **shape** - 单个形状或形状组成的嵌套结构，单个形状是整数的列表或元组。 如果形状的第一维不是batch大小，则自动插入-1作为batch大小。 如果该项为None，将使用属性 :code:`state_shape`。默认值为None。 
+  - **dtype** - 单个数据类型或由数据类型组成的嵌套结构。该结构必须与shape的结构相同，例外是当状态中的所有tensor都具有相同的数据类型，这时可以使用单个数据类型。 如果是None并且属性 :code:`cell.state_shape` 不可用，则float32将用作数据类型。 默认值为None。 
+  - **init_value** - 用于初始化状态的浮点值。
+返回：和shape具有相同结构的tensor变量，代表初始状态。
+返回类型：Variable
+.. py:method:: state_shape()
+该接口用于初始化cell的状态。 单个形状或由形状组成的嵌套结构，单个形状可以是整数的列表或元组(如果形状的第一维不是batch大小，则自动插入-1作为batch大小)。 当没有使用 :code:`get_initial_states` 初始化状态，或者调用 :code:`get_initial_states` 时已提供 :code:`shape` 参数的时候，不用实现该方法。
+.. py:method:: state_dtype()
+该接口用于初始化cell的状态。 单个数据类型或由数据类型组成的嵌套结构，该结构必须与 :code:`shape` 的结构相同，例外是当状态中的所有tensor都具有相同的数据类型，这时可以使用单个数据类型。 当没有使用 :code:`get_initial_states` 初始化状态，或者调用 :code:`get_initial_states` 时已提供 :code:`dtype` 参数的时候，不用实现该方法。
--- a/doc/paddle/api/paddle/fluid/layers/StaticRNN_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/StaticRNN_cn.rst
+.. _cn_api_fluid_layers_StaticRNN:
+StaticRNN
+-------------------------------
+.. py:class:: paddle.fluid.layers.StaticRNN(name=None)
+:api_attr: 声明式编程模式（静态图)
+该OP用来处理一批序列数据，其中每个样本序列的长度必须相等。StaticRNN将序列按照时间步长展开，用户需要定义每个时间步中的处理逻辑。
+参数：
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 创建memory变量作为prev，batch size来自于word变量。
+          prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+          # 把每一步处理后的hidden标记为输出序列。
+          rnn.step_output(hidden)
+      # 获取最终的输出结果
+      result = rnn()
+.. py:method:: step()
+定义在每个时间步执行的操作。step用在with语句中，with语句中定义的OP会被执行sequence_len次(sequence_len是输入序列的长度)。
+.. py:method:: memory(init=None, shape=None, batch_ref=None, init_value=0.0, init_batch_dim_idx=0, ref_batch_dim_idx=1)
+为静态RNN创建一个内存变量。
+如果init不为None，则用init来初始化memory。 如果init为None，则必须设置shape和batch_ref，函数会使用shape和batch_ref创建新的Variable来初始化init。
+参数：
+  - **init** (Variable，可选) - 用来初始化memory的Tensor。如果没有设置，则必须提供shape和batch_ref参数。默认值None。
+  - **shape** (list|tuple) - 当init为None时用来设置memory的维度，注意不包括batch_size。默认值None。
+  - **batch_ref** (Variable，可选) - 当init为None时，memory变量的batch size会设置为该batch_ref变量的ref_batch_dim_idx轴。默认值None。
+  - **init_value** (float，可选) - 当init为None时用来设置memory的初始值，默认值0.0。
+  - **init_batch_dim_idx** (int，可选) - init变量的batch_size轴，默认值0。
+  - **ref_batch_dim_idx** (int，可选) - batch_ref变量的batch_size轴，默认值1。
+返回：返回创建的memory变量。
+返回类型：Variable
+**代码示例一**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 创建memory变量作为prev，batch size来自于word变量。
+          prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+**代码示例二**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      boot_memory = fluid.layers.data(name='boot', shape=[hidden_size], dtype='float32', lod_level=1)
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 用init初始化memory。
+          prev = rnn.memory(init=boot_memory)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+.. py:method:: step_input(x)
+标记StaticRNN的输入序列。
+参数：
+  - **x** (Variable) – 输入序列，x的形状应为[seq_len, ...]。
+返回：输入序列中当前时间步的数据。
+返回类型：Variable
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 创建memory变量作为prev，batch size来自于word变量。
+          prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+.. py:method:: step_output(o)
+标记StaticRNN输出的序列。
+参数：
+  - **o** (Variable) – 输出序列
+返回：无
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 创建memory变量作为prev，batch size来自于word变量。
+          prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+          # 把每一步处理后的hidden标记为输出序列。
+          rnn.step_output(hidden)
+      result = rnn()
+.. py:method:: output(*outputs)
+标记StaticRNN输出变量。
+参数：
+  - **outputs** – 输出Tensor，可同时将多个Variable标记为输出。
+返回：无
+**代码示例**
+.. code-block:: python
+      import paddle.fluid as fluid
+      import paddle.fluid.layers as layers
+      vocab_size, hidden_size=10000, 200
+      x = layers.data(name="x", shape=[-1, 1, 1], dtype='int64')
+      # 创建处理用的word sequence
+      x_emb = layers.embedding(
+          input=x,
+          size=[vocab_size, hidden_size],
+          dtype='float32',
+          is_sparse=False)
+      # 把batch size变换到第1维。
+      x_emb = layers.transpose(x_emb, perm=[1, 0, 2])
+      rnn = fluid.layers.StaticRNN()
+      with rnn.step():
+          # 将刚才创建的word sequence标记为输入，每个时间步取一个word处理。
+          word = rnn.step_input(x_emb)
+          # 创建memory变量作为prev，batch size来自于word变量。
+          prev = rnn.memory(shape=[-1, hidden_size], batch_ref = word)
+          hidden = fluid.layers.fc(input=[word, prev], size=hidden_size, act='relu')
+          # 用处理完的hidden变量更新prev变量。
+          rnn.update_memory(prev, hidden)
+          # 把每一步的hidden和word标记为输出。
+          rnn.output(hidden, word)
+      result = rnn()
+.. py:method:: update_memory(mem, var)
+将memory从mem更新为var。
+参数：    
+  - **mem** (Variable) – memory接口定义的变量。
+  - **var** (Variable) – RNN块中的变量，用来更新memory。var的维度和数据类型必须与mem一致。
+返回：无
+代码示例参考前述示例。
--- a/doc/paddle/api/paddle/fluid/layers/Switch_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/Switch_cn.rst
+.. _cn_api_fluid_layers_Switch:
+Switch
+-------------------------------
+.. py:class:: paddle.fluid.layers.Switch (name=None)
+:api_attr: 声明式编程模式（静态图)
+该类用于实现Switch分支控制功能。Switch分支包含多个case分支和一个default分支，Switch控制流会依次检查各case分支条件是否满足，并仅执行第一个满足条件的case分支后面的语句。若不存在满足条件的case分支，则仅执行default分支后面的语句。
+.. note::
+    如果参数 ``cond`` 的形状为[1]，强烈建议您使用新的OP :ref:`cn_api_fluid_layers_case` 而不是 ``Switch``。
+    OP :ref:`cn_api_fluid_layers_case` 的使用方式更简单，并且调用该OP所用的代码更少且功能与 ``Switch`` 一样。
+成员函数：
+    - **case(cond)** - Switch的case分支，其参数cond为bool型的标量Variable。只有当前case分支的cond为True，且之前的case分支的cond均为False，该case分支后的语句才会执行，且不再执行之后的case后的语句。
+    - **default()** - Switch的default分支。当所有case分支的cond均为False时，执行default分支后的语句。
+注意：case和default函数只能用于Switch的scope内部，示例如下：
+..  code-block:: python
+    with fluid.layers.Switch() as switch:
+        with switch.case(cond1):
+            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=1)
+        with switch.case(cond2):
+            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=2)
+        with switch.default():
+            i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
+参数：
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    lr = fluid.layers.create_global_var(
+        shape=[1],
+        value=0.0,
+        dtype='float32',
+        persistable=True,
+        name="learning_rate")
+    zero_var = fluid.layers.fill_constant(
+        shape=[1], dtype='float32', value=0.0)
+    one_var = fluid.layers.fill_constant(
+        shape=[1], dtype='float32', value=1.0)
+    two_var = fluid.layers.fill_constant(
+        shape=[1], dtype='float32', value=2.0)
+    # 将参数中的begin设为非0值，则进入Switch的default分支，输出数组中的数字将为2
+    global_step = fluid.layers.autoincreased_step_counter(counter_name='@LR_DECAY_COUNTER@', begin=0, step=1) 
+    with fluid.layers.control_flow.Switch() as switch:
+        with switch.case(global_step == zero_var):
+            fluid.layers.assign(input=one_var, output=lr)
+        with switch.default():
+            fluid.layers.assign(input=two_var, output=lr)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    res = exe.run(fluid.default_main_program(), feed={}, fetch_list=[lr])
+    print(res) # [array([1.], dtype=float32)]
--- a/doc/paddle/api/paddle/fluid/layers/Uniform_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/Uniform_cn.rst
+.. _cn_api_fluid_initializer_Uniform:
+Uniform
+-------------------------------
+.. py:attribute:: paddle.fluid.initializer.Uniform
+:alias_main: paddle.nn.initializer.Uniform
+:alias: paddle.nn.initializer.Uniform
+:old_api: paddle.fluid.initializer.Uniform
+``UniformInitializer`` 的别名
--- a/doc/paddle/api/paddle/fluid/layers/While_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/While_cn.rst
+.. _cn_api_fluid_layers_While:
+While
+-------------------------------
+.. py:class:: paddle.fluid.layers.While (cond, is_test=False, name=None)
+:api_attr: 声明式编程模式（静态图)
+该类用于实现while循环控制功能，只要循环条件cond为True，就循环执行while循环体中的语句，直到cond为False为止。
+.. note::
+    如果参数 ``cond`` 的形状为[1]，强烈建议您使用新的OP :ref:`cn_api_fluid_layers_while_loop` 而不是 ``While``。
+    OP :ref:`cn_api_fluid_layers_while_loop` 的使用方式更简单，并且调用该OP所用的代码更少且功能与 ``While`` 一样。
+**注意：**
+    在 ``While`` 中创建的局部变量类似于C++中的while，无法被外部引用，因此无法通过 ``Executor`` 中的 ``fetch_list`` 来获取。
+    若想实现该功能，PaddlePaddle提供了 ``assign`` 接口将局部变量赋值到外部，请参考示例代码2 或参考 `issue#22724 <https://github.com/PaddlePaddle/Paddle/issues/22724>`_ 。
+参数：
+    - **cond** (Variable) – 用于判断循环继续进行的条件，为数据类型bool型的Tensor，其shape必须为[1]。
+    - **is_test** (bool，可选) – 用于表明是否在测试阶段执行，默认值为False。
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+**代码示例 1**
+.. code-block:: python
+    # 该示例代码展示整数循环+1，循环10次，输出计数结果
+    import paddle.fluid as fluid
+    import numpy as np
+    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)           # 循环计数器
+    loop_len = fluid.layers.fill_constant(shape=[1],dtype='int64', value=10)    # 循环次数
+    cond = fluid.layers.less_than(x=i, y=loop_len)              # 循环条件   
+    while_op = fluid.layers.While(cond=cond)
+    with while_op.block():  # 循环体
+        i = fluid.layers.increment(x=i, value=1, in_place=True)
+        fluid.layers.less_than(x=i, y=loop_len, cond=cond)      # 更新循环条件
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    res = exe.run(fluid.default_main_program(), feed={}, fetch_list=[i])
+    print(res) # [array([10])]
+**代码示例 2**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
+    loop_len = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
+    one = fluid.layers.fill_constant(shape=[1], dtype='float32', value=1)
+    data = fluid.data(name='data', shape=[1], dtype='float32')
+    sums = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0)  # 在 While 外先定义要获取的变量，需和要获取的 While 内部的变量名称不同
+    cond = fluid.layers.less_than(x=i, y=loop_len)
+    while_op = fluid.layers.While(cond=cond)
+    with while_op.block():
+        sums_tensor = fluid.layers.elementwise_add(x=data, y=data)
+        fluid.layers.assign(input=sums_tensor, output=sums)  # 将 While 内定义的变量 sums_tensor 通过 layers.assign 更新至 While 外的变量 sums 中
+        i = fluid.layers.increment(x=i, value=1, in_place=True)
+        data = fluid.layers.elementwise_add(x=data, y=one)
+        fluid.layers.less_than(x=i, y=loop_len, cond=cond)
+    feed_data = np.ones([1]).astype('float32')
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    res = exe.run(fluid.default_main_program(), feed={'data': feed_data}, fetch_list=sums)
+    print(res[0])  # [2.]    # 因 While 内的 data 没有将值更新到 While 外，故循环过后此处 sums 的值为 [2.]
--- a/doc/paddle/api/paddle/abs_cn.rst
+++ b/doc/paddle/api/paddle/abs_cn.rst
--- a/doc/paddle/api/paddle/acos_cn.rst
+++ b/doc/paddle/api/paddle/acos_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/adaptive_pool2d_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/adaptive_pool2d_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/adaptive_pool3d_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/adaptive_pool3d_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/add_position_encoding_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/add_position_encoding_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/affine_channel_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/affine_channel_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/affine_grid_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/affine_grid_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/anchor_generator_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/anchor_generator_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/argmax_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/argmax_cn.rst
+.. _cn_api_fluid_layers_argmax:
+argmax
+-------------------------------
+.. py:function:: paddle.fluid.layers.argmax(x, axis=0)
+**argmax**
+该OP沿 ``axis`` 计算输入 ``x`` 的最大元素的索引。
+参数：
+    - **x** (Variable) - 输入的多维 ``Tensor`` ，支持的数据类型：float32、float64、int8、int16、int32、int64。
+    - **axis** (int，可选) - 指定对输入Tensor进行运算的轴， ``axis`` 的有效范围是[-R, R)，R是输入 ``x`` 的Rank， ``axis`` 为负时与 ``axis`` +R 等价。默认值为0。
+返回： ``Tensor`` ，数据类型int64
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[[5,8,9,5],
+                   [0,0,1,7],
+                   [6,9,2,4]],
+                  [[5,2,4,2],
+                   [4,7,7,9],
+                   [1,7,0,6]]])
+  with fluid.dygraph.guard():
+      x = fluid.dygraph.to_variable(in1)
+      out1 = fluid.layers.argmax(x=x, axis=-1)
+      out2 = fluid.layers.argmax(x=x, axis=0)
+      out3 = fluid.layers.argmax(x=x, axis=1)
+      out4 = fluid.layers.argmax(x=x, axis=2)
+      print(out1.numpy())
+      # [[2 3 1]
+      #  [0 3 1]]
+      print(out2.numpy())
+      # [[0 0 0 0]
+      #  [1 1 1 1]
+      #  [0 0 0 1]]
+      print(out3.numpy())
+      # [[2 2 0 1]
+      #  [0 1 1 1]]
+      print(out4.numpy())
+      # [[2 3 1]
+      #  [0 3 1]]
--- a/doc/paddle/api/paddle/argmin_cn.rst
+++ b/doc/paddle/api/paddle/argmin_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/argsort_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/argsort_cn.rst
+.. _cn_api_fluid_layers_argsort:
+argsort
+-------------------------------
+.. py:function:: paddle.fluid.layers.argsort(input,axis=-1,descending=False,name=None)
+:alias_main: paddle.argsort
+:alias: paddle.argsort,paddle.tensor.argsort,paddle.tensor.search.argsort
+:old_api: paddle.fluid.layers.argsort
+对输入变量沿给定轴进行排序，输出排序好的数据和相应的索引，其维度和输入相同。**默认升序排列，如果需要降序排列设置** ``descending=True`` 。
+参数：
+    - **input** (Variable) - 输入的多维 ``Tensor`` ，支持的数据类型：float32、float64、int16、int32、int64、uint8。
+    - **axis** (int，可选) - 指定对输入Tensor进行运算的轴， ``axis`` 的有效范围是[-R, R)，R是输入 ``input`` 的Rank， ``axis`` 为负时与 ``axis`` +R 等价。默认值为-1。
+    - **descending** (bool，可选) - 指定算法排序的方向。如果设置为True，算法按照降序排序。如果设置为False或者不设置，按照升序排序。默认值为False。
+    - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：一组已排序的输出（与 ``input`` 维度相同、数据类型相同）和索引（数据类型为int64）。
+返回类型：tuple[Variable]
+**代码示例**：
+.. code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[[5,8,9,5],
+                   [0,0,1,7],
+                   [6,9,2,4]],
+                  [[5,2,4,2],
+                   [4,7,7,9],
+                   [1,7,0,6]]]).astype(np.float32)
+  with fluid.dygraph.guard():
+      x = fluid.dygraph.to_variable(in1)
+      out1 = fluid.layers.argsort(input=x, axis=-1) # same as axis==2
+      out2 = fluid.layers.argsort(input=x, axis=0)
+      out3 = fluid.layers.argsort(input=x, axis=1)
+      print(out1[0].numpy())
+      # [[[5. 5. 8. 9.]
+      #   [0. 0. 1. 7.]
+      #   [2. 4. 6. 9.]]
+      #  [[2. 2. 4. 5.]
+      #   [4. 7. 7. 9.]
+      #   [0. 1. 6. 7.]]]
+      print(out1[1].numpy())
+      # [[[0 3 1 2]
+      #   [0 1 2 3]
+      #   [2 3 0 1]]
+      #  [[1 3 2 0]
+      #   [0 1 2 3]
+      #   [2 0 3 1]]]
+      print(out2[0].numpy())
+      # [[[5. 2. 4. 2.]
+      #   [0. 0. 1. 7.]
+      #   [1. 7. 0. 4.]]
+      #  [[5. 8. 9. 5.]
+      #   [4. 7. 7. 9.]
+      #   [6. 9. 2. 6.]]]
+      print(out3[0].numpy())
+      # [[[0. 0. 1. 4.]
+      #   [5. 8. 2. 5.]
+      #   [6. 9. 9. 7.]]
+      #  [[1. 2. 0. 2.]
+      #   [4. 7. 4. 6.]
+      #   [5. 7. 7. 9.]]]
--- a/doc/paddle/api/paddle/fluid/layers/array_length_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/array_length_cn.rst
+.. _cn_api_fluid_layers_array_length:
+array_length
+-------------------------------
+.. py:function:: paddle.fluid.layers.array_length(array)
+该OP用于获取输入数组 :ref:`cn_api_fluid_LoDTensorArray` 的长度。可以与 :ref:`cn_api_fluid_layers_array_read` 、 :ref:`cn_api_fluid_layers_array_write` 、 :ref:`cn_api_fluid_layers_While` OP结合使用，实现LoDTensorArray的遍历与读写。
+参数：
+    - **array** (LoDTensorArray) - 输入的数组LoDTensorArray
+返回：shape为[1]的1-D Tensor, 表示数组LoDTensorArray的长度，数据类型为int64
+返回类型：Variable
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    tmp = fluid.layers.zeros(shape=[10], dtype='int32')
+    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
+    #tmp是shape为[10]的Tensor，将tmp写入到数组arr下标为10的位置，arr的长度因此为11
+    arr = fluid.layers.array_write(tmp, i=i)
+    #查看arr的长度
+    arr_len = fluid.layers.array_length(arr)
+    #可以通过executor打印出LoDTensorArray的长度
+    input = fluid.layers.Print(arr_len, message="The length of LoDTensorArray:")
+    main_program = fluid.default_main_program()
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(main_program)
+**运行输出**
+.. code-block:: python
+    1569576542	The length of LoDTensorArray:	The place is:CPUPlace
+    Tensor[array_length_0.tmp_0]
+	shape: [1,]
+	dtype: l
+	data: 11,
+    #输出shape为[1]的Tensor，值为11，表示LoDTensorArray长度为11
+    #dtype为对应C++数据类型，在不同环境下可能显示值不同，但本质一致
+    #例如：如果Tensor中数据类型是int64，则对应的C++数据类型为int64_t，所以dtype值为typeid(int64_t).name()，
+    #      其在MacOS下为'x'，linux下为'l'，Windows下为'__int64'，都表示64位整型变量
--- a/doc/paddle/api/paddle/fluid/layers/array_read_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/array_read_cn.rst
+.. _cn_api_fluid_layers_array_read:
+array_read
+-------------------------------
+.. py:function:: paddle.fluid.layers.array_read(array,i)
+该OP用于读取输入数组 :ref:`cn_api_fluid_LoDTensorArray` 中指定位置的数据, ``array`` 为输入的数组， ``i`` 为指定的读取位置。常与 :ref:`cn_api_fluid_layers_array_write` OP配合使用进行LoDTensorArray的读写。
+例1:
+::
+    输入：
+        包含4个Tensor的LoDTensorArray，前3个shape为[1]，最后一个shape为[1,2]:
+            input = ([0.6], [0.1], [0.3], [0.4, 0.2])
+        并且:
+            i = [3]
+    输出:
+        output = [0.4, 0.2]
+参数：
+    - **array** (Variable) - 输入的数组LoDTensorArray
+    - **i** (Variable) - shape为[1]的1-D Tensor，表示从 ``array`` 中读取数据的位置，数据类型为int64
+返回：从 ``array`` 中指定位置读取的LoDTensor或Tensor
+返回类型：Variable
+**代码示例**
+.. code-block:: python
+    #先创建一个LoDTensorArray，再在指定位置写入Tensor，然后从该位置读取Tensor
+    import paddle.fluid as fluid
+    arr = fluid.layers.create_array(dtype='float32')
+    tmp = fluid.layers.fill_constant(shape=[3, 2], dtype='int64', value=5)
+    i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
+    #tmp是shape为[3,2]的Tensor，将其写入空数组arr的下标10的位置，则arr的长度变为11
+    arr = fluid.layers.array_write(tmp, i, array=arr)
+    #读取arr的下标10的位置的数据
+    item = fluid.layers.array_read(arr, i)
+    #可以通过executor打印出该数据
+    input = fluid.layers.Print(item, message="The LoDTensor of the i-th position:")
+    main_program = fluid.default_main_program()
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(main_program)
+**输出结果**
+.. code-block:: python
+    1569588169	The LoDTensor of the i-th position:	The place is:CPUPlace
+    Tensor[array_read_0.tmp_0]
+	    shape: [3,2,]
+	    dtype: l
+	    data: 5,5,5,5,5,5,
+    #输出了shape为[3,2]的Tensor
+    #dtype为对应C++数据类型，在不同环境下可能显示值不同，但本质一致
+    #例如：如果Tensor中数据类型是int64，则对应的C++数据类型为int64_t，所以dtype值为typeid(int64_t).name()，
+    #      其在MacOS下为'x'，linux下为'l'，Windows下为'__int64'，都表示64位整型变量
--- a/doc/paddle/api/paddle/fluid/layers/array_write_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/array_write_cn.rst
+.. _cn_api_fluid_layers_array_write:
+array_write
+-------------------------------
+.. py:function:: paddle.fluid.layers.array_write(x, i, array=None)
+该OP将输入的变量 ``x`` 写入到数组 :ref:`cn_api_fluid_LoDTensorArray` ``array`` 的第i个位置，并返回修改后的LoDTensorArray，如果 ``array`` 为None，则创建一个新的LoDTensorArray。常与 :ref:`cn_api_fluid_layers_array_read` OP联合使用对LoDTensorArray进行读写。
+参数:
+    - **x** (Variable) – 待写入的数据，多维Tensor或LoDTensor，数据类型支持float32，float64，int32，int64
+    - **i** (Variable) – shape为[1]的1-D Tensor，表示写入到输出数组LoDTensorArray的位置，数据类型为int64
+    - **array** (Variable，可选) – 指定写入 ``x`` 的数组LoDTensorArray。默认值为None, 此时将创建新的LoDTensorArray并作为结果返回
+返回: 写入输入 ``x`` 之后的LoDTensorArray
+返回类型: Variable
+**代码示例**
+.. code-block:: python
+  import paddle.fluid as fluid
+  tmp = fluid.layers.fill_constant(shape=[3, 2], dtype='int64', value=5)
+  i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=10)
+  #将tmp写入数组arr下标为10的位置，并返回arr
+  arr = fluid.layers.array_write(tmp, i=i)
+  #此时arr是长度为11的LoDTensorArray，可以通过array_read来查看下标为10的LoDTensor，并将之打印出来
+  item = fluid.layers.array_read(arr, i=i)
+  input = fluid.layers.Print(item, message="The content of i-th LoDTensor:")
+  main_program = fluid.default_main_program()
+  exe = fluid.Executor(fluid.CPUPlace())
+  exe.run(main_program)
+**输出结果**
+.. code-block:: python
+  1570533133	The content of i-th LoDTensor:	The place is:CPUPlace
+  Tensor[array_read_0.tmp_0]
+	  shape: [3,2,]
+	  dtype: l
+	  data: 5,5,5,5,5,5,
+  #返回了shape为[3,2]的Tensor，即为上面写入的tmp
+  #dtype为对应C++数据类型，在不同环境下可能显示值不同，但本质一致
+  #例如：如果Tensor中数据类型是int64，则对应的C++数据类型为int64_t，所以dtype值为typeid(int64_t).name()，
+  #          其在MacOS下为'x'，linux下为'l'，Windows下为'__int64'，都表示64位整型变量
--- a/doc/paddle/api/paddle/asin_cn.rst
+++ b/doc/paddle/api/paddle/asin_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/assign_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/assign_cn.rst
--- a/doc/paddle/api/paddle/atan_cn.rst
+++ b/doc/paddle/api/paddle/atan_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/autoincreased_step_counter_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/autoincreased_step_counter_cn.rst
+.. _cn_api_fluid_layers_autoincreased_step_counter:
+autoincreased_step_counter
+-------------------------------
+.. py:function:: paddle.fluid.layers.autoincreased_step_counter(counter_name=None, begin=1, step=1)
+:api_attr: 声明式编程模式（静态图)
+创建一个自增变量，每个迭代累加一次，默认首次返回值为 1，默认累加步长为 1。
+参数：
+    - **counter_name** (str, 可选) - 该计数器的名称，默认为 ``@STEP_COUNTER@`` 。
+    - **begin** (int) - 该计数器返回的第一个值。
+    - **step** (int) - 累加步长。
+返回：累加结果，数据类型为 int64
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    global_step = fluid.layers.autoincreased_step_counter(
+        counter_name='@LR_DECAY_COUNTER@', begin=0, step=1)
--- a/doc/paddle/api/paddle/static/nn/batch_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/batch_norm_cn.rst
--- a/doc/paddle/api/paddle/nn/beam_search_cn.rst
+++ b/doc/paddle/api/paddle/nn/beam_search_cn.rst
--- a/doc/paddle/api/paddle/nn/beam_search_decode_cn.rst
+++ b/doc/paddle/api/paddle/nn/beam_search_decode_cn.rst
--- a/doc/paddle/api/paddle/static/nn/bilinear_tensor_product_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/bilinear_tensor_product_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/bipartite_match_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/bipartite_match_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/box_clip_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/box_clip_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/box_coder_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/box_coder_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/box_decoder_and_assign_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/box_decoder_and_assign_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/bpr_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/bpr_loss_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/brelu_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/brelu_cn.rst
--- a/doc/paddle/api/paddle/nn/case_cn.rst
+++ b/doc/paddle/api/paddle/nn/case_cn.rst
--- a/doc/paddle/api/paddle/cast_cn.rst
+++ b/doc/paddle/api/paddle/cast_cn.rst
--- a/doc/paddle/api/paddle/ceil_cn.rst
+++ b/doc/paddle/api/paddle/ceil_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/center_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/center_loss_cn.rst
--- a/doc/paddle/api/paddle/nn/clip_by_norm_cn.rst
+++ b/doc/paddle/api/paddle/nn/clip_by_norm_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/clip_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/clip_cn.rst
+.. _cn_api_fluid_layers_clip:
+clip
+-------------------------------
+.. py:function:: paddle.fluid.layers.clip(x, min, max, name=None)
+:alias_main: paddle.nn.clip
+:alias: paddle.nn.clip,paddle.nn.clip.clip
+:old_api: paddle.fluid.layers.clip
+该OP对输入Tensor每个元素的数值进行裁剪，使得输出Tensor元素的数值被限制在区间[min, max]内。具体的计算公式如下。
+.. math::
+  Out = MIN(MAX(x,min),max)
+参数：
+        - **x** (Variable)- 多维Tensor，数据类型为float32
+        - **min** (float)- 最小值，输入Tensor中小于该值的元素由min代替。
+        - **max** (float)- 最大值，输入Tensor中大于该值的元素由max替换。
+        - **name** (None|str) – 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回：  对元素的数值进行裁剪之后的Tensor，与输入x具有相同的shape和数据类型
+返回类型：Variable
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    input = fluid.layers.data(
+        name='data', shape=[1], dtype='float32')
+    reward = fluid.layers.clip(x=input, min=-1.0, max=1.0)
--- a/doc/paddle/api/paddle/nn/functional/collect_fpn_proposals_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/collect_fpn_proposals_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/concat_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/concat_cn.rst
+.. _cn_api_fluid_layers_concat:
+concat
+-------------------------------
+.. py:function:: paddle.fluid.layers.concat(input, axis=0, name=None)
+该OP对输入沿 ``axis`` 轴进行联结，返回一个新的Tensor。
+参数：
+    - **input** (list|tuple|Tensor) - 待联结的Tensor list，Tensor tuple或者Tensor，支持的数据类型为：bool、float16、 float32、float64、int32、int64。 ``input`` 中所有Tensor的数据类型必须一致。
+    - **axis** (int|Tensor，可选) - 指定对输入Tensor进行运算的轴，可以是整数或者形状为[1]的Tensor，数据类型为int32或者int64。 ``axis`` 的有效范围是[-R, R)，R是输入 ``input`` 中Tensor 的维度， ``axis`` 为负值时与 :math:`axis + R` 等价。默认值为0。
+    - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：联结后的 ``Tensor`` ，数据类型和 ``input`` 中的Tensor相同。
+抛出异常：
+    - ``TypeError``: - 当输入 ``input`` 的类型不是list、tuple或者Tensor的时候。
+    - ``TypeError``: - 当输入 ``input`` 的数据类型不是 bool，float16， float32， float64， int32， int64时。
+    - ``TypeError``: - 当 ``axis`` 的类型不是int或者Tensor时。当 ``axis`` 是Tensor的时候其数据类型不是int32或者int64时。
+    - ``TypeError``: - 当输入 ``input`` 中的Tensor存在数据类型不一致时。
+**代码示例**：
+.. code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[1, 2, 3],
+                  [4, 5, 6]])
+  in2 = np.array([[11, 12, 13],
+                  [14, 15, 16]])
+  in3 = np.array([[21, 22],
+                  [23, 24]])
+  with fluid.dygraph.guard():
+      x1 = fluid.dygraph.to_variable(in1)
+      x2 = fluid.dygraph.to_variable(in2)
+      x3 = fluid.dygraph.to_variable(in3)
+      out1 = fluid.layers.concat(input=[x1, x2, x3], axis=-1)
+      out2 = fluid.layers.concat(input=[x1, x2], axis=0)
+      print(out1.numpy())
+      # [[ 1  2  3 11 12 13 21 22]
+      #  [ 4  5  6 14 15 16 23 24]]
+      print(out2.numpy())
+      # [[ 1  2  3]
+      #  [ 4  5  6]
+      #  [11 12 13]
+      #  [14 15 16]]
--- a/doc/paddle/api/paddle/nn/cond_cn.rst
+++ b/doc/paddle/api/paddle/nn/cond_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/continuous_value_model_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/continuous_value_model_cn.rst
--- a/doc/paddle/api/paddle/static/Print_cn.rst
+++ b/doc/paddle/api/paddle/static/Print_cn.rst
--- a/doc/paddle/api/paddle/static/nn/conv2d_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/conv2d_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/conv/conv2d_transpose_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/conv/conv2d_transpose_cn.rst
--- a/doc/paddle/api/paddle/static/nn/conv3d_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/conv3d_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/conv/conv3d_transpose_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/conv/conv3d_transpose_cn.rst
--- a/doc/paddle/api/paddle/cos_cn.rst
+++ b/doc/paddle/api/paddle/cos_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/cosine_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/cosine_decay_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/create_array_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/create_array_cn.rst
+.. _cn_api_fluid_layers_create_array:
+create_array
+-------------------------------
+.. py:function:: paddle.fluid.layers.create_array(dtype)
+此OP创建一个LoDTensorArray，它可以用作 :ref:`cn_api_fluid_layers_array\_write` , :ref:`cn_api_fluid_layers_array\_read` OP的输入，以及和 :ref:`cn_api_fluid_layers_While` OP
+一起创建RNN网络。
+参数:
+    - **dtype** (str) — 指定Tensor中元素的数据类型，支持的数据类型值：float32，float64，int32，int64。
+返回: 返回创建的空LoDTensorArray，Tensor中的元素数据类型为指定的dtype。
+返回类型: Variable。
+**代码示例**
+..  code-block:: python
+  import paddle.fluid as fluid
+  data = fluid.layers.create_array(dtype='float32') # 创建一个数据类型为float32的LoDTensorArray。
--- a/doc/paddle/api/paddle/create_parameter_cn.rst
+++ b/doc/paddle/api/paddle/create_parameter_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/create_py_reader_by_data_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/create_py_reader_by_data_cn.rst
+.. _cn_api_fluid_layers_create_py_reader_by_data:
+create_py_reader_by_data
+-------------------------------
+.. py:function:: paddle.fluid.layers.create_py_reader_by_data(capacity,feed_list,name=None,use_double_buffer=True)
+:api_attr: 声明式编程模式（静态图)
+创建一个Python端提供数据的reader。该OP与 :ref:`cn_api_fluid_layers_py_reader` 类似，不同点在于它能够从feed变量列表读取数据。
+参数：
+  - **capacity** (int) - ``py_reader`` 维护的队列缓冲区的容量大小。单位是batch数量。若reader读取速度较快，建议设置较大的 ``capacity`` 值。
+  - **feed_list** (list(Variable)) - feed变量列表，这些变量一般由 :code:`fluid.data()` 创建。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **use_double_buffer** (bool，可选) - 是否使用双缓冲区，双缓冲区是为了预读下一个batch的数据、异步CPU -> GPU拷贝。默认值为True。
+返回：能够从feed变量列表读取数据的reader，数据类型和feed变量列表中变量的数据类型相同。
+返回类型：reader
+**代码示例：**
+.. code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import paddle.dataset.mnist as mnist
+    def network(img, label):
+        # 用户构建自定义网络，此处以一个简单的线性回归为例。
+        predict = fluid.layers.fc(input=img, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=predict, label=label)
+        return fluid.layers.mean(loss)
+    MEMORY_OPT = False
+    USE_CUDA = False
+    image = fluid.data(name='image', shape=[None, 1, 28, 28], dtype='float32')
+    label = fluid.data(name='label', shape=[None, 1], dtype='int64')
+    reader = fluid.layers.create_py_reader_by_data(capacity=64,
+                                                   feed_list=[image, label])
+    reader.decorate_paddle_reader(
+        paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500))
+    img, label = fluid.layers.read_file(reader)
+    loss = network(img, label) # 用户构建自定义网络并返回损失函数
+    place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    build_strategy = fluid.BuildStrategy()
+    build_strategy.memory_optimize = True if MEMORY_OPT else False
+    exec_strategy = fluid.ExecutionStrategy()
+    compiled_prog = fluid.compiler.CompiledProgram(
+        fluid.default_main_program()).with_data_parallel(
+            loss_name=loss.name,
+            build_strategy=build_strategy,
+            exec_strategy=exec_strategy)
+    for epoch_id in range(2):
+        reader.start()
+        try:
+            while True:
+                exe.run(compiled_prog, fetch_list=[loss.name])
+        except fluid.core.EOFException:
+            reader.reset()
--- a/doc/paddle/api/paddle/create_tensor_cn.rst
+++ b/doc/paddle/api/paddle/create_tensor_cn.rst
--- a/doc/paddle/api/paddle/static/nn/crf_decoding_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/crf_decoding_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/crop_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/crop_cn.rst
+.. _cn_api_fluid_layers_crop:
+crop
+-------------------------------
+.. py:function:: paddle.fluid.layers.crop(x, shape=None, offsets=None, name=None)
+该OP根据偏移量(offsets)和形状(shape)，裁剪输入张量。
+**注意:** 此OP已被弃用，它将在以后的版本中被删除，请使用 :ref:`cn_api_fluid_layers_crop_tensor` 替代
+**样例**：
+::
+    * Case 1:
+        Given
+            X = [[0, 1, 2, 0, 0]
+                 [0, 3, 4, 0, 0]
+                 [0, 0, 0, 0, 0]],
+        and
+            shape = [2, 2],
+            offsets = [0, 1],
+        output is:
+            Out = [[1, 2],
+                   [3, 4]].
+    * Case 2:
+        Given
+            X = [[0, 1, 2, 5, 0]
+                 [0, 3, 4, 6, 0]
+                 [0, 0, 0, 0, 0]],
+        and shape is tensor
+            shape = [[0, 0, 0]
+                     [0, 0, 0]]
+        and
+            offsets = [0, 1],
+        output is:
+            Out = [[1, 2, 5],
+                   [3, 4, 6]].
+参数:
+  - **x** (Variable) - 多维Tensor，数据类型为float32
+  - **shape** (Variable|list/tuple of integers) - 指定输出Tensor的形状，它可以是一个Tensor/整数列表/整数元组。如果是Tensor，它的秩必须与x相同，它的形状指定了输出Tensor的形状，它的元素的数值在这里不起作用，该方式适用于每次迭代时候需要改变输出形状的情况。如果是整数列表/元组，则其长度必须与x的秩相同
+  - **offsets** (Variable|list/tuple of integers|None，可选) - 指定每个维度上的裁剪的偏移量，它可以是一个Tensor，或者一个整数列表/整数元组。如果是一个Tensor，它的秩必须与x相同，这种方法适用于每次迭代的偏移量（offset）都可能改变的情况。如果是一个整数列表/元组，则长度必须与x的秩相同，如果offsets=None，则每个维度的偏移量为0。默认值为None
+  - **name** (str|None，可选) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回: 经过形状裁剪之后的Tensor，与输入x具有相同的数据类型
+返回类型: Variable
+抛出异常: 如果形状不是列表、元组或Variable，抛出ValueError
+**代码示例**:
+..  code-block:: python
+    import paddle.fluid as fluid
+    # case 1
+    # 输入x的形状为[-1, 3, 5]，
+    # 参数shape = y是个Variable，形状是[-1, 2, 2]，输出Tensor将具有和y一样的形状
+    # y的具体数值不起作用，起作用的只有它的形状
+    # 经过下面的crop操作之后输出张量的形状是: [-1, 2, 2]
+    x = fluid.layers.data(name="x", shape=[3, 5], dtype="float32")
+    y = fluid.layers.data(name="y", shape=[2, 2], dtype="float32")
+    crop = fluid.layers.crop(x, shape=y)
+    ## 或者 case 2
+    # 输入z的形状为: [-1, 3, 5], shape为整数列表[-1, 2, 3]
+    # 则经过下面的crop操作之后输出张量的形状为：[-1, 2, 3]
+    z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
+    crop = fluid.layers.crop(z, shape=[-1, 2, 3])
--- a/doc/paddle/api/paddle/crop_tensor_cn.rst
+++ b/doc/paddle/api/paddle/crop_tensor_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/cross_entropy_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/cross_entropy_cn.rst
+.. _cn_api_fluid_layers_cross_entropy:
+cross_entropy
+-------------------------------
+.. py:function:: paddle.fluid.layers.cross_entropy(input, label, soft_label=False, ignore_index=-100)
+:alias_main: paddle.nn.functional.cross_entropy
+:alias: paddle.nn.functional.cross_entropy,paddle.nn.functional.loss.cross_entropy
+:old_api: paddle.fluid.layers.cross_entropy
+该OP计算输入input和标签label间的交叉熵，可用于计算硬标签或软标签的交叉熵。
+1. 硬标签交叉熵算法：若soft_label = False, :math:`label[i_1, i_2, ..., i_k]` 表示每个样本的硬标签值:
+     .. math::
+        \\output[i_1, i_2, ..., i_k]=-log(input[i_1, i_2, ..., i_k, j]), label[i_1, i_2, ..., i_k] = j, j != ignore\_index\\
+2. 软标签交叉熵算法：若soft_label = True, :math:`label[i_1, i_2, ..., i_k, j]` 表明每个样本对应类别j的软标签值:
+     .. math::
+        \\output[i_1, i_2, ..., i_k]= -\sum_{j}label[i_1,i_2,...,i_k,j]*log(input[i_1, i_2, ..., i_k,j])\\
+参数：
+    - **input** (Variable) – 维度为 :math:`[N_1, N_2, ..., N_k, D]` 的多维Tensor，其中最后一维D是类别数目。数据类型为float32或float64。
+    - **label** (Variable) – 输入input对应的标签值。若soft_label=False，要求label维度为 :math:`[N_1, N_2, ..., N_k]` 或 :math:`[N_1, N_2, ..., N_k, 1]` ，数据类型为int64，且值必须大于等于0且小于D；若soft_label=True，要求label的维度、数据类型与input相同，且每个样本各软标签的总和为1。
+    - **soft_label** (bool) – 指明label是否为软标签。默认为False，表示label为硬标签；若soft_label=True则表示软标签。
+    - **ignore_index** (int) – 指定一个忽略的标签值，此标签值不参与计算，负值表示无需忽略任何标签值。仅在soft_label=False时有效。 默认值为-100。
+返回： 表示交叉熵结果的Tensor，数据类型与input相同。若soft_label=False，则返回值维度与label维度相同；若soft_label=True，则返回值维度为 :math:`[N_1, N_2, ..., N_k, 1]` 。
+返回类型：Variable
+**代码示例**
+..  code-block:: python
+        import paddle.fluid as fluid
+        class_num = 7
+        x = fluid.layers.data(name='x', shape=[3, 10], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        predict = fluid.layers.fc(input=x, size=class_num, act='softmax')
+        cost = fluid.layers.cross_entropy(input=predict, label=label)
--- a/doc/paddle/api/paddle/fluid/layers/ctc_greedy_decoder_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/ctc_greedy_decoder_cn.rst
+.. _cn_api_fluid_layers_ctc_greedy_decoder:
+ctc_greedy_decoder
+-------------------------------
+.. py:function:: paddle.fluid.layers.ctc_greedy_decoder(input, blank, input_length=None, padding_value=0, name=None)
+该OP用于贪婪策略解码序列，步骤如下:
+    1. 获取输入中的每一行的最大值索引，也就是numpy.argmax(input, axis=1)。
+    2. 对于step1结果中的每个序列，合并两个空白标签（blank）之间的重复标记，并删除所有空白标签。
+该API支持两种输入，LoDTensor和Tensor输入，不同输入的代码样例如下：
+**样例**：
+::
+        # for lod tensor input 
+        已知：
+        input.data = [[0.6, 0.1, 0.3, 0.1],
+                      [0.3, 0.2, 0.4, 0.1],
+                      [0.1, 0.5, 0.1, 0.3],
+                      [0.5, 0.1, 0.3, 0.1],
+                      [0.5, 0.1, 0.3, 0.1],
+                      [0.2, 0.2, 0.2, 0.4],
+                      [0.2, 0.2, 0.1, 0.5],
+                      [0.5, 0.1, 0.3, 0.1]]
+        input.lod = [[4, 4]]
+        计算过程：
+        1. 将argmax的运算结果应用于输入的第一个序列，即 input.data[0:4] 。
+           则得出的结果为[[0], [2], [1], [0]]
+        2. 合并重复的索引值部分，删除空白标签，即值为0（blank=0）的索引。
+           则第一个输入序列对应的输出为：[[2], [1]]
+        最后
+        output.data = [[2],
+                       [1],
+                       [3]]
+        output.lod = [[2, 1]]
+        # for tensor input
+        input.data = [[[0.6, 0.1, 0.3, 0.1],
+                [0.3, 0.2, 0.4, 0.1],
+                [0.1, 0.5, 0.1, 0.3],
+                [0.5, 0.1, 0.3, 0.1]],
+               [[0.5, 0.1, 0.3, 0.1],
+                [0.2, 0.2, 0.2, 0.4],
+                [0.2, 0.2, 0.1, 0.5],
+                [0.5, 0.1, 0.3, 0.1]]]
+        input_length.data = [[4], [4]]
+        input.shape = [2, 4, 4]
+        step1: Apply argmax to first input sequence which is input.data[0:4]. Then we get:
+            [[0], [2], [1], [0]], for input.data[4:8] is [[0], [3], [3], [0]], shape is [2,4,1]
+        step2: Change the argmax result to use padding mode, then argmax result is
+                [[0, 2, 1, 0], [0, 3, 3, 0]], shape is [2, 4], lod is [], input_length is [[4], [4]]
+        step3: Apply ctc_align to padding argmax result, padding_value is 0
+        Finally:
+        output.data = [[2, 1, 0, 0],
+                       [3, 0, 0, 0]]
+        output_length.data = [[2], [1]]
+参数:
+        - **input** (Variable) — 变长序列的概率， 在输入为LoDTensor情况下，它是具有LoD信息的二维LoDTensor。 形状为[Lp，num_classes +1]，其中Lp是所有输入序列的长度之和，num_classes是真实的类数。 在输入为Tensor情况下，它是带有填充的3-D张量，其形状为[batch_size，N，num_classes +1]。 （不包括空白标签）。 数据类型可以是float32或float64。
+        - **blank** (int) — Connectionist Temporal Classification (CTC) loss空白标签索引,  其数值属于半开区间[0,num_classes + 1）
+        - **name** (str|None，可选) – 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None
+返回：对于输入为LoDTensor的情况，返回CTC贪婪解码器的结果，即2-D LoDTensor，形状为[Lp，1]，数据类型为int64。 “ Lp”是所有输出序列长度的总和。 如果结果中的所有序列均为空，则结果LoDTensor将为[-1]，其中LoD为[[]]。对于输入为Tensor的情况，返回一个元组，(output, output_length), 其中，output是一个形状为 [batch_size, N]，类型为int64的Tensor。output_length是一个形状为[batch_size, 1]，类型为int64的Tensor，表示Tensor输入下，每个输出序列的长度。
+返回类型： Variable
+**代码示例**
+..  code-block:: python
+    # for lod mode
+    import paddle.fluid as fluid
+    x = fluid.data(name='x', shape=[None, 8], dtype='float32', lod_level=1)
+    cost = fluid.layers.ctc_greedy_decoder(input=x, blank=0)
+    # for padding mode
+    x_pad = fluid.data(name='x_pad', shape=[10, 4, 8], dtype='float32')
+    x_pad_len = fluid.data(name='x_pad_len', shape=[10, 1], dtype='int64')
+    out, out_len = fluid.layers.ctc_greedy_decoder(input=x_pad, blank=0,
+                input_length=x_pad_len)
--- a/doc/paddle/api/paddle/fluid/layers/cumsum_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/cumsum_cn.rst
+.. _cn_api_fluid_layers_cumsum:
+cumsum
+-------------------------------
+.. py:function:: paddle.fluid.layers.cumsum(x,axis=None,exclusive=None,reverse=None)
+:alias_main: paddle.cumsum
+:alias: paddle.cumsum,paddle.tensor.cumsum,paddle.tensor.math.cumsum
+:old_api: paddle.fluid.layers.cumsum
+沿给定轴(axis)的元素的累加和。默认结果的第一个元素和输入的第一个元素一致。如果exclusive为True，结果的第一个元素则为0。
+参数：
+    - **x** (Variable) - 累加的输入，需要进行累加操作的变量Tensor/LoDTensor。
+    - **axis** (int，可选) - 指明需要累加的维。-1代表最后一维。默认为：-1。
+    - **exclusive** (bool，可选) - 是否执行exclusive累加。默认为：False。
+    - **reverse** (bool，可选) - 若为True，则以相反顺序执行累加。默认为：False。
+返回：Variable(Tensor)。是累加的结果，即累加器的输出。
+返回类型：变量(Variable)。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.cumsum(data, axis=0)
--- a/doc/paddle/api/paddle/fluid/layers/data_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/data_cn.rst
+.. _cn_api_fluid_data:
+data
+-------------------------------
+.. py:function:: paddle.fluid.data(name, shape, dtype='float32', lod_level=0)
+:api_attr: 声明式编程模式（静态图)
+:alias_main: paddle.nn.data
+:alias: paddle.nn.data,paddle.nn.input.data
+:old_api: paddle.fluid.data
+该OP会在全局block中创建变量（Variable），该全局变量可被计算图中的算子（operator）访问。该变量可作为占位符用于数据输入。例如用执行器（Executor）feed数据进该变量
+注意：
+  不推荐使用 ``paddle.fluid.layers.data`` ，其在之后的版本中会被删除。请使用这个 ``paddle.fluid.data`` 。 
+  ``paddle.fluid.layers.data`` 在组网期间会设置创建的变量维度（shape）和数据类型（dtype），但不会检查输入数据的维度和数据类型是否符合要求。 ``paddle.fluid.data`` 会在运行过程中由Executor/ParallelExecutor检查输入数据的维度和数据类型。
+  如果想输入变长输入，可以使用 ``paddle.fluid.data`` 时将变长维度设为-1，或者直接输入 ``paddle.fluid.layers.data`` 且PaddlePaddle会按具体输入的形状运行。
+  本API创建的变量默认 ``stop_gradient`` 属性为True，这意味着反向梯度不会被传递过这个数据变量。如果用户想传递反向梯度，可以设置 ``var.stop_gradient = False`` 。
+参数：
+    - **name** (str)- 被创建的变量的名字，具体用法请参见 :ref:`api_guide_Name` 。
+    - **shape** (list|tuple)- 声明维度信息的list或tuple。
+    - **dtype** (np.dtype|VarType|str，可选)- 数据类型，支持bool，float16，float32，float64，int8，int16，int32，int64，uint8。默认值为float32。
+    - **lod_level** (int，可选)- LoDTensor变量的LoD level数，LoD level是PaddlePaddle的高级特性，一般任务中不会需要更改此默认值，关于LoD level的详细适用场景和用法请见 :ref:`cn_user_guide_lod_tensor` 。默认值为0。
+返回：全局变量，可进行数据访问
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    # Creates a variable with fixed size [3, 2, 1]
+    # User can only feed data of the same shape to x
+    x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')
+    # Creates a variable with changeable batch size -1.
+    # Users can feed data of any batch size into y,
+    # but size of each data sample has to be [2, 1]
+    y = fluid.data(name='y', shape=[-1, 2, 1], dtype='float32')
+    z = x + y
+    # In this example, we will feed x and y with np-ndarray "1"
+    # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
+    feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)
+    exe = fluid.Executor(fluid.CPUPlace())
+    out = exe.run(fluid.default_main_program(),
+                  feed={
+                      'x': feed_data,
+                      'y': feed_data
+                  },
+                  fetch_list=[z.name])
+    # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
+    print(out)
--- a/doc/paddle/api/paddle/static/nn/data_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/data_norm_cn.rst
--- a/doc/paddle/api/paddle/static/nn/deformable_conv_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/deformable_conv_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/deformable_roi_pooling_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/deformable_roi_pooling_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/density_prior_box_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/density_prior_box_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/detection_output_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/detection_output_cn.rst
--- a/doc/paddle/api/paddle/diag_cn.rst
+++ b/doc/paddle/api/paddle/diag_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/dice_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/dice_loss_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/distribute_fpn_proposals_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/distribute_fpn_proposals_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/double_buffer_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/double_buffer_cn.rst
+.. _cn_api_fluid_layers_double_buffer:
+double_buffer
+-------------------------------
+.. py:function:: paddle.fluid.layers.double_buffer(reader, place=None, name=None)
+生成一个双缓冲队列Reader。Reader类有DecoratedReader和FileReader，其中DecoratedReader又可以细分成CustomReader和BufferedReader。这里是基于BufferedReader，数据将复制到具有双缓冲队列的位置（由place指定），如果 ``place=None`` 则将使用executor执行的位置。
+参数:
+    - **reader** (Variable) – 需要wrap的reader变量Reader。
+    - **place** (Place，可选) – 目标数据的位置，比如CPU，GPU，GPU需要指明是哪张卡。默认是executor执行样本的位置。
+    - **name** (str，可选) – 变量的名字。该参数供开发人员打印调试信息时使用，具体用法参见 :ref:`api_guide_Name`，默认值为None。
+返回：Variable(Reader)。双缓冲队列的reader。
+返回类型：变量(Variable)。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    reader = fluid.layers.py_reader(capacity=64,
+                                    shapes=[(-1, 1, 28, 28), (-1, 1)],
+                                    dtypes=['float32', 'int64'],
+                                    use_double_buffer=False)
+    reader = fluid.layers.double_buffer(reader)
+    image, label = fluid.layers.read_file(reader)
--- a/doc/paddle/api/paddle/fluid/layers/dropout_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/dropout_cn.rst
+.. _cn_api_fluid_layers_dropout:
+dropout
+-------------------------------
+.. py:function:: paddle.fluid.layers.dropout(x,dropout_prob,is_test=False,seed=None,name=None,dropout_implementation='downgrade_in_infer')
+:alias_main: paddle.nn.functional.dropout
+:alias: paddle.nn.functional.dropout,paddle.nn.functional.common.dropout
+:old_api: paddle.fluid.layers.dropout
+dropout操作
+丢弃或者保持x的每个元素独立。Dropout是一种正则化手段，通过在训练过程中阻止神经元节点间的相关性来减少过拟合。根据给定的丢弃概率，dropout操作符按丢弃概率随机将一些神经元输出设置为0，其他的仍保持不变。
+dropout op可以从Program中删除，提高执行效率。
+参数：
+    - **x** (Variable) - 输入，多维Tensor。数据类型：float32和float64。
+    - **dropout_prob** (float32) - 输入单元的丢弃概率，即输入单元设置为0的概率。
+    - **is_test** (bool) - 标记是否是测试阶段。默认：False。
+    - **seed** (int) - 整型数据，用于创建随机种子。如果该参数设为None，则使用随机种子。注：如果给定一个整型种子，始终丢弃相同的输出单元。训练过程中勿用固定不变的种子。
+    - **name** (str|None) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+    - **dropout_implementation** (str) - 丢弃单元的方式，有'downgrade_in_infer'和'upscale_in_train'两种选择，默认：'downgrade_in_infer'。具体作用可以参考以下描述。
+      1. downgrade_in_infer(default), 在预测时减小输出结果
+         - train: out = input * mask
+         - inference: out = input * (1.0 - dropout_prob)
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``dropout_prob`` )
+      2. upscale_in_train, 增加训练时的结果
+         - train: out = input * mask / ( 1.0 - dropout_prob )
+         - inference: out = input
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``dropout_prob`` )
+dropout操作符可以从程序中移除，使程序变得高效。
+返回：Tensor。经过丢弃部分数据之后的结果，与输入X形状相同的张量。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    x = fluid.layers.data(name="x", shape=[32, 32], dtype="float32")
+    droped = fluid.layers.dropout(x, dropout_prob=0.5)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    np_x = np.random.random(size=(32, 32)).astype('float32')
+    output = exe.run(feed={"x": np_x}, fetch_list = [droped])
+    print(output)
--- a/doc/paddle/api/paddle/fluid/layers/dynamic_decode_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/dynamic_decode_cn.rst
+.. _cn_api_fluid_layers_dynamic_decode:
+dynamic_decode
+-------------------------------
+.. py:function:: paddle.fluid.layers.dynamic_decode(decoder, inits=None, max_step_num=None, output_time_major=False, **kwargs)
+:api_attr: 声明式编程模式（静态图)
+该接口重复执行 :code:`decoder.step()` 直到 其返回的表示完成状态的Tensor中的值全部为True或解码步骤达到 :code:`max_step_num`。
+:code:`decoder.initialize()` 会在解码循环之前被调用一次。如果 :code:`decoder` 实现了 :code:`finalize` 方法，则 :code:`decoder.finalize()` 在解码循环后将被调用一次。
+参数:
+  - **decoder** (Decoder) - 解码器的实例。
+  - **inits** (object，可选) - 传递给 :code:`decoder.initialize` 的参数。默认为None。
+  - **max_step_num** (int，可选) - 最大步数。如果未提供，解码直到解码过程完成（ :code:`decoder.step()` 返回的表示完成状态的Tensor中的值全部为True）。默认为None。
+  - **output_time_major** (bool，可选) - 指明最终输出(此方法的第一个返回值)中包含的Tensor的数据布局。如果为False，其将使用batch优先的数据布局, 此时的形状为 :math:`[batch\_size，seq\_len，...]`。如果为True，其将使用time优先的数据布局，此时的形状为 :math:`[seq\_len，batch\_size，...]`。默认值为False。
+  - **kwargs** - 其他命名关键字参数。这些参数将传递给 :code:`decoder.step`。
+返回:一个二元组 :code:`(final_outputs，final_states)`, 其包含了最终的输出和状态，这两者都是Tensor或Tensor的嵌套结构。:code:`final_outputs` 具有与 :code:`decoder.output_dtype` 相同的结构和数据类型， 其中的每个tensor都是对所有解码时间步对应输出的堆叠。 这些tensor也可能会通过 :code:`decoder.finalize` 进行修改。:code:`final_states` 是最后时间步的状态，和 :code:`decoder.initialize` 返回的初始状态具有相同的结构，其中的tensor也具有相同的形状 和数据类型。
+返回类型：tuple
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    from paddle.fluid.layers import GRUCell, BeamSearchDecoder, dynamic_decode
+    encoder_output = fluid.data(name="encoder_output",
+                            shape=[-1, 32, 128],
+                            dtype="float32")
+    trg_embeder = lambda x: fluid.embedding(
+        x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding"))
+    output_layer = lambda x: layers.fc(x,
+                                    size=10000,
+                                    num_flatten_dims=len(x.shape) - 1,
+                                    param_attr=fluid.ParamAttr(name=
+                                                                "output_w"),
+                                    bias_attr=False)
+    decoder_cell = GRUCell(hidden_size=128)
+    decoder = BeamSearchDecoder(decoder_cell,
+                                start_token=0,
+                                end_token=1,
+                                beam_size=4,
+                                embedding_fn=trg_embeder,
+                                output_fn=output_layer)
+    outputs = dynamic_decode(	
+        decoder=decoder, inits=decoder_cell.get_initial_states(encoder_output))
--- a/doc/paddle/api/paddle/fluid/layers/dynamic_gru_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/dynamic_gru_cn.rst
+.. _cn_api_fluid_layers_dynamic_gru:
+dynamic_gru
+-------------------------------
+.. py:function::  paddle.fluid.layers.dynamic_gru(input, size, param_attr=None, bias_attr=None, is_reverse=False, gate_activation='sigmoid', candidate_activation='tanh', h_0=None, origin_mode=False)
+:api_attr: 声明式编程模式（静态图)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用StaticRNN（fluid.layers.** :ref:`cn_api_fluid_layers_StaticRNN` **）。**
+该OP用于在完整序列上逐个时间步的进行单层Gated Recurrent Unit（GRU）的计算，单个时间步内GRU的计算支持以下两种计算方式：
+如果origin_mode为True，则使用的运算公式来自论文
+`Learning Phrase Representations using RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_ 。
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \tilde{h_t}
+如果origin_mode为False，则使用的运算公式来自论文
+`Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling  <https://arxiv.org/pdf/1412.3555.pdf>`_ 。
+公式如下:
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \tilde{h_t}
+其中， :math:`x_t` 为当前时间步的输入，这个输入并非 ``input``，该OP不包含 :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` 的计算， **注意** 要在该OP前使用大小为 ``size`` 的3倍的全连接层并将其输出作为 ``input``；
+:math:`h_{t-1}` 为前一时间步的隐状态 ``hidden``； :math:`u_t` 、 :math:`r_t` 、 :math:`\tilde{h_t}` 和 :math:`h_t` 分别代表了GRU单元中update gate（更新门）、reset gate（重置门）、candidate hidden（候选隐状态）和隐状态输出; :math:`\odot` 为逐个元素相乘；
+:math:`W_{uh}, b_u` 、 :math:`W_{rh}, b_r` 和 :math:`W_{ch}, b_c` 分别代表更新门、重置门和候选隐状态在计算时使用的权重矩阵和偏置。在实现上，三个权重矩阵合并为一个 :math:`[D, D \times 3]` 形状的Tensor存放，三个偏置拼接为一个 :math:`[1, D \times 3]` 形状的Tensor存放，其中 :math:`D` 为隐单元的数目；权重Tensor存放布局为： :math:`W_{uh}` 和 :math:`W_{rh}` 拼接为 :math:`[D, D  \times 2]` 形状位于前半部分，:math:`W_{ch}` 以 :math:`[D, D]` 形状位于后半部分。
+参数:
+    - **input** (Variable) – LoD level为1的LoDTensor，表示经线性变换后的序列输入，形状为 :math:`[T, D \times 3]` ，其中 :math:`T` 表示mini-batch中所有序列长度之和， :math:`D` 为隐状态特征维度的大小。数据类型为float32或float64。
+    - **size** (int) – 隐状态特征维度的大小
+    - **param_attr** (ParamAttr，可选) – 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **is_reverse** (bool，可选) – 指明是否按照和输入相反的序列顺序计算，默认为False。
+    - **gate_activation** (str，可选) – 公式中 :math:`act_g` 激活函数的类型。支持identity、sigmoid、tanh、relu四种激活函数类型，默认为sigmoid。
+    - **candidate_activation** (str，可选) – 公式中 :math:`act_c` 激活函数的类型。支持identity、sigmoid、tanh、relu四种激活函数类型，默认为tanh。
+    - **h_0** (Variable，可选) – 表示初始隐状态的Tensor，若未提供，则默认为0。其形状为 :math:`[N, D]` , 其中 :math:`N` 为输入mini-batch中序列的数目， :math:`D` 为隐状态特征维度的大小。数据类型与 ``input`` 相同。默认值为None。
+    - **origin_mode** (bool，可选) – 指明要使用的GRU计算方式，两种计算方式具体差异见公式描述，默认值为False。
+返回： 形状为 :math:`[T, D]` 、LoD level为1的LoDTensor，其中 :math:`T` 表示mini-batch中所有序列长度之和， :math:`D` 为隐状态特征维度的大小。表示经过GRU变换的输出特征序列，和 ``input`` 具有相同的LoD（序列长度）和数据类型。
+返回类型: Variable
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    dict_dim, emb_dim = 128, 64
+    data = fluid.data(name='sequence',
+                shape=[None],
+                dtype='int64',
+                lod_level=1)
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
+    hidden_dim = 512
+    x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
+    hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
--- a/doc/paddle/api/paddle/fluid/layers/dynamic_lstm_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/dynamic_lstm_cn.rst
+.. _cn_api_fluid_layers_dynamic_lstm:
+dynamic_lstm
+-------------------------------
+.. py:function::  paddle.fluid.layers.dynamic_lstm(input, size, h_0=None, c_0=None, param_attr=None, bias_attr=None, use_peepholes=True, is_reverse=False, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', dtype='float32', name=None)
+:api_attr: 声明式编程模式（静态图)
+该OP实现了 LSTM，即 Long-Short Term Memory（长短期记忆）运算 - `Hochreiter, S., & Schmidhuber, J. (1997) <http://deeplearning.cs.cmu.edu/pdfs/Hochreiter97_lstm.pdf>`_。
+.. note::
+    - 该OP仅支持 LoDTensor 作为输入，如果您需要处理的是Tensor，请使用 :ref:`cn_api_fluid_layers_lstm` 。
+    - 在实现的时候为了提升效率，用户必须将LSTM的输入先进行线性映射，将维度为 [T, hidden_size] 的输入映射为 [T, 4 × hidden_size] 输入，然后再传给该OP。
+该OP的默认实现方式为 diagonal/peephole 连接，参见 `Gers, F. A., & Schmidhuber, J. (2000) <ftp://ftp.idsia.ch/pub/juergen/TimeCount-IJCNN2000.pdf>`_。
+如果需要禁用 peephole 连接方法，将 use_peepholes 设为 False 即可。 
+该OP对于序列中每一个时间步的计算公式如下：
+.. math::
+      i_t=\sigma (W_{ix}x_{t}+W_{ih}h_{t-1}+W_{ic}c_{t-1}+b_i)
+.. math::
+      f_t=\sigma (W_{fx}x_{t}+W_{fh}h_{t-1}+W_{fc}c_{t-1}+b_f)
+.. math::
+      o_t=\sigma (W_{ox}x_{t}+W_{oh}h_{t-1}+W_{oc}c_{t-1}+b_o)
+.. math::
+      \widetilde{c_t}=act_g(W_{cx}x_{t}+W_{ch}h_{t-1}+b_{c})
+.. math::
+      c_t=f_t\odot c_{t-1}+i_t\odot \widetilde{c_t}
+.. math::
+      h_t=o_t\odot act_h(c_t)
+公式中的概念信息如下：
+      - :math:`x_{t}` 表示时间步 :math:`t` 的输入
+      - :math:`h_{t}` 表示时间步 :math:`t` 的 hidden 状态
+      - :math:`h_{t-1}, c_{t-1}` 分别表示前一个时间步的 hidden 和 cell 状态
+      - :math:`\widetilde{c_t}` 表示候选的 cell 状态
+      - :math:`i_t` ，:math:`f_t` 和 :math:`o_t` 分别为 input gate，forget gate，output gate
+      - :math:`W` 表示 weight （例如， :math:`W_{ix}` 是在计算 input gate :math:`i_t` 时，对输入 :math:`x_{t}` 做线性变换的 weight）
+      - :math:`b` 表示 bias （例如， :math:`b_{i}` 是 input gate 的 bias）
+      - :math:`\sigma` 表示 gate 的非线性激活函数，默认为 sigmoid
+      - :math:`act_g， act_h` 分别表示 cell 输入和 cell 输出的非线性激活函数，默认为 tanh
+      - :math:`\odot` 表示矩阵的 Hadamard product，即对两个维度相同的矩阵，将相同位置的元素相乘，得到另一个维度相同的矩阵
+参数:
+  - **input** ( :ref:`api_guide_Variable` ) 维度为 :math:`[T, 4*hidden\_size]` 的多维 LoDTensor（必须在传入该OP前对维度为 :math:`[T, hidden\_size]` 的输入经过线性变换得到），其中 T 为 batch 中所有样本的长度之和，hidden_size 为隐层大小，数据类型为 float32 或者 float64。
+  - **size** (int) – 必须为 4*hidden_size。
+  - **h_0** ( :ref:`api_guide_Variable` ，可选) 维度为 :math:`[batch\_size, hidden\_size]` 的多维 Tensor，其中 hidden_size 为隐层大小，数据类型为 float32 或者 float64。如果为 None，该OP会自动设置为全0的向量。默认值为None。
+  - **c_0** ( :ref:`api_guide_Variable` ，可选) 维度为 :math:`[batch\_size, hidden\_size]` 的多维 Tensor，其中 hidden_size 为隐层大小，数据类型为 float32 或者 float64。如果为 None，该OP会自动设置为全0的向量；:math:`h_0, c_0` 如果要设置为None，必须同时为None。默认值为None。
+  - **param_attr** (ParamAttr，可选) – 指定权重参数属性的对象。如果为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。如果用户需要设置此属性，维度必须等于 :math:`[hidden\_size, 4*hidden\_size]`。默认值为None。
+  - **bias_attr** (ParamAttr，可选) – 指定偏置参数属性的对象。如果为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。如果用户需要设置此属性，当 use_peepholes=False 时，维度需为 :math:`[1, 4*hidden\_size]`；当 use_peepholes=True 时，维度需为 :math:`[1, 7*hidden\_size]`。默认值为None。
+  - **use_peepholes** (bool，可选) – 是否使用 peephole 连接。默认值为True。
+  - **is_reverse** (bool，可选) – 是否将输入的数据根据样本长度进行逆序，同时会将输出进行逆序，用户拿到结果之后，不需要再逆序。默认值为False。
+  - **gate_activation** (str，可选) – 应用于input gate，forget gate， output gate 的激活函数。默认值为sigmoid。
+  - **cell_activation** (str，可选) – 用于cell输出的激活函数（即公式中的 :math:`act_h` ）。默认值为tanh。
+  - **candidate_activation** (str，可选) – 用于候选cell状态（cell输入）的激活函数（即公式中的 :math:`act_g` ）。默认值为tanh。
+  - **dtype** (str，可选) – 数据类型为 float32 或者 float64。默认值为 float32。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回：经过lstm运算输出的 hidden 和 cell 的状态的tuple，包括
+- hidden：LSTM hidden的输出结果，维度为 :math:`[T, hidden\_size]` 的LoDTensor，且LoD保持与输入一致，数据类型与input一致。
+- cell：LSTM cell的输出结果，维度为 :math:`[T, hidden\_size]` 的LoDTensor，且LoD保持与输入一致，数据类型与input一致。
+返回类型: tuple（ :ref:`api_guide_Variable` , :ref:`api_guide_Variable` ）
+**代码示例**
+..  code-block:: python
+      import paddle.fluid as fluid
+      emb_dim = 256
+      vocab_size = 10000
+      hidden_dim = 512
+      data = fluid.layers.data(name='x', shape=[1], dtype='int32', lod_level=1)
+      emb = fluid.layers.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True)
+      forward_proj = fluid.layers.fc(input=emb, size=hidden_dim * 4, bias_attr=False)
+      forward, cell = fluid.layers.dynamic_lstm(input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
+      forward.shape  # (-1, 512)
+      cell.shape  # (-1, 512)
--- a/doc/paddle/api/paddle/fluid/layers/dynamic_lstmp_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/dynamic_lstmp_cn.rst
+.. _cn_api_fluid_layers_dynamic_lstmp:
+dynamic_lstmp
+-------------------------------
+.. py:function:: paddle.fluid.layers.dynamic_lstmp(input, size, proj_size, param_attr=None, bias_attr=None, use_peepholes=True, is_reverse=False, gate_activation='sigmoid', cell_activation='tanh', candidate_activation='tanh', proj_activation='tanh', dtype='float32', name=None, h_0=None, c_0=None, cell_clip=None, proj_clip=None)
+:api_attr: 声明式编程模式（静态图）
+.. note::
+    在实现的时候为了提升效率，用户必须将输入先进行线性映射，将维度为 [T, hidden_size] 的输入映射为 [T, 4×hidden_size] 输入，然后再传给该OP。
+该OP实现了LSTMP（LSTM Projected）层。LSTMP层在LSTM层之后有一个单独的线性映射层。 -- `Sak, H., Senior, A., & Beaufays, F. (2014) <https://ai.google/research/pubs/pub43905.pdf>`_。
+与标准的LSTM层相比，LSTMP多出来的线性映射层，用于从原始隐藏状态 :math:`h_t` 映射到较低维的状态 :math:`r_t`，
+从而减少参数总数和计算复杂度，特别是输出单元相对较大的情况下。
+该OP的默认实现方式为 diagonal/peephole 连接，参见 `Gers, F. A., & Schmidhuber, J. (2000) <ftp://ftp.idsia.ch/pub/juergen/TimeCount-IJCNN2000.pdf>`_。
+如果需要禁用 peephole 连接方法，将 use_peepholes 设为 False 即可。 
+该OP对于序列中每一个时间步的计算公式如下：
+.. math::
+      i_t = \sigma(W_{ix}x_{t} + W_{ir}r_{t-1} + W_{ic}c_{t-1} + b_i)
+.. math::
+      f_t = \sigma(W_{fx}x_{t} + W_{fr}r_{t-1} + W_{fc}c_{t-1} + b_f)
+.. math::
+      o_t = \sigma(W_{ox}x_{t} + W_{or}r_{t-1} + W_{oc}c_{t-1} + b_o)
+.. math::
+      \widetilde{c_t} = act_g(W_{cx}x_t + W_{cr}r_{t-1} + b_c)
+.. math::
+      c_t = f_t \odot c_{t-1} + i_t \odot \widetilde{c_t}
+.. math::
+      h_t = o_t \odot act_h(c_t)
+.. math::
+      r_t = \overline{act_h}(W_{rh}h_t)
+公式中的概念信息如下：
+      - :math:`x_{t}` 表示时间步 :math:`t` 的输入
+      - :math:`h_{t}` 表示时间步 :math:`t` 的 hidden 状态
+      - :math:`r_{t}` : 隐藏状态循环的映射输出的状态
+      - :math:`h_{t-1}, c_{t-1}, r_{t-1}` 分别表示前一个时间步的 hidden 状态，cell 状态和循环映射输出状态
+      - :math:`\widetilde{c_t}` 表示候选的 cell 状态
+      - :math:`i_t` ，:math:`f_t` 和 :math:`o_t` 分别为 input gate，forget gate，output gate
+      - :math:`W` 表示 weight （例如， :math:`W_{ix}` 是在计算 input gate :math:`i_t` 时，对输入 :math:`x_{t}` 做线性变换的 weight）
+      - :math:`b` 表示 bias （例如， :math:`b_{i}` 是 input gate 的 bias）
+      - :math:`\sigma` 表示 gate 的非线性激活函数，默认为 sigmoid
+      - :math:`act_g, act_h, \overline{act_h}` 分别表示 cell 输入、cell 输出和映射输出的非线性激活函数，默认为 tanh
+      - :math:`\odot` 表示矩阵的 Hadamard product，即对两个维度相同的矩阵，将相同位置的元素相乘，得到另一个维度相同的矩阵
+参数：
+  - **input** ( :ref:`api_guide_Variable` ) 维度为 :math:`[T, 4*hidden\_size]` 的多维 LoDTensor（必须在传入该OP前对维度为 :math:`[T, hidden\_size]` 的输入经过线性变换得到），其中 T 为 batch 中所有样本的长度之和，hidden_size 为隐层大小，数据类型为 float32 或者 float64。
+  - **size** (int) – 必须为 4 * hidden_size。
+  - **proj_size** (int) - 投影映射输出的大小。
+  - **param_attr** (ParamAttr，可选) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    说明:
+      1. 隐藏状态到隐藏状态（Hidden-hidden）权重 = :math:`\{ W_{cr},W_{ir},W_{fr},W_{or} \}`，维度为 [P, 4*hidden_size] ，P是投影大小
+      2. 投影（Projection）权重 = :math:`\{ W_{rh} \}`，维度为 [D, P] ，其中 D 是隐层大小（hidden_size），P 是投影大小
+  - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    说明:
+      1. use_peepholes = False
+          - Biases = { :math:`b_{c},b_{i},b_{f},b_{o}`}
+          - 维度为 [1, 4*hidden_size]
+      2. use_peepholes = True
+          - Biases = { :math:`b_{c},b_{i},b_{f},b_{o},W_{ic},W_{fc},W_{oc}`}
+          - 维度为 [1, 7*hidden_size]
+  - **use_peepholes** (bool，可选) - 是否使用 peephole 连接。默认值为True。
+  - **is_reverse** (bool，可选) - 是否计算反向LSTM，默认值为False。
+  - **gate_activation** (str，可选) - 应用于input gate，forget gate， output gate 的激活函数。可选值包括 sigmoid，tanh，relu，identity。默认值为 sigmoid。
+  - **cell_activation** (str，可选) - cell输出的激活函数。可选值包括 sigmoid，tanh，relu，identity。默认值为 tanh。
+  - **candidate_activation** (str，可选) - 候选隐藏状态（candidate hidden state）的激活函数。可选值包括 sigmoid，tanh，relu，identity。默认值为 tanh。
+  - **proj_activation** (str，可选) - 投影输出的激活函数。可选值包括 sigmoid，tanh，relu，identity。默认值为 tanh。
+  - **dtype** (str，可选) - 数据类型。可选值包括 float32，float64。默认值为 float32。
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **h_0** ( :ref:`api_guide_Variable` ，可选) 维度为 :math:`[batch\_size, hidden\_size]` 的多维 Tensor。如果为 None，该OP会自动设置为全0的向量。默认值为None。
+  - **c_0** ( :ref:`api_guide_Variable` ，可选) 维度为 :math:`[batch\_size, hidden\_size]` 的多维 Tensor。如果为 None，该OP会自动设置为全0的向量；:math:`h_0, c_0` 如果要设置为None，必须同时为None。默认值为None。
+  - **cell_clip** (float，可选) - 如果该参数不为None，则在单元输出激活之前，单元状态将被此值剪裁。默认值为None。
+  - **proj_clip** (float，可选) - 如果 proj_size > 0 并且 proj_clip 不为None，那么将投影值沿元素方向剪切到[-proj_clip，proj_clip]内。默认值为None。
+返回：经过lstmp运算输出的 hidden 的映射和 cell 状态的tuple，包括
+- hidden：LSTM hidden的输出结果，维度为 :math:`[T, P]` 的LoDTensor，且LoD保持与输入一致，数据类型与input一致。
+- cell：LSTM cell的输出结果，维度为 :math:`[T, hidden\_size]` 的LoDTensor，且LoD保持与输入一致，数据类型与input一致。
+返回类型: tuple（ :ref:`api_guide_Variable` , :ref:`api_guide_Variable` ）
+**代码示例**：
+..  code-block:: python
+    import paddle.fluid as fluid
+    dict_dim, emb_dim = 128, 64
+    data = fluid.layers.data(name='sequence', shape=[1],
+                        dtype='int32', lod_level=1)
+    emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim])
+    hidden_dim, proj_dim = 512, 256
+    fc_out = fluid.layers.fc(input=emb, size=hidden_dim * 4,
+                        act=None, bias_attr=None)
+    proj_out, cell = fluid.layers.dynamic_lstmp(input=fc_out,
+                                        size=hidden_dim * 4,
+                                        proj_size=proj_dim,
+                                        use_peepholes=False,
+                                        is_reverse=True,
+                                        cell_activation="tanh",
+                                        proj_activation="tanh")
+    proj_out.shape  # (-1, 256)
+    cell.shape  # (-1, 512)
--- a/doc/paddle/api/paddle/nn/functional/edit_distance_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/edit_distance_cn.rst
--- a/doc/paddle/api/paddle/elementwise_add_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_add_cn.rst
--- a/doc/paddle/api/paddle/elementwise_div_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_div_cn.rst
--- a/doc/paddle/api/paddle/elementwise_floordiv_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_floordiv_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/elementwise_max_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/elementwise_max_cn.rst
+.. _cn_api_fluid_layers_elementwise_max:
+elementwise_max
+-------------------------------
+.. py:function:: paddle.fluid.layers.elementwise_max(x, y, axis=-1, act=None, name=None)
+:alias_main: paddle.elementwise_max
+:alias: paddle.elementwise_max,paddle.tensor.elementwise_max,paddle.tensor.math.elementwise_max
+:old_api: paddle.fluid.layers.elementwise_max
+该OP逐元素对比输入的两个多维Tensor，并且把各个位置更大的元素保存到返回结果中。
+等式是：
+.. math::
+        Out = max(X, Y)
+- :math:`X` ：多维Tensor。
+- :math:`Y` ：多维Tensor。
+此运算算子有两种情况：
+        1. :math:`Y` 的 ``shape`` 与 :math:`X` 相同。
+        2. :math:`Y` 的 ``shape`` 是 :math:`X` 的连续子序列。
+对于情况2：
+        1. 用 :math:`Y` 的 ``shape`` 匹配 :math:`X` 的 ``shape``，其中 ``axis`` 是 :math:`Y` 在 :math:`X` 上的起始维度的位置。
+        2. 如果 ``axis`` 为-1（默认值），则 :math:`axis = rank(X) - rank(Y)` 。
+        3. 考虑到子序列， :math:`Y` 的大小为1的尾部维度将被忽略，例如shape（Y）=（2,1）=>（2）。
+例如：
+..  code-block:: text
+        shape(X) = (2, 3, 4, 5), shape(Y) = (,)
+        shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
+        shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2
+        shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+        shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+        shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0
+参数：
+        - **x** （Variable）- 多维Tensor。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64`` 。
+        - **y** （Variable）- 多维Tensor。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64`` 。
+        - **axis** （int32, 可选）- Y的维度对应到X维度上时的索引。默认值为 -1。
+        - **act** （string, 可选）- 激活函数名称，作用于输出上。默认值为None。详细请参考 :ref:`api_guide_activations` ， 常见的激活函数有: ``relu`` ``tanh`` ``sigmoid`` 等。
+        - **name** （string, 可选）- 输出的名字。默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
+返回：    维度和数据类型与 ``x`` 相同的多维Tensor。
+返回类型： 多维Tensor。
+**代码示例 1**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    def gen_data():
+        return {
+            "x": np.array([2, 3, 4]),
+            "y": np.array([1, 5, 2])
+        }
+    x = fluid.layers.data(name="x", shape=[3], dtype='float32')
+    y = fluid.layers.data(name="y", shape=[3], dtype='float32')
+    z = fluid.layers.elementwise_max(x, y)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    z_value = exe.run(feed=gen_data(),
+                        fetch_list=[z.name])
+    print(z_value) #[2, 5, 4]
+**代码示例 2**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    def gen_data():
+        return {
+            "x": np.ones((2, 3, 4, 5)).astype('float32'),
+            "y": np.zeros((3, 4)).astype('float32')
+        }
+    x = fluid.layers.data(name="x", shape=[2,3,4,5], dtype='float32')
+    y = fluid.layers.data(name="y", shape=[3,4], dtype='float32')
+    z = fluid.layers.elementwise_max(x, y, axis=1)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    z_value = exe.run(feed=gen_data(),
+                        fetch_list=[z.name])
+    print(z_value)#[[[[1., 1., 1., 1., 1.] .... [1., 1., 1., 1., 1.]]]]
--- a/doc/paddle/api/paddle/fluid/layers/elementwise_min_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/elementwise_min_cn.rst
+.. _cn_api_fluid_layers_elementwise_min:
+elementwise_min
+-------------------------------
+.. py:function:: paddle.fluid.layers.elementwise_min(x, y, axis=-1, act=None, name=None)
+:alias_main: paddle.elementwise_min
+:alias: paddle.elementwise_min,paddle.tensor.elementwise_min,paddle.tensor.math.elementwise_min
+:old_api: paddle.fluid.layers.elementwise_min
+该OP逐元素对比输入的两个多维Tensor，并且把各个位置更小的元素保存到返回结果中。
+等式是：
+.. math::
+        Out = min(X, Y)
+- :math:`X` ：多维Tensor。
+- :math:`Y` ：多维Tensor。
+此运算算子有两种情况：
+        1. :math:`Y` 的 ``shape`` 与 :math:`X` 相同。
+        2. :math:`Y` 的 ``shape`` 是 :math:`X` 的连续子序列。
+对于情况2：
+        1. 用 :math:`Y` 的 ``shape`` 匹配 :math:`X` 的 ``shape``，其中 ``axis`` 是 :math:`Y` 在 :math:`X` 上的起始维度的位置。
+        2. 如果 ``axis`` 为-1（默认值），则 :math:`axis = rank(X) - rank(Y)` 。
+        3. 考虑到子序列， :math:`Y` 的大小为1的尾部维度将被忽略，例如shape（Y）=（2,1）=>（2）。
+例如：
+..  code-block:: text
+        shape(X) = (2, 3, 4, 5), shape(Y) = (,)
+        shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
+        shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2
+        shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
+        shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
+        shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0
+参数：
+        - **x** （Variable）- 多维Tensor。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64`` 。
+        - **y** （Variable）- 多维Tensor。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64`` 。
+        - **axis** （int32, 可选）- Y的维度对应到X维度上时的索引。默认值为 -1。
+        - **act** （string, 可选）- 激活函数名称，作用于输出上。默认值为None。详细请参考 :ref:`api_guide_activations` ， 常见的激活函数有: ``relu`` ``tanh`` ``sigmoid`` 等。
+        - **name** （string, 可选）- 输出的名字。默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
+返回：    维度和数据类型与 ``x`` 相同的多维Tensor。
+返回类型： 多维Tensor。
+**代码示例 1**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    def gen_data():
+        return {
+            "x": np.array([2, 3, 4]),
+            "y": np.array([1, 5, 2])
+        }
+    x = fluid.layers.data(name="x", shape=[3], dtype='float32')
+    y = fluid.layers.data(name="y", shape=[3], dtype='float32')
+    z = fluid.layers.elementwise_min(x, y)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    z_value = exe.run(feed=gen_data(),
+                        fetch_list=[z.name])
+    print(z_value) #[1, 3, 2]
+**代码示例 2**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    def gen_data():
+        return {
+            "x": np.ones((2, 3, 4, 5)).astype('float32'),
+            "y": np.zeros((3, 4)).astype('float32')
+        }
+    x = fluid.layers.data(name="x", shape=[2,3,4,5], dtype='float32')
+    y = fluid.layers.data(name="y", shape=[3,4], dtype='float32')
+    z = fluid.layers.elementwise_min(x, y, axis=1)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    z_value = exe.run(feed=gen_data(),
+                        fetch_list=[z.name])
+    print(z_value)#[[[[0., 0., 0., 0., 0.] .... [0., 0., 0., 0., 0.]]]]
--- a/doc/paddle/api/paddle/elementwise_mod_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_mod_cn.rst
--- a/doc/paddle/api/paddle/elementwise_pow_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_pow_cn.rst
--- a/doc/paddle/api/paddle/elementwise_sub_cn.rst
+++ b/doc/paddle/api/paddle/elementwise_sub_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/elu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/elu_cn.rst
+.. _cn_api_fluid_layers_elu:
+elu
+-------------------------------
+.. py:function:: paddle.fluid.layers.elu(x, alpha=1.0, name=None)
+:alias_main: paddle.nn.functional.elu
+:alias: paddle.nn.functional.elu,paddle.nn.functional.activation.elu
+:old_api: paddle.fluid.layers.elu
+ELU激活层（ELU Activation Operator）
+根据 https://arxiv.org/abs/1511.07289 对输入Tensor中每个元素应用以下计算。
+.. math::
+        out = \max(0, x) + \min(0, \alpha * (e^{x} - 1))
+参数:
+ - **x** (Variable) - 该OP的输入为多维Tensor。数据类型为float32或float64。
+ - **alpha** (float, 可选) - ELU的alpha值，默认值为1.0。
+ - **name** (str, 可选) - 具体用法请参见 :ref:`api_guide_Name`，一般无需设置，默认值为None。
+返回: 输出为Tensor，与 ``x`` 维度相同、数据类型相同。
+返回类型： Variable
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    input_elu = np.array([[-1,6],[1,15.6]])
+    with fluid.dygraph.guard():
+        x = fluid.dygraph.to_variable(input_elu)
+        y = fluid.layers.elu(x, alpha=0.2)
+        print(y.numpy())
+        # [[-0.12642411  6.        ]
+        # [ 1.          15.6       ]]
--- a/doc/paddle/api/paddle/fluid/layers/embedding_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/embedding_cn.rst
+.. _cn_api_fluid_embedding:
+embedding
+-------------------------------
+.. py:function:: paddle.fluid.embedding(input, size, is_sparse=False, is_distributed=False, padding_idx=None, param_attr=None, dtype='float32')
+:api_attr: 声明式编程模式（静态图）
+该OP根据input中的id信息从embedding矩阵中查询对应embedding信息，函数会根据输入的size (vocab_size, emb_size)和dtype自动构造一个二维embedding矩阵。
+输出的Tensor的shape是在输入Tensor shape的最后一维后面添加了emb_size的维度。
+注：input中的id必须满足 ``0 <= id < size[0]``，否则程序会抛异常退出。
+::
+    Case 1:
+    input是Tensor, 且padding_idx = -1
+        input.data = [[1, 3], [2, 4], [4, 127]]
+        input.shape = [3, 2]
+    若size = [128, 16]
+    输出为Tensor:
+        out.shape = [3, 2, 16]
+        out.data = [[[0.129435295, 0.244512452, ..., 0.436322452],
+                     [0.345421456, 0.524563927, ..., 0.144534654]],
+                    [[0.345249859, 0.124939536, ..., 0.194353745],
+                     [0.945345345, 0.435394634, ..., 0.435345365]],
+                    [[0.945345345, 0.435394634, ..., 0.435345365],
+                     [0.0,         0.0,         ..., 0.0        ]]]  # padding data
+    输入的padding_idx小于0，则自动转换为padding_idx = -1 + 128 = 127, 对于输入id为127的词，进行padding处理。
+    Case 2:
+    input是lod level 为1的LoDTensor, 且padding_idx = 0
+        input.lod = [[2, 3]]
+        input.data = [[1], [3], [2], [4], [0]]
+        input.shape = [5, 1]
+    若size = [128, 16]
+    输出为LoDTensor:
+        out.lod = [[2, 3]]
+        out.shape = [5, 1, 16]
+        out.data = [[[0.129435295, 0.244512452, ..., 0.436322452]],
+                    [[0.345421456, 0.524563927, ..., 0.144534654]],
+                    [[0.345249859, 0.124939536, ..., 0.194353745]],
+                    [[0.945345345, 0.435394634, ..., 0.435345365]],
+                    [[0.0,         0.0,         ..., 0.0        ]]]  # padding data
+    输入的padding_idx = 0，则对于输入id为0的词，进行padding处理。
+参数：
+    - **input** (Variable) - 存储id信息的Tensor或LoDTensor，数据类型必须为：int64。input中的id必须满足 ``0 <= id < size[0]`` 。
+    - **size** (tuple|list) - embedding矩阵的维度。必须包含两个元素，第一个元素为vocab_size(词表大小), 第二个为emb_size（embedding层维度）。
+    - **is_sparse** (bool) - 是否使用稀疏的更新方式，这个参数只会影响反向的梯度更新的性能，sparse更新速度更快，推荐使用稀疏更新的方式。但某些optimizer不支持sparse更新，比如 :ref:`cn_api_fluid_optimizer_AdadeltaOptimizer` 、 :ref:`cn_api_fluid_optimizer_AdamaxOptimizer` 、 :ref:`cn_api_fluid_optimizer_DecayedAdagradOptimizer` 、 :ref:`cn_api_fluid_optimizer_FtrlOptimizer` 、 :ref:`cn_api_fluid_optimizer_LambOptimizer` 、:ref:`cn_api_fluid_optimizer_LarsMomentumOptimizer` ，此时is_sparse必须为False。默认为False。
+    - **is_distributed** (bool) - 是否使用分布式的方式存储embedding矩阵，仅在多机分布式cpu训练中使用。默认为False。
+    - **padding_idx** (int|long|None) - padding_idx需在区间[-vocab_size, vocab_size)，否则不生效，padding_idx<0时，padding_idx 会被改成 vocab_size + padding_idx，input中等于padding_idx的id对应的embedding信息会被设置为0，且这部分填充数据在训练时将不会被更新。如果为None，不作处理，默认为None。
+    - **param_attr** (ParamAttr) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。此外，可以通过 ``param_attr`` 参数加载用户自定义或预训练的词向量。只需将本地词向量转为numpy数据格式，且保证本地词向量的shape和embedding的 ``size`` 参数一致，然后使用 :ref:`cn_api_fluid_initializer_NumpyArrayInitializer` 进行初始化，即可实现加载自定义或预训练的词向量。详细使用方法见代码示例2。
+    - **dtype** (str|core.VarDesc.VarType) - 输出Tensor或LoDTensor的数据类型，数据类型必须为：float32，float64，默认为float32。
+返回：input映射后embedding Tensor或LoDTensor，数据类型和dtype定义的类型一致。
+返回类型：Variable
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name='sequence', shape=[1], dtype='int64', lod_level=1)
+    # 示例 1
+    emb_1 = fluid.embedding(input=data, size=[128, 64])
+    # 示例 2: 加载用户自定义或预训练的词向量
+    weight_data = np.random.random(size=(128, 100))  # numpy格式的词向量数据
+    w_param_attrs = fluid.ParamAttr(
+        name="emb_weight",
+        learning_rate=0.5,
+        initializer=fluid.initializer.NumpyArrayInitializer(weight_data),
+        trainable=True)
+    emb_2 = fluid.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32')
--- a/doc/paddle/api/paddle/fluid/layers/equal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/equal_cn.rst
+.. _cn_api_fluid_layers_equal:
+equal
+-------------------------------
+.. py:function:: paddle.fluid.layers.equal(x, y, cond=None, name=None)
+该OP返回 :math:`x==y` 逐元素比较x和y是否相等，x和y的维度应该相同。
+参数：
+    - **x** (Variable) - 输入Tensor，支持的数据类型包括 float32， float64，int32， int64。
+    - **y** (Variable) - 输入Tensor，支持的数据类型包括 float32， float64， int32， int64。
+    - **cond** (Variable，可选) – 如果为None，则创建一个Tensor来作为进行比较的输出结果，该Tensor的shape和数据类型和输入x一致；如果不为None，则将Tensor作为该OP的输出，数据类型和数据shape需要和输入x一致。默认值为None。 
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的Tensor，输出Tensor的shape和输入一致，Tensor数据类型为bool。
+返回类型：变量（Variable）
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    out_cond =fluid.data(name="input1", shape=[2], dtype='bool')
+    label = fluid.layers.assign(np.array([3, 3], dtype="int32"))
+    limit = fluid.layers.assign(np.array([3, 2], dtype="int32"))
+    label_cond = fluid.layers.assign(np.array([1, 2], dtype="int32"))
+    out1 = fluid.layers.equal(x=label,y=limit) #out1=[True, False]
+    out2 = fluid.layers.equal(x=label_cond,y=limit, cond=out_cond) #out2=[False, True] out_cond=[False, True]
--- a/doc/paddle/api/paddle/erf_cn.rst
+++ b/doc/paddle/api/paddle/erf_cn.rst
--- a/doc/paddle/api/paddle/exp_cn.rst
+++ b/doc/paddle/api/paddle/exp_cn.rst
--- a/doc/paddle/api/paddle/expand_as_cn.rst
+++ b/doc/paddle/api/paddle/expand_as_cn.rst
--- a/doc/paddle/api/paddle/expand_cn.rst
+++ b/doc/paddle/api/paddle/expand_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/exponential_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/exponential_decay_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/eye_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/eye_cn.rst
+.. _cn_api_fluid_layers_eye:
+eye
+-------------------------------
+.. py:function:: paddle.fluid.layers.eye(num_rows, num_columns=None, batch_shape=None, dtype='float32', name=None)
+该OP用来构建二维Tensor，或一个批次的二维Tensor。
+参数：
+    - **num_rows** (int) - 该批次二维Tensor的行数，数据类型为非负int32。
+    - **num_columns** (int, 可选) - 该批次二维Tensor的列数，数据类型为非负int32。若为None，则默认等于num_rows。
+    - **batch_shape** (list(int), 可选) - 如若提供，则返回Tensor的主批次维度将为batch_shape。
+    - **dtype** (np.dtype|core.VarDesc.VarType|str，可选) - 返回Tensor的数据类型，可为int32，int64，float16，float32，float64，默认数据类型为float32。
+    - **name** (str) – 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回： ``shape`` 为batch_shape + [num_rows, num_columns]的Tensor。
+抛出异常：
+    - ``TypeError`` - 如果 ``dtype`` 的类型不是float16， float32， float64， int32， int64其中之一。
+    - ``TypeError`` - 如果 ``num_columns`` 不是非负整数或者 ``num_rows`` 不是非负整数。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.eye(3, dtype='int32')
+    # [[1, 0, 0]
+    #  [0, 1, 0]
+    #  [0, 0, 1]]
+    data = fluid.layers.eye(2, 3, dtype='int32')
+    # [[1, 0, 0]
+    #  [0, 1, 0]]
+    data = fluid.layers.eye(2, batch_shape=[3])
+    # Construct a batch of 3 identity tensors, each 2 x 2.
+    # data[i, :, :] is a 2 x 2 identity tensor, i = 0, 1, 2.
--- a/doc/paddle/api/paddle/static/nn/fc_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/fc_cn.rst
--- a/doc/paddle/api/paddle/fill_constant_cn.rst
+++ b/doc/paddle/api/paddle/fill_constant_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/filter_by_instag_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/filter_by_instag_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/flatten_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/flatten_cn.rst
+.. _cn_api_fluid_layers_flatten:
+flatten
+-------------------------------
+.. py:function::  paddle.fluid.layers.flatten(x, axis=1, name=None)
+:alias_main: paddle.flatten
+:alias: paddle.flatten,paddle.tensor.flatten,paddle.tensor.manipulation.flatten
+:old_api: paddle.fluid.layers.flatten
+flatten op将输入的多维Tensor展平成2-D Tensor矩阵
+例如：
+.. code-block:: text
+    Case 1:
+      给定
+        X.shape = (3, 100, 100, 4)
+      且
+        axis = 2
+      得到:
+        Out.shape = (3 * 100, 4 * 100)
+    Case 2:
+      给定
+        X.shape = (3, 100, 100, 4)
+      且
+        axis = 0
+      得到:
+        Out.shape = (1, 3 * 100 * 100 * 4)
+参数：
+  - **x** (Variable) - 一个维度数>=axis 的多维Tensor, 数据类型可以为float32，float64，int8，int32或int64。
+  - **axis** (int) - flatten展开的分割轴，[0, axis) 轴数据被flatten到输出矩阵的0轴，[axis, R)数据被flatten到输出矩阵的1轴，其中R是输入张量的总维度数。axis的值必须在[0,R]范围内。当 axis=0 时，若输入Tensor的维度为 :math:`[d_0, d_1, \dots, d_n]` ，则输出Tensor的维度为 :math:`[1, d_0 * d_1 * \dots * d_n]` ，默认值为1。
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回: 一个 2-D Tensor，它包含输入Tensor的数据，但维度发生变化。输入的[0, axis)维将沿axis展平到输出Tensor的0维度，剩余的输入维数展平到输出的1维度。数据类型与输入x相同。
+返回类型: Variable
+抛出异常：
+  - ValueError: 如果 x 不是一个Variable
+  - ValueError: 如果axis的范围不在 [0, rank(x)] 范围内
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name="x", shape=[4, 4, 3], append_batch_size=False, dtype="float32")
+    # x shape is [4, 4, 3]
+    out = fluid.layers.flatten(x=x, axis=2)
+    # out shape is [16, 3]
--- a/doc/paddle/api/paddle/floor_cn.rst
+++ b/doc/paddle/api/paddle/floor_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/fsp_matrix_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/fsp_matrix_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/gather_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/gather_cn.rst
+.. _cn_api_fluid_layers_gather:
+gather
+-------------------------------
+.. py:function:: paddle.fluid.layers.gather(input, index, overwrite=True)
+根据索引 ``index`` 获取输入（input）的最外层维度的条目，并将它们拼接在一起。
+.. math::
+        Out=X[Index]
+.. code-block:: text
+        X = [[1, 2],
+             [3, 4],
+             [5, 6]]
+        Index = [1, 2]
+        Then:
+        Out = [[3, 4],
+               [5, 6]]
+参数:
+        - **input** (Variable) - 输入, 秩 ``rank >= 1`` , 支持的数据类型包括 int32、int64、float32、float64 和 uint8 (CPU)、float16（GPU） 。
+        - **index** (Variable) - 索引，秩 ``rank = 1``, 数据类型为 int32 或 int64。
+        - **overwrite** (bool) - 具有相同索引时在反向更新梯度的模式。如果为 ``True`` ，则使用覆盖模式更新相同索引的梯度；如果为 ``False`` ，则使用累积模式更新相同索引的梯度。默认值为 ``True`` 。
+返回：和输入的秩相同的输出张量。
+返回类型：Variable
+**代码示例**
+..  code-block:: python
+  import paddle.fluid as fluid
+  x = fluid.layers.data(name='x', shape=[-1, 5], dtype='float32')
+  index = fluid.layers.data(name='index', shape=[-1, 1], dtype='int32')
+  output = fluid.layers.gather(x, index)
--- a/doc/paddle/api/paddle/gather_nd_cn.rst
+++ b/doc/paddle/api/paddle/gather_nd_cn.rst
--- a/doc/paddle/api/paddle/nn/gather_tree_cn.rst
+++ b/doc/paddle/api/paddle/nn/gather_tree_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/gaussian_random_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/gaussian_random_cn.rst
+.. _cn_api_fluid_layers_gaussian_random:
+gaussian_random
+-------------------------------
+.. py:function:: paddle.fluid.layers.gaussian_random(shape, mean=0.0, std=1.0, seed=0, dtype='float32', name=None)
+该OP返回数值符合高斯随机分布的Tensor，形状为 ``shape``，数据类型为 ``dtype``。
+参数：
+    - **shape** (list|tuple|Tensor) - 生成的随机Tensor的形状。如果 ``shape`` 是list、tuple，则其中的元素可以是int，或者是形状为[1]且数据类型为int32、int64的Tensor。如果 ``shape`` 是Tensor，则是数据类型为int32、int64的1-D Tensor。
+    - **mean** (float|int, 可选) - 输出Tensor的均值，支持的数据类型：float、int。默认值为0.0。
+    - **std** (float|int, 可选) - 输出Tensor的标准差，支持的数据类型：float、int。默认值为1.0。
+    - **seed** (int, 可选) - 随机数种子，默认值为 0。注：seed 设置为 0 表示使用系统的随机数种子。注意如果 seed 不为 0，则此算子每次将始终生成相同的随机数。
+    - **dtype** (str|np.dtype|core.VarDesc.VarType, 可选) - 输出Tensor的数据类型，支持float32、float64。默认值为float32。
+    - **name** (str, 可选) - 输出的名字。一般无需设置，默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
+返回：
+    Tensor：符合高斯随机分布的Tensor，形状为 ``shape``，数据类型为 ``dtype``。
+抛出异常：
+  - ``TypeError`` - 如果 ``shape`` 的类型不是list、tuple、Tensor。
+  - ``TypeError`` - 如果 ``dtype`` 不是float32、float64。
+**代码示例**：
+.. code-block:: python
+    # 静态图使用 
+    import numpy as np
+    from paddle import fluid
+    x = fluid.layers.gaussian_random((2, 3), std=2., seed=10)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    start = fluid.default_startup_program()
+    main = fluid.default_main_program()
+    exe.run(start)
+    x_np, = exe.run(main, feed={}, fetch_list=[x])
+    x_np
+    # array([[2.3060477, 2.676496 , 3.9911983],
+    #        [0.9990833, 2.8675377, 2.2279181]], dtype=float32)
+.. code-block:: python
+    # 动态图使用
+    import numpy as np
+    from paddle import fluid
+    import paddle.fluid.dygraph as dg
+    place = fluid.CPUPlace()
+    with dg.guard(place) as g:
+        x = fluid.layers.gaussian_random((2, 4), mean=2., dtype="float32", seed=10)
+        x_np = x.numpy()       
+    x_np
+    # array([[2.3060477 , 2.676496  , 3.9911983 , 0.9990833 ],
+    #        [2.8675377 , 2.2279181 , 0.79029655, 2.8447366 ]], dtype=float32)
--- a/doc/paddle/api/paddle/fluid/layers/gelu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/gelu_cn.rst
+.. _cn_api_fluid_layers_gelu:
+gelu
+-------------------------------
+.. py:function:: paddle.fluid.layers.gelu(x, approximate=False)
+:alias_main: paddle.nn.functional.gelu
+:alias: paddle.nn.functional.gelu,paddle.nn.functional.activation.gelu
+:old_api: paddle.fluid.layers.gelu
+逐元素计算 Gelu激活函数。更多细节请参考 `Gaussian Error Linear Units <https://arxiv.org/abs/1606.08415>`_ 。
+如果使用近似计算：
+.. math::
+    out = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3})))
+如果不使用近似计算：
+.. math::
+    out = 0.5 * x * (1 + erf(\frac{x}{\sqrt{2}}))
+参数：
+  - **x** (Variable) - Gelu Op 的输入，多维 Tensor 或 LoDTensor，数据类型为 float32 或 float64。
+  - **approximate** (bool, 可选) - 是否使用近似计算，默认值为 False。
+返回：
+  - 多维 Tensor 或 LoDTensor, 数据类型为 float32 或 float64， 和输入 x 的数据类型相同，形状和输入 x 相同。
+返回类型：
+  - Variable
+**代码示例**：
+.. code-block:: python
+    # declarative mode
+    import numpy as np
+    from paddle import fluid
+    x = fluid.data(name="x", shape=(-1, 3), dtype="float32")
+    y = fluid.layers.gelu(x)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    start = fluid.default_startup_program()
+    main = fluid.default_main_program()
+    data = np.random.randn(2, 3).astype("float32")
+    exe.run(start)
+    y_np, = exe.run(main, feed={"x": data}, fetch_list=[y])
+    data
+    # array([[ 0.87165993, -1.0541513 , -0.37214822],
+    #         [ 0.15647964,  0.32496083,  0.33045998]], dtype=float32)
+    y_np
+    # array([[ 0.70456535, -0.15380788, -0.13207214],
+    #        [ 0.08796856,  0.20387867,  0.2080159 ]], dtype=float32)
+.. code-block:: python
+    # imperative mode
+    import numpy as np
+    from paddle import fluid
+    import paddle.fluid.dygraph as dg
+    data = np.random.randn(2, 3).astype("float32")
+    place = fluid.CPUPlace()
+    with dg.guard(place) as g:
+        x = dg.to_variable(data)
+        y = fluid.layers.gelu(x)
+        y_np = y.numpy()
+    data
+    # array([[ 0.87165993, -1.0541513 , -0.37214822],
+    #        [ 0.15647964,  0.32496083,  0.33045998]], dtype=float32)
+    y_np
+    # array([[ 0.70456535, -0.15380788, -0.13207214],
+    #        [ 0.08796856,  0.20387867,  0.2080159 ]], dtype=float32)
--- a/doc/paddle/api/paddle/nn/functional/generate_mask_labels_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/generate_mask_labels_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/generate_proposal_labels_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/generate_proposal_labels_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/generate_proposals_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/generate_proposals_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/get_tensor_from_selected_rows_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/get_tensor_from_selected_rows_cn.rst
+.. _cn_api_fluid_layers_get_tensor_from_selected_rows:
+get_tensor_from_selected_rows
+-------------------------------
+.. py:function::  paddle.fluid.layers.get_tensor_from_selected_rows(x, name=None)
+该OP从SelectedRows类型的输入中获取向量数据，以LoDTensor的形式输出。
+::
+    例如：
+          输入为SelectedRows类型:
+               x.rows = [0, 5, 5, 4, 19]
+               x.height = 20
+               x.value = [[1, 1] [2, 2] [2, 2] [3, 3] [6, 6]]
+          输出为LoDTensor：
+               out.shape = [5, 2]
+               out.data = [[1, 1],
+                           [2, 2],
+                           [2, 2],
+                           [3, 3],
+                           [6, 6]]
+参数：
+  - **x** (SelectedRows) - SelectedRows类型的输入，数据类型为float32，float64，int32或int64。
+  - **name** (str) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回： 从SelectedRows中转化而来的LoDTensor，数据类型和输入一致。
+返回类型： Variable
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    b = fluid.default_main_program().global_block()
+    input = b.create_var(name="X", dtype="float32", persistable=True, type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
+    out = fluid.layers.get_tensor_from_selected_rows(input)
--- a/doc/paddle/api/paddle/fluid/layers/greater_equal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/greater_equal_cn.rst
+.. _cn_api_fluid_layers_greater_equal:
+greater_equal
+-------------------------------
+.. py:function:: paddle.fluid.layers.greater_equal(x, y, cond=None, name=None)
+:alias_main: paddle.greater_equal
+:alias: paddle.greater_equal,paddle.tensor.greater_equal,paddle.tensor.logic.greater_equal
+:old_api: paddle.fluid.layers.greater_equal
+该OP逐元素地返回 :math:`x >= y` 的逻辑值，使用重载算子 `>=` 可以有相同的计算函数效果。
+参数：
+    - **x** (Variable) – 进行比较的第一个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。 
+    - **y** (Variable) – 进行比较的第二个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。
+    - **cond** (Variable，可选) – 如果为None，则创建一个Tensor来作为进行比较的输出结果，该Tensor的shape，数据类型和输入x一致；如果不为None，则将Tensor作为该OP的输出，数据shape和数据类型需要和输入x一致。默认值为None。 
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的Tensor，数据的shape和输入x一致。
+返回类型：Variable，数据类型为bool类型。
+**代码示例**:
+.. code-block:: python
+     import paddle.fluid as fluid
+     import paddle.fluid.layers as layers
+     import numpy as np
+     label = layers.assign(np.array([2, 2], dtype='int32'))
+     limit = layers.assign(np.array([2, 3], dtype='int32'))
+     out = fluid.layers.greater_equal(x=label, y=limit) #out=[True, False]
+     out_1 = label >= limit #out1=[True, False]
--- a/doc/paddle/api/paddle/fluid/layers/greater_than_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/greater_than_cn.rst
+.. _cn_api_fluid_layers_greater_than:
+greater_than
+-------------------------------
+.. py:function:: paddle.fluid.layers.greater_than(x, y, cond=None, name=None)
+:alias_main: paddle.greater_than
+:alias: paddle.greater_than,paddle.tensor.greater_than,paddle.tensor.logic.greater_than
+:old_api: paddle.fluid.layers.greater_than
+该OP逐元素地返回 :math:`x > y` 的逻辑值，使用重载算子 `>` 可以有相同的计算函数效果。
+参数：
+    - **x** (Variable) – 进行比较的第一个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。 
+    - **y** (Variable) – 进行比较的第二个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。
+    - **cond** (Variable，可选) – 如果为None，则创建一个Tensor来作为进行比较的输出结果，该Tensor的shape和数据类型和输入x一致；如果不为None，则将Tensor作为该OP的输出，数据类型和数据shape需要和输入x一致。默认值为None。 
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的Tensor，数据的shape和输入x一致。
+返回类型：Variable，数据类型为bool类型。
+**代码示例**:
+.. code-block:: python
+     import paddle.fluid as fluid
+     import paddle.fluid.layers as layers
+     import numpy as np
+     label = layers.assign(np.array([2, 3], dtype='int32'))
+     limit = layers.assign(np.array([3, 2], dtype='int32'))
+     out = fluid.layers.greater_than(x=label, y=limit) #out=[False, True]
+     out1 = label > limit #out1=[False, True]
--- a/doc/paddle/api/paddle/nn/functional/grid_sampler_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/grid_sampler_cn.rst
--- a/doc/paddle/api/paddle/static/nn/group_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/group_norm_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/gru_unit_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/gru_unit_cn.rst
+.. _cn_api_fluid_layers_gru_unit:
+gru_unit
+-------------------------------
+.. py:function:: paddle.fluid.layers.gru_unit(input, hidden, size, param_attr=None, bias_attr=None, activation='tanh', gate_activation='sigmoid', origin_mode=False)
+:api_attr: 声明式编程模式（静态图)
+Gated Recurrent Unit（GRU）循环神经网络计算单元。该OP用于完成单个时间步内GRU的计算，支持以下两种计算方式：
+如果origin_mode为True，则使用的运算公式来自论文
+`Learning Phrase Representations using RNN Encoder Decoder for Statistical Machine Translation <https://arxiv.org/pdf/1406.1078.pdf>`_ 。
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = u_t \odot h_{t-1} + (1-u_t) \odot \tilde{h_t}
+如果origin_mode为False，则使用的运算公式来自论文
+`Empirical Evaluation of Gated Recurrent Neural Networks on Sequence Modeling  <https://arxiv.org/pdf/1412.3555.pdf>`_ 。
+公式如下:
+.. math::
+    u_t & = act_g(W_{ux}x_{t} + W_{uh}h_{t-1} + b_u)\\
+    r_t & = act_g(W_{rx}x_{t} + W_{rh}h_{t-1} + b_r)\\
+    \tilde{h_t} & = act_c(W_{cx}x_{t} + W_{ch}(r_t \odot h_{t-1}) + b_c)\\
+    h_t & = (1-u_t) \odot h_{t-1} + u_t \odot \tilde{h_t}
+其中， :math:`x_t` 为当前时间步的输入，这个输入并非 ``input``，该OP不包含 :math:`W_{ux}x_{t}, W_{rx}x_{t}, W_{cx}x_{t}` 的计算，**注意** 要在该OP前使用大小为GRU隐单元数目的3倍的全连接层并将其输出作为 ``input``；
+:math:`h_{t-1}` 为前一时间步的隐状态 ``hidden``； :math:`u_t` 、 :math:`r_t` 、 :math:`\tilde{h_t}` 和 :math:`h_t` 分别代表了GRU单元中update gate（更新门）、reset gate（重置门）、candidate hidden（候选隐状态）和隐状态输出; :math:`\odot` 为逐个元素相乘；
+:math:`W_{uh}, b_u` 、 :math:`W_{rh}, b_r` 和 :math:`W_{ch}, b_c` 分别代表更新门、重置门和候选隐状态在计算时使用的权重矩阵和偏置。在实现上，三个权重矩阵合并为一个 :math:`[D, D \times 3]` 形状的Tensor存放，三个偏置拼接为一个 :math:`[1, D \times 3]` 形状的Tensor存放，其中 :math:`D` 为隐单元的数目；权重Tensor存放布局为： :math:`W_{uh}` 和 :math:`W_{rh}` 拼接为 :math:`[D, D  \times 2]` 形状位于前半部分，:math:`W_{ch}` 以 :math:`[D, D]` 形状位于后半部分。
+参数:
+    - **input** (Variable) – 表示经线性变换后当前时间步的输入，是形状为 :math:`[N, D \times 3]` 的二维Tensor，其中 :math:`N` 为batch_size， :math:`D` 为隐单元的数目。数据类型为float32或float64。
+    - **hidden** (Variable) –  表示上一时间步产生的隐状态，是形状为 :math:`[N, D]` 的二维Tensor，其中 :math:`N` 为batch_size， :math:`D` 为隐单元的数目。数据类型与 ``input`` 相同。
+    - **size** (integer) – 输入数据 ``input`` 特征维度的大小，需要是隐单元数目的3倍。
+    - **param_attr** (ParamAttr，可选) – 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **activation** (string) –  公式中 :math:`act_c` 激活函数的类型。支持identity、sigmoid、tanh、relu四种激活函数类型，默认为tanh。
+    - **gate_activation** (string) – 公式中 :math:`act_g` 激活函数的类型。支持identity、sigmoid、tanh、relu四种激活函数类型，默认为sigmoid。
+    - **origin_mode** (bool) – 指明要使用的GRU计算方式，两种计算方式具体差异见公式描述，默认值为False。
+返回：Variable的三元组，包含三个与 ``input`` 相同数据类型的Tensor，分别表示下一时间步的隐状态（ :math:`h_t` ）、重置的前一时间步的隐状态（ :math:`r_t \odot h_{t-1}` ）和 :math:`h_t, r_t, \tilde{h_t}` 的拼接，形状分别为 :math:`[N, D]` 、 :math:`[N, D]` 和 :math:`[N, D \times 3]` 。通常只有下一时间步的隐状态（ :math:`h_t` ）作为GRU的输出和隐状态使用，其他内容只是中间计算结果。
+返回类型: tuple
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    dict_dim, emb_dim = 128, 64
+    data = fluid.data(name='step_data', shape=[None], dtype='int64')
+    emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
+    hidden_dim = 512
+    x = fluid.layers.fc(input=emb, size=hidden_dim * 3)
+    pre_hidden = fluid.data(
+        name='pre_hidden', shape=[None, hidden_dim], dtype='float32')
+    hidden = fluid.layers.gru_unit(
+        input=x, hidden=pre_hidden, size=hidden_dim * 3)
--- a/doc/paddle/api/paddle/fluid/layers/hard_shrink_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/hard_shrink_cn.rst
+.. _cn_api_fluid_layers_hard_shrink:
+hard_shrink
+-------------------------------
+.. py:function:: paddle.fluid.layers.hard_shrink(x,threshold=None)
+:alias_main: paddle.nn.functional.hard_shrink
+:alias: paddle.nn.functional.hard_shrink,paddle.nn.functional.activation.hard_shrink
+:old_api: paddle.fluid.layers.hard_shrink
+HardShrink激活函数(HardShrink activation operator)
+.. math::
+  out = \begin{cases}
+        x, \text{if } x > \lambda \\
+        x, \text{if } x < -\lambda \\
+        0,  \text{otherwise}
+      \end{cases}
+参数：
+    - **x** - HardShrink激活函数的输入
+    - **threshold** (FLOAT)-HardShrink激活函数的threshold值。[默认：0.5]
+返回：HardShrink激活函数的输出
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[784])
+    result = fluid.layers.hard_shrink(x=data, threshold=0.3)
--- a/doc/paddle/api/paddle/nn/functional/hard_sigmoid_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/hard_sigmoid_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/hard_swish_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/hard_swish_cn.rst
--- a/doc/paddle/api/paddle/has_inf_cn.rst
+++ b/doc/paddle/api/paddle/has_inf_cn.rst
--- a/doc/paddle/api/paddle/has_nan_cn.rst
+++ b/doc/paddle/api/paddle/has_nan_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/hash_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/hash_cn.rst
--- a/doc/paddle/api/paddle/static/nn/hsigmoid_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/hsigmoid_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/huber_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/huber_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/im2sequence_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/im2sequence_cn.rst
+.. _cn_api_fluid_layers_im2sequence:
+im2sequence
+-------------------------------
+.. py:function:: paddle.fluid.layers.im2sequence(input, filter_size=1, stride=1, padding=0, input_image_size=None, out_stride=1, name=None)
+:api_attr: 声明式编程模式（静态图)
+该OP使用 `filter` 扫描输入的Tensor并将输入Tensor转换成序列，返回值的 `shape={input.batch_size * output_height * output_width, filter_size_height* filter_size_width * input.channels}` 。返回值的timestep的个数为 `output_height * output_width` ， 每个timestep的维度是 `filter_size_height* filter_size_width * input.channels` 。其中 `output_height` 和 `output_width` 由以下式计算:
+.. math::
+    output\_height = 1 + \frac{padding\_up + padding\_down + input\_height - filter\_size\_height + stride\_height-1}{stride\_height} \\
+    output\_width = 1 + \frac{padding\_left + padding\_right + input\_width - filter\_size\_width + stride\_width-1}{stride\_width}
+其中符号的意义如下所示。
+参数:
+  - **input** （Variable）- 类型为float32的4-D Tensor，格式为 `[N, C, H, W]` 。公式中 `input_height` 和 `input_width` 分别代表输入的高和宽。
+  - **filter_size** (int32 | List[int32]) - 滤波器大小。如果 `filter_size` 是一个List，它必须包含两个整数 `[filter_size_height, filter_size_width]` 。如果 `filter_size` 是一个int32, 则滤波器大小是 `[filter_size, filter_size]` , 默认值为1。
+  - **stride** (int32 | List[int32]) - 步长大小。如果stride是一个List，它必须包含两个整数 `[stride_height,stride_width]` 。如果stride是一个int32, 则步长大小是 `[stride, stride]` , 默认值为1。
+  - **padding** (int32 | List[int32]) - 填充大小。如果padding是一个List，它可以包含四个整数 `[padding_up, padding_left, padding_down, padding_right]` ，当包含两个整数 `[padding_height, padding_width]` 时，可展开为 `[padding_height, padding_width, padding_height, padding_width]` 。如果padding是一个int, 可展开为 `[padding, padding, padding, padding]` 。默认值为0。
+  - **input_image_size** (Variable, 可选) - 2-D Tensor, 输入图像的实际大小, 它的维度为 `[batchsize，2]` 。当该参数不为None时，可用于batch inference。默认值为None.
+  - **out_stride** (int32 | List[int32]) - 输出步长。只有当input_image_size不为None时才有效。如果out_stride是List，它必须包含 `[out_stride_height, out_stride_width]` ，如果out_stride是int32, 则可展开为 `[out_stride, out_stride]` ，默认值为1。
+  - **name** (str, 可选) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，缺省值为None。
+返回： 数据类型为float32, `shape` 为 `{batch_size * output_height * output_width, filter_size_height * filter_size_width * input.channels}` 的 2-D LodTensor。
+返回类型: Variable
+::
+  Given:
+    x = [[[[ 6.  2.  1.]
+      [ 8.  3.  5.]
+      [ 0.  2.  6.]]
+        [[ 2.  4.  4.]
+         [ 6.  3.  0.]
+         [ 6.  4.  7.]]]
+       [[[ 6.  7.  1.]
+         [ 5.  7.  9.]
+         [ 2.  4.  8.]]
+        [[ 1.  2.  1.]
+         [ 1.  3.  5.]
+         [ 9.  0.  8.]]]]
+    x.dims = {2, 2, 3, 3}
+    And:
+    filter = [2, 2]
+    stride = [1, 1]
+    padding = [0, 0]
+    Then:
+    output.data = [[ 6.  2.  8.  3.  2.  4.  6.  3.]
+                   [ 2.  1.  3.  5.  4.  4.  3.  0.]
+                   [ 8.  3.  0.  2.  6.  3.  6.  4.]
+                   [ 3.  5.  2.  6.  3.  0.  4.  7.]
+                   [ 6.  7.  5.  7.  1.  2.  1.  3.]
+                   [ 7.  1.  7.  9.  2.  1.  3.  5.]
+                   [ 5.  7.  2.  4.  1.  3.  9.  0.]
+                   [ 7.  9.  4.  8.  3.  5.  0.  8.]]
+    output.dims = {8, 8}
+    output.lod = [[4, 4]]
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name='data', shape=[3, 32, 32],
+                             dtype='float32')
+    output = fluid.layers.im2sequence(
+        input=data, stride=[1, 1], filter_size=[2, 2])
--- a/doc/paddle/api/paddle/nn/functional/image_resize_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/image_resize_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/image_resize_short_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/image_resize_short_cn.rst
--- a/doc/paddle/api/paddle/increment_cn.rst
+++ b/doc/paddle/api/paddle/increment_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/inplace_abn_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/inplace_abn_cn.rst
+.. _cn_api_fluid_layers_inplace_abn:
+inplace_abn
+-------------------------------
+**注意：该API仅支持【静态图】模式**
+.. py:function:: paddle.fluid.layers.inplace_abn(input, act=None, is_test=False, momentum=0.9, epsilon=1e-05, param_attr=None, bias_attr=None, data_layout='NCHW', name=None, moving_mean_name=None, moving_variance_name=None, do_model_average_for_mean_and_var=False, use_global_stats=False, act_alpha=1.0)
+就地批正则化激活层（Inplace Activation Batch Normalization Layer）
+此层使用就地内存计算批处理正则化和激活来实现节省内存，有关批量正则化计算，请参见 ``fluid.layers.batch_norm`` ，有关就地激活批正则化的计算，请参考 `In-Place Activated BatchNorm for Memory-Optimized Training of DNNs <https://arxiv.org/abs/1712.02616>`_ 。
+参数：
+    - **input** (Variable) - inplace_abn算子的输入特征，是一个Variable类型，输入维度可以是 2, 3, 4, 5。数据类型：float16, float32, float64。
+    - **act** （string）- 激活函数类型，可以是leaky_relu、relu、prelu等。默认：None。
+    - **is_test** （bool） - 指示它是否在测试阶段，非训练阶段使用训练过程中统计到的全局均值和全局方差。默认：False。
+    - **momentum** （float|Variable）- 此值用于计算 moving_mean 和 moving_var，是一个float类型或者一个shape为[1]，数据类型为float32的Variable类型。更新公式为:  :math:`moving\_mean = moving\_mean * momentum + new\_mean * (1. - momentum)` ， :math:`moving\_var = moving\_var * momentum + new\_var * (1. - momentum)` ， 默认：0.9。
+    - **epsilon** （float）- 加在分母上为了数值稳定的值。默认：1e-5。
+    - **param_attr** (ParamAttr|None) ：指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。inplace_abn算子默认的权重初始化是1.0。
+    - **bias_attr** （ParamAttr|None）- 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。inplace_abn算子默认的偏置初始化是0.0。
+    - **data_layout** （string) - 指定输入的数据格式，输出的数据格式将与输入保持一致，可以是"NCHW"和"NHWC"。N是批尺寸，C是通道数，H是特征高度，W是特征宽度。默认值："NCHW"。
+    - **name** (str|None) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+    - **moving_mean_name** （string）- moving_mean的名称，存储全局均值。如果将其设置为None, ``inplace_abn`` 将随机命名全局均值；否则， ``inplace_abn`` 将命名全局均值为 ``moving_mean_name`` 。默认：None。
+    - **moving_variance_name** （string）- moving_variance的名称，存储全局方差。如果将其设置为None, ``inplace_abn`` 将随机命名全局方差；否则， ``inplace_abn`` 将命名全局方差为 ``moving_variance_name`` 。默认：None。
+    - **do_model_average_for_mean_and_var** （bool，默认False）- 是否为mean和variance做模型均值。
+    - **use_global_stats** （bool） – 是否使用全局均值和方差。 在预测或测试模式下，将use_global_stats设置为true或将is_test设置为true，并且行为是等效的。 在训练模式中，当设置use_global_stats为True时，在训练期间也使用全局均值和方差。默认：False。
+    - **act_alpha** （float） – 当 ``act`` 参数为None、leaky-relu、elu时，会使用就地批正则化激活算法，可通过此参数给定leaky-relu、elu的 ``alpha`` 值。默认：1.0。
+返回： 维度和输入相同的Tensor，在输入中运用批正则后的结果。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+		import paddle.fluid as fluid
+		x = fluid.data(name='x', shape=[3, 7, 3, 7], dtype='float32')
+		hidden1 = fluid.layers.fc(input=x, size=200, param_attr='fc1.w')
+		hidden2 = fluid.layers.inplace_abn(input=hidden1)
+		hidden3 = fluid.layers.inplace_abn(input=hidden2, act='leaky_relu', act_alpha=0.2)
--- a/doc/paddle/api/paddle/static/nn/instance_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/instance_norm_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/inverse_time_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/inverse_time_decay_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/iou_similarity_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/iou_similarity_cn.rst
--- a/doc/paddle/api/paddle/is_empty_cn.rst
+++ b/doc/paddle/api/paddle/is_empty_cn.rst
--- a/doc/paddle/api/paddle/isfinite_cn.rst
+++ b/doc/paddle/api/paddle/isfinite_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/kldiv_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/kldiv_loss_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/l2_normalize_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/l2_normalize_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/label_smooth_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/label_smooth_cn.rst
--- a/doc/paddle/api/paddle/static/nn/layer_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/layer_norm_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/leaky_relu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/leaky_relu_cn.rst
+.. _cn_api_fluid_layers_leaky_relu:
+leaky_relu
+-------------------------------
+.. py:function:: paddle.fluid.layers.leaky_relu(x, alpha=0.02, name=None)
+:alias_main: paddle.nn.functional.leaky_relu
+:alias: paddle.nn.functional.leaky_relu,paddle.nn.functional.activation.leaky_relu
+:old_api: paddle.fluid.layers.leaky_relu
+LeakyRelu激活函数
+.. math::   out = \max(x, \alpha * x)
+参数:
+    - **x** (Variable) - 输入的多维LoDTensor/Tensor，数据类型为：float32，float64。
+    - **alpha** (float) - 负斜率，缺省值为0.02。
+    - **name** (str，可选) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回: 与 ``x`` 维度相同，数据类型相同的LodTensor/Tensor。
+返回类型: Variable
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    # Graph Organizing
+    x = fluid.layers.data(name="x", shape=[2], dtype="float32")
+    res = fluid.layers.leaky_relu(x, alpha=0.1)
+    # Create an executor using CPU as an example
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    # Execute
+    x_i = np.array([[-1, 2], [3, -4]]).astype(np.float32)
+    res_val, = exe.run(fluid.default_main_program(), feed={'x':x_i}, fetch_list=[res])
+    print(res_val) # [[-0.1, 2], [3, -0.4]]
--- a/doc/paddle/api/paddle/fluid/layers/less_equal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/less_equal_cn.rst
+.. _cn_api_fluid_layers_less_equal:
+less_equal
+-------------------------------
+.. py:function:: paddle.fluid.layers.less_equal(x, y, cond=None, name=None)
+:alias_main: paddle.less_equal
+:alias: paddle.less_equal,paddle.tensor.less_equal,paddle.tensor.logic.less_equal
+:old_api: paddle.fluid.layers.less_equal
+该OP逐元素地返回 :math:`x <= y` 的逻辑值，使用重载算子 `<=` 可以有相同的计算函数效果。
+参数：
+    - **x** (Variable) – 进行比较的第一个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。 
+    - **y** (Variable) – 进行比较的第二个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。
+    - **cond** (Variable，可选) – 如果为None，则创建一个Tensor来作为进行比较的输出结果，该Tensor的shape和数据类型和输入x一致；如果不为None，则将Tensor作为该OP的输出，数据类型和数据shape需要和输入x一致。默认值为None。 
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的Tensor，数据的shape和输入x一致。
+返回类型：Variable，数据类型为bool类型。
+**代码示例**:
+.. code-block:: python
+     import paddle.fluid as fluid
+     import paddle.fluid.layers as layers
+     import numpy as np
+     label = layers.assign(np.array([1, 3], dtype='int32'))
+     limit = layers.assign(np.array([1, 2], dtype='int32'))
+     out = fluid.layers.less_equal(x=label, y=limit) #out=[True, False]
+     out1 = label<= limit #out1=[True, False]
--- a/doc/paddle/api/paddle/fluid/layers/less_than_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/less_than_cn.rst
+.. _cn_api_fluid_layers_less_than:
+less_than
+-------------------------------
+.. py:function:: paddle.fluid.layers.less_than(x, y, force_cpu=None, cond=None, name=None)
+:alias_main: paddle.less_than
+:alias: paddle.less_than,paddle.tensor.less_than,paddle.tensor.logic.less_than
+:old_api: paddle.fluid.layers.less_than
+该OP逐元素地返回 :math:`x < y` 的逻辑值，使用重载算子 `<` 可以有相同的计算函数效果。
+参数：
+    - **x** (Variable) - 进行比较的第一个输入，是一个多维的LoDTensor/Tensor，数据类型可以是float32，float64，int32，int64。
+    - **y** (Variable) - 进行比较的第二个输入，是一个多维的LoDTensor/Tensor，数据类型可以是float32，float64，int32，int64。
+    - **force_cpu** (bool) – 如果为True则强制将输出变量写入CPU内存中，否则将其写入目前所在的运算设备上。默认值为False。注意：该属性已弃用，其值始终是False。
+    - **cond** (Variable，可选) – 指定算子输出结果的LoDTensor/Tensor，可以是程序中已经创建的任何Variable。默认值为None，此时将创建新的Variable来保存输出结果。
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的LoDTensor/Tensor，数据的shape和输入x一致。
+返回类型： Variable，数据类型为bool。
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    # Graph Organizing
+    x = fluid.layers.data(name='x', shape=[2], dtype='float64')
+    y = fluid.layers.data(name='y', shape=[2], dtype='float64')
+    result = fluid.layers.less_than(x=x, y=y)
+    # The comment lists another available method.
+    # result = fluid.layers.fill_constant(shape=[2], dtype='float64', value=0)
+    # fluid.layers.less_than(x=x, y=y, cond=result)
+    # Create an executor using CPU as example
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    # Execute
+    x_i = np.array([[1, 2], [3, 4]]).astype(np.float64)
+    y_i = np.array([[2, 2], [1, 3]]).astype(np.float64)
+    result_value, = exe.run(fluid.default_main_program(), feed={'x':x_i, 'y':y_i}, fetch_list=[result])
+    print(result_value) # [[True, False], [False, False]]
--- a/doc/paddle/api/paddle/fluid/layers/linear_chain_crf_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/linear_chain_crf_cn.rst
+.. _cn_api_fluid_layers_linear_chain_crf:
+linear_chain_crf
+-------------------------------
+.. py:function:: paddle.fluid.layers.linear_chain_crf(input, label, param_attr=None, length=None)
+:api_attr: 声明式编程模式（静态图)
+线性链条件随机场（Linear Chain CRF）
+条件随机场定义无向概率图，节点代表随机变量，边代表两个变量之间的依赖。CRF学习条件概率 :math:`P\left ( Y|X \right )` ， :math:`X = \left ( x_{1},x_{2},...,x_{n} \right )` 是结构性输入， :math:`Y = \left ( y_{1},y_{2},...,y_{n} \right )` 为输入标签。
+线性链条件随机场（Linear Chain CRF)是特殊的条件随机场（CRF），有利于序列标注任务。序列标注任务不为输入设定许多条件依赖。唯一的限制是输入和输出必须是线性序列。因此类似CRF的图是一个简单的链或者线，也就是线性链随机场（linear chain CRF）。
+该操作符实现了线性链条件随机场（linear chain CRF）的前向——反向算法。详情请参照 http://www.cs.columbia.edu/~mcollins/fb.pdf 和 http://cseweb.ucsd.edu/~elkan/250Bwinter2012/loglinearCRFs.pdf。
+长度为L的序列s的概率定义如下：
+.. math::
+    P(s) = (1/Z) \exp(a_{s_1} + b_{s_L} + \sum_{l=1}^L x_{s_l} + \sum_{l=2}^L w_{s_{l-1},s_l})
+其中Z是归一化值，所有可能序列的P(s)之和为1，x是线性链条件随机场（linear chain CRF）的发射（emission）特征权重。
+线性链条件随机场最终输出每个batch训练样本的条件概率的对数
+  1.这里 :math:`x` 代表Emission
+  2.Transition的第一维度值，代表起始权重，这里用 :math:`a` 表示
+  3.Transition的下一维值，代表末尾权重，这里用 :math:`b` 表示
+  4.Transition剩下的值，代表转移权重，这里用 :math:`w` 表示
+  5.Label用 :math:`s` 表示
+**注意：**
+    1.条件随机场（CRF）的特征函数由发射特征(emission feature）和转移特征（transition feature）组成。发射特征（emission feature）权重在调用函数前计算，而不在函数里计算。
+    2.由于该函数对所有可能序列进行全局归一化，发射特征（emission feature）权重应是未缩放的。因此如果该函数带有发射特征（emission feature），并且发射特征是任意非线性激活函数的输出，则请勿调用该函数。
+    3.Emission的第二维度必须和标记数字（tag number）相同。
+参数：
+    - **input** (LoDTensor|Tensor) - 数据类型为float32， float64的Tensor或者LoDTensor。线性链条件随机场的发射矩阵emission。输入为LoDTensor时，是一个shape为[N*D]的2-D LoDTensor，N是一个batch中各序列长度相加的总数，D是维度。当输入为Tensor时，应该是一个shape为[N x S x D]的Tensor，N是batch_size，S为序列的最大长度，D是维度。
+    - **label** (Tensor|LoDTensor) - 数据类型为int64类型Tensor或者LoDTensor。该值为标签值。输入为LoDTensor时[N x 1]，N是mini-batch的总数;输入为Tensor时，[N x S],N为batch数量，S为序列的最大长度。
+    - **length** (Tensor) - 数据类型为int64类型的Tensor。 shape为[M x 1]的Tensor,M为mini_batch中序列的数量。
+    - **param_attr** (ParamAttr) - 可学习参数的属性，为transition矩阵。详见代码示例。
+返回：
+    Emission的指数形式。shape与Emission相同。这是前向计算中的中间计算结果，在反向计算中还会复用。
+    Transition的指数形式。shape为[(D+2)*D]的二维张量。这是前向计算中的中间计算结果，在反向计算中还会复用。
+    条件概率的对数形式。每个batch训练样本的条件概率的对数。这是一个shape为[S*1]的二维张量，S是mini-batch的序列数。注：S等于mini-batch的序列数。输出不再是LoDTensor。
+返回类型：
+    Emission的指数形式。Variable(Tensor|LoDTensor)：数据类型为float32， float64的Tensor或者LoDTensor。
+    Transition的指数形式。Variable(Tensor|LoDTensor)：数据类型为float32， float64的Tensor或者LoDTensor。
+    条件概率的对数形式。Variable(Tensor)：数据类型为float32， float64的Tensor。
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    train_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(train_program, startup_program):
+        input_data = fluid.layers.data(name='input_data', shape=[10], dtype='float32', lod_level=1)
+        label = fluid.layers.data(name='label', shape=[1], dtype='int', lod_level=1)
+        emission= fluid.layers.fc(input=input_data, size=10, act="tanh")
+        crf_cost = fluid.layers.linear_chain_crf(
+            input=emission,
+            label=label,
+            param_attr=fluid.ParamAttr(
+            name='crfw',
+            learning_rate=0.01))
+    use_cuda = False
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+    #using LoDTensor, define network
+    a = fluid.create_lod_tensor(np.random.rand(12,10).astype('float32'), [[3,3,4,2]], place)
+    b = fluid.create_lod_tensor(np.array([[1],[1],[2],[3],[1],[1],[1],[3],[1],[1],[1],[1]]),[[3,3,4,2]] , place)
+    feed1 = {'input_data':a,'label':b}
+    loss= exe.run(train_program,feed=feed1, fetch_list=[crf_cost])
+    print(loss)
+    #using padding, define network
+    train_program = fluid.Program()
+    startup_program = fluid.Program()
+    with fluid.program_guard(train_program, startup_program):
+        input_data2 = fluid.layers.data(name='input_data2', shape=[10,10], dtype='float32')
+        label2 = fluid.layers.data(name='label2', shape=[10,1], dtype='int')
+        label_length = fluid.layers.data(name='length', shape=[1], dtype='int')
+        emission2= fluid.layers.fc(input=input_data2, size=10, act="tanh", num_flatten_dims=2)
+        crf_cost2 = fluid.layers.linear_chain_crf(
+            input=emission2,
+            label=label2,
+            length=label_length,
+            param_attr=fluid.ParamAttr(
+             name='crfw',
+             learning_rate=0.01))
+    use_cuda = False
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(startup_program)
+    #define input data
+    cc=np.random.rand(4,10,10).astype('float32')
+    dd=np.random.rand(4,10,1).astype('int64')
+    ll=np.array([[3,3,4,2]])
+    feed2 = {'input_data2':cc,'label2':dd,'length':ll}
+    loss2= exe.run(train_program,feed=feed2, fetch_list=[crf_cost2])
+    print(loss2)
+    """
+    output:
+    [array([[ 7.8902354],
+            [ 7.3602567],
+            [ 10.004011],
+            [ 5.86721  ]], dtype=float32)]
+    """
\ No newline at end of file
--- a/doc/paddle/api/paddle/nn/functional/linear_lr_warmup_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/linear_lr_warmup_cn.rst
--- a/doc/paddle/api/paddle/linspace_cn.rst
+++ b/doc/paddle/api/paddle/linspace_cn.rst
--- a/doc/paddle/api/paddle/static/load_cn.rst
+++ b/doc/paddle/api/paddle/static/load_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/locality_aware_nms_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/locality_aware_nms_cn.rst
+.. _cn_api_fluid_layers_locality_aware_nms:
+locality_aware_nms
+-------------------------------
+.. py:function:: paddle.fluid.layers.locality_aware_nms(bboxes, scores, score_threshold, nms_top_k, keep_top_k, nms_threshold=0.3, normalized=True, nms_eta=1.0, background_label=-1, name=None)
+**局部感知NMS**
+`局部感知NMS <https://arxiv.org/abs/1704.03155>`_ 用于对边界框（bounding box）和评分（scores）执行局部感知非极大值抑制（LANMS）。
+首先，根据边界框之间的IOU(交并比)，对边界框和评分进行融合。
+在NMS中，如果提供 ``score_threshold`` 阈值，则此OP贪心地选择所有得分（scores）高于 ``score_threshold`` 的检测边界框（bounding box）的子集，如果nms_top_k大于-1，则选择最大的nms_top_k置信度分数。 接着，该OP依据 adaptive nms（基于 ``nms_threshold`` 和 ``nms_eta``），删除与已选择的框IOU(交并比)高于nms_threshold 的重叠框。
+在NMS步骤后，如果keep_top_k大于-1，则每个图像最多保留keep_top_k个框（bounding box）。
+参数：
+    - **bboxes**  (Variable) – 支持两种类型的边界框（bounding box）:
+      1. （Tensor）形为[N，M，4 或 8、16、24、32]的3-D张量，表示M个边界框的预测位置， N是批大小（batch size）。当边界框(bounding box)大小等于4时，每个边界框有四个坐标值，布局为 :math:`[xmin, ymin, xmax, ymax]` 。数据类型为float32或float64。
+    - **scores**  (Variable) – 支持两种类型的分数：
+      1. （Tensor）具有形状 :math:`[N, C, M]` 的3-D张量表示预测的置信度。 N是批量大小 batch size，C是种类数目，M是边界框bounding box的数量。目前仅支持单个类别，所以输入维度应为 :math:`[N, 1, M]` 。请注意，M等于bboxes的第二维。数据类型为float32或float64。
+    - **background_label**  (int) – 背景标签（类别）的索引，如果设置为 0 ，则忽略背景标签（类别）。如果设置为 -1 ，则考虑所有类别。默认值：-1
+    - **score_threshold**  (float) – 过滤掉低置信度分数的边界框的阈值。如果没有提供，请考虑所有边界框。
+    - **nms_top_k**  (int) – 基于 score_threshold 的过滤检测后，根据置信度保留的最大检测次数。
+    - **nms_threshold**  (float) – 在LANMS中用于融合检测框和剔除检测框IOU的阈值，默认值：0.3 。
+    - **nms_eta**  (float) – 在NMS中用于调整 nms_threshold 的参数，设为1时表示nms_threshold不变。默认值：1.0 。
+    - **keep_top_k**  (int) – NMS步骤后每个图像要保留的总bbox数。 -1表示在NMS步骤之后保留所有bbox。
+    - **normalized**  (bool) –  检测框是否已经经过归一化。默认值：True 。
+    - **name** (str|None) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：形为[No，6]的2-D LoDTensor，表示检测(detections)结果。每行有6个值：[标签label，置信度confidence，xmin，ymin，xmax，ymax]。或形为[No，10]的2-D LoDTensor，用来表示检测结果。 每行有10个值：[标签label，置信度confidence，x1，y1，x2，y2，x3，y3，x4，y4]。 No是检测的总数。 如果对所有图像都没有检测到的box，则lod将设置为{1}，而Out仅包含一个值-1。 （1.3版本之后，当未检测到box时，lod从{0}更改为{1}）
+返回类型：Variable，数据类型与输入一致。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    boxes = fluid.data(name='bboxes', shape=[None, 81, 8],
+                              dtype='float32')
+    scores = fluid.data(name='scores', shape=[None, 1, 81],
+                              dtype='float32')
+    out = fluid.layers.locality_aware_nms(bboxes=boxes,
+                                      scores=scores,
+                                      score_threshold=0.5,
+                                      nms_top_k=400,
+                                      nms_threshold=0.3,
+                                      keep_top_k=200,
+                                      normalized=False)
--- a/doc/paddle/api/paddle/fluid/layers/lod_append_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/lod_append_cn.rst
+.. _cn_api_fluid_layers_lod_append:
+lod_append
+-------------------------------
+.. py:function:: paddle.fluid.layers.lod_append(x, level)
+给 ``x`` 的LoD添加 ``level`` 。
+简单示例：
+.. code-block:: text
+    give a 1-level LodTensor x:
+        x.lod = [[2, 3, 1]]
+        x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        x.dims = [6, 1]
+    level:[1, 1, 1, 1, 1, 1]
+    Then we get a 2-level LodTensor:
+        x.lod = [[2, 3, 1], [1, 1, 1, 1, 1, 1]]
+        x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        x.dims = [6, 1]
+参数：
+    - **x** (Variable)-输入变量，可以是LoDTensor或tensor。
+    - **level** (list|tuple|Variable)-预添加到x的LoD里的LoD level。
+返回：一个有着新的LoD level的输出变量
+返回类型：Variable
+Raise: ``ValueError`` - 如果x为None或者level不可迭代。
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[6, 10], lod_level=1)
+    out = fluid.layers.lod_append(x, [1,1,1,1,1,1])
--- a/doc/paddle/api/paddle/fluid/layers/lod_reset_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/lod_reset_cn.rst
+.. _cn_api_fluid_layers_lod_reset:
+lod_reset
+-------------------------------
+.. py:function:: paddle.fluid.layers.lod_reset(x, y=None, target_lod=None)
+根据给定的参数 ``y`` 或 ``target_lod`` ，重设输入 ``x`` (LoDTensor) 的 LoD 信息。
+参数：
+    - **x** (Variable) : 输入变量，类型为 Tensor 或者 LoDTensor。
+    - **y** (Variable|None) : 当 ``y`` 非空时，输出 LoDTensor 的 LoD 信息将与 ``y`` 的 LoD 一致。
+    - **target_lod** (list|tuple|None) : 一级 LoD，当 ``y`` 为空时，输出 LoDTensor 的 LoD 信息将与 ``target_lod`` 一致。
+返回:
+    - Variable (LoDTensor)，重设了 LoD 信息的 LoDTensor。
+返回类型：
+    - Variable (LoDTensor)。
+抛出异常：
+    - ``TypeError`` : 当 ``y`` 和 ``target_lod`` 二者均为空时抛出此异常。
+::
+    * 例 1:
+    x: 包含一级 LoD 信息的 LoDTensor
+        x.lod =  [[ 2,           3,                   1 ]]
+        x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        x.dims = [6, 1]
+    y: None
+    target_lod: [4, 2]
+    Output: 包含一级 LoD 信息的 LoDTensor
+        out.lod =  [[4,                          2]]
+        out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        out.dims = [6, 1]
+    * 例 2:
+    x: 包含一级 LoD 信息的 LoDTensor
+        x.lod =  [[2,            3,                   1]]
+        x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        x.dims = [6, 1]
+    y: 普通 Tensor，不含 LoD 信息
+        y.data = [[2, 4]]
+        y.dims = [1, 2]
+    target_lod: 当 y 不为空时，此参数不起作用
+    Output: 包含一级 LoD 信息的 LoDTensor
+        out.lod =  [[2,            4]]
+        out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        out.dims = [6, 1]
+    * 例 3:
+    x: 包含一级 LoD 信息的 LoDTensor
+        x.lod =  [[2,            3,                   1]]
+        x.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        x.dims = [6, 1]
+    y: 包含二级 LoD 信息的 LoDTensor
+        y.lod =  [[2, 2], [2, 2, 1, 1]]
+        y.data = [[1.1], [2.1], [3.1], [4.1], [5.1], [6.1]]
+        y.dims = [6, 1]
+    target_lod: 当 y 不为空时，此参数不起作用
+    Output: 包含二级 LoD 信息的 LoDTensor
+        out.lod =  [[2, 2], [2, 2, 1, 1]]
+        out.data = [[1.0], [2.0], [3.0], [4.0], [5.0], [6.0]]
+        out.dims = [6, 1]
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    # Graph Organizing
+    x = fluid.layers.data(name='x', shape=[6])
+    y = fluid.layers.data(name='y', shape=[6], lod_level=2)
+    output = fluid.layers.lod_reset(x=x, y=y)
+    # Create an executor using CPU as an example
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    # Execute
+    x_tensor = fluid.core.LoDTensor()
+    x_tensor.set(numpy.ones([6]).astype(numpy.float32), place)
+    y_ndarray = numpy.ones([6]).astype(numpy.float32)
+    y_lod = [[2, 2], [2, 2, 1, 1]]
+    y_tensor = fluid.create_lod_tensor(y_ndarray, y_lod, place)
+    res, = exe.run(fluid.default_main_program(),
+                   feed={'x':x_tensor, 'y':y_tensor},
+                   fetch_list=[output],
+                   return_numpy=False)
+    print(res)
+    # Output Value:
+    # lod: [[0, 2, 4], [0, 2, 4, 5, 6]]
+    # dim: 6
+    # layout: NCHW
+    # dtype: float
+    # data: [1 1 1 1 1 1]
--- a/doc/paddle/api/paddle/log_cn.rst
+++ b/doc/paddle/api/paddle/log_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/log_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/log_loss_cn.rst
--- a/doc/paddle/api/paddle/logical_and_cn.rst
+++ b/doc/paddle/api/paddle/logical_and_cn.rst
--- a/doc/paddle/api/paddle/logical_not_cn.rst
+++ b/doc/paddle/api/paddle/logical_not_cn.rst
--- a/doc/paddle/api/paddle/logical_or_cn.rst
+++ b/doc/paddle/api/paddle/logical_or_cn.rst
--- a/doc/paddle/api/paddle/logical_xor_cn.rst
+++ b/doc/paddle/api/paddle/logical_xor_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/logsigmoid_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/logsigmoid_cn.rst
+.. _cn_api_fluid_layers_logsigmoid:
+logsigmoid
+-------------------------------
+.. py:function:: paddle.fluid.layers.logsigmoid(x, name=None)
+:alias_main: paddle.nn.functional.logsigmoid
+:alias: paddle.nn.functional.logsigmoid,paddle.nn.functional.activation.logsigmoid
+:old_api: paddle.fluid.layers.logsigmoid
+Logsigmoid激活函数
+.. math::
+    out = \log \frac{1}{1 + e^{-x}}
+参数:
+    - **x** (Variable)- 张量（Tensor）
+    - **name** (str|None) - 该层名称（可选），若设为None，则自动为该层命名。
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.logsigmoid(data)
--- a/doc/paddle/api/paddle/nn/functional/lrn_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/lrn_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/lstm_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/lstm_cn.rst
+.. _cn_api_fluid_layers_lstm:
+lstm
+-------------------------------
+.. py:function::  paddle.fluid.layers.lstm(input, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=0.0, is_bidirec=False, is_test=False, name=None, default_initializer=None, seed=-1)
+:api_attr: 声明式编程模式（静态图)
+.. note::
+    该OP仅支持 GPU 设备运行
+该OP实现了 LSTM，即 Long-Short Term Memory（长短期记忆）运算 - `Hochreiter, S., & Schmidhuber, J. (1997) <https://www.bioinf.jku.at/publications/older/2604.pdf>`_。
+该OP的实现不包括 diagonal/peephole 连接，参见 `Gers, F. A., & Schmidhuber, J. (2000) <ftp://ftp.idsia.ch/pub/juergen/TimeCount-IJCNN2000.pdf>`_。
+如果需要使用 peephole 连接方法，请使用 :ref:`cn_api_fluid_layers_dynamic_lstm` 。
+该OP对于序列中每一个时间步的计算公式如下：
+.. math::
+  i_t = \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_{x_i} + b_{h_i})
+.. math::
+  f_t = \sigma(W_{fx}x_{t} + W_{fh}h_{t-1} + b_{x_f} + b_{h_f})
+.. math::
+  o_t = \sigma(W_{ox}x_{t} + W_{oh}h_{t-1} + b_{x_o} + b_{h_o})
+.. math::
+  \widetilde{c_t} = tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_{x_c} + b_{h_c})
+.. math::
+  c_t = f_t \odot c_{t-1} + i_t \odot \widetilde{c_t}
+.. math::
+  h_t = o_t \odot tanh(c_t)
+公式中的概念信息如下：
+      - :math:`x_{t}` 表示时间步 :math:`t` 的输入
+      - :math:`h_{t}` 表示时间步 :math:`t` 的 hidden 状态
+      - :math:`h_{t-1}, c_{t-1}` 分别表示前一个时间步的 hidden 和 cell 状态
+      - :math:`\widetilde{c_t}` 表示候选的 cell 状态
+      - :math:`i_t` ，:math:`f_t` 和 :math:`o_t` 分别为 input gate，forget gate，output gate
+      - :math:`W` 表示 weight （例如， :math:`W_{ix}` 是在计算 input gate :math:`i_t` 时，对输入 :math:`x_{t}` 做线性变换的 weight）
+      - :math:`b` 表示 bias （例如， :math:`b_{x_i}` 和 :math:`b_{h_i}` 是 input gate 的 bias）
+      - :math:`\sigma` 表示 gate 的非线性激活函数，默认为 sigmoid
+      - :math:`\odot` 表示矩阵的 Hadamard product，即对两个维度相同的矩阵，将相同位置的元素相乘，得到另一个维度相同的矩阵
+参数：
+  - **input** ( :ref:`api_guide_Variable` ) - LSTM的输入张量，维度为 :math:`[batch\_size, seq\_len, input\_dim]` 的 3-D Tensor，其中 seq_len 为序列的长度， input_dim 为序列词嵌入的维度。数据类型为 float32 或者 float64。
+  - **init_h** ( :ref:`api_guide_Variable` ) – LSTM的初始 hidden 状态，维度为 :math:`[num\_layers, batch\_size, hidden\_size]` 的 3-D Tensor，其中 num_layers 是LSTM的总层数，hidden_size 是隐层维度。 如果is_bidirec = True， 维度应该为 :math:`[num\_layers*2, batch\_size, hidden\_size]` 。数据类型为 float32 或者 float64。
+  - **init_c** ( :ref:`api_guide_Variable` ) - LSTM的初始 cell 状态。维度为 :math:`[num\_layers, batch\_size, hidden\_size]` 的 3-D Tensor，其中 num_layers 是LSTM的总层数，hidden_size 是隐层维度。 如果is_bidirec = True， 维度应该为 :math:`[num\_layers*2, batch\_size, hidden\_size]` 。数据类型为 float32 或者 float64。
+  - **max_len** (int) – LSTM的最大长度。输入张量的序列长度 seq_len 不能大于 max_len。
+  - **hidden_size** (int) - LSTM hidden 状态的维度。
+  - **num_layers** (int) –  LSTM的总层数。例如，该参数设置为2，则会堆叠两个LSTM，其第一个LSTM的输出会作为第二个LSTM的输入。
+  - **dropout_prob** (float，可选) – dropout比例，dropout 只在 rnn 层之间工作，而不是在时间步骤之间。dropout 不作用于最后的 rnn 层的 rnn 输出中。默认值为 0.0。
+  - **is_bidirec** (bool，可选) – 是否是双向的LSTM。默认值为 False。
+  - **is_test** (bool，可选) – 是否在测试阶段。默认值为 False。
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **default_initializer** (Initializer，可选) – 用于初始化权重的初始化器，如果为None，将进行默认初始化。默认值为 None。
+  - **seed** (int，可选) – LSTM中dropout的seed，如果是-1，dropout将使用随机seed。默认值为 -1。
+返回： 经过lstm运算输出的三个Tensor的tuple，包括
+- rnn_out：LSTM hidden的输出结果的Tensor，数据类型与input一致，维度为 :math:`[batch\_size, seq\_len, hidden\_size]` 。如果 ``is_bidirec`` 设置为True，则维度为 :math:`[batch\_size, seq\_len, hidden\_size*2]`
+- last_h：LSTM最后一步的hidden状态的Tensor，数据类型与input一致，维度为 :math:`[num\_layers, batch\_size, hidden\_size]` 。如果 ``is_bidirec`` 设置为True，则维度为 :math:`[num\_layers*2, batch\_size, hidden\_size]`
+- last_c：LSTM最后一步的cell状态的Tensor，数据类型与input一致，维度为 :math:`[num\_layers, batch\_size, hidden\_size]` 。如果 ``is_bidirec`` 设置为True，则维度为 :math:`[num\_layers*2, batch\_size, hidden\_size]`
+返回类型:  tuple（ :ref:`api_guide_Variable` , :ref:`api_guide_Variable` , :ref:`api_guide_Variable` ）
+**代码示例：**
+.. code-block:: python
+  import paddle.fluid as fluid
+  import paddle.fluid.layers as layers
+  emb_dim = 256
+  vocab_size = 10000
+  data = fluid.layers.data(name='x', shape=[-1, 100, 1],
+                 dtype='int64')
+  emb = fluid.layers.embedding(input=data, size=[vocab_size, emb_dim], is_sparse=True)
+  batch_size = 20
+  max_len = 100
+  dropout_prob = 0.2
+  hidden_size = 150
+  num_layers = 1
+  init_h = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 )
+  init_c = layers.fill_constant( [num_layers, batch_size, hidden_size], 'float32', 0.0 )
+  rnn_out, last_h, last_c = layers.lstm(emb, init_h, init_c, max_len, hidden_size, num_layers, dropout_prob=dropout_prob)
+  rnn_out.shape  # (-1, 100, 150)
+  last_h.shape  # (1, 20, 150)
+  last_c.shape  # (1, 20, 150)
--- a/doc/paddle/api/paddle/fluid/layers/lstm_unit_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/lstm_unit_cn.rst
+.. _cn_api_fluid_layers_lstm_unit:
+lstm_unit
+-------------------------------
+.. py:function:: paddle.fluid.layers.lstm_unit(x_t, hidden_t_prev, cell_t_prev, forget_bias=0.0, param_attr=None, bias_attr=None, name=None)
+:api_attr: 声明式编程模式（静态图)
+Long-Short Term Memory（LSTM）循环神经网络计算单元。该OP用于完成单个时间步内LSTM的计算，基于论文 `RECURRENT NEURAL NETWORK REGULARIZATION <http://arxiv.org/abs/1409.2329>`_ 中的描述实现，
+并在forget gate（遗忘门）中增加了 ``forget_bias`` 来控制遗忘力度，公式如下：
+.. math::
+    i_{t} &= \sigma \left ( W_{x_{i}}x_{t}+W_{h_{i}}h_{t-1}+b_{i} \right ) \\
+    f_{t} &= \sigma \left ( W_{x_{f}}x_{t}+W_{h_{f}}h_{t-1}+b_{f}+forget\_bias \right ) \\
+    c_{t} &= f_{t}c_{t-1}+i_{t}tanh\left ( W_{x_{c}}x_{t} +W_{h_{c}}h_{t-1}+b_{c}\right ) \\
+    o_{t} &= \sigma \left ( W_{x_{o}}x_{t}+W_{h_{o}}h_{t-1}+b_{o} \right ) \\
+    h_{t} &= o_{t}tanh \left ( c_{t} \right )
+其中， :math:`x_{t}` 对应 ``x_t``， 表示当前时间步的输入； :math:`h_{t-1}` 和 :math:`c_{t-1}` 对应 ``hidden_t_prev`` 和 ``cell_t_prev``，表示上一时间步的hidden和cell输出；
+:math:`i_{t}, f_{t}, c_{t}, o_{t}, h_{t}` 分别为input gate（输入门）、forget gate（遗忘门）、cell、output gate（输出门）和hidden的计算。
+参数：
+    - **x_t** (Variable) - 表示当前时间步的输入的Tensor，形状为 :math:`[N, M]` ，其中 :math:`N` 为batch_size， :math:`M` 为输入的特征维度大小。数据类型为float32或float64。
+    - **hidden_t_prev** (Variable) - 表示前一时间步hidden输出的Tensor，形状为 :math:`[N, D]`，其中 :math:`N` 为batch_size， :math:`D` 为LSTM中隐单元的数目。数据类型与 ``x_t`` 相同。
+    - **cell_t_prev** (Variable) - 表示前一时间步cell输出的Tensor，和  ``hidden_t_prev`` 具有相同形状和数据类型。
+    - **forget_bias** (float，可选) - 额外添加在遗忘门中的偏置项(参见公式)。默认值为0。
+    - **param_attr** (ParamAttr，可选) – 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **bias_attr** (ParamAttr，可选) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：Variable的二元组，包含了两个形状和数据类型均与 ``hidden_t_prev`` 相同的Tensor，分别表示hidden和cell输出，即公式中的 :math:`h_{t}` 和 :math:`c_{t}` 。
+返回类型：tuple
+抛出异常:
+    - :code:`ValueError`： ``x_t`` 的阶不为2
+    - :code:`ValueError`： ``hidden_t_prev`` 的阶不为2
+    - :code:`ValueError`： ``cell_t_prev`` 的阶不为2
+    - :code:`ValueError`： ``x_t`` 、``hidden_t_prev`` 和 ``cell_t_prev`` 的第一维大小必须相同
+    - :code:`ValueError`： ``hidden_t_prev`` 和 ``cell_t_prev`` 的第二维大小必须相同
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    dict_dim, emb_dim, hidden_dim = 128, 64, 512
+    data = fluid.data(name='step_data', shape=[None], dtype='int64')
+    x = fluid.embedding(input=data, size=[dict_dim, emb_dim])
+    pre_hidden = fluid.data(
+        name='pre_hidden', shape=[None, hidden_dim], dtype='float32')
+    pre_cell = fluid.data(
+        name='pre_cell', shape=[None, hidden_dim], dtype='float32')
+    hidden = fluid.layers.lstm_unit(
+        x_t=x,
+        hidden_t_prev=pre_hidden,
+        cell_t_prev=pre_cell)
--- a/doc/paddle/api/paddle/fluid/layers/margin_rank_loss_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/margin_rank_loss_cn.rst
+.. _cn_api_fluid_layers_margin_rank_loss:
+margin_rank_loss
+-------------------------------
+.. py:function:: paddle.fluid.layers.margin_rank_loss(label, left, right, margin=0.1, name=None)
+:alias_main: paddle.nn.functional.margin_rank_loss
+:alias: paddle.nn.functional.margin_rank_loss,paddle.nn.functional.loss.margin_rank_loss
+:old_api: paddle.fluid.layers.margin_rank_loss
+margin rank loss（间隔排序损失）层。在排序问题中，它可以比较来自排序网络的输入 ``left`` 和输入 ``right`` 的得分。
+可用如下等式定义：
+.. math::
+    rank\_loss = max(0, -label * (left - right) + margin)
+参数:
+  - **label** (Variable) – 表示输入 ``left`` 的真实排序是否高于输入 ``right`` , 数据类型为 float32。
+  - **left** (Variable) – 输入 ``left`` 的排序得分， 数据类型为 float32 。
+  - **right** (Variable) – 输入 ``right`` 的排序得分， 数据类型为 float32。
+  - **margin** (float) – 指定的间隔。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回： 排序损失
+返回类型: Variable
+抛出异常:
+  - ``ValueError`` - ``label`` , ``left`` , ``right`` 有一者不为Variable类型时，抛出此异常
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    label = fluid.layers.data(name="label", shape=[-1, 1], dtype="float32")
+    left = fluid.layers.data(name="left", shape=[-1, 1], dtype="float32")
+    right = fluid.layers.data(name="right", shape=[-1, 1], dtype="float32")
+    out = fluid.layers.margin_rank_loss(label, left, right)
--- a/doc/paddle/api/paddle/fluid/layers/matmul_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/matmul_cn.rst
+.. _cn_api_fluid_layers_matmul:
+matmul
+-------------------------------
+.. py:function:: paddle.fluid.layers.matmul(x, y, transpose_x=False, transpose_y=False, alpha=1.0, name=None)
+输入 ``x`` 和输入 ``y`` 矩阵相乘。
+两个输入的形状可为任意维度，但当任一输入维度大于3时，两个输入的维度必须相等。
+实际的操作取决于 ``x`` 、 ``y`` 的维度和 ``transpose_x`` 、 ``transpose_y`` 的布尔值。具体如下：
+- 如果 ``transpose`` 为真，则对应 Tensor 的后两维会转置。假定 ``x`` 是一个 shape=[D] 的一维 Tensor，则 ``x`` 非转置形状为 [1, D]，转置形状为 [D, 1]。转置之后的输入形状需满足矩阵乘法要求，即 `x_width` 与 `y_height` 相等。
+- 转置后，输入的两个 Tensor 维度将为 2-D 或 n-D，将根据下列规则矩阵相乘：
+    - 如果两个矩阵都是 2-D，则同普通矩阵一样进行矩阵相乘。
+    - 如果任意一个矩阵是 n-D，则将其视为带 batch 的二维矩阵乘法。
+- 如果原始 Tensor x 或 y 的秩为 1 且未转置，则矩阵相乘后的前置或附加维度 1 将移除。
+参数：
+    - **x** (Variable) : 输入变量，类型为 Tensor 或 LoDTensor。
+    - **y** (Variable) : 输入变量，类型为 Tensor 或 LoDTensor。
+    - **transpose_x** (bool) : 相乘前是否转置 x。
+    - **transpose_y** (bool) : 相乘前是否转置 y。
+    - **alpha** (float) : 输出比例，默认为 1.0。
+    - **name** (str|None) : 该层名称（可选），如果设置为空，则自动为该层命名。
+返回：
+    - Variable (Tensor / LoDTensor)，矩阵相乘后的结果。
+返回类型：
+    - Variable（变量）。
+::
+    * 例 1:
+    x: [B, ..., M, K], y: [B, ..., K, N]
+    out: [B, ..., M, N]
+    * 例 2:
+    x: [B, M, K], y: [B, K, N]
+    out: [B, M, N]
+    * 例 3:
+    x: [B, M, K], y: [K, N]
+    out: [B, M, N]
+    * 例 4:
+    x: [M, K], y: [K, N]
+    out: [M, N]
+    * 例 5:
+    x: [B, M, K], y: [K]
+    out: [B, M]
+    * 例 6:
+    x: [K], y: [K]
+    out: [1]
+    * 例 7:
+    x: [M], y: [N]
+    out: [M, N]
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    # Graph Organizing
+    x = fluid.layers.data(name='x', shape=[2, 3], dtype='float32')
+    y = fluid.layers.data(name='y', shape=[3, 2], dtype='float32')
+    output = fluid.layers.matmul(x, y, True, True)
+    # Create an executor using CPU as an example
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    # Execute
+    input_x = numpy.ones([2, 3]).astype(numpy.float32)
+    input_y = numpy.ones([3, 2]).astype(numpy.float32)
+    res, = exe.run(fluid.default_main_program(),
+                   feed={'x':input_x, 'y':input_y},
+                   fetch_list=[output])
+    print(res)
+    '''
+    Output Value:
+    [[2. 2. 2.]
+     [2. 2. 2.]
+     [2. 2. 2.]]
+    '''
--- a/doc/paddle/api/paddle/fluid/layers/matrix_nms_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/matrix_nms_cn.rst
+.. _cn_api_fluid_layers_matrix_nms:
+matrix_nms
+-------------------------------
+.. py:function:: paddle.fluid.layers.matrix_nms(bboxes, scores, score_threshold, post_threshold, nms_top_k, keep_top_k, use_gaussian=False, gaussian_sigma=2., background_label=0, normalized=True, return_index=False, name=None)
+:alias_main: paddle.nn.functional.matrix_nms
+:alias: paddle.nn.functional.matrix_nms,paddle.nn.functional.extension.matrix_nms
+:old_api: paddle.fluid.layers.matrix_nms
+**Matrix NMS**
+该OP使用Matrix NMS算法对边界框（bounding box）和评分（scores）执行多类非极大值抑制（NMS）。
+如果提供 ``score_threshold`` 阈值且 ``nms_top_k`` 大于-1，则选择置信度分数最大的k个框。 然后按照Matrix NMS算法对分数进行衰减。经过抑制后，如果 ``keep_top_k`` 大于-1, 则每张图片最终保留 ``keep_top_k`` 个检测框。
+在NMS步骤后，如果keep_top_k大于-1，则每个图像最多保留keep_top_k个框（bounding box）。
+参数：
+    - **bboxes**  (Variable) - 形为[N，M，4]的3-D张量，表示将预测M个边界框的预测位置， N是批大小（batch size）。当边界框(bounding box)大小等于4时，每个边界框有四个坐标值，布局为[xmin，ymin，xmax，ymax]。数据类型为float32或float64。
+    - **scores**  (Variable) – 形为[N，C，M]的3-D张量，表示预测的置信度。 N是批大小（batch size），C是种类数目，M是边界框bounding box的数量。对于每个类别，存在对应于M个边界框的总M个分数。请注意，M等于bboxes的第二维。数据类型为float32或float64。
+    - **score_threshold**  (float) – 过滤掉低置信度分数的边界框的阈值。
+    - **post_threshold**  (float) – 经过NMS衰减后，过滤掉低置信度分数的边界框的阈值。
+    - **nms_top_k**  (int) – 基于 score_threshold 过滤检测框后，根据置信度保留的最大检测框数量。
+    - **keep_top_k**  (int) – 经过NMS抑制后, 最终保留的最大检测框数量。如果设置为 -1 ，则保留全部。
+    - **use_gaussian**  (bool) –  是否使用高斯函数衰减。默认值：False 。
+    - **gaussian_sigma**  (float) – 高斯函数的Sigma值，默认值：2.0 。
+    - **background_label**  (int) – 背景标签（类别）的索引，如果设置为 0 ，则忽略背景标签（类别）。如果设置为 -1 ，则考虑所有类别。默认值：0
+    - **normalized**  (bool) –  检测是否已经经过归一化。默认值：True 。
+    - **return_index**  (bool) –  是否同时返回保留检测框的序号。默认值：False 。
+    - **name** (str|None) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：
+    - **Out**  (Variable) - 形为[No，6]的2-D LoDTensor，表示检测结果。每行有6个值：[标签label，置信度confidence，xmin，ymin，xmax，ymax]。或形为[No，10]的2-D LoDTensor，用来表示检测结果。 每行有10个值：[标签label，置信度confidence，x1，y1，x2，y2，x3，y3，x4，y4]。 No是检测的总数。 如果对所有图像都没有检测到的box，则lod将设置为{1}，而Out仅包含一个值-1。 （1.3版本之后，当未检测到box时，lod从{0}更改为{1}）
+    - **Index**  (Variable) - 形为[No，1]的2-D LoDTensor，表示检测结果在整个批次中的序号。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    boxes = fluid.data(name='bboxes', shape=[None,81, 4],
+                              dtype='float32', lod_level=1)
+    scores = fluid.data(name='scores', shape=[None,81],
+                              dtype='float32', lod_level=1)
+    out = fluid.layers.matrix_nms(bboxes=boxes,
+                                  scores=scores,
+                                  background_label=0,
+                                  score_threshold=0.5,
+                                  post_threshold=0.1,
+                                  nms_top_k=400,
+                                  keep_top_k=200,
+                                  normalized=False)
--- a/doc/paddle/api/paddle/nn/functional/maxout_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/maxout_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/mean_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/mean_cn.rst
+.. _cn_api_fluid_layers_mean:
+mean
+-------------------------------
+.. py:function:: paddle.fluid.layers.mean(x, name=None)
+:alias_main: paddle.mean
+:alias: paddle.mean,paddle.tensor.mean,paddle.tensor.stat.mean
+:old_api: paddle.fluid.layers.mean
+计算 ``x`` 所有元素的平均值。
+参数：
+        - **x** (Variable) : Tensor 或 LoDTensor。均值运算的输入。
+        - **name** (basestring | None) : 输出变量的名称。
+返回：
+        - Variable: 包含输出均值的 Tensor / LoDTensor。
+返回类型：
+        - Variable（变量）。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    # Graph Organizing
+    input = fluid.layers.data(
+        name='data', shape=[2, 3], dtype='float32')
+    output = fluid.layers.mean(input)
+    # Create an executor using CPU as an example
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    # Execute
+    x_ndarray = numpy.ones([2, 3]).astype(numpy.float32)
+    res, = exe.run(fluid.default_main_program(),
+                   feed={'data':x_ndarray},
+                   fetch_list=[output])
+    print(res)
+    '''
+    Output Value:
+    [1.]
+    '''
--- a/doc/paddle/api/paddle/fluid/layers/merge_selected_rows_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/merge_selected_rows_cn.rst
+.. _cn_api_fluid_layers_merge_selected_rows:
+merge_selected_rows
+-------------------------------
+.. py:function:: paddle.fluid.layers.merge_selected_rows(x, name=None)
+累加合并 `SelectedRows <https://github.com/PaddlePaddle/Paddle/blob/develop/paddle/fluid/framework/selected_rows.h>`_ ( ``x`` ) 中的重复行，并对行值由小到大重新排序。
+参数:
+  - x (Variable) : 类型为 SelectedRows，选中行允许重复。
+  - name (basestring|None) : 输出变量名称。
+返回:
+  - 含有 SelectedRows 的 Variable，选中行不重复。
+返回类型:
+  - Variable（变量）。
+**代码示例**
+..  code-block:: python
+  import paddle.fluid as fluid
+  import numpy
+  place = fluid.CPUPlace()
+  block = fluid.default_main_program().global_block()
+  var = block.create_var(name="X2",
+                         dtype="float32",
+                         persistable=True,
+                         type=fluid.core.VarDesc.VarType.SELECTED_ROWS)
+  y = fluid.layers.merge_selected_rows(var)
+  z = fluid.layers.get_tensor_from_selected_rows(y)
+  x_rows = [0, 2, 2, 4, 19]
+  row_numel = 2
+  np_array = numpy.ones((len(x_rows), row_numel)).astype("float32")
+  x = fluid.global_scope().var("X2").get_selected_rows()
+  x.set_rows(x_rows)
+  x.set_height(20)
+  x_tensor = x.get_tensor()
+  x_tensor.set(np_array, place)
+  exe = fluid.Executor(place=place)
+  result = exe.run(fluid.default_main_program(), fetch_list=[z])
+  print("x_rows: ", x_rows)
+  print("np_array: ", np_array)
+  print("result: ", result)
+  '''
+  Output Values:
+  ('x_rows: ', [0, 2, 2, 4, 19])
+  ('np_array: ', array([[1., 1.],
+         [1., 1.],
+         [1., 1.],
+         [1., 1.],
+         [1., 1.]], dtype=float32))
+  ('result: ', [array([[1., 1.],
+         [2., 2.],
+         [1., 1.],
+         [1., 1.]], dtype=float32)])
+  '''
--- a/doc/paddle/api/paddle/nn/functional/loss/mse_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/loss/mse_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/mul_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/mul_cn.rst
+.. _cn_api_fluid_layers_mul:
+mul
+-------------------------------
+.. py:function:: paddle.fluid.layers.mul(x, y, x_num_col_dims=1, y_num_col_dims=1, name=None)
+mul算子
+此运算是用于对输入x和y执行矩阵乘法。
+公式是：
+.. math::
+        Out = x * y
+输入x和y都可以携带LoD（详细程度）信息。但输出仅与输入x共享LoD信息。
+参数：
+    - **x** (Variable) - 乘法运算的第一个输入张量Tensor/LoDTensor。
+    - **y** (Variable) - 乘法运算的第二个输入张量Tensor/LoDTensor。
+    - **x_num_col_dims** (int，可选) - 默认值1， 可以将具有两个以上维度的张量作为输入。如果输入x是具有多于两个维度的张量，则输入x将先展平为二维矩阵。展平规则是：前 ``x_num_col_dims`` 个维度将被展平成最终矩阵的第一个维度（矩阵的高度），其余的 rank(x) - ``x_num_col_dims`` 个维度被展平成最终矩阵的第二个维度（矩阵的宽度）。结果是展平矩阵的高度等于x的前 ``x_num_col_dims`` 个维度大小的乘积，展平矩阵的宽度等于x的其余 rank(x) - ``x_num_col_dims`` 个维度大小的乘积。例如，假设x是一个5-D张量，形状为（2,3,4,5,6），并且 ``x_num_col_dims`` 的值为3。 则展平后的张量的形状即为（2x3x4,5x6）=（24,30）。
+    - **y_num_col_dims** (int，可选) - 默认值1， 可以将具有两个以上维度的张量作为输入。如果输入y是具有多于两个维度的张量，则y将首先展平为二维矩阵。 ``y_num_col_dims`` 属性确定y的展平方式。有关更多详细信息，请参阅 ``x_num_col_dims`` 的注释。
+    - **name** (str，可选) - 输出的名称。该参数供开发人员打印调试信息时使用，具体用法参见 :ref:`api_guide_name`，默认为：None。
+返回：Variable(Tensor)乘法运算输出张量。
+返回类型：变量(Variable)。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    dataX = fluid.layers.data(name="dataX", append_batch_size = False, shape=[2, 5], dtype="float32")
+    dataY = fluid.layers.data(name="dataY", append_batch_size = False, shape=[5, 3], dtype="float32")
+    output = fluid.layers.mul(dataX, dataY,
+                              x_num_col_dims = 1,
+                              y_num_col_dims = 1)
--- a/doc/paddle/api/paddle/static/nn/multi_box_head_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/multi_box_head_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/multiclass_nms_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/multiclass_nms_cn.rst
--- a/doc/paddle/api/paddle/multiplex_cn.rst
+++ b/doc/paddle/api/paddle/multiplex_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/natural_exp_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/natural_exp_decay_cn.rst
--- a/doc/paddle/api/paddle/static/nn/nce_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/nce_cn.rst
--- a/doc/paddle/api/paddle/static/py_func_cn.rst
+++ b/doc/paddle/api/paddle/static/py_func_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/noam_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/noam_decay_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/not_equal_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/not_equal_cn.rst
+.. _cn_api_fluid_layers_not_equal:
+not_equal
+-------------------------------
+.. py:function:: paddle.fluid.layers.not_equal(x, y, cond=None, name=None)
+:alias_main: paddle.not_equal
+:alias: paddle.not_equal,paddle.tensor.not_equal,paddle.tensor.logic.not_equal
+:old_api: paddle.fluid.layers.not_equal
+该OP逐元素地返回 :math:`x != y` 的逻辑值，使用重载算子 `!=` 可以有相同的计算函数效果。
+参数：
+    - **x** (Variable) – 进行比较的第一个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。 
+    - **y** (Variable) – 进行比较的第二个输入，是一个多维的Tensor，数据类型可以是float32，float64，int32，int64。
+    - **cond** (Variable，可选) – 如果为None，则创建一个Tensor来作为进行比较的输出结果，该Tensor的shape和数据类型和输入x一致；如果不为None，则将Tensor作为该OP的输出，数据类型和数据shape需要和输入x一致。默认值为None。 
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出结果的Tensor，数据的shape和输入x一致。
+返回类型：变量（Variable），数据类型为bool类型。
+**代码示例**:
+.. code-block:: python
+     import paddle.fluid as fluid
+     import paddle.fluid.layers as layers
+     import numpy as np
+     label = layers.assign(np.array([2, 3], dtype='int32'))
+     limit = layers.assign(np.array([3, 2], dtype='int32'))
+     out = fluid.layers.not_equal(x=label, y=limit) #out=[True, True]
+     out1 = label != limit #out1=[True, True]
--- a/doc/paddle/api/paddle/nn/functional/npair_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/npair_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/one_hot_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/one_hot_cn.rst
+.. _cn_api_fluid_one_hot:
+one_hot
+-------------------------------
+.. py:function:: paddle.fluid.one_hot(input, depth, allow_out_of_range=False)
+:alias_main: paddle.nn.functional.one_hot
+:alias: paddle.nn.functional.one_hot,paddle.nn.functional.common.one_hot
+:old_api: paddle.fluid.one_hot
+该OP将输入（input）中的每个id转换为一个one-hot向量，其长度为 ``depth`` ，该id对应的向量维度上的值为1，其余维度的值为0。
+输出的Tensor（或LoDTensor）的shape是在输入shape的最后一维后面添加了depth的维度。
+- 示例1（allow_out_of_range=False）：
+.. code-block:: python
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 3, 0]
+    depth = 4
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.],
+                [0., 0., 0., 1.],
+                [1., 0., 0., 0.]]
+- 示例2 （allow_out_of_range=True）：
+.. code-block:: python
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=True
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.], 
+                [0., 0., 0., 0.], ## 这一维的值是5，超过了depth，因此填成0
+                [1., 0., 0., 0.]]
+- 示例3 （allow_out_of_range=False）：
+.. code-block:: python
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=False
+  输出：抛出 Illegal value 的异常
+    X中第2维的值是5，超过了depth，而allow_out_of_range=False表示不允许超过，因此抛异常。  
+参数：
+    - **input** (Variable) - 维度为 :math:`[N_1, ..., N_n]` 的多维Tensor或LoDTensor，维度至少1维。数据类型为int32或int64。
+    - **depth** (int) - 用于定义一个one-hot向量的长度。若输入为词id，则 ``depth`` 通常取值为词典大小。
+    - **allow_out_of_range** (bool) - 指明input中所包含的id值是否可以大于depth值。当超过depth时，如果 `allow_out_of_range` 为False，则会抛出 `Illegal value` 的异常；如果设置为True，该id对应的向量为0向量。默认值为False。
+返回：转换后的one_hot Tensor或LoDTensor，数据类型为float32。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    # 该代码对应上述第一个示例，其中输入label的shape是[4]，输出one_hot_label的shape是[4, 4]
+    label = fluid.layers.data(name="label", shape=[4], append_batch_size=False, dtype="int64")
+    one_hot_label = fluid.one_hot(input=label, depth=4)
--- a/doc/paddle/api/paddle/fluid/layers/ones_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/ones_cn.rst
+.. _cn_api_fluid_layers_ones:
+ones
+-------------------------------
+.. py:function:: paddle.fluid.layers.ones(shape,dtype,force_cpu=False)
+该OP创建形状为 ``shape`` 、数据类型为 ``dtype`` 且值全为1的Tensor。
+参数：
+    - **shape** (tuple|list|Tensor) - 输出Tensor的形状， ``shape`` 的数据类型为int32或者int64。
+    - **dtype** (np.dtype|core.VarDesc.VarType|str) - 输出Tensor的数据类型，数据类型必须为float16、float32、float64、int32或int64。
+    - **force_cpu** (bool， 可选) – 是否强制将输出Tensor写入CPU内存。如果 ``force_cpu`` 为False，则将输出Tensor写入当前所在运算设备的内存，默认为False。
+返回：值全为1的Tensor，数据类型和 ``dtype`` 定义的类型一致。
+抛出异常：
+    - ``TypeError`` - 当 ``dtype`` 不是bool、 float16、float32、float64、int32、int64和None时。
+    - ``TypeError`` - 当 ``shape`` 不是tuple、list、或者Tensor时， 当 ``shape`` 为Tensor，其数据类型不是int32或者int64时。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.ones(shape=[2, 4], dtype='float32') # [[1., 1., 1., 1.], [1., 1., 1., 1.]]
--- a/doc/paddle/api/paddle/fluid/layers/ones_like_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/ones_like_cn.rst
+.. _cn_api_fluid_layers_ones_like:
+ones_like
+-------------------------------
+.. py:function:: paddle.fluid.layers.ones_like(x, out=None)
+ones_like
+该功能创建一个形状与类型与x相似的张量，初始值为1。
+参数：
+    - **x** (Variable) - 指定形状与数据类型的输入张量
+    - **out** (Variable)-输出张量
+返回：输出张量
+返回类型：变量（Variable）
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', dtype='float32', shape=[3], append_batch_size=False)
+    data = fluid.layers.ones_like(x) # [1.0, 1.0, 1.0]
--- a/doc/paddle/api/paddle/nn/functional/pad2d_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/pad2d_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/pad_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/pad_cn.rst
+.. _cn_api_fluid_layers_pad:
+pad
+-------------------------------
+.. py:function:: paddle.fluid.layers.pad(x, paddings, pad_value=0.0, name=None)
+:alias_main: paddle.nn.functional.pad
+:alias: paddle.nn.functional.pad,paddle.nn.functional.common.pad
+:old_api: paddle.fluid.layers.pad
+该OP在Tensor上填充一个由 ``pad_value`` 给出的常数值，填充宽度由 ``paddings`` 指定。
+其中，维度 ``i`` 中 ``x`` 内容前填充的值个数用 ``paddings[2*i]`` 表示，维度 ``i`` 中 ``x`` 内容后填充的值个数用 ``paddings[2*i+1]`` 表示。
+**示例**：
+.. code-block:: text
+        Given:
+            x = [[1, 2], [3, 4]]
+            paddings = [0, 1, 1, 2]
+            pad_value = 0
+        Return:
+            out = [[0, 1, 2, 0, 0]
+                   [0, 3, 4, 0, 0]
+                   [0, 0, 0, 0, 0]]
+参数:
+    - **x** (Variable) — 多维Tensor，数据类型为float32
+    - **paddings** (list of integers) — 整数列表，指定每个维度填充值的个数。维度 ``i`` 中 ``x`` 内容前填充的值个数用 ``paddings[2*i]`` 表示，维度 ``i`` 中 ``x`` 内容后填充的值个数用 ``paddings[2*i+1]`` 表示。 ``paddings`` 长度必须是 ``rank(x)×2``
+    - **pad_value** (float32, 可选) — 用来填充的常量值，数据类型为float。默认值为0.
+    - **name** (str|None) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回： 填充后的Tensor，数据类型与输入 ``x`` 相同
+返回类型： Variable
+**代码示例**
+..  code-block:: python
+    # x 为一个秩为2的张量
+    import paddle.fluid as fluid
+    x = fluid.data(name='data', shape=[300, 300], dtype='float32')
+    out = fluid.layers.pad(x=x, paddings=[0, 1, 1, 2], pad_value=0.)
--- a/doc/paddle/api/paddle/nn/functional/pad_constant_like_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/pad_constant_like_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/piecewise_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/piecewise_decay_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/pixel_shuffle_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/pixel_shuffle_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/polygon_box_transform_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/polygon_box_transform_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/polynomial_decay_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/polynomial_decay_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/pool2d_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/pool2d_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/pool3d_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/pool3d_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/pow_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/pow_cn.rst
+.. _cn_api_fluid_layers_pow:
+pow
+-------------------------------
+.. py:function:: paddle.pow(x, exponent, name=None)
+该OP是指数激活算子：
+.. math::
+    out = x^{exponent}
+**注意：如果需要对输入进行 elementwise_pow 操作，请使用** :ref:`cn_api_fluid_layers_elementwise_pow` 。
+参数：
+    - **x** （Variable）- 多维 ``Variable``，数据类型为 ``float32`` 或 ``float64`` 。
+    - **exponent** （float32|Variable）- ``float32`` 或形状为[1]的 ``Variable``，数据类型为 ``float32``。
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置。默认值： ``None``。
+返回：维度与输入 `x` 相同的 ``Variable``，数据类型与 ``x`` 相同。
+返回类型：Variable。
+**代码示例：**
+.. code-block:: python
+            import paddle
+            import numpy as np
+            x = fluid.data(name="x", shape=[32,32], dtype="float32")
+            paddle.enable_imperative()
+            # example 1: exponent is a float
+            x_data = np.array([1, 2, 3])
+            exponent = 2
+            x = paddle.imperative.to_variable(x_data)
+            res = paddle.pow(x, exponent)
+            print(res.numpy()) # [1 4 9]
+            # example 2: exponent is a Variable
+            exponent = paddle.fill_constant(shape=[1], value=2, dtype='float32')
+            res = paddle.pow(x, exponent)
+            print(res.numpy()) # [1 4 9]
--- a/doc/paddle/api/paddle/static/nn/prelu_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/prelu_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/prior_box_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/prior_box_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/prroi_pool_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/prroi_pool_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/psroi_pool_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/psroi_pool_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/py_reader_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/py_reader_cn.rst
+.. _cn_api_fluid_layers_py_reader:
+py_reader
+-------------------------------
+.. py:function:: paddle.fluid.layers.py_reader(capacity, shapes, dtypes, lod_levels=None, name=None, use_double_buffer=True)
+:api_attr: 声明式编程模式（静态图)
+创建一个在Python端提供数据的reader
+该OP返回一个Reader Variable。该Reader提供了 ``decorate_paddle_reader()`` 和 ``decorate_tensor_provider()`` 来设置Python generator作为数据源，将数据源中的数据feed到Reader Variable。在c++端调用 ``Executor::Run()`` 时，来自generator的数据将被自动读取。与 ``DataFeeder.feed()`` 不同，数据读取进程和  ``Executor::Run()`` 进程可以使用 ``py_reader`` 并行运行。在每次数据传递开始时调用reader的 ``start()`` ，在传递结束和抛出  ``fluid.core.EOFException`` 异常后执行 ``reset()`` 。
+注意： ``Program.clone()`` （含义详见 :ref:`cn_api_fluid_Program` ）不能克隆 ``py_reader`` ，且 ``read_file`` （ ``read_file`` 含义详见 :ref:`cn_api_fluid_layers_read_file` ）调用需在声明 ``py_reader`` 的program block内。
+参数:
+  - **capacity** (int) –  ``py_reader`` 维护的缓冲区的容量（数据个数）。
+  - **shapes** (list|tuple) – 一个列表或元组，shapes[i]代表第i个数据的shape，因此shapes[i]也是元组或列表。
+  - **dtypes** (list|tuple) – 一个string的列表或元组。为 ``shapes`` 对应元素的数据类型，支持bool，float16，float32，float64，int8，int16，int32，int64，uint8。
+  - **lod_levels** (list|tuple) – lod_level的整型列表或元组
+  - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **use_double_buffer** (bool) – 是否使用双缓冲区，双缓冲区是为了预读下一个batch的数据、异步CPU -> GPU拷贝。默认值为True。
+返回：reader，从reader中可以获取feed的数据，其dtype和feed的数据dtype相同。
+返回类型：Variable
+**代码示例**
+1.py_reader 基本用法如下
+..  code-block:: python
+  import paddle
+  import paddle.fluid as fluid
+  import paddle.dataset.mnist as mnist
+  def network(image, label):
+      # 用户自定义网络，此处以softmax回归为例
+      predict = fluid.layers.fc(input=image, size=10, act='softmax')
+      return fluid.layers.cross_entropy(input=predict, label=label)
+  reader = fluid.layers.py_reader(capacity=64,
+                                  shapes=[(-1,1, 28, 28), (-1,1)],
+                                  dtypes=['float32', 'int64'])
+  reader.decorate_paddle_reader(
+      paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5),
+                            buf_size=1000))
+  img, label = fluid.layers.read_file(reader)
+  loss = network(img, label) # 一些网络定义
+  fluid.Executor(fluid.CUDAPlace(0)).run(fluid.default_startup_program())
+  exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
+  for epoch_id in range(10):
+      reader.start()
+      try:
+          while True:
+              exe.run(fetch_list=[loss.name])
+      except fluid.core.EOFException:
+          reader.reset()
+  fluid.io.save_inference_model(dirname='./model', 
+                                feeded_var_names=[img.name, label.name],
+                                target_vars=[loss], 
+                                executor=fluid.Executor(fluid.CUDAPlace(0)))
+2.训练和测试应使用不同的名称创建两个不同的py_reader，例如：
+..  code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import paddle.dataset.mnist as mnist
+    def network(reader):
+        img, label = fluid.layers.read_file(reader)
+        # 用户自定义网络，此处以softmax回归为例
+        predict = fluid.layers.fc(input=img, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=predict, label=label)
+        return fluid.layers.mean(loss)
+    # 新建 train_main_prog 和 train_startup_prog
+    train_main_prog = fluid.Program()
+    train_startup_prog = fluid.Program()
+    with fluid.program_guard(train_main_prog, train_startup_prog):
+        # 使用 fluid.unique_name.guard() 实现与test program的参数共享
+        with fluid.unique_name.guard():
+            train_reader = fluid.layers.py_reader(capacity=64,
+                                                shapes=[(-1, 1, 28, 28), (-1, 1)],
+                                                dtypes=['float32', 'int64'],
+                                                name='train_reader')
+            train_reader.decorate_paddle_reader(
+            paddle.reader.shuffle(paddle.batch(mnist.train(),
+                                batch_size=5),
+                                buf_size=500))
+            train_loss = network(train_reader) # 一些网络定义
+            adam = fluid.optimizer.Adam(learning_rate=0.01)
+            adam.minimize(train_loss)
+    # Create test_main_prog and test_startup_prog
+    test_main_prog = fluid.Program()
+    test_startup_prog = fluid.Program()
+    with fluid.program_guard(test_main_prog, test_startup_prog):
+        # 使用 fluid.unique_name.guard() 实现与train program的参数共享
+        with fluid.unique_name.guard():
+            test_reader = fluid.layers.py_reader(capacity=32,
+                                                shapes=[(-1, 1, 28, 28), (-1, 1)],
+                                                dtypes=['float32', 'int64'],
+                                                name='test_reader')
+            test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512))
+            test_loss = network(test_reader)
+    fluid.Executor(fluid.CUDAPlace(0)).run(train_startup_prog)
+    fluid.Executor(fluid.CUDAPlace(0)).run(test_startup_prog)
+    train_exe = fluid.ParallelExecutor(use_cuda=True,
+        loss_name=train_loss.name, main_program=train_main_prog)
+    test_exe = fluid.ParallelExecutor(use_cuda=True,
+        loss_name=test_loss.name, main_program=test_main_prog)
+    for epoch_id in range(10):
+        train_reader.start()
+        try:
+            while True:
+                train_exe.run(fetch_list=[train_loss.name])
+        except fluid.core.EOFException:
+            train_reader.reset()
+    test_reader.start()
+    try:
+        while True:
+            test_exe.run(fetch_list=[test_loss.name])
+    except fluid.core.EOFException:
+        test_reader.reset()
--- a/doc/paddle/api/paddle/nn/functional/random_crop_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/random_crop_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/range_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/range_cn.rst
+.. _cn_api_fluid_layers_range:
+range
+-------------------------------
+.. py:function:: paddle.fluid.layers.range(start, end, step, dtype, name=None)
+注意：推荐使用 paddle.arange
+该OP返回以步长 ``step`` 均匀分隔给定数值区间[``start``, ``end``)的1-D Tensor，数据类型为 ``dtype``。
+当 ``dtype`` 表示浮点类型时，为了避免浮点计算误差，建议给 ``end`` 加上一个极小值epsilon，使边界可以更加明确。
+参数：
+        - **start** (float|int|Tensor) - 区间起点（且区间包括此值）。当 ``start`` 类型是Tensor时，是形状为[1]且数据类型为int32、int64、float32、float64的Tensor。
+        - **end** (float|int|Tensor) - 区间终点（且通常区间不包括此值）。当 ``end`` 类型是Tensor时，是形状为[1]且数据类型为int32、int64、float32、float64的Tensor。
+        - **step** (float|int|Tensor) - 均匀分割的步长。当 ``step`` 类型是Tensor时，是形状为[1]且数据类型为int32、int64、float32、float64的Tensor。
+        - **dtype** (str|np.dtype|core.VarDesc.VarType) - 输出Tensor的数据类型，支持int32、int64、float32、float64。
+        - **name** (str, 可选) - 输出的名字。一般无需设置，默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
+返回：
+        Tensor: 以步长 ``step`` 均匀分割给定数值区间[``start``, ``end``)后得到的1-D Tensor, 数据类型为 ``dtype`` 。
+抛出异常：
+        - ``TypeError`` - 如果 ``dtype`` 不是int32、int64、float32、float64。
+代码示例：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.range(0, 10, 2, 'int32')
+    # [0, 2, 4, 6, 8]
--- a/doc/paddle/api/paddle/rank_cn.rst
+++ b/doc/paddle/api/paddle/rank_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/rank_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/rank_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/read_file_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/read_file_cn.rst
+.. _cn_api_fluid_layers_read_file:
+read_file
+-------------------------------
+.. py:function:: paddle.fluid.layers.read_file(reader)
+:api_attr: 声明式编程模式（静态图)
+从给定的reader中读取数据
+reader是一个Variable，它可以是由函数fluid.layers.py_reader()生成的reader，或者是由函数fluid.layers.double_buffer()生成的装饰Variable。
+参数：
+    - **reader** (Variable)-待处理的reader
+返回：从reader中读取的数据元组，元组数据类型为Variable
+返回类型: tuple（元组）
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    reader = fluid.layers.py_reader(capacity=64,
+                                    shapes=[(-1, 1, 28, 28), (-1, 1)],
+                                    dtypes=['float32', 'int64'])
+    image, label = fluid.layers.read_file(reader)
+    data_file = fluid.layers.double_buffer(
+        fluid.layers.batch(data_file, batch_size=64))
+    input, label = fluid.layers.read_file(data_file)
--- a/doc/paddle/api/paddle/reciprocal_cn.rst
+++ b/doc/paddle/api/paddle/reciprocal_cn.rst
--- a/doc/paddle/api/paddle/reduce_all_cn.rst
+++ b/doc/paddle/api/paddle/reduce_all_cn.rst
--- a/doc/paddle/api/paddle/reduce_any_cn.rst
+++ b/doc/paddle/api/paddle/reduce_any_cn.rst
--- a/doc/paddle/api/paddle/reduce_max_cn.rst
+++ b/doc/paddle/api/paddle/reduce_max_cn.rst
--- a/doc/paddle/api/paddle/reduce_mean_cn.rst
+++ b/doc/paddle/api/paddle/reduce_mean_cn.rst
--- a/doc/paddle/api/paddle/reduce_min_cn.rst
+++ b/doc/paddle/api/paddle/reduce_min_cn.rst
--- a/doc/paddle/api/paddle/reduce_prod_cn.rst
+++ b/doc/paddle/api/paddle/reduce_prod_cn.rst
--- a/doc/paddle/api/paddle/reduce_sum_cn.rst
+++ b/doc/paddle/api/paddle/reduce_sum_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/relu6_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/relu6_cn.rst
+.. _cn_api_fluid_layers_relu6:
+relu6
+-------------------------------
+.. py:function:: paddle.fluid.layers.relu6(x, threshold=6.0, name=None)
+:alias_main: paddle.nn.functional.relu6
+:alias: paddle.nn.functional.relu6,paddle.nn.functional.activation.relu6
+:old_api: paddle.fluid.layers.relu6
+relu6激活函数
+.. math:: out=min(max(0, x), threshold)
+参数:
+    - **x** (Variable) - 输入的多维 ``Tensor`` ，数据类型为：float32、float64。
+    - **threshold** (float) - relu6的阈值。默认值为6.0
+    - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回: 与 ``x`` 维度相同、数据类型相同的 ``Tensor``。
+返回类型: Variable
+**代码示例：**
+.. code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[-1,0],[2.5,7.8]])
+  with fluid.dygraph.guard():
+      x1 = fluid.dygraph.to_variable(in1)
+      out1 = fluid.layers.relu6(x=x1, threshold=6.0)
+      print(out1.numpy())
+      # [[0.  0. ]
+      #  [2.5 6. ]]
--- a/doc/paddle/api/paddle/fluid/layers/relu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/relu_cn.rst
+.. _cn_api_fluid_layers_relu:
+relu
+-------------------------------
+.. py:function:: paddle.fluid.layers.relu(x, name=None)
+ReLU（Rectified Linear Unit）激活函数
+.. math:: Out=max(0,x)
+参数:
+  - **x** (Variable) - 输入的多维 ``Tensor`` ，数据类型为：float32、float64。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回: 与 ``x`` 维度相同、数据类型相同的 ``Tensor`` 。
+返回类型: Variable
+**代码示例**:
+..  code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[-1,0],[1,2.6]])
+  with fluid.dygraph.guard():
+      x1 = fluid.dygraph.to_variable(in1)
+      out1 = fluid.layers.relu(x1)
+      print(out1.numpy())
+      # [[0.  0. ]
+      #  [1.  2.6]]
--- a/doc/paddle/api/paddle/fluid/layers/reorder_lod_tensor_by_rank_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/reorder_lod_tensor_by_rank_cn.rst
+.. _cn_api_fluid_layers_reorder_lod_tensor_by_rank:
+reorder_lod_tensor_by_rank
+-------------------------------
+.. py:function:: paddle.fluid.layers.reorder_lod_tensor_by_rank(x, rank_table)
+该OP根据 ``rank_table`` 中提供的 ``LoDRankTable`` 类型的顺序信息来实现对 ``X`` 的重新排列。
+接口参数 ``X`` 是由多个序列(Sequence)组成的一个批序列（Batch of Sequences）， ``rank_table`` 存储着对batch中序列重新排列的 ``LoDRankTable`` 类型的顺序信息。
+例如:
+假设在 ``rank_table`` 中存储的序列索引为 :math:`[3,0,2,1]` ， ``X`` 将会被这样重新排列：
+``X`` 中的第四个序列（即索引为3的序列，后面以此类推）会变成排列后的batch中的第一个，紧接着就是原来batch中的第一个元素，第三个元素，和第二个元素。
+简言之，若有原batch：:math:`X = [Seq0, Seq1, Seq2, Seq3]` 且 RankTable 中的索引为 :math:`[3,0,2,1]` ，那么输出即为 :math:`Out = [Seq3, Seq0, Seq2, Seq1]` ，它携带着新的LoD信息。
+如果 ``X`` 的LoD信息是空的，这表明 ``X`` 不是序列型数据。这和由多个定长为1的序列组成的batch是相同的情况。此时，该函数将对 ``X`` 中数据 在第一轴(axis)上按 ``rank_table`` 里的规则加以排列。
+例如，现有 :math:`X = [Slice0, Slice1, Slice2, Slice3]` ，并且它LoD信息为空，在 ``rank_table`` 索引为 :math:`[3, 0, 2, 1]` 。则 :math:`Out = [Slice3, Slice0, Slice2, Slice1]` ，并且不在其中追加LoD信息。
+注意：该OP对 ``X`` 进行的排序所依据的 ``LoDRankTable`` 不一定是在 ``X`` 的基础上得出来的。它可以由其他不同的序列得出，并由该OP依据这个 ``LoDRankTable`` 来对 ``X`` 排序。
+参数：
+    - **x** (Variable) - 待根据提供的 ``rank_table`` 进行排序的LoDTensor.
+    - **rank_table** (Variable) - 提供对 ``x`` 重新排列的 ``LoDRankTable`` 类型的顺序信息.
+返回： 重新排列后的LoDTensor
+返回类型: Variable
+**代码示例**：
+.. code-block:: python
+    import numpy as np
+    import paddle.fluid as fluid
+    rank_data = fluid.layers.data(name='rank_data', shape=[5], dtype='float32', lod_level=2)
+    table = fluid.layers.control_flow.lod_rank_table(rank_data, level=1)
+    data = fluid.layers.data(name='data', shape=[9], lod_level=2)
+    new_data = fluid.layers.reorder_lod_tensor_by_rank(
+                     x=data, rank_table=table)
+    place=fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    rank_tensor = fluid.create_lod_tensor(np.random.random([14,5]).astype("float32"), [[4,1], [3, 2, 2, 3, 4]], place)
+    data_ndarray = np.random.random([27, 9]).astype("float32")
+    data_lod = [[1, 2, 2, 4, 4], [2, 2, 4, 2, 2, 2, 1, 1, 2, 2, 4, 2, 1]]
+    data_tensor = fluid.create_lod_tensor(data_ndarray, data_lod, place)
+    out = exe.run(fluid.default_main_program(),feed={'data':data_tensor, 'rank_data':rank_tensor}, fetch_list=[new_data], return_numpy=False)
+    print(out[0])
+    # lod: {{0, 4, 5, 9, 11, 13}{0, 2, 6, 8, 9, 11, 13, 14, 15, 17, 19, 23, 25, 27}}
+    #shape: [27, 9]
--- a/doc/paddle/api/paddle/reshape_cn.rst
+++ b/doc/paddle/api/paddle/reshape_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/resize_bilinear_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/resize_bilinear_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/resize_nearest_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/resize_nearest_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/resize_trilinear_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/resize_trilinear_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/retinanet_detection_output_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/retinanet_detection_output_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/retinanet_target_assign_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/retinanet_target_assign_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/reverse_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/reverse_cn.rst
+.. _cn_api_fluid_layers_reverse:
+reverse
+-------------------------------
+.. py:function:: paddle.fluid.layers.reverse(x,axis)
+:alias_main: paddle.reverse
+:alias: paddle.reverse,paddle.tensor.reverse,paddle.tensor.manipulation.reverse
+:old_api: paddle.fluid.layers.reverse
+**reverse**
+该OP对输入Tensor ``x`` 在指定轴 ``axis`` 上进行数据的逆序操作。
+::
+    示例1:
+        输入是 LoDTensor 类型:
+            x = [[0, 1, 2], [3, 4, 5], [6, 7, 8]]
+            axis = [0, 1]
+        输出:
+            output = [[8, 7, 6], [5, 4, 3], [2, 1, 0]]
+    示例2:
+        输入是 LoDTensorArray 类型:
+            x = {[[0, 1], [2, 3]],
+                 [[4, 5, 6]],
+                 [[7], [8], [9]]}
+            axis = 0
+        输出:
+            output = {[[7], [8], [9]],
+                      [[4, 5, 6]],
+                      [[0, 1], [2, 3]]}
+参数：
+  - **x** (Variable) - 输入为Tensor或LoDTensorArray，数据类型支持bool，int8，int32，int64，float32和float64。若输入是LoDTensorArray类型，则返回一个逆序的LoDTensorArray，其内部Tensor元素的次序保持不变。
+  - **axis** (int|tuple|list) - 指定逆序运算的轴，取值范围是[-R, R)，R是输入 ``x`` 的Rank， ``axis`` 为负时与 ``axis`` +R 等价。如果 ``axis`` 是一个元组或列表，则在 ``axis`` 每个元素值所指定的轴上进行逆序运算。如果输入是LoDTensorArray类型，axis须是值为0的int，或shape为[1]的list ``[0]`` 、元组 ``(0,)`` 。
+返回：逆序后的Tensor，形状、数据类型和 ``x`` 一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+        import paddle.fluid as fluid
+        import numpy as np
+        data = fluid.layers.assign(np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]], dtype='float32')) # [[0., 1., 2.], [3., 4., 5.], [6., 7., 8.]]
+        result1 = fluid.layers.reverse(data, 0) # [[6., 7., 8.], [3., 4., 5.], [0., 1., 2.]]
+        result2 = fluid.layers.reverse(data, [0, 1]) # [[8., 7., 6.], [5., 4., 3.], [2., 1., 0.]]
+        # 输入为LoDTensorArray时
+        data1 = fluid.layers.assign(np.array([[0, 1, 2]], dtype='float32'))
+        data2 = fluid.layers.assign(np.array([[3, 4, 5]], dtype='float32'))
+        tensor_array = fluid.layers.create_array(dtype='float32')
+        i = fluid.layers.fill_constant(shape=[1], dtype='int64', value=0)
+        fluid.layers.array_write(data1, i, tensor_array)
+        fluid.layers.array_write(data2, i+1, tensor_array)
+        reversed_tensor_array = fluid.layers.reverse(tensor_array, 0) # {[[3, 4, 5]], [[0, 1, 2]]}
--- a/doc/paddle/api/paddle/fluid/layers/rnn_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/rnn_cn.rst
+.. _cn_api_fluid_layers_rnn:
+rnn
+-------------------------------
+.. py:function:: paddle.fluid.layers.rnn(cell, inputs, initial_states=None, sequence_length=None, time_major=False, is_reverse=False, **kwargs)
+:api_attr: 声明式编程模式（静态图)
+rnn创建一个由RNNCell :code:`cell` 指定的递归神经网络，该神经网络重复执行 :code:`cell.call()` 直至达到 :code:`inputs` 的最大长度。
+参数：
+  - **cell** (RNNCell) - RNNCell的实例。
+  - **inputs** (Variable) - 单个tensor变量或tensor变量组成的嵌套结构。当 :code:`time_major == False` 时，tensor的形状应为 :math:`[batch\_size, sequence\_length, ...]`；当 :code:`time_major == True` 时，tensor的形状应为 :math:`[sequence\_length, batch\_size, ...]`。它表示要在RNN中展开的输入。
+  - **initial_states** (Variable，可选) - 初始状态，单个tensor变量或tensor变量组成的嵌套结构，表示RNN的初始状态。如果未提供，将使用 :code:`cell.get_initial_states` 产生初始状态。默认值None。
+  - **sequence_length** (Variable，可选) - 序列长度，形状为 :math:`[batch\_size]` 的tensor。它存储每个实例的实际长度，从而使用户能够在批处理的时候，提取最后一个有效状态，以确保正确性。如果未提供，则不区分填充和非填充输入。默认值None。
+  - **time_major** (bool，可选) - 指示输入tensor和输出tensor中包含的tensor的数据组织。如果为False，则数据组织为batch为主，形状为 :math:`[batch\_size, sequence\_length, ...]`。如果为True，则数据组织为time为主，形状为 :math:`[sequence\_length, batch\_size, ...]`。默认值：False。
+  - **is_reverse** (bool，可选) - 指示是否以输入序列的相反顺序进行计算。默认值：False。
+  - **kwargs** - 其他关键字参数。参数传递给 :code:`cell.call`。
+返回：一个元组 :code:`(final_outputs, final_states)` ，包括 :code:`final_outputs` 和 :code:`final_states`，均为单个tensor变量或tensor变量的嵌套结构。:code:`final_outputs` 具有与 :code:`cell.call` 返回的 :code:`outputs` 相同的结构和数据类型，并且 :code:`final_outputs` 中的每个tensor是将所有时间步的 :code:`outputs` 中对应内容堆叠产生，因此其形状为 :math:`[batch\_size, sequence\_length, ...]` （:code:`time_major == False` 时）或 :math:`[sequence\_length, batch\_size, ...]` （:code:`time_major == True` 时）。:code:`final_states` 是最后一步的状态，因此具有和 :code:`initial_states` 相同的结构，形状和数据类型。
+返回类型：tuple
+**示例代码**
+.. code-block:: python
+  import paddle.fluid as fluid
+  inputs = fluid.data(name="inputs",
+                      shape=[-1, 32, 128],
+                      dtype="float32")
+  cell = fluid.layers.GRUCell(hidden_size=128)
+  outputs = fluid.layers.rnn(cell=cell, inputs=inputs)
--- a/doc/paddle/api/paddle/nn/functional/roi_align_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/roi_align_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/roi_perspective_transform_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/roi_perspective_transform_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/roi_pool_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/roi_pool_cn.rst
--- a/doc/paddle/api/paddle/round_cn.rst
+++ b/doc/paddle/api/paddle/round_cn.rst
--- a/doc/paddle/api/paddle/static/nn/row_conv_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/row_conv_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/rpn_target_assign_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/rpn_target_assign_cn.rst
--- a/doc/paddle/api/paddle/rsqrt_cn.rst
+++ b/doc/paddle/api/paddle/rsqrt_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/sampled_softmax_with_cross_entropy_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/sampled_softmax_with_cross_entropy_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/sampling_id_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sampling_id_cn.rst
+.. _cn_api_fluid_layers_sampling_id:
+sampling_id
+-------------------------------
+.. py:function:: paddle.fluid.layers.sampling_id(x, min=0.0, max=1.0, seed=0, dtype='float32')
+该OP从输入的多项分布中进行采样。
+参数：
+        - **x** （Variable）- 输入Tensor。一个形如[batch_size，input_feature_dimensions]的2-D Tensor。
+        - **min** （Float）- 随机的最小值。默认值为0.0。
+        - **max** （Float）- 随机的最大值。默认值为1.0。
+        - **seed** （int）- 随机种子。0表示使用系统生成的种子, 默认值为0。请注意，如果seed不为0，则此算子每次调用将生成相同的随机数。
+        - **dtype** （np.dtype | core.VarDesc.VarType | str）- 指定输出数据的类型。
+返回：采样的数据张量（Tensor）
+返回类型：变量（Variable）
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(
+    name="X",
+    shape=[13, 11],
+    dtype='float32',
+    append_batch_size=False)
+    out = fluid.layers.sampling_id(x)
--- a/doc/paddle/api/paddle/scale_cn.rst
+++ b/doc/paddle/api/paddle/scale_cn.rst
--- a/doc/paddle/api/paddle/scatter_cn.rst
+++ b/doc/paddle/api/paddle/scatter_cn.rst
--- a/doc/paddle/api/paddle/scatter_nd_add_cn.rst
+++ b/doc/paddle/api/paddle/scatter_nd_add_cn.rst
--- a/doc/paddle/api/paddle/scatter_nd_cn.rst
+++ b/doc/paddle/api/paddle/scatter_nd_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/selu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/selu_cn.rst
+.. _cn_api_fluid_layers_selu:
+selu
+-------------------------------
+.. py:function:: paddle.fluid.layers.selu(x, scale=None, alpha=None, name=None)
+:alias_main: paddle.nn.functional.selu
+:alias: paddle.nn.functional.selu,paddle.nn.functional.activation.selu
+:old_api: paddle.fluid.layers.selu
+SeLU激活函数，其公式如下：
+.. math::
+    selu= \lambda*
+    \begin{cases}
+         x                      &\quad \text{ if } x>0 \\
+         \alpha * e^x - \alpha  &\quad \text{ if } x<=0
+    \end{cases}
+输入 ``x`` 可以选择性携带LoD信息。输出和它共享此LoD信息(如果有)。
+参数:
+  - **x** (Variable) - 输入变量，数据类型为float32、float64的多维Tensor或者LoDTensor。
+  - **scale** (float，可选) – 可选，表示SeLU激活函数中的λ的值，其默认值为 1.0507009873554804934193349852946。 详情请见： `Self-Normalizing Neural Networks <https://arxiv.org/abs/1706.02515.pdf>`_。
+  - **alpha** (float，可选) – 可选，表示SeLU激活函数中的α的值，其默认值为 1.6732632423543772848170429916717。 详情请见： `Self-Normalizing Neural Networks <https://arxiv.org/abs/1706.02515.pdf>`_。
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：一个Tensor，shape和输入Tensor相同。
+返回类型：Variable(Tensor|LoDTensor)，LoD信息与输入Tensor一致。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    inputs = fluid.layers.data(name="x", shape=[2, 2], dtype="float32")
+    output = fluid.layers.selu(inputs)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    img = np.array([[0, 1],[2, 3]]).astype(np.float32)
+    res = exe.run(fluid.default_main_program(), feed={'x':img}, fetch_list=[output])
+    print(res) # [array([[0.      , 1.050701],[2.101402, 3.152103]], dtype=float32)]
--- a/doc/paddle/api/paddle/fluid/layers/sequence_concat_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_concat_cn.rst
+.. _cn_api_fluid_layers_sequence_concat:
+sequence_concat
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_concat(input, name=None)
+:api_attr: 声明式编程模式（静态图)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用concat函数（fluid.layers.** :ref:`cn_api_fluid_layers_concat` **）。**
+**该OP仅支持LoDTensor** ，通过LoDTensor的LoD信息将输入的多个LoDTensor进行连接（concat），输出连接后的LoDTensor。
+::
+    input是由多个LoDTensor组成的list：
+        input = [x1, x2]
+    其中：
+        x1.lod = [[0, 3, 5]]
+        x1.data = [[1], [2], [3], [4], [5]]
+        x1.shape = [5, 1]
+        x2.lod = [[0, 2, 4]]
+        x2.data = [[6], [7], [8], [9]]
+        x2.shape = [4, 1]
+    且必须满足：len(x1.lod[0]) == len(x2.lod[0])
+    输出为LoDTensor：
+        out.lod = [[0, 3+2, 5+4]]
+        out.data = [[1], [2], [3], [6], [7], [4], [5], [8], [9]]
+        out.shape = [9, 1]
+参数:
+        - **input** (list of Variable) – 多个LoDTensor组成的list，要求每个输入LoDTensor的LoD长度必须一致。数据类型为float32，float64或int64。
+        - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回: 输出连接后的LoDTensor，数据类型和输入一致。
+返回类型: Variable
+**代码示例**
+..  code-block:: python
+        import paddle.fluid as fluid
+        x = fluid.layers.data(name='x', shape=[10], dtype='float32')
+        y = fluid.layers.data(name='y', shape=[10], dtype='float32')
+        out = fluid.layers.sequence_concat(input=[x, y])
--- a/doc/paddle/api/paddle/fluid/layers/sequence_conv_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_conv_cn.rst
+.. _cn_api_fluid_layers_sequence_conv:
+sequence_conv
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_conv(input, num_filters, filter_size=3, filter_stride=1, padding=True, padding_start=None, bias_attr=None, param_attr=None, act=None, name=None)
+:api_attr: 声明式编程模式（静态图)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用conv2d函数（fluid.layers.** :ref:`cn_api_fluid_layers_conv2d` **）。**
+该OP在给定的卷积参数下（如卷积核数目、卷积核大小等），对输入的变长序列（sequence）LoDTensor进行卷积操作。默认情况下，该OP会自适应地在每个输入序列的两端等长地填充全0数据，以确保卷积后的序列输出长度和输入长度一致。支持通过配置 ``padding_start`` 参数来指定序列填充的行为。
+**提示：** 参数 ``padding`` 为无用参数，将在未来的版本中被移除。
+::
+    这里详细介绍数据填充操作的细节：
+    对于一个mini-batch为2的变长序列输入，分别包含3个、1个时间步（time_step），
+    假设输入input是一个[4, N]的float类型LoDTensor，为了方便，这里假设N = 2
+        input.data = [[1, 1],
+                      [2, 2],
+                      [3, 3],
+                      [4, 4]]
+        input.lod = [[0, 3, 4]]
+    即输入input总共有4个词，每个词被表示为一个2维向量。
+    Case1:
+    若 padding_start = -1，filter_size = 3，
+    则两端填充数据的长度分别为：
+        up_pad_len = max(0, -padding_start) = 1
+        down_pad_len = max(0, filter_size + padding_start - 1) = 1
+    则以此填充后的输入数据为：
+        data_after_padding = [[0, 0, 1, 1, 2, 2],
+                              [1, 1, 2, 2, 3, 3],
+                              [2, 2, 3, 3, 0, 0],
+                              [0, 0, 4, 4, 0, 0]]
+    它将和卷积核矩阵相乘得到最终的输出，假设num_filters = 3：
+        output.data = [[ 0.3234, -0.2334,  0.7433],
+                       [ 0.5646,  0.9464, -0.1223],
+                       [-0.1343,  0.5653,  0.4555],
+                       [ 0.9954, -0.1234, -0.1234]]
+        output.shape = [4, 3]     # 3 = num_filters
+        output.lod = [[0, 3, 4]]  # 保持不变
+参数：
+    - **input** (Variable) - 维度为 :math:`(M, K)` 的二维LoDTensor，仅支持lod_level为1。其中M是mini-batch的总时间步数，K是输入的 ``hidden_size`` 特征维度。数据类型为float32或float64。
+    - **num_filters** (int) - 滤波器的数量。
+    - **filter_size** (int) - 滤波器的高度（H）；不支持指定滤波器宽度（W），宽度固定取值为输入的 ``hidden_size`` 。默认值为3。
+    - **filter_stride** (int) - 滤波器每次移动的步长。目前只支持取值为1，默认为1。
+    - **padding** (bool) - **此参数不起任何作用，将在未来的版本中被移除。** 无论 ``padding`` 取值为False或者True，默认地，该函数会自适应地在每个输入序列的两端等长地填充全0数据，以确保卷积后的输出序列长度和输入长度一致。默认填充是考虑到输入的序列长度可能会小于卷积核大小，这会导致无法正确计算卷积输出。填充为0的数据在训练过程中不会被更新。默认为True。
+    - **padding_start** (int) - 表示对输入序列填充时的起始位置，可以为负值。负值表示在每个序列的首端填充 ``|padding_start|`` 个时间步（time_step）的全0数据；正值表示对每个序列跳过前 ``padding_start`` 个时间步的数据。同时在末端填充 :math:`filter\_size + padding\_start - 1` 个时间步的全0数据，以保证卷积输出序列长度和输入长度一致。如果 ``padding_start`` 为None，则在每个序列的两端填充 :math:`\frac{filter\_size}{2}` 个时间步的全0数据；如果 ``padding_start`` 设置为0，则只在序列的末端填充 :math:`filter\_size - 1` 个时间步的全0数据。默认为None。
+    - **bias_attr** (ParamAttr) - 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **param_attr** (ParamAttr) - 指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+    - **act** (str) – 应用于输出上的激活函数，如tanh、softmax、sigmoid，relu等，支持列表请参考 :ref:`api_guide_activations` ，默认值为None。
+    - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：和输入序列等长的LoDTensor，数据类型和输入一致，为float32或float64。
+返回类型：Variable
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[10,10], append_batch_size=False, dtype='float32')
+    x_conved = fluid.layers.sequence_conv(x,2)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_enumerate_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_enumerate_cn.rst
+.. _cn_api_fluid_layers_sequence_enumerate:
+sequence_enumerate
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_enumerate(input, win_size, pad_value=0, name=None)
+:api_attr: 声明式编程模式（静态图)
+枚举形状为 ``[d_1, 1]`` 的输入序列所有长度为 ``win_size`` 的子序列，生成一个形状为 ``[d_1, win_size]`` 的新序列，需要时以 ``pad_value`` 填充。
+注意，该OP的输入 ``input`` 只能是LoDTensor。
+范例如下：
+::
+        给定输入 x：
+            x.lod =  [[0,            3,      5]]
+            x.data = [[1], [2], [3], [4], [5]]  
+            x.dims = [5, 1]
+        设置属性 win_size = 2  pad_value = 0
+        得到输出 out：
+            out.lod =  [[0,                     3,            5]]  
+            out.data = [[1, 2], [2, 3], [3, 0], [4, 5], [5, 0]]  
+            out.dims = [5, 2]
+参数：
+        - **input** （Variable）- 输入序列，形状为 ``[d_1, 1]`` ，lod level为1的LoDTensor。数据类型支持int32，int64，float32或float64。
+        - **win_size** （int）- 子序列窗口大小。
+        - **pad_value** （int，可选）- 填充值，默认为0。
+        - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：      枚举序列，形状为 ``[d_1, win_size]`` ，lod_level为1的LoDTensor。数据类型与输入 ``input`` 一致。
+返回类型：   Variable
+**代码示例**
+..  code-block:: python
+      import paddle.fluid as fluid
+      x = fluid.layers.data(name='x',shape=[-1, 1], dtype='int32', lod_level=1)
+      out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_expand_as_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_expand_as_cn.rst
+.. _cn_api_fluid_layers_sequence_expand_as:
+sequence_expand_as
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_expand_as(x, y, name=None)
+:api_attr: 声明式编程模式（静态图)
+Sequence Expand As Layer，该OP根据输入 ``y`` 的第0级lod对输入 ``x`` 进行扩展。当前实现要求 ``y`` 的lod层数（level）必须为1，且 ``x`` 的第一维必须和 ``y`` 的第0层lod大小相同，所以扩展后的LoDTensor具有和 ``y`` 相同的lod。扩展结果与输入 ``x`` 的lod无关，所以无需考虑 ``x`` 的lod。
+注意，该OP的输入 ``x`` 可以是Tensor或LoDTensor， ``y`` 只能是LoDTensor。
+范例解释如下：
+::
+    例1:
+    假设，有4个长度为1的序列[a]、[b]、[c]和[d]，现在要将其扩展为长度是3、3、1、1的序列[a][a][a]、[b][b][b]、[c]和[d]。
+    显然，扩展后的序列lod为[0, 3, 6, 7, 8]，则：
+    给定输入一维LoDTensor x
+        x.data = [[a], [b], [c], [d]]
+        x.dims = [4, 1]
+    和输入 y
+        y.lod = [[3, 3, 1, 1]]    #为了便于理解这里用基于长度lod表示
+    经过sequence_expand_as运算，得到输出1级LoDTensor out
+        out.lod =  [[0,            3,              6,  7,  8]]    #基于偏移的lod，等价于基于长度的[[3, 3, 1, 1]]
+        out.data = [[a], [a], [a], [b], [b], [b], [c], [d]]
+        out.dims = [8, 1]
+    可见，输出out将x扩展至和y具有相同的lod。
+::
+    例2：
+    设定与例1类似，给定输入一维LoDTensor x：
+        x.data = [[a, b], [c, d], [e, f]]
+        x.dims = [3, 2]
+    和输入 y：
+        y.lod = [[2, 1, 3]]    #为了便于理解这里用基于长度lod表示
+    输出为1级LoDTensor：
+        out.lod =  [[0,             2,     3,                    6]]    #基于偏移的lod，等价于基于长度的[[2, 1, 3]]
+        out.data = [[a, b], [a, b], [c, d], [e, f], [e, f], [e, f]]
+        out.dims = [6, 2]
+    可见，输出out将x扩展至和y具有相同的lod。
+参数：
+    - **x** (Variable) - 输入变量，维度为 :math:`[M, K]` 的二维Tensor或LoDTensor，第一维必须与输入 ``y`` 的第0层lod大小相同，且仅支持lod_level为1。数据类型支持int32，int64，float32或float64。
+    - **y** (Variable) - 输入变量，LoDTensor，lod level必须为1。
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：扩展变量，维度为 :math:`[N, K]` 的二维LoDTensor，N由输入 ``y`` 的lod决定，且仅支持lod_level为1。数据类型与输入 ``x`` 一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    import numpy as np
+    x = fluid.data(name='x', shape=[1], dtype='float32')
+    y = fluid.data(name='y', shape=[1], dtype='float32', lod_level=1)
+    out = layers.sequence_expand_as(x=x, y=y)
+    exe = fluid.Executor(fluid.CPUPlace())
+    place = fluid.CPUPlace()
+    np_data = np.array([[1], [2], [3], [4]]).astype('float32')
+    x_lod_tensor = fluid.create_lod_tensor(np_data, [[2, 2]], place)
+    print(x_lod_tensor)
+    #lod: [[0, 2, 4]]
+    #    dim: 4, 1
+    #    layout: NCHW
+    #    dtype: float
+    #    data: [1 2 3 4]
+    y_lod_tensor = fluid.create_random_int_lodtensor([[3,3,1,1]], [1], 
+                                                    place, low=0, high=1)
+    print(y_lod_tensor)
+    #lod: [[0, 3, 6, 7, 8]]
+    #    dim: 8, 1
+    #    layout: NCHW
+    #    dtype: int64_t
+    #    data: [0 0 1 0 1 1 1 0]
+    out_main = exe.run(fluid.default_main_program(), 
+                      feed={'x': x_lod_tensor, 'y': y_lod_tensor}, 
+                      fetch_list=[out], return_numpy=False)
+    print(out_main[0])
+    #lod: [[0, 3, 6, 7, 8]]
+    #    dim: 8, 1
+    #    layout: NCHW
+    #    dtype: float
+    #    data: [1 1 1 2 2 2 3 4]
--- a/doc/paddle/api/paddle/fluid/layers/sequence_expand_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_expand_cn.rst
+.. _cn_api_fluid_layers_sequence_expand:
+sequence_expand
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_expand(x, y, ref_level=-1, name=None)
+:api_attr: 声明式编程模式（静态图)
+序列扩张层（Sequence Expand Layer)，根据输入 ``y`` 的第 ``ref_level`` 层lod对输入 ``x`` 进行扩展。 ``x`` 的lod level最多为1，若 ``x`` 的lod level为1，则 ``x`` 的lod大小必须与 ``y`` 的第 ``ref_level`` 层lod大小相等；若 ``x`` 的lod level为0，则 ``x`` 的第一维大小必须与 ``y`` 第 ``ref_level`` 层大小相等。 ``x`` 的秩最少为2，当 ``x`` 的秩大于2时，将被当作是一个二维张量处理。
+注意，该OP的输入 ``x`` 可以是Tensor或LodTensor， ``y`` 只能是LodTensor。
+范例解释如下：
+::
+    例1：
+    假设两个长度为2的序列[a][b]和[c][d]，欲将其扩展为4个长度为2的序列[a][b]、[a][b]、[c][d]、[c][d]。
+    序列[a][b]扩展2次，[c][d]扩展2次，扩展所需依据的lod为[2, 2]，则：
+    给定输入一维LoDTensor x
+      x.lod  = [[2,        2]]    #表示两个序列的长度为2，为了便于理解这里用基于长度lod表示
+      x.data = [[a], [b], [c], [d]]
+      x.dims = [4, 1]
+    和输入 y
+      y.lod = [[2,    2],     #第0层lod，指定按该层扩展，表示分别扩展2次，为了便于理解这里用基于长度lod表示
+               [3, 3, 1, 1]]  #第1层lod，注意，因为指定ref_level为0，所以这一层与运算无关
+    指定 ref_level = 0，依据y的第0层lod进行扩展，
+    经过sequence_expand，输出为1级LoDTensor out
+      out.lod =  [[0,        2,        4,        6,      8]]  #基于偏移的lod，等价于基于长度的[[2, 2, 2, 2]]
+      out.data = [[a], [b], [a], [b], [c], [d], [c], [d]]
+      out.dims = [8, 1]
+::
+    例2：
+    假设有3个长度为1的序列[a]、[b]、[c]，现在要将其扩展为长度是2、0、3的序列[a][a]、[c][c][c]。
+    显然，扩展后的序列lod为[2, 0, 3]，则：
+    给定输入一维LoDTensor x
+      x.data = [[a], [b], [c]]
+      x.dims = [3, 1]
+    和输入 y
+      y.lod = [[2, 0, 3]]
+    默认 ref_level = -1
+    经过sequence_expand，输出为1级LoDTensor out
+      out.data = [[a], [a], [c], [c], [c]]
+      out.dims = [5, 1]
+参数：
+    - **x** (Variable) - 输入变量，维度为 :math:`[M, K]` ，lod level至多1的二维Tensor或LoDTensor。数据类型支持int32，int64，float32或float64。
+    - **y** (Variable) - 输入变量，lod level至少为1的LoDTensor。数据类型不限。
+    - **ref_level** (int，可选) - 扩展 ``x`` 所依据的 ``y`` 的lod层。默认值-1，表示lod的最后一层。
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：扩展变量，维度为 :math:`[N, K]` 的LoDTensor，N由输入 ``x`` 和 ``y`` 的lod共同决定。数据类型与输入 ``x`` 一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    import numpy as np
+    x = fluid.data(name='x', shape=[1], dtype='float32')
+    y = fluid.data(name='y', shape=[1],
+                 dtype='float32', lod_level=1)
+    out = layers.sequence_expand(x=x, y=y, ref_level=0)
+    exe = fluid.Executor(fluid.CPUPlace())
+    place = fluid.CPUPlace()
+    np_data = np.array([[1], [2], [3], [4]]).astype('float32')
+    x_lod_tensor = fluid.create_lod_tensor(np_data, [[2, 2]], place)
+    print(x_lod_tensor)
+    #lod: [[0, 2, 4]]
+    #    dim: 4, 1
+    #    layout: NCHW
+    #    dtype: float
+    #    data: [1 2 3 4]
+    y_lod_tensor = fluid.create_random_int_lodtensor([[2, 2], [3,3,1,1]], [1],
+                                                     place, low=0, high=1)
+    print(y_lod_tensor)
+    #lod: [[0, 2, 4][0, 3, 6, 7, 8]]
+    #    dim: 8, 1
+    #    layout: NCHW
+    #    dtype: int64_t
+    #    data: [0 0 1 1 1 1 1 0]
+    out_main = exe.run(fluid.default_main_program(), 
+                      feed={'x': x_lod_tensor, 'y': y_lod_tensor}, 
+                      fetch_list=[out], return_numpy=False)
+    print(out_main[0])
+    #lod: [[0, 2, 4, 6, 8]]
+    #    dim: 8, 1
+    #    layout: NCHW
+    #    dtype: float
+    #    data: [1 2 1 2 3 4 3 4]
--- a/doc/paddle/api/paddle/fluid/layers/sequence_first_step_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_first_step_cn.rst
+.. _cn_api_fluid_layers_sequence_first_step:
+sequence_first_step
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_first_step(input)
+:api_attr: 声明式编程模式（静态图)
+该OP **仅支持LoDTensor类型的输入** ，将对输入的LoDTensor，在最后一层lod_level上，选取其每个序列（sequence）的第一个时间步（time_step）的特征向量作为池化后的输出向量。
+::
+    Case 1:
+      input是1-level LoDTensor:
+        input.lod = [[0, 2, 5, 7]]
+        input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+        input.shape = [7, 1]
+      输出为LoDTensor:
+        out.shape = [3, 1]
+        且 out.shape[0] == len(x.lod[-1]) == 3
+        out.data = [[1.], [2.], [5.]], where 1.=first(1., 3.), 2.=first(2., 4., 6.), 5.=first(5., 1.)
+    Case 2:
+      input是2-level的LoDTensor, 包含3个长度分别为[2, 0, 3]的序列，其中中间的0表示序列为空。
+      第一个长度为2的序列包含2个长度分别为[1, 2]的子序列；
+      最后一个长度为3的序列包含3个长度分别为[1, 0, 3]的子序列。
+          input.lod = [[0, 2, 2, 5], [0, 1, 3, 4, 4, 7]]
+          input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+          input.shape = [7, 1]
+      将根据最后一层的lod信息[0, 1, 3, 4, 4, 7]进行池化操作，且pad_value = 0.0
+      输出为LoDTensor：
+          out.shape= [5, 1]
+          out.lod = [[0, 2, 2, 5]]
+          其中 out.shape[0] == len(x.lod[-1]) == 5
+          out.data = [[1.], [3.], [4.], [0.0], [6.]]
+          where 1.=first(1.), 3.=first(3., 2.), 4.=first(4.), 0.0 = pad_value, 6.=first(6., 5., 1.)
+参数：**input** (Variable)- 类型为LoDTensor的输入序列，仅支持lod_level不超过2的LoDTensor，数据类型为float32。
+返回：每个输入序列中的第一个step的特征向量组成的LoDTensor，数据类型为float32。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[7, 1], append_batch_size=False,
+                 dtype='float32', lod_level=1)
+    x_first_step = fluid.layers.sequence_first_step(input=x)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_last_step_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_last_step_cn.rst
+.. _cn_api_fluid_layers_sequence_last_step:
+sequence_last_step
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_last_step(input)
+:api_attr: 声明式编程模式（静态图)
+该OP **仅支持LoDTensor类型的输入** ，将对输入的LoDTensor，在最后一层lod_level上，选取其每个序列（sequence）的最后一个时间步（time-step）的特征向量作为池化后的输出向量。
+::
+    Case 1:
+        input是1-level的LoDTensor:
+            input.lod = [[0, 2, 5, 7]]
+            input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+            input.shape = [7, 1]
+        输出为LoDTensor:
+            out.shape = [3, 1]
+            且 out.shape[0] == len(x.lod[-1]) == 3
+            out.data = [[3.], [6.], [1.]], where 3.=last(1., 3.), 6.=last(2., 4., 6.), 1.=last(5., 1.)
+    Case 2:
+        input是2-level的LoDTensor, 包含3个长度分别为[2, 0, 3]的序列，其中中间的0表示序列为空。
+        第一个长度为2的序列包含2个长度分别为[1, 2]的子序列；
+        最后一个长度为3的序列包含3个长度分别为[1, 0, 3]的子序列。
+            input.lod = [[0, 2, 2, 5], [0, 1, 3, 4, 4, 7]]
+            input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+            input.shape = [7, 1]
+        将根据最后一层的lod信息[0, 1, 3, 4, 4, 7]进行池化操作，且pad_value = 0.0
+        输出为LoDTensor：
+            out.shape= [5, 1]
+            out.lod = [[0, 2, 2, 5]]
+            其中 out.shape[0] == len(x.lod[-1]) == 5
+            out.data = [[1.], [2.], [4.], [0.0], [1.]]
+            where 1.=last(1.), 2.=last(3., 2.), 4.=last(4.), 0.0 = pad_value, 1.=last(6., 5., 1.)
+参数：**input** (Variable)- 类型为LoDTensor的输入序列，仅支持lod_level不超过2的LoDTensor，数据类型为float32。
+返回：每个输入序列中的最后一步特征向量组成的LoDTensor，数据类型为float32。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[7, 1], append_batch_size=False,
+                 dtype='float32', lod_level=1)
+    x_last_step = fluid.layers.sequence_last_step(input=x)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_mask_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_mask_cn.rst
+.. _cn_api_fluid_layers_sequence_mask:
+sequence_mask
+-------------------------------
+.. py:function::  paddle.fluid.layers.sequence_mask(x, maxlen=None, dtype='int64', name=None)
+该层根据输入 ``x`` 和 ``maxlen`` 输出一个掩码，数据类型为 ``dtype`` 。
+假设 x 是一个形状为 ``[d_1, d_2，…, d_n]`` 的张量， 则输出 y 是一个形状为 ``[d_1, d_2，… ，d_n, maxlen]`` 的掩码，其中:
+.. math::
+  y(i_1, i_2,..., i_n, j) = (j < x(i_1, i_2,..., i_n))
+范例如下：
+::
+    给定输入：
+      x = [3, 1, 1, 0]  maxlen = 4
+    得到输出张量：
+      mask = [[1, 1, 1, 0],
+              [1, 0, 0, 0],
+              [1, 0, 0, 0],
+              [0, 0, 0, 0]]
+参数：
+  - **x** (Variable) - 输入张量，其元素是小于等于 ``maxlen`` 的整数，形状为 ``[d_1, d_2，…, d_n]`` 的Tensor或LoDTensor。
+  - **maxlen** (int，可选) - 序列的最大长度。默认为空，此时 ``maxlen`` 取 ``x`` 中所有元素的最大值。
+  - **dtype** (np.dtype|core.VarDesc.VarType|str，可选) - 输出的数据类型，默认为 ``int64`` 。
+  - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回： mask张量，Tensor或LoDTensor，形状为 ``[d_1, d_2，… ，d_n, maxlen]`` ，数据类型由 ``dtype`` 指定，支持float32、float64、int32和int64，默认为int64。
+返回类型： Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    x = fluid.data(name='x', shape=[10], dtype='float32', lod_level=1)
+    mask = layers.sequence_mask(x=x)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_pad_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_pad_cn.rst
+.. _cn_api_fluid_layers_sequence_pad:
+sequence_pad
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_pad(x,pad_value,maxlen=None,name=None)
+:api_attr: 声明式编程模式（静态图)
+序列填充操作符（Sequence Pad Operator）,该OP将同一batch中的序列填充到一个一致的长度（由 ``maxlen`` 指定）。填充的新元素的值具体由输入 ``pad_value`` 指定，并会添加到每一个序列的末尾，使得他们最终的长度保持一致。最后返回一个Python tuple ``(Out, Length)`` ，其中LodTensor ``Out`` 为填充后的序列，LodTensor ``Length`` 为填充前的原序列长度信息。
+注意，该OP的输入 ``x`` 只能是LodTensor。
+范例如下：
+::
+    例1:
+    给定输入1-level LoDTensor x:
+        x.lod = [[0,  2,   5]]    #输入的两个序列长度是2和3
+        x.data = [[a],[b],[c],[d],[e]]
+    和输入 pad_value:
+        pad_value.data = [0]
+    设置 maxlen = 4
+    得到tuple (Out, Length):
+        Out.data = [[[a],[b],[0],[0]],[[c],[d],[e],[0]]]
+        Length.data = [2, 3]      #原序列长度是2和3
+::
+    例2:
+    给定输入1-level LoDTensor x:
+        x.lod =  [[0,             2,                     5]]
+        x.data = [[a1,a2],[b1,b2],[c1,c2],[d1,d2],[e1,e2]]
+    和输入 pad_value:
+        pad_value.data = [0]
+    默认 maxlen = None, (根据x的形状，此例中实际长度为3)
+    得到tuple (Out, Length):
+        Out.data = [[[a1,a2],[b1,b2],[0,0]],[[c1,c2],[d1,d2],[e1,e2]]]
+        Length.data = [2, 3]
+::
+    例3:
+    给定输入1-level LoDTensor x:
+        x.lod =  [[0,             2,                     5]]
+        x.data = [[a1,a2],[b1,b2],[c1,c2],[d1,d2],[e1,e2]]
+    和输入 pad_value:
+        pad_value.data = [p1,p2]
+    默认 maxlen = None, (根据x的形状，此例中实际长度为3)
+    得到tuple (Out, Length):
+        Out.data = [[[a1,a2],[b1,b2],[p1,p2]],[[c1,c2],[d1,d2],[e1,e2]]]
+        Length.data = [2, 3]
+参数：
+    - **x** (Variable) - 输入，维度为 ``[M, K]`` 的LoDTensor，仅支持lod_level为1。lod所描述的序列数量，作为要填充的batch_size。数据类型为int32，int64，float32或float64。
+    - **pad_value** (Variable) - 填充值，可以是标量或长度为 ``K`` 的一维Tensor。如果是标量，则自动广播为Tensor。数据类型需与 ``x`` 相同。
+    - **maxlen** (int，可选) - 填充序列的长度。默认为None，此时以序列中最长序列的长度为准，其他所有序列填充至该长度。当是某个特定的正整数，最大长度必须大于最长初始序列的长度。
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：元素为两个LoDTensor的Python tuple。第一个元素为填充后的变量 ``Out`` ，形状为 ``[batch_size, maxlen, K]`` ，lod level为0的LoDTensor，数据类型与输入 ``x`` 相同。第二个元素为填充前的原序列长度信息 ``Length`` ，lod level为0的一维LoDTensor，长度等于batch_size，数据类型为int64。
+返回类型：tuple
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    x = fluid.layers.data(name='y', shape=[10, 5],
+                     dtype='float32', lod_level=1)
+    pad_value = fluid.layers.assign(
+        input=numpy.array([0.0], dtype=numpy.float32))
+    out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_pool_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_pool_cn.rst
+.. _cn_api_fluid_layers_sequence_pool:
+sequence_pool
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_pool(input, pool_type, is_test=False, pad_value=0.0)
+:api_attr: 声明式编程模式（静态图)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用pool2d函数（fluid.layers.** :ref:`cn_api_fluid_layers_pool2d` **）。**
+该OP **仅支持LoDTensor类型的输入** ，将对输入的LoDTensor进行指定方式的池化（pooling）操作。通过指定pool_type参数，将输入的每个序列（sequence）在最后一层lod_level上或时间步（time-step）上对特征进行诸如sum、average、sqrt等池化操作。
+支持六种pool_type:
+- **average**: :math:`Out[i] = \frac{\sum_{i}X_{i}}{N}`
+- **sum**: :math:`Out[i] = \sum _{j}X_{ij}`
+- **sqrt**: :math:`Out[i] = \frac{ \sum _{j}X_{ij}}{\sqrt{len(X_{i})}}`
+- **max**: :math:`Out[i] = max(X_{i})`
+- **last**: :math:`Out[i] = X_{N\_i}`
+- **first**: :math:`Out[i] = X_{0}`
+其中 ``N_i`` 为待池化第i个输入序列的长度。
+::
+    Case 1:
+        input是1-level的LoDTensor, 且pad_value = 0.0:
+            input.lod = [[0, 2, 5, 7, 7]]
+            input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+            input.shape = [7, 1]
+        输出为LoDTensor：
+            out.shape = [4, 1]
+            其中 out.shape[0] == len(x.lod[-1]) == 4
+        对于不同的pool_type：
+            average: out.data = [[2.], [4.], [3.], [0.0]], where 2.=(1. + 3.)/2, 4.=(2. + 4. + 6.)/3, 3.=(5. + 1.)/2
+            sum    : out.data = [[4.], [12.], [6.], [0.0]], where 4.=1. + 3., 12.=2. + 4. + 6., 6.=5. + 1.
+            sqrt   : out.data = [[2.82], [6.93], [4.24], [0.0]], where 2.82=(1. + 3.)/sqrt(2), 6.93=(2. + 4. + 6.)/sqrt(3), 4.24=(5. + 1.)/sqrt(2)
+            max    : out.data = [[3.], [6.], [5.], [0.0]], where 3.=max(1., 3.), 6.=max(2., 4., 6.), 5.=max(5., 1.)
+            last   : out.data = [[3.], [6.], [1.], [0.0]], where 3.=last(1., 3.), 6.=last(2., 4., 6.), 1.=last(5., 1.)
+            first  : out.data = [[1.], [2.], [5.], [0.0]], where 1.=first(1., 3.), 2.=first(2., 4., 6.), 5.=first(5., 1.)
+        上述out.data中的最后一个[0.0]均为填充的数据。
+    Case 2:
+        input是2-level的LoDTensor, 包含3个长度分别为[2, 0, 3]的序列，其中中间的0表示序列为空。
+        第一个长度为2的序列包含2个长度分别为[1, 2]的子序列；
+        最后一个长度为3的序列包含3个长度分别为[1, 0, 3]的子序列。
+            input.lod = [[0, 2, 2, 5], [0, 1, 3, 4, 4, 7]]
+            input.data = [[1.], [3.], [2.], [4.], [6.], [5.], [1.]]
+            input.shape = [7, 1]
+        以pool_type取值为sum为例，将根据最后一层的lod信息[0, 1, 3, 4, 4, 7]进行池化操作，且pad_value = 0.0
+        输出为LoDTensor：
+            out.shape= [5, 1]
+            out.lod = [[0, 2, 2, 5]]
+            其中 out.shape[0] == len(x.lod[-1]) == 5
+            sum: out.data = [[1.], [5.], [4.], [0.0], [12.]]
+            where 1.=1., 5.=3. + 2., 4.=4., 0.0=pad_value, 12.=6. + 5. + 1.
+参数：
+    - **input** (Variable) - 类型为LoDTensor的输入序列，仅支持lod_level不超过2的LoDTensor，数据类型为float32。
+    - **pool_type** (str) - 池化类型，支持average，sum，sqrt，max，last和first池化操作。
+    - **is_test** (bool) - 仅在pool_type取值为max时生效。当is_test为False时，则在池化操作过程中会创建maxIndex临时Tensor，以记录最大特征值对应的索引信息，用于训练阶段的反向梯度计算。默认为False。
+    - **pad_value** (float) - 用于填充输入序列为空时的池化结果，默认为0.0。
+返回：经过指定类型池化后的LoDTensor，数据类型为float32。
+返回类型：Variable
+**代码示例**:
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[7, 1], append_batch_size=False,
+                 dtype='float32', lod_level=1)
+    avg_x = fluid.layers.sequence_pool(input=x, pool_type='average')
+    sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
+    sqrt_x = fluid.layers.sequence_pool(input=x, pool_type='sqrt')
+    max_x = fluid.layers.sequence_pool(input=x, pool_type='max')
+    last_x = fluid.layers.sequence_pool(input=x, pool_type='last')
+    first_x = fluid.layers.sequence_pool(input=x, pool_type='first')
--- a/doc/paddle/api/paddle/fluid/layers/sequence_reshape_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_reshape_cn.rst
+.. _cn_api_fluid_layers_sequence_reshape:
+sequence_reshape
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_reshape(input, new_dim)
+:api_attr: 声明式编程模式（静态图)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用reshape函数（fluid.layers.** :ref:`cn_api_fluid_layers_reshape` **）。**
+**该OP仅支持LoDTensor** ，在指定 ``new_dim`` 参数下，通过序列原始长度、和原始shape计算出新的shape，以输出包含新维度（new_dim）下的LoDTensor。目前仅支持1-level LoDTensor，请确保(原长度*原维数)可以除以新的维数，且每个序列没有余数。
+::
+    input是一个LoDTensor:
+        input.lod  = [[0, 2, 6]]
+        input.data = [[1,  2], [3,  4],
+                      [5,  6], [7,  8],
+                      [9, 10], [11, 12]]
+        input.shape = [6, 2]
+    设置 new_dim = 4
+    输出为LoDTensor:
+        out.lod  = [[0, 1, 3]]
+        out.data = [[1,  2,  3,  4],
+                    [5,  6,  7,  8],
+                    [9, 10, 11, 12]]
+        out.shape = [3, 4]
+参数：
+    - **input** (Variable) - 维度为 :math:`[M, K]` 的二维LoDTensor，且仅支持lod_level为1。数据类型为int32，int64，float32或float64。
+    - **new_dim** (int)- 指定reshape后的新维度，即对输入LoDTensor重新reshape后的新维度。
+返回：根据新维度重新reshape后的LoDTensor，数据类型和输入一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[2, 6], append_batch_size=False, dtype='float32', lod_level=1)
+    x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=4)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_reverse_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_reverse_cn.rst
+.. _cn_api_fluid_layers_sequence_reverse:
+sequence_reverse
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_reverse(x, name=None)
+**注意：该OP的输入只能是LoDTensor，如果您需要处理的输入是Tensor类型，请使用reverse函数（fluid.layers.** :ref:`cn_api_fluid_layers_reverse` **）。**
+**该OP仅支持LoDTensor** ，对于输入的LoDTensor，在每个序列（sequence）上进行反转。目前仅支持对LoD层次(LoD level)为1的LoDTensor进行反转。该OP在构建反向 :ref:`cn_api_fluid_layers_DynamicRNN` 网络时十分有用。
+::
+    输入x是一个LoDTensor:
+        x.lod  = [[0, 2, 5]]
+        x.data = [[1,  2,  3,  4],
+                  [5,  6,  7,  8],
+                  [9, 10, 11, 12],
+                  [13,14, 15, 16],
+                  [17,18, 19, 20]]
+        x.shape = [5, 4]
+    输出out与x具有同样的shape和LoD信息：
+        out.lod  = [[0, 2, 5]]
+        out.data = [[5,  6,  7,  8],
+                    [1,  2,  3,  4],
+                    [17,18, 19, 20],
+                    [13,14, 15, 16],
+                    [9, 10, 11, 12]]
+        out.shape = [5, 4]
+参数:
+  - **x** (Variable) – 输入是LoD level为1的LoDTensor。目前仅支持对LoD层次(LoD level)为1的LoDTensor进行反转。数据类型为float32，float64，int8，int32或int64。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：输出在每个序列上反转后的LoDTensor，数据类型和输入类型一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[2, 6], dtype='float32')
+    x_reversed = fluid.layers.sequence_reverse(x)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_scatter_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_scatter_cn.rst
+.. _cn_api_fluid_layers_sequence_scatter:
+sequence_scatter
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_scatter(input, index, updates, name=None)
+:api_attr: 声明式编程模式（静态图)
+.. note::
+    该OP的输入index，updates必须是LoDTensor。
+该OP根据index提供的位置将updates中的信息更新到输出中。
+该OP先使用input初始化output，然后通过output[instance_index][index[pos]] += updates[pos]方式，将updates的信息更新到output中，其中instance_index是pos对应的样本在batch中的序号。
+output[i][j]的值取决于能否在index中第i+1个区间中找到对应的数据j，若能找到out[i][j] = input[i][j] + update[m][n]，否则 out[i][j] = input[i][j]。
+例如，在下面样例中，index的lod信息分为了3个区间。其中，out[0][0]能在index中第1个区间中找到对应数据0，所以，使用updates对应位置的值进行更新，out[0][0] = input[0][0]+updates[0][0]。out[2][1]不能在index中第3个区间找到对应数据1，所以，它等于输入对应位置的值，out[2][1] = input[2][1]。
+**样例**:
+::
+    输入：
+    input.data = [[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+                  [1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+                  [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]]
+    input.dims = [3, 6]
+    index.data = [[0], [1], [2], [5], [4], [3], [2], [1], [3], [2], [5], [4]]
+    index.lod =  [[0,        3,                       8,                 12]]
+    updates.data = [[0.3], [0.3], [0.4], [0.1], [0.2], [0.3], [0.4], [0.0], [0.2], [0.3], [0.1], [0.4]]
+    updates.lod =  [[  0,            3,                                 8,                         12]]
+    输出：
+    out.data = [[1.3, 1.3, 1.4, 1.0, 1.0, 1.0],
+                [1.0, 1.0, 1.4, 1.3, 1.2, 1.1],
+                [1.0, 1.0, 1.3, 1.2, 1.4, 1.1]]
+    out.dims = X.dims = [3, 6]
+参数：
+      - **input** (Variable) - 维度为 :math:`[N, k_1 ... k_n]` 的Tensor， 支持的数据类型：float32，float64，int32，int64。
+      - **index** (Variable) - 包含index信息的LoDTensor，lod level必须等于1，支持的数据类型：int32，int64。
+      - **updates** (Variable) - 包含updates信息的LoDTensor，lod level和index一致，数据类型与input的数据类型一致。支持的数据类型：float32，float64，int32，int64。 
+      - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：在input的基础上使用updates进行更新后得到的Tensor，它与input有相同的维度和数据类型。
+返回类型：Variable
+**代码示例**:
+..  code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    input = fluid.data( name="x", shape=[3, 6], dtype='float32' )
+    index = fluid.data( name='index', shape=[12, 1], dtype='int64', lod_level=1)
+    updates = fluid.data( name='updates', shape=[12, 1], dtype='float32', lod_level=1)
+    output = fluid.layers.sequence_scatter(input, index, updates)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_slice_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_slice_cn.rst
+.. _cn_api_fluid_layers_sequence_slice:
+sequence_slice
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_slice(input, offset, length, name=None)
+:api_attr: 声明式编程模式（静态图)
+**实现Sequence Slice(序列切片)运算**
+**该OP输入只能是LoDTensor, 如果您需要处理的是Tensor类型，请使用** :ref:`cn_api_fluid_layers_slice` **。**
+该层从给定序列中截取子序列。截取依据为所给的开始 ``offset`` （偏移量） 和子序列长 ``length`` 。
+::
+    输入变量：
+        (1) input (LoDTensor):
+                input.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]],
+                input.lod = [[3, 2]],
+                input.dims = (5, 2),
+        (2) offset (Variable):
+                offset.data = [[0], [1]]
+        (3) length (Variable):
+                length.data = [[2], [1]]
+        (4) name (str|None)
+    输出变量为LoDTensor：
+        out.data = [[a1, a2], [b1, b2], [e1, e2]],
+        out.lod = [[2, 1]],
+        out.dims = (3, 2).
+.. note::
+   ``input`` ， ``offset`` ， ``length`` 的第一维大小应相同。
+   ``offset`` 从0开始。
+参数:
+  - **input** (Variable) – 输入变量，类型为LoDTensor，承载着完整的序列。数据类型为float32，float64，int32或int64。
+  - **offset** (Variable) – 指定每个序列切片的起始索引，数据类型为int32或int64。
+  - **length** (Variable) – 指定每个子序列的长度，数据类型为int32或int64。
+  - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：Variable(LoDTensor) 序列切片运算结果
+返回类型：变量(Variable), 数据类型与 ``input`` 一致
+**代码示例**
+..  code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  seqs = fluid.layers.data(name='x', shape=[10, 5],
+       dtype='float32', lod_level=1)
+  offset = fluid.layers.assign(input=np.array([[0, 1]]).astype("int32"))
+  length = fluid.layers.assign(input=np.array([[2, 1]]).astype("int32"))
+  subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset,
+                length=length)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_softmax_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_softmax_cn.rst
+.. _cn_api_fluid_layers_sequence_softmax:
+sequence_softmax
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_softmax(input, use_cudnn=False, name=None)
+:api_attr: 声明式编程模式（静态图)
+.. note::
+    该OP的输入只能是LoDTensor，如果要处理的输入是Tensor类型，请使用 :ref:`cn_api_fluid_layers_softmax`
+该OP根据LoD信息将输入的第0维度进行划分，在划分的每一个区间内部进行运算。
+对第i个区间内的元素的计算公式如下：
+.. math::
+    Out\left ( X[lod[i]:lod[i+1]],: \right ) = \frac{exp(X[lod[i]:lod[i+1],:])}{\sum (exp(X[lod[i]:lod[i+1],:]))}
+输入Tensor的维度可为 :math:`[N，1]` 或者 :math:`[N]` ，推荐使用 :math:`[N]` 。
+例如，对有6个样本的batch，每个样本的长度为3，2，4，1，2，3，其lod信息为[[0, 3, 5, 9, 10, 12, 15]]，根据lod信息将第0维度划分为6份，在 :math:`X[0:3,:],X[3:5,:],X[5:9,:],X[9:10,:],X[10:12,:],X[12:15,:]`  中进行softmax运算。
+::
+     示例：
+             给定：
+                   input.data = [0.7, 1, 0.6,
+                                 1.5, 1.1,
+                                 1.2, 0.2, 0.6, 1.9,
+                                 3.1,
+                                 2.5, 0.8,
+                                 0.1, 2.4, 1.3]
+                   input.lod = [[0, 3, 5, 9, 10, 12, 15]]
+              则：
+                   output.data = [0.30724832, 0.41474187, 0.2780098,
+                                  0.59868765, 0.40131235,
+                                  0.2544242, 0.09359743, 0.13963096, 0.5123474, 
+                                  1.,
+                                  0.84553474, 0.15446526,
+                                  0.06995796, 0.69777346, 0.23226859]
+                   output.lod = [[0, 3, 5, 9, 10, 12, 15]] 
+参数：
+    - **input** (Variable) - 维度为 :math:`[N, 1]` 或者 :math:`[N]` 的LoDTensor，推荐使用 :math:`[N]` 。支持的数据类型：float32，float64。
+    - **use_cudnn** (bool，可选) - 是否用cudnn核，仅当安装cudnn版本的paddle库且使用gpu训练或推理的时候生效。支持的数据类型：bool型。默认值为False。
+    - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：根据区间计算softmax之后的LoDTensor，其维度与input的维度一致，数据类型与input的数据类型一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.data(name='x', shape=[7, 1],
+                 dtype='float32', lod_level=1)
+    x_sequence_softmax = fluid.layers.sequence_softmax(input=x)
+    y = fluid.data(name='y', shape=[7],
+                 dtype='float32', lod_level=1)
+    y_sequence_softmax = fluid.layers.sequence_softmax(input=y)
--- a/doc/paddle/api/paddle/fluid/layers/sequence_unpad_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sequence_unpad_cn.rst
+.. _cn_api_fluid_layers_sequence_unpad:
+sequence_unpad
+-------------------------------
+.. py:function:: paddle.fluid.layers.sequence_unpad(x, length, name=None)
+:api_attr: 声明式编程模式（静态图)
+.. note::
+    该OP的输入为Tensor，输出为LoDTensor。该OP用于移除填充元素，与之对应，还存在进行数据填充的OP sequence_pad，详情见： :ref:`cn_api_fluid_layers_sequence_pad`
+该OP根据length的信息，将input中padding（填充）元素移除，并且返回一个LoDTensor。
+::
+    示例：
+    给定输入变量 ``x`` :
+        x.data = [[ 1.0,  2.0,  3.0,  4.0,  5.0],
+                  [ 6.0,  7.0,  8.0,  9.0, 10.0],
+                  [11.0, 12.0, 13.0, 14.0, 15.0]],
+    其中包含 3 个被填充到长度为5的序列，实际长度由输入变量 ``length`` 指明，其中，x的维度为[3,5]，length维度为[3]，length的第0维与x的第0维一致：
+        length.data = [2, 3, 4],
+    则去填充（unpad）后的输出变量为：
+        out.data = [[1.0, 2.0, 6.0, 7.0, 8.0, 11.0, 12.0, 13.0, 14.0]]
+        out.lod = [[0, 2, 5, 9]]
+参数:
+  - **x** (Variable) – 包含填充元素的Tensor，其维度大小不能小于2，支持的数据类型：float32, float64,int32, int64。
+  - **length** (Variable) – 存储每个样本实际长度信息的1D Tensor，该Tensor维度的第0维必须与x维度的第0维一致。支持的数据类型：int64。
+  - **name**  (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：将输入的填充元素移除，并返回一个LoDTensor，其递归序列长度与length参数的信息一致，其数据类型和输入一致。
+返回类型：Variable
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    # example 1:
+    x = fluid.data(name='x', shape=[10, 5], dtype='float32')
+    len = fluid.data(name='length', shape=[10], dtype='int64')
+    out = fluid.layers.sequence_unpad(x=x, length=len)
+    # example 2:
+    # 使用sequence_pad填充数据
+    input = fluid.data(name='input', shape=[10, 5], dtype='float32', lod_level=1)
+    pad_value = fluid.layers.assign(input=numpy.array([0.0], dtype=numpy.float32))
+    pad_data, len = fluid.layers.sequence_pad(x=input, pad_value=pad_value)
+    #使用sequence_unpad移除填充数据
+    unpad_data = fluid.layers.sequence_unpad(x=pad_data, length=len)
--- a/doc/paddle/api/paddle/shape_cn.rst
+++ b/doc/paddle/api/paddle/shape_cn.rst
--- a/doc/paddle/api/paddle/shard_index_cn.rst
+++ b/doc/paddle/api/paddle/shard_index_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/shuffle_channel_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/shuffle_channel_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/sigmoid_cross_entropy_with_logits_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/sigmoid_cross_entropy_with_logits_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/sigmoid_focal_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/sigmoid_focal_loss_cn.rst
--- a/doc/paddle/api/paddle/sign_cn.rst
+++ b/doc/paddle/api/paddle/sign_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/similarity_focus_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/similarity_focus_cn.rst
--- a/doc/paddle/api/paddle/sin_cn.rst
+++ b/doc/paddle/api/paddle/sin_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/size_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/size_cn.rst
+.. _cn_api_fluid_layers_size:
+size
+-------------------------------
+.. py:function:: paddle.fluid.layers.size(input)
+返回张量的单元数量，是一个shape为[1]的int64的张量。
+参数:
+    - **input** （Variable）- 输入变量
+返回：(Variable)。
+**代码示例**：
+.. code-block:: python
+        import paddle.fluid.layers as layers
+        input = layers.data(
+            name="input", shape=[3, 100], dtype="float32", append_batch_size=False)
+        rank = layers.size(input) # 300
--- a/doc/paddle/api/paddle/slice_cn.rst
+++ b/doc/paddle/api/paddle/slice_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/smooth_l1_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/smooth_l1_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/soft_relu_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/soft_relu_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/softmax_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/softmax_cn.rst
+.. _cn_api_fluid_layers_softmax:
+softmax
+-------------------------------
+.. py:function:: paddle.fluid.layers.softmax(input, use_cudnn=False, name=None, axis=-1)
+:alias_main: paddle.nn.functional.softmax
+:alias: paddle.nn.functional.softmax,paddle.nn.functional.activation.softmax
+:old_api: paddle.fluid.layers.softmax
+该OP实现了softmax层。OP的计算过程如下：
+步骤1：输入 ``input`` 的 ``axis`` 维会被置换到最后一维；
+步骤2：将输入 ``Tensor`` 在逻辑上变换为二维矩阵。二维矩阵第一维（列长度）是输入除最后一维之外的其他维度值的乘积，第二维（行长度）和输入 ``axis`` 维的长度相同；对于矩阵的每一行，softmax操作对其进行重新缩放，使得该行的每个元素在 \[0,1\] 范围内，并且总和为1；
+步骤3：softmax操作执行完成后，执行步骤1和步骤2的逆运算，将二维矩阵恢复至和输入 ``input`` 相同的维度。
+上述步骤2中softmax操作计算过程如下：
+    - 对于二维矩阵的每一行，计算K维向量（K是输入第 ``axis`` 维的长度）中指定位置的指数值和全部位置指数值的和。
+    - 指定位置指数值与全部位置指数值之和的比值就是softmax操作的输出。
+对于二维矩阵中的第i行和第j列有：
+.. math::
+    Out[i,j] = \frac{exp(X[i,j])}{\sum_j exp(X[i,j])}
+- 示例1（矩阵一共有三维。axis = -1，表示沿着最后一维（即第三维）做softmax操作）
+.. code-block:: python
+  输入
+    X.shape = [2, 3, 4] 
+    X.data = [[[2.0, 3.0, 4.0, 5.0],
+               [3.0, 4.0, 5.0, 6.0],
+               [7.0, 8.0, 8.0, 9.0]],
+              [[1.0, 2.0, 3.0, 4.0],
+               [5.0, 6.0, 7.0, 8.0],
+               [6.0, 7.0, 8.0, 9.0]]]
+    axis = -1
+  输出
+    Out.shape = [2, 3, 4]
+    Out.data = [[[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                 [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                 [0.07232949, 0.19661193, 0.19661193, 0.53444665]],
+                [[0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                 [0.0320586 , 0.08714432, 0.23688282, 0.64391426],
+                 [0.0320586 , 0.08714432, 0.23688282, 0.64391426]]]
+- 示例2（矩阵一共有三维。axis = 1，表示沿着第二维做softmax操作）
+.. code-block:: python
+  输入
+    X.shape = [2, 3, 4] 
+    X.data = [[[2.0, 3.0, 4.0, 5.0],
+               [3.0, 4.0, 5.0, 6.0],
+               [7.0, 8.0, 8.0, 9.0]],
+              [[1.0, 2.0, 3.0, 4.0],
+               [5.0, 6.0, 7.0, 8.0],
+               [6.0, 7.0, 8.0, 9.0]]]
+    axis = 1
+  输出
+    Out.shape = [2, 3, 4]
+    Out.data = [[[0.00657326, 0.00657326, 0.01714783, 0.01714783],
+                 [0.01786798, 0.01786798, 0.04661262, 0.04661262],
+                 [0.97555875, 0.97555875, 0.93623955, 0.93623955]],
+                [[0.00490169, 0.00490169, 0.00490169, 0.00490169],
+                 [0.26762315, 0.26762315, 0.26762315, 0.26762315],
+                 [0.72747516, 0.72747516, 0.72747516, 0.72747516]]] 
+参数：
+    - **input** (Variable) - 任意维度的多维 ``Tensor`` ，数据类型为float32或float64。
+    - **use_cudnn** (bool, 可选) - 指示是否用cudnn库。当 ``use_cudnn`` 为True时，在安装GPU版本Paddle并且本机安装cudnn库的前提下，使用GPU训练或推理时才有效。默认值：False。
+    - **name** (str, 可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+    - **axis** (int, 可选) - 指示进行softmax计算的维度索引，其范围应为 :math:`[-1，rank-1]` ，其中rank是输入变量的秩。默认值：-1（表示对最后一维做softmax操作）。
+返回：表示softmax操作结果的 ``Tensor`` ，数据类型和 ``input`` 一致，返回维度和 ``input`` 一致。
+返回类型：Variable
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    data = fluid.layers.data(name="input", shape=[-1, 3],dtype="float32")
+    result = fluid.layers.softmax(data,axis=1)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    x = np.random.rand(3, 3).astype("float32")
+    output= exe.run(feed={"input": x},
+                     fetch_list=[result[0]])
+    print(output)
--- a/doc/paddle/api/paddle/nn/functional/softmax_with_cross_entropy_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/softmax_with_cross_entropy_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/softplus_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/softplus_cn.rst
+.. _cn_api_fluid_layers_softplus:
+softplus
+-------------------------------
+.. py:function:: paddle.fluid.layers.softplus(x,name=None)
+:alias_main: paddle.nn.functional.softplus
+:alias: paddle.nn.functional.softplus,paddle.nn.functional.activation.softplus
+:old_api: paddle.fluid.layers.softplus
+softplus激活函数
+.. math::
+    out = \ln(1 + e^{x})
+参数：
+    - **x** (Variable) - 张量（Tensor）
+    - **name** (str|None) - 该层名称（可选）。若设为None，则自动为该层命名。
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.softplus(data)
--- a/doc/paddle/api/paddle/fluid/layers/softshrink_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/softshrink_cn.rst
+.. _cn_api_fluid_layers_softshrink:
+softshrink
+-------------------------------
+.. py:function:: paddle.fluid.layers.softshrink(x, alpha=None)
+:alias_main: paddle.nn.functional.softshrink
+:alias: paddle.nn.functional.softshrink,paddle.nn.functional.activation.softshrink
+:old_api: paddle.fluid.layers.softshrink
+Softshrink激活函数
+.. math::
+    out = \begin{cases}
+        x - \alpha, \text{if } x > \alpha \\
+        x + \alpha, \text{if } x < -\alpha \\
+        0,  \text{otherwise}
+        \end{cases}
+参数：
+    - **x** (Variable) - 张量（Tensor）
+    - **alpha** (float) - 上面公式中alpha的值
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.softshrink(data)
--- a/doc/paddle/api/paddle/fluid/layers/softsign_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/softsign_cn.rst
+.. _cn_api_fluid_layers_softsign:
+softsign
+-------------------------------
+.. py:function:: paddle.fluid.layers.softsign(x,name=None)
+:alias_main: paddle.nn.functional.softsign
+:alias: paddle.nn.functional.softsign,paddle.nn.functional.activation.softsign
+:old_api: paddle.fluid.layers.softsign
+softsign激活函数
+.. math::
+    out = \frac{x}{1 + |x|}
+参数：
+    - **x** (Variable) - 张量（Tensor）
+    - **name** (str|None) - 该层名称（可选）。若设为None，则自动为该层命名。
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.softsign(data)
--- a/doc/paddle/api/paddle/nn/functional/space_to_depth_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/space_to_depth_cn.rst
--- a/doc/paddle/api/paddle/static/nn/spectral_norm_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/spectral_norm_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/split_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/split_cn.rst
+.. _cn_api_fluid_layers_split:
+split
+-------------------------------
+.. py:function:: paddle.fluid.layers.split(input, num_or_sections, dim=-1, name=None)
+该OP将输入Tensor分割成多个子Tensor。
+参数：
+    - **input** (Tensor) - 输入变量，数据类型为bool， float16，float32，float64，int32，int64的多维Tensor。
+    - **num_or_sections** (int|list|tuple) - 如果 ``num_or_sections`` 是一个整数，则表示Tensor平均划分为相同大小子Tensor的数量。如果 ``num_or_sections`` 是一个list或tuple，那么它的长度代表子Tensor的数量，它的元素可以是整数或者形状为[1]的Tensor，依次代表子Tensor需要分割成的维度的大小。list或tuple的长度不能超过输入Tensor待分割的维度的大小。至多有一个元素值为-1，-1表示该值是由 ``input`` 待分割的维度值和 ``num_or_sections`` 的剩余元素推断出来的。
+    - **dim** (int|Tensor，可选) - 整数或者形状为[1]的Tensor，数据类型为int32或int64。表示需要分割的维度。如果 ``dim < 0`` ，则划分的维度为 ``rank(input) + dim`` 。默认值为-1。
+    - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回：分割后的Tensor列表。
+抛出异常：
+    - :code:`TypeError`：``input`` 的数据类型不是bool、float16、float32、float64、int32或int64时 。
+    - :code:`TypeError`：``num_or_sections`` 不是int、list 或 tuple时。
+    - :code:`TypeError`：``dim`` 不是 int 或 Tensor时。当 ``dim`` 为Tensor，其数据类型不是int32或int64时。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    # input is a Tensor which shape is [3, 9, 5]
+    input = fluid.data(
+         name="input", shape=[3, 9, 5], dtype="float32")
+    out0, out1, out2 = fluid.layers.split(input, num_or_sections=3, dim=1)
+    # out0.shape [3, 3, 5]
+    # out1.shape [3, 3, 5]
+    # out2.shape [3, 3, 5]
+    out0, out1, out2 = fluid.layers.split(input, num_or_sections=[2, 3, 4], dim=1)
+    # out0.shape [3, 2, 5]
+    # out1.shape [3, 3, 5]
+    # out2.shape [3, 4, 5]
+    out0, out1, out2 = fluid.layers.split(input, num_or_sections=[2, 3, -1], dim=1)
+    # out0.shape [3, 2, 5]
+    # out1.shape [3, 3, 5]
+    # out2.shape [3, 4, 5]
+    # dim is negative, the real dim is (rank(input) + dim), whose real
+    # value is 1.
+    out0, out1, out2 = fluid.layers.split(input, num_or_sections=3, dim=-2)
+    # out0.shape [3, 3, 5]
+    # out1.shape [3, 3, 5]
+    # out2.shape [3, 3, 5]
--- a/doc/paddle/api/paddle/sqrt_cn.rst
+++ b/doc/paddle/api/paddle/sqrt_cn.rst
--- a/doc/paddle/api/paddle/square_cn.rst
+++ b/doc/paddle/api/paddle/square_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/square_error_cost_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/square_error_cost_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/squeeze_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/squeeze_cn.rst
+.. _cn_api_fluid_layers_squeeze:
+squeeze
+-------------------------------
+.. py:function:: paddle.fluid.layers.squeeze(input, axes, name=None)
+该OP会根据axes压缩输入Tensor的维度。如果指定了axes，则会删除axes中指定的维度，axes指定的维度要等于1。如果没有指定axes，那么所有等于1的维度都会被删除。
+- 例1：
+.. code-block:: python
+        输入：
+            X.shape = [1,3,1,5]
+            axes = [0]
+        输出：
+            Out.shape = [3,1,5]
+- 例2：
+.. code-block:: python
+        输入：
+            X.shape = [1,3,1,5]
+            axes = []
+        输出:
+            Out.shape = [3,5]
+- 例3：
+.. code-block:: python
+        输入:
+            X.shape = [1,3,1,5]
+            axes = [-2]
+        输出：
+            Out.shape = [1,3,5]
+参数：
+        - **input** (Variable) - 输入任意维度的Tensor。 支持的数据类型：float32，float64，int8，int32，int64。
+        - **axes** (list) - 输入一个或一列整数，代表要压缩的轴。axes的范围： :math:`[-rank(input), rank(input))` 。 axes为负数时， :math:`axes=axes+rank(input)` 。
+        - **name** (str，可选) - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+返回: 返回对维度进行压缩后的Tensor。数据类型与输入Tensor一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    x = layers.data(name='x', shape=[5, 1, 10])
+    y = layers.squeeze(input=x, axes=[1]) #y.shape=[5, 10]
--- a/doc/paddle/api/paddle/nn/functional/ssd_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/ssd_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/stack_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/stack_cn.rst
+.. _cn_api_fluid_layers_stack:
+stack
+-------------------------------
+.. py:function:: paddle.fluid.layers.stack(x, axis=0)
+该OP沿 ``axis`` 轴对输入 ``x`` 进行堆叠操作。
+- 例1:
+.. code-block:: python
+    输入:
+        x[0].shape = [1, 2]
+        x[0].data = [ [1.0 , 2.0 ] ]
+        x[1].shape = [1, 2]
+        x[1].data = [ [3.0 , 4.0 ] ]
+        x[2].shape = [1, 2]
+        x[2].data = [ [5.0 , 6.0 ] ]
+    参数:
+        axis = 0 #沿着第0维对输入x进行堆叠操作。
+    输出:
+        Out.shape = [3, 1, 2]
+        Out.data = [ [ [1.0, 2.0] ],
+                    [ [3.0, 4.0] ],
+                    [ [5.0, 6.0] ] ]
+- 例2:
+.. code-block:: python
+    输入:
+        x[0].shape = [1, 2]
+        x[0].data = [ [1.0 , 2.0 ] ]
+        x[1].shape = [1, 2]
+        x[1].data = [ [3.0 , 4.0 ] ]
+        x[2].shape = [1, 2]
+        x[2].data = [ [5.0 , 6.0 ] ]
+    参数:
+        axis = 1 or axis = -2 #沿着第1维对输入进行堆叠操作。
+    输出:
+        Out.shape = [1, 3, 2]
+        Out.data = [ [ [1.0, 2.0],
+                      [3.0, 4.0],
+                      [5.0, 6.0] ] ]
+参数:
+      - **x** (Variable|list(Variable)) – 输入 x 可以是单个Tensor，或是多个Tensor组成的列表。如果 x 是一个列表，那么这些Tensor的维度必须相同。 假设输入是N维Tensor :math:`[d_0,d_1,...,d_{n−1}]`，则输出变量的维度为N+1维 :math:`[d_0,d_1,...d_{axis-1},len(x),d_{axis}...,d_{n−1}]` 。支持的数据类型: float32，float64，int32，int64。
+      - **axis** (int, 可选) – 指定对输入Tensor进行堆叠运算的轴，有效 ``axis`` 的范围是: :math:`[-(R+1), R+1)`，R是输入中第一个Tensor的rank。如果 ``axis`` < 0，则 :math:`axis=axis+rank(x[0])+1` 。axis默认值为0。
+返回: 堆叠运算后的Tensor，数据类型与输入Tensor相同。输出维度等于 :math:`rank(x[0])+1` 维。
+返回类型: Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.layers as layers
+    x1 = layers.data(name='x1', shape=[1, 2], dtype='int32')
+    x2 = layers.data(name='x2', shape=[1, 2], dtype='int32')
+    #对Tensor List进行堆叠
+    data = layers.stack([x1,x2])  # 沿着第0轴进行堆叠，data.shape=[2, 1, 2]
+    data = layers.stack([x1,x2], axis=1)  # 沿着第1轴进行堆叠，data.shape=[1, 2, 2]
+    #单个Tensor的堆叠
+    data = layers.stack(x1)  # 沿着第0轴进行堆叠，data.shape=[1, 1, 2]
--- a/doc/paddle/api/paddle/stanh_cn.rst
+++ b/doc/paddle/api/paddle/stanh_cn.rst
--- a/doc/paddle/api/paddle/strided_slice_cn.rst
+++ b/doc/paddle/api/paddle/strided_slice_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/sum_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/sum_cn.rst
+.. _cn_api_fluid_layers_sum:
+sum
+-------------------------------
+.. py:function:: paddle.fluid.layers.sum(x)
+该OP用于对输入的一至多个Tensor或LoDTensor求和。如果输入的是LoDTensor，输出仅与第一个输入共享LoD信息（序列信息）。
+例1：
+::
+    输入：
+    	input.shape = [2, 3]
+    	input = [[1, 2, 3],
+	      	  [4, 5, 6]]
+    输出：
+    	output.shape = [2, 3]
+    	output = [[1, 2, 3],
+	          [4, 5, 6]]
+例2：
+::
+    输入：
+	第一个输入：
+    	    input1.shape = [2, 3]
+    	    input1 = [[1, 2, 3],
+	      	      [4, 5, 6]]
+	第二个输入：
+    	    input2.shape = [2, 3]
+    	    input2 = [[7, 8, 9],
+	              [10, 11, 12]]
+    输出：
+    	output.shape = [2, 3]
+    	output = [[8, 10, 12],
+	          [14, 16, 18]]
+参数：
+    **x** (Variable|list(Variable)) - 输入的一至多个Variable。如果输入了多个Variable，则不同Variable间的shape和数据类型应保持一致。Variable为多维Tensor或LoDTensor，数据类型支持：float32，float64，int32，int64
+返回：对输入 ``x`` 中的Variable求和后的结果，shape和数据类型与 ``x`` 一致
+返回类型：Variable
+**代码示例：**
+.. code-block:: python
+	import paddle.fluid as fluid
+	input0 = fluid.layers.fill_constant(shape=[2, 3], dtype='int64', value=5)
+	input1 = fluid.layers.fill_constant(shape=[2, 3], dtype='int64', value=3)
+	sum = fluid.layers.sum([input0, input1])
+	#用户可以通过executor打印出求和的结果
+	out = fluid.layers.Print(sum, message="the sum of input0 and input1: ")
+	exe = fluid.Executor(fluid.CPUPlace())
+	exe.run(fluid.default_main_program())
+	#打印出的数据为：
+	1570701754	the sum of input0 and input1: 	The place is:CPUPlace
+	Tensor[sum_0.tmp_0]
+		shape: [2,3,]
+		dtype: l
+		data: 8,8,8,8,8,8,
+	#输出了shape为[2,3]的Tensor，与输入的shape一致
+	#dtype为对应C++数据类型，在不同环境下可能显示值不同，但本质相同
+	#例如：如果Tensor中数据类型是int64，则对应的C++数据类型为int64_t，所以dtype值为typeid(int64_t).name()，
+	#      其在MacOS下为'x'，linux下为'l'，Windows下为'__int64'，都表示64位整型变量
--- a/doc/paddle/api/paddle/sums_cn.rst
+++ b/doc/paddle/api/paddle/sums_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/swish_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/swish_cn.rst
--- a/doc/paddle/api/paddle/nn/switch_case_cn.rst
+++ b/doc/paddle/api/paddle/nn/switch_case_cn.rst
--- a/doc/paddle/api/paddle/tanh_cn.rst
+++ b/doc/paddle/api/paddle/tanh_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/tanh_shrink_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/tanh_shrink_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/target_assign_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/target_assign_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/teacher_student_sigmoid_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/teacher_student_sigmoid_loss_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/temporal_shift_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/temporal_shift_cn.rst
--- a/doc/paddle/api/paddle/create_global_var_cn.rst
+++ b/doc/paddle/api/paddle/create_global_var_cn.rst
--- a/doc/paddle/api/paddle/static/nn/create_parameter_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/create_parameter_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/tensor_array_to_tensor_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/tensor_array_to_tensor_cn.rst
+.. _cn_api_fluid_layers_tensor_array_to_tensor:
+tensor_array_to_tensor
+-------------------------------
+.. py:function:: paddle.fluid.layers.tensor_array_to_tensor(input, axis=1, name=None, use_stack=False)
+该OP将 ``input`` 这个LoDTensorArray中的所有Tensor沿 ``axis`` 指定的轴进行拼接（concat）或堆叠（stack）。
+示例：
+::
+    - 案例 1：
+        给定:
+            input.data = {[[0.6, 0.1, 0.3],
+                           [0.5, 0.3, 0.2]],
+                          [[1.3],
+                           [1.8]],
+                          [[2.3, 2.1],
+                           [2.5, 2.4]]}
+            axis = 1, use_stack = False
+        结果:                
+            output.data = [[0.6, 0.1, 0.3, 1.3, 2.3, 2.1],
+                           [0.5, 0.3, 0.2, 1.8, 2.5, 2.4]]
+            output_index.data = [3, 1, 2]
+    - 案例 2：
+        给定:
+            input.data = {[[0.6, 0.1],
+                           [0.5, 0.3]],
+                          [[0.3, 1.3],
+                           [0.2, 1.8]],
+                          [[2.3, 2.1],
+                           [2.5, 2.4]]}
+            axis = 1, use_stack = True
+        结果:                
+            output.data = [[[0.6, 0.1],
+                            [0.3, 1.3],
+                            [2.3, 2.1]],
+                           [[0.5, 0.3],
+                            [0.2, 1.8],
+                            [2.5, 2.4]]]
+            output_index.data = [2, 2, 2]
+参数：
+  - **input** (Variable) - 输入的LoDTensorArray。支持的数据类型为：float32、float64、int32、int64。
+  - **axis** (int，可选) - 指定对输入Tensor进行运算的轴， ``axis`` 的有效范围是[-R, R)，R是输入 ``input`` 中Tensor的Rank，``axis`` 为负时与 ``axis`` +R 等价。默认值为1。
+  - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **use_stack** (bool，可选) – 指明使用stack或concat进行运算，若为stack模式，要求LoDTensorArray中的所有Tensor具有相同的形状。默认值为False。
+返回：Variable的二元组， 包含了两个Tensor。第一个Tensor表示对数组内的元素进行stack或concat的输出结果，数据类型与数组中的Tensor相同；第二个Tensor包含了数组中各Tensor在 `axis` 维度的大小，数据类型为int32。
+返回类型： tuple
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    x0 = fluid.layers.assign(np.random.rand(2, 2).astype("float32"))
+    x1 = fluid.layers.assign(np.random.rand(2, 2).astype("float32"))
+    i = fluid.layers.fill_constant(shape=[1], dtype="int64", value=0)
+    array = fluid.layers.create_array(dtype='float32')
+    fluid.layers.array_write(x0, i, array)
+    fluid.layers.array_write(x1, i + 1, array)
+    output, output_index = fluid.layers.tensor_array_to_tensor(input=array)
--- a/doc/paddle/api/paddle/nn/functional/thresholded_relu_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/thresholded_relu_cn.rst
--- a/doc/paddle/api/paddle/topk_cn.rst
+++ b/doc/paddle/api/paddle/topk_cn.rst
--- a/doc/paddle/api/paddle/transpose_cn.rst
+++ b/doc/paddle/api/paddle/transpose_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/unbind_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/unbind_cn.rst
+.. _cn_api_paddle_tensor_unbind:
+unbind
+-------------------------------
+.. py:function:: paddle.tensor.unbind(input, axis=0)
+:alias_main: paddle.unbind
+:alias: paddle.unbind,paddle.tensor.unbind,paddle.tensor.manipulation.unbind
+该OP将输入Tensor按照指定的维度分割成多个子Tensor。
+**参数**：
+       - **input** (Variable) - 输入变量，数据类型为float32，float64，int32，int64的多维Tensor。
+       - **axis** (int32|int64，可选) - 数据类型为int32或int64,表示需要分割的维度。如果axis < 0，则划分的维度为rank(input) + axis。默认值为0。
+**返回**：分割后的Tensor列表。
+**返回类型**：列表(Variable)，数据类型为int32，int64，float32，float64。
+**代码示例**：
+.. code-block:: python
+    import paddle
+    # input is a variable which shape is [3, 4, 5]
+    input = paddle.fluid.data(
+        name="input", shape=[3, 4, 5], dtype="float32")
+    [x0, x1, x2] = paddle.tensor.unbind(input, axis=0)
+    # x0.shape [4, 5]
+    # x1.shape [4, 5]
+    # x2.shape [4, 5]
+    [x0, x1, x2, x3] = paddle.tensor.unbind(input, axis=1)
+    # x0.shape [3, 5]
+    # x1.shape [3, 5]
+    # x2.shape [3, 5]
+    # x3.shape [3, 5]
--- a/doc/paddle/api/paddle/nn/functional/unfold_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/unfold_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/uniform_random_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/uniform_random_cn.rst
+.. _cn_api_fluid_layers_uniform_random:
+uniform_random
+-------------------------------
+.. py:function:: paddle.fluid.layers.uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None)
+该OP返回数值服从范围[``min``, ``max``)内均匀分布的随机Tensor，形状为 ``shape``，数据类型为 ``dtype``。
+::
+    示例1:
+             给定：
+                 shape=[1,2]
+             则输出为：
+                 result=[[0.8505902, 0.8397286]]
+参数：
+    - **shape** (list|tuple|Tensor) - 生成的随机Tensor的形状。如果 ``shape`` 是list、tuple，则其中的元素可以是int，或者是形状为[1]且数据类型为int32、int64的Tensor。如果 ``shape`` 是Tensor，则是数据类型为int32、int64的1-D Tensor。
+    - **dtype** (str|np.dtype|core.VarDesc.VarType, 可选) - 输出Tensor的数据类型，支持float32、float64。默认值为float32。
+    - **min** (float|int，可选) - 要生成的随机值范围的下限，min包含在范围中。支持的数据类型：float、int。默认值为-1.0。
+    - **max** (float|int，可选) - 要生成的随机值范围的上限，max不包含在范围中。支持的数据类型：float、int。默认值为1.0。
+    - **seed** (int，可选) - 随机种子，用于生成样本。0表示使用系统生成的种子。注意如果种子不为0，该操作符每次都生成同样的随机数。支持的数据类型：int。默认为 0。
+    - **name** (str, 可选) - 输出的名字。一般无需设置，默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
+返回：
+    Tensor：数值服从范围[``min``, ``max``)内均匀分布的随机Tensor，形状为 ``shape``，数据类型为 ``dtype``。
+抛出异常：
+    - ``TypeError`` - 如果 ``shape`` 的类型不是list、tuple、Tensor。
+    - ``TypeError`` - 如果 ``dtype`` 不是float32、float64。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    startup_program = fluid.Program()
+    train_program = fluid.Program()
+    with fluid.program_guard(train_program, startup_program):
+        # example 1:
+        # attr shape is a list which doesn't contain Tensor.
+        result_1 = fluid.layers.uniform_random(shape=[3, 4])
+        # example 2:
+        # attr shape is a list which contains Tensor.
+        dim_1 = fluid.layers.fill_constant([1],"int64",3)
+        dim_2 = fluid.layers.fill_constant([1],"int32",5)
+        result_2 = fluid.layers.uniform_random(shape=[dim_1, dim_2])
+        # example 3:
+        # attr shape is a Tensor, the data type must be int32 or int64
+        var_shape = fluid.data(name='var_shape', shape=[2], dtype="int64")
+        result_3 = fluid.layers.uniform_random(var_shape)
+        var_shape_int32 = fluid.data(name='var_shape_int32', shape=[2], dtype="int32")
+        result_4 = fluid.layers.uniform_random(var_shape_int32)
+        shape_1 = np.array([3,4]).astype("int64")
+        shape_2 = np.array([3,4]).astype("int32")
+        exe = fluid.Executor(fluid.CPUPlace())
+        exe.run(startup_program)
+        outs = exe.run(train_program, feed = {'var_shape':shape_1, 'var_shape_int32':shape_2}, 
+                       fetch_list=[result_1, result_2, result_3, result_4])
--- a/doc/paddle/api/paddle/unique_cn.rst
+++ b/doc/paddle/api/paddle/unique_cn.rst
--- a/doc/paddle/api/paddle/unique_with_counts_cn.rst
+++ b/doc/paddle/api/paddle/unique_with_counts_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/unsqueeze_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/unsqueeze_cn.rst
+.. _cn_api_fluid_layers_unsqueeze:
+unsqueeze
+-------------------------------
+.. py:function:: paddle.fluid.layers.unsqueeze(input, axes, name=None)
+该OP向输入（input）的shape中一个或多个位置（axes）插入维度。
+- 示例：
+.. code-block:: python
+    输入：
+      X.shape = [2, 3]
+      X.data = [[1, 2, 3], 
+                [4, 5, 6]]
+      axes = [0, 2]
+    输出（在X的第0维和第2维插入新维度）：
+      Out.shape = [1, 2, 1, 3]
+      Out.data = [[[[1, 2, 3]],
+                    [[4, 5, 6]]]]
+参数：
+    - **input** (Variable)- 多维 ``Tensor``，数据类型为 ``float32``， ``float64``， ``int8``， ``int32``，或 ``int64``。
+    - **axes** (int|list|tuple|Variable) - 表示要插入维度的位置。数据类型是 ``int32`` 。如果 ``axes`` 的类型是 list 或 tuple，它的元素可以是整数或者形状为[1]的 ``Tensor`` 。如果 ``axes`` 的类型是 ``Variable``，则是1-D ``Tensor``。
+    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置。默认值： ``None``。
+返回：扩展维度后的多维Tensor
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.data(name='x', shape=[5, 10])
+    y = fluid.layers.unsqueeze(input=x, axes=[1])
+    # y.shape is [5, 1, 10]
--- a/doc/paddle/api/paddle/unstack_cn.rst
+++ b/doc/paddle/api/paddle/unstack_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/warpctc_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/warpctc_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/where_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/where_cn.rst
+.. _cn_api_fluid_layers_where:
+where
+-------------------------------
+.. py:function:: paddle.fluid.layers.where(condition)
+该OP计算输入元素中为True的元素在输入中的坐标（index）。
+参数：
+    - **condition** （Variable）– 输入秩至少为1的多维Tensor，数据类型是bool类型。
+返回：输出condition元素为True的坐标（index），将所有的坐标（index）组成一个2-D的Tensor。
+返回类型：Variable，数据类型是int64。
+**代码示例**：
+.. code-block:: python
+        import paddle.fluid as fluid
+        import paddle.fluid.layers as layers
+        import numpy as np
+        # tensor 为 [True, False, True]
+        condition = layers.assign(np.array([1, 0, 1], dtype='int32'))
+        condition = layers.cast(condition, 'bool')
+        out = layers.where(condition) # [[0], [2]]
+        # tensor 为 [[True, False], [False, True]]
+        condition = layers.assign(np.array([[1, 0], [0, 1]], dtype='int32'))
+        condition = layers.cast(condition, 'bool')
+        out = layers.where(condition) # [[0, 0], [1, 1]]
+        # tensor 为 [False, False, False]
+        condition = layers.assign(np.array([0, 0, 0], dtype='int32'))
+        condition = layers.cast(condition, 'bool')
+        out = layers.where(condition) # [[]]
--- a/doc/paddle/api/paddle/nn/while_loop_cn.rst
+++ b/doc/paddle/api/paddle/nn/while_loop_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/yolo_box_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/yolo_box_cn.rst
--- a/doc/paddle/api/paddle/nn/functional/yolov3_loss_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/yolov3_loss_cn.rst
--- a/doc/paddle/api/paddle/fluid/layers/zeros_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/zeros_cn.rst
+.. _cn_api_fluid_layers_zeros:
+zeros
+-------------------------------
+.. py:function:: paddle.fluid.layers.zeros(shape,dtype,force_cpu=False)
+该OP创建形状为 ``shape`` 、数据类型为 ``dtype`` 且值全为0的Tensor。
+参数：
+    - **shape** (tuple|list|Tensor) - 输出Tensor的形状， ``shape`` 的数据类型为int32或者int64。
+    - **dtype** (np.dtype|core.VarDesc.VarType|str) - 输出Tensor的数据类型，数据类型必须为float16、float32、float64、int32或int64。
+    - **force_cpu** (bool， 可选) - 是否强制将输出Tensor写入CPU内存。如果 ``force_cpu`` 为False，则将输出Tensor写入当前所在运算设备的内存，默认为False。
+返回：值全为0的Tensor，数据类型和 ``dtype`` 定义的类型一致。
+抛出异常：
+    - ``TypeError`` - 当 ``dtype`` 不是bool、 float16、float32、float64、int32、int64和None时。
+    - ``TypeError`` - 当 ``shape`` 不是tuple、list、或者Tensor时。 当 ``shape`` 为Tensor，其数据类型不是int32或者int64时。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.zeros(shape=[3, 2], dtype='float32') # [[0., 0.], [0., 0.], [0., 0.]]
--- a/doc/paddle/api/paddle/fluid/layers/zeros_like_cn.rst
+++ b/doc/paddle/api/paddle/fluid/layers/zeros_like_cn.rst
+.. _cn_api_fluid_layers_zeros_like:
+zeros_like
+-------------------------------
+.. py:function:: paddle.fluid.layers.zeros_like(x, out=None)
+该OP创建一个和x具有相同的形状和数据类型的全零Tensor。
+参数：
+    - **x** (Variable) – 指定输入为一个多维的Tensor，数据类型可以是bool，float32，float64，int32，int64。
+    - **out** (Variable，可选) – 如果为None，则创建一个Variable作为输出，创建后的Variable的数据类型，shape大小和输入变量x一致。如果是输入的一个Tensor，数据类型和数据shape大小需要和输入变量x一致。默认值为None。
+返回：返回一个多维的Tensor，具体的元素值和输入的数据类型相关，如果是bool类型的，则全False，其它均为0。数据shape大小和输入x一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.data(name='x', dtype='float32', shape=[3])
+    data = fluid.layers.zeros_like(x) # [0.0, 0.0, 0.0]
--- a/doc/paddle/api/paddle/fluid/load_op_library_cn.rst
+++ b/doc/paddle/api/paddle/fluid/load_op_library_cn.rst
+.. _cn_api_fluid_load_op_library:
+load_op_library
+-------------------------------
+.. py:function:: paddle.fluid.load_op_library(lib_filename)
+:api_attr: 声明式编程模式（静态图)
+``load_op_library`` 用于自定义C++算子中，用来加载算子动态共享库。加载库后，注册好的算子及其Kernel实现将在PaddlePaddle主进程中可以被调用。 请注意，自定义算子的类型不能与框架中的现有算子类型相同。
+参数：
+    - **lib_filename** (str) – 动态共享库的名字。
+**代码示例**
+.. code-block:: python
+       import paddle.fluid as fluid
+       #fluid.load_op_library('custom_op.so')
--- a/doc/paddle/api/paddle/fluid/memory_optimize_cn.rst
+++ b/doc/paddle/api/paddle/fluid/memory_optimize_cn.rst
+.. _cn_api_fluid_memory_optimize:
+memory_optimize
+-------------------------------
+.. py:function:: paddle.fluid.memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0, skip_grads=True)
+:api_attr: 声明式编程模式（静态图)
+**从1.6版本开始此接口不再推荐使用，请不要在新写的代码中使用它，1.6+版本已默认开启更优的存储优化策略**
--- a/doc/paddle/api/paddle/fluid/metrics/MetricBase_cn.rst
+++ b/doc/paddle/api/paddle/fluid/metrics/MetricBase_cn.rst
+.. _cn_api_fluid_metrics_MetricBase:
+MetricBase
+-------------------------------
+.. py:class:: paddle.fluid.metrics.MetricBase(name)
+在评估神经网络效果的时候，由于我们常常需要把测试数据切分成mini-batch，并逐次将每个mini-batch送入神经网络进行预测和评估，因此我们每次只能获得当前batch下的评估结果，而并不能一次性获得整个测试集的评估结果。paddle.fluid.metrics正是为了解决这些问题而设计的，大部分paddle.fluid.metrics下的类都具有如下功能：
+1. 接受模型对一个batch的预测结果（numpy.array）和这个batch的原始标签（numpy.array）作为输入，并进行特定的计算（如计算准确率，召回率等）。
+2. 将当前batch评估结果和历史评估结果累计起来，以获取目前处理过的所有batch的整体评估结果。
+MetricBase是所有paddle.fluid.metrics下定义的所有python类的基类，它定义了一组接口，并需要所有继承他的类实现具体的计算逻辑，包括：
+1. update(preds, labels)：给定当前计算当前batch的预测结果（preds）和标签（labels），计算这个batch的评估结果。 
+2. eval()：合并当前累积的每个batch的评估结果，并返回整体评估结果。
+3. reset()：清空累积的每个batch的评估结果。
+.. py:method:: __init__(name)
+构造函数，参数name表示当前创建的评估器的名字。
+参数：
+    - **name** (str) - 当前创建的评估器的名字，用于区分不同的评估器，例如准确率（accuracy）或者其他自定义名字（如，my_evaluator）。
+返回：一个python对象，表示一个具体的评估器。
+返回类型：python对象
+.. py:method:: reset()
+清空累积的每个batch的评估结果。
+返回：无
+.. py:method:: update(preds,labels)
+给定当前计算当前batch的预测结果（preds）和标签（labels），计算这个batch的评估结果，并将这个评估结果在评估器内部记录下来，注意update函数并不会返回评估结果。
+参数：
+     - **preds** (numpy.array) - 当前minibatch的预测结果。
+     - **labels** (numpy.array) - 当前minibatch的标签。
+返回：无
+.. py:method:: eval()
+合并当前累积的每个batch的评估结果，并返回整体评估结果。
+返回：当前累积batch的整体评估结果。
+返回类型：float|list(float)|numpy.array
+.. py:method:: get_config()
+获取当前评估器的状态，特指评估器内部没有 ``_`` 前缀的所有成员变量。
+返回：一个python字典，包含了当前评估器内部的状态。
+返回类型：python字典（dict）
--- a/doc/paddle/api/paddle/fluid/nets/glu_cn.rst
+++ b/doc/paddle/api/paddle/fluid/nets/glu_cn.rst
+.. _cn_api_fluid_nets_glu:
+glu
+-------------------------------
+.. py:function:: paddle.fluid.nets.glu(input, dim=-1)
+:api_attr: 声明式编程模式（静态图)
+门控线性单元 Gated Linear Units (GLU) 由 :ref:`cn_api_fluid_layers_split` ，:ref:`cn_api_fluid_layers_sigmoid` 和 :ref:`cn_api_fluid_layers_elementwise_mul` 组成。特定的，沿着给定维度将输入拆分成两个大小相同的部分，:math:`a` 和 :math:`b` ，按如下方式计算：
+.. math::
+    GLU(a,b) = a \bigotimes \sigma (b)
+参考论文: `Language Modeling with Gated Convolutional Networks <https://arxiv.org/pdf/1612.08083.pdf>`_
+参数：
+    - **input** (Variable) - 输入变量，多维 Tensor 或 LoDTensor, 支持的数据类型为float32、float64 和 float16（GPU）。
+    - **dim** (int) - 拆分的维度。如果 :math:`dim<0` ，拆分的维为 :math:`rank(input) + dim` 。默认为 -1，即最后一维。
+返回: 计算结果，尺寸为输入大小的一半，数据类型与输入的数据类型相同
+返回类型：变量（Variable）
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(
+        name="words", shape=[-1, 6, 3, 9], dtype="float32")
+    # 输出的形状为[-1, 3, 3, 9]
+    output = fluid.nets.glu(input=data, dim=1)  
--- a/doc/paddle/api/paddle/fluid/nets/img_conv_group_cn.rst
+++ b/doc/paddle/api/paddle/fluid/nets/img_conv_group_cn.rst
+.. _cn_api_fluid_nets_img_conv_group:
+img_conv_group
+-------------------------------
+.. py:function:: paddle.fluid.nets.img_conv_group(input, conv_num_filter, pool_size, conv_padding=1, conv_filter_size=3, conv_act=None, param_attr=None, conv_with_batchnorm=False, conv_batchnorm_drop_rate=0.0, pool_stride=1, pool_type='max', use_cudnn=True)
+:api_attr: 声明式编程模式（静态图)
+Image Convolution Group由Convolution2d，BatchNorm，DropOut和Pool2d组成。根据输入参数，img_conv_group将使用Convolution2d，BatchNorm，DropOut对Input进行连续计算，得到最后结果。
+参数：
+       - **input** （Variable） - 输入，格式为[N，C，H，W]的4-D Tensor。数据类型：float32和float64。
+       - **conv_num_filter** （list | tuple） - 卷积中使用的滤波器数。
+       - **pool_size** （int | list | tuple） - 池化层中池化核的大小。如果pool_size是列表或元组，则它必须包含两个整数（pool_size_height，pool_size_width）。否则，pool_size_height = pool_size_width = pool_size。
+       - **conv_padding** （int | list | tuple） - 卷积层中的填充 ``padding`` 的大小。如果 ``padding`` 是列表或元组，则其长度必须等于 ``conv_num_filter`` 的长度。否则，所有卷积的 ``conv_padding`` 都是相同的。默认：1。
+       - **conv_filter_size** （int | list | tuple） - 卷积层中滤波器大小。如果filter_size是列表或元组，则其长度必须等于 ``conv_num_filter`` 的长度。否则，所有卷积的 ``conv_filter_size`` 都是相同的。默认：3。
+       - **conv_act** （str） -  卷积层之后接的激活层类型， ``BatchNorm`` 后面没有。默认：None。
+       - **param_attr** (ParamAttr|None) ：指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。conv2d算子默认的权重初始化是Xavier。
+       - **conv_with_batchnorm** （bool | list） - 表示在卷积层之后是否使用 ``BatchNorm`` 。如果 ``conv_with_batchnorm`` 是一个列表，则其长度必须等于 ``conv_num_filter`` 的长度。否则， ``conv_with_batchnorm`` 指示是否所有卷积层后都使用 ``BatchNorm`` 。默认：False。
+       - **conv_batchnorm_drop_rate** （float | list） - 表示 ``BatchNorm`` 之后的 ``Dropout Layer`` 的 ``drop_rate`` 。如果 ``conv_batchnorm_drop_rate`` 是一个列表，则其长度必须等于 ``conv_num_filter`` 的长度。否则，所有 ``Dropout Layers`` 的 ``drop_rate`` 都是   ``conv_batchnorm_drop_rate`` 。默认：0.0。
+       - **pool_stride** （int | list | tuple） -  池化层的池化步长。如果 ``pool_stride`` 是列表或元组，则它必须包含两个整数（pooling_stride_height，pooling_stride_width）。否则，pooling_stride_height = pooling_stride_width = pool_stride。默认：1。
+       - **pool_type** （str） - 池化类型可以是最大池化的 ``max`` 和平均池化的 ``avg`` 。默认：max。
+       - **use_cudnn** （bool） - 是否使用cudnn内核，仅在安装cudnn库时才有效。默认值：True
+返回： Tensor。使用Convolution2d，BatchNorm，DropOut和Pool2d进行串行计算后的最终结果。
+返回类型： Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    conv_pool = fluid.nets.img_conv_group(input=img,
+                                          conv_padding=1,
+                                          conv_num_filter=[3, 3],
+                                          conv_filter_size=3,
+                                          conv_act="relu",
+                                          pool_size=2,
+                                          pool_stride=2)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    np_x = np.random.random(size=(1, 1, 28, 28)).astype('float32')
+    output = exe.run(feed={"img": np_x}, fetch_list = [conv_pool])
+    print(output)
--- a/doc/paddle/api/paddle/fluid/nets/scaled_dot_product_attention_cn.rst
+++ b/doc/paddle/api/paddle/fluid/nets/scaled_dot_product_attention_cn.rst
+.. _cn_api_fluid_nets_scaled_dot_product_attention:
+scaled_dot_product_attention
+-------------------------------
+.. py:function:: paddle.fluid.nets.scaled_dot_product_attention(queries, keys, values, num_heads=1, dropout_rate=0.0)
+:api_attr: 声明式编程模式（静态图)
+该接口实现了基于点积（并进行了缩放）的多头注意力（Multi-Head Attention）机制。attention可以表述为将一个查询（query）和一组键值对（key-value pair）映射为一个输出；Multi-Head Attention则是使用多路进行attention，而且对attention的输入进行了线性变换。公式如下：
+.. math::
+    MultiHead(Q, K, V ) & = Concat(head_1, ..., head_h)\\
+    where \  head_i & = Attention(QW_i^Q , KW_i^K , VW_i^V )\\
+    Attention(Q, K, V) & = softmax(\frac{QK^\mathrm{T}}{\sqrt{d_k}})V\\
+其中， :math:`Q, K, V` 分别对应 ``queries``、 ``keys`` 和 ``values`` ，详细内容请参阅 `Attention Is All You Need <https://arxiv.org/pdf/1706.03762.pdf>`_ 
+要注意该接口实现支持的是batch形式， :math:`Attention(Q, K, V)` 中使用的矩阵乘是batch形式的矩阵乘法，参考 fluid.layers. :ref:`cn_api_fluid_layers_matmul` 。
+参数：
+    - **queries** （Variable） - 形状为 :math:`[N, L_q, d_k \times h]` 的三维Tensor，其中 :math:`N` 为batch_size， :math:`L_q` 为查询序列长度， :math:`d_k \times h` 为查询的特征维度大小，:math:`h` 为head数。数据类型为float32或float64。
+    - **keys** （Variable） - 形状为 :math:`[N, L_k, d_k \times h]` 的三维Tensor，其中 :math:`N` 为batch_size， :math:`L_k` 为键值序列长度， :math:`d_k \times h` 为键的特征维度大小，:math:`h` 为head数。数据类型与 ``queries`` 相同。
+    - **values** （Variable） - 形状为 :math:`[N, L_k, d_v \times h]` 的三维Tensor，其中 :math:`N` 为batch_size， :math:`L_k` 为键值序列长度， :math:`d_v \times h` 为值的特征维度大小，:math:`h` 为head数。数据类型与 ``queries`` 相同。
+    - **num_heads** （int） - 指明所使用的head数。head数为1时不对输入进行线性变换。默认值为1。
+    - **dropout_rate** （float） - 以指定的概率对要attention到的内容进行dropout。默认值为0，即不使用dropout。
+返回： 形状为 :math:`[N, L_q, d_v * h]` 的三维Tensor，其中 :math:`N` 为batch_size， :math:`L_q` 为查询序列长度， :math:`d_v * h` 为值的特征维度大小。与输入具有相同的数据类型。表示Multi-Head Attention的输出。
+返回类型： Variable
+抛出异常:    
+    - :code:`ValueError`： ``queries`` 、 ``keys`` 和 ``values`` 必须都是三维。
+    - :code:`ValueError`： ``queries`` 和 ``keys`` 的最后一维（特征维度）大小必须相同。
+    - :code:`ValueError`： ``keys`` 和 ``values`` 的第二维（长度维度）大小必须相同。
+    - :code:`ValueError`： ``keys`` 的最后一维（特征维度）大小必须是 ``num_heads`` 的整数倍。
+    - :code:`ValueError`： ``values`` 的最后一维（特征维度）大小必须是 ``num_heads`` 的整数倍。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    queries = fluid.data(name="queries", shape=[3, 5, 9], dtype="float32")
+    keys = fluid.data(name="keys", shape=[3, 6, 9], dtype="float32")
+    values = fluid.data(name="values", shape=[3, 6, 10], dtype="float32")
+    contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values)
+    contexts.shape  # [3, 5, 10]
--- a/doc/paddle/api/paddle/fluid/nets/sequence_conv_pool_cn.rst
+++ b/doc/paddle/api/paddle/fluid/nets/sequence_conv_pool_cn.rst
+.. _cn_api_fluid_nets_sequence_conv_pool:
+sequence_conv_pool
+-------------------------------
+.. py:function:: paddle.fluid.nets.sequence_conv_pool(input, num_filters, filter_size, param_attr=None, act='sigmoid', pool_type='max', bias_attr=None)
+:api_attr: 声明式编程模式（静态图）
+**注意：该OP的输入** ``input`` **必须是2维LoDTensor, lod_level为1，如果输入是Tensor，建议使用** :ref:`cn_api_fluid_nets_simple_img_conv_pool` **代替**
+该接口由序列卷积( :ref:`cn_api_fluid_layers_sequence_conv` )和池化( :ref:`cn_api_fluid_layers_sequence_pool` )组成
+参数：
+    - **input** (Variable) - sequence_conv的输入，LoDTensor, lod_level为1，支持时间长度可变的输入序列。当前输入为shape为（T，N）的矩阵，T是mini-batch中的总时间步数，N是input_hidden_size。数据类型为float32或者float64
+    - **num_filters** (int)- 卷积核的数目，整数
+    - **filter_size** (int)- 卷积核的大小，整数
+    - **param_attr** (ParamAttr，可选) - sequence_conv层的参数属性，类型是ParamAttr或者None。默认值为None
+    - **act** (str|None，可选) - sequence_conv层的激活函数类型，字符串，可以是'relu', 'softmax', 'sigmoid'等激活函数的类型。如果设置为None，则不使用激活。默认值为'sigmoid'
+    - **pool_type** (str，可选) - 池化类型，字符串。可以是'max', 'average', 'sum'或者'sqrt'。默认值为'max'
+    - **bias_attr** (ParamAttr|bool，可选) – sequence_conv偏置的参数属性，类型可以是bool，ParamAttr或者None。如果设置为False，则不会向输出单元添加偏置。如果将其设置为None或ParamAttr的一个属性，sequence_conv将创建ParamAttr作为bias_attr。如果未设置bias_attr的初始化器，则初始化偏差为零。默认值为None
+    - **name** (str|None，可选) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None
+返回：经过sequence_conv和sequence_pool两个操作之后的结果所表示的Tensor，数据类型与 ``input`` 相同
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    input_dim = 100 #len(word_dict)
+    emb_dim = 128
+    hid_dim = 512
+    data = fluid.layers.data( name="words", shape=[1], dtype="int64", lod_level=1)
+    emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
+    seq_conv = fluid.nets.sequence_conv_pool(input=emb,
+                                         num_filters=hid_dim,
+                                         filter_size=3,
+                                         act="tanh",
+                                         pool_type="sqrt")
--- a/doc/paddle/api/paddle/fluid/nets/simple_img_conv_pool_cn.rst
+++ b/doc/paddle/api/paddle/fluid/nets/simple_img_conv_pool_cn.rst
+.. _cn_api_fluid_nets_simple_img_conv_pool:
+simple_img_conv_pool
+-------------------------------
+.. py:function:: paddle.fluid.nets.simple_img_conv_pool(input, num_filters, filter_size, pool_size, pool_stride, pool_padding=0, pool_type='max', global_pooling=False, conv_stride=1, conv_padding=0, conv_dilation=1, conv_groups=1, param_attr=None, bias_attr=None, act=None, use_cudnn=True)
+:api_attr: 声明式编程模式（静态图）
+ ``simple_img_conv_pool`` 由一个conv2d( :ref:`cn_api_fluid_layers_conv2d` )和一个pool2d( :ref:`cn_api_fluid_layers_pool2d` ) OP组成。
+参数：
+    - **input** (Variable) - 输入图像, 4-D Tensor, 格式为[N，C，H，W]。数据类型是float32或者float64
+    - **num_filters** (int) - 卷积核的数目，整数。
+    - **filter_size** (int | list | tuple) - conv2d卷积核大小，整数或者整型列表或者整型元组。如果 ``filter_size`` 是列表或元组，则它必须包含两个整数(filter_size_H，filter_size_W)。如果是整数，则filter_size_H = filter_size_W = filter_size。
+    - **pool_size** (int | list | tuple) - pool2d池化层大小，整数或者整型列表或者整型元组。如果pool_size是列表或元组，则它必须包含两个整数(pool_size_H，pool_size_W)。如果是整数，则pool_size_H = pool_size_W = pool_size。
+    - **pool_stride** (int | list | tuple) - pool2d池化层步长，整数或者整型列表或者整型元组。如果pool_stride是列表或元组，则它必须包含两个整数(pooling_stride_H，pooling_stride_W)。如果是整数，pooling_stride_H = pooling_stride_W = pool_stride。
+    - **pool_padding** (int | list | tuple，可选) - pool2d池化层的padding，整数或者整型列表或者整型元组。如果pool_padding是列表或元组，则它必须包含两个整数(pool_padding_H，pool_padding_W)。如果是整数，pool_padding_H = pool_padding_W = pool_padding。默认值为0。
+    - **pool_type** (str，可选) - 池化类型，字符串，可以是 ``max`` 或者 ``avg`` ，分别对应最大池化和平均池化。默认 ``max`` 。
+    - **global_pooling** (bool，可选)- 是否使用全局池化。如果global_pooling = true，则忽略pool_size和pool_padding。默认为False
+    - **conv_stride** (int | list | tuple，可选) - conv2d Layer的卷积步长，整数或者整型列表或者整型元组。如果conv_stride是列表或元组，则它必须包含两个整数，(conv_stride_H，conv_stride_W)。如果是整数，conv_stride_H = conv_stride_W = conv_stride。默认值：conv_stride = 1。
+    - **conv_padding** (int | list | tuple，可选) - conv2d Layer的padding大小，整数或者整型列表或者整型元组。如果conv_padding是列表或元组，则它必须包含两个整数(conv_padding_H，conv_padding_W)。如果是整数，conv_padding_H = conv_padding_W = conv_padding。默认值：conv_padding = 0。
+    - **conv_dilation** (int | list | tuple，可选) - conv2d Layer的dilation大小，整数或者整型列表或者整型元组。如果conv_dilation是列表或元组，则它必须包含两个整数(conv_dilation_H，conv_dilation_W)。如果是整数，conv_dilation_H = conv_dilation_W = conv_dilation。默认值：conv_dilation = 1。
+    - **conv_groups** (int，可选) - conv2d Layer的组数，整数。根据Alex Krizhevsky的Deep CNN论文中的分组卷积：当group = 2时，前半部分滤波器仅连接到输入通道的前半部分，而后半部分滤波器仅连接到后半部分输入通道。默认值：conv_groups = 1。
+    - **param_attr** (ParamAttr，可选) - conv2d的weights参数属性。如果将其设置为None或ParamAttr的一个属性，则conv2d将创建ParamAttr作为param_attr。如果未设置param_attr的初始化，则使用 :math:`Normal(0.0, std)` 初始化参数，并且 ``std`` 为 :math:`(\frac{2.0 }{filter\_elem\_num})^{0.5}` 。默认值:None
+    - **bias_attr** (ParamAttr | bool | None，可选) - conv2d的bias参数属性。如果设置为False，则不会向输出单元添加bias。如果将其设置为None或ParamAttr的一个属性，则conv2d将创建ParamAttr作为bias_attr。如果未设置bias_attr的初始化器，则将bias初始化为零。默认值：None
+    - **act** (str，可选) - conv2d的激活类型，字符串，可以是'relu', 'softmax', 'sigmoid'等激活函数的类型。如果设置为None，则不附加激活。默认值：None。
+    - **use_cudnn** (bool，可选) - 是否使用cudnn内核，仅在安装cudnn库时才有效。默认值：True。
+    - **name** (str|None，可选) - 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None
+返回： 输入input经过conv2d和pool2d之后输出的结果，数据类型与input相同
+返回类型：  Variable
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    # 输入 img.shape = [-1, 1, 28, 28]
+    # 使用该接口带如下参数的操作之后，输出conv_pool.shape = [-1, 20, 12, 12]
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
+    conv_pool = fluid.nets.simple_img_conv_pool(input=img,
+                                            filter_size=5,
+                                            num_filters=20,
+                                            pool_size=2,
+                                            pool_stride=2,
+                                            act="relu")
--- a/doc/paddle/api/paddle/fluid/one_hot_cn.rst
+++ b/doc/paddle/api/paddle/fluid/one_hot_cn.rst
+.. _cn_api_fluid_one_hot:
+one_hot
+-------------------------------
+.. py:function:: paddle.fluid.one_hot(input, depth, allow_out_of_range=False)
+:alias_main: paddle.nn.functional.one_hot
+:alias: paddle.nn.functional.one_hot,paddle.nn.functional.common.one_hot
+:old_api: paddle.fluid.one_hot
+该OP将输入（input）中的每个id转换为一个one-hot向量，其长度为 ``depth`` ，该id对应的向量维度上的值为1，其余维度的值为0。
+输出的Tensor（或LoDTensor）的shape是在输入shape的最后一维后面添加了depth的维度。
+- 示例1（allow_out_of_range=False）：
+.. code-block:: text
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 3, 0]
+    depth = 4
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.],
+                [0., 0., 0., 1.],
+                [1., 0., 0., 0.]]
+- 示例2 （allow_out_of_range=True）：
+.. code-block:: text
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=True
+  输出：
+    Out.shape = [4, 4]
+    Out.data = [[0., 1., 0., 0.],
+                [0., 1., 0., 0.], 
+                [0., 0., 0., 0.], ## 这一维的值是5，超过了depth，因此填成0
+                [1., 0., 0., 0.]]
+- 示例3 （allow_out_of_range=False）：
+.. code-block:: text
+  输入：
+    X.shape = [4]
+    X.data = [1, 1, 5, 0]
+    depth = 4
+    allow_out_of_range=False
+  输出：抛出 Illegal value 的异常
+    X中第2维的值是5，超过了depth，而allow_out_of_range=False表示不允许超过，因此抛异常。  
+参数：
+    - **input** (Variable) - 维度为 :math:`[N_1, ..., N_n]` 的多维Tensor或LoDTensor，维度至少1维。数据类型为int32或int64。
+    - **depth** (int) - 用于定义一个one-hot向量的长度。若输入为词id，则 ``depth`` 通常取值为词典大小。
+    - **allow_out_of_range** (bool) - 指明input中所包含的id值是否可以大于depth值。当超过depth时，如果 `allow_out_of_range` 为False，则会抛出 `Illegal value` 的异常；如果设置为True，该id对应的向量为0向量。默认值为False。
+返回：转换后的one_hot Tensor或LoDTensor，数据类型为float32。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    # 该代码对应上述第一个示例，其中输入label的shape是[4]，输出one_hot_label的shape是[4, 4]
+    label = fluid.layers.data(name="label", shape=[4], append_batch_size=False, dtype="int64")
+    one_hot_label = fluid.one_hot(input=label, depth=4)
--- a/doc/paddle/api/paddle/optimizer/AdamOptimizer_cn.rst
+++ b/doc/paddle/api/paddle/optimizer/AdamOptimizer_cn.rst
--- a/doc/paddle/api/paddle/fluid/optimizer/Adam_cn.rst
+++ b/doc/paddle/api/paddle/fluid/optimizer/Adam_cn.rst
+.. _cn_api_fluid_optimizer_Adam:
+Adam
+-------------------------------
+.. py:attribute::  paddle.fluid.optimizer.Adam
+``AdamOptimizer`` 的别名
--- a/doc/paddle/api/paddle/optimizer/AdamaxOptimizer_cn.rst
+++ b/doc/paddle/api/paddle/optimizer/AdamaxOptimizer_cn.rst
--- a/doc/paddle/api/paddle/fluid/optimizer/Adamax_cn.rst
+++ b/doc/paddle/api/paddle/fluid/optimizer/Adamax_cn.rst
+.. _cn_api_fluid_optimizer_Adamax:
+Adamax
+-------------------------------
+.. py:attribute:: paddle.fluid.optimizer.Adamax
+``AdamaxOptimizer`` 的别名
--- a/doc/paddle/api/paddle/optimizer/RMSPropOptimizer_cn.rst
+++ b/doc/paddle/api/paddle/optimizer/RMSPropOptimizer_cn.rst
--- a/doc/paddle/api/paddle/static/ParallelExecutor_cn.rst
+++ b/doc/paddle/api/paddle/static/ParallelExecutor_cn.rst
--- a/doc/paddle/api/paddle/ParamAttr_cn.rst
+++ b/doc/paddle/api/paddle/ParamAttr_cn.rst
--- a/doc/paddle/api/paddle/static/WeightNormParamAttr_cn.rst
+++ b/doc/paddle/api/paddle/static/WeightNormParamAttr_cn.rst
--- a/doc/paddle/api/paddle/fluid/profiler/cuda_profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler/cuda_profiler_cn.rst
+.. _cn_api_fluid_profiler_cuda_profiler:
+cuda_profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.cuda_profiler(output_file, output_mode=None, config=None)
+CUDA性能分析器。该分析器通过调用CUDA运行时编程接口，对CUDA程序进行性能分析，并将分析结果写入输出文件output_file。输出格式由output_mode参数控制，性能分析配置选项由config参数控制。得到输出文件后，用户可使用 `NVIDIA Visual Profiler <https://developer.nvidia.com/nvidia-visual-profiler>`_ 工具来加载这个输出文件以获得可视化结果。
+参数:
+  - **output_file** (str) – 输出文件名称, 输出结果将会写入该文件。
+  - **output_mode** (str，可选) – 输出格式，有两种可以选择，分别是 key-value 键值对格式'kvp' 和 逗号分割的格式'csv'（默认格式）。
+  - **config** (list<str>, 可选) – NVIDIA性能分析配置列表，默认值为None时会选择以下配置：['gpustarttimestamp', 'gpuendtimestamp', 'gridsize3d', 'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace']。上述每个配置的含义和更多配置选项，请参考 `Compute Command Line Profiler User Guide <https://developer.download.nvidia.cn/compute/DevZone/docs/html/C/doc/Compute_Command_Line_Profiler_User_Guide.pdf>`_ 。
+抛出异常:
+    - ``ValueError`` -  如果输出格式output_mode不是'kvp'、'csv'两者之一，会抛出异常。
+返回: 无
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    import numpy as np
+    epoc = 8
+    dshape = [4, 3, 28, 28]
+    data = fluid.data(name='data', shape=[None, 3, 28, 28], dtype='float32')
+    conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
+    place = fluid.CUDAPlace(0)
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    output_file = 'cuda_profiler.txt'
+    with profiler.cuda_profiler(output_file, 'csv') as nvprof:
+        for i in range(epoc):
+            input = np.random.random(dshape).astype('float32')
+            exe.run(fluid.default_main_program(), feed={'data': input})
+    # 之后可以使用 NVIDIA Visual Profiler 可视化结果
--- a/doc/paddle/api/paddle/fluid/profiler/profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler/profiler_cn.rst
+.. _cn_api_fluid_profiler_profiler:
+profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.profiler(state, sorted_key=None, profile_path='/tmp/profile', tracer_option='Default')
+通用性能分析器 。与 :ref:`cn_api_fluid_profiler_cuda_profiler` 不同，此分析器可用于分析CPU和GPU程序。
+参数:
+  - **state** (str) –  性能分析状态, 取值为 'CPU' 或 'GPU' 或 'All'。'CPU'表示只分析CPU上的性能；'GPU'表示同时分析CPU和GPU上的性能；'All'表示除了同时分析CPU和GPU上的性能外，还将生成 `性能分析的时间轴信息 <../../advanced_usage/development/profiling/timeline_cn.html>`_ 。
+  - **sorted_key** (str，可选) – 性能分析结果的打印顺序，取值为None、'calls'、'total'、'max'、'min'、'ave'之一。默认值为None，表示按照第一次结束时间顺序打印；'calls'表示按调用的数量进行排序；'total'表示按总执行时间排序；'max'表示按最大执行时间排序；'min'表示按最小执行时间排序；'ave'表示按平均执行时间排序。
+  - **profile_path** (str，可选) –  如果性能分析状态为'All', 将生成的时间轴信息写入profile_path，默认输出文件为 ``/tmp/profile`` 。
+  - **tracer_option** (str，可选) –   性能分析选项取值为 'Default' 或 'OpDetail' 或 'AllOpDetail', 此选项用于设置性能分析层次并打印不同层次的性能分析结果， `Default` 选项打印不同Op类型的性能分析结果， `OpDetail` 则会打印不同OP类型更详细的性能分析结果，比如compute和data transform。 `AllOpDetail` 和 `OpDetail` 类似，但是打印的是不同Op名字的性能分析结果。
+抛出异常：
+  - ``ValueError`` – 如果state取值不在 ['CPU', 'GPU', 'All']中，或sorted_key取值不在 [None, 'calls', 'total', 'max', 'min', 'ave']中，则抛出异常。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    import numpy as np
+    epoc = 8
+    dshape = [4, 3, 28, 28]
+    data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32')
+    conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    with profiler.profiler('CPU', 'total', '/tmp/profile') as prof:
+        for i in range(epoc):
+            input = np.random.random(dshape).astype('float32')
+            exe.run(fluid.default_main_program(), feed={'data': input})
+**结果示例**
+.. code-block:: text
+    #### sorted_key = 'total', 'calls', 'max', 'min', 'ave' 结果 ####
+    # 示例结果中，除了Sorted by number of xxx in descending order in the same thread 这句随着sorted_key变化而不同，其余均相同。
+    # 原因是，示例结果中，上述5列都已经按从大到小排列了。
+    ------------------------->     Profiling Report     <-------------------------
+    Place: CPU
+    Time unit: ms
+    Sorted by total time in descending order in the same thread
+    #Sorted by number of calls in descending order in the same thread
+    #Sorted by number of max in descending order in the same thread
+    #Sorted by number of min in descending order in the same thread
+    #Sorted by number of avg in descending order in the same thread
+    Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
+    thread0::conv2d             8           129.406     0.304303    127.076     16.1758     0.983319
+    thread0::elementwise_add    8           2.11865     0.193486    0.525592    0.264832    0.016099
+    thread0::feed               8           0.076649    0.006834    0.024616    0.00958112  0.000582432
+    #### sorted_key = None 结果 ####
+    # 示例结果中，是按照Op结束时间顺序打印，因此打印顺序为feed->conv2d->elementwise_add
+    ------------------------->     Profiling Report     <-------------------------
+    Place: CPU
+    Time unit: ms
+    Sorted by event first end time in descending order in the same thread
+    Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
+    thread0::feed               8           0.077419    0.006608    0.023349    0.00967738  0.00775934
+    thread0::conv2d             8           7.93456     0.291385    5.63342     0.99182     0.795243
+    thread0::elementwise_add    8           1.96555     0.191884    0.518004    0.245693    0.196998
--- a/doc/paddle/api/paddle/fluid/profiler/reset_profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler/reset_profiler_cn.rst
+.. _cn_api_fluid_profiler_reset_profiler:
+reset_profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.reset_profiler()
+清除之前的性能分析记录。此接口不能和 :ref:`cn_api_fluid_profiler_cuda_profiler` 一起使用 ，但它可以和 :ref:`cn_api_fluid_profiler_start_profiler` 、:ref:`cn_api_fluid_profiler_stop_profiler` 和 :ref:`cn_api_fluid_profiler_profiler` 一起使用。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    with profiler.profiler('CPU', 'total', '/tmp/profile'):
+        for iter in range(10):
+            if iter == 2:
+                profiler.reset_profiler()
+            # ...
--- a/doc/paddle/api/paddle/fluid/profiler/start_profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler/start_profiler_cn.rst
+.. _cn_api_fluid_profiler_start_profiler:
+start_profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.start_profiler(state, tracer_option='Default')
+激活使用性能分析器。除了 :ref:`cn_api_fluid_profiler_profiler` 外，用户还可以使用 :ref:`cn_api_fluid_profiler_start_profiler` 和 :ref:`cn_api_fluid_profiler_stop_profiler` 来激活和停止使用性能分析器。
+参数:
+  - **state** (str) –  性能分析状态, 取值为 'CPU' 或 'GPU' 或 'All'。'CPU'表示只分析CPU上的性能；'GPU'表示同时分析CPU和GPU上的性能；'All'表示除了同时分析CPU和GPU上的性能外，还将生成性能分析的时间轴信息 :ref:`fluid_timeline` 。
+  - **tracer_option** (str，可选) –   性能分析选项取值为 'Default' 或 'OpDetail' 或 'AllOpDetail', 此选项用于设置性能分析层次并打印不同层次的性能分析结果， `Default` 选项打印不同Op类型的性能分析结果， `OpDetail` 则会打印不同OP类型更详细的性能分析结果，比如compute和data transform。 `AllOpDetail` 和 `OpDetail` 类似，但是打印的是不同Op名字的性能分析结果。
+抛出异常:
+  - ``ValueError`` – 如果state取值不在 ['CPU', 'GPU', 'All']中或者tracer_option取值不在['Default', 'OpDetail', 'AllOpDetail']中，则抛出异常
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    profiler.start_profiler('GPU')
+    for iter in range(10):
+        if iter == 2:
+            profiler.reset_profiler()
+        # except each iteration
+    profiler.stop_profiler('total', '/tmp/profile')
--- a/doc/paddle/api/paddle/fluid/profiler/stop_profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler/stop_profiler_cn.rst
+.. _cn_api_fluid_profiler_stop_profiler:
+stop_profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.stop_profiler(sorted_key=None, profile_path='/tmp/profile')
+停止使用性能分析器。除了 :ref:`cn_api_fluid_profiler_profiler` 外，用户还可以使用 :ref:`cn_api_fluid_profiler_start_profiler` 和 :ref:`cn_api_fluid_profiler_stop_profiler` 来激活和停止使用性能分析器。
+参数:
+  - **sorted_key** (str，可选) – 性能分析结果的打印顺序，取值为None、'calls'、'total'、'max'、'min'、'ave'之一。默认值为None，表示按照第一次结束时间顺序打印；'calls'表示按调用的数量进行排序；'total'表示按总执行时间排序；'max'表示按最大执行时间排序；'min'表示按最小执行时间排序；'ave'表示按平均执行时间排序。
+  - **profile_path** (str，可选) –  如果性能分析状态为'All', 将生成的时间轴信息写入profile_path，默认输出文件为 ``/tmp/profile`` 。
+抛出异常:
+  - ``ValueError`` – 如果sorted_key取值不在 [None, 'calls', 'total', 'max', 'min', 'ave']中，则抛出异常。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    profiler.start_profiler('GPU')
+    for iter in range(10):
+        if iter == 2:
+            profiler.reset_profiler()
+            # except each iteration
+    profiler.stop_profiler('total', '/tmp/profile')
--- a/doc/paddle/api/paddle/fluid/profiler_cn.rst
+++ b/doc/paddle/api/paddle/fluid/profiler_cn.rst
+.. _cn_api_fluid_profiler_profiler:
+profiler
+-------------------------------
+.. py:function:: paddle.fluid.profiler.profiler(state, sorted_key=None, profile_path='/tmp/profile', tracer_option='Default')
+通用性能分析器 。与 :ref:`cn_api_fluid_profiler_cuda_profiler` 不同，此分析器可用于分析CPU和GPU程序。
+参数:
+  - **state** (str) –  性能分析状态, 取值为 'CPU' 或 'GPU' 或 'All'。'CPU'表示只分析CPU上的性能；'GPU'表示同时分析CPU和GPU上的性能；'All'表示除了同时分析CPU和GPU上的性能外，还将生成 `性能分析的时间轴信息 <../../advanced_usage/development/profiling/timeline_cn.html>`_ 。
+  - **sorted_key** (str，可选) – 性能分析结果的打印顺序，取值为None、'calls'、'total'、'max'、'min'、'ave'之一。默认值为None，表示按照第一次结束时间顺序打印；'calls'表示按调用的数量进行排序；'total'表示按总执行时间排序；'max'表示按最大执行时间排序；'min'表示按最小执行时间排序；'ave'表示按平均执行时间排序。
+  - **profile_path** (str，可选) –  如果性能分析状态为'All', 将生成的时间轴信息写入profile_path，默认输出文件为 ``/tmp/profile`` 。
+  - **tracer_option** (str，可选) –   性能分析选项取值为 'Default' 或 'OpDetail' 或 'AllOpDetail', 此选项用于设置性能分析层次并打印不同层次的性能分析结果， `Default` 选项打印不同Op类型的性能分析结果， `OpDetail` 则会打印不同OP类型更详细的性能分析结果，比如compute和data transform。 `AllOpDetail` 和 `OpDetail` 类似，但是打印的是不同Op名字的性能分析结果。
+抛出异常：
+  - ``ValueError`` – 如果state取值不在 ['CPU', 'GPU', 'All']中，或sorted_key取值不在 [None, 'calls', 'total', 'max', 'min', 'ave']中，则抛出异常。
+**代码示例**
+.. code-block:: python
+    import paddle.fluid as fluid
+    import paddle.fluid.profiler as profiler
+    import numpy as np
+    epoc = 8
+    dshape = [4, 3, 28, 28]
+    data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32')
+    conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    exe.run(fluid.default_startup_program())
+    with profiler.profiler('CPU', 'total', '/tmp/profile') as prof:
+        for i in range(epoc):
+            input = np.random.random(dshape).astype('float32')
+            exe.run(fluid.default_main_program(), feed={'data': input})
+**结果示例**
+.. code-block:: text
+    #### sorted_key = 'total', 'calls', 'max', 'min', 'ave' 结果 ####
+    # 示例结果中，除了Sorted by number of xxx in descending order in the same thread 这句随着sorted_key变化而不同，其余均相同。
+    # 原因是，示例结果中，上述5列都已经按从大到小排列了。
+    ------------------------->     Profiling Report     <-------------------------
+    Place: CPU
+    Time unit: ms
+    Sorted by total time in descending order in the same thread
+    #Sorted by number of calls in descending order in the same thread
+    #Sorted by number of max in descending order in the same thread
+    #Sorted by number of min in descending order in the same thread
+    #Sorted by number of avg in descending order in the same thread
+    Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
+    thread0::conv2d             8           129.406     0.304303    127.076     16.1758     0.983319
+    thread0::elementwise_add    8           2.11865     0.193486    0.525592    0.264832    0.016099
+    thread0::feed               8           0.076649    0.006834    0.024616    0.00958112  0.000582432
+    #### sorted_key = None 结果 ####
+    # 示例结果中，是按照Op结束时间顺序打印，因此打印顺序为feed->conv2d->elementwise_add
+    ------------------------->     Profiling Report     <-------------------------
+    Place: CPU
+    Time unit: ms
+    Sorted by event first end time in descending order in the same thread
+    Event                       Calls       Total       Min.        Max.        Ave.        Ratio.
+    thread0::feed               8           0.077419    0.006608    0.023349    0.00967738  0.00775934
+    thread0::conv2d             8           7.93456     0.291385    5.63342     0.99182     0.795243
+    thread0::elementwise_add    8           1.96555     0.191884    0.518004    0.245693    0.196998
--- a/doc/paddle/api/paddle/fluid/reader/PyReader_cn.rst
+++ b/doc/paddle/api/paddle/fluid/reader/PyReader_cn.rst
+.. _cn_api_fluid_io_PyReader:
+PyReader
+-------------------------------
+.. py:class:: paddle.fluid.io.PyReader(feed_list=None, capacity=None, use_double_buffer=True, iterable=True, return_list=False)
+在python中为数据输入创建一个reader对象。将使用python线程预取数据，并将其异步插入队列。当调用Executor.run（…）时，将自动提取队列中的数据。 
+参数:
+    - **feed_list** (list(Variable)|tuple(Variable)) - feed变量列表，由 ``fluid.layers.data()`` 创建。
+    - **capacity** (int) - PyReader对象内部维护队列的容量大小。单位是batch数量。若reader读取速度较快，建议设置较大的capacity值。
+    - **use_double_buffer** (bool) - 是否使用 ``double_buffer_reader`` 。若use_double_buffer=True，PyReader会异步地预读取下一个batch的数据，可加速数据读取过程，但同时会占用少量的CPU/GPU存储，即一个batch输入数据的存储空间。
+    - **iterable** (bool) - 所创建的PyReader对象是否可迭代。
+    - **return_list** (bool) - 每个设备上的数据是否以list形式返回。仅在iterable = True模式下有效。若return_list = False，每个设备上的返回数据均是str -> LoDTensor的映射表，其中映射表的key是每个输入变量的名称。若return_list = True，则每个设备上的返回数据均是list(LoDTensor)。推荐在静态图模式下使用return_list = False，在动态图模式下使用return_list = True。
+返回: 被创建的reader对象
+返回类型： reader (Reader)
+**代码示例**
+1.如果iterable=False，则创建的PyReader对象几乎与 ``fluid.layers.py_reader()`` 相同。算子将被插入program中。用户应该在每个epoch之前调用 ``start()`` ，并在epoch结束时捕获 ``Executor.run()`` 抛出的 ``fluid.core.EOFException`` 。一旦捕获到异常，用户应该调用 ``reset()`` 手动重置reader。
+.. code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import numpy as np
+    EPOCH_NUM = 3
+    ITER_NUM = 5
+    BATCH_SIZE = 3
+    def network(image, label):
+        # 用户定义网络，此处以softmax回归为例
+        predict = fluid.layers.fc(input=image, size=10, act='softmax')
+        return fluid.layers.cross_entropy(input=predict, label=label) 
+    def reader_creator_random_image_and_label(height, width):
+        def reader():
+            for i in range(ITER_NUM):
+                fake_image = np.random.uniform(low=0,
+                                               high=255,
+                                               size=[height, width])
+                fake_label = np.ones([1])
+                yield fake_image, fake_label
+        return reader
+    image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    reader = fluid.io.PyReader(feed_list=[image, label],
+                               capacity=4,
+                               iterable=False)
+    user_defined_reader = reader_creator_random_image_and_label(784, 784)
+    reader.decorate_sample_list_generator(
+        paddle.batch(user_defined_reader, batch_size=BATCH_SIZE))
+    loss = network(image, label)
+    executor = fluid.Executor(fluid.CPUPlace())
+    executor.run(fluid.default_startup_program())
+    for i in range(EPOCH_NUM):
+        reader.start()
+        while True:
+            try:
+                executor.run(feed=None)
+            except fluid.core.EOFException:
+                reader.reset()
+                break
+2.如果iterable=True，则创建的PyReader对象与程序分离。程序中不会插入任何算子。在本例中，创建的reader是一个python生成器，它是可迭代的。用户应将从PyReader对象生成的数据输入 ``Executor.run(feed=...)`` 。
+.. code-block:: python
+   import paddle
+   import paddle.fluid as fluid
+   import numpy as np
+   EPOCH_NUM = 3
+   ITER_NUM = 5
+   BATCH_SIZE = 10
+   def network(image, label):
+        # 用户定义网络，此处以softmax回归为例
+        predict = fluid.layers.fc(input=image, size=10, act='softmax')
+        return fluid.layers.cross_entropy(input=predict, label=label)   
+   def reader_creator_random_image(height, width):
+       def reader():
+           for i in range(ITER_NUM):
+               fake_image = np.random.uniform(low=0, high=255, size=[height, width]),
+               fake_label = np.ones([1])
+               yield fake_image, fake_label
+       return reader
+   image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+   label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+   reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True, return_list=False)
+   user_defined_reader = reader_creator_random_image(784, 784)
+   reader.decorate_sample_list_generator(
+       paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
+       fluid.core.CPUPlace())
+   loss = network(image, label)
+   executor = fluid.Executor(fluid.CPUPlace())
+   executor.run(fluid.default_startup_program())
+   for _ in range(EPOCH_NUM):
+       for data in reader():
+           executor.run(feed=data, fetch_list=[loss])
+3. return_list=True，返回值将用list表示而非dict，通常用于动态图模式中。
+.. code-block:: python
+    import paddle
+    import paddle.fluid as fluid
+    import numpy as np
+    EPOCH_NUM = 3
+    ITER_NUM = 5
+    BATCH_SIZE = 10
+    def reader_creator_random_image(height, width):
+        def reader():
+            for i in range(ITER_NUM):
+                yield np.random.uniform(low=0, high=255, size=[height, width]), \
+                    np.random.random_integers(low=0, high=9, size=[1])
+        return reader
+    place = fluid.CPUPlace()
+    with fluid.dygraph.guard(place):
+        py_reader = fluid.io.PyReader(capacity=2, return_list=True)
+        user_defined_reader = reader_creator_random_image(784, 784)
+        py_reader.decorate_sample_list_generator(
+            paddle.batch(user_defined_reader, batch_size=BATCH_SIZE),
+            place)
+        for image, label in py_reader():
+            relu = fluid.layers.relu(image)
+.. py:method:: start()
+启动数据输入线程。只能在reader对象不可迭代时调用。
+**代码示例**
+.. code-block:: python
+  import paddle
+  import paddle.fluid as fluid
+  import numpy as np
+  BATCH_SIZE = 10
+  def generator():
+    for i in range(5):
+       yield np.random.uniform(low=0, high=255, size=[784, 784]),
+  image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+  reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
+  reader.decorate_sample_list_generator(
+    paddle.batch(generator, batch_size=BATCH_SIZE))
+  executor = fluid.Executor(fluid.CPUPlace())
+  executor.run(fluid.default_startup_program())
+  for i in range(3):
+    reader.start()
+    while True:
+        try:
+            executor.run(feed=None)
+        except fluid.core.EOFException:
+            reader.reset()
+            break
+.. py:method:: reset()
+当 ``fluid.core.EOFException`` 抛出时重置reader对象。只能在reader对象不可迭代时调用。
+**代码示例**
+.. code-block:: python
+            import paddle
+            import paddle.fluid as fluid
+            import numpy as np
+            BATCH_SIZE = 10
+            def generator():
+                for i in range(5):
+                    yield np.random.uniform(low=0, high=255, size=[784, 784]),
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            reader = fluid.io.PyReader(feed_list=[image], capacity=4, iterable=False)
+            reader.decorate_sample_list_generator(
+                paddle.batch(generator, batch_size=BATCH_SIZE))
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for i in range(3):
+                reader.start()
+                while True:
+                    try:
+                        executor.run(feed=None)
+                    except fluid.core.EOFException:
+                        reader.reset()
+                        break
+.. py:method:: decorate_sample_generator(sample_generator, batch_size, drop_last=True, places=None)
+设置PyReader对象的数据源。
+提供的 ``sample_generator`` 应该是一个python生成器，它生成的数据类型应为list(numpy.ndarray)。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+如果所有的输入都没有LOD，这个方法比 ``decorate_sample_list_generator(paddle.batch(sample_generator, ...))`` 更快。
+参数:
+  - **sample_generator** (generator)  – Python生成器，yield 类型为list(numpy.ndarray)
+  - **batch_size** (int) – batch size，必须大于0
+  - **drop_last** (bool) – 当最后一个batch的样本数小于batch_size时，是否删除最后一个batch
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)    
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        fake_image = np.random.uniform(low=0,
+                                                       high=255,
+                                                       size=[height, width])
+                        fake_label = np.array([1])
+                        yield fake_image, fake_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_sample_generator(user_defined_generator,
+                                             batch_size=BATCH_SIZE,
+                                             places=[fluid.CPUPlace()])
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: decorate_sample_list_generator(reader, places=None)
+设置PyReader对象的数据源。
+提供的 ``reader`` 应该是一个python生成器，它生成列表（numpy.ndarray）类型的批处理数据。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+参数:
+  - **reader** (generator)  – 返回列表（numpy.ndarray）类型的批处理数据的Python生成器
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        fake_image = np.random.uniform(low=0,
+                                                       high=255,
+                                                       size=[height, width])
+                        fake_label = np.ones([1])
+                        yield fake_image, fake_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_sample_list_generator(
+                paddle.batch(user_defined_generator, batch_size=BATCH_SIZE),
+                fluid.core.CPUPlace())
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.core.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: decorate_batch_generator(reader, places=None)
+设置PyReader对象的数据源。
+提供的 ``reader`` 应该是一个python生成器，它生成列表（numpy.ndarray）类型或LoDTensor类型的批处理数据。
+当PyReader对象可迭代时，必须设置 ``places`` 。
+参数:
+  - **reader** (generator)  – 返回列表（numpy.ndarray）类型或LoDTensor类型的批处理数据的Python生成器
+  - **places** (None|list(CUDAPlace)|list(CPUPlace)) –  位置列表。当PyReader可迭代时必须被提供
+**代码示例**
+.. code-block:: python
+            import paddle.fluid as fluid
+            import numpy as np
+            EPOCH_NUM = 3
+            ITER_NUM = 15
+            BATCH_SIZE = 3
+            def network(image, label):
+                # 用户定义网络，此处以softmax回归为例
+                predict = fluid.layers.fc(input=image, size=10, act='softmax')
+                return fluid.layers.cross_entropy(input=predict, label=label)
+            def random_image_and_label_generator(height, width):
+                def generator():
+                    for i in range(ITER_NUM):
+                        batch_image = np.random.uniform(low=0,
+                                                        high=255,
+                                                        size=[BATCH_SIZE, height, width])
+                        batch_label = np.ones([BATCH_SIZE, 1])
+                        batch_image = batch_image.astype('float32')
+                        batch_label = batch_label.astype('int64')
+                        yield batch_image, batch_label
+                return generator
+            image = fluid.layers.data(name='image', shape=[784, 784], dtype='float32')
+            label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+            reader = fluid.io.PyReader(feed_list=[image, label], capacity=4, iterable=True)
+            user_defined_generator = random_image_and_label_generator(784, 784)
+            reader.decorate_batch_generator(user_defined_generator, fluid.CPUPlace())
+            loss = network(image, label)
+            executor = fluid.Executor(fluid.CPUPlace())
+            executor.run(fluid.default_startup_program())
+            for _ in range(EPOCH_NUM):
+                for data in reader():
+                    executor.run(feed=data, fetch_list=[loss])
+.. py:method:: next()
+获取下一个数据。用户不应直接调用此方法。此方法用于PaddlePaddle框架内部实现Python 2.x的迭代器协议。
--- a/doc/paddle/api/paddle/fluid/regularizer/L1DecayRegularizer_cn.rst
+++ b/doc/paddle/api/paddle/fluid/regularizer/L1DecayRegularizer_cn.rst
+.. _cn_api_fluid_regularizer_L1DecayRegularizer:
+L1DecayRegularizer
+-------------------------------
+.. py:class:: paddle.fluid.regularizer.L1DecayRegularizer(regularization_coeff=0.0)
+**注意：paddle.fluid.regularizer.L1DecayRegularizer是paddle.fluid.regularizer.L1Decay的别名，推荐使用paddle.fluid.regularizer.L1Decay。**
+详见 :ref:`cn_api_fluid_regularizer_L1Decay` 接口的使用文档。
--- a/doc/paddle/api/paddle/fluid/regularizer/L1Decay_cn.rst
+++ b/doc/paddle/api/paddle/fluid/regularizer/L1Decay_cn.rst
+.. _cn_api_fluid_regularizer_L1Decay:
+L1Decay
+-------------------------------
+.. py:class:: paddle.fluid.regularizer.L1Decay(regularization_coeff=0.0)
+L1Decay实现L1权重衰减正则化，用于模型训练，使得权重矩阵稀疏。
+该类生成的实例对象，需要设置在 :ref:`cn_api_fluid_ParamAttr` 或者 ``optimizer`` 
+(例如 :ref:`cn_api_fluid_optimizer_SGDOptimizer` )中，在 ``ParamAttr`` 中设置时，
+只对该网络层中的参数生效；在 ``optimizer`` 中设置时，会对所有的参数生效；如果同时设置，
+在 ``ParamAttr`` 中设置的优先级会高于在 ``optimizer`` 中设置。
+具体实现中，L1权重衰减正则化的计算公式如下：
+.. math::
+            \\L1WeightDecay=reg\_coeff*sign(parameter)\\
+参数：
+  - **regularization_coeff** (float) – L1正则化系数，默认值为0.0。
+**代码示例1**
+.. code-block:: python
+    import paddle.fluid as fluid
+    main_prog = fluid.Program()
+    startup_prog = fluid.Program()
+    with fluid.program_guard(main_prog, startup_prog):
+        data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        hidden = fluid.layers.fc(input=data, size=128, act='relu')
+        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_loss = fluid.layers.mean(loss)
+    optimizer = fluid.optimizer.Adagrad(
+        learning_rate=1e-4,
+        regularization=fluid.regularizer.L1Decay(
+            regularization_coeff=0.1))
+    optimizer.minimize(avg_loss)
+**代码示例2**
+.. code-block:: python
+    # 在 ParamAttr 和 optimizer 中同时设置正则化
+    import paddle.fluid as fluid
+    l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
+    l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
+    x = fluid.layers.uniform_random([3,4])
+    # 在ParamAttr中设置L1正则化
+    w_param = fluid.ParamAttr(regularizer=l1)
+    hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)    # fc_0.w_0(L1), fc_0.b_0
+    hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)   # fc_1.w_0(L1), fc_1.b_0
+    predict = fluid.layers.fc(hidden2, 32)     # fc_2.w_0, fc_2.b_0
+    avg_loss = fluid.layers.mean(predict)
+    # 在optimizer中设置L2正则化
+    optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
+    optimizer.minimize(avg_loss)
+    # 将会打印出提示信息:
+    # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. 
+    # So, the Regularization of Optimizer will not take effect for these parameters!
--- a/doc/paddle/api/paddle/fluid/regularizer/L2DecayRegularizer_cn.rst
+++ b/doc/paddle/api/paddle/fluid/regularizer/L2DecayRegularizer_cn.rst
+.. _cn_api_fluid_regularizer_L2DecayRegularizer:
+L2DecayRegularizer
+-------------------------------
+.. py:class:: paddle.fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)
+**注意：paddle.fluid.regularizer.L2DecayRegularizer是paddle.fluid.regularizer.L2Decay的别名，推荐使用paddle.fluid.regularizer.L2Decay。**
+详见 :ref:`cn_api_fluid_regularizer_L2Decay` 接口的使用文档。
--- a/doc/paddle/api/paddle/fluid/regularizer/L2Decay_cn.rst
+++ b/doc/paddle/api/paddle/fluid/regularizer/L2Decay_cn.rst
+.. _cn_api_fluid_regularizer_L2Decay:
+L2Decay
+-------------------------------
+.. py:class:: paddle.fluid.regularizer.L2Decay(regularization_coeff=0.0)
+L2Decay实现L2权重衰减正则化，用于模型训练，有助于防止模型对训练数据过拟合。
+该类生成的实例对象，需要设置在 :ref:`cn_api_fluid_ParamAttr` 或者 ``optimizer`` 
+(例如 :ref:`cn_api_fluid_optimizer_SGDOptimizer` )中，在 ``ParamAttr`` 中设置时，
+只对该网络层中的参数生效；在 ``optimizer`` 中设置时，会对所有的参数生效；如果同时设置，
+在 ``ParamAttr`` 中设置的优先级会高于在 ``optimizer`` 中设置。
+具体实现中，L2权重衰减正则化的计算公式如下：
+.. math::
+            \\L2WeightDecay=reg\_coeff*parameter\\
+参数:
+  - **regularization_coeff** (float) – 正则化系数，默认值为0.0。
+**代码示例1**
+.. code-block:: python
+    import paddle.fluid as fluid
+    main_prog = fluid.Program()
+    startup_prog = fluid.Program()
+    with fluid.program_guard(main_prog, startup_prog):
+        data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
+        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+        hidden = fluid.layers.fc(input=data, size=128, act='relu')
+        prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
+        loss = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_loss = fluid.layers.mean(loss)
+    optimizer = fluid.optimizer.Adagrad(
+        learning_rate=1e-4,
+        regularization=fluid.regularizer.L2Decay(
+            regularization_coeff=0.1))
+    optimizer.minimize(avg_loss)
+**代码示例2**
+.. code-block:: python
+    # 在 ParamAttr 和 optimizer 中同时设置正则化
+    import paddle.fluid as fluid
+    l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
+    l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
+    x = fluid.layers.uniform_random([3,4])
+    # 在ParamAttr中设置L1正则化
+    w_param = fluid.ParamAttr(regularizer=l1)
+    hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)    # fc_0.w_0(L1), fc_0.b_0
+    hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
+    predict = fluid.layers.fc(hidden2, 32)    # fc_2.w_0, fc_2.b_0
+    avg_loss = fluid.layers.mean(predict)
+    # 在optimizer中设置L2正则化
+    optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
+    optimizer.minimize(avg_loss)
+    # 将会打印出提示信息:
+    # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. 
+    # So, the Regularization of Optimizer will not take effect for these parameters!
--- a/doc/paddle/api/paddle/fluid/release_memory_cn.rst
+++ b/doc/paddle/api/paddle/fluid/release_memory_cn.rst
+.. _cn_api_fluid_release_memory:
+release_memory
+-------------------------------
+.. py:function:: paddle.fluid.release_memory(input_program, skip_opt_set=None)
+:api_attr: 声明式编程模式（静态图)
+**从1.6版本开始此接口不再推荐使用，请不要在新写的代码中使用它，1.6+版本已默认开启更优的存储优化策略**
--- a/doc/paddle/api/paddle/fluid/require_version_cn.rst
+++ b/doc/paddle/api/paddle/fluid/require_version_cn.rst
+.. _cn_api_fluid_require_version:
+require_version
+-------------------------------
+.. py:function:: paddle.fluid.require_version(min_version, max_version=None)
+该接口用于检查已安装的飞桨版本是否介于[``min_version``, ``max_version``]之间（包含 ``min_version`` 和 ``max_version`` ），如果已安装的版本低于 ``min_version`` 或者高于 ``max_version`` ，将会抛出异常。该接口无返回值。
+参数:
+    - **min_version** (str) - 指定所需要的最低版本（如‘1.4.0’）
+    - **max_version** (str, optional) – 指定可接受的最高版本（如‘1.7.0’），默认值None，表示任意大于等于 ``min_version`` 的版本都可以接受。
+返回：无
+抛出异常:
+  - ``TypeError`` – ``min_version`` 的类型不是str。
+  - ``TypeError`` – ``max_version`` 的类型不是str或type(None)。
+  - ``ValueError`` – ``min_version`` 的值不是正常的版本号格式。
+  - ``ValueError`` – ``max_version`` 的值不是正常的版本号格式或None。
+  - ``Exception`` – 已安装的版本低于 ``min_version`` 或者高于 ``max_version`` 。
+**代码示例**：
+.. code-block:: python
+        import paddle.fluid as fluid
+        # 任何大于等于0.1.0的版本都可以接受
+        fluid.require_version('0.1.0')
+        # 只接受介于0.1.0和10.0.0之间的版本（包含0.1.0和10.0.0）
+        fluid.require_version(min_version='0.1.0', max_version='10.0.0')
--- a/doc/paddle/api/paddle/save_cn.rst
+++ b/doc/paddle/api/paddle/save_cn.rst
--- a/doc/paddle/api/paddle/fluid/set_flags_cn.rst
+++ b/doc/paddle/api/paddle/fluid/set_flags_cn.rst
+.. _cn_api_fluid_set_flags:
+set_flags
+-------------------------------
+.. py:function:: paddle.fluid.set_flags(flags)
+用于设置Paddle框架中环境变量FLAGS的值。
+参数：
+    - **flags** (dict) - 包含想要设置的环境变量FLAGS的名称和值的字典。
+**示例代码**
+.. code-block:: python
+    import paddle.fluid as fluid
+    fluid.set_flags({'FLAGS_eager_delete_tensor_gb': 1.0})
--- a/doc/paddle/api/paddle/fluid/transpiler/HashName_cn.rst
+++ b/doc/paddle/api/paddle/fluid/transpiler/HashName_cn.rst
+.. _cn_api_fluid_transpiler_HashName:
+HashName
+-------------------------------
+.. py:class:: paddle.fluid.transpiler.HashName(pserver_endpoints)
+:api_attr: 声明式编程模式（静态图)
+该类使用 python ``hash()`` 函数将变量散列到多个parameter server节点。
+参数:
+  - **pserver_endpoints** (list) - endpoint （ip:port）的 list 
+返回：实例化后的HashName的对象
+返回类型：HashName
+**代码示例**
+.. code-block:: python
+          from paddle.fluid.transpiler import HashName
+          pserver_endpoints = ["127.0.0.1:6007", "127.0.0.1:6008"]
+          vars = ["var1","var2","var3","var4","var5"]
+          rr = HashName(pserver_endpoints)
+          rr.dispatch(vars)
+.. py:method:: reset()
+该方法将重置HashName内置的计数， 计数将重置为0。
+返回：无。
+**代码示例**
+.. code-block:: python
+          from paddle.fluid.transpiler import HashName
+          pserver_endpoints = ["127.0.0.1:6007", "127.0.0.1:6008"]
+          vars = ["var1","var2","var3","var4","var5"]
+          rr = HashName(pserver_endpoints)
+          rr.reset()
--- a/doc/paddle/api/paddle/fluid/unique_name/generate_cn.rst
+++ b/doc/paddle/api/paddle/fluid/unique_name/generate_cn.rst
+.. _cn_api_fluid_unique_name_generate:
+generate
+-------------------------------
+.. py:function:: paddle.fluid.unique_name.generate(key)
+该接口产生以前缀key开头的唯一名称。目前，Paddle通过从0开始的编号对相同前缀key的名称进行区分。例如，使用key=fc连续调用该接口会产生fc_0, fc_1, fc_2等不同名称。
+参数:
+  - **key** (str) - 产生的唯一名称的前缀。
+返回：含前缀key的唯一名称。
+返回类型：str。
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        name1 = fluid.unique_name.generate('fc')
+        name2 = fluid.unique_name.generate('fc')
+        print(name1, name2)  # fc_0, fc_1 
--- a/doc/paddle/api/paddle/fluid/unique_name/guard_cn.rst
+++ b/doc/paddle/api/paddle/fluid/unique_name/guard_cn.rst
+.. _cn_api_fluid_dygraph_guard:
+guard
+-------------------------------
+.. py:function:: paddle.fluid.dygraph.guard(place=None)
+:api_attr: 命令式编程模式（动态图)
+通过with语句创建一个dygraph运行的context，执行context代码。
+参数：
+    - **place** (fluid.CPUPlace|fluid.CUDAPlace, 可选) –  动态图执行的设备，可以选择cpu，gpu，如果用户未指定，则根据用户paddle编译的方式来选择运行的设备，如果编译的是cpu版本，则在cpu上运行，如果编译的是gpu版本，则在gpu上运行。默认值：None。
+返回： None
+**代码示例**
+.. code-block:: python
+    import numpy as np
+    import paddle.fluid as fluid
+    with fluid.dygraph.guard():
+        inp = np.ones([3, 1024], dtype='float32')
+        t = fluid.dygraph.base.to_variable(inp)
+        linear1 = fluid.Linear(1024, 4, bias_attr=False)
+        linear2 = fluid.Linear(4, 4)
+        ret = linear1(t)
+        dy_ret = linear2(ret)
--- a/doc/paddle/api/paddle/fluid/unique_name/switch_cn.rst
+++ b/doc/paddle/api/paddle/fluid/unique_name/switch_cn.rst
+.. _cn_api_fluid_unique_name_switch:
+switch
+-------------------------------
+.. py:function:: paddle.fluid.unique_name.switch(new_generator=None)
+该接口将当前上下文的命名空间切换到新的命名空间。该接口与guard接口都可用于更改命名空间，推荐使用guard接口，配合with语句管理命名空间上下文。
+参数:
+  - **new_generator** (UniqueNameGenerator, 可选) - 要切换到的新命名空间，一般无需设置。缺省值为None，表示切换到一个匿名的新命名空间。
+返回：先前的命名空间，一般无需操作该返回值。
+返回类型：UniqueNameGenerator。
+**代码示例**
+.. code-block:: python
+        import paddle.fluid as fluid
+        name1 = fluid.unique_name.generate('fc')
+        name2 = fluid.unique_name.generate('fc')
+        print(name1, name2)  # fc_0, fc_1
+        pre_generator = fluid.unique_name.switch()  # 切换到新命名空间
+        name2 = fluid.unique_name.generate('fc')
+        print(name2)  # fc_0
+        fluid.unique_name.switch(pre_generator)  # 切换回原命名空间
+        name3 = fluid.unique_name.generate('fc')
+        print(name3)  # fc_2, 因为原命名空间已生成fc_0, fc_1
--- a/doc/paddle/api/paddle/nn/Dropout_cn.rst
+++ b/doc/paddle/api/paddle/nn/Dropout_cn.rst
+.. _cn_api_fluid_dygraph_Dropout:
+Dropout
+-------------------------------
+.. py:class:: paddle.fluid.dygraph.Dropout(p=0.5, seed=None, dropout_implementation='downgrade_in_infer', is_test=False)
+独立地丢弃或保留输入的每个元素。Dropout是一种正则化手段，通过在训练过程中阻止神经元节点间的相关性来减少过拟合。根据给定的丢弃概率，dropout操作符按丢弃概率随机将一些神经元输出设置为0，其他的仍保持不变。
+Dropout层可以删除，提高执行效率。
+参数：
+    - **p** (float32，可选) - 输入单元的丢弃概率，即输入单元设置为0的概率。默认值：0.5
+    - **seed** (int，可选) - 整型数据，用于创建随机种子。如果该参数设为None，则使用随机种子。注：如果给定一个整型种子，始终丢弃相同的输出单元。训练过程中勿用固定不变的种子。默认值：None。
+    - **dropout_implementation** (str，可选) - 丢弃单元的方式，有'downgrade_in_infer'和'upscale_in_train'两种选择，默认：'downgrade_in_infer'。具体作用可以参考以下描述。
+      1. downgrade_in_infer(default), 在预测时减小输出结果
+         - train: out = input * mask
+         - inference: out = input * (1.0 - p)
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``p`` )
+      2. upscale_in_train, 增加训练时的结果
+         - train: out = input * mask / ( 1.0 - p )
+         - inference: out = input
+         (mask是一个张量，维度和输入维度相同，值为0或1，值为0的比例即为 ``p`` )
+    - **is_test** (bool，可选) - 标记是否是测试阶段。此标志仅对静态图模式有效。对于动态图模式，请使用 ``eval()`` 接口。默认：False。
+返回：无
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.dygraph.base import to_variable
+    import numpy as np
+    x = np.random.random(size=(3, 10, 3, 7)).astype('float32')
+    with fluid.dygraph.guard():
+        x = to_variable(x)
+        m = fluid.dygraph.Dropout(p=0.5)
+        droped_train = m(x)
+        # 切换到 eval 模式
+        m.eval()
+        droped_eval = m(x)
--- a/doc/paddle/api/paddle/nn/functional/prelu_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/prelu_cn.rst
+.. _cn_api_fluid_layers_prelu:
+prelu
+-------------------------------
+.. py:function:: paddle.fluid.layers.prelu(x, mode, param_attr=None, name=None)
+:api_attr: 声明式编程模式（静态图)
+等式：
+.. math::
+    y = max(0, x) + \alpha min(0, x)
+共提供三种激活方式：
+.. code-block:: text
+    all: 所有元素使用同一个alpha值
+    channel: 在同一个通道中的元素使用同一个alpha值
+    element: 每一个元素有一个独立的alpha值
+参数：
+          - **x** （Variable）- 多维Tensor或LoDTensor，数据类型为float32。
+          - **mode** (str) - 权重共享模式。
+          - **param_attr** (ParamAttr，可选) - 可学习权重 :math:`[\alpha]` 的参数属性，可由ParamAttr创建。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。
+          - **name** (str，可选) – 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。 
+返回： 表示激活输出Tensor或LoDTensor，数据类型为float32。与输入形状相同。
+返回类型：Variable
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    from paddle.fluid.param_attr import ParamAttr
+    x = fluid.data(name="x", shape=[None,5,10,10], dtype="float32")
+    mode = 'channel'
+    output = fluid.layers.prelu(
+             x,mode,param_attr=ParamAttr(name='alpha'))
--- a/doc/paddle/api/paddle/nn/functional/tanh_cn.rst
+++ b/doc/paddle/api/paddle/nn/functional/tanh_cn.rst
+.. _cn_api_fluid_layers_tanh:
+tanh
+-------------------------------
+.. py:function:: paddle.fluid.layers.tanh(x, name=None)
+tanh 激活函数
+.. math::
+    out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
+参数:
+    - **x** - Tanh算子的输入
+    - **name** (str|None) - 该层名称（可选）。若设为None，则自动为该层命名。
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.tanh(data)
--- a/doc/paddle/api/paddle/static/data_cn.rst
+++ b/doc/paddle/api/paddle/static/data_cn.rst
+.. _cn_api_fluid_data:
+data
+-------------------------------
+.. py:function:: paddle.fluid.data(name, shape, dtype='float32', lod_level=0)
+:api_attr: 声明式编程模式（静态图)
+:alias_main: paddle.nn.data
+:alias: paddle.nn.data,paddle.nn.input.data
+:old_api: paddle.fluid.data
+该OP会在全局block中创建变量（Variable），该全局变量可被计算图中的算子（operator）访问。该变量可作为占位符用于数据输入。例如，可以用执行器（Executor）将数据feed进该变量。
+注意：
+  不推荐使用 ``paddle.fluid.layers.data`` ，其在之后的版本中会被删除。请使用这个 ``paddle.fluid.data`` 。 
+  ``paddle.fluid.layers.data`` 在组网期间会设置创建的变量维度（shape）和数据类型（dtype），但不会检查输入数据的维度和数据类型是否符合要求。 ``paddle.fluid.data`` 会在运行过程中由Executor/ParallelExecutor检查输入数据的维度和数据类型。
+  如果想输入变长数据，可以在使用 ``paddle.fluid.data`` 时将变长维度设为-1，或者直接将变长数据输入 ``paddle.fluid.layers.data`` ，PaddlePaddle会按具体输入的形状运行。
+  本API创建的变量默认 ``stop_gradient`` 属性为true，这意味着反向梯度不会被传递过这个数据变量。如果用户想传递反向梯度，可以设置 ``var.stop_gradient = False`` 。
+参数：
+    - **name** (str)- 被创建的变量的名字，具体用法请参见 :ref:`api_guide_Name` 。
+    - **shape** (list|tuple)- 声明维度信息的list或tuple。
+    - **dtype** (np.dtype|VarType|str，可选)- 数据类型，支持bool，float16，float32，float64，int8，int16，int32，int64，uint8。默认值为float32。
+    - **lod_level** (int，可选)- LoDTensor变量的LoD level数，LoD level是PaddlePaddle的高级特性，一般任务中不会需要更改此默认值，关于LoD level的详细适用场景和用法请见 :ref:`cn_user_guide_lod_tensor` 。默认值为0。
+返回：全局变量，可进行数据访问
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    # Creates a variable with fixed size [3, 2, 1]
+    # User can only feed data of the same shape to x
+    x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')
+    # Creates a variable with changeable batch size -1.
+    # Users can feed data of any batch size into y,
+    # but size of each data sample has to be [2, 1]
+    y = fluid.data(name='y', shape=[-1, 2, 1], dtype='float32')
+    z = x + y
+    # In this example, we will feed x and y with np-ndarray "1"
+    # and fetch z, like implementing "1 + 1 = 2" in PaddlePaddle
+    feed_data = np.ones(shape=[3, 2, 1], dtype=np.float32)
+    exe = fluid.Executor(fluid.CPUPlace())
+    out = exe.run(fluid.default_main_program(),
+                  feed={
+                      'x': feed_data,
+                      'y': feed_data
+                  },
+                  fetch_list=[z.name])
+    # np-ndarray of shape=[3, 2, 1], dtype=float32, whose elements are 2
+    print(out)
--- a/doc/paddle/api/paddle/static/nn/conv2d_transpose_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/conv2d_transpose_cn.rst
-.. _cn_api_fluid_layers_conv2d_transpose:
-conv2d_transpose
-------------------------------
-.. py:function:: paddle.fluid.layers.conv2d_transpose(input, num_filters, output_size=None, filter_size=None, padding=0, stride=1, dilation=1, groups=None, param_attr=None, bias_attr=None, use_cudnn=True, act=None, name=None, data_format='NCHW')
-:api_attr: 声明式编程模式（静态图)
-二维转置卷积层（Convlution2D transpose layer）
-该层根据输入（input）、滤波器（filter）和卷积核膨胀比例（dilations）、步长（stride）、填充（padding）来计算输出特征层大小或者通过output_size指定输出特征层大小。输入(Input)和输出(Output)为NCHW或NHWC格式，其中N为批尺寸，C为通道数（channel），H为特征层高度，W为特征层宽度。滤波器是MCHW格式，M是输出图像通道数，C是输入图像通道数，H是滤波器高度，W是滤波器宽度。如果组数大于1，C等于输入图像通道数除以组数的结果。转置卷积的计算过程相当于卷积的反向计算。转置卷积又被称为反卷积（但其实并不是真正的反卷积）。欲了解转置卷积层细节，请参考下面的说明和 参考文献_ 。如果参数bias_attr不为False, 转置卷积计算会添加偏置项。如果act不为None，则转置卷积计算之后添加相应的激活函数。
-.. _参考文献: https://arxiv.org/pdf/1603.07285.pdf
-输入 :math:`X` 和输出 :math:`Out` 函数关系如下：
-.. math::
-                        Out=\sigma (W*X+b)\\
-其中：
-    -  :math:`X` : 输入，具有NCHW或NHWC格式的4-D Tensor
-    -  :math:`W` : 滤波器，具有NCHW格式的4-D Tensor
-    -  :math:`*` : 卷积计算（注意：转置卷积本质上的计算还是卷积）
-    -  :math:`b` : 偏置（bias），2-D Tensor，形状为 ``[M,1]``
-    -  :math:`σ` : 激活函数
-    -  :math:`Out` : 输出值，NCHW或NHWC格式的4-D Tensor， 和 ``X`` 的形状可能不同
-**示例**
- 输入：
-    输入Tensor的形状： :math:`（N，C_{in}， H_{in}， W_{in}）`
-    滤波器的形状 ： :math:`（C_{in}, C_{out}, H_f, W_f）`
- 输出：
-    输出Tensor的形状 ： :math:`（N，C_{out}, H_{out}, W_{out}）`
-其中
-.. math::
-        & H'_{out} = (H_{in}-1)*strides[0] - pad\_height\_top - pad\_height\_bottom + dilations[0]*(H_f-1)+1\\
-        & W'_{out} = (W_{in}-1)*strides[1]- pad\_width\_left - pad\_width\_right + dilations[1]*(W_f-1)+1 \\
-        & H_{out}\in[H'_{out},H'_{out} + strides[0])\\
-        & W_{out}\in[W'_{out},W'_{out} + strides[1])\\
-如果 ``padding`` = "SAME":
-.. math::
-   & H'_{out} = \frac{(H_{in} + stride[0] - 1)}{stride[0]}\\
-   & W'_{out} = \frac{(W_{in} + stride[1] - 1)}{stride[1]}\\
-如果 ``padding`` = "VALID":
-.. math::
-    & H'_{out} = (H_{in}-1)*strides[0] + dilations[0]*(H_f-1)+1\\
-    & W'_{out} = (W_{in}-1)*strides[1] + dilations[1]*(W_f-1)+1 \\
-注意：
-如果output_size为None，则 :math:`H_{out}` = :math:`H^\prime_{out}` , :math:`W_{out}` = :math:`W^\prime_{out}` ;否则，指定的output_size_height（输出特征层的高） :math:`H_{out}` 应当介于 :math:`H^\prime_{out}` 和 :math:`H^\prime_{out} + strides[0]` 之间（不包含 :math:`H^\prime_{out} + strides[0]` ）, 并且指定的output_size_width（输出特征层的宽） :math:`W_{out}` 应当介于 :math:`W^\prime_{out}` 和 :math:`W^\prime_{out} + strides[1]` 之间（不包含 :math:`W^\prime_{out} + strides[1]` ）。
-由于转置卷积可以当成是卷积的反向计算，而根据卷积的输入输出计算公式来说，不同大小的输入特征层可能对应着相同大小的输出特征层，所以对应到转置卷积来说，固定大小的输入特征层对应的输出特征层大小并不唯一。
-如果指定了output_size， ``conv2d_transpose`` 可以自动计算滤波器的大小。
-参数:
-  - **input** （Variable）- 形状为 :math:`[N, C, H, W]` 或 :math:`[N, H, W, C]` 的4-D Tensor，N是批尺寸，C是通道数，H是特征高度，W是特征宽度。数据类型：float32或float64。
-  - **num_filters** (int) - 滤波器（卷积核）的个数，与输出图片的通道数相同。
-  - **output_size** (int|tuple，可选) - 输出图片的大小。如果output_size是一个元组，则必须包含两个整型数，（output_size_height，output_size_width）。如果output_size=None，则内部会使用filter_size、padding和stride来计算output_size。如果output_size和filter_size是同时指定的，那么它们应满足上面的公式。默认：None。output_size和filter_size不能同时为None。
-  - **filter_size** (int|tuple，可选) - 滤波器大小。如果filter_size是一个元组，则必须包含两个整型数，（filter_size_height, filter_size_width）。否则，filter_size_height = filter_size_width = filter_size。如果filter_size=None，则必须指定output_size， ``conv2d_transpose`` 内部会根据output_size、padding和stride计算出滤波器大小。默认：None。output_size和filter_size不能同时为None。
-  - **padding** (int|list|tuple|str，可选) - 填充padding大小。padding参数在输入特征层每边添加 ``dilation * (kernel_size - 1) - padding`` 个0。如果它是一个字符串，可以是"VALID"或者"SAME"，表示填充算法，计算细节可参考上述 ``padding`` = "SAME"或  ``padding`` = "VALID" 时的计算公式。如果它是一个元组或列表，它可以有3种格式：(1)包含4个二元组：当 ``data_format`` 为"NCHW"时为 [[0,0], [0,0], [padding_height_top, padding_height_bottom], [padding_width_left, padding_width_right]]，当 ``data_format`` 为"NHWC"时为[[0,0], [padding_height_top, padding_height_bottom], [padding_width_left, padding_width_right], [0,0]]；(2)包含4个整数值：[padding_height_top, padding_height_bottom, padding_width_left, padding_width_right]；(3)包含2个整数值：[padding_height, padding_width]，此时padding_height_top = padding_height_bottom = padding_height， padding_width_left = padding_width_right = padding_width。若为一个整数，padding_height = padding_width = padding。默认值：0。
-  - **stride** (int|tuple，可选) - 步长stride大小。滤波器和输入进行卷积计算时滑动的步长。如果stride是一个元组，则必须包含两个整型数，形式为(stride_height，stride_width)。否则，stride_height = stride_width = stride。默认：stride = 1。
-  - **dilation** (int|tuple，可选) - 膨胀比例(dilation)大小。空洞卷积时会指该参数，滤波器对输入进行卷积时，感受野里每相邻两个特征点之间的空洞信息，根据 `可视化效果图 <https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md>`_ 较好理解。如果膨胀比例dilation是一个元组，那么元组必须包含两个整型数，形式为(dilation_height, dilation_width)。否则，dilation_height = dilation_width = dilation。默认：dilation= 1。
-  - **groups** (int，可选) - 二维转置卷积层的组数。从Alex Krizhevsky的CNN Deep论文中的群卷积中受到启发，当group=2时，输入和滤波器分别根据通道数量平均分为两组，第一组滤波器和第一组输入进行卷积计算，第二组滤波器和第二组输入进行卷积计算。默认：group = 1。
-  - **param_attr** (ParamAttr，可选) ：指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。conv2d_transpose算子默认的权重初始化是Xavier。
-  - **bias_attr** （ParamAttr|False，可选）- 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。conv2d_transpose算子默认的偏置初始化是0.0。
-  - **use_cudnn** (bool，可选) - 是否使用cudnn内核，只有已安装cudnn库时才有效。默认：True。
-  - **act** (str，可选) -  激活函数类型，如果设置为None，则不使用激活函数。默认：None。
-  - **name** (str，可选) – 具体用法请参见 :ref:`cn_api_guide_Name` ，一般无需设置，默认值为None。
-  - **data_format** (str，可选) - 指定输入的数据格式，输出的数据格式将与输入保持一致，可以是"NCHW"和"NHWC"。N是批尺寸，C是通道数，H是特征高度，W是特征宽度。默认值："NCHW"。
-返回：4-D Tensor，数据类型与 ``input`` 一致。如果未指定激活层，则返回转置卷积计算的结果，如果指定激活层，则返回转置卷积和激活计算之后的最终结果。
-返回类型：Variable
-抛出异常:
-    -  ``ValueError`` : 如果输入的shape、filter_size、stride、padding和groups不匹配，抛出ValueError
-    -  ``ValueError`` - 如果 ``data_format`` 既不是"NCHW"也不是"NHWC"。
-    -  ``ValueError`` - 如果 ``padding`` 是字符串，既不是"SAME"也不是"VALID"。
-    -  ``ValueError`` - 如果 ``padding`` 含有4个二元组，与批尺寸对应维度的值不为0或者与通道对应维度的值不为0。
-    -  ``ValueError`` - 如果 ``output_size`` 和 ``filter_size`` 同时为None。
-    -  ``ShapeError`` - 如果输入不是4-D Tensor。
-    -  ``ShapeError`` - 如果输入和滤波器的维度大小不相同。
-    -  ``ShapeError`` - 如果输入的维度大小与 ``stride`` 之差不是2。
-**代码示例**
-..  code-block:: python
-    import paddle.fluid as fluid
-    import numpy as np
-    data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
-    param_attr = fluid.ParamAttr(name='conv2d.weight', initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001)
-    res = fluid.layers.conv2d_transpose(input=data, num_filters=2, filter_size=3, act="relu", param_attr=param_attr)
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-    x = np.random.rand(1, 3, 32, 32).astype("float32")
-    output = exe.run(feed={"data": x}, fetch_list=[res])
-    print(output)
--- a/doc/paddle/api/paddle/static/nn/conv3d_transpose_cn.rst
+++ b/doc/paddle/api/paddle/static/nn/conv3d_transpose_cn.rst
-.. _cn_api_fluid_layers_conv3d_transpose:
-conv3d_transpose
-------------------------------
-.. py:function:: paddle.fluid.layers.conv3d_transpose(input, num_filters, output_size=None, filter_size=None, padding=0, stride=1, dilation=1, groups=None, param_attr=None, bias_attr=None, use_cudnn=True, act=None, name=None, data_format='NCDHW')
-:api_attr: 声明式编程模式（静态图)
-三维转置卷积层（Convlution3D transpose layer)
-该层根据输入（input）、滤波器（filter）和卷积核膨胀比例（dilations）、步长（stride）、填充（padding）来计算输出特征层大小或者通过output_size指定输出特征层大小。输入(Input)和输出(Output)为NCDHW或者NDHWC格式。其中N为批尺寸，C为通道数（channel），D为特征深度，H为特征层高度，W为特征层宽度。转置卷积的计算过程相当于卷积的反向计算。转置卷积又被称为反卷积（但其实并不是真正的反卷积）。欲了解卷积转置层细节，请参考下面的说明和 参考文献_ 。如果参数bias_attr不为False, 转置卷积计算会添加偏置项。如果act不为None，则转置卷积计算之后添加相应的激活函数。
-.. _参考文献: http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf
-输入 :math:`X` 和输出 :math:`Out` 函数关系如下：
-.. math::
-                        \\Out=\sigma (W*X+b)\\
-其中：
-    -  :math:`X` : 输入，具有NCDHW或NDHWC格式的5-D Tensor
-    -  :math:`W` : 滤波器，具有NCDHW格式的5-D Tensor
-    -  :math:`*` : 卷积操作（注意：转置卷积本质上的计算还是卷积）
-    -  :math:`b` : 偏置（bias），2-D Tensor，形状为 ``[M,1]``
-    -  :math:`σ` : 激活函数
-    -  :math:`Out` : 输出值，NCDHW或NDHWC格式的5-D Tensor，和 ``X`` 的形状可能不同
-**示例**
-输入:
-    输入的shape：:math:`（N,C_{in}, D_{in}, H_{in}, W_{in}）`
-    滤波器的shape：:math:`（C_{in}, C_{out}, D_f, H_f, W_f）`
-输出:
-    输出的shape：:math:`（N,C_{out}, D_{out}, H_{out}, W_{out}）`
-其中：
-.. math::
-    & D'_{out}=(D_{in}-1)*strides[0] - pad\_depth\_front - pad\_depth\_back + dilations[0]*(D_f-1)+1\\
-    & H'_{out}=(H_{in}-1)*strides[1] - pad\_height\_top - pad\_height\_bottom + dilations[1]*(H_f-1)+1\\
-    & W'_{out}=(W_{in}-1)*strides[2] - pad\_width\_left - pad\_width\_right + dilations[2]*(W_f-1)+1\\
-    & D_{out}\in[D'_{out},D'_{out} + strides[0])\\
-    & H_{out}\in[H'_{out},H'_{out} + strides[1])\\
-    & W_{out}\in[W'_{out},W'_{out} + strides[2])\\
-如果 ``padding`` = "SAME":
-.. math::
-    D'_{out} = \frac{(D_{in} + stride[0] - 1)}{stride[0]}\\
-    H'_{out} = \frac{(H_{in} + stride[1] - 1)}{stride[1]}\\
-    W'_{out} = \frac{(W_{in} + stride[2] - 1)}{stride[2]}\\
-如果 ``padding`` = "VALID":
-.. math::
-    D'_{out}=(D_{in}-1)*strides[0] + dilations[0]*(D_f-1)+1\\
-    H'_{out}=(H_{in}-1)*strides[1] + dilations[1]*(H_f-1)+1\\
-    W'_{out}=(W_{in}-1)*strides[2] + dilations[2]*(W_f-1)+1\\
-注意：
-如果output_size为None，则 :math:`D_{out}` = :math:`D^\prime_{out}` , :math:`H_{out}` = :math:`H^\prime_{out}` , :math:`W_{out}` = :math:`W^\prime_{out}` ;否则，指定的output_size_depth（输出特征层的深度） :math:`D_{out}` 应当介于 :math:`D^\prime_{out}` 和 :math:`D^\prime_{out} + strides[0]` 之间（不包含 :math:`D^\prime_{out} + strides[0]` ），指定的output_size_height（输出特征层的高） :math:`H_{out}` 应当介于 :math:`H^\prime_{out}` 和 :math:`H^\prime_{out} + strides[1]` 之间（不包含 :math:`H^\prime_{out} + strides[1]` ）, 并且指定的output_size_width（输出特征层的宽） :math:`W_{out}` 应当介于 :math:`W^\prime_{out}` 和 :math:`W^\prime_{out} + strides[2]` 之间（不包含 :math:`W^\prime_{out} + strides[2]` ）。
-由于转置卷积可以当成是卷积的反向计算，而根据卷积的输入输出计算公式来说，不同大小的输入特征层可能对应着相同大小的输出特征层，所以对应到转置卷积来说，固定大小的输入特征层对应的输出特征层大小并不唯一。
-如果指定了output_size， ``conv3d_transpose`` 可以自动计算滤波器的大小。
-参数:
-  - **input** （Variable）- 形状为 :math:`[N, C, D, H, W]` 或 :math:`[N, D, H, W, C]` 的5-D Tensor，N是批尺寸，C是通道数，D是特征深度，H是特征高度，W是特征宽度，数据类型：float32或float64。
-  - **num_filters** (int) - 滤波器（卷积核）的个数，与输出的图片的通道数相同。
-  - **output_size** (int|tuple，可选) - 输出图片的大小。如果output_size是一个元组，则必须包含三个整型数，（output_size_depth，output_size_height，output_size_width）。如果output_size=None，则内部会使用filter_size、padding和stride来计算output_size。如果output_size和filter_size是同时指定的，那么它们应满足上面的公式。默认：None。output_size和filter_size不能同时为None。
-  - **filter_size** (int|tuple，可选) - 滤波器大小。如果filter_size是一个元组，则必须包含三个整型数，（filter_size_depth，filter_size_height, filter_size_width）。否则，filter_size_depth = filter_size_height = filter_size_width = filter_size。如果filter_size=None，则必须指定output_size， ``conv2d_transpose`` 内部会根据output_size、padding和stride计算出滤波器大小。默认：None。output_size和filter_size不能同时为None。
-  - **padding** (int|list|tuple|str，可选) - 填充padding大小。padding参数在输入特征层每边添加 ``dilation * (kernel_size - 1) - padding`` 个0。如果它是一个字符串，可以是"VALID"或者"SAME"，表示填充算法，计算细节可参考上述 ``padding`` = "SAME"或  ``padding`` = "VALID" 时的计算公式。如果它是一个元组或列表，它可以有3种格式：(1)包含5个二元组：当 ``data_format`` 为"NCDHW"时为 [[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]，当 ``data_format`` 为"NDHWC"时为[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]；(2)包含6个整数值：[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]；(3)包含3个整数值：[pad_depth, pad_height, pad_width]，此时 pad_depth_front = pad_depth_back = pad_depth, pad_height_top = pad_height_bottom = pad_height, pad_width_left = pad_width_right = pad_width。若为一个整数，pad_depth = pad_height = pad_width = padding。默认值：0。
-  - **stride** (int|tuple，可选) - 步长stride大小。滤波器和输入进行卷积计算时滑动的步长。如果stride是一个元组，那么元组的形式为(stride_depth，stride_height，stride_width)。否则，stride_depth = stride_height = stride_width = stride。默认：stride = 1。
-  - **dilation** (int|tuple，可选) - 膨胀比例dilation大小。空洞卷积时会指该参数，滤波器对输入进行卷积时，感受野里每相邻两个特征点之间的空洞信息，根据 `可视化效果图 <https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md>`_ 较好理解。如果膨胀比例dilation是一个元组，那么元组的形式为(dilation_depth，dilation_height， dilation_width)。否则，dilation_depth = dilation_height = dilation_width = dilation。默认:dilation= 1。
-  - **groups** (int，可选) - 三维转置卷积层的组数。从Alex Krizhevsky的CNN Deep论文中的群卷积中受到启发，当group=2时，输入和滤波器分别根据通道数量平均分为两组，第一组滤波器和第一组输入进行卷积计算，第二组滤波器和第二组输入进行卷积计算。默认：group = 1。
-  - **param_attr** (ParamAttr，可选) ：指定权重参数属性的对象。默认值为None，表示使用默认的权重参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。conv3d_transpose算子默认的权重初始化是Xavier。
-  - **bias_attr** （ParamAttr|False，可选）- 指定偏置参数属性的对象。默认值为None，表示使用默认的偏置参数属性。具体用法请参见 :ref:`cn_api_fluid_ParamAttr` 。conv3d_transpose算子默认的偏置初始化是0.0。
-  - **use_cudnn** (bool，可选) - 是否使用cudnn内核，只有已安装cudnn库时才有效。默认：True。
-  - **act** (str，可选) -  激活函数类型，如果设置为None，则不使用激活函数。默认：None。
-  - **name** (str，可选) – 具体用法请参见 :ref:`cn_api_guide_Name` ，一般无需设置，默认值为None。
-  - **data_format** (str，可选) - 指定输入的数据格式，输出的数据格式将与输入保持一致，可以是"NCDHW"和"NDHWC"。N是批尺寸，C是通道数，H是特征高度，W是特征宽度。默认值："NCDHW"。
-返回：5-D Tensor，数据类型与 ``input`` 一致。如果未指定激活层，则返回转置卷积计算的结果，如果指定激活层，则返回转置卷积和激活计算之后的最终结果。
-返回类型：Variable
-抛出异常:
-    - ``ValueError`` - 如果输入的shape、filter_size、stride、padding和groups不匹配。
-    - ``ValueError`` - 如果 ``data_format`` 既不是"NCDHW"也不是"NDHWC"。
-    - ``ValueError`` - 如果 ``padding`` 是字符串，既不是"SAME"也不是"VALID"。
-    - ``ValueError`` - 如果 ``padding`` 含有5个二元组，与批尺寸对应维度的值不为0或者与通道对应维度的值不为0。
-    - ``ValueError`` - 如果 ``output_size`` 和 ``filter_size`` 同时为None。
-    - ``ShapeError`` - 如果输入不是5-D Tensor。
-    - ``ShapeError`` - 如果输入和滤波器的维度大小不相同。
-    - ``ShapeError`` - 如果输入的维度大小与 ``stride`` 之差不是2。
-**代码示例**
-..  code-block:: python
-    import paddle.fluid as fluid
-    import numpy as np
-    data = fluid.layers.data(name='data', shape=[3, 12, 32, 32], dtype='float32')
-    param_attr = fluid.ParamAttr(name='conv3d.weight', initializer=fluid.initializer.Xavier(uniform=False), learning_rate=0.001)
-    res = fluid.layers.conv3d_transpose(input=data, num_filters=2, filter_size=3, act="relu", param_attr=param_attr)
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-    x = np.random.rand(1, 3, 12, 32, 32).astype("float32")
-    output = exe.run(feed={"data": x}, fetch_list=[res])
-    print(output)
--- a/doc/paddle/api/paddle/static/save_cn.rst
+++ b/doc/paddle/api/paddle/static/save_cn.rst
-.. _cn_api_fluid_save:
-save
-------------------------------
-.. py:function:: paddle.fluid.save(program, model_path)
-:api_attr: 声明式编程模式（静态图)
-:alias_main: paddle.save
-:alias: paddle.save,paddle.tensor.save,paddle.tensor.io.save
-:old_api: paddle.fluid.save
-该接口将传入的参数、优化器信息和网络描述保存到 ``model_path`` 。
-参数包含所有的可训练 :ref:`cn_api_fluid_Variable` ，将保存到后缀为 ``.pdparams`` 的文件中。
-优化器信息包含优化器使用的所有变量。对于Adam优化器，包含beta1、beta2、momentum等。
-所有信息将保存到后缀为 ``.pdopt`` 的文件中。（如果优化器没有需要保存的变量（如sgd），则不会生成）。
-网络描述是程序的描述。它只用于部署。描述将保存到后缀为 ``.pdmodel`` 的文件中。
-参数:
- - **program**  ( :ref:`cn_api_fluid_Program` ) – 要保存的Program。
- - **model_path**  (str) – 保存program的文件前缀。格式为 ``目录名称/文件前缀``。如果文件前缀为空字符串，会引发异常。
-返回: 无
-**代码示例**
-.. code-block:: python
-    import paddle.fluid as fluid
-    x = fluid.data(name="x", shape=[10, 10], dtype='float32')
-    y = fluid.layers.fc(x, 10)
-    z = fluid.layers.fc(y, 10)
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    exe.run(fluid.default_startup_program())
-    fluid.save(fluid.default_main_program(), "./test_path")
--- a/doc/paddle/api/paddle/tensor/creation/diag_cn.rst
+++ b/doc/paddle/api/paddle/tensor/creation/diag_cn.rst
+.. _cn_api_fluid_layers_diag:
+diag
+-------------------------------
+.. py:function:: paddle.fluid.layers.diag(diagonal)
+:alias_main: paddle.diag
+:alias: paddle.diag,paddle.tensor.diag,paddle.tensor.creation.diag
+:old_api: paddle.fluid.layers.diag
+该OP创建一个方阵，使用输入diagonal来指定方阵的对角线元素的值。
+参数：
+    - **diagonal** (Variable|numpy.ndarray) — 数据shape为 :math:`[N]` 一维Tensor，会把该Tensor的元素赋在方阵的对角线上。数据类型可以是 float32，float64，int32，int64。
+返回：存储着方阵的Tensor，对角线值是输入Tensor diagonal的值， 数据shape为 :math:`[N, N]` 二维Tensor。
+返回类型：Variable，数据类型和输入数据类型一致。
+**代码示例**：
+.. code-block:: python
+        #  [3, 0, 0]
+        #  [0, 4, 0]
+        #  [0, 0, 5]
+        import paddle.fluid as fluid
+        import numpy as np
+        diagonal = np.arange(3, 6, dtype='int32')
+        data = fluid.layers.diag(diagonal)
+        # diagonal.shape=(3,) data.shape=(3, 3)
--- a/doc/paddle/api/paddle/tensor/manipulation/gather_nd_cn.rst
+++ b/doc/paddle/api/paddle/tensor/manipulation/gather_nd_cn.rst
+.. _cn_api_fluid_layers_gather_nd:
+gather_nd
+-------------------------------
+.. py:function:: paddle.fluid.layers.gather_nd(input, index, name=None)
+:alias_main: paddle.gather_nd
+:alias: paddle.gather_nd,paddle.tensor.gather_nd,paddle.tensor.manipulation.gather_nd
+:old_api: paddle.fluid.layers.gather_nd
+该OP是 :code:`gather` 的高维推广，并且支持多轴同时索引。 :code:`index` 是一个K维度的张量，它可以认为是从 :code:`input` 中取K-1维张量，每一个元素是一个切片：
+.. math::
+    output[(i_0, ..., i_{K-2})] = input[index[(i_0, ..., i_{K-2})]]
+显然， :code:`index.shape[-1] <= input.rank` 并且输出张量的维度是 :code:`index.shape[:-1] + input.shape[index.shape[-1]:]` 。 
+示例：
+::
+         给定:
+             input = [[[ 0,  1,  2,  3],
+                       [ 4,  5,  6,  7],
+                       [ 8,  9, 10, 11]],
+                      [[12, 13, 14, 15],
+                       [16, 17, 18, 19],
+                       [20, 21, 22, 23]]]
+             input.shape = (2, 3, 4)
+         - 案例 1:
+             index = [[1]]
+             gather_nd(input, index)  
+                      = [input[1, :, :]] 
+                      = [[12, 13, 14, 15],
+                         [16, 17, 18, 19],
+                         [20, 21, 22, 23]]
+         - 案例 2:
+             index = [[0,2]]
+             gather_nd(input, index)
+                      = [input[0, 2, :]]
+                      = [8, 9, 10, 11]
+         - 案例 3:
+             index = [[1, 2, 3]]
+             gather_nd(input, index)
+                      = [input[1, 2, 3]]
+                      = [23]
+参数：
+    - **input** (Variable) - 输入张量，数据类型可以是int32，int64，float32，float64, bool。
+    - **index** (Variable) - 输入的索引张量，数据类型为非负int32或非负int64。它的维度 :code:`index.rank` 必须大于1，并且 :code:`index.shape[-1] <= input.rank` 。
+    - **name** (string) - 该层的名字，默认值为None，表示会自动命名。
+返回：shape为index.shape[:-1] + input.shape[index.shape[-1]:]的Tensor|LoDTensor，数据类型与 :code:`input` 一致。
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    x = fluid.layers.data(name='x', shape=[3, 4, 5], dtype='float32')
+    index = fluid.layers.data(name='index', shape=[2, 2], dtype='int32')
+    output = fluid.layers.gather_nd(x, index)
--- a/doc/paddle/api/paddle/tensor/manipulation/reshape_cn.rst
+++ b/doc/paddle/api/paddle/tensor/manipulation/reshape_cn.rst
+.. _cn_api_fluid_layers_reshape:
+reshape
+-------------------------------
+.. py:function::  paddle.fluid.layers.reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None)
+:alias_main: paddle.reshape
+:alias: paddle.reshape,paddle.tensor.reshape,paddle.tensor.manipulation.reshape
+:old_api: paddle.fluid.layers.reshape
+该OP在保持输入 ``x`` 数据不变的情况下，改变 ``x`` 的形状。
+目标形状可由 ``shape`` 或 ``actual_shape`` 给出。当两个属性同时被指定时，``actual_shape`` 的优先级高于 ``shape`` ，但此时 ``shape`` 只能是整数列表或元组，且在编译时仍然应该正确地设置 ``shape`` 以保证形状推断。
+在指定目标shape时存在一些技巧：
+.. code-block:: text
+  1. -1 表示这个维度的值是从x的元素总数和剩余维度推断出来的。因此，有且只有一个维度可以被设置为-1。
+  2. 0 表示实际的维数是从x的对应维数中复制出来的，因此shape中0的索引值不能超过x的维度。
+这里有一些例子来解释它们：
+.. code-block:: text
+  1. 给定一个形状为[2,4,6]的三维张量x，目标形状为[6,8]，则将x变换为形状为[6,8]的2-D张量，且x的数据保持不变。
+  2. 给定一个形状为[2,4,6]的三维张量x，目标形状为[2,3,-1,2]，则将x变换为形状为[2,3,4,2]的4-D张量，且x的数据保持不变。在这种情况下，目标形状的一个维度被设置为-1，这个维度的值是从x的元素总数和剩余维度推断出来的。
+  3. 给定一个形状为[2,4,6]的三维张量x，目标形状为[-1,0,3,2]，则将x变换为形状为[2,4,3,2]的4-D张量，且x的数据保持不变。在这种情况下，0对应位置的维度值将从x的对应维数中复制,-1对应位置的维度值由x的元素总数和剩余维度推断出来。
+**注意：参数** ``actual_shape`` **之后将被舍弃，只用参数** ``shape`` **来表示目标形状。**
+参数：
+  - **x** （Variable）- 多维 ``Tensor`` 或 ``LoDTensor``，数据类型为 ``float32``，``float64``，``int32``，或 ``int64``。
+  - **shape** （list|tuple|Variable）- 数据类型是 ``int32`` 。定义目标形状。目标形状最多只能有一个维度为-1。如果 ``shape`` 的类型是 list 或 tuple, 它的元素可以是整数或者形状为[1]的 ``Tensor`` 或 ``LoDTensor``。如果 ``shape`` 的类型是 ``Variable``，则是1-D的 ``Tensor`` 或 ``LoDTensor``。
+  - **actual_shape** （Variable，可选）- 1-D ``Tensor`` 或 ``LoDTensor``，默认值：`None`。如果 ``actual_shape`` 被提供，``actual_shape`` 具有比 ``shape`` 更高的优先级，此时 ``shape`` 只能是整数列表或元组。更新提示：``actual_shape`` 在未来的版本中将被舍弃，并用 ``shape`` 代替。
+  - **act** （str，可选）- 对形状改变后的输入变量做非线性激活操作，激活函数类型可以参考 :ref:`api_guide_activations` 。默认值： ``None``。
+  - **inplace** （bool，可选）- 如果 ``inplace`` 为 ``True``，则 ``layers.reshape`` 的输入和输出是同一个变量，否则 ``layers.reshape`` 的输入和输出是不同的变量。默认值：``False``。请注意，如果 ``x`` 是多个OP的输入，则 ``inplace`` 必须为False。
+  - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置。默认值： ``None``。
+返回：多维 ``Tensor`` 或 ``LoDTensor``，数据类型与 ``x`` 相同。如果 ``inplace`` 为 ``False``，则返回一个新的变量，否则将改变输入变量 ``x`` 自身。如果 ``act`` 为 ``None``，则直接返回形状改变后的变量，否则返回经过激活函数后的变量。
+返回类型：Variable。
+抛出异常：
+    - :code:`TypeError`：``actual_shape`` 的类型应该是 Variable 或 None。
+    - :code:`TypeError`：``shape`` 的类型应该是list、tuple 或 Variable。
+    - :code:`ValueError`：``shape`` 中至多有一个元素可以是-1。
+    - :code:`ValueError`：``shape`` 中的元素为0时，对应的维度应该小于等于 ``x`` 的维度。
+    - :code:`ValueError`：``shape`` 中的元素除了-1之外，都应该是非负值。
+**代码示例**
+.. code-block:: python
+  import paddle.fluid as fluid
+  # example 1:
+  # attr shape is a list which doesn't contain tensor Variable.
+  data_1 = fluid.data(
+    name='data_1', shape=[2, 4, 6], dtype='float32')
+  reshaped_1 = fluid.layers.reshape(
+    x=data_1, shape=[-1, 0, 3, 2], inplace=True)
+  # the shape of reshaped_1 is [2,4,3,2].
+  # example 2:
+  # attr shape is a list which contains tensor Variable.
+  data_2 = fluid.layers.fill_constant([2,25], "int32", 3)
+  dim = fluid.layers.fill_constant([1], "int32", 5)
+  reshaped_2 = fluid.layers.reshape(data_2, shape=[dim, 10])
+  # the shape of reshaped_2 is [5,10].
+  # example 3:
+  data_3 = fluid.data(
+    name="data_3", shape=[2,4,6], dtype='float32')
+  reshaped_3 = fluid.layers.reshape(x=data_3, shape=[6,8])
+  # the shape of reshaped_3 is [6,8].
--- a/doc/paddle/api/paddle/tensor/manipulation/scatter_cn.rst
+++ b/doc/paddle/api/paddle/tensor/manipulation/scatter_cn.rst
+.. _cn_api_fluid_layers_scatter:
+scatter
+-------------------------------
+.. py:function:: paddle.fluid.layers.scatter(input, index, updates, name=None, overwrite=True)
+:alias_main: paddle.scatter
+:alias: paddle.scatter,paddle.tensor.scatter,paddle.tensor.manipulation.scatter
+:old_api: paddle.fluid.layers.scatter
+该OP根据index中的索引值将updates数据更新到input中。
+.. code-block:: python
+  输入：
+    input = np.array([[1, 1], [2, 2], [3, 3]])
+    index = np.array([2, 1, 0, 1])
+    # updates的维度需要和input一样
+    # updates 维度 > 1 的shape要和input一样
+    updates = np.array([[1, 1], [2, 2], [3, 3], [4, 4]])
+    overwrite = False
+  计算过程：
+    if not overwrite:
+       for i in range(len(index)):
+         input[index[i]] = np.zeros((2))
+    # 根据index中的索引值取updates中的数据更新到input中去 
+    for i in range(len(index)):
+      if (overwrite):
+        input[index[i]] = updates[i]
+      else:
+        input[index[i]] += updates[i]
+  输出：
+    out # np.array([[3, 3], [6, 6], [1, 1]])
+    out.shape # [3, 2]
+参数：
+  - **input** （Variable） - 支持任意维度的Tensor。支持的数据类型为float32。
+  - **index** （Variable） - 表示索引，仅支持1-D Tensor。 支持的数据类型为int32，int64。
+  - **updates** （Variable） - 根据索引的值将updates Tensor中的对应值更新到input Tensor中，updates Tensor的维度需要和input tensor保持一致，且除了第一维外的其他的维度的大小需要和input Tensor保持相同。支持的数据类型为float32。
+  - **name** （str，可选） - 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
+  - **overwrite** （bool，可选） - 如果index中的索引值有重复且overwrite 为True，旧更新值将被新的更新值覆盖；如果为False，新的更新值将同旧的更新值相加。默认值为True。
+返回：返回类型为Variable(Tensor|LoDTensor)，数据类型以及shape大小同输入一致。
+**代码示例**
+..  code-block:: python
+    import numpy as np
+    import paddle.fluid as fluid
+    input = fluid.layers.data(name='data', shape=[3, 2], dtype='float32', append_batch_size=False)
+    index = fluid.layers.data(name='index', shape=[4], dtype='int64', append_batch_size=False)
+    updates = fluid.layers.data(name='update', shape=[4, 2], dtype='float32', append_batch_size=False)
+    output = fluid.layers.scatter(input, index, updates, overwrite=False)
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    in_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float32)
+    index_data = np.array([2, 1, 0, 1]).astype(np.int64)
+    update_data = np.array([[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float32)
+    res = exe.run(fluid.default_main_program(), feed={'data':in_data, "index":index_data, "update":update_data}, fetch_list=[output])
+    print(res)
+    # [array([[3., 3.],
+    #   [6., 6.],
+    #   [1., 1.]], dtype=float32)]
--- a/doc/paddle/api/paddle/tensor/math/clamp_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/clamp_cn.rst
-.. _cn_api_tensor_clamp:
-clamp
-------------------------------
-.. py:function:: paddle.clamp(input, min=None, max=None, output=None, name=None)
-:alias_main: paddle.clamp
-:alias: paddle.clamp,paddle.tensor.clamp,paddle.tensor.math.clamp
-该OP将输入的所有元素进行剪裁，使得输出元素限制在[min, max]内，具体公式如下：
-.. math::
-        Out = MIN(MAX(x, min), max) 
-参数：
-    - **input** (Variable) – 指定输入为一个多维的Tensor，数据类型可以是float32，float64。
-    - **min** (float32|Variable, 可选) - 裁剪的最小值，输入中小于该值的元素将由该元素代替，若参数为空，则不对输入的最小值做限制。数据类型可以是float32或形状为[1]的Tensor，类型可以为int32，float32，float64，默认值为None。
-    - **max** (float32|Variable, 可选) - 裁剪的最大值，输入中大于该值的元素将由该元素代替，若参数为空，则不对输入的最大值做限制。数据类型可以是float32或形状为[1]的Tensor，类型可以为int32，float32，float64，默认值为None。
-    - **output** （Variable， 可选）- 输出Tensor或LoDTensor。如果为None，则创建一个新的Tensor作为输出Tensor，默认值为None。
-    - **name** （str，可选）- 具体用法请参见 :ref:`api_guide_Name` ，一般无需设置，默认值为None。
-返回：返回一个和输入形状相同的Tensor。
-返回类型：Variable
-**代码示例**：
-.. code-block:: python
-    import paddle
-    import paddle.fluid as fluid
-    import numpy as np
-    in1 = np.array([[1.2,3.5],
-                    [4.5,6.4]]).astype('float32')
-    with fluid.dygraph.guard():
-        x1 = fluid.dygraph.to_variable(in1)
-        out1 = paddle.tensor.clamp(x1, min=3.5, max=5.0)
-        out2 = paddle.tensor.clamp(x1, min=2.5)
-        print(out1.numpy())
-        # [[3.5, 3.5]
-        # [4.5, 5.0]]
-        print(out2.numpy())
-        # [[2.5, 3.5]
-        # [[4.5, 6.4]
--- a/doc/paddle/api/paddle/tensor/math/clip_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/clip_cn.rst
+.. _cn_api_fluid_layers_clip:
+clip
+-------------------------------
+.. py:function:: paddle.fluid.layers.clip(x, min, max, name=None)
+:alias_main: paddle.nn.clip
+:alias: paddle.nn.clip,paddle.nn.clip.clip
+:old_api: paddle.fluid.layers.clip
+该OP对输入Tensor每个元素的数值进行裁剪，使得输出Tensor元素的数值被限制在区间[min, max]内。具体的计算公式为如下。
+.. math::
+  Out = MIN(MAX(x,min),max)
+参数：
+        - **x** (Variable)- 多维Tensor，数据类型为float32
+        - **min** (float)- 最小值，输入Tensor中小于该值的元素由min代替。
+        - **max** (float)- 最大值，输入Tensor中大于该值的元素由max替换。
+        - **name** (None|str) – 该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` ，默认值为None。
+返回：  对元素的数值进行裁剪之后的Tensor，与输入x具有相同的shape和数据类型
+返回类型：Variable
+**代码示例：**
+.. code-block:: python
+    import paddle.fluid as fluid
+    input = fluid.layers.data(
+        name='data', shape=[1], dtype='float32')
+    reward = fluid.layers.clip(x=input, min=-1.0, max=1.0)
--- a/doc/paddle/api/paddle/tensor/math/div_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/div_cn.rst
-.. _cn_api_tensor_div:
-div
-------------------------------
-.. py:function:: paddle.div(x, y, out=None, name=None)
-:alias_main: paddle.div
-:alias: paddle.div,paddle.tensor.div,paddle.tensor.math.div
-:update_api: paddle.fluid.layers.elementwise_div
-该OP是逐元素相除算子，输入 ``x`` 与输入 ``y`` 逐元素相除，并将各个位置的输出元素保存到返回结果中。
-等式是：
-.. math::
-        Out = X / Y
- :math:`X` ：多维Tensor。
- :math:`Y` ：维度必须小于等于X维度的Tensor。
-对于这个运算算子有2种情况：
-        1. :math:`Y` 的 ``shape`` 与 :math:`X` 相同。
-        2. :math:`Y` 的 ``shape`` 是 :math:`X` 的连续子序列。
-对于情况2:
-        1. 用 :math:`Y` 匹配 :math:`X` 的形状（shape），其中 ``axis`` 是 :math:`Y` 在 :math:`X` 上的起始维度的位置。
-        2. 如果 ``axis`` 为-1（默认值），则 :math:`axis= rank(X)-rank(Y)` 。
-        3. 考虑到子序列， :math:`Y` 的大小为1的尾部维度将被忽略，例如shape（Y）=（2,1）=>（2）。
-例如：
-..  code-block:: text
-        shape(X) = (2, 3, 4, 5), shape(Y) = (,)
-        shape(X) = (2, 3, 4, 5), shape(Y) = (5,)
-        shape(X) = (2, 3, 4, 5), shape(Y) = (4, 5), with axis=-1(default) or axis=2
-        shape(X) = (2, 3, 4, 5), shape(Y) = (3, 4), with axis=1
-        shape(X) = (2, 3, 4, 5), shape(Y) = (2), with axis=0
-        shape(X) = (2, 3, 4, 5), shape(Y) = (2, 1), with axis=0
-参数：
-        - **x** （Variable）- 多维 ``Tensor`` 或 ``LoDTensor`` 。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64``。
-        - **y** （Variable）- 多维 ``Tensor`` 或 ``LoDTensor`` 。数据类型为 ``float32`` 、 ``float64`` 、 ``int32`` 或  ``int64``。
-        - **out** （Variable，可选）-  指定存储运算结果的 ``Tensor`` 。如果设置为None或者不设置，将创建新的 ``Tensor`` 存储运算结果，默认值为None。
-        - **name** （str，可选）- 输出的名字。默认值为None。该参数供开发人员打印调试信息时使用，具体用法请参见 :ref:`api_guide_Name` 。
-返回：        多维 ``Tensor`` 或 ``LoDTensor`` ， 维度和数据类型都与 ``x`` 相同。
-返回类型：        Variable
-**代码示例 1**
-..  code-block:: python
-    import paddle
-    import paddle.fluid as fluid
-    import numpy as np
-    def gen_data():
-        return {
-            "x": np.array([2, 3, 4]).astype('float32'),
-            "y": np.array([1, 5, 2]).astype('float32')
-        }
-    x = fluid.data(name="x", shape=[3], dtype='float32')
-    y = fluid.data(name="y", shape=[3], dtype='float32')
-    z = paddle.div(x, y)
-    # z = x / y
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    z_value = exe.run(feed=gen_data(),
-                        fetch_list=[z.name])
-    print(z_value) # [2., 0.6, 2.]
-**代码示例 2**
-.. code-block:: python
-    import paddle
-    import paddle.fluid as fluid
-    import numpy as np
-    def gen_data():
-        return {
-            "x": np.ones((2, 3, 4, 5)).astype('float32'),
-            "y": np.zeros((4, 5)).astype('float32')
-        }
-    x = fluid.data(name="x", shape=[2, 3, 4, 5], dtype='float32')
-    y = fluid.data(name="y", shape=[4, 5], dtype='float32')
-    z = paddle.div(x, y, name='z')
-    # z = x / y
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    z_value = exe.run(feed=gen_data(),
-                        fetch_list=[z.name])
-    print(z_value[0])
-    print(z_value[0].shape) # z.shape=[2,3,4,5]
-**代码示例 3**
-..  code-block:: python
-    import paddle
-    import paddle.fluid as fluid
-    import numpy as np
-    def gen_data():
-        return {
-            "x": np.random.randint(1, 5, size=[2, 3, 4, 5]).astype('float32'),
-            "y": np.random.randint(1, 5, size=[5]).astype('float32')
-        }
-    x = fluid.data(name="x", shape=[2,3,4,5], dtype='float32')
-    y = fluid.data(name="y", shape=[5], dtype='float32')
-    output = fluid.data(name="output", shape=[2,3,4,5], dtype="float32")
-    z = paddle.div(x, y, out=output)
-    # z = x / y
-    place = fluid.CPUPlace()
-    exe = fluid.Executor(place)
-    z_value = exe.run(feed=gen_data(),
-                        fetch_list=[z.name])
-    print(z_value[0])
-    print(z_value[0].shape) # z.shape=[2,3,4,5]
-**代码示例 4（动态图）**
-..  code-block:: python
-    import paddle
-    import paddle.fluid as fluid
-    import numpy as np
-    with fluid.dygraph.guard(fluid.CPUPlace()):
-        np_x = np.array([2, 3, 4]).astype('float64')
-        np_y = np.array([1, 5, 2]).astype('float64')
-        x = fluid.dygraph.to_variable(np_x)
-        y = fluid.dygraph.to_variable(np_y)
-        z = paddle.div(x, y)
-        np_z = z.numpy()
-        print(np_z)  # [2., 0.6, 2.]
--- a/doc/paddle/api/paddle/tensor/math/isfinite_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/isfinite_cn.rst
+.. _cn_api_fluid_layers_isfinite:
+isfinite
+-------------------------------
+.. py:function:: paddle.fluid.layers.isfinite(x)
+:alias_main: paddle.isfinite
+:alias: paddle.isfinite,paddle.tensor.isfinite,paddle.tensor.logic.isfinite
+:old_api: paddle.fluid.layers.isfinite
+``注意：此算子的输入 Tensor / LoDTensor 数据类型必须为 int32 / float / double 之一。``
+测试 x 是否包含非有限值（即 nan 或 inf）。若所有元素均为有限数，返回真；否则返回假。
+参数：
+  - **x(variable)** : 变量，包含被测试的 Tensor / LoDTensor。
+返回: 
+  - Variable (Tensor / LoDTensor)，此 Tensor 变量包含一个 bool 型结果。
+返回类型
+  - Variable (Tensor / LoDTensor)，一个包含 Tensor 的变量。
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    import numpy
+    # Graph Organizing
+    var = fluid.layers.data(name="data", shape=(4, 6), dtype="float32")
+    output = fluid.layers.isfinite(var)
+    # Create an executor using CPU as an example
+    exe = fluid.Executor(fluid.CPUPlace())
+    exe.run(fluid.default_startup_program())
+    # Execute
+    img = numpy.array((4, 6)).astype(numpy.float32)
+    res, = exe.run(fluid.default_main_program(), feed={'data':img}, fetch_list=[output])
+    print(res)  # Output Value: [ True]
--- a/doc/paddle/api/paddle/tensor/math/isnan_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/isnan_cn.rst
+isnan
+-------------------------------
+**版本升级，文档正在开发中**
--- a/doc/paddle/api/paddle/tensor/math/sign_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/sign_cn.rst
+.. _cn_api_fluid_layers_sign:
+sign
+-------------------------------
+.. py:function:: paddle.fluid.layers.sign(x)
+:alias_main: paddle.sign
+:alias: paddle.sign,paddle.tensor.sign,paddle.tensor.math.sign
+:old_api: paddle.fluid.layers.sign
+此OP对输入x中每个元素进行正负判断，并且输出正负判断值：1代表正，-1代表负，0代表零。
+参数：
+    - **x** (Variable|numpy.ndarray) – 进行正负值判断的多维Tensor或者是多维的numpy数组，数据类型为 float32，float64。
+返回：输出正负号Tensor，数据的shape大小和输入x的数据shape一致。
+返回类型：Variable，数据类型和输入数据类型一致。
+**代码示例**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    data = fluid.layers.sign(np.array([3.0, 0.0, -2.0], dtype='float32'))
+    # data=[1.0, 0.0, -1.0]
--- a/doc/paddle/api/paddle/tensor/math/tanh_cn.rst
+++ b/doc/paddle/api/paddle/tensor/math/tanh_cn.rst
+.. _cn_api_fluid_layers_tanh:
+tanh
+-------------------------------
+.. py:function:: paddle.fluid.layers.tanh(x, name=None)
+tanh 激活函数
+.. math::
+    out = \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}
+参数:
+    - **x** - Tanh算子的输入
+    - **name** (str|None) - 该层名称（可选）。若设为None，则自动为该层命名。
+返回: 张量(Tensor)
+返回类型: 变量(Variable)
+**代码示例**：
+.. code-block:: python
+    import paddle.fluid as fluid
+    data = fluid.layers.data(name="input", shape=[32, 784])
+    result = fluid.layers.tanh(data)
--- a/doc/paddle/api/paddle/tensor/search/argmin_cn.rst
+++ b/doc/paddle/api/paddle/tensor/search/argmin_cn.rst
+.. _cn_api_fluid_layers_argmin:
+argmin
+-------------------------------
+.. py:function:: paddle.fluid.layers.argmin(x, axis=0)
+:alias_main: paddle.argmin
+:alias: paddle.argmin,paddle.tensor.argmin,paddle.tensor.search.argmin
+:old_api: paddle.fluid.layers.argmin
+**argmin**
+该OP沿 ``axis`` 计算输入 ``x`` 的最小元素的索引。
+参数：
+    - **x** (Variable) - 输入的多维 ``Tensor`` ，支持的数据类型：float32、float64、int8、int16、int32、int64。
+    - **axis** (int，可选) - 指定对输入Tensor进行运算的轴， ``axis`` 的有效范围是[-R, R)，R是输入 ``x`` 的Rank， ``axis`` 为负时与 ``axis`` +R 等价。默认值为0。
+返回： ``Tensor`` ，数据类型int64
+返回类型：Variable
+**代码示例**：
+.. code-block:: python
+  import paddle.fluid as fluid
+  import numpy as np
+  in1 = np.array([[[5,8,9,5],
+                   [0,0,1,7],
+                   [6,9,2,4]],
+                  [[5,2,4,2],
+                   [4,7,7,9],
+                   [1,7,0,6]]])
+  with fluid.dygraph.guard():
+      x = fluid.dygraph.to_variable(in1)
+      out1 = fluid.layers.argmin(x=x, axis=-1)
+      out2 = fluid.layers.argmin(x=x, axis=0)
+      out3 = fluid.layers.argmin(x=x, axis=1)
+      out4 = fluid.layers.argmin(x=x, axis=2)
+      print(out1.numpy())
+      # [[0 0 2]
+      #  [1 0 2]]
+      print(out2.numpy())
+      # [[0 1 1 1]
+      #  [0 0 0 0]
+      #  [1 1 1 0]]
+      print(out3.numpy())
+      # [[1 1 1 2]
+      #  [2 0 2 0]]
+      print(out4.numpy())
+      # [[0 0 2]
+      #  [1 0 2]]
--- a/doc/paddle/api/paddle/tensor/search/masked_select_cn.rst
+++ b/doc/paddle/api/paddle/tensor/search/masked_select_cn.rst
+.. _cn_api_fluid_layers_masked_select:
+masked_select
+-------------------------------
+.. py:function:: paddle.fluid.layers.masked_select(input, mask)
+该OP将根据mask Tensor的真值选取输入Tensor元素，并返回一个一维Tensor
+参数：
+          - **input** （Variable）- 输入Tensor，数据类型为int32, float32, float64。
+          - **mask** （Variable）- mask Tensor， 数据类型为bool。
+返回：根据mask选择后的tensor
+返回类型：  Variable
+**示例代码**
+..  code-block:: python
+    import paddle.fluid as fluid
+    import numpy as np
+    mask_shape = [4,1]
+    shape = [4,4]
+    data = np.random.random(mask_shape).astype("float32")
+    input_data = np.random.randint(5,size=shape).astype("float32")
+    mask_data = data > 0.5
+    # print(input_data)
+    # [[0.38972723 0.36218056 0.7892614  0.50122297]
+    #  [0.14408113 0.85540855 0.30984417 0.7577004 ]
+    #  [0.97263193 0.5248062  0.07655851 0.75549215]
+    #  [0.26214206 0.32359877 0.6314582  0.2128865 ]]
+    # print(mask_data)
+    # [[ True]
+    #  [ True]
+    #  [False]
+    #  [ True]]
+    input = fluid.data(name="input",shape=[4,4],dtype="float32")
+    mask = fluid.data(name="mask",shape=[4,1],dtype="bool")
+    result = fluid.layers.masked_select(input=input, mask=mask)
+    place = fluid.CPUPlace()
+    exe = fluid.Executor(place)
+    start = fluid.default_startup_program()
+    main = fluid.default_main_program()
+    exe.run(start)
+    masked_select_result= exe.run(main, feed={'input':input_data, 'mask':mask_data}, fetch_list=[result])
+    # print(masked_select_result)
+    # [0.38972723 0.36218056 0.7892614  0.50122297 0.14408113 0.85540855
+    #   0.30984417 0.7577004  0.26214206 0.32359877 0.6314582  0.2128865 ]