diff --git a/doc/fluid/advanced_usage/deploy/inference/index_cn.rst b/doc/fluid/advanced_usage/deploy/inference/index_cn.rst
index 8e918743ea62cd7d661ed6c30e867769ec88abd1..e95420d988b843778de80b1ce1949b8fbb7e78c3 100644
--- a/doc/fluid/advanced_usage/deploy/inference/index_cn.rst
+++ b/doc/fluid/advanced_usage/deploy/inference/index_cn.rst
@@ -2,15 +2,7 @@
 服务器端部署
 ############
 
-推理（Inference）指的是在设备上运行训练好的模型，依据输入数据来进行预测。Paddle Fluid提供了预测库及其C++和Python的API来支持模型的部署上线。
-使用Paddle Fluid预测主要包含以下几个步骤：
-
-    1. 加载由Paddle Fluid训练的模型和参数文件；
-    2. 准备输入数据。即将待预测的数据（如图片）转换成Paddle Fluid模型接受的格式，并将其设定为预测引擎的输入；
-    3. 运行预测引擎，获得模型的输出；
-    4. 根据业务需求解析输出结果，获得需要的信息。
-
-以上步骤使用的API会在后续部分进行详细介绍。
+PaddlePaddle 提供了 C++、C 和 Python 的 API 来支持模型的部署上线。
 
 .. toctree::
    :titlesonly:
diff --git a/doc/fluid/advanced_usage/deploy/inference/native_infer.md b/doc/fluid/advanced_usage/deploy/inference/native_infer.md
index 6ed6b634f3bbf845a43b1de966b0370b18f40479..44a6e4e8001ab9b773dc5534eb5f4cc096c64971 100644
--- a/doc/fluid/advanced_usage/deploy/inference/native_infer.md
+++ b/doc/fluid/advanced_usage/deploy/inference/native_infer.md
@@ -22,7 +22,7 @@ Paddle Fluid采用 AnalysisPredictor 进行预测。AnalysisPredictor 是一个
 
 #### AnalysisPredictor 预测示例
 
-```c++
+``` c++
 #include "paddle_inference_api.h"
 
 namespace paddle {
@@ -97,28 +97,29 @@ int main() {
 
 AnalysisConfig管理AnalysisPredictor的预测配置，提供了模型路径设置、预测引擎运行设备选择以及多种优化预测流程的选项。配置方法如下：
 
+#### 通用优化配置
+``` c++
+config->SwitchIrOptim(true);  // 开启计算图分析优化，包括OP融合等
+config->EnableMemoryOptim();  // 开启内存/显存复用 
+```
+**Note:** 使用ZeroCopyTensor时必须设置：
+``` c++
+config->SwitchUseFeedFetchOps(false);  // 关闭feed和fetch OP使用，使用ZeroCopy接口必须设置此项
+```
+
 #### 设置模型和参数路径
 从磁盘加载模型时，根据模型和参数文件存储方式不同，设置AnalysisConfig加载模型和参数的路径有两种形式：
 
-* combined形式：模型文件夹`model_dir`下存在一个模型文件和多个参数文件时，传入模型文件夹路径，模型文件名默认为`__model__`。
+* 非combined形式：模型文件夹`model_dir`下存在一个模型文件和多个参数文件时，传入模型文件夹路径，模型文件名默认为`__model__`。
 ``` c++
 config->SetModel("./model_dir");
 ```
 
-* 非combined形式：模型文件夹`model_dir`下只有一个模型文件`model`和一个参数文件`params`时，传入模型文件和参数文件路径。
+* combined形式：模型文件夹`model_dir`下只有一个模型文件`model`和一个参数文件`params`时，传入模型文件和参数文件路径。
 ``` c++
 config->SetModel("./model_dir/model", "./model_dir/params");
 ```
 
-#### 通用优化配置
-``` c++
-config->SwitchIrOptim(true);  // 开启计算图分析优化，包括OP融合等
-config->EnableMemoryOptim();  // 开启内存/显存复用 
-```
-**Note:** 使用ZeroCopyTensor必须设置：
-``` c++
-config->SwitchUseFeedFetchOps(false);  // 关闭feed和fetch OP使用，使用ZeroCopy接口必须设置此项
-```
 
 #### 配置CPU预测
 
@@ -147,7 +148,7 @@ ZeroCopyTensor是AnalysisPredictor的输入/输出数据结构。ZeroCopyTensor
 
 **Note:** 使用ZeroCopyTensor，务必在创建config时设置`config->SwitchUseFeedFetchOps(false);`。
 
-```c++
+``` c++
 // 通过创建的AnalysisPredictor获取输入和输出的tensor
 auto input_names = predictor->GetInputNames();
 auto input_t = predictor->GetInputTensor(input_names[0]);
@@ -177,7 +178,7 @@ float *output_d = output_t->data<float>(PaddlePlace::kGPU, &output_size);
 
 	`inference` 文件夹目录结构如下：
 
-	```shell
+	``` shell
     inference
     ├── CMakeLists.txt
     ├── mobilenet_test.cc
@@ -198,7 +199,7 @@ float *output_d = output_t->data<float>(PaddlePlace::kGPU, &output_size);
 	
     编译运行预测样例之前，需要根据运行环境配置编译与运行脚本`run.sh`。`run.sh`的选项与路径配置的部分如下：
 	
-    ```shell
+    ``` shell
     # 设置是否开启MKL、GPU、TensorRT，如果要使用TensorRT，必须打开GPU
     WITH_MKL=ON
     WITH_GPU=OFF
@@ -215,7 +216,7 @@ float *output_d = output_t->data<float>(PaddlePlace::kGPU, &output_size);
 
 4. 编译与运行样例   
 
-	```shell
+	``` shell
 	sh run.sh
 	```
 
@@ -238,4 +239,4 @@ Paddle Fluid支持通过在不同线程运行多个AnalysisPredictor的方式来
 sh run.sh
 ```
 
-即可运行多线程预测样例。
\ No newline at end of file
+即可运行多线程预测样例。
diff --git a/doc/fluid/advanced_usage/deploy/inference/paddle_tensorrt_infer.md b/doc/fluid/advanced_usage/deploy/inference/paddle_tensorrt_infer.md
index da244f8791d8c7ea85cd13af1e12c1cccbd9895a..281ac6bcf71dc200b800bfb759f5e0de33ea5186 100644
--- a/doc/fluid/advanced_usage/deploy/inference/paddle_tensorrt_infer.md
+++ b/doc/fluid/advanced_usage/deploy/inference/paddle_tensorrt_infer.md
@@ -21,7 +21,7 @@ NVIDIA TensorRT 是一个高性能的深度学习预测库，可为深度学习
 2. Windows支持需要TensorRT 版本5.0以上。
 3. Paddle-TRT目前仅支持固定输入shape。
 4. 若使用用户自行安装的TensorRT，需要手动在`NvInfer.h`文件中为`class IPluginFactory`和`class IGpuAllocator`分别添加虚析构函数：
-	```c++
+	``` c++
     virtual ~IPluginFactory() {};
 	virtual ~IGpuAllocator() {};
     ```
@@ -37,7 +37,7 @@ NVIDIA TensorRT 是一个高性能的深度学习预测库，可为深度学习
 
 在使用AnalysisPredictor时，我们通过配置AnalysisConfig中的接口
 
-```c++
+``` c++
 config->EnableTensorRtEngine(1 << 20      /* workspace_size*/,   
                         batch_size        /* max_batch_size*/,     
                         3                 /* min_subgraph_size*/, 
@@ -106,7 +106,7 @@ config->EnableTensorRtEngine(1 << 20      /* workspace_size*/,
 
 4. 编译与运行样例   
 
-	```shell
+	``` shell
 	sh run.sh
 	```
  
@@ -123,7 +123,7 @@ config->EnableTensorRtEngine(1 << 20      /* workspace_size*/,
 2. 编译测试INT8样例
 	将`run.sh`文件中的`mobilenet_test`改为`fluid_generate_calib_test`，运行
     
-    ```shell   
+    ``` shell   
     sh run.sh  
     ```
 
@@ -131,13 +131,13 @@ config->EnableTensorRtEngine(1 << 20      /* workspace_size*/,
     
     生成校准表后，将带校准表的模型文件拷贝到特定地址
     
-    ```shell   
+    ``` shell   
     cp -rf SAMPLE_BASE_DIR/sample/paddle-TRT/build/mobilenetv1/ SAMPLE_BASE_DIR/sample/paddle-TRT/mobilenetv1_calib  
     ```
     
 	将`run.sh`文件中的`fluid_generate_calib_test`改为`fluid_int8_test`，将模型路径改为`SAMPLE_BASE_DIR/sample/paddle-TRT/mobilenetv1_calib`，运行
     
-	```shell   
+	``` shell   
     sh run.sh  
     ```