diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py
index a3d2f61623dccd01f4ec760cc07236dcc20a105f..d37e90b4695d03b5c9caa71c65c8624e558d1065 100644
--- a/python/paddle/fluid/contrib/mixed_precision/decorator.py
+++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py
@@ -44,7 +44,7 @@ class OptimizerWithMixedPrecision(object):
 
     Args:
         optimizer (Optimizer): A common Optimizer object.
-        amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object.
+        amp_lists (CustomOpLists): A CustomOpLists object.
         init_loss_scaling (float): The initial loss scaling factor.
         use_dynamic_loss_scaling (bool): Whether to use dynamic loss scaling.
         incr_every_n_steps(int): Increases loss scaling every n consecutive
@@ -196,12 +196,56 @@ class OptimizerWithMixedPrecision(object):
         Init the amp training, such as cast fp32 parameters to fp16 type.
 
         Args:
-            place(CPUPlace|CUDAPlace): place is used to initialize
+            place(CUDAPlace): place is used to initialize
                 fp16 parameters with fp32 values.
             scope(Scope): The scope is used to find fp32 parameters.
             test_program(Program): The program is used for testing.
             use_fp16_test(bool): Whether to use fp16 testing.
 
+        Examples:
+            .. code-block:: python
+
+                import numpy as np
+                import paddle
+                import paddle.nn.functional as F
+                paddle.enable_static()
+
+                def run_example_code():
+                    place = paddle.CUDAPlace(0)
+                    exe = paddle.static.Executor(place)
+                    data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+                    conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+                    # 1) Use fp16_guard to control the range of fp16 kernels used.
+                    with paddle.static.amp.fp16_guard():
+                        bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+                        pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+                        hidden = paddle.static.nn.fc(pool, size=10)
+                        loss = paddle.mean(hidden)
+                    # 2) Create the optimizer and set `multi_precision` to True.
+                    # Setting `multi_precision` to True can help avoid poor accuracy
+                    # or slow convergence.
+                    optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+                    # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+                    amp_list = paddle.static.amp.CustomOpLists(
+                        custom_black_list=['pool2d'])
+                    # 4) The entry point of Paddle AMP.
+                    # Enable pure fp16 training by setting `use_pure_fp16` to True.
+                    optimizer = paddle.static.amp.decorate(
+                        optimizer,
+                        amp_list,
+                        init_loss_scaling=128.0,
+                        use_dynamic_loss_scaling=True,
+                        use_pure_fp16=True)
+                    # If you don't use the default_startup_program(), you should pass
+                    # your defined `startup_program` into `minimize`.
+                    optimizer.minimize(loss)
+                    exe.run(paddle.static.default_startup_program())
+                    # 5) Call `amp_init` after the FP32 parameters have been initialized (such as by `exe.run(startup_program)`).
+                    # If you want to perform the testing process, you should also pass `test_program` into `amp_init`.
+                    optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+                if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
+                    run_example_code()
         """
         assert self._train_program is not None, \
             "Please call the minimize method first."
@@ -383,7 +427,7 @@ def decorate(optimizer,
 
     Args:
         optimizer(Optimizer): A common Optimizer.
-        amp_lists (AutoMixedPrecisionLists): An AutoMixedPrecisionLists object.
+        amp_lists (CustomOpLists): A CustomOpLists object.
         init_loss_scaling(float): The initial loss scaling factor.
         incr_every_n_steps(int): Increases loss scaling every n consecutive
                                  steps with finite gradients.
@@ -403,17 +447,70 @@ def decorate(optimizer,
        An optimizer acting like a normal one but with mixed-precision training
        enabled.
 
-    Examples:
-        .. code-block:: python
+    Examples 1:
+        .. code-block:: python
+
+            # black&white list based strategy example
+            import paddle
+            import paddle.static as static
+
+            paddle.enable_static()
+
+            data = static.data(name='X', shape=[None, 1], dtype='float32')
+            hidden = static.nn.fc(x=data, size=10)
+            loss = paddle.mean(hidden)
+            optimizer = paddle.optimizer.Adam(learning_rate=0.001)
+
+            mp_optimizer = static.amp.decorate(
+                optimizer=optimizer, init_loss_scaling=8.0)
 
-            loss = network()
-            optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-
-            mp_optimizer = fluid.contrib.mixed_precision.decorate(
-                optimizer=optimizer, init_loss_scaling=8.0)
-
             ops, param_grads = mp_optimizer.minimize(loss)
             scaled_loss = mp_optimizer.get_scaled_loss()
+
+    Examples 2:
+        .. code-block:: python
+
+            # pure fp16 training example
+            import numpy as np
+            import paddle
+            import paddle.nn.functional as F
+
+            def run_example_code():
+                place = paddle.CUDAPlace(0)
+                exe = paddle.static.Executor(place)
+                data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+                conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+                # 1) Use fp16_guard to control the range of fp16 kernels used.
+                with paddle.static.amp.fp16_guard():
+                    bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+                    pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+                    hidden = paddle.static.nn.fc(pool, size=10)
+                    loss = paddle.mean(hidden)
+                # 2) Create the optimizer and set `multi_precision` to True.
+                # Setting `multi_precision` to True can help avoid poor accuracy
+                # or slow convergence.
+                optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
+                # 3) The ops in `custom_black_list` will be kept in the float32 computation type.
+                amp_list = paddle.static.amp.CustomOpLists(
+                    custom_black_list=['pool2d'])
+                # 4) The entry point of Paddle AMP.
+                # Enable pure fp16 training by setting `use_pure_fp16` to True.
+                optimizer = paddle.static.amp.decorate(
+                    optimizer,
+                    amp_list,
+                    init_loss_scaling=128.0,
+                    use_dynamic_loss_scaling=True,
+                    use_pure_fp16=True)
+                # If you don't use the default_startup_program(), you should pass
+                # your defined `startup_program` into `minimize`.
+                optimizer.minimize(loss)
+                exe.run(paddle.static.default_startup_program())
+                # 5) Call `amp_init` after the FP32 parameters have been initialized (such as by `exe.run(startup_program)`).
+                # If you want to perform the testing process, you should also pass `test_program` into `amp_init`.
+                optimizer.amp_init(place, scope=paddle.static.global_scope())
+
+            if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
+                run_example_code()
     """
     if amp_lists is None:
         amp_lists = AutoMixedPrecisionLists()
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
index a409595d3ed10b895f4b6dc3ee5a466815a71ad0..1e4286248538636f16f1242afa4a96b1d381dfbd 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_lists.py
@@ -14,7 +14,7 @@
 
 import copy
 
-__all__ = ["AutoMixedPrecisionLists"]
+__all__ = ["CustomOpLists", "AutoMixedPrecisionLists"]
 
 
 class AutoMixedPrecisionLists(object):
@@ -27,6 +27,7 @@ class AutoMixedPrecisionLists(object):
     Args:
         custom_white_list (set): Users' custom white list.
         custom_black_list (set): Users' custom black list.
+        custom_black_varnames (set): Users' custom black variables' names.
     """
 
     def __init__(self,
@@ -284,3 +285,5 @@ unsupported_fp16_list = {
     'generate_proposal_labels',
     'generate_mask_labels',
 }
+
+CustomOpLists = AutoMixedPrecisionLists
diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
index e02671e219fc936a14bdc56915b628ae93424e4d..f9c3a613c4053a79cb467d752b20f6f4ed3ea4ec 100644
--- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
+++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py
@@ -282,6 +282,22 @@ def fp16_guard():
     As for the pure fp16 training, if users set `use_fp16_guard` to True,
     only those ops created in the context manager `fp16_guard` will be
     transformed as float16 type.
+
+    Examples:
+        .. code-block:: python
+
+            import numpy as np
+            import paddle
+            import paddle.nn.functional as F
+            paddle.enable_static()
+            data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
+            conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
+
+            with paddle.static.amp.fp16_guard():
+                bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
+                pool = F.max_pool2d(bn, kernel_size=2, stride=2)
+                hidden = paddle.static.nn.fc(pool, size=10)
+                loss = paddle.mean(hidden)
     """
     with framework.name_scope(prefix=_fp16_guard_pattern):
         yield
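The docstrings above mention passing `test_program` into `amp_init`, but none of the examples show it. Below is a minimal sketch of that flow under stated assumptions: the network code mirrors the docstring examples, the evaluation program is obtained with `Program.clone(for_test=True)` before `minimize`, and the function name `run_amp_init_with_test_program` is purely illustrative.

import paddle
import paddle.nn.functional as F

paddle.enable_static()

def run_amp_init_with_test_program():
    main_program = paddle.static.Program()
    startup_program = paddle.static.Program()
    with paddle.static.program_guard(main_program, startup_program):
        data = paddle.static.data(name='X', shape=[None, 1, 28, 28], dtype='float32')
        conv2d = paddle.static.nn.conv2d(input=data, num_filters=6, filter_size=3)
        with paddle.static.amp.fp16_guard():
            bn = paddle.static.nn.batch_norm(input=conv2d, act="relu")
            pool = F.max_pool2d(bn, kernel_size=2, stride=2)
            hidden = paddle.static.nn.fc(pool, size=10)
            loss = paddle.mean(hidden)

        # Clone the evaluation program before minimize(), as usual in static mode.
        test_program = main_program.clone(for_test=True)

        optimizer = paddle.optimizer.Momentum(learning_rate=0.01, multi_precision=True)
        optimizer = paddle.static.amp.decorate(
            optimizer,
            init_loss_scaling=128.0,
            use_dynamic_loss_scaling=True,
            use_pure_fp16=True)
        optimizer.minimize(loss, startup_program)

    place = paddle.CUDAPlace(0)
    exe = paddle.static.Executor(place)
    exe.run(startup_program)
    # Pass `test_program` so amp_init also prepares the evaluation program;
    # `use_fp16_test=True` requests fp16 evaluation as documented above.
    optimizer.amp_init(
        place,
        scope=paddle.static.global_scope(),
        test_program=test_program,
        use_fp16_test=True)

if paddle.is_compiled_with_cuda() and len(paddle.static.cuda_places()) > 0:
    run_amp_init_with_test_program()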