diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc
index 91a28269470f8c3c9266c002bc16cea01568fd6c..c73482eb12e882fe15a595ad485ae688db346803 100644
--- a/paddle/fluid/operators/activation_op.cc
+++ b/paddle/fluid/operators/activation_op.cc
@@ -275,7 +275,7 @@ class HardShrinkOpMaker : public framework::OpProtoAndCheckerMaker {
             "The value of threshold for HardShrink. [default: 0.5]")
         .SetDefault(0.5f);
     AddComment(R"DOC(
-** HardShrink activation operator **
+:strong:`HardShrink activation operator`
 
 .. math::
     out = \begin{cases}
@@ -394,15 +394,16 @@ class ThresholdedReluOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput("X", "Input of ThresholdedRelu operator");
     AddOutput("Out", "Output of ThresholdedRelu operator");
-    AddAttr<float>("threshold", "The threshold location of activation")
+    AddAttr<float>("threshold",
+                   "The threshold location of activation. [default 1.0].")
         .SetDefault(1.0f);
     AddComment(R"DOC(
-ThresholdedRelu Activation Operator.
+:strong:`ThresholdedRelu activation operator`
 
 .. math::
 
     out = \begin{cases}
-    x, \text{if } x > threshold \\
+             x, \text{if } x > threshold \\
              0, \text{otherwise}
           \end{cases}
 )DOC");
diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc
index 52c37e8c911f79e0f0e97ba7cc6ac2706765944b..10b1b0c899d833d70fa6afe51998fe210899e3c3 100644
--- a/paddle/fluid/operators/row_conv_op.cc
+++ b/paddle/fluid/operators/row_conv_op.cc
@@ -94,7 +94,7 @@ class RowConvOpMaker : public framework::OpProtoAndCheckerMaker {
              "in this LodTensor is a matrix with shape T x N, i.e., the "
              "same shape as X.");
     AddComment(R"DOC(
-** Row-convolution operator **
+:strong:`Row-convolution operator`
 
 The row convolution is called lookahead convolution. This operator was
 introduced in the following paper for DeepSpeech2:
diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py
index 486d6f371fbfa1f4e40a92cf71bcd0fe57aa646d..6f404c5cc608abda91c1d042d405f109dedc55c9 100644
--- a/python/paddle/fluid/layers/ops.py
+++ b/python/paddle/fluid/layers/ops.py
@@ -40,7 +40,6 @@ __activations__ = [
     'relu6',
     'pow',
     'stanh',
-    'thresholded_relu',
     'hard_sigmoid',
     'swish',
 ]
@@ -91,8 +90,7 @@ def uniform_random(shape, dtype=None, min=None, max=None, seed=None):
     return _uniform_random_(**kwargs)
 
 
-uniform_random.__doc__ = _uniform_random_.__doc__ + "\n" \
-    + """
+uniform_random.__doc__ = _uniform_random_.__doc__ + """
 Examples:
 
     >>> result = fluid.layers.uniform_random(shape=[32, 784])
@@ -112,8 +110,7 @@ def hard_shrink(x, threshold=None):
     return _hard_shrink_(**kwargs)
 
 
-hard_shrink.__doc__ = _hard_shrink_.__doc__ + "\n" \
-    + """
+hard_shrink.__doc__ = _hard_shrink_.__doc__ + """
 Examples:
 
     >>> data = fluid.layers.data(name="input", shape=[784])
@@ -141,3 +138,25 @@ Examples:
     >>> data = fluid.layers.data(name="input", shape=[32, 784])
     >>> result = fluid.layers.cumsum(data, axis=0)
 """
+
+__all__ += ['thresholded_relu']
+
+_thresholded_relu_ = generate_layer_fn('thresholded_relu')
+
+
+def thresholded_relu(x, threshold=None):
+    kwargs = dict()
+    for name in locals():
+        val = locals()[name]
+        if val is not None:
+            kwargs[name] = val
+
+    return _thresholded_relu_(**kwargs)
+
+
+thresholded_relu.__doc__ = _thresholded_relu_.__doc__ + """
+Examples:
+
+    >>> data = fluid.layers.data(name="input", shape=[1])
+    >>> result = fluid.layers.thresholded_relu(data, threshold=0.4)
+"""
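
Reviewer note, not part of the patch: a minimal NumPy sketch of the two activation semantics whose DOC strings are touched above, written from the formulas and defaults shown in the diff. The HardShrink case list is cut off by the hunk context, so its `|x| > threshold` form is recalled from the operator's standard definition rather than from this patch; the `_ref` names and sample values are made up for illustration.

```python
import numpy as np

def thresholded_relu_ref(x, threshold=1.0):
    # out = x if x > threshold, else 0; the default of 1.0 matches
    # .SetDefault(1.0f) in activation_op.cc above.
    return np.where(x > threshold, x, 0.0)

def hard_shrink_ref(x, threshold=0.5):
    # out = x if |x| > threshold, else 0; the default of 0.5 matches
    # "[default: 0.5]" in activation_op.cc above.
    return np.where(np.abs(x) > threshold, x, 0.0)

x = np.array([-2.0, -0.3, 0.2, 0.4, 1.5])
print(thresholded_relu_ref(x, threshold=0.4))  # [0.  0.  0.  0.  1.5]
print(hard_shrink_ref(x))                      # [-2.  0.  0.  0.  1.5]
```

Note also that the wrapper added in ops.py forwards only the kwargs that are not `None`, so an omitted `threshold` falls back to the default registered in C++ instead of overriding it with `None`.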
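
Also not part of the patch: the row_conv_op.cc hunk only reformats the doc heading, but since the comment describes the operator as the lookahead convolution from DeepSpeech2, here is a rough NumPy sketch of that semantics for context: out[t] is the filter rows applied elementwise to the current and next context-1 input rows, summed. This is an assumption from the paper's description, not taken from the kernel in this diff, and `row_conv_ref` is a hypothetical name.

```python
import numpy as np

def row_conv_ref(x, w):
    # x: (T, N) one sequence, w: (context, N) lookahead filter, following
    # the "T x N" shape description quoted in the hunk above.
    T = x.shape[0]
    context = w.shape[0]
    out = np.zeros_like(x)
    for t in range(T):
        steps = min(context, T - t)  # assumed zero-padding past the end
        out[t] = (w[:steps] * x[t:t + steps]).sum(axis=0)
    return out

x = np.random.randn(6, 4)        # T=6 steps, N=4 features
w = np.random.randn(2, 4)        # current step plus 1 step of lookahead
print(row_conv_ref(x, w).shape)  # (6, 4)
```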