diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt
index dd3ffb60c4ab36d7417a9368b55b0b08b8617034..bb99b6fc400edcb364a47f65414f82b9f0e75d0a 100644
--- a/paddle/phi/api/lib/CMakeLists.txt
+++ b/paddle/phi/api/lib/CMakeLists.txt
@@ -135,8 +135,8 @@ endforeach()
 
 # validation of api yamls
 message("validate api yaml:
-- ${parsed_api_dir}/new_api.parsed.yaml
-- ${parsed_api_dir}/new_backward_api.parsed.yaml")
+- ${parsed_api_dir}/api.parsed.yaml
+- ${parsed_api_dir}/backward_api.parsed.yaml")
 execute_process(
   WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/python/paddle/utils/code_gen
   COMMAND
diff --git a/paddle/phi/kernels/bernoulli_kernel.h b/paddle/phi/kernels/bernoulli_kernel.h
index 7d72d6eaf98afd8ec7bea215fc03ea5293e76ce3..4e0f9b5267fda9d298da45b56d33cc7499d601ef 100644
--- a/paddle/phi/kernels/bernoulli_kernel.h
+++ b/paddle/phi/kernels/bernoulli_kernel.h
@@ -19,6 +19,14 @@
 
 namespace phi {
 
+/**
+ * @brief This kernel returns a tensor filled with random binary (0 or 1)
+ * numbers drawn from a Bernoulli distribution.
+ * @param  ctx   device context
+ * @param  x     A tensor with probabilities for generating the random binary
+ * numbers
+ * @param  out   A tensor filled with random binary numbers
+ */
 template <typename T, typename Context>
 void BernoulliKernel(const Context& ctx,
                      const DenseTensor& x,
diff --git a/paddle/phi/kernels/erf_kernel.h b/paddle/phi/kernels/erf_kernel.h
index 1d5c57d2201c749f541cabe74b112073c2dace06..9a7e7f8f0f3b47bc0fed75e61a2cae5b26fe7733 100644
--- a/paddle/phi/kernels/erf_kernel.h
+++ b/paddle/phi/kernels/erf_kernel.h
@@ -18,6 +18,19 @@ limitations under the License. */
 
 namespace phi {
 
+/**
+ * @brief Erf Kernel.
+ *        The equation is:
+ *        $$
+ *        f(x) = \frac{2}{\sqrt{\pi}} \int_{0}^{x}e^{- \eta^{2}}d\eta
+ *        $$
+ *
+ *        The input `x` can carry the LoD (Level of Details) information,
+ *        or not. And the output shares the LoD information with input `x`.
+ * @param  dev_ctx   device context
+ * @param  x         The input tensor of erf kernel
+ * @param  out       The output tensor of erf kernel
+ */
 template <typename T, typename Context>
 void ErfKernel(const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
 
diff --git a/paddle/phi/kernels/mv_kernel.h b/paddle/phi/kernels/mv_kernel.h
index df4626f4d49d4e8f1fe400b648bc3aa9d147eede..ad2794d9e425a56d3f675afbde897d72e9bfb597 100644
--- a/paddle/phi/kernels/mv_kernel.h
+++ b/paddle/phi/kernels/mv_kernel.h
@@ -18,6 +18,14 @@
 
 namespace phi {
 
+/**
+ * @brief This kernel is used to perform matrix vector multiplication
+ *        of the input tensors `x` and `vec`
+ * @param  ctx   device context
+ * @param  x     The matrix input of mv
+ * @param  vec   The vector input of mv
+ * @param  out   The output of mv
+ */
 template <typename T, typename Context>
 void MvKernel(const Context& ctx,
               const DenseTensor& x,
diff --git a/paddle/phi/kernels/poisson_kernel.h b/paddle/phi/kernels/poisson_kernel.h
index f67c9c46311d16c2ec29393c6f957c0cb3371fd2..b2b2ea97f014e4308848bdd3bffb5b89548b14c7 100644
--- a/paddle/phi/kernels/poisson_kernel.h
+++ b/paddle/phi/kernels/poisson_kernel.h
@@ -18,6 +18,13 @@
 
 namespace phi {
 
+/**
+ * @brief This kernel generates random values that obey a Poisson distribution.
+ * @param  ctx   device context
+ * @param  x     The input tensor of poisson kernel
+ * @param  out   The output tensor of poisson kernel, it has the same shape and
+ *               dtype as the input. Each element corresponds to input tensor
+ */
 template <typename T, typename Context>
 void PoissonKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out);
 
diff --git a/paddle/phi/kernels/trace_kernel.h b/paddle/phi/kernels/trace_kernel.h
index 200d9e47ede3df16daf2548491945b0b4ff0fb82..3f5bc333c21369b1c4091187a9e5eed8b5aa043b 100644
--- a/paddle/phi/kernels/trace_kernel.h
+++ b/paddle/phi/kernels/trace_kernel.h
@@ -18,6 +18,25 @@
 
 namespace phi {
 
+/**
+ * @brief Trace Kernel.
+ *        Return the sum along diagonals of the input tensor.
+ *        The behavior of this operator is similar to how `numpy.trace` works.
+ *
+ *        If Input is 2-D, returns the sum of diagonal.
+ *        If Input has larger dimensions, then returns a tensor of diagonal
+ *        sums, with diagonals taken from the 2-D planes specified by axis1
+ *        and axis2.
+ * @param  ctx      device context
+ * @param  x        The input tensor, from which the diagonals are taken
+ * @param  offset   offset of the diagonal from the main diagonal.
+ *                  Can be either positive or negative.
+ * @param  axis1    the first axis of the 2-D planes from which the diagonals
+ *                  should be taken. Can be either positive or negative
+ * @param  axis2    the second axis of the 2-D planes from which the diagonals
+ *                  should be taken. Can be either positive or negative
+ * @param  out      the sum along diagonals of the input tensor
+ */
 template <typename T, typename Context>
 void TraceKernel(const Context& ctx,
                  const DenseTensor& x,
diff --git a/paddle/phi/kernels/trunc_kernel.h b/paddle/phi/kernels/trunc_kernel.h
index 645ad48323421ea22e637ef3b7d6a8a60b1e12f6..d9a7ea633934884baf7174579c5b9b41fc1585a1 100644
--- a/paddle/phi/kernels/trunc_kernel.h
+++ b/paddle/phi/kernels/trunc_kernel.h
@@ -18,6 +18,12 @@
 
 namespace phi {
 
+/**
+ * @brief Returns a new tensor with the truncated integer values of the input.
+ * @param  dev_ctx   device context
+ * @param  x         The input tensor of trunc kernel
+ * @param  out       The output tensor of trunc kernel
+ */
 template <typename T, typename Context>
 void TruncKernel(const Context& dev_ctx,
                  const DenseTensor& x,
diff --git a/python/paddle/utils/code_gen/templates/operator_utils.c.j2 b/python/paddle/utils/code_gen/templates/operator_utils.c.j2
index c9820e369ea96f5c199edd1f6b89411a8f76dce8..63a6fe82a443ed6c6ca422c27e71047ad677faa1 100644
--- a/python/paddle/utils/code_gen/templates/operator_utils.c.j2
+++ b/python/paddle/utils/code_gen/templates/operator_utils.c.j2
@@ -128,7 +128,9 @@ PD_REGISTER_ARG_MAPPING_FN({{api["name"]}}, phi::{{api["name"] | to_pascal_case}
 {% macro get_input_list(inputs, kernel_args) %}{# inline #}
 paddle::small_vector<const char*> inputs {
 {%- for input in inputs %}
+{%- if input["name"] in kernel_args %}
 {{input["name"] | to_opmaker_name_cstr}}{{", " if not loop.last}}
+{%- endif %}
 {%- endfor %}
 }
 {%- endmacro %}