在文档中统一静态图模式与动态图模式的英文翻译 (#49170)

* 1219
* temporarily change the num_diff_files limit, test=document_fix
* Revert "temporarily change the num_diff_files limit, test=document_fix"
  This reverts commit 8e70f00ef468d2dad0e38b3da06295ed62990d20.
* for codestyle
* remove duplicate license
* `static mode` -> `static graph mode`
* Update hybrid_parallel_inference.py
* Update layer_function_generator.py
* Update manipulation.py
* reset

Co-authored-by: N Ligoml <39876205+Ligoml@users.noreply.github.com>
Co-authored-by: N SigureMo <sigure.qaq@gmail.com>

在文档中统一静态图模式与动态图模式的英文翻译 (#49170)
* 1219
* temporarily change the num_diff_files limit, test=document_fix
* Revert "temporarily change the num_diff_files limit, test=document_fix"
  This reverts commit 8e70f00ef468d2dad0e38b3da06295ed62990d20.
* for codestyle
* remove duplicate license
* `static mode` -> `static graph mode`
* Update hybrid_parallel_inference.py
* Update layer_function_generator.py
* Update manipulation.py
* reset

Co-authored-by: N Ligoml <39876205+Ligoml@users.noreply.github.com>
Co-authored-by: N SigureMo <sigure.qaq@gmail.com>
a186e60d · Sanbu · GitHub · 162f8fe2 · a186e60d · a186e60d
106 changed files
--- a/paddle/fluid/eager/autograd_meta.h
+++ b/paddle/fluid/eager/autograd_meta.h
@@ -23,7 +23,7 @@ using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
 *
 * AutogradMeta is what record the backward info for tensor. When we run
 * computation graph eagerly, we can not build a static paddle program like
- * static mode do, so we need a new method to record forward info to trace
+ * static graph mode do, so we need a new method to record forward info to trace
 * backward when we finish all forward computation. This require our
 * AutogradMeta class record following main members
 *

--- a/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
+++ b/paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -760,7 +760,7 @@ bool BuildOpFuncList(const platform::Place& place,
                  new phi::Kernel(phi::KernelFactory::Instance().SelectKernel(
                      phi_kernel_name, phi_cpu_kernel_key)));
              if (op_with_kernel->PhiKernel()->IsValid()) {
-                VLOG(6) << "Static mode PrepareImpl - kernel name: "
+                VLOG(6) << "Static graph mode PrepareImpl - kernel name: "
                        << phi_kernel_name
                        << " | kernel key: " << phi_cpu_kernel_key
                        << " | kernel: " << *(op_with_kernel->PhiKernel());

--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -1679,12 +1679,12 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
              phi_kernel_name, phi_kernel_key)));

      if (phi_kernel_->IsValid()) {
-        VLOG(6) << "Static mode ChoosePhiKernel - kernel name: "
+        VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
                << phi_kernel_name << " | kernel key: " << phi_kernel_key
                << " | kernel: " << *phi_kernel_;
      } else {
-        VLOG(6) << "Static mode ChoosePhiKernel - kernel `" << phi_kernel_name
-                << "` not found.";
+        VLOG(6) << "Static graph mode ChoosePhiKernel - kernel `"
+                << phi_kernel_name << "` not found.";
      }
    } else {
      phi_kernel_name = kernel_signature_->name;
@@ -1815,7 +1815,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,

        dev_ctx = pool.Get(platform::CPUPlace());
        if (phi_kernel_->IsValid()) {
-          VLOG(6) << "Static mode PrepareImpl - kernel name: "
+          VLOG(6) << "Static graph mode PrepareImpl - kernel name: "
                  << phi_kernel_name << " | kernel key: " << phi_cpu_kernel_key
                  << " | kernel: " << *phi_kernel_;
          run_phi_kernel_ = true;
@@ -2083,11 +2083,11 @@ phi::KernelKey OperatorWithKernel::ChoosePhiKernel(
      phi_kernel_name, phi_kernel_key)));

  if (phi_kernel_->IsValid()) {
-    VLOG(6) << "Static mode ChoosePhiKernel - kernel name: " << phi_kernel_name
-            << " | kernel key: " << phi_kernel_key
+    VLOG(6) << "Static graph mode ChoosePhiKernel - kernel name: "
+            << phi_kernel_name << " | kernel key: " << phi_kernel_key
            << " | kernel: " << *phi_kernel_;
  } else {
-    VLOG(6) << "Static mode ChoosePhiKernel - kernel `" << phi_kernel_name
+    VLOG(6) << "Static graph mode ChoosePhiKernel - kernel `" << phi_kernel_name
            << "` not found.";
  }
  return phi_kernel_key;

--- a/paddle/fluid/imperative/tracer.h
+++ b/paddle/fluid/imperative/tracer.h
@@ -136,7 +136,7 @@ class Tracer {
  }

  // Note(Aurelius84): The `tmp` is used as prefix key while naming a temporary
-  // intermediate var both in imperative and static mode. But the
+  // intermediate var both in imperative and static graph mode. But the
  // `UniqueNameGenerator` in C++ and `unique_name.py` in Python doesn't share
  // the same auto-increment id. It will create a variable repeatedly with same
  // name like `tmp_0` in some cases when transform dygraph into static layers.

--- a/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/c_allreduce_op.cc
-/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.

 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/plugin/c_allreduce_op_plugin.h"
@@ -32,8 +32,9 @@ class CAllReduceOpConverter : public OpConverter {
                  bool test_mode) override {
    VLOG(4) << "convert fluid callreduce op to tensorrt layer";
    if (!engine_->with_dynamic_shape()) {
-      PADDLE_THROW(platform::errors::Fatal(
-          "Unsupported static mode. Please set dynamic shape of inputs."));
+      PADDLE_THROW(
+          platform::errors::Fatal("Unsupported static graph mode. Please set "
+                                  "dynamic shape of inputs."));
    }
    ReduceType red_type = op_to_reduce_type[op.type()];
    std::string name = op.type();

--- a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc
+++ b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc
@@ -28,8 +28,9 @@ class PrelnResidualBiasOpConverter : public OpConverter {
                  bool test_mode) override {
    VLOG(4) << "convert fused preln_residual_bias op to tensorrt layer";
    if (!engine_->with_dynamic_shape()) {
-      PADDLE_THROW(platform::errors::Fatal(
-          "Unsupported static mode. Please set dynamic shape of inputs."));
+      PADDLE_THROW(
+          platform::errors::Fatal("Unsupported static graph mode. Please set "
+                                  "dynamic shape of inputs."));
    }
    framework::OpDesc op_desc(op, nullptr);
    // Declare inputs

--- a/paddle/fluid/operators/run_program_op.h
+++ b/paddle/fluid/operators/run_program_op.h
@@ -288,8 +288,8 @@ class RunProgramOpKernel : public framework::OpKernel<T> {
    auto *out_scope_vec = ctx.Output<StepScopeVar>("OutScope");
    std::unique_ptr<framework::Scope> inner_scope{nullptr};
    if (out_scope_vec->size() == 0) {
-      // For cuda graph under static mode usage.
-      // For static mode, we cannot set value of a tensor before any run,
+      // For cuda graph under static graph mode usage.
+      // For static graph mode, we cannot set value of a tensor before any run,
      // the OutScope variable passed to the op actually contains nothing.
      // Just create a tmp scope to run the program.
      PADDLE_ENFORCE_EQ(

--- a/paddle/fluid/operators/set_value_op.cc
+++ b/paddle/fluid/operators/set_value_op.cc
@@ -145,7 +145,7 @@ class SetValueMaker : public framework::OpProtoAndCheckerMaker {
    AddAttr<std::vector<int64_t>>("shape", "(vector<int64_t>) Shape of values.")
        .SetDefault({});
    AddComment(R"DOC(SetValue operator.
-Assignment to a phi::DenseTensor in static mode.
+Assignment to a phi::DenseTensor in static graph mode.
 )DOC");
  }
 };

--- a/paddle/fluid/pybind/eager_legacy_op_function_generator.cc
+++ b/paddle/fluid/pybind/eager_legacy_op_function_generator.cc
@@ -443,9 +443,9 @@ GenerateOpFunctions() {
    // In this case, output will reuse input varbase.
    // Dygraph mode needs to be aligned with the in-place strategy in static
    // mode, and the mapping relationships between output and input that have
-    // been defined in static mode should be used in dygraph mode.
-    // Find which ops need to use Inplace strategy in static mode, and get the
-    // mapping relationship between Inplace output and input.
+    // been defined in static graph mode should be used in dygraph mode.
+    // Find which ops need to use Inplace strategy in static graph mode, and get
+    // the mapping relationship between Inplace output and input.
    auto& infer_inplace =
        paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_;
    std::map<std::string, std::string> inplace_map;

--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -39,7 +39,8 @@ PyObject* tensor_properties_get_name(TensorObject* self, void* closure) {
  EAGER_TRY
  // NOTE(dev): [why not use egr::Controller::Instance::GernerateUniqueName()?]
  // Beacause Controller must holder a tracer, but 'tensor.name' maybe called
-  // everywhere such as static mode in @to_static, which means tracer is None.
+  // everywhere such as static graph mode in @to_static, which means tracer is
+  // None.
  static egr::UniqueNameGenerator name_generator;
  if (self->tensor.name().empty()) {
    self->tensor.set_name(name_generator.Generate());

--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -473,9 +473,9 @@ GenerateOpFunctions(int split_count) {
    // In this case, output will reuse input varbase.
    // Dygraph mode needs to be aligned with the in-place strategy in static
    // mode, and the mapping relationships between output and input that have
-    // been defined in static mode should be used in dygraph mode.
-    // Find which ops need to use Inplace strategy in static mode, and get the
-    // mapping relationship between Inplace output and input.
+    // been defined in static graph mode should be used in dygraph mode.
+    // Find which ops need to use Inplace strategy in static graph mode, and get
+    // the mapping relationship between Inplace output and input.
    auto& infer_inplace =
        paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_;
    std::map<std::string, std::string> inplace_map;

--- a/paddle/phi/kernels/fusion/README.md
+++ b/paddle/phi/kernels/fusion/README.md
@@ -2,7 +2,7 @@

 1. We don't recommend to implement Python API for fusion kernel

-  - We don't recommend to implement Python API for fusion kernel, because it contains many inputs or outputs arguments generally, it is difficult to use and understand as an Python API, we recommend to call fusion kernel by pass optimization in dy2static mode or static mode.
+  - We don't recommend to implement Python API for fusion kernel, because it contains many inputs or outputs arguments generally, it is difficult to use and understand as an Python API, we recommend to call fusion kernel by pass optimization in dy2static mode or static graph mode.
  - We also don't recommend to reuse fusion kernel in other kernel implementation, but recommended that the fusion kernel be implemented by reusing other kernels.

 2. We don't require fusion kernel to have implementations for all devices

--- a/python/paddle/device/cuda/graphs.py
+++ b/python/paddle/device/cuda/graphs.py
@@ -82,7 +82,7 @@ class CUDAGraph:
 def wrap_cuda_graph(function, mode="thread_local", memory_pool="default"):
    assert mode in ALL_MODES
    if not paddle.in_dynamic_mode():
-        # static mode
+        # static graph mode
        from paddle.fluid.framework import _cuda_graph_guard

        global cuda_graph_id
@@ -94,7 +94,7 @@ def wrap_cuda_graph(function, mode="thread_local", memory_pool="default"):
            memory_pool_id = CoreCUDAGraph.gen_new_memory_pool_id()
        else:
            raise ValueError(
-                "memory_pool should be one of default or new under static mode, but got",
+                "memory_pool should be one of default or new under static graph mode, but got",
                memory_pool,
            )
        return _cuda_graph_guard(

--- a/python/paddle/distributed/auto_parallel/engine.py
+++ b/python/paddle/distributed/auto_parallel/engine.py
@@ -539,7 +539,7 @@ class Engine:

            paddle.enable_static()
        else:
-            # build program in static mode
+            # build program in static graph mode
            serial_main_prog = self._serial_main_progs.get(mode, None)
            if serial_main_prog is not None:
                return

--- a/python/paddle/distributed/collective.py
+++ b/python/paddle/distributed/collective.py
@@ -162,7 +162,7 @@ def _new_process_group_impl(

 # _custom_gid provides a way for users to
 # set the group id, which is usually useful
-# to be compatible with the static mode.
+# to be compatible with the static graph mode.
 _custom_gid = None



--- a/python/paddle/distributed/communication/stream/all_gather.py
+++ b/python/paddle/distributed/communication/stream/all_gather.py
@@ -178,10 +178,12 @@ def all_gather(
                tensor_or_tensor_list, tensor, group, sync_op, use_calc_stream
            )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        if paddle.is_tensor(tensor_or_tensor_list):
            raise RuntimeError(
-                "Only support passing a tensor list to `all_gather` in static mode now."
+                "Only support passing a tensor list to `all_gather` in static graph mode now."
            )
        else:
            return _all_gather_in_static_mode(

--- a/python/paddle/distributed/communication/stream/all_reduce.py
+++ b/python/paddle/distributed/communication/stream/all_reduce.py
@@ -58,7 +58,7 @@ def _all_reduce_in_static_mode(tensor, op, group, sync_op, use_calc_stream):
    if not isinstance(ring_id, int):
        raise ValueError("The type of 'ring_id' for all_reduce should be int.")

-    # TODO: Support task and use task.wait in static mode
+    # TODO: Support task and use task.wait in static graph mode
    #       Use use_calc_stream rather than sync_op
    helper = layer_helper.LayerHelper(op_type, **locals())
    helper.append_op(
@@ -123,7 +123,9 @@ def all_reduce(
            tensor, op, group, sync_op, use_calc_stream
        )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _all_reduce_in_static_mode(
            tensor, op, group, sync_op, use_calc_stream
        )
--- a/python/paddle/distributed/communication/stream/all_to_all.py
+++ b/python/paddle/distributed/communication/stream/all_to_all.py
@@ -200,7 +200,9 @@ def alltoall(
                "The output and input should be both tensor or tensor list."
            )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _all_to_all_in_static_mode(
            out_tensor_or_tensor_list,
            in_tensor_or_tensor_list,

--- a/python/paddle/distributed/communication/stream/broadcast.py
+++ b/python/paddle/distributed/communication/stream/broadcast.py
@@ -126,7 +126,9 @@ def broadcast(tensor, src, group=None, sync_op=True, use_calc_stream=False):
            tensor, src_rank_in_group, group, sync_op, use_calc_stream
        )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _broadcast_in_static_mode(
            tensor, src, group, sync_op, use_calc_stream
        )
--- a/python/paddle/distributed/communication/stream/recv.py
+++ b/python/paddle/distributed/communication/stream/recv.py
@@ -114,7 +114,9 @@ def recv(tensor, src=0, group=None, sync_op=True, use_calc_stream=False):
            tensor, src_rank_in_group, group, sync_op, use_calc_stream
        )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _recv_in_static_mode(
            tensor, src, group, sync_op, use_calc_stream
        )
--- a/python/paddle/distributed/communication/stream/reduce.py
+++ b/python/paddle/distributed/communication/stream/reduce.py
@@ -139,7 +139,9 @@ def reduce(
            tensor, dst_rank_in_group, op, group, sync_op, use_calc_stream
        )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _reduce_in_static_mode(
            tensor, dst, op, group, sync_op, use_calc_stream
        )
--- a/python/paddle/distributed/communication/stream/scatter.py
+++ b/python/paddle/distributed/communication/stream/scatter.py
@@ -220,7 +220,9 @@ def scatter(
                use_calc_stream,
            )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."

        return _scatter_in_static_mode(
            tensor,

--- a/python/paddle/distributed/communication/stream/send.py
+++ b/python/paddle/distributed/communication/stream/send.py
@@ -113,7 +113,9 @@ def send(tensor, dst=0, group=None, sync_op=True, use_calc_stream=False):
            tensor, dst_rank_in_group, group, sync_op, use_calc_stream
        )
    else:
-        assert group is None, "Group can not be used in static mode for now."
+        assert (
+            group is None
+        ), "Group can not be used in static graph mode for now."
        return _send_in_static_mode(
            tensor, dst, group, sync_op, use_calc_stream
        )
--- a/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
+++ b/python/paddle/distributed/fleet/utils/hybrid_parallel_inference.py
@@ -206,7 +206,8 @@ class HybridParallelInferenceHelper:
        elif core.is_compiled_with_cuda():
            self._device = "gpu"
        assert self._device, "Only gpu and npu are supported."
-        assert not in_dygraph_mode(), "Only static mode is supported."
+
+        assert not in_dygraph_mode(), "Only static graph mode is supported."

        op_maker = core.op_proto_and_checker_maker
        self._op_role = op_maker.OpRole

--- a/python/paddle/distributed/models/moe/utils.py
+++ b/python/paddle/distributed/models/moe/utils.py
@@ -125,7 +125,7 @@ def _random_routing(topk_idx, topk_value, prob, topk=2):
        if in_dygraph_mode():
            return _legacy_C_ops.random_routing(prob, topk_value, topk_idx)
        else:
-            raise RuntimeError("Not supporting static mode now")
+            raise RuntimeError("Not supporting static graph mode now")
    else:
        raise RuntimeError("only topk=2 is supported now")


--- a/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
+++ b/python/paddle/distributed/passes/auto_parallel_data_parallel_optimization.py
@@ -279,7 +279,7 @@ class DataParallelOptimizationPass(PassBase):
        # NOTE current different nccl comm will use different cuda stream
        # so if there too many dp group there will be too many stream need to be
        # created and sync.
-        # revise here when framework support custom stream in static mode.
+        # revise here when framework support custom stream in static graph mode.
        num_dp_comm_stream = len(set(self._group_to_grad_name_map.keys()))
        if num_dp_comm_stream > __max_stream_num_allow__:
            return False

--- a/python/paddle/fluid/compiler.py
+++ b/python/paddle/fluid/compiler.py
@@ -751,7 +751,7 @@ class IpuDynamicPatcher:
    def patch_lr_scheduler(ipu_strategy):
        from paddle.optimizer.lr import LRScheduler

-        # For IPU dynamic graph usage, lr_var is not synced in executor as static mode do.
+        # For IPU dynamic graph usage, lr_var is not synced in executor as static graph mode do.
        # Manually set lr to ipu_strategy to update the lr.
        old_step = LRScheduler.step


--- a/python/paddle/fluid/contrib/optimizer.py
+++ b/python/paddle/fluid/contrib/optimizer.py
@@ -53,7 +53,7 @@ class Momentum(Optimizer):
        momentum (float): Momentum factor
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        use_nesterov (bool, optional): Enables Nesterov momentum, default is false.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \

--- a/python/paddle/fluid/dataloader/dataloader_iter.py
+++ b/python/paddle/fluid/dataloader/dataloader_iter.py
@@ -303,7 +303,7 @@ class _DataLoaderIterSingleProcess(_DataLoaderIterBase):
                )
                data = _restore_batch(data, self._structure_infos.pop(0))
            else:
-                # in static mode
+                # in static graph mode
                if self._return_list:
                    data = self._reader.read_next_list()
                    for i in range(len(data)):

--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -210,7 +210,7 @@ def enable_dygraph(place=None):
            print(paddle.in_dynamic_mode())  # True, dynamic mode is turn ON by default since paddle 2.0.0

            paddle.enable_static()
-            print(paddle.in_dynamic_mode())  # False, Now we are in static mode
+            print(paddle.in_dynamic_mode())  # False, Now we are in static graph mode

            paddle.disable_static()
            print(paddle.in_dynamic_mode())  # True, Now we are in dynamic mode
@@ -245,7 +245,7 @@ def disable_dygraph():
            print(paddle.in_dynamic_mode())  # True, dynamic mode is turn ON by default since paddle 2.0.0

            paddle.enable_static()
-            print(paddle.in_dynamic_mode())  # False, Now we are in static mode
+            print(paddle.in_dynamic_mode())  # False, Now we are in static graph mode

            paddle.disable_static()
            print(paddle.in_dynamic_mode())  # True, Now we are in dynamic mode

--- a/python/paddle/fluid/dygraph/parallel.py
+++ b/python/paddle/fluid/dygraph/parallel.py
@@ -570,7 +570,7 @@ class DataParallel(layers.Layer):

        assert (
            in_dygraph_mode()
-        ), "It's not supported to construct DataParallel in static mode."
+        ), "It's not supported to construct DataParallel in static graph mode."

        self._layers = layers
        self.find_unused_parameters = find_unused_parameters

--- a/python/paddle/fluid/framework.py
+++ b/python/paddle/fluid/framework.py
@@ -227,7 +227,7 @@ def in_dygraph_mode():
            print(paddle.in_dynamic_mode())  # True, dynamic mode is turn ON by default since paddle 2.0.0

            paddle.enable_static()
-            print(paddle.in_dynamic_mode())  # False, Now we are in static mode
+            print(paddle.in_dynamic_mode())  # False, Now we are in static graph mode

            paddle.disable_static()
            print(paddle.in_dynamic_mode())  # True, Now we are in dynamic mode
@@ -2833,7 +2833,7 @@ class Operator:
                op_attrs = dict()
            del attrs

-            # attr for static mode cuda graph
+            # attr for static graph mode cuda graph
            self._cuda_graph_attr = _current_cuda_graph_mode

            op_maker = core.op_proto_and_checker_maker
@@ -2979,7 +2979,7 @@ class Operator:
                            out_arg_names.append(arg)
                        else:
                            out_arg_names.append(arg.name)
-                        # TODO(minqiyang): could we remove variable's op in static mode?
+                        # TODO(minqiyang): could we remove variable's op in static graph mode?
                        if not _non_static_mode():
                            if isinstance(arg, str):
                                block.var(arg).op = self
@@ -3990,7 +3990,7 @@ class Block:

            # record ops in tracer rather than blocks
            #
-            # TODO(minqiyang): add op stop_gradient support in static mode too.
+            # TODO(minqiyang): add op stop_gradient support in static graph mode too.
            # currently, we only support stop_gradient in dygraph mode.

            _dygraph_tracer().trace_op(
@@ -7473,7 +7473,7 @@ def device_guard(device=None):
    """

    Note:
-        The API only supports static mode.
+        The API only supports static graph mode.

    A context manager that specifies the device on which the OP will be placed.

@@ -7547,9 +7547,9 @@ def _cuda_graph_guard(cuda_graph_attr=None):
    """

    Note:
-        The API only supports static mode.
+        The API only supports static graph mode.

-    A context manager that specifies the cuda_graph_mode which indicating the cuda graph capture under static mode.
+    A context manager that specifies the cuda_graph_mode which indicating the cuda graph capture under static graph mode.

    Args:
        cuda_graph_attr(str|None): The cuda graph attr with the format of:
@@ -7557,7 +7557,7 @@ def _cuda_graph_guard(cuda_graph_attr=None):
    """
    assert (
        not _non_static_mode()
-    ), "cuda_graph_guard only works under static mode"
+    ), "cuda_graph_guard only works under static graph mode"
    assert (
        core.is_compiled_with_cuda()
    ), "cuda_graph_guard context can be only used when Paddle is compiled with cuda"

--- a/python/paddle/fluid/layers/math_op_patch.py
+++ b/python/paddle/fluid/layers/math_op_patch.py
@@ -155,12 +155,12 @@ def monkey_patch_variable():
    @static_only
    def place(self):
        """
-        Variable don't have 'place' interface in static mode
+        Variable don't have 'place' interface in static graph mode
        But this interface can greatly facilitate dy2static.
        So we give a warnning here and return None.
        """
        warnings.warn(
-            "Variable do not have 'place' interface for static mode, try not to use it. None will be returned."
+            "Variable do not have 'place' interface for static graph mode, try not to use it. None will be returned."
        )
        return None


--- a/python/paddle/fluid/layers/utils.py
+++ b/python/paddle/fluid/layers/utils.py
@@ -484,7 +484,7 @@ def try_set_static_shape_tensor(tensor, shape):

    """
    if not _non_static_mode():
-        # static mode, and shape is not all inferred (contains -1)
+        # static graph mode, and shape is not all inferred (contains -1)
        if -1 in tensor.shape:
            if isinstance(shape, Variable):
                shape = try_get_constant_shape_from_tensor(shape)

--- a/python/paddle/fluid/lazy_init.py
+++ b/python/paddle/fluid/lazy_init.py
@@ -19,7 +19,7 @@ __all__ = ["LazyGuard"]

 class LazyInitHelper:
    """
-    A Helper Context to trigger switching mode between dygraph and static mode,
+    A Helper Context to trigger switching mode between dygraph and static graph mode,
    and holds the startup program resource.
    """

@@ -54,7 +54,7 @@ class LazyInitHelper:
    def __enter__(self):
        """
        Switch into lazy mode and set _dygraph_tracer_ with None to convert
-        dygraph mode into static mode.
+        dygraph mode into static graph mode.
        """
        self.enable()
        if self._in_guard:

--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -1414,7 +1414,7 @@ class SGDOptimizer(Optimizer):
            Can be a float value or a Variable with one float value as data element.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -1605,7 +1605,7 @@ class MomentumOptimizer(Optimizer):
        momentum (float): Momentum factor
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        use_nesterov (bool, optional): Enables Nesterov momentum, default is false.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
@@ -1752,7 +1752,7 @@ class LarsMomentumOptimizer(Optimizer):
        lars_weight_decay (float): Weight decay coefficient for decaying using LARS.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -2014,7 +2014,7 @@ class AdagradOptimizer(Optimizer):
            The default value is 1e-06.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -2160,7 +2160,7 @@ class AdamOptimizer(Optimizer):
            The default value is 1e-08.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -2587,7 +2587,7 @@ class AdamaxOptimizer(Optimizer):
            The default value is 1e-08.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -2793,7 +2793,7 @@ class DpsgdOptimizer(Optimizer):
        sigma (float): for gaussian noise.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
    Notes:
       Currently, DpsgdOptimizer doesn't support sparse parameter optimization.
    """
@@ -2896,7 +2896,7 @@ class DecayedAdagradOptimizer(Optimizer):
            The default value is 1e-06.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -3021,7 +3021,7 @@ class AdadeltaOptimizer(Optimizer):
        rho (float): a floating point value indicating the decay rate. Default 0.95.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -3193,7 +3193,7 @@ class RMSPropOptimizer(Optimizer):
            computation and memory. Defaults to False.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -3390,7 +3390,7 @@ class FtrlOptimizer(Optimizer):
        lr_power (float): Learning Rate Power, default is -0.5.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \
@@ -3571,7 +3571,7 @@ class LambOptimizer(AdamOptimizer):
        epsilon (float, optional): A small float value for numerical stability. Default 1e-6.
        parameter_list (Iterable, optional):  Iterable of ``Variable`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        regularization (WeightDecayRegularizer, optional): The strategy of regularization. There are two method: \
             :ref:`api_fluid_regularizer_L1Decay` , :ref:`api_fluid_regularizer_L2Decay` . If a parameter has set \
            regularizer using :ref:`api_fluid_ParamAttr` already, the regularization setting here in optimizer will be \

--- a/python/paddle/fluid/reader.py
+++ b/python/paddle/fluid/reader.py
@@ -1347,7 +1347,7 @@ class GeneratorLoader(DataLoaderBase):
        self._iterable = iterable
        self._return_list = return_list
        if not self._feed_list:
-            raise Exception("Feed list must be given under static mode.")
+            raise Exception("Feed list must be given under static graph mode.")
        self._use_double_buffer = use_double_buffer
        self._capacity = capacity
        if not self._iterable:

--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_setup.py
@@ -58,7 +58,7 @@ def custom_relu_static(

            exe = static.Executor()
            exe.run(static.default_startup_program())
-            # in static mode, x data has been covered by out
+            # in static graph mode, x data has been covered by out
            out_v = exe.run(
                static.default_main_program(),
                feed={'X': np_x},
@@ -84,7 +84,7 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
            exe = static.Executor()
            exe.run(static.default_startup_program())

-            # in static mode, x data has been covered by out
+            # in static graph mode, x data has been covered by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()
            ).with_data_parallel(loss_name=out.name, places=places)

--- a/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
+++ b/python/paddle/fluid/tests/custom_op/test_custom_relu_op_xpu_setup.py
@@ -57,7 +57,7 @@ def custom_relu_static(

            exe = static.Executor()
            exe.run(static.default_startup_program())
-            # in static mode, x data has been covered by out
+            # in static graph mode, x data has been covered by out
            out_v = exe.run(
                static.default_main_program(),
                feed={'X': np_x},
@@ -83,7 +83,7 @@ def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
            exe = static.Executor()
            exe.run(static.default_startup_program())

-            # in static mode, x data has been covered by out
+            # in static graph mode, x data has been covered by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()
            ).with_data_parallel(loss_name=out.name, places=places)

--- a/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py
+++ b/python/paddle/fluid/tests/unittests/collective/collective_allgather_api.py
@@ -60,7 +60,7 @@ class TestCollectiveAllgatherAPI(test_base.TestCollectiveAPIRunnerBase):
        )
        assert (
            args['static_mode'] == 1
-        ), "collective_allgather_api only support static mode"
+        ), "collective_allgather_api only support static graph mode"
        result = self.get_model(
            train_prog, startup_prog, rank, dtype=args["dtype"]
        )

--- a/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
+++ b/python/paddle/fluid/tests/unittests/collective/fleet/dygraph_save_for_auto_infer.py
@@ -254,7 +254,7 @@ def train_mlp_static(args, model, loss, opt_state=None, save_model=False):
    model.fit(dataset, epochs=1)
    model.save(os.path.join(args.output_dir, "static_save"))
    paddle.device.cuda.synchronize()
-    print("=============== predict in static mode =================")
+    print("=============== predict in static graph mode =================")
    out = model.predict(dataset, verbose=1000)

    if save_model:

--- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_laplace_static.py
+++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_laplace_static.py
@@ -273,7 +273,7 @@ class TestLaplaceAndLaplaceKL(unittest.TestCase):


 """
-# Note: Zero dimension of a Tensor is not supported by static mode of paddle;
+# Note: Zero dimension of a Tensor is not supported by static graph mode of paddle;
 # therefore, ks test below cannot be conducted temporarily.

 @parameterize.place(config.DEVICES)

--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py
@@ -68,7 +68,7 @@ class A:
    def add(a, b):
        """
        dygraph mode, return a numpy object.
-        static mode, return a variable object.
+        static graph mode, return a variable object.
        """
        return paddle.to_tensor(a.numpy() + b.numpy())


--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py
@@ -259,7 +259,7 @@ class TestMNISTWithToStatic(TestMNIST):
                input_spec=input_spec,
                output_spec=[gt_out],
            )
-            # load in static mode
+            # load in static graph mode
            static_infer_out = self.jit_load_and_run_inference_static(
                model_save_dir, model_filename, params_filename, inputs
            )

--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py
@@ -237,7 +237,7 @@ class ResNetHelper:

    def train(self, to_static, build_strategy=None):
        """
-        Tests model decorated by `dygraph_to_static_output` in static mode. For users, the model is defined in dygraph mode and trained in static mode.
+        Tests model decorated by `dygraph_to_static_output` in static graph mode. For users, the model is defined in dygraph mode and trained in static graph mode.
        """
        with fluid.dygraph.guard(place):
            np.random.seed(SEED)

--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_amp.py
@@ -37,7 +37,7 @@ if fluid.is_compiled_with_cuda():

 def train(to_static, build_strategy=None):
    """
-    Tests model decorated by `dygraph_to_static_output` in static mode. For users, the model is defined in dygraph mode and trained in static mode.
+    Tests model decorated by `dygraph_to_static_output` in static graph mode. For users, the model is defined in dygraph mode and trained in static graph mode.
    """
    with fluid.dygraph.guard(place):
        np.random.seed(SEED)

--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_pure_fp16.py
@@ -34,7 +34,7 @@ if fluid.is_compiled_with_cuda():

 def train(to_static, build_strategy=None):
    """
-    Tests model decorated by `dygraph_to_static_output` in static mode. For users, the model is defined in dygraph mode and trained in static mode.
+    Tests model decorated by `dygraph_to_static_output` in static graph mode. For users, the model is defined in dygraph mode and trained in static graph mode.
    """
    np.random.seed(SEED)
    paddle.seed(SEED)

--- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet_v2.py
@@ -243,7 +243,7 @@ class TestResnet(unittest.TestCase):

    def do_train(self, to_static):
        """
-        Tests model decorated by `dygraph_to_static_output` in static mode. For users, the model is defined in dygraph mode and trained in static mode.
+        Tests model decorated by `dygraph_to_static_output` in static graph mode. For users, the model is defined in dygraph mode and trained in static graph mode.
        """
        paddle.disable_static(place)
        np.random.seed(SEED)

--- a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py
@@ -58,14 +58,14 @@ class RunProgramNPUOpTest(unittest.TestCase):
    def check_output(self):
        places = [fluid.NPUPlace(0)]
        for place in places:
-            # TODO: RunProgramOp is not recommended for use in static mode now
+            # TODO: RunProgramOp is not recommended for use in static graph mode now
            self.expect_outs = self.run_static_model(place, is_test=True)
            self.check_output_with_place(place)

    def check_grad(self):
        places = [fluid.NPUPlace(0)]
        for place in places:
-            # TODO: RunProgramOp is not recommended for use in static mode now
+            # TODO: RunProgramOp is not recommended for use in static graph mode now
            self.expect_grads = self.run_static_model(place, is_test=False)
            self.check_grad_with_place(place)


--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -1212,7 +1212,7 @@ class TestMultiTensorAdam(unittest.TestCase):
            np.testing.assert_allclose(
                params_dygraph1[idx], params_dygraph2[idx], rtol=1e-05
            )
-        # test static mode
+        # test static graph mode
        output_static1 = self._adam_optimize_static(
            place=place, use_amp=use_amp, use_multi_tensor=True
        )

--- a/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py
+++ b/python/paddle/fluid/tests/unittests/test_cholesky_solve_op.py
@@ -192,7 +192,7 @@ class TestCholeskySolveAPI(unittest.TestCase):
            )
            np.testing.assert_allclose(fetches[0], z_np, rtol=1e-05)

-    # test in static mode
+    # test in static graph mode
    def test_static(self):
        for place in self.place:
            self.check_static_result(place=place)

--- a/python/paddle/fluid/tests/unittests/test_digamma_op.py
+++ b/python/paddle/fluid/tests/unittests/test_digamma_op.py
@@ -101,7 +101,7 @@ class TestDigammaAPI(unittest.TestCase):
            self.assertTrue("digamma_res" in out.name)

    def test_dtype_error(self):
-        # in static mode
+        # in static graph mode
        with self.assertRaises(TypeError):
            with static.program_guard(static.Program()):
                x = static.data(name="x", shape=self._shape, dtype="int32")

--- a/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py
+++ b/python/paddle/fluid/tests/unittests/test_dygraph_mode_of_unittest.py
@@ -21,7 +21,7 @@ class TestDygraphModeOfUnittest(unittest.TestCase):
    def test_dygraph_mode(self):
        self.assertTrue(
            paddle.in_dynamic_mode(),
-            'Default Mode of Unittest should be dygraph mode, but get static mode.',
+            'Default Mode of Unittest should be dygraph mode, but get static graph mode.',
        )



--- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
+++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py
@@ -644,7 +644,7 @@ class PaddingRNNTestBase(unittest.TestCase):
        self, parallel=True, use_program_cache=True
    ):
        '''
-        Test that train ppl of padding mode is same to that of static mode
+        Test that train ppl of padding mode is the same as that of static graph mode
        '''
        config = RNNConfig('test', 'padding')
        with fluid.scope_guard(fluid.Scope()):
@@ -658,7 +658,7 @@ class PaddingRNNTestBase(unittest.TestCase):
 class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):
    def test_padding_mode_no_eager_deletion(self):
        '''
-        Test that train ppl of padding mode is same to that of static mode without eager deletion
+        Test that train ppl of padding mode is the same as that of static graph mode without eager deletion
        '''
        fluid.core._set_eager_deletion_mode(-1.0, 1.0, True)
        # When parallel is True, use_program_cache does not make a difference.
@@ -666,7 +666,7 @@ class EagerDeletionPaddingRNNTest(PaddingRNNTestBase):

    def test_padding_mode_eager_deletion(self):
        '''
-        Test that train ppl of padding mode is same to that of static mode under eager deletion
+        Test that train ppl of padding mode is the same as that of static graph mode under eager deletion
        '''
        fluid.core._set_eager_deletion_mode(0.0, 1.0, True)
        # When parallel is True, use_program_cache does not make a difference.

--- a/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_load_static_param.py
@@ -28,7 +28,7 @@ paddle.enable_static()

 class TestDygraphLoadStatic(unittest.TestCase):
    def testLoadStaticModel(self):
-        # static mode
+        # static graph mode
        temp_dir = tempfile.TemporaryDirectory()
        a = fluid.data(name="a", shape=[10, 10])
        conv_in = fluid.data(name="conv_in", shape=[None, 10, 10, 10])

--- a/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py
+++ b/python/paddle/fluid/tests/unittests/test_inplace_auto_generated_apis.py
@@ -20,7 +20,7 @@ import paddle
 from paddle.static import Program, program_guard


-# In static mode, inplace strategy will not be used in Inplace APIs.
+# In static graph mode, inplace strategy will not be used in Inplace APIs.
 class TestStaticAutoGeneratedAPI(unittest.TestCase):
    def setUp(self):
        paddle.enable_static()

--- a/python/paddle/fluid/tests/unittests/test_linalg_cond.py
+++ b/python/paddle/fluid/tests/unittests/test_linalg_cond.py
@@ -84,7 +84,7 @@ def gen_empty_input():
 class API_TestStaticCond(unittest.TestCase):
    def test_out(self):
        paddle.enable_static()
-        # test calling results of 'cond' in static mode
+        # test calling results of 'cond' in static graph mode
        x_list_n_n, x_list_m_n = gen_input()
        test_static_assert_true(self, x_list_n_n, p_list_n_n + p_list_m_n)
        test_static_assert_true(self, x_list_m_n, p_list_m_n)
@@ -117,7 +117,7 @@ class TestCondAPIError(unittest.TestCase):

    def test_static_api_error(self):
        paddle.enable_static()
-        # test raising errors when 'cond' is called in static mode
+        # test raising errors when 'cond' is called in static graph mode
        p_list_error = ('f ro', 'fre', 'NUC', -1.6, 0, 5)
        x_list_n_n, x_list_m_n = gen_input()
        for p in p_list_error:
@@ -132,7 +132,7 @@ class TestCondAPIError(unittest.TestCase):
                    x_data = static.data("X", shape=x.shape, dtype=x.dtype)
                    self.assertRaises(ValueError, paddle.linalg.cond, x_data, p)

-    # it's not supported when input is an empty tensor in static mode
+    # it's not supported when input is an empty tensor in static graph mode
    def test_static_empty_input_error(self):
        paddle.enable_static()


--- a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py
+++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py
@@ -67,7 +67,7 @@ class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase):
        self.epoch_num = 1
        self.batch_size = 128
        self.batch_num = 10
-        # enable static mode
+        # enable static graph mode
        paddle.enable_static()

    def tearDown(self):

--- a/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py
+++ b/python/paddle/fluid/tests/unittests/test_norm_nn_grad.py
@@ -87,7 +87,7 @@ class TestInstanceNormDoubleGradEagerCheck(unittest.TestCase):
            x = paddle.create_parameter(dtype=dtype, shape=shape, name='x')
            z = paddle.nn.functional.instance_norm(x)
            x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
-            # check for static mode
+            # check for static graph mode
            gradient_checker.double_grad_check(
                [x], z, x_init=x_arr, atol=atol, place=place, eps=eps
            )
@@ -129,7 +129,7 @@ class TestInstanceNormDoubleGradEagerCheckWithParams(
            x = paddle.create_parameter(dtype=dtype, shape=shape, name='x')
            z = paddle.nn.InstanceNorm2D(3)(x)
            x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
-            # check for static mode
+            # check for static graph mode
            gradient_checker.double_grad_check(
                [x], z, x_init=x_arr, atol=atol, place=place, eps=eps
            )

--- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py
@@ -374,7 +374,7 @@ class TestSaveLoadAny(unittest.TestCase):
        np.testing.assert_array_equal(tensor.numpy(), np.array(lod_static))

    def test_single_pickle_var_static(self):
-        # enable static mode
+        # enable static graph mode
        paddle.enable_static()
        with new_program_scope():
            # create network
@@ -547,7 +547,7 @@ class TestSaveLoadAny(unittest.TestCase):

        np.testing.assert_array_equal(load_array4[0], obj4[0])

-        # static mode
+        # static graph mode
        paddle.enable_static()

        load_tensor1 = paddle.load(path1, return_numpy=False)
@@ -1012,7 +1012,7 @@ class TestSaveLoad(unittest.TestCase):
        self.check_load_state_dict(layer_state_dict, load_layer_state_dict)
        self.check_load_state_dict(opt_state_dict, load_opt_state_dict)

-        # test save load in static mode
+        # test save load in static graph mode
        paddle.enable_static()
        static_save_path = os.path.join(
            self.temp_dir.name,

--- a/python/paddle/fluid/tests/unittests/test_pow.py
+++ b/python/paddle/fluid/tests/unittests/test_pow.py
@@ -42,7 +42,7 @@ def _run_power(mode, x, y, device='cpu'):
            y_ = paddle.to_tensor(y)
            res = paddle.pow(x_, y_)
            return res.numpy()
-    # static mode
+    # static graph mode
    elif mode == STATIC:
        paddle.enable_static()
        # y is scalar

--- a/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py
+++ b/python/paddle/fluid/tests/unittests/test_quantile_and_nanquantile.py
@@ -219,7 +219,7 @@ class TestError(unittest.TestCase):
 class TestQuantileRuntime(unittest.TestCase):
    """
    This class is used to test the API could run correctly with
-    different devices, different data types, and dygraph/static mode.
+    different devices, different data types, and dygraph/static graph mode.
    """

    def setUp(self):

--- a/python/paddle/fluid/tests/unittests/test_real_imag_op.py
+++ b/python/paddle/fluid/tests/unittests/test_real_imag_op.py
@@ -145,7 +145,7 @@ class TestRealAPI(unittest.TestCase):
            self.assertTrue("real_res" in out.name)

    def test_dtype_error(self):
-        # in static mode
+        # in static graph mode
        with self.assertRaises(TypeError):
            with static.program_guard(static.Program()):
                x = static.data(name="x", shape=self._shape, dtype="float32")

--- a/python/paddle/fluid/tests/unittests/test_run_program_op.py
+++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py
@@ -86,7 +86,7 @@ class RunProgramOpTest(unittest.TestCase):
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
-            # TODO: RunProgramOp is not recommended for use in static mode now
+            # TODO: RunProgramOp is not recommended for use in static graph mode now
            self.expect_outs = self.run_static_model(place, is_test=True)
            self.check_output_with_place(place)

@@ -95,7 +95,7 @@ class RunProgramOpTest(unittest.TestCase):
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
-            # TODO: RunProgramOp is not recommended for use in static mode now
+            # TODO: RunProgramOp is not recommended for use in static graph mode now
            self.expect_grads = self.run_static_model(place, is_test=False)
            self.check_grad_with_place(place)

@@ -437,7 +437,7 @@ class TestRunProgramOpWithEmbedding(RunProgramOpTest):
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))
        for place in places:
-            # TODO: RunProgramOp is not recommended for use in static mode now
+            # TODO: RunProgramOp is not recommended for use in static graph mode now
            self.calc_dygraph_grad(place)

    def build_model(self):

--- a/python/paddle/fluid/tests/unittests/test_set_value_op.py
+++ b/python/paddle/fluid/tests/unittests/test_set_value_op.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# Test set_value op in static mode
+# Test set_value op in static graph mode

 import unittest
 from functools import reduce

--- a/python/paddle/fluid/tests/unittests/test_sgd_op.py
+++ b/python/paddle/fluid/tests/unittests/test_sgd_op.py
@@ -401,7 +401,7 @@ class TestSGDMultiPrecision2_0(unittest.TestCase):
                rtol=1e-05,
                atol=0.1,
            )
-        "Test static mode"
+        "Test static graph mode"
        output1_st = self.static_sgd_mp(mp=True)
        output2_st = self.static_sgd_mp(mp=False)
        for idx in range(len(output1_st)):
@@ -511,7 +511,7 @@ class TestSGDMultiPrecision1_0(unittest.TestCase):
                rtol=1e-05,
                atol=0.1,
            )
-        "Test static mode"
+        "Test static graph mode"
        output1_st = self.static_sgd_mp(mp=True)
        output2_st = self.static_sgd_mp(mp=False)
        for idx in range(len(output1_st)):

--- a/python/paddle/fluid/tests/unittests/test_static_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py
@@ -1759,7 +1759,7 @@ class TestProgramStateOldSaveSingleModel(unittest.TestCase):

 class TestStaticSaveLoadPickle(unittest.TestCase):
    def test_pickle_protocol(self):
-        # enable static mode
+        # enable static graph mode
        paddle.enable_static()

        with new_program_scope():

--- a/python/paddle/fluid/tests/unittests/test_static_save_load_large.py
+++ b/python/paddle/fluid/tests/unittests/test_static_save_load_large.py
@@ -28,7 +28,7 @@ LARGE_PARAM = 2**26

 class TestStaticSaveLoadLargeParameters(unittest.TestCase):
    def test_large_parameters_static_save(self):
-        # enable static mode
+        # enable static graph mode
        paddle.enable_static()
        with new_program_scope():
            # create network

--- a/python/paddle/fluid/tests/unittests/test_tensordot.py
+++ b/python/paddle/fluid/tests/unittests/test_tensordot.py
@@ -282,7 +282,7 @@ class TestTensordotAPIAxesType(TestTensordotAPI):
        ]

    def test_tensor_axes(self):
-        # The 'axes' with type 'Tensor' in tensordot is not available in static mode
+        # The 'axes' with type 'Tensor' in tensordot is not available in static graph mode
        paddle.disable_static()
        tensor_axes = [
            paddle.to_tensor([1]),

--- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
+++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# Test set_value op in static mode
+# Test set_value op in static graph mode

 import sys
 import unittest

--- a/python/paddle/framework/io.py
+++ b/python/paddle/framework/io.py
@@ -902,7 +902,7 @@ def load(path, **configs):
        directory, such as ``model`` and model is a directory.

    Note:
-        If you load ``state_dict`` from the saved result of static mode API such as
+        If you load ``state_dict`` from the saved result of static graph mode API such as
        ``paddle.static.save`` or ``paddle.static.save_inference_model`` ,
        the structured variable name in dynamic mode will cannot be restored.
        You need to set the argument ``use_structured_name=False`` when using

--- a/python/paddle/geometric/message_passing/utils.py
+++ b/python/paddle/geometric/message_passing/utils.py
@@ -37,7 +37,7 @@ def convert_out_size_to_list(out_size):
 def get_out_size_tensor_inputs(inputs, attrs, out_size, op_type):
    """
    Convert out_size(int, np.int32, np.int64, Variable) to inputs
-    and attrs in static mode.
+    and attrs in static graph mode.
    """
    if out_size is None:
        attrs['out_size'] = [0]

--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -305,7 +305,7 @@ class StaticGraphAdapter:
        self.mode = 'train'
        assert (
            update is True
-        ), "Does not support `update == False` in static mode by now."
+        ), "Does not support `update == False` in static graph mode by now."
        return self._run(inputs, labels)

    def eval_batch(self, inputs, labels=None):
@@ -1012,7 +1012,7 @@ class Model:
    must be required for static graph.

    When training on GPU, auto mixed precision (AMP O1) and pure float16
-    (AMP O2) training are both supported in static mode and dynamic mode.
+    (AMP O2) training are both supported in static graph mode and dynamic mode.
    In static graph mode, before training with pure float16 (AMP O2),
    `multi_precision` could be set to True when creating optimizer, which can
    avoid poor accuracy or slow convergence in a way, and inputs of dtype float
@@ -1605,7 +1605,7 @@ class Model:
            if 'use_fp16_guard' in amp_config_key_set:
                if _non_static_mode():
                    raise ValueError(
-                        "'use_fp16_guard' is supported in static mode only."
+                        "'use_fp16_guard' is supported in static graph mode only."
                    )
                self._adapter._use_fp16_guard = amp_configs['use_fp16_guard']
                amp_config_key_set.remove('use_fp16_guard')
@@ -1643,7 +1643,7 @@ class Model:
                'incr_every_n_steps', 'decr_every_n_nan_or_inf',
                'use_dynamic_loss_scaling', 'custom_white_list',
                'custom_black_list', and 'custom_black_varnames'or
-                'use_fp16_guard' is only supported in static mode. Mixed
+                'use_fp16_guard' is only supported in static graph mode. Mixed
                precision API documentations  :ref:`api_paddle_amp_auto_cast`
                and  :ref:`api_paddle_amp_GradScaler` could be referenced
                for details. For convenience, 'amp_configs' could be set to

--- a/python/paddle/hapi/model_summary.py
+++ b/python/paddle/hapi/model_summary.py
@@ -180,7 +180,7 @@ def summary(net, input_size=None, dtypes=None, input=None):

    if not paddle.in_dynamic_mode():
        warnings.warn(
-            "Your model was created in static mode, this may not get correct summary information!"
+            "Your model was created in static graph mode, this may not get correct summary information!"
        )
        in_train_mode = False
    else:

--- a/python/paddle/incubate/autograd/primapi.py
+++ b/python/paddle/incubate/autograd/primapi.py
@@ -23,7 +23,7 @@ def forward_grad(outputs, inputs, grad_inputs=None):
    """Forward mode of automatic differentiation.

    Note:
-        **ONLY available in the static mode and primitive operators.**
+        **ONLY available in the static graph mode and primitive operators.**

    Args:
        outputs(Tensor|Sequence[Tensor]): The output tensor or tensors.
@@ -106,7 +106,7 @@ def grad(outputs, inputs, grad_outputs=None):
    """Reverse mode of automatic differentiation.

    Note:
-        **ONLY available in the static mode and primitive operators**
+        **ONLY available in the static graph mode and primitive operators.**

    Args:
        outputs(Tensor|Sequence[Tensor]): The output Tensor or Tensors.

--- a/python/paddle/incubate/autograd/primx.py
+++ b/python/paddle/incubate/autograd/primx.py
@@ -547,7 +547,7 @@ def _lower(block, reverse, blacklist):
 def orig2prim(block=None):
    """
    Note:
-        **This API is ONLY available in the static mode.**
+        **This API is ONLY available in the static graph mode.**
        **Args block must be None or current block of main program.**

    All operators in the target block are processed as follows.
@@ -572,7 +572,7 @@ def orig2prim(block=None):
 def prim2orig(block=None, blacklist=None):
    """
    Note:
-        **ONLY available in the static mode.**
+        **ONLY available in the static graph mode.**
        **Args block must be None or current block of main program.**

    All operators in the target block are processed as follows.

--- a/python/paddle/incubate/autograd/utils.py
+++ b/python/paddle/incubate/autograd/utils.py
@@ -35,7 +35,7 @@ prim_option = PrimOption()
 def prim_enabled():
    """
    Note:
-        **ONLY available in the static mode.**
+        **ONLY available in the static graph mode.**

    Shows whether the automatic differentiation mechanism based on
    automatic differential basic operators is ON. Defaults to OFF.
@@ -66,7 +66,7 @@ def prim_enabled():
 def enable_prim():
    """
    Note:
-        **ONLY available in the static mode.**
+        **ONLY available in the static graph mode.**

    Turns ON automatic differentiation mechanism based on automatic
    differential basic operators.
@@ -90,7 +90,7 @@ def enable_prim():
 def disable_prim():
    """
    Note:
-        **ONLY available in the static mode.**
+        **ONLY available in the static graph mode.**

    Turns OFF automatic differentiation mechanism based on automatic
    differential basic operators.

--- a/python/paddle/incubate/operators/graph_send_recv.py
+++ b/python/paddle/incubate/operators/graph_send_recv.py
@@ -189,7 +189,7 @@ def convert_out_size_to_list(out_size):
 def get_out_size_tensor_inputs(inputs, attrs, out_size, op_type):
    """
    Convert out_size(int, np.int32, np.int64, Variable) to inputs
-    and attrs in static mode.
+    and attrs in static graph mode.
    """
    if out_size is None:
        attrs['out_size'] = [0]

--- a/python/paddle/incubate/optimizer/functional/lbfgs.py
+++ b/python/paddle/incubate/optimizer/functional/lbfgs.py
@@ -132,7 +132,7 @@ def minimize_lbfgs(
    tail = paddle.full(shape=[1], fill_value=0, dtype='int64')

    shape = initial_position.shape[0]
-    # Use tensor as array of fixed length, rather than flexible tensor array. Because in static mode,
+    # Use tensor as array of fixed length, rather than flexible tensor array. Because in static graph mode,
    # tensor array will produce tensor of shape[-1], which will cause error when calling jacobian. In this way, can not use append
    # or pop, so we need head and tail to record where is the newest data and where is the oldest.
    # Totally speaking, realized a stack by array.

--- a/python/paddle/incubate/optimizer/modelaverage.py
+++ b/python/paddle/incubate/optimizer/modelaverage.py
@@ -59,7 +59,7 @@ class ModelAverage(Optimizer):
        average_window_rate (float): The calculate ratio of the window length relative to ``Parameter`` update times.
        parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        min_average_window (int, optional): the minimum size of average window length. The default value is 10000.
        max_average_window (int, optional): The maximum size of average window length. The default value is 10000.
        name (str, optional): Normally there is no need for user to set this property.

--- a/python/paddle/jit/api.py
+++ b/python/paddle/jit/api.py
@@ -195,7 +195,7 @@ def to_static(
 ):
    """
    Converts imperative dygraph APIs into declarative function APIs. Decorator
-    @to_static handles the Program and Executor of static mode and returns
+    @to_static handles the Program and Executor of static graph mode and returns
    the result as dygraph Tensor(s). Users could use the returned dygraph
    Tensor(s) to do imperative training, inference, or other operations. If the
    decorated function calls other imperative function, the called one will be

--- a/python/paddle/jit/dy2static/basic_api_transformer.py
+++ b/python/paddle/jit/dy2static/basic_api_transformer.py
@@ -133,7 +133,7 @@ class AttributeJstTransformer(BaseTransformer):
    for example:
        a.size  -->  __jst.attr(a, "size")

-    because `size` have different behavier when in dygraph / static mode
+    because `size` has different behavior when in dygraph / static graph mode
    NOTE: we only deal with ctx=Load() case.
    """


--- a/python/paddle/jit/dy2static/partial_program.py
+++ b/python/paddle/jit/dy2static/partial_program.py
@@ -150,7 +150,7 @@ class PartialProgramLayer:
        parameters(list[VarBase]|None): All trainable parameters included in the program. Default None.

    Returns:
-        Layer: A Layer object that run all ops internally in static mode.
+        Layer: A Layer object that runs all ops internally in static graph mode.
    """

    def __init__(

--- a/python/paddle/jit/dy2static/program_translator.py
+++ b/python/paddle/jit/dy2static/program_translator.py
@@ -415,7 +415,7 @@ class StaticFunction:
        if not _non_static_mode():
            raise RuntimeError(
                "Failed to run the callable object {} decorated by '@paddle.jit.to_static', "
-                "because it is NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the "
+                "because it is NOT in dynamic mode. Please disable the static graph mode to enter dynamic mode with the "
                "following API: paddle.disable_static().".format(
                    self.dygraph_function
                )
@@ -691,7 +691,7 @@ class StaticFunction:
                        return out

                x = paddle.randn([10, 1], 'float32')
-                net = paddle.jit.to_static(Net())  # convert into static mode
+                net = paddle.jit.to_static(Net())  # convert into static graph mode
                out = net(x)

                net.forward.rollback()  # rollback into dygraph mode
@@ -751,7 +751,7 @@ class StaticFunction:
                        return out

                x = paddle.randn([10, 1], 'float32')
-                net = paddle.jit.to_static(Net())  # convert into static mode
+                net = paddle.jit.to_static(Net())  # convert into static graph mode

                copy_net = copy.deepcopy(net)      # deepcopy a new net without @to_static


--- a/python/paddle/metric/metrics.py
+++ b/python/paddle/metric/metrics.py
@@ -269,7 +269,7 @@ class Accuracy(Metric):
        if (len(label.shape) == 1) or (
            len(label.shape) == 2 and label.shape[-1] == 1
        ):
-            # In static mode, the real label data shape may be different
+            # In static graph mode, the real label data shape may be different
            # from shape defined by paddle.static.InputSpec in model
            # building, reshape to the right shape.
            label = paddle.reshape(label, (-1, 1))

--- a/python/paddle/nn/functional/pooling.py
+++ b/python/paddle/nn/functional/pooling.py
@@ -1593,7 +1593,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
            item.numpy().item(0) if isinstance(item, Variable) else item
            for item in output_size
        ]
-    # output_size support Variable in static mode
+    # output_size supports Variable in static graph mode
    elif utils._contain_var(output_size):
        output_size = utils._convert_to_tensor_list(output_size)


--- a/python/paddle/optimizer/adadelta.py
+++ b/python/paddle/optimizer/adadelta.py
@@ -49,7 +49,7 @@ class Adadelta(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in paramter groups \
            represents the scale of base learning_rate. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/adagrad.py
+++ b/python/paddle/optimizer/adagrad.py
@@ -48,7 +48,7 @@ class Adagrad(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc,
            then the parameters are list of dict. Note that the learning_rate in paramter groups
            represents the scale of base learning_rate.
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
            It canbe a float value as coeff of L2 regularization or
            :ref:`api_paddle_regularizer_L1Decay`, :ref:`api_paddle_regularizer_L2Decay`.

--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -70,7 +70,7 @@ class Adam(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc,
            then the parameters are list of dict. Note that the learning_rate in paramter groups
            represents the scale of base learning_rate.
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
            It canbe a float value as coeff of L2 regularization or
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/adamax.py
+++ b/python/paddle/optimizer/adamax.py
@@ -62,7 +62,7 @@ class Adamax(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc,
            then the parameters are list of dict. Note that the learning_rate in paramter groups
            represents the scale of base learning_rate.
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
            It canbe a float value as coeff of L2 regularization or
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/adamw.py
+++ b/python/paddle/optimizer/adamw.py
@@ -58,7 +58,7 @@ class AdamW(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc,
            then the parameters are list of dict. Note that the learning_rate in paramter groups
            represents the scale of base learning_rate.
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates.
            It should be a float number or a Tensor with shape [1] and data type as float32.
            The default value is 0.9.

--- a/python/paddle/optimizer/lamb.py
+++ b/python/paddle/optimizer/lamb.py
@@ -67,7 +67,7 @@ class Lamb(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in paramter groups \
            represents the scale of base learning_rate. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of
            some derived class of ``GradientClipBase`` . There are three cliping strategies
            ( :ref:`api_paddle_fluid_clip_ClipGradByGlobalNorm` , :ref:`api_paddle_fluid_clip_ClipGradByNorm` ,

--- a/python/paddle/optimizer/momentum.py
+++ b/python/paddle/optimizer/momentum.py
@@ -57,7 +57,7 @@ class Momentum(Optimizer):
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in paramter groups \
            represents the scale of base learning_rate. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/optimizer.py
+++ b/python/paddle/optimizer/optimizer.py
@@ -109,7 +109,7 @@ class Optimizer:
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in paramter groups \
            represents the scale of base learning_rate. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/rmsprop.py
+++ b/python/paddle/optimizer/rmsprop.py
@@ -86,7 +86,7 @@ class RMSProp(Optimizer):
          different parameter groups such as the learning rate, weight decay, etc,
          then the parameters are list of dict. Note that the learning_rate in paramter groups
          represents the scale of base learning_rate.
-          The default value is None in static mode, at this time all parameters will be updated.
+          The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization.
          It canbe a float value as coeff of L2 regularization or \
          :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/optimizer/sgd.py
+++ b/python/paddle/optimizer/sgd.py
@@ -39,7 +39,7 @@ class SGD(Optimizer):
            It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
        parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. \
-            The default value is None in static mode, at this time all parameters will be updated.
+            The default value is None in static graph mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.

--- a/python/paddle/static/nn/common.py
+++ b/python/paddle/static/nn/common.py
@@ -1593,7 +1593,7 @@ def conv2d_transpose(
                output_size
            ):
                raise ValueError(
-                    "filter_size should not be None when output_size is Tensor or contain Tensor in static mode."
+                    "filter_size should not be None when output_size is Tensor or contain Tensor in static graph mode."
                )
        else:
            output_size = utils.convert_shape_to_list(output_size)

--- a/python/paddle/static/nn/control_flow.py
+++ b/python/paddle/static/nn/control_flow.py
@@ -888,10 +888,10 @@ def cond(pred, true_fn=None, false_fn=None, name=None, return_names=None):
        the same shape because of dataflow model of PaddlePaddle while the
        tensors in the tuples or the lists can have different shapes.

-        2. This API could be used under both static mode or dygraph mode. If it
+        2. This API could be used under both static graph mode and dygraph mode. If it
        is in dygraph mode, the API only runs one branch based on condition.

-        3. If it is in static mode, any tensors or operations created outside
+        3. If it is in static graph mode, any tensors or operations created outside
        or inside of ``true_fn`` and ``false_fn`` will be in net building
        regardless of which branch is selected at runtime. This has frequently
        surprised users who expected a lazy semantics. For example:

--- a/python/paddle/tensor/array.py
+++ b/python/paddle/tensor/array.py
@@ -26,7 +26,7 @@ def array_length(array):
    This OP is used to get the length of the input array.

    Args:
-        array (list|Tensor): The input array that will be used to compute the length. In dynamic mode, ``array`` is a Python list. But in static mode, array is a Tensor whose VarType is LOD_TENSOR_ARRAY.
+        array (list|Tensor): The input array that will be used to compute the length. In dynamic mode, ``array`` is a Python list. But in static graph mode, array is a Tensor whose VarType is LOD_TENSOR_ARRAY.

    Returns:
        Tensor: 1-D Tensor with shape [1], which is the length of array.
@@ -88,7 +88,7 @@ def array_read(array, i):
            output = [0.4, 0.2]

    Args:
-        array (list|Tensor): The input array. In dynamic mode, ``array`` is a Python list. But in static mode, array is a Tensor whose ``VarType`` is ``LOD_TENSOR_ARRAY``.
+        array (list|Tensor): The input array. In dynamic mode, ``array`` is a Python list. But in static graph mode, array is a Tensor whose ``VarType`` is ``LOD_TENSOR_ARRAY``.
        i (Tensor): 1-D Tensor, whose shape is [1] and dtype is int64. It represents the
            specified read position of ``array``.

@@ -150,7 +150,7 @@ def array_write(x, i, array=None):
            ``x`` is written.
        array (list|Tensor, optional): The array into which ``x`` is written. The default value is None,
            when a new array will be created and returned as a result. In dynamic mode, ``array`` is a Python list.
-            But in static mode, array is a Tensor whose ``VarType`` is ``LOD_TENSOR_ARRAY``.
+            But in static graph mode, array is a Tensor whose ``VarType`` is ``LOD_TENSOR_ARRAY``.

    Returns:
        list|Tensor: The input ``array`` after ``x`` is written into.
@@ -230,7 +230,7 @@ def create_array(dtype, initialized_list=None):
                    All values in initialized list should be a Tensor.

    Returns:
-        list|Tensor: An empty array. In dynamic mode, ``array`` is a Python list. But in static mode, array is a Tensor
+        list|Tensor: An empty array. In dynamic mode, ``array`` is a Python list. But in static graph mode, array is a Tensor
        whose ``VarType`` is ``LOD_TENSOR_ARRAY``.

    Examples:
@@ -258,7 +258,7 @@ def create_array(dtype, initialized_list=None):
            )
        array = list(initialized_list)

-    # NOTE: Only support plain list like [x, y,...], not support nested list in static mode.
+    # NOTE: Only plain lists like [x, y, ...] are supported; nested lists are not supported in static graph mode.
    for val in array:
        if not isinstance(val, Variable):
            raise TypeError(

--- a/python/paddle/tensor/layer_function_generator.py
+++ b/python/paddle/tensor/layer_function_generator.py
@@ -334,6 +334,7 @@ def generate_inplace_fn(inplace_op_type):
    origin_op_type = inplace_op_type[:-1]

    def func(x, name=None):
+
        if in_dygraph_mode():
            if hasattr(_C_ops, inplace_op_type):
                op = getattr(_C_ops, inplace_op_type)
@@ -343,7 +344,7 @@ def generate_inplace_fn(inplace_op_type):
                return op(x)
        else:
            warnings.warn(
-                "In static mode, {}() is the same as {}() and does not perform inplace operation.".format(
+                "In static graph mode, {}() is the same as {}() and does not perform inplace operation.".format(
                    inplace_op_type, origin_op_type
                )
            )

--- a/python/paddle/tensor/linalg.py
+++ b/python/paddle/tensor/linalg.py
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
--- a/python/paddle/tensor/stat.py
+++ b/python/paddle/tensor/stat.py
--- a/python/paddle/utils/inplace_utils.py
+++ b/python/paddle/utils/inplace_utils.py
--- a/python/paddle/vision/transforms/functional.py
+++ b/python/paddle/vision/transforms/functional.py
--- a/python/paddle/vision/transforms/functional_tensor.py
+++ b/python/paddle/vision/transforms/functional_tensor.py