@@ -88,7 +88,7 @@ If you want to install paddlepaddle-gpu with cuda version of 9.0 ,10.0 ,10.1 ,or
 After the installation is complete, you can use `python` or `python3` to enter the Python interpreter and then use `import paddle.fluid` and `fluid.install_check.run_check()`
-If `Your Paddle Fluid is installed succesfully!` appears, to verify that the installation was successful.
+If `Your Paddle Fluid is installed successfully!` appears, the installation was successful.
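As a quick illustration of the check described above (a minimal sketch; it only wraps the two calls named in the text):

.. code-block:: python

    # Verify a fresh install from the Python interpreter.
    import paddle.fluid as fluid

    # Runs a small program on the local device and prints
    # "Your Paddle Fluid is installed successfully!" when everything works.
    fluid.install_check.run_check()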
@@ -1168,9 +1168,9 @@ class DistributedStrategy(object):
 dp_degree(int, optional): specific the number of data parallelism group; when dp_degree >= 2, it will introduce dp_degree ways data parallelism as the outer parallelsim for the inner parallelsim. User is responsible to ensure global_world_size = mp_degree * sharding_degree * pp_degree * dp_degree. Default is 1.
-mp_degree(int, optional): [Hybrid parallelism ONLY] specific the the number of gpus within each megatron parallelism group; and megatron parallelism will turn be off if mp_degree=1. Default is 1.
+mp_degree(int, optional): [Hybrid parallelism ONLY] specify the number of gpus within each megatron parallelism group; megatron parallelism will be turned off if mp_degree=1. Default is 1.
-pp_degree(int, optional): [Hybrid parallelism ONLY] specific the the number of gpus within each pipeline parallelism group; and pipeline parallelism will turn be off if pp_degree=1. Default is 1.
+pp_degree(int, optional): [Hybrid parallelism ONLY] specify the number of gpus within each pipeline parallelism group; pipeline parallelism will be turned off if pp_degree=1. Default is 1.
 pp_allreduce_in_optimize(bool, optional): [Hybrid parallelism ONLY] move the allreduce operations from backward stage to update(optimize) stage when pipeline parallelsim is on.
 This configuration will affect the communication speed of Hybrid parallelism training depeneded on network topology. this strategy is experimental by now.. Default is False.
...
...
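A minimal sketch of how these degrees are typically set, assuming the keys above populate the ``sharding_configs`` dict of ``paddle.distributed.fleet.DistributedStrategy`` (the degree values below are illustrative, chosen so the product matches an 8-GPU world size):

.. code-block:: python

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    strategy.sharding = True
    strategy.sharding_configs = {
        "sharding_degree": 2,             # gpus per sharding group
        "mp_degree": 2,                   # megatron (tensor) parallel group size
        "pp_degree": 2,                   # pipeline parallel group size
        "dp_degree": 1,                   # outer data parallelism
        "pp_allreduce_in_optimize": False,
    }
    # 2 * 2 * 2 * 1 == 8, matching global_world_size as required above.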
@@ -1485,7 +1485,7 @@ class DistributedStrategy(object):
 **Notes**:
 k_steps(int) The local steps for training before parameter synchronization. Default 1.
-begin_step(int) The step of begining training by localsgd. Default 1.
+begin_step(int) The step of beginning training by localsgd. Default 1.
 Examples:
...
...
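A minimal sketch of the corresponding configuration, assuming ``k_steps`` and ``begin_step`` are keys of the ``localsgd_configs`` dict on ``paddle.distributed.fleet.DistributedStrategy`` and that ``strategy.localsgd = True`` enables the feature:

.. code-block:: python

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    strategy.localsgd = True
    strategy.localsgd_configs = {
        "k_steps": 4,      # synchronize parameters every 4 local steps
        "begin_step": 30,  # switch to localsgd after step 30
    }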
@@ -1544,7 +1544,7 @@ class DistributedStrategy(object):
 init_k_steps(int) The initial steps for training before adaptive localsgd.
 Then, the adaptive localsgd method will modify init_k_steps automatically.
 Default 1.
-begin_step(int) The step of begining training by adaptive localsgd. Default 1.
+begin_step(int) The step of beginning training by adaptive localsgd. Default 1.
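A similar sketch for the adaptive variant, assuming ``init_k_steps`` and ``begin_step`` are keys of the ``adaptive_localsgd_configs`` dict on ``paddle.distributed.fleet.DistributedStrategy``, enabled by ``strategy.adaptive_localsgd = True``:

.. code-block:: python

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    strategy.adaptive_localsgd = True
    strategy.adaptive_localsgd_configs = {
        "init_k_steps": 1,  # starting interval; adjusted automatically during training
        "begin_step": 30,   # switch to adaptive localsgd after step 30
    }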
 - ``--selected_mlus``: mlus aliases, recommend to use ``--mlus``.
-- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``traing.py``
+- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
 - ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``
 - ``--devices``: The selected accelerate devices on nodes, can be gpu/xpu/npu/mlu etc.. e.g., ``--devices=0,1,2,3`` will launch four training processes each bound to one device.
-- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``traing.py``
+- ``training_script``: The full path to the single GPU training program/script to be launched in parallel, followed by all the arguments for the training script. e.g., ``training.py``
 - ``training_script_args``: The args of training_script. e.g., ``--lr=0.1``
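A hedged sketch of how the arguments listed above fit together on the command line, assuming the standard ``python -m paddle.distributed.launch`` entry point (``training.py`` and ``--lr=0.1`` are the placeholder script and argument from the text; the device indices are illustrative):

.. code-block:: bash

    # Launch four workers, one per selected device; everything after the
    # script path is passed through as training_script_args.
    python -m paddle.distributed.launch --devices=0,1,2,3 training.py --lr=0.1

    # On MLU machines the device list can be given with the recommended
    # --mlus alias instead of --selected_mlus.
    python -m paddle.distributed.launch --mlus=0,1,2,3 training.py --lr=0.1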