Unverified commit 433cef03, authored by tianshuo78520a, committed by GitHub

fix typo word (#22784)

Parent commit: ebc7ffc3
@@ -48,7 +48,7 @@ if(WIN32)
   SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@")
   SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@")
-  # set defination for the dll export
+  # set definition for the dll export
   if (NOT MSVC)
     message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.")
   endif(NOT MSVC)
...
@@ -174,7 +174,7 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2")
   set(SHALLOW_CLONE "GIT_SHALLOW TRUE") # adds --depth=1 arg to git clone of External_Projects
 endif()
-########################### include third_party accoring to flags ###############################
+########################### include third_party according to flags ###############################
 include(external/zlib)      # download, build, install zlib
 include(external/gflags)    # download, build, install gflags
 include(external/glog)      # download, build, install glog
...
@@ -857,7 +857,7 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const {
     op_dev_id = GetVarDeviceID(node->inputs[0]->Name());
     PADDLE_ENFORCE(!ir::IsControlDepVar(*node->inputs[0]),
                    "This hack no longer holds, please fix.");
-    // the variable name which contains .block means it was splited by
+    // the variable name which contains .block means it was split by
     // split_byref op
     if (strategy_.reduce_ ==
             details::BuildStrategy::ReduceStrategy::kAllReduce &&
...
@@ -990,7 +990,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
   if (!transfered_inplace_vars.empty()) {
-    // there is inplace variable has been transfered.
+    // there is inplace variable has been transferred.
     TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope);
   }
   if (FLAGS_enable_unused_var_check) {
...
@@ -517,7 +517,8 @@ class OperatorWithKernel : public OperatorBase {
                                  RuntimeContext* runtime_ctx) const;
   /**
-   * Transfer data from scope to a transfered scope. If there is no data need to
+   * Transfer data from scope to a transferred scope. If there is no data need
+   * to
    * be tranfered, it returns nullptr.
    *
    * * transfered_inplace_vars is a output vector.
...
@@ -87,18 +87,18 @@ class ParallelExecutorPrivate {
   inline bool HasGarbageCollectors() const { return !gcs_.empty(); }
   /**
-   * NOTE(zengjinle): the feeded variables of users should not be reused,
-   * because users may feed them into another network. Changing the feeded
+   * NOTE(zengjinle): the fed variables of users should not be reused,
+   * because users may feed them into another network. Changing the fed
    * variables that users can visit may cause calculation wrong, which is
    * a very subtle bug when traning networks. However, these variables
    * can be garbage collected.
    *
    * ParallelExecutor provides 2 methods to feed variables:
    *
-   * - FeedTensorsIntoLocalScopes: this method would share memory of feeded
+   * - FeedTensorsIntoLocalScopes: this method would share memory of fed
    *                               variables, so we have to skip these.
    *
-   * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of feeded
+   * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of fed
    *                                      variables, so we do not need to skip
    *                                      them.
    */
...
@@ -53,10 +53,10 @@ class ReaderBase {
   // they are readers just before read op.
   std::unordered_set<ReaderBase*> GetEndPoints();
-  // Returns the shapes of the feeded variables
+  // Returns the shapes of the fed variables
   const std::vector<DDim>& Shapes() const { return shapes_; }
-  // Returns the dtypes of the feeded variables
+  // Returns the dtypes of the fed variables
   const std::vector<proto::VarType::Type>& VarTypes() const {
     return var_types_;
   }
@@ -80,13 +80,13 @@ class ReaderBase {
   mutable std::mutex mu_;
-  // The shapes of the feeded variables.
+  // The shapes of the fed variables.
   std::vector<DDim> shapes_;
-  // The dtypes of the feeded variables.
+  // The dtypes of the fed variables.
   std::vector<proto::VarType::Type> var_types_;
-  // Whether to check the shape and dtype of feeded variables.
+  // Whether to check the shape and dtype of fed variables.
   // For Backward compatibility, variables created by old API fluid.layers.data
   // doesn't check shape but fluid.data checks.
   std::vector<bool> need_check_feed_;
...
@@ -210,7 +210,7 @@ TEST(test_prepare_op, test_prepare_data_same_place) {
       attr_map);
   framework::RuntimeContext ctx = PrepareRuntimeContext(ins, outs);
-  // test if it never transfered on GPU place
+  // test if it never transferred on GPU place
   PreparedOp prepared_op = PreparedOp::Prepare(
       ins, outs, dynamic_cast<framework::OperatorWithKernel&>(*op), cpu_place,
       &attr_map);
...
@@ -14,7 +14,7 @@
 #
 if(WITH_TESTING)
-  include(tests/test.cmake) # some generic cmake funtion for inference
+  include(tests/test.cmake) # some generic cmake function for inference
 endif()
 # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal?
...
@@ -12,7 +12,7 @@ There are several demos:
 - Each line contains a single record
 - Each record's format is
 ```
-<space splitted floats as data>\t<space splitted ints as shape>
+<space split floats as data>\t<space split ints as shape>
 ```
 To build and execute the demos, simply run
...
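To make the record format concrete, here is a minimal Python sketch that serializes one record; `make_record` is a hypothetical helper for illustration, not part of the demo code:

```python
# Hypothetical helper illustrating the record format above:
# space-separated floats, a tab, then space-separated ints for the shape.
def make_record(data, shape):
    return " ".join(str(f) for f in data) + "\t" + " ".join(str(i) for i in shape)

# A 2x2 tensor flattened to 4 floats -> "0.1 0.2 0.3 0.4<TAB>2 2"
print(make_record([0.1, 0.2, 0.3, 0.4], [2, 2]))
```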
@@ -23,10 +23,9 @@ limitations under the License. */
 DECLARE_double(fraction_of_gpu_memory_to_use);
 DEFINE_string(modeldir, "", "Directory of the inference model.");
 DEFINE_string(refer, "", "path to reference result for comparison.");
-DEFINE_string(
-    data, "",
-    "path of data; each line is a record, format is "
-    "'<space splitted floats as data>\t<space splitted ints as shape'");
+DEFINE_string(data, "",
+              "path of data; each line is a record, format is "
+              "'<space split floats as data>\t<space split ints as shape'");
 namespace paddle {
 namespace demo {
...
@@ -25,10 +25,9 @@ DECLARE_double(fraction_of_gpu_memory_to_use);
 #endif
 DEFINE_string(modeldir, "", "Directory of the inference model.");
 DEFINE_string(refer, "", "path to reference result for comparison.");
-DEFINE_string(
-    data, "",
-    "path of data; each line is a record, format is "
-    "'<space splitted floats as data>\t<space splitted ints as shape'");
+DEFINE_string(data, "",
+              "path of data; each line is a record, format is "
+              "'<space split floats as data>\t<space split ints as shape'");
 DEFINE_bool(use_gpu, false, "Whether use gpu.");
 #ifdef PADDLE_WITH_SHARED_LIB
 DECLARE_bool(profile);
...
@@ -52,7 +52,7 @@ class BeamSearchOpMaker : public framework::OpProtoAndCheckerMaker {
               "A LoDTensor containing the accumulated scores corresponding to "
               "Output(selected_ids).");
     AddOutput("parent_idx",
-              "A Tensor preserving the selected_ids' parent indice in pre_ids.")
+              "A Tensor preserving the selected_ids' parent index in pre_ids.")
         .AsDispensable();
     // Attributes stored in AttributeMap
...
@@ -119,7 +119,7 @@ class CudnnLSTMOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault(0.0);
     AddAttr<bool>("is_bidirec",
                   "is_bidirec"
-                  "if it is bidirection rnn"
+                  "if it is bidirectional rnn"
                   "The will affect the shape of the Out, last_h, and last_c")
         .SetDefault(false);
     AddAttr<int>("input_size", "input size ot the Input Tensor").SetDefault(10);
...
@@ -35,7 +35,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel {
         platform::errors::InvalidArgument(
             "The input tensor Input's height"
             "of DensityPriorBoxOp should be smaller than input tensor Image's"
-            "hight. But received Input's height = %d, Image's height = %d",
+            "height. But received Input's height = %d, Image's height = %d",
             input_dims[2], image_dims[2]));
     PADDLE_ENFORCE_LT(
...
@@ -543,7 +543,7 @@ std::unordered_set<int64_t> GeoSgdCommunicator::SparseIdsMerge(
     const std::string &splited_var_name) {
   // every batch has some sparse id, merge them into one unoredered_set
   VLOG(4) << "Sparse Ids merge var: " << var_name
-          << " splited var: " << splited_var_name;
+          << " split var: " << splited_var_name;
   auto before_run_ids_merge_ = GetCurrentUS();
   auto origin_var_name = DeltaVarToVar(var_name);
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
@@ -567,9 +567,8 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   // var_name: param.delta
   auto origin_var_name = DeltaVarToVar(var_name);
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
-  VLOG(4) << "Dense var: " << var_name
-          << " 's splited var: " << splited_var_name
-          << " splited var index: " << splited_var_index;
+  VLOG(4) << "Dense var: " << var_name << " 's split var: " << splited_var_name
+          << " split var index: " << splited_var_index;
   auto before_run_send_dense = GetCurrentUS();
   auto cpu_ctx = paddle::platform::CPUDeviceContext();
@@ -592,7 +591,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   begin_loc = absolute_section_[origin_var_name][splited_var_index];
   dimension = total_element / vars_first_dimension_[origin_var_name];
   total_element = section * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name
+  VLOG(4) << "Dense split var: " << splited_var_name
           << " section: " << section << " dimension: " << dimension
           << " begin loc: " << begin_loc << " total_element "
           << total_element;
@@ -600,12 +599,12 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   auto *var_x_data = var_x_tensor.mutable_data<float>(var_x_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
           << var_x_data[0] << " var_x_data[end] "
           << var_x_data[total_element - 1];
   auto *var_y_data = var_y_tensor.mutable_data<float>(var_y_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -616,14 +615,14 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   var_z_tensor->mutable_data<float>(dims, cpu_ctx.GetPlace());
   auto *var_z_data = var_z_tensor->mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << "var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << "var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
   // calc sub = var_training - var_old
   auto blas = math::GetBlas<paddle::platform::CPUDeviceContext, float>(cpu_ctx);
   blas.VSUB(total_element, var_x_data, var_y_data, var_z_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
@@ -633,7 +632,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars(
   // calc var_old += var_delta
   blas.VADD(total_element, var_y_data, var_z_data, var_y_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -763,7 +762,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   section = dims[0];
   begin_loc = absolute_section_[origin_var_name][splited_var_index];
   dimension = total_element / section;
-  VLOG(4) << "Dense splited var: " << splited_var_name
+  VLOG(4) << "Dense split var: " << splited_var_name
           << " section: " << section << " dimension: " << dimension
           << " begin loc: " << begin_loc << " total_element "
           << total_element;
@@ -771,18 +770,18 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   auto *var_x_data = var_x_tensor.mutable_data<float>(var_x_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
          << var_x_data[0] << " var_x_data[end] "
          << var_x_data[total_element - 1];
   auto *var_y_data = var_y_tensor.mutable_data<float>(var_y_tensor.place()) +
                      begin_loc * dimension;
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
   auto *var_z_data = var_z_tensor.mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] "
          << var_z_data[0] << " var_z_data[end] "
          << var_z_data[total_element - 1];
@@ -793,7 +792,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   auto *var_y_sub_data =
       var_y_sub_tensor->mutable_data<float>(cpu_ctx.GetPlace());
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] "
          << var_y_sub_data[0] << " var_y_sub_data[end] "
          << var_y_sub_data[total_element - 1];
@@ -801,19 +800,19 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
   // calc sub = pserver - old
   blas.VSUB(total_element, var_z_data, var_y_data, var_y_sub_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] "
          << var_y_sub_data[0] << " var_y_sub_data[end] "
          << var_y_sub_data[total_element - 1];
   // calc train += sub
   blas.VADD(total_element, var_x_data, var_y_sub_data, var_x_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] "
          << var_x_data[0] << " var_x_data[end] "
          << var_x_data[total_element - 1];
   // calc old = pserver
   blas.VCOPY(total_element, var_z_data, var_y_data);
-  VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] "
+  VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] "
          << var_y_data[0] << " var_y_data[end] "
          << var_y_data[total_element - 1];
@@ -824,7 +823,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars(
 void GeoSgdCommunicator::RecvUpdateSparseVars(
     const std::string &var_name, const std::string &splited_var_name) {
-  // step 1: recv splited var from pserver
+  // step 1: recv split var from pserver
   auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name);
   auto origin_var_name = DeltaVarToVar(var_name);
   auto origin_splited_var_name = DeltaVarToVar(splited_var_name);
...
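The VSUB/VADD/VCOPY calls in the hunks above implement a small delta-exchange protocol between the trainer and the parameter server. A rough numpy sketch of the dense path, using only the roles named in the comments (training, old, pserver); this is an illustration, not the communicator's actual code:

```python
import numpy as np

train = np.array([1.0, 2.0])    # var_training: locally updated parameter
old = np.array([0.5, 1.5])      # var_old: snapshot from the last exchange

# Send side: delta = train - old (VSUB), then old += delta (VADD).
delta = train - old
old = old + delta               # old now equals train; delta goes to the server

# Recv side: sub = pserver - old (VSUB); train += sub (VADD); old = pserver (VCOPY).
pserver = np.array([1.2, 2.2])  # parameter received back from the server
sub = pserver - old
train = train + sub
old = pserver.copy()
```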
@@ -122,7 +122,7 @@ This operator can get variables from server side.
     AddAttr<std::vector<std::string>>(
         "recv_varnames",
         "(vector<string>) "
-        "the splited parameter varnames to be recved from pserver")
+        "the split parameter varnames to be recved from pserver")
         .SetDefault(std::vector<std::string>{});
     AddAttr<int>("do_not_run", "if recv need to really run").SetDefault(0);
   }
...
@@ -116,7 +116,7 @@ This operator will send variables to listen_and_serve op at the parameter server
     AddAttr<std::vector<std::string>>(
         "send_varnames",
         "(vector<string>) "
-        "the splited output varnames to send to pserver")
+        "the split output varnames to send to pserver")
         .SetDefault(std::vector<std::string>{});
     AddAttr<int>("num",
                  "(int, default 0)"
...
@@ -28,7 +28,7 @@ namespace operators {
 // x is Input,
 // z is ResidualData,
 // bias is Bias
-// When `split_channels` is set, y will be splitted into multiple outputs,
+// When `split_channels` is set, y will be split into multiple outputs,
 // each output has split_channels[i] number of channels.
 class Conv2DFusionOpMaker : public Conv2DOpMaker {
  protected:
...
@@ -162,7 +162,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -113,7 +113,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -100,7 +100,7 @@ class LookupTableV2OpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -777,11 +777,11 @@ void Blas<DeviceContext>::MatMul(const framework::Tensor &mat_a,
  * When user calls this API, the multiplication of two big matrixes is split
  * into multiplication of several (head_number_) small matrixes. e.g. if Mat A
  * is [3, 24] and Mat B is [24, 4], when multiple A and B with head_number as
- * 4, Mat A will be splitted as 4 matrix of [3, 6] and Mat B will be
- * (horizontally) splitted as 4 matrix of [6, 4]. The result of final matrix
+ * 4, Mat A will be split as 4 matrix of [3, 6] and Mat B will be
+ * (horizontally) split as 4 matrix of [6, 4]. The result of final matrix
  * will be 4 matrix of [3, 4], i.e. [3, 16].
  * Another example is A is [3, 8], B is [2, 16], head_number is 4. In this
- * case, A will be splitted as [3, 2], B will be (vertically) splitted as
+ * case, A will be split as [3, 2], B will be (vertically) split as
  * [2, 4]. The final result will be 4 matrix of 4 matrix of [3,4], i.e. [3, 16]
  */
 template <typename DeviceContext>
...
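The first worked example in the comment can be checked with a few lines of numpy; this is a sketch of the shape arithmetic only, not of the actual BLAS path:

```python
import numpy as np

head_number = 4
A = np.random.rand(3, 24)
B = np.random.rand(24, 4)

# Split A column-wise into 4 pieces of [3, 6] and B row-wise into 4 pieces
# of [6, 4]; multiply piece-wise and concatenate the [3, 4] results.
heads = [a @ b for a, b in zip(np.hsplit(A, head_number),
                               np.vsplit(B, head_number))]
C = np.concatenate(heads, axis=1)
print(C.shape)  # (3, 16), i.e. 4 matrices of [3, 4]
```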
@@ -106,7 +106,7 @@ class SimpleCode {
       : c_(static_cast<size_t>(ids[code]) + num_classes) {}
   /**
    * Here the id of root should be 1 rather than 0, thus the encoding of class c
-   * is `c + num_classes` and all siblings can get the same weight indice using
+   * is `c + num_classes` and all siblings can get the same weight index using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most
    * bit in calc_index.
@@ -133,7 +133,7 @@ class CustomCode {
   }
   /**
    * Here the id of root should be 1 rather than 0, thus the encoding of class c
-   * is `c + num_classes` and all siblings can get the same weight indice using
+   * is `c + num_classes` and all siblings can get the same weight index using
    * prefixes.
    * Weight index is the prefixes of encoding, thus leave out the right most
    * bit in calc_index.
...
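A small sketch of the encoding the comment describes, assuming the usual binary-tree layout where node 1 is the root (which is why class c is stored as c + num_classes); this `calc_index` is illustrative, not necessarily Paddle's exact implementation:

```python
def calc_index(code, bit):
    # The weight index is a prefix of the encoding: drop the lowest
    # (bit + 1) bits, then subtract 1 so the root (node 1) maps to row 0.
    return (code >> (bit + 1)) - 1

num_classes = 10
code = 3 + num_classes       # class 3 encoded as 13 == 0b1101
print(calc_index(code, 0))   # parent prefix 0b110 -> weight row 5
print(calc_index(code, 1))   # prefix 0b11 -> weight row 2
```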
@@ -55,11 +55,12 @@ class FCPrimitiveFactory {
     }  // Otherwise, create a new one.
     auto in_col_dims = ctx.Attr<int>("in_num_col_dims");
-    PADDLE_ENFORCE_LE(in_col_dims, 2,
-                      platform::errors::Unimplemented(
-                          "DNNL FC doesn't support in_num_col_dims paramter to "
-                          "be higher than "
-                          "2."));
+    PADDLE_ENFORCE_LE(
+        in_col_dims, 2,
+        platform::errors::Unimplemented(
+            "DNNL FC doesn't support in_num_col_dims parameter to "
+            "be higher than "
+            "2."));
     if (in_col_dims == 2) {
       PADDLE_ENFORCE_EQ(
           input->dims().size(), 3,
...
@@ -192,7 +192,7 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault({});
     AddAttr<std::vector<std::string>>(
         "table_names",
-        "(string vector, the splited table names that will be fetched from "
+        "(string vector, the split table names that will be fetched from "
         "parameter server)"
         "in the order of input variables for mapping")
         .SetDefault({});
...
@@ -563,7 +563,7 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker {
         .SetDefault("NCHW");
     AddComment(R"DOC(
 Pad2d Operator.
-Pad 2-d images accordding to 'paddings' and 'mode'.
+Pad 2-d images according to 'paddings' and 'mode'.
 If mode is 'reflect', paddings[0] and paddings[1] must be no greater
 than height-1. And the width dimension has the same condition.
...
@@ -118,7 +118,7 @@ class ReadOp : public framework::OperatorBase {
         reader->VarTypes();
     const std::vector<bool>& need_check_feed = reader->NeedCheckFeed();
     PADDLE_ENFORCE_EQ(out_arg_names.size(), need_check_feed.size(),
-                      "output size of read_op and the number of feeded "
+                      "output size of read_op and the number of fed "
                       "variables of reader do not match");
     for (size_t i = 0; i < out_arg_names.size(); ++i) {
@@ -127,13 +127,13 @@ class ReadOp : public framework::OperatorBase {
       if (need_check_feed[i]) {
         auto in_dims = ins[i].dims();
         PADDLE_ENFORCE_EQ(DimensionIsCompatibleWith(shapes[i], in_dims), true,
-                          "The feeded Variable %s should have dimensions = %d, "
-                          "shape = [%s], but received feeded shape [%s]",
+                          "The fed Variable %s should have dimensions = %d, "
+                          "shape = [%s], but received fed shape [%s]",
                           out_arg_names[i], shapes[i].size(), shapes[i],
                           in_dims);
         PADDLE_ENFORCE_EQ(
             ins[i].type(), var_types[i],
-            "The data type of feeded Variable %s must be %s, but received %s",
+            "The data type of fed Variable %s must be %s, but received %s",
             out_arg_names[i], var_types[i], ins[i].type());
       }
       out->ShareDataWith(ins[i]);
...
@@ -67,8 +67,8 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
       const platform::CUDADeviceContext &context, const LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       LoDTensor *out) {
-    int hight = x.dims()[0];
-    int width = framework::product(x.dims()) / hight;
+    int height = x.dims()[0];
+    int width = framework::product(x.dims()) / height;
     const int kThreadsPerBlock = 1024;
     int thread_x = kThreadsPerBlock;
@@ -82,7 +82,7 @@ struct SequenceExpandFunctor<platform::CUDADeviceContext, T> {
     dim3 block_size(thread_x);
     dim3 grid_size(block_x);
     sequence_expand_as_kernel<<<grid_size, block_size, 0, context.stream()>>>(
-        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight, width,
+        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width,
         out->mutable_data<T>(context.GetPlace()));
   }
 };
@@ -93,8 +93,8 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &dout,
       const framework::Vector<size_t> &ref_lod, /*expand based lod*/
       LoDTensor *dx) {
-    int hight = dx->dims()[0];
-    int width = framework::product(dx->dims()) / hight;
+    int height = dx->dims()[0];
+    int width = framework::product(dx->dims()) / height;
     const int kThreadsPerBlock = 1024;
     int thread_x = kThreadsPerBlock;
@@ -109,7 +109,7 @@ struct SequenceExpandAsGradFunctor<platform::CUDADeviceContext, T> {
     dim3 grid_size(block_x);
     sequence_expand_as_grad_kernel<<<grid_size, block_size, 0,
                                      context.stream()>>>(
-        dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight, width,
+        dout.data<T>(), ref_lod.CUDAData(context.GetPlace()), height, width,
         dx->mutable_data<T>(context.GetPlace()));
   }
 };
...
@@ -46,13 +46,13 @@ struct SequenceExpandFunctor<platform::CPUDeviceContext, T> {
       const platform::CPUDeviceContext &context, const framework::LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       framework::LoDTensor *out) {
-    int64_t hight = x.dims()[0];
-    int64_t width = framework::product(x.dims()) / hight;
+    int64_t height = x.dims()[0];
+    int64_t width = framework::product(x.dims()) / height;
     const T *in_data = x.data<T>();
     T *out_data = out->mutable_data<T>(context.GetPlace());
-    for (int h_id = 0; h_id < hight; ++h_id) {
+    for (int h_id = 0; h_id < height; ++h_id) {
       size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
       if (span == 0) continue;
       const T *src = in_data + h_id * width;
@@ -109,13 +109,13 @@ struct SequenceExpandAsGradFunctor<platform::CPUDeviceContext, T> {
       const framework::LoDTensor &dout,
       const framework::Vector<size_t> &ref_lod, /*expand referenced lod*/
       framework::LoDTensor *dx) {
-    int64_t hight = dx->dims()[0];
-    int64_t width = framework::product(dx->dims()) / hight;
+    int64_t height = dx->dims()[0];
+    int64_t width = framework::product(dx->dims()) / height;
     const T *dout_data = dout.data<T>();
     T *dx_data = dx->mutable_data<T>(context.GetPlace());
-    for (int64_t h_id = 0; h_id < hight; ++h_id) {
+    for (int64_t h_id = 0; h_id < height; ++h_id) {
       T *dst = dx_data + h_id * width;
       size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
       for (int64_t w_id = 0; w_id < width; ++w_id) {
...
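What the forward functor computes is easy to state in numpy: row h of the input is repeated ref_lod[h+1] - ref_lod[h] times. A minimal sketch of that expansion:

```python
import numpy as np

x = np.array([[1.0, 2.0],
              [3.0, 4.0]])    # height = 2, width = 2
ref_lod = [0, 2, 5]           # spans of length 2 and 3

# Row h is repeated (ref_lod[h + 1] - ref_lod[h]) times, as in the loop above.
out = np.concatenate([np.repeat(x[h:h + 1], ref_lod[h + 1] - ref_lod[h], axis=0)
                      for h in range(len(ref_lod) - 1)])
print(out.shape)              # (5, 2)
```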
@@ -115,7 +115,7 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &x,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *out) {
-    int hight = ref_lod.size() - 1;
+    int height = ref_lod.size() - 1;
     const int kThreadsPerBlock = 32;
     int thread_x = kThreadsPerBlock;
@@ -126,7 +126,7 @@ struct SequenceSoftmaxFunctor<platform::CUDADeviceContext, T> {
     dim3 grid_size(max_blocks);
     sequence_softmax_kernel<
         T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
-        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), hight,
+        x.data<T>(), ref_lod.CUDAData(context.GetPlace()), height,
         out->mutable_data<T>(context.GetPlace()));
   }
 };
@@ -137,7 +137,7 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> {
       const LoDTensor &dout, const LoDTensor &out,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *dx) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const int kThreadsPerBlock = 32;
     int thread_x = kThreadsPerBlock;
@@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor<platform::CUDADeviceContext, T> {
     sequence_softmax_grad_kernel<
         T, kThreadsPerBlock><<<grid_size, block_size, 0, context.stream()>>>(
         dout.data<T>(), out.data<T>(), ref_lod.CUDAData(context.GetPlace()),
-        hight, dx->mutable_data<T>(context.GetPlace()));
+        height, dx->mutable_data<T>(context.GetPlace()));
   }
 };
...
@@ -43,10 +43,10 @@ struct SequenceSoftmaxFunctor<platform::CPUDeviceContext, T> {
   void operator()(const platform::CPUDeviceContext &ctx, const LoDTensor &x,
                   const framework::Vector<size_t> &ref_lod, /*referenced lod*/
                   LoDTensor *out) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const T *in_data = x.data<T>();
     T *out_data = out->mutable_data<T>(ctx.GetPlace());
-    for (size_t i = 0; i < hight; ++i) {
+    for (size_t i = 0; i < height; ++i) {
       size_t span = ref_lod[i + 1] - ref_lod[i];
       T result = 0;
       for (size_t j = 0; j < span; ++j) {
@@ -65,13 +65,13 @@ struct SequenceSoftmaxGradFunctor<platform::CPUDeviceContext, T> {
       const LoDTensor &out,
       const framework::Vector<size_t> &ref_lod, /*referenced lod*/
       LoDTensor *dx) {
-    size_t hight = ref_lod.size() - 1;
+    size_t height = ref_lod.size() - 1;
     const T *softmax_grad_data = dout.data<T>();
     const T *softmax = out.data<T>();
     T *dx_data = dx->mutable_data<T>(ctx.GetPlace());
-    for (size_t i = 0; i < hight; ++i) {
+    for (size_t i = 0; i < height; ++i) {
       size_t span = ref_lod[i + 1] - ref_lod[i];
       T result = 0;
       for (size_t j = 0; j < span; ++j) {
...
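The functor applies softmax independently to each LoD span of the flattened input. A numpy sketch of the per-span computation (the max-shift is added here for numerical stability; this is an illustration, not the exact kernel):

```python
import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
ref_lod = [0, 2, 5]           # two sequences: x[0:2] and x[2:5]
out = np.empty_like(x)

# Softmax over each span [ref_lod[i], ref_lod[i + 1]) independently.
for i in range(len(ref_lod) - 1):
    span = x[ref_lod[i]:ref_lod[i + 1]]
    e = np.exp(span - span.max())        # max-shift for numerical stability
    out[ref_lod[i]:ref_lod[i + 1]] = e / e.sum()

print(out[:2].sum(), out[2:].sum())      # 1.0 1.0
```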
@@ -90,7 +90,7 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker {
   void Make() override {
     AddInput("X", "(Tensor) Input tensor of the split operator.");
     AddInput("AxisTensor",
-             "(Tensor) The axis which the input will be splited on. "
+             "(Tensor) The axis which the input will be split on. "
             "It has higher priority than Attr(axis). "
             "The shape of AxisTensor must be [1]")
         .AsDispensable();
@@ -131,7 +131,7 @@ Example:
         .SetDefault(0);
     AddAttr<int>("axis",
                  "(int, default 0) "
-                 "The axis which the input will be splited on.")
+                 "The axis which the input will be split on.")
         .SetDefault(0);
   }
 };
...
@@ -76,7 +76,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
     // Only [N, C, H, W] input supported now
     PADDLE_ENFORCE(
         in_dims.size() == 4,
-        "Input shold be 4-D tensor of format [N, C, H, W], but get %u",
+        "Input should be 4-D tensor of format [N, C, H, W], but get %u",
         in_dims.size());
     PADDLE_ENFORCE(
         in_dims.size() - kernel_sizes.size() == 2U,
@@ -86,7 +86,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
         in_dims.size(), kernel_sizes.size());
     PADDLE_ENFORCE_EQ(
         strides.size(), kernel_sizes.size(),
-        "The dims of strides shold be the same with that of kernel_sizes. "
+        "The dims of strides should be the same with that of kernel_sizes. "
         "But recieved dims(strides: %u) != dims(kernel_sizes: %u).",
         strides.size(), kernel_sizes.size());
     PADDLE_ENFORCE_EQ(
@@ -96,7 +96,7 @@ class UnfoldOp : public framework::OperatorWithKernel {
         paddings.size(), strides.size());
     PADDLE_ENFORCE_EQ(
         strides.size(), dilations.size(),
-        "The dims of strides shold be the same with that of dilations. "
+        "The dims of strides should be the same with that of dilations. "
         "But recieved dims(strides: %u) != dims(dilations: %u).",
         strides.size(), dilations.size());
...
@@ -202,7 +202,7 @@ def round(x, d=0):
     """
     if six.PY3:
         # The official walkaround of round in Python3 is incorrect
-        # we implement accroding this answer: https://www.techforgeek.info/round_python.html
+        # we implement according this answer: https://www.techforgeek.info/round_python.html
         if x > 0.0:
             p = 10**d
             return float(math.floor((x * p) + math.copysign(0.5, x))) / p
...
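The positive branch shown above implements round-half-away-from-zero, unlike Python 3's built-in banker's rounding. A self-contained sketch; the negative branch here is an assumed mirror image of the positive one, since the hunk truncates before it:

```python
import math

def round_half_away(x, d=0):
    p = 10 ** d
    if x > 0.0:
        # The branch from the hunk above.
        return float(math.floor((x * p) + math.copysign(0.5, x))) / p
    if x < 0.0:
        # Assumed symmetric handling of negative values.
        return float(math.ceil((x * p) + math.copysign(0.5, x))) / p
    return 0.0

print(round(0.5), round_half_away(0.5))    # 0 vs 1.0 (banker's vs half-away)
print(round(-2.5), round_half_away(-2.5))  # -2 vs -3.0
```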
@@ -17,7 +17,7 @@ CIFAR dataset.
 This module will download dataset from https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz and https://dataset.bj.bcebos.com/cifar/cifar-100-python.tar.gz, parse train/test set into
 paddle reader creators.
-The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
+The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
 with 6000 images per class. There are 50000 training images and 10000 test
 images.
...
@@ -27,7 +27,7 @@ the image layout as follows.
 OpenCV use BGR color format. PIL use RGB color format. Both
 formats can be used for training. Noted that, the format should
-be keep consistent between the training and inference peroid.
+be keep consistent between the training and inference period.
 """
 from __future__ import print_function
...
@@ -112,7 +112,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang):
     trg_dict = __load_dict(tar_file, trg_dict_size,
                            ("de" if src_lang == "en" else "en"))
-    # the indice for start mark, end mark, and unk are the same in source
+    # the index for start mark, end mark, and unk are the same in source
     # language and target language. Here uses the source language
     # dictionary to determine their indices.
     start_id = src_dict[START_MARK]
...
@@ -192,7 +192,7 @@ class GradientClipByNorm(BaseGradientClipAttr):
     """
     Convert the input multidimensional Tensor :math:`X` to a multidimensional Tensor whose L2 norm does not exceed the given two-norm maximum ( :math:`clip\_norm` ).
-    The tensor is not passed through this class, but passed through the parametre of ``main_program`` in ``fluid.program_guard``.
+    The tensor is not passed through this class, but passed through the parameter of ``main_program`` in ``fluid.program_guard``.
     This class limits the L2 norm of the input :math:`X` within :math:`clip\_norm`.
...
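The clipping rule the docstring describes is simple to state; a minimal numpy sketch for illustration only, not the fluid implementation:

```python
import numpy as np

def clip_by_norm(x, clip_norm):
    # If ||x||_2 exceeds clip_norm, scale x by clip_norm / ||x||_2;
    # otherwise return it unchanged.
    norm = np.linalg.norm(x)
    return x if norm <= clip_norm else x * (clip_norm / norm)

g = np.array([3.0, 4.0])       # L2 norm = 5
print(clip_by_norm(g, 1.0))    # [0.6 0.8], norm clipped to 1
```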
@@ -156,7 +156,7 @@ def basic_gru(input,
               dtype='float32',
               name='basic_gru'):
     """
-    GRU implementation using basic operator, supports multiple layers and bidirection gru.
+    GRU implementation using basic operator, supports multiple layers and bidirectional gru.
     .. math::
         u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u)
@@ -377,7 +377,7 @@ def basic_lstm(input,
               dtype='float32',
               name='basic_lstm'):
     """
-    LSTM implementation using basic operators, supports multiple layers and bidirection LSTM.
+    LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM.
     .. math::
         i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)
...
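For reference, the update-gate equation quoted in the GRU docstring, u_t = actGate(W_ux x_t + W_uh h_{t-1} + b_u), amounts to the following numpy sketch, assuming sigmoid as actGate (the dimension names are illustrative):

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

input_size, hidden_size = 3, 4
W_ux = np.random.rand(hidden_size, input_size)
W_uh = np.random.rand(hidden_size, hidden_size)
b_u = np.zeros(hidden_size)

x_t = np.random.rand(input_size)      # current input
h_prev = np.random.rand(hidden_size)  # previous hidden state

# One update-gate evaluation: u_t = actGate(W_ux x_t + W_uh h_{t-1} + b_u)
u_t = sigmoid(W_ux @ x_t + W_uh @ h_prev + b_u)
print(u_t.shape)  # (4,)
```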
@@ -236,7 +236,7 @@ def infer(use_cuda, save_dirname=None):
     inference_scope = fluid.core.Scope()
     with fluid.scope_guard(inference_scope):
         # Use fluid.io.load_inference_model to obtain the inference program desc,
-        # the feed_target_names (the names of variables that will be feeded
+        # the feed_target_names (the names of variables that will be fed
         # data using feed operators), and the fetch_targets (variables that
         # we want to obtain data from using fetch operators).
         [inference_program, feed_target_names,
...
@@ -420,7 +420,7 @@ def get_inference_model(main_program, feeded_var_names, target_vars):
                              build the inference model. If is set None,
                              the default main program will be used.
                              Default: None.
-        feeded_var_names(list[str]): Names of variables that need to be feeded data
+        feeded_var_names(list[str]): Names of variables that need to be fed data
                              during inference.
         target_vars(list[Variable]): Variables from which we can get inference
                              results.
...
@@ -27,7 +27,7 @@ def data(name, shape, dtype='float32', lod_level=0):
     This function creates a variable on the global block. The global variable
     can be accessed by all the following operators in the graph. The variable
-    is a placeholder that could be feeded with input, such as Executor can feed
+    is a placeholder that could be fed with input, such as Executor can feed
     input into the variable.
     Note:
@@ -35,8 +35,8 @@ def data(name, shape, dtype='float32', lod_level=0):
        future version. Please use this `paddle.fluid.data`.
        The `paddle.fluid.layers.data` set shape and dtype at compile time but
-       does NOT check the shape or the dtype of feeded data, this
-       `paddle.fluid.data` checks the shape and the dtype of data feeded by
+       does NOT check the shape or the dtype of fed data, this
+       `paddle.fluid.data` checks the shape and the dtype of data fed by
        Executor or ParallelExecutor during run time.
        To feed variable size inputs, users can set -1 on the variable
...
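The distinction matters in practice: a placeholder declared with `paddle.fluid.data` rejects mis-shaped or mis-typed feeds at run time. A minimal usage sketch, assuming the fluid 1.x API shown in the hunk (name/shape/dtype as in the `def data(...)` signature above):

```python
import paddle.fluid as fluid

# -1 marks a variable-size (e.g. batch) dimension; the shape and dtype of
# any tensor fed into 'x' are checked by Executor/ParallelExecutor at run time.
x = fluid.data(name='x', shape=[-1, 784], dtype='float32')
```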
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""This is defination of dataset class, which is high performance IO."""
+"""This is definition of dataset class, which is high performance IO."""
 from paddle.fluid.proto import data_feed_pb2
 from google.protobuf import text_format
...
...@@ -196,7 +196,7 @@ def dimension_is_compatible_with(first, second): ...@@ -196,7 +196,7 @@ def dimension_is_compatible_with(first, second):
def check_feed_shape_type(var, feed, num_places=1): def check_feed_shape_type(var, feed, num_places=1):
""" """
Returns True if the variable doesn't require feed check or it is compatible Returns True if the variable doesn't require feed check or it is compatible
with the shape and have same dtype as the feeded value. with the shape and have same dtype as the fed value.
A dimension is compatible with the other if: A dimension is compatible with the other if:
1. The length of the dimensions are same. 1. The length of the dimensions are same.
...@@ -206,7 +206,7 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -206,7 +206,7 @@ def check_feed_shape_type(var, feed, num_places=1):
Args: Args:
var (Variable): the Variable object var (Variable): the Variable object
feed (LoDTensor): the feeded value, which must be a LoDTensor feed (LoDTensor): the fed value, which must be a LoDTensor
num_places: an integer value indicating the number of places. num_places: an integer value indicating the number of places.
ParallelExecutor will divide data into devices (CPU/GPU) evenly. ParallelExecutor will divide data into devices (CPU/GPU) evenly.
Returns: Returns:
...@@ -225,8 +225,8 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -225,8 +225,8 @@ def check_feed_shape_type(var, feed, num_places=1):
num_places) if len(feed.lod()) == 0 else -1 num_places) if len(feed.lod()) == 0 else -1
if not dimension_is_compatible_with(feed_shape, var.shape): if not dimension_is_compatible_with(feed_shape, var.shape):
raise ValueError( raise ValueError(
'The feeded Variable %r should have dimensions = %d, shape = ' 'The fed Variable %r should have dimensions = %d, shape = '
'%r, but received feeded shape %r on each device' % '%r, but received fed shape %r on each device' %
(var.name, len(var.shape), var.shape, feed_shape)) (var.name, len(var.shape), var.shape, feed_shape))
if not dtype_is_compatible_with(feed._dtype(), var.dtype): if not dtype_is_compatible_with(feed._dtype(), var.dtype):
var_dtype_format = convert_dtype(var.dtype) if isinstance( var_dtype_format = convert_dtype(var.dtype) if isinstance(
...@@ -234,8 +234,8 @@ def check_feed_shape_type(var, feed, num_places=1): ...@@ -234,8 +234,8 @@ def check_feed_shape_type(var, feed, num_places=1):
feed_dtype_format = convert_dtype(feed._dtype()) if isinstance( feed_dtype_format = convert_dtype(feed._dtype()) if isinstance(
feed._dtype(), core.VarDesc.VarType) else feed._dtype() feed._dtype(), core.VarDesc.VarType) else feed._dtype()
raise ValueError( raise ValueError(
'The data type of feeded Variable %r must be %r, but received %r' 'The data type of fed Variable %r must be %r, but received %r' %
% (var.name, var_dtype_format, feed_dtype_format)) (var.name, var_dtype_format, feed_dtype_format))
return True return True
......
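For reference, the compatibility rule this docstring describes can be sketched in a few lines of plain Python (a minimal sketch of my reading of dimension_is_compatible_with, not the fluid implementation itself; is_compatible is a hypothetical helper):

    def is_compatible(feed_shape, var_shape):
        # Compatible when the lengths match and every entry is equal,
        # or the declared entry is -1 (unknown until run time).
        if len(feed_shape) != len(var_shape):
            return False
        return all(v == -1 or f == v for f, v in zip(feed_shape, var_shape))

    assert is_compatible([32, 784], [-1, 784])      # batch dim declared as -1
    assert not is_compatible([32, 784], [32, 10])   # width mismatch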
...@@ -448,7 +448,7 @@ class PSLib(Fleet): ...@@ -448,7 +448,7 @@ class PSLib(Fleet):
model_proto_file(str): path of program desc proto binary model_proto_file(str): path of program desc proto binary
file, can be local or hdfs/afs file file, can be local or hdfs/afs file
var_names(list): var name list var_names(list): var name list
load_combine(bool): load from a file or splited param files load_combine(bool): load from a file or split param files
default False. default False.
Examples: Examples:
...@@ -502,7 +502,7 @@ class PSLib(Fleet): ...@@ -502,7 +502,7 @@ class PSLib(Fleet):
model_proto_file(str): path of program desc proto binary file, model_proto_file(str): path of program desc proto binary file,
can be local or hdfs/afs file can be local or hdfs/afs file
var_names(list): load var names var_names(list): load var names
load_combine(bool): load from a file or splited param files load_combine(bool): load from a file or split param files
""" """
self._role_maker._barrier_worker() self._role_maker._barrier_worker()
......
...@@ -1043,7 +1043,7 @@ def save_inference_model(dirname, ...@@ -1043,7 +1043,7 @@ def save_inference_model(dirname,
Args: Args:
dirname(str): The directory path to save the inference model. dirname(str): The directory path to save the inference model.
feeded_var_names(list[str]): list of string. Names of variables that need to be feeded feeded_var_names(list[str]): list of string. Names of variables that need to be fed
data during inference. data during inference.
target_vars(list[Variable]): list of Variable. Variables from which we can get target_vars(list[Variable]): list of Variable. Variables from which we can get
inference results. inference results.
......
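As context for the renamed feeded_var_names argument: the names recorded at save time are exactly the feed_target_names returned at load time. A sketch under the fluid 1.x API, with an illustrative one-layer network and directory name:

    import numpy
    import paddle.fluid as fluid

    image = fluid.data(name='image', shape=[-1, 784], dtype='float32')
    prediction = fluid.layers.fc(input=image, size=10, act='softmax')
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(fluid.default_startup_program())

    # Record which variables must be fed and which to fetch.
    fluid.io.save_inference_model(dirname="infer_model",
                                  feeded_var_names=['image'],
                                  target_vars=[prediction],
                                  executor=exe)

    # feed_target_names mirrors feeded_var_names above.
    [inference_program, feed_target_names,
     fetch_targets] = fluid.io.load_inference_model("infer_model", exe)
    results = exe.run(inference_program,
                      feed={feed_target_names[0]:
                            numpy.random.random((1, 784)).astype('float32')},
                      fetch_list=fetch_targets)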
...@@ -285,7 +285,7 @@ class LayerHelperBase(object): ...@@ -285,7 +285,7 @@ class LayerHelperBase(object):
Args: Args:
attr: [ParamAttr] should be the parameter attribute for this parameter attr: [ParamAttr] should be the parameter attribute for this parameter
shape: shape of the paramter shape: shape of the parameter
dtype: data type of this parameter dtype: data type of this parameter
is_bias: if this is a bias parameter is_bias: if this is a bias parameter
default_initializer: set the default initializer for this parameter default_initializer: set the default initializer for this parameter
......
...@@ -56,8 +56,8 @@ def data(name, ...@@ -56,8 +56,8 @@ def data(name,
a later version. Please use :code:`paddle.fluid.data` . a later version. Please use :code:`paddle.fluid.data` .
This :code:`paddle.fluid.layers.data` set shape and dtype at compile This :code:`paddle.fluid.layers.data` set shape and dtype at compile
time but does NOT check the shape or the dtype of feeded data, the time but does NOT check the shape or the dtype of fed data, the
:code:`paddle.fluid.data` checks the shape and the dtype of data feeded :code:`paddle.fluid.data` checks the shape and the dtype of data fed
by Executor or ParallelExecutor during run time. by Executor or ParallelExecutor during run time.
To feed variable size inputs, users can feed variable size inputs To feed variable size inputs, users can feed variable size inputs
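The compile-time versus run-time distinction drawn here, made concrete (a sketch; names and shapes are illustrative):

    import paddle.fluid as fluid

    # fluid.layers.data: shape/dtype are fixed at compile time only; a
    # fed array with a mismatching shape is not rejected at run time.
    x = fluid.layers.data(name='x', shape=[784], dtype='float32')

    # fluid.data: the Executor checks the shape and dtype of the fed
    # value at run time; -1 marks a dimension left variable on purpose.
    y = fluid.data(name='y', shape=[-1, 784], dtype='float32')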
...@@ -760,7 +760,7 @@ def create_py_reader_by_data(capacity, ...@@ -760,7 +760,7 @@ def create_py_reader_by_data(capacity,
reader.decorate_paddle_reader( reader.decorate_paddle_reader(
paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500)) paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500))
img, label = fluid.layers.read_file(reader) img, label = fluid.layers.read_file(reader)
loss = network(img, label) # The definition of custom network and the loss funtion loss = network(img, label) # The definition of custom network and the loss function
place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace()
exe = fluid.Executor(place) exe = fluid.Executor(place)
......
...@@ -4914,7 +4914,7 @@ def transpose(x, perm, name=None): ...@@ -4914,7 +4914,7 @@ def transpose(x, perm, name=None):
Args: Args:
x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32. x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32.
perm (list): Permute the input accoring to the data of perm. perm (list): Permute the input according to perm.
name (str): The name of this layer. It is optional. name (str): The name of this layer. It is optional.
Returns: Returns:
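To illustrate the corrected sentence: output axis i takes input axis perm[i] (a sketch, using the signature shown in this hunk):

    import paddle.fluid as fluid

    x = fluid.data(name='x', shape=[2, 3, 4], dtype='float32')
    # perm=[2, 0, 1] moves the last axis to the front,
    # so the result shape is [4, 2, 3].
    y = fluid.layers.transpose(x, perm=[2, 0, 1])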
...@@ -5488,7 +5488,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): ...@@ -5488,7 +5488,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None):
be set -1. be set -1.
2. 0 means the actual dimension value is going to be copied from the 2. 0 means the actual dimension value is going to be copied from the
corresponding dimension of x. The indice of 0s in shape can not exceed corresponding dimension of x. The indices of 0s in shape can not exceed
the dimension of x. the dimension of x.
Here are some examples to explain it. Here are some examples to explain it.
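One such example, covering the 0 and -1 conventions just described (a sketch):

    import paddle.fluid as fluid

    x = fluid.data(name='x', shape=[2, 4, 6], dtype='float32')
    # 0 copies the corresponding dimension of x (the leading 2);
    # -1 infers the remaining dimension, so the result is [2, 24].
    y = fluid.layers.reshape(x, shape=[0, -1])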
...@@ -6484,7 +6484,7 @@ def image_resize(input, ...@@ -6484,7 +6484,7 @@ def image_resize(input,
The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w) The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w)
or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape
(num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels), (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels),
and the resizing only applies on the three dimensions(depth, hight and width). and the resizing only applies on the three dimensions(depth, height and width).
**Warning:** the parameter :attr:`actual_shape` will be deprecated in the **Warning:** the parameter :attr:`actual_shape` will be deprecated in the
future and only use :attr:`out_shape` instead. future and only use :attr:`out_shape` instead.
...@@ -8396,7 +8396,7 @@ def pad2d(input, ...@@ -8396,7 +8396,7 @@ def pad2d(input,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
""" """
Pad 2-d images accordding to 'paddings' and 'mode'. Pad 2-d images according to 'paddings' and 'mode'.
If mode is 'reflect', paddings[0] and paddings[1] must be no greater If mode is 'reflect', paddings[0] and paddings[1] must be no greater
than height-1. And the width dimension has the same condition. than height-1. And the width dimension has the same condition.
...@@ -8418,7 +8418,7 @@ def pad2d(input, ...@@ -8418,7 +8418,7 @@ def pad2d(input,
name (str, optional) : The default value is None. Normally there is no need for name (str, optional) : The default value is None. Normally there is no need for
user to set this property. For more information, please refer to :ref:`api_guide_Name` . user to set this property. For more information, please refer to :ref:`api_guide_Name` .
Returns: a 4-D Tensor padded accordding to paddings and mode and data type is same as input. Returns: a 4-D Tensor padded according to paddings and mode and data type is same as input.
Return Type: Variable Return Type: Variable
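A sketch of the reflect-mode constraint described above (assuming the usual [top, bottom, left, right] order of paddings):

    import paddle.fluid as fluid

    img = fluid.data(name='img', shape=[-1, 3, 32, 32], dtype='float32')
    # With mode='reflect', paddings[0] and paddings[1] (the height pads,
    # both 1 here) must not exceed height-1 = 31; same for the width pads.
    out = fluid.layers.pad2d(img, paddings=[1, 1, 2, 2], mode='reflect')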
...@@ -13346,7 +13346,7 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): ...@@ -13346,7 +13346,7 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None):
[padding_h, padding_w, padding_h, padding_w]. If an integer [padding_h, padding_w, padding_h, padding_w]. If an integer
padding was given, [padding, padding, padding, padding] will padding was given, [padding, padding, padding, padding] will
be used. For default, paddings will be [0, 0, 0, 0] be used. For default, paddings will be [0, 0, 0, 0]
dilations(int|list): the dilations of convolution kernel, shold be dilations(int|list): the dilations of convolution kernel, should be
[dilation_h, dilation_w], or an integer dilation treated as [dilation_h, dilation_w], or an integer dilation treated as
[dilation, dilation]. For default, it will be [1, 1]. [dilation, dilation]. For default, it will be [1, 1].
name(str, optional): The default value is None. name(str, optional): The default value is None.
......
...@@ -2469,10 +2469,10 @@ def dynamic_gru(input, ...@@ -2469,10 +2469,10 @@ def dynamic_gru(input,
See usage for details in :ref:`api_fluid_ParamAttr` . See usage for details in :ref:`api_fluid_ParamAttr` .
is_reverse(bool, optional): Whether to compute in the reversed order of is_reverse(bool, optional): Whether to compute in the reversed order of
input sequences. Default False. input sequences. Default False.
gate_activation(str, optional): The activation fuction corresponding to gate_activation(str, optional): The activation function corresponding to
:math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "sigmoid". are supported. Default "sigmoid".
candidate_activation(str, optional): The activation fuction corresponding to candidate_activation(str, optional): The activation function corresponding to
:math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "tanh". are supported. Default "tanh".
h_0 (Variable, optional): A Tensor representing the initial hidden state. h_0 (Variable, optional): A Tensor representing the initial hidden state.
...@@ -2618,10 +2618,10 @@ def gru_unit(input, ...@@ -2618,10 +2618,10 @@ def gru_unit(input,
bias_attr (ParamAttr, optional): To specify the bias parameter property. bias_attr (ParamAttr, optional): To specify the bias parameter property.
Default: None, which means the default bias parameter property is used. Default: None, which means the default bias parameter property is used.
See usage for details in :ref:`api_fluid_ParamAttr` . See usage for details in :ref:`api_fluid_ParamAttr` .
activation(str, optional): The activation fuction corresponding to activation(str, optional): The activation function corresponding to
:math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "tanh". are supported. Default "tanh".
gate_activation(str, optional): The activation fuction corresponding to gate_activation(str, optional): The activation function corresponding to
:math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity"
are supported. Default "sigmoid". are supported. Default "sigmoid".
...@@ -2746,7 +2746,7 @@ def beam_search(pre_ids, ...@@ -2746,7 +2746,7 @@ def beam_search(pre_ids,
`[batch_size * beam_size, K]`, where `K` supposed to be greater than `[batch_size * beam_size, K]`, where `K` supposed to be greater than
``beam_size`` and the first dimension size (decrease as samples reach ``beam_size`` and the first dimension size (decrease as samples reach
to the end) should be same as that of ``pre_ids`` . The data type to the end) should be same as that of ``pre_ids`` . The data type
should be int64. It can be None, which use indice in ``scores`` as should be int64. It can be None, which uses the index in ``scores`` as
ids. ids.
scores(Variable): A LodTensor variable containing the accumulated scores(Variable): A LodTensor variable containing the accumulated
scores corresponding to ``ids`` . Both its shape and lod are same as scores corresponding to ``ids`` . Both its shape and lod are same as
...@@ -2765,7 +2765,7 @@ def beam_search(pre_ids, ...@@ -2765,7 +2765,7 @@ def beam_search(pre_ids,
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
return_parent_idx(bool, optional): Whether to return an extra Tensor variable return_parent_idx(bool, optional): Whether to return an extra Tensor variable
in output, which stores the selected ids' parent indice in in output, which stores the selected ids' parent index in
``pre_ids`` and can be used to update RNN's states by gather operator. ``pre_ids`` and can be used to update RNN's states by gather operator.
Default False. Default False.
...@@ -2774,7 +2774,7 @@ def beam_search(pre_ids, ...@@ -2774,7 +2774,7 @@ def beam_search(pre_ids,
representing the selected ids and the corresponding accumulated scores of \ representing the selected ids and the corresponding accumulated scores of \
current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \ current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \
and have data types int64 and float32. If ``return_parent_idx`` is True, \ and have data types int64 and float32. If ``return_parent_idx`` is True, \
an extra Tensor variable preserving the selected ids' parent indice \ an extra Tensor variable preserving the selected ids' parent index \
is included, whose shape is `[batch_size * beam_size]` and data type \ is included, whose shape is `[batch_size * beam_size]` and data type \
is int64. is int64.
......
...@@ -668,7 +668,7 @@ def fill_constant_batch_size_like(input, ...@@ -668,7 +668,7 @@ def fill_constant_batch_size_like(input,
output_dim_idx=0, output_dim_idx=0,
force_cpu=False): force_cpu=False):
""" """
This OP creates a Tesnor accroding the shape and dtype, and initializes the This OP creates a Tensor according to the shape and dtype, and initializes the
Tensor with the constants provided in ``value``. When the input is LoDTensor Tensor with the constants provided in ``value``. When the input is LoDTensor
and the input_dim_idx is 0, the output_dim_idx dimension is set to the value and the input_dim_idx is 0, the output_dim_idx dimension is set to the value
of the batch_size input by the input, the Stop_gradient attribute of the created of the batch_size input by the input, the Stop_gradient attribute of the created
......
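A sketch of the behavior this docstring describes (names illustrative): dimension output_dim_idx of the declared shape is overwritten at run time with the batch size read from dimension input_dim_idx of the input.

    import paddle.fluid as fluid

    like = fluid.data(name='like', shape=[-1, 784], dtype='float32')
    # The leading 1 below is replaced by like's runtime batch size; the
    # created tensor has stop_gradient set, as noted above.
    ones = fluid.layers.fill_constant_batch_size_like(
        input=like, shape=[1, 10], dtype='float32', value=1.0)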
...@@ -261,7 +261,7 @@ class CompositeMetric(MetricBase): ...@@ -261,7 +261,7 @@ class CompositeMetric(MetricBase):
Returns: Returns:
list: results of all added metrics. list: results of all added metrics.
The shape and dtype of each result depend on the defination of its metric. The shape and dtype of each result depend on the definition of its metric.
""" """
ans = [] ans = []
for m in self._metrics: for m in self._metrics:
......
...@@ -3378,10 +3378,10 @@ class PipelineOptimizer(object): ...@@ -3378,10 +3378,10 @@ class PipelineOptimizer(object):
""" """
Pipeline Optimizer Pipeline Optimizer
Train with pipeline mode. The program will be splited by cut_list. Train with pipeline mode. The program will be split by cut_list.
If the len of cut_list is k, then the whole program (including \ If the len of cut_list is k, then the whole program (including \
backward part) will be splited to 2*k-1 sections. backward part) will be split into 2*k-1 sections.
So the length of place_list and concurrency_list must be also 2*k-1. So the length of place_list and concurrency_list must be also 2*k-1.
......
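Concretely: with len(cut_list) == 3 the program (forward plus backward) becomes 2*3-1 = 5 sections, so place_list and concurrency_list must each supply 5 entries.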
...@@ -287,7 +287,7 @@ class ParallelExecutor(object): ...@@ -287,7 +287,7 @@ class ParallelExecutor(object):
loss_name=loss.name) loss_name=loss.name)
# If the feed is a dict: # If the feed is a dict:
# the image will be splitted into devices. If there is two devices # the image will be split into devices. If there are two devices
# each device will process an image with shape (5, 1) # each device will process an image with shape (5, 1)
x = numpy.random.random(size=(10, 1)).astype('float32') x = numpy.random.random(size=(10, 1)).astype('float32')
loss_data, = train_exe.run(feed={"X": x}, loss_data, = train_exe.run(feed={"X": x},
......
...@@ -125,7 +125,7 @@ class DataLoader(object): ...@@ -125,7 +125,7 @@ class DataLoader(object):
presented as a list. It is only valid when iterable=True. presented as a list. It is only valid when iterable=True.
If return_list=False, the return value on each device would If return_list=False, the return value on each device would
be a dict of str -> LoDTensor, where the key of the dict is be a dict of str -> LoDTensor, where the key of the dict is
the name of each feeded variables. If return_list=True, the the name of each fed variable. If return_list=True, the
return value on each device would be a list(LoDTensor). It is return value on each device would be a list(LoDTensor). It is
recommended to use return_list=False in static graph mode and recommended to use return_list=False in static graph mode and
use return_list=True in dygraph mode. use return_list=True in dygraph mode.
...@@ -891,7 +891,7 @@ class PyReader(DataLoaderBase): ...@@ -891,7 +891,7 @@ class PyReader(DataLoaderBase):
presented as a list. It is only valid when iterable=True. presented as a list. It is only valid when iterable=True.
If return_list=False, the return value on each device would If return_list=False, the return value on each device would
be a dict of str -> LoDTensor, where the key of the dict is be a dict of str -> LoDTensor, where the key of the dict is
the name of each feeded variables. If return_list=True, the the name of each fed variable. If return_list=True, the
return value on each device would be a list(LoDTensor). It is return value on each device would be a list(LoDTensor). It is
recommended to use return_list=False in static graph mode and recommended to use return_list=False in static graph mode and
use return_list=True in dygraph mode. use return_list=True in dygraph mode.
......
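The two return conventions described in both docstrings, side by side (a sketch under the fluid 1.x DataLoader; variable names are illustrative):

    import paddle.fluid as fluid

    image = fluid.data(name='image', shape=[-1, 784], dtype='float32')
    label = fluid.data(name='label', shape=[-1, 1], dtype='int64')
    loader = fluid.io.DataLoader.from_generator(
        feed_list=[image, label], capacity=8, iterable=True,
        return_list=False)
    # return_list=False: each step yields a dict keyed by the names of
    # the fed variables, {'image': LoDTensor, 'label': LoDTensor};
    # return_list=True would yield [LoDTensor, LoDTensor] instead,
    # the form recommended above for dygraph mode.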
...@@ -18,7 +18,7 @@ This module will download dataset from ...@@ -18,7 +18,7 @@ This module will download dataset from
https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into
paddle reader creators. paddle reader creators.
The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes,
with 6000 images per class. There are 50000 training images and 10000 test with 6000 images per class. There are 50000 training images and 10000 test
images. images.
......
...@@ -121,7 +121,7 @@ def infer_by_saved_model(use_cuda, save_dirname=None): ...@@ -121,7 +121,7 @@ def infer_by_saved_model(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -213,7 +213,7 @@ def infer(word_dict, use_cuda, save_dirname=None): ...@@ -213,7 +213,7 @@ def infer(word_dict, use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -103,7 +103,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -103,7 +103,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -210,7 +210,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -210,7 +210,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -247,7 +247,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -247,7 +247,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -254,7 +254,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -254,7 +254,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -208,7 +208,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -208,7 +208,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -151,7 +151,7 @@ def infer(use_cuda, save_dirname=None): ...@@ -151,7 +151,7 @@ def infer(use_cuda, save_dirname=None):
inference_scope = fluid.core.Scope() inference_scope = fluid.core.Scope()
with fluid.scope_guard(inference_scope): with fluid.scope_guard(inference_scope):
# Use fluid.io.load_inference_model to obtain the inference program desc, # Use fluid.io.load_inference_model to obtain the inference program desc,
# the feed_target_names (the names of variables that will be feeded # the feed_target_names (the names of variables that will be fed
# data using feed operators), and the fetch_targets (variables that # data using feed operators), and the fetch_targets (variables that
# we want to obtain data from using fetch operators). # we want to obtain data from using fetch operators).
[inference_program, feed_target_names, [inference_program, feed_target_names,
......
...@@ -272,7 +272,7 @@ class LearningRateScheduler(object): ...@@ -272,7 +272,7 @@ class LearningRateScheduler(object):
""" """
Wrapper for learning rate scheduling as described in the Transformer paper. Wrapper for learning rate scheduling as described in the Transformer paper.
LearningRateScheduler adapts the learning rate externally and the adapted LearningRateScheduler adapts the learning rate externally and the adapted
learning rate will be feeded into the main_program as input data. learning rate will be fed into the main_program as input data.
""" """
def __init__(self, def __init__(self,
......
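The schedule being wrapped is the one from the Transformer paper: lrate = d_model^-0.5 * min(step^-0.5, step * warmup_steps^-1.5), i.e. a linear warm-up followed by inverse-square-root decay. As a plain-Python reference (a sketch of the formula, not of the class itself):

    def transformer_lr(step, d_model=512, warmup_steps=4000):
        # Peaks at step == warmup_steps, then decays as step ** -0.5.
        return d_model ** -0.5 * min(step ** -0.5,
                                     step * warmup_steps ** -1.5)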
...@@ -130,7 +130,7 @@ class PassTest(unittest.TestCase): ...@@ -130,7 +130,7 @@ class PassTest(unittest.TestCase):
if startup_on_cpu and not isinstance(place, fluid.CPUPlace): if startup_on_cpu and not isinstance(place, fluid.CPUPlace):
warnings.warn( warnings.warn(
"Parameters are on CPU, and will be transfered to GPU " "Parameters are on CPU, and will be transferred to GPU "
"automatically by data transform.") "automatically by data transform.")
outs_opt, lods_opt = self._run_program(executor, opt_program) outs_opt, lods_opt = self._run_program(executor, opt_program)
......
...@@ -118,7 +118,7 @@ class TestBasicModel(TranspilerTest): ...@@ -118,7 +118,7 @@ class TestBasicModel(TranspilerTest):
trainer, trainer_startup = self.get_trainer() trainer, trainer_startup = self.get_trainer()
# splited var blocks should be in startup program # split var blocks should be in startup program
self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars) self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars)
self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars) self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars)
self.assertTrue("fc_w" in trainer_startup.global_block().vars) self.assertTrue("fc_w" in trainer_startup.global_block().vars)
......
...@@ -46,7 +46,7 @@ class TestFeedData(unittest.TestCase): ...@@ -46,7 +46,7 @@ class TestFeedData(unittest.TestCase):
def _get_feed_batch_size(self, use_cuda, use_parallel_executor): def _get_feed_batch_size(self, use_cuda, use_parallel_executor):
""" """
Returns actual feeded data size. We should multiple the number of Returns the actual fed data size. We should multiply by the number of
devices when it is using ParallelExecutor devices when it is using ParallelExecutor
""" """
return self.data_batch_size * self._get_device_count( return self.data_batch_size * self._get_device_count(
...@@ -100,8 +100,8 @@ class TestFeedData(unittest.TestCase): ...@@ -100,8 +100,8 @@ class TestFeedData(unittest.TestCase):
use_parallel_executor) use_parallel_executor)
self.assertEqual( self.assertEqual(
str(shape_mismatch_err.exception), str(shape_mismatch_err.exception),
"The feeded Variable %r should have dimensions = %r, " "The fed Variable %r should have dimensions = %r, "
"shape = %r, but received feeded shape %r on each device" % "shape = %r, but received fed shape %r on each device" %
(u'data', len(in_shape_tuple), in_shape_tuple, (u'data', len(in_shape_tuple), in_shape_tuple,
error_shape_list)) error_shape_list))
...@@ -110,7 +110,7 @@ class TestFeedData(unittest.TestCase): ...@@ -110,7 +110,7 @@ class TestFeedData(unittest.TestCase):
use_parallel_executor) use_parallel_executor)
self.assertEqual( self.assertEqual(
str(dtype_mismatch_err.exception), str(dtype_mismatch_err.exception),
"The data type of feeded Variable %r must be 'int64', but " "The data type of fed Variable %r must be 'int64', but "
"received 'float64'" % (u'label')) "received 'float64'" % (u'label'))
def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor): def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor):
......
...@@ -905,7 +905,7 @@ class TestLoadFromOldInterface(unittest.TestCase): ...@@ -905,7 +905,7 @@ class TestLoadFromOldInterface(unittest.TestCase):
with self.assertRaises(RuntimeError): with self.assertRaises(RuntimeError):
fluid.load(main_program, "test_path", exe) fluid.load(main_program, "test_path", exe)
# check unused paramter # check unused parameter
fluid.load(test_clone_program, "test_path", exe) fluid.load(test_clone_program, "test_path", exe)
......
...@@ -16,11 +16,11 @@ from __future__ import print_function ...@@ -16,11 +16,11 @@ from __future__ import print_function
""" """
Steps to transpile trainer: Steps to transpile trainer:
1. split variable to multiple blocks, aligned by product(dim[1:]) (width). 1. split variable to multiple blocks, aligned by product(dim[1:]) (width).
2. rename splited grad variables to add trainer_id suffix ".trainer_%d". 2. rename split grad variables to add trainer_id suffix ".trainer_%d".
3. modify trainer program add split_op to each grad variable. 3. modify trainer program add split_op to each grad variable.
4. append send_op to send splited variables to server and 4. append send_op to send split variables to server and
5. add recv_op to fetch params(splited blocks or origin param) from server. 5. add recv_op to fetch params(split blocks or origin param) from server.
6. append concat_op to merge splited blocks to update local weights. 6. append concat_op to merge split blocks to update local weights.
Steps to transpile pserver: Steps to transpile pserver:
1. create new program for parameter server. 1. create new program for parameter server.
...@@ -106,7 +106,7 @@ def slice_variable(var_list, slice_count, min_block_size): ...@@ -106,7 +106,7 @@ def slice_variable(var_list, slice_count, min_block_size):
var_list (list): List of variables. var_list (list): List of variables.
slice_count (int): The number of slices into which the variables will be sliced, which slice_count (int): The number of slices into which the variables will be sliced, which
could be the pserver services' count. could be the pserver services' count.
min_block_size (int): Minimum splitted block size. min_block_size (int): Minimum split block size.
Returns: Returns:
blocks (list[(varname, block_id, current_block_size)]): A list blocks (list[(varname, block_id, current_block_size)]): A list
of VarBlocks. Each VarBlock specifies a shard of the var. of VarBlocks. Each VarBlock specifies a shard of the var.
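A self-contained sketch of that contract (my reading of it, not the transpiler's exact code): each variable is cut into at most slice_count blocks of roughly min_block_size elements or more, with block boundaries aligned to whole rows of width product(dim[1:]).

    from collections import namedtuple

    Var = namedtuple('Var', ['name', 'shape'])

    def slice_variable(var_list, slice_count, min_block_size):
        blocks = []
        for var in var_list:
            numel = 1
            for d in var.shape:
                numel *= d
            split_count = min(slice_count, max(1, numel // min_block_size))
            block_size = (numel + split_count - 1) // split_count
            # Align each block to whole rows of width product(dim[1:]).
            width = numel // var.shape[0]
            if block_size % width:
                block_size += width - block_size % width
            split_count = (numel + block_size - 1) // block_size
            for block_id in range(split_count):
                cur = min(block_size, numel - block_id * block_size)
                blocks.append((var.name, block_id, cur))
        return blocks

    # A (20, 5) variable, 3 pservers, min block of 30 elements:
    print(slice_variable([Var('fc_w', (20, 5))], 3, 30))
    # [('fc_w', 0, 35), ('fc_w', 1, 35), ('fc_w', 2, 30)]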
...@@ -157,7 +157,7 @@ class DistributeTranspilerConfig(object): ...@@ -157,7 +157,7 @@ class DistributeTranspilerConfig(object):
.. py:attribute:: min_block_size (int) .. py:attribute:: min_block_size (int)
Minimum number of splitted elements in block, default is 8192. Minimum number of split elements in block, default is 8192.
According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156 According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156
We can use bandwidth efficiently when data size is larger than 2MB. If you We can use bandwidth efficiently when data size is larger than 2MB. If you
...@@ -667,8 +667,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -667,8 +667,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
self.origin_program._is_chief = self.trainer_id == 0 self.origin_program._is_chief = self.trainer_id == 0
self.origin_program._distributed_lookup_table = self.table_name if self.table_name else None self.origin_program._distributed_lookup_table = self.table_name if self.table_name else None
# split and create vars, then put splited vars in dicts for later use. # split and create vars, then put split vars in dicts for later use.
# step 1: split and create vars, then put splited vars in dicts for later use. # step 1: split and create vars, then put split vars in dicts for later use.
self._init_splited_vars() self._init_splited_vars()
# step 2: insert send op to send gradient vars to parameter servers # step 2: insert send op to send gradient vars to parameter servers
...@@ -742,8 +742,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -742,8 +742,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
sections = [] sections = []
send_varnames = [] send_varnames = []
# get send op_role_var, if not splited, the grad should have .trainer suffix # get send op_role_var, if not split, the grad should have .trainer suffix
# if splited, grad should be the original grad var name (split_by_ref and send # if split, grad should be the original grad var name (split_by_ref and send
# will be on the same place). ParallelExecutor # will be on the same place). ParallelExecutor
# will use op_role_var to get expected device place to run this op. # will use op_role_var to get expected device place to run this op.
program.global_block()._insert_op( program.global_block()._insert_op(
...@@ -860,8 +860,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -860,8 +860,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
recv_dep_in = self.grad_name_to_send_dummy_out[ recv_dep_in = self.grad_name_to_send_dummy_out[
self.param_name_to_grad_name[param_varname]] self.param_name_to_grad_name[param_varname]]
# get recv op_role_var, if not splited, the grad should have .trainer suffix # get recv op_role_var, if not split, the grad should have .trainer suffix
# if splited, grad should be the original grad var name. ParallelExecutor # if split, grad should be the original grad var name. ParallelExecutor
# will use op_role_var to get expected device place to run this op. # will use op_role_var to get expected device place to run this op.
orig_grad_name = self.param_name_to_grad_name[param_varname] orig_grad_name = self.param_name_to_grad_name[param_varname]
recv_op_role_var_name = orig_grad_name recv_op_role_var_name = orig_grad_name
...@@ -1120,7 +1120,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1120,7 +1120,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
for varname, splited_var in six.iteritems(self.param_var_mapping): for varname, splited_var in six.iteritems(self.param_var_mapping):
if varname in sparse_table_names: if varname in sparse_table_names:
continue continue
# add concat ops to merge splited parameters received from parameter servers. # add concat ops to merge split parameters received from parameter servers.
if len(splited_var) <= 1: if len(splited_var) <= 1:
continue continue
# NOTE: if enable memory optimization, origin vars maybe removed. # NOTE: if enable memory optimization, origin vars maybe removed.
...@@ -1670,8 +1670,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1670,8 +1670,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
def _init_splited_vars(self): def _init_splited_vars(self):
# update these mappings for further transpile: # update these mappings for further transpile:
# 1. param_var_mapping: param var name -> [splited params vars] # 1. param_var_mapping: param var name -> [split param vars]
# 2. grad_var_mapping: grad var name -> [splited grads vars] # 2. grad_var_mapping: grad var name -> [split grad vars]
# 3. grad_param_mapping: grad.blockx -> param.blockx # 3. grad_param_mapping: grad.blockx -> param.blockx
# 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []} # 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []}
...@@ -1966,7 +1966,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -1966,7 +1966,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
outputs={"Out": [grad_var]}, outputs={"Out": [grad_var]},
attrs={"use_mkldnn": False}) attrs={"use_mkldnn": False})
else: else:
# in async_mode, for table gradient, it also need to be splited to each parameter server # in async_mode, for table gradient, it also needs to be split to each parameter server
origin_grad_name = grad_var.name origin_grad_name = grad_var.name
splited_grad_name = self.trainer_side_table_grad_list[ splited_grad_name = self.trainer_side_table_grad_list[
pserver_index].name pserver_index].name
...@@ -2040,9 +2040,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2040,9 +2040,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
block_map[varname] = [] block_map[varname] = []
block_map[varname].append((int(offset), int(size))) block_map[varname].append((int(offset), int(size)))
for varname, splited in six.iteritems(block_map): for varname, split in six.iteritems(block_map):
orig_var = program.global_block().var(varname) orig_var = program.global_block().var(varname)
if len(splited) == 1: if len(split) == 1:
if self.sync_mode and add_trainer_suffix: if self.sync_mode and add_trainer_suffix:
new_var_name = "%s.trainer_%d" % \ new_var_name = "%s.trainer_%d" % \
(orig_var.name, self.trainer_id) (orig_var.name, self.trainer_id)
...@@ -2059,7 +2059,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2059,7 +2059,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if len(orig_shape) >= 2: if len(orig_shape) >= 2:
orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:]) orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:])
for i, block in enumerate(splited): for i, block in enumerate(split):
size = block[1] size = block[1]
rows = size // orig_dim1_flatten rows = size // orig_dim1_flatten
splited_shape = [rows] splited_shape = [rows]
...@@ -2077,7 +2077,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2077,7 +2077,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
persistable=False, persistable=False,
dtype=orig_var.dtype, dtype=orig_var.dtype,
type=orig_var.type, type=orig_var.type,
shape=splited_shape) # flattend splited var shape=splited_shape) # flattened split var
var_mapping[varname].append(var) var_mapping[varname].append(var)
program.global_block()._sync_with_cpp() program.global_block()._sync_with_cpp()
return var_mapping return var_mapping
...@@ -2393,9 +2393,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2393,9 +2393,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if the variable is not grad/param, e.g. if the variable is not grad/param, e.g.
a@GRAD -> a@GRAD.block0 a@GRAD -> a@GRAD.block0
a@GRAD -> a@GRAD (a is not splited) a@GRAD -> a@GRAD (a is not split)
fc_0.w_0 -> fc_0.w_0.block_0 fc_0.w_0 -> fc_0.w_0.block_0
fc_0.w_0 -> fc_0.w_0 (weight is not splited) fc_0.w_0 -> fc_0.w_0 (weight is not split)
_generated_var_123 -> None _generated_var_123 -> None
""" """
grad_block = None grad_block = None
...@@ -2403,7 +2403,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2403,7 +2403,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
if self._orig_varname(g.name) == self._orig_varname(var.name): if self._orig_varname(g.name) == self._orig_varname(var.name):
# skip per trainer vars # skip per trainer vars
if g.name.find(".trainer_") == -1: if g.name.find(".trainer_") == -1:
# only param or grads have splited blocks # only param or grads have split blocks
if self._orig_varname(g.name) in self.grad_name_to_param_name or \ if self._orig_varname(g.name) in self.grad_name_to_param_name or \
self._orig_varname(g.name) in self.param_name_to_grad_name: self._orig_varname(g.name) in self.param_name_to_grad_name:
grad_block = g grad_block = g
...@@ -2442,7 +2442,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler ...@@ -2442,7 +2442,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler
varlist = [varlist] varlist = [varlist]
for i in range(len(varlist)): for i in range(len(varlist)):
var = varlist[i] var = varlist[i]
# for ops like clipping and weight decay, get the splited var (xxx.block0) # for ops like clipping and weight decay, get the split var (xxx.block0)
# for inputs/outputs # for inputs/outputs
grad_block = self._get_pserver_grad_param_var( grad_block = self._get_pserver_grad_param_var(
var, program.global_block().vars) var, program.global_block().vars)
......
...@@ -108,15 +108,15 @@ class GeoSgdTranspiler(DistributeTranspiler): ...@@ -108,15 +108,15 @@ class GeoSgdTranspiler(DistributeTranspiler):
self.sparse_var_list = [] self.sparse_var_list = []
self.sparse_var_splited_list = [] self.sparse_var_splited_list = []
# split and create vars, then put splited vars in dicts for later use. # split and create vars, then put split vars in dicts for later use.
# step 1. split and create vars, then put splited vars in dicts for later use. # step 1. split and create vars, then put split vars in dicts for later use.
self._init_splited_vars() self._init_splited_vars()
# step 3. create send recv var (param after optimize) # step 3. create send recv var (param after optimize)
send_vars = [] send_vars = []
ps_dispatcher.reset() ps_dispatcher.reset()
param_var_mapping_items = list(six.iteritems(self.param_var_mapping)) param_var_mapping_items = list(six.iteritems(self.param_var_mapping))
# send_vars is the parameter which splited by communicator and send to pserver,not the origin parameter # send_vars is the parameter which is split by the communicator and sent to the pserver, not the origin parameter
for _, splited_vars in param_var_mapping_items: for _, splited_vars in param_var_mapping_items:
for _, var in enumerate(splited_vars): for _, var in enumerate(splited_vars):
send_vars.append(var) send_vars.append(var)
...@@ -292,7 +292,7 @@ class GeoSgdTranspiler(DistributeTranspiler): ...@@ -292,7 +292,7 @@ class GeoSgdTranspiler(DistributeTranspiler):
len(self.pserver_endpoints), len(self.pserver_endpoints),
self.config.min_block_size) self.config.min_block_size)
# step 3. Create splited param from split blocks # step 3. Create split param from split blocks
# origin_param_name -> [splited_param_vars] # origin_param_name -> [splited_param_vars]
# Todo: update _create_vars_from_blocklist # Todo: update _create_vars_from_blocklist
self.param_var_mapping = self._create_vars_from_blocklist( self.param_var_mapping = self._create_vars_from_blocklist(
......
...@@ -301,9 +301,9 @@ class DatasetCreater(object): ...@@ -301,9 +301,9 @@ class DatasetCreater(object):
Create a data set object from a path. Create a data set object from a path.
It will use directory structure or a file list to determine dataset if It will use directory structure or a file list to determine dataset if
self.from_list is True. Otherwise, it will use a file list to self.from_list is True. Otherwise, it will use a file list to
determine the datset. determine the dataset.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id. to label id.
""" """
if self.from_list: if self.from_list:
...@@ -314,9 +314,9 @@ class DatasetCreater(object): ...@@ -314,9 +314,9 @@ class DatasetCreater(object):
def create_dataset_from_list(self, path): def create_dataset_from_list(self, path):
""" """
Create a data set object from a path. Create a data set object from a path.
It will uses a file list to determine the datset. It will use a file list to determine the dataset.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id to label id
""" """
raise NotImplementedError raise NotImplementedError
...@@ -327,7 +327,7 @@ class DatasetCreater(object): ...@@ -327,7 +327,7 @@ class DatasetCreater(object):
It will use directory structure or a file list to determine dataset if It will use directory structure or a file list to determine dataset if
self.from_list is True. self.from_list is True.
path: the path of the dataset. path: the path of the dataset.
return a tuple of Dataset object, and a mapping from lable set return a tuple of Dataset object, and a mapping from label set
to label id to label id
""" """
raise NotImplementedError raise NotImplementedError
......
#!/bin/bash #!/bin/bash
if [ -z ${BRANCH} ]; then if [ -z ${BRANCH} ]; then
BRANCH="develop" BRANCH="develop"
fi fi
......