diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 72425f3f485b3fa547ad37b982b9c3bdb8756315..14a8bede08711f8106f6a8fbb5e86e32aea4499b 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -48,7 +48,7 @@ if(WIN32) SET(CMAKE_C_RESPONSE_FILE_LINK_FLAG "@") SET(CMAKE_CXX_RESPONSE_FILE_LINK_FLAG "@") - # set defination for the dll export + # set definition for the dll export if (NOT MSVC) message(FATAL "Windows build only support msvc. Which was binded by the nvcc compiler of NVIDIA.") endif(NOT MSVC) diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake index 950bd4ca0972f15cd4747ecf967ecc258fbf525e..bd59801b950ab099fdf88cbd6f5843fd3b95f09c 100644 --- a/cmake/third_party.cmake +++ b/cmake/third_party.cmake @@ -174,7 +174,7 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2") set(SHALLOW_CLONE "GIT_SHALLOW TRUE") # adds --depth=1 arg to git clone of External_Projects endif() -########################### include third_party accoring to flags ############################### +########################### include third_party according to flags ############################### include(external/zlib) # download, build, install zlib include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 935931b8150373c7cc26793252918bb098ed55df..d47ac6bae7484a366162d003277bfaf96edb29d2 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -857,7 +857,7 @@ int DistSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ir::Node *node) const { op_dev_id = GetVarDeviceID(node->inputs[0]->Name()); PADDLE_ENFORCE(!ir::IsControlDepVar(*node->inputs[0]), "This hack no longer holds, please fix."); - // the variable name which contains .block means it was splited by + // the variable name which contains .block means it was split by // split_byref op if (strategy_.reduce_ == details::BuildStrategy::ReduceStrategy::kAllReduce && diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 6ffe3d87136c483d3930b45b01ed4388d09e118e..d64f3bf8bb71dccdfa8ef82a8b9f00384c534712 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -990,7 +990,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } if (!transfered_inplace_vars.empty()) { - // there is inplace variable has been transfered. + // there is inplace variable has been transferred. TransferInplaceVarsBack(scope, transfered_inplace_vars, *transfer_scope); } if (FLAGS_enable_unused_var_check) { diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 97d2dad06c8a0999d232ecaa2cc85b108b607c36..7fdc3b033eea6b98f47825cd78ffd92a0486e454 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -517,7 +517,8 @@ class OperatorWithKernel : public OperatorBase { RuntimeContext* runtime_ctx) const; /** - * Transfer data from scope to a transfered scope. If there is no data need to + * Transfer data from scope to a transferred scope. If there is no data need + * to * be tranfered, it returns nullptr. * * * transfered_inplace_vars is a output vector. 
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 7c4fa248a4181605d0713c314d1b38938591b350..f43ca6ff6128368e4b2422d1692ebaa1af2b9540 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -87,18 +87,18 @@ class ParallelExecutorPrivate { inline bool HasGarbageCollectors() const { return !gcs_.empty(); } /** - * NOTE(zengjinle): the feeded variables of users should not be reused, - * because users may feed them into another network. Changing the feeded + * NOTE(zengjinle): the fed variables of users should not be reused, + * because users may feed them into another network. Changing the fed * variables that users can visit may cause calculation wrong, which is * a very subtle bug when traning networks. However, these variables * can be garbage collected. * * ParallelExecutor provides 2 methods to feed variables: * - * - FeedTensorsIntoLocalScopes: this method would share memory of feeded + * - FeedTensorsIntoLocalScopes: this method would share memory of fed * variables, so we have to skip these. * - * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of feeded + * - FeedAndSplitTensorIntoLocalScopes: this method would copy data of fed * variables, so we do not need to skip * them. */ diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h index 138992794372c14452fe4b25d1b57f51aaa7427f..6a346dff2fd3650be409ca15dae2b1dd9b7093f4 100644 --- a/paddle/fluid/framework/reader.h +++ b/paddle/fluid/framework/reader.h @@ -53,10 +53,10 @@ class ReaderBase { // they are readers just before read op. std::unordered_set GetEndPoints(); - // Returns the shapes of the feeded variables + // Returns the shapes of the fed variables const std::vector& Shapes() const { return shapes_; } - // Returns the dtypes of the feeded variables + // Returns the dtypes of the fed variables const std::vector& VarTypes() const { return var_types_; } @@ -80,13 +80,13 @@ class ReaderBase { mutable std::mutex mu_; - // The shapes of the feeded variables. + // The shapes of the fed variables. std::vector shapes_; - // The dtypes of the feeded variables. + // The dtypes of the fed variables. std::vector var_types_; - // Whether to check the shape and dtype of feeded variables. + // Whether to check the shape and dtype of fed variables. // For Backward compatibility, variables created by old API fluid.layers.data // doesn't check shape but fluid.data checks. 
std::vector need_check_feed_; diff --git a/paddle/fluid/imperative/tests/test_prepare_op.cc b/paddle/fluid/imperative/tests/test_prepare_op.cc index 4304376a9e9cefd8fe24314e5e32d4265ff1d6b7..6a5c1621807b6afe819e4a7619567b919951dcf2 100644 --- a/paddle/fluid/imperative/tests/test_prepare_op.cc +++ b/paddle/fluid/imperative/tests/test_prepare_op.cc @@ -210,7 +210,7 @@ TEST(test_prepare_op, test_prepare_data_same_place) { attr_map); framework::RuntimeContext ctx = PrepareRuntimeContext(ins, outs); - // test if it never transfered on GPU place + // test if it never transferred on GPU place PreparedOp prepared_op = PreparedOp::Prepare( ins, outs, dynamic_cast(*op), cpu_place, &attr_map); diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt index aa2fce14fa2f9feab03bb3f570b06b20c1e97afd..bca662b870b01f62d8619b63d4b6f908c37a70fd 100644 --- a/paddle/fluid/inference/CMakeLists.txt +++ b/paddle/fluid/inference/CMakeLists.txt @@ -14,7 +14,7 @@ # if(WITH_TESTING) - include(tests/test.cmake) # some generic cmake funtion for inference + include(tests/test.cmake) # some generic cmake function for inference endif() # TODO(panyx0718): Should this be called paddle_fluid_inference_api_internal? diff --git a/paddle/fluid/inference/api/demo_ci/README.md b/paddle/fluid/inference/api/demo_ci/README.md index 7f013da7f30acd84ec484773f4ea716a08efa0ff..928ff84baac5eaef3d60a73d6dfdf93b078c2117 100644 --- a/paddle/fluid/inference/api/demo_ci/README.md +++ b/paddle/fluid/inference/api/demo_ci/README.md @@ -12,7 +12,7 @@ There are several demos: - Each line contains a single record - Each record's format is ``` - \t + \t ``` To build and execute the demos, simply run diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index f7da55c9ae368763786c1b1fd3e86d942c5e9fe8..f9d747c1f04970505652d1d2100f1b2164d9f224 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -23,10 +23,9 @@ limitations under the License. */ DECLARE_double(fraction_of_gpu_memory_to_use); DEFINE_string(modeldir, "", "Directory of the inference model."); DEFINE_string(refer, "", "path to reference result for comparison."); -DEFINE_string( - data, "", - "path of data; each line is a record, format is " - "'\t\t\t\t("is_bidirec", "is_bidirec" - "if it is bidirection rnn" + "if it is bidirectional rnn" "The will affect the shape of the Out, last_h, and last_c") .SetDefault(false); AddAttr("input_size", "input size ot the Input Tensor").SetDefault(10); diff --git a/paddle/fluid/operators/detection/density_prior_box_op.cc b/paddle/fluid/operators/detection/density_prior_box_op.cc index 8a71ed0b13fd252e3c3748e7a486668f65aee3e4..27f3d85056172d2f726e6da978d42b2322a3f13c 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.cc +++ b/paddle/fluid/operators/detection/density_prior_box_op.cc @@ -35,7 +35,7 @@ class DensityPriorBoxOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "The input tensor Input's height" "of DensityPriorBoxOp should be smaller than input tensor Image's" - "hight. But received Input's height = %d, Image's height = %d", + "height. 
But received Input's height = %d, Image's height = %d", input_dims[2], image_dims[2])); PADDLE_ENFORCE_LT( diff --git a/paddle/fluid/operators/distributed/communicator.cc b/paddle/fluid/operators/distributed/communicator.cc index 48ad7dc318bdeca8ea370250d204d3952729763a..5880e0fdc8b15f7b1423c69766254a316b24aefc 100644 --- a/paddle/fluid/operators/distributed/communicator.cc +++ b/paddle/fluid/operators/distributed/communicator.cc @@ -543,7 +543,7 @@ std::unordered_set GeoSgdCommunicator::SparseIdsMerge( const std::string &splited_var_name) { // every batch has some sparse id, merge them into one unoredered_set VLOG(4) << "Sparse Ids merge var: " << var_name - << " splited var: " << splited_var_name; + << " split var: " << splited_var_name; auto before_run_ids_merge_ = GetCurrentUS(); auto origin_var_name = DeltaVarToVar(var_name); auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name); @@ -567,9 +567,8 @@ void GeoSgdCommunicator::SendUpdateDenseVars( // var_name: param.delta auto origin_var_name = DeltaVarToVar(var_name); auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name); - VLOG(4) << "Dense var: " << var_name - << " 's splited var: " << splited_var_name - << " splited var index: " << splited_var_index; + VLOG(4) << "Dense var: " << var_name << " 's split var: " << splited_var_name + << " split var index: " << splited_var_index; auto before_run_send_dense = GetCurrentUS(); auto cpu_ctx = paddle::platform::CPUDeviceContext(); @@ -592,7 +591,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars( begin_loc = absolute_section_[origin_var_name][splited_var_index]; dimension = total_element / vars_first_dimension_[origin_var_name]; total_element = section * dimension; - VLOG(4) << "Dense splited var: " << splited_var_name + VLOG(4) << "Dense split var: " << splited_var_name << " section: " << section << " dimension: " << dimension << " begin loc: " << begin_loc << " total_element " << total_element; @@ -600,12 +599,12 @@ void GeoSgdCommunicator::SendUpdateDenseVars( auto *var_x_data = var_x_tensor.mutable_data(var_x_tensor.place()) + begin_loc * dimension; - VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] " << var_x_data[0] << " var_x_data[end] " << var_x_data[total_element - 1]; auto *var_y_data = var_y_tensor.mutable_data(var_y_tensor.place()) + begin_loc * dimension; - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] " << var_y_data[0] << " var_y_data[end] " << var_y_data[total_element - 1]; @@ -616,14 +615,14 @@ void GeoSgdCommunicator::SendUpdateDenseVars( var_z_tensor->mutable_data(dims, cpu_ctx.GetPlace()); auto *var_z_data = var_z_tensor->mutable_data(cpu_ctx.GetPlace()); - VLOG(4) << "Dense splited var: " << splited_var_name << "var_z_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << "var_z_data[0] " << var_z_data[0] << " var_z_data[end] " << var_z_data[total_element - 1]; // calc sub = var_training - var_old auto blas = math::GetBlas(cpu_ctx); blas.VSUB(total_element, var_x_data, var_y_data, var_z_data); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] " << var_z_data[0] << " var_z_data[end] " << var_z_data[total_element - 1]; @@ -633,7 +632,7 @@ void GeoSgdCommunicator::SendUpdateDenseVars( // calc var_old += var_delta blas.VADD(total_element, 
var_y_data, var_z_data, var_y_data); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] " << var_y_data[0] << " var_y_data[end] " << var_y_data[total_element - 1]; @@ -763,7 +762,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars( section = dims[0]; begin_loc = absolute_section_[origin_var_name][splited_var_index]; dimension = total_element / section; - VLOG(4) << "Dense splited var: " << splited_var_name + VLOG(4) << "Dense split var: " << splited_var_name << " section: " << section << " dimension: " << dimension << " begin loc: " << begin_loc << " total_element " << total_element; @@ -771,18 +770,18 @@ void GeoSgdCommunicator::RecvUpdateDenseVars( auto *var_x_data = var_x_tensor.mutable_data(var_x_tensor.place()) + begin_loc * dimension; - VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] " << var_x_data[0] << " var_x_data[end] " << var_x_data[total_element - 1]; auto *var_y_data = var_y_tensor.mutable_data(var_y_tensor.place()) + begin_loc * dimension; - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] " << var_y_data[0] << " var_y_data[end] " << var_y_data[total_element - 1]; auto *var_z_data = var_z_tensor.mutable_data(cpu_ctx.GetPlace()); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_z_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_z_data[0] " << var_z_data[0] << " var_z_data[end] " << var_z_data[total_element - 1]; @@ -793,7 +792,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars( auto *var_y_sub_data = var_y_sub_tensor->mutable_data(cpu_ctx.GetPlace()); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] " << var_y_sub_data[0] << " var_y_sub_data[end] " << var_y_sub_data[total_element - 1]; @@ -801,19 +800,19 @@ void GeoSgdCommunicator::RecvUpdateDenseVars( // calc sub = pserver - old blas.VSUB(total_element, var_z_data, var_y_data, var_y_sub_data); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_sub_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_sub_data[0] " << var_y_sub_data[0] << " var_y_sub_data[end] " << var_y_sub_data[total_element - 1]; // calc train += sub blas.VADD(total_element, var_x_data, var_y_sub_data, var_x_data); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_x_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_x_data[0] " << var_x_data[0] << " var_x_data[end] " << var_x_data[total_element - 1]; // calc old = pserver blas.VCOPY(total_element, var_z_data, var_y_data); - VLOG(4) << "Dense splited var: " << splited_var_name << " var_y_data[0] " + VLOG(4) << "Dense split var: " << splited_var_name << " var_y_data[0] " << var_y_data[0] << " var_y_data[end] " << var_y_data[total_element - 1]; @@ -824,7 +823,7 @@ void GeoSgdCommunicator::RecvUpdateDenseVars( void GeoSgdCommunicator::RecvUpdateSparseVars( const std::string &var_name, const std::string &splited_var_name) { - // step 1: recv splited var from pserver + // step 1: recv split var from pserver auto splited_var_index = GetSplitedVarIndex(var_name, splited_var_name); auto origin_var_name = DeltaVarToVar(var_name); auto origin_splited_var_name = DeltaVarToVar(splited_var_name); diff --git 
a/paddle/fluid/operators/distributed_ops/recv_op.cc b/paddle/fluid/operators/distributed_ops/recv_op.cc index a2b7f2f312b4c6647c0fc6f004ef9ab89d6bad05..30353ef35d754eae5bf98209b7c5ad5d25683cb4 100644 --- a/paddle/fluid/operators/distributed_ops/recv_op.cc +++ b/paddle/fluid/operators/distributed_ops/recv_op.cc @@ -122,7 +122,7 @@ This operator can get variables from server side. AddAttr>( "recv_varnames", "(vector) " - "the splited parameter varnames to be recved from pserver") + "the split parameter varnames to be recved from pserver") .SetDefault(std::vector{}); AddAttr("do_not_run", "if recv need to really run").SetDefault(0); } diff --git a/paddle/fluid/operators/distributed_ops/send_op.cc b/paddle/fluid/operators/distributed_ops/send_op.cc index ef2d74e68a121893ba578546df285206c3deedd6..1e9de78732085c50fd2d263aaf4e48d6df743adf 100644 --- a/paddle/fluid/operators/distributed_ops/send_op.cc +++ b/paddle/fluid/operators/distributed_ops/send_op.cc @@ -116,7 +116,7 @@ This operator will send variables to listen_and_serve op at the parameter server AddAttr>( "send_varnames", "(vector) " - "the splited output varnames to send to pserver") + "the split output varnames to send to pserver") .SetDefault(std::vector{}); AddAttr("num", "(int, default 0)" diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cc b/paddle/fluid/operators/fused/conv_fusion_op.cc index b53d7d18650059539df0af6f0f96d6fb5f9fe973..b60ae9127c9355a477ed84b4f8852876ba3f24a9 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cc +++ b/paddle/fluid/operators/fused/conv_fusion_op.cc @@ -28,7 +28,7 @@ namespace operators { // x is Input, // z is ResidualData, // bias is Bias -// When `split_channels` is set, y will be splitted into multiple outputs, +// When `split_channels` is set, y will be split into multiple outputs, // each output has split_channels[i] number of channels. 
class Conv2DFusionOpMaker : public Conv2DOpMaker { protected: diff --git a/paddle/fluid/operators/hierarchical_sigmoid_op.cc b/paddle/fluid/operators/hierarchical_sigmoid_op.cc index bed9b81587976d364309a42b4239fb2f15325257..06f532446e4e9935962f32ae4b9c9c4a66bd8f17 100644 --- a/paddle/fluid/operators/hierarchical_sigmoid_op.cc +++ b/paddle/fluid/operators/hierarchical_sigmoid_op.cc @@ -162,7 +162,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault({}); AddAttr>( "table_names", - "(string vector, the splited table names that will be fetched from " + "(string vector, the split table names that will be fetched from " "parameter server)" "in the order of input variables for mapping") .SetDefault({}); diff --git a/paddle/fluid/operators/lookup_table_op.cc b/paddle/fluid/operators/lookup_table_op.cc index 9fb208662d9bc43c0a6ba43596705e6b6d639270..ed370737f43bd3a01066f13346ee06d37c079bdc 100644 --- a/paddle/fluid/operators/lookup_table_op.cc +++ b/paddle/fluid/operators/lookup_table_op.cc @@ -113,7 +113,7 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault({}); AddAttr>( "table_names", - "(string vector, the splited table names that will be fetched from " + "(string vector, the split table names that will be fetched from " "parameter server)" "in the order of input variables for mapping") .SetDefault({}); diff --git a/paddle/fluid/operators/lookup_table_v2_op.cc b/paddle/fluid/operators/lookup_table_v2_op.cc index b336735d96e9d90eb94a66dc7cfb8402d9da67c9..b3ef5fe1c448b902ac5082b9271f465698f32fbc 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cc +++ b/paddle/fluid/operators/lookup_table_v2_op.cc @@ -100,7 +100,7 @@ class LookupTableV2OpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault({}); AddAttr>( "table_names", - "(string vector, the splited table names that will be fetched from " + "(string vector, the split table names that will be fetched from " "parameter server)" "in the order of input variables for mapping") .SetDefault({}); diff --git a/paddle/fluid/operators/math/blas_impl.h b/paddle/fluid/operators/math/blas_impl.h index 356445b497de1e97e8aca28f09d2a782b9131abc..011c4191a4ec350727b4120e291726c9c32d2bd5 100644 --- a/paddle/fluid/operators/math/blas_impl.h +++ b/paddle/fluid/operators/math/blas_impl.h @@ -777,11 +777,11 @@ void Blas::MatMul(const framework::Tensor &mat_a, * When user calls this API, the multiplication of two big matrixes is split * into multiplication of several (head_number_) small matrixes. e.g. if Mat A * is [3, 24] and Mat B is [24, 4], when multiple A and B with head_number as - * 4, Mat A will be splitted as 4 matrix of [3, 6] and Mat B will be - * (horizontally) splitted as 4 matrix of [6, 4]. The result of final matrix + * 4, Mat A will be split as 4 matrix of [3, 6] and Mat B will be + * (horizontally) split as 4 matrix of [6, 4]. The result of final matrix * will be 4 matrix of [3, 4], i.e. [3, 16]. * Another example is A is [3, 8], B is [2, 16], head_number is 4. In this - * case, A will be splitted as [3, 2], B will be (vertically) splitted as + * case, A will be split as [3, 2], B will be (vertically) split as * [2, 4]. The final result will be 4 matrix of 4 matrix of [3,4], i.e. 
[3, 16] */ template diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/fluid/operators/math/matrix_bit_code.h index 410adc7b283a543dd2a4e2a357050d224fc15362..9d8d0de53a72332f25fb7f949cd2bb9cb3055fc5 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.h +++ b/paddle/fluid/operators/math/matrix_bit_code.h @@ -106,7 +106,7 @@ class SimpleCode { : c_(static_cast(ids[code]) + num_classes) {} /** * Here the id of root should be 1 rather than 0, thus the encoding of class c - * is `c + num_classes` and all siblings can get the same weight indice using + * is `c + num_classes` and all siblings can get the same weight index using * prefixes. * Weight index is the prefixes of encoding, thus leave out the right most * bit in calc_index. @@ -133,7 +133,7 @@ class CustomCode { } /** * Here the id of root should be 1 rather than 0, thus the encoding of class c - * is `c + num_classes` and all siblings can get the same weight indice using + * is `c + num_classes` and all siblings can get the same weight index using * prefixes. * Weight index is the prefixes of encoding, thus leave out the right most * bit in calc_index. diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index c4436ca05bb1f3976d38d290074753eb17080216..a50cc22e5bb0def54b057dcc23d2f6751eecc478 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -55,11 +55,12 @@ class FCPrimitiveFactory { } // Otherwise, create a new one. auto in_col_dims = ctx.Attr("in_num_col_dims"); - PADDLE_ENFORCE_LE(in_col_dims, 2, - platform::errors::Unimplemented( - "DNNL FC doesn't support in_num_col_dims paramter to " - "be higher than " - "2.")); + PADDLE_ENFORCE_LE( + in_col_dims, 2, + platform::errors::Unimplemented( + "DNNL FC doesn't support in_num_col_dims parameter to " + "be higher than " + "2.")); if (in_col_dims == 2) { PADDLE_ENFORCE_EQ( input->dims().size(), 3, diff --git a/paddle/fluid/operators/nce_op.cc b/paddle/fluid/operators/nce_op.cc index c805d5419795c4809db0bf1718d57d0b75d5b769..60add5cce25b7608631826dd972f3396bb55f9d0 100644 --- a/paddle/fluid/operators/nce_op.cc +++ b/paddle/fluid/operators/nce_op.cc @@ -192,7 +192,7 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault({}); AddAttr>( "table_names", - "(string vector, the splited table names that will be fetched from " + "(string vector, the split table names that will be fetched from " "parameter server)" "in the order of input variables for mapping") .SetDefault({}); diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 171b88247c39b4c739aa1231c171580da4d027d9..3ae006a60c8c3ccc3d5911793a04993127582b28 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -563,7 +563,7 @@ class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker { .SetDefault("NCHW"); AddComment(R"DOC( Pad2d Operator. -Pad 2-d images accordding to 'paddings' and 'mode'. +Pad 2-d images according to 'paddings' and 'mode'. If mode is 'reflect', paddings[0] and paddings[1] must be no greater than height-1. And the width dimension has the same condition. 
diff --git a/paddle/fluid/operators/reader/read_op.cc b/paddle/fluid/operators/reader/read_op.cc index 8a06f011a022e35a8d5c98be366587acdfa90a3c..b0e241fb9db199757895a7ade582bd04ddf585a1 100644 --- a/paddle/fluid/operators/reader/read_op.cc +++ b/paddle/fluid/operators/reader/read_op.cc @@ -118,7 +118,7 @@ class ReadOp : public framework::OperatorBase { reader->VarTypes(); const std::vector& need_check_feed = reader->NeedCheckFeed(); PADDLE_ENFORCE_EQ(out_arg_names.size(), need_check_feed.size(), - "output size of read_op and the number of feeded " + "output size of read_op and the number of fed " "variables of reader do not match"); for (size_t i = 0; i < out_arg_names.size(); ++i) { @@ -127,13 +127,13 @@ class ReadOp : public framework::OperatorBase { if (need_check_feed[i]) { auto in_dims = ins[i].dims(); PADDLE_ENFORCE_EQ(DimensionIsCompatibleWith(shapes[i], in_dims), true, - "The feeded Variable %s should have dimensions = %d, " - "shape = [%s], but received feeded shape [%s]", + "The fed Variable %s should have dimensions = %d, " + "shape = [%s], but received fed shape [%s]", out_arg_names[i], shapes[i].size(), shapes[i], in_dims); PADDLE_ENFORCE_EQ( ins[i].type(), var_types[i], - "The data type of feeded Variable %s must be %s, but received %s", + "The data type of fed Variable %s must be %s, but received %s", out_arg_names[i], var_types[i], ins[i].type()); } out->ShareDataWith(ins[i]); diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu index 998bf82ab1ddcd815491de95a3f7cf987036ee65..a7fdf39340c28b4cab7f64753eeb5de28a968750 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu @@ -67,8 +67,8 @@ struct SequenceExpandFunctor { const platform::CUDADeviceContext &context, const LoDTensor &x, const framework::Vector &ref_lod, /*expand referenced lod*/ LoDTensor *out) { - int hight = x.dims()[0]; - int width = framework::product(x.dims()) / hight; + int height = x.dims()[0]; + int width = framework::product(x.dims()) / height; const int kThreadsPerBlock = 1024; int thread_x = kThreadsPerBlock; @@ -82,7 +82,7 @@ struct SequenceExpandFunctor { dim3 block_size(thread_x); dim3 grid_size(block_x); sequence_expand_as_kernel<<>>( - x.data(), ref_lod.CUDAData(context.GetPlace()), hight, width, + x.data(), ref_lod.CUDAData(context.GetPlace()), height, width, out->mutable_data(context.GetPlace())); } }; @@ -93,8 +93,8 @@ struct SequenceExpandAsGradFunctor { const LoDTensor &dout, const framework::Vector &ref_lod, /*expand based lod*/ LoDTensor *dx) { - int hight = dx->dims()[0]; - int width = framework::product(dx->dims()) / hight; + int height = dx->dims()[0]; + int width = framework::product(dx->dims()) / height; const int kThreadsPerBlock = 1024; int thread_x = kThreadsPerBlock; @@ -109,7 +109,7 @@ struct SequenceExpandAsGradFunctor { dim3 grid_size(block_x); sequence_expand_as_grad_kernel<<>>( - dout.data(), ref_lod.CUDAData(context.GetPlace()), hight, width, + dout.data(), ref_lod.CUDAData(context.GetPlace()), height, width, dx->mutable_data(context.GetPlace())); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h index 0b228170e2a73cc7da22006011e75983785c3e79..b67488200c317680e1eca0786c8ef1313c3d0f9c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h 
@@ -46,13 +46,13 @@ struct SequenceExpandFunctor { const platform::CPUDeviceContext &context, const framework::LoDTensor &x, const framework::Vector &ref_lod, /*expand referenced lod*/ framework::LoDTensor *out) { - int64_t hight = x.dims()[0]; - int64_t width = framework::product(x.dims()) / hight; + int64_t height = x.dims()[0]; + int64_t width = framework::product(x.dims()) / height; const T *in_data = x.data(); T *out_data = out->mutable_data(context.GetPlace()); - for (int h_id = 0; h_id < hight; ++h_id) { + for (int h_id = 0; h_id < height; ++h_id) { size_t span = ref_lod[h_id + 1] - ref_lod[h_id]; if (span == 0) continue; const T *src = in_data + h_id * width; @@ -109,13 +109,13 @@ struct SequenceExpandAsGradFunctor { const framework::LoDTensor &dout, const framework::Vector &ref_lod, /*expand referenced lod*/ framework::LoDTensor *dx) { - int64_t hight = dx->dims()[0]; - int64_t width = framework::product(dx->dims()) / hight; + int64_t height = dx->dims()[0]; + int64_t width = framework::product(dx->dims()) / height; const T *dout_data = dout.data(); T *dx_data = dx->mutable_data(context.GetPlace()); - for (int64_t h_id = 0; h_id < hight; ++h_id) { + for (int64_t h_id = 0; h_id < height; ++h_id) { T *dst = dx_data + h_id * width; size_t span = ref_lod[h_id + 1] - ref_lod[h_id]; for (int64_t w_id = 0; w_id < width; ++w_id) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index a9dc0a4fda253db9bb0d33c4a25fbba36492f35b..58022c076cfded46209e06346ea8138b061fc577 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -115,7 +115,7 @@ struct SequenceSoftmaxFunctor { const LoDTensor &x, const framework::Vector &ref_lod, /*referenced lod*/ LoDTensor *out) { - int hight = ref_lod.size() - 1; + int height = ref_lod.size() - 1; const int kThreadsPerBlock = 32; int thread_x = kThreadsPerBlock; @@ -126,7 +126,7 @@ struct SequenceSoftmaxFunctor { dim3 grid_size(max_blocks); sequence_softmax_kernel< T, kThreadsPerBlock><<>>( - x.data(), ref_lod.CUDAData(context.GetPlace()), hight, + x.data(), ref_lod.CUDAData(context.GetPlace()), height, out->mutable_data(context.GetPlace())); } }; @@ -137,7 +137,7 @@ struct SequenceSoftmaxGradFunctor { const LoDTensor &dout, const LoDTensor &out, const framework::Vector &ref_lod, /*referenced lod*/ LoDTensor *dx) { - size_t hight = ref_lod.size() - 1; + size_t height = ref_lod.size() - 1; const int kThreadsPerBlock = 32; int thread_x = kThreadsPerBlock; @@ -150,7 +150,7 @@ struct SequenceSoftmaxGradFunctor { sequence_softmax_grad_kernel< T, kThreadsPerBlock><<>>( dout.data(), out.data(), ref_lod.CUDAData(context.GetPlace()), - hight, dx->mutable_data(context.GetPlace())); + height, dx->mutable_data(context.GetPlace())); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h index d0b584cfd7a61b72e1c44df2b1f9a00cc8e61c6b..4d8f1af456c02238c13ad274e5c44a09c2d42263 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.h @@ -43,10 +43,10 @@ struct SequenceSoftmaxFunctor { void operator()(const platform::CPUDeviceContext &ctx, const LoDTensor &x, const framework::Vector &ref_lod, /*referenced lod*/ LoDTensor *out) { - size_t hight = ref_lod.size() - 1; + size_t height = ref_lod.size() - 1; const T *in_data = x.data(); T *out_data = 
out->mutable_data(ctx.GetPlace()); - for (size_t i = 0; i < hight; ++i) { + for (size_t i = 0; i < height; ++i) { size_t span = ref_lod[i + 1] - ref_lod[i]; T result = 0; for (size_t j = 0; j < span; ++j) { @@ -65,13 +65,13 @@ struct SequenceSoftmaxGradFunctor { const LoDTensor &out, const framework::Vector &ref_lod, /*referenced lod*/ LoDTensor *dx) { - size_t hight = ref_lod.size() - 1; + size_t height = ref_lod.size() - 1; const T *softmax_grad_data = dout.data(); const T *softmax = out.data(); T *dx_data = dx->mutable_data(ctx.GetPlace()); - for (size_t i = 0; i < hight; ++i) { + for (size_t i = 0; i < height; ++i) { size_t span = ref_lod[i + 1] - ref_lod[i]; T result = 0; for (size_t j = 0; j < span; ++j) { diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index 997d3fe4e3ca0a19b88220a8bc74284012466281..abb21acb62d51271c8d4ea11e43b50da438a99d8 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -90,7 +90,7 @@ class SplitOpMaker : public framework::OpProtoAndCheckerMaker { void Make() override { AddInput("X", "(Tensor) Input tensor of the split operator."); AddInput("AxisTensor", - "(Tensor) The axis which the input will be splited on. " + "(Tensor) The axis which the input will be split on. " "It has higher priority than Attr(axis). " "The shape of AxisTensor must be [1]") .AsDispensable(); @@ -131,7 +131,7 @@ Example: .SetDefault(0); AddAttr("axis", "(int, default 0) " - "The axis which the input will be splited on.") + "The axis which the input will be split on.") .SetDefault(0); } }; diff --git a/paddle/fluid/operators/unfold_op.cc b/paddle/fluid/operators/unfold_op.cc index 394a89a0c07644f718141ee4d17bcd979874d758..b48f6ad0c7192f26e75303c5d415bea1efe5abbb 100644 --- a/paddle/fluid/operators/unfold_op.cc +++ b/paddle/fluid/operators/unfold_op.cc @@ -76,7 +76,7 @@ class UnfoldOp : public framework::OperatorWithKernel { // Only [N, C, H, W] input supported now PADDLE_ENFORCE( in_dims.size() == 4, - "Input shold be 4-D tensor of format [N, C, H, W], but get %u", + "Input should be 4-D tensor of format [N, C, H, W], but get %u", in_dims.size()); PADDLE_ENFORCE( in_dims.size() - kernel_sizes.size() == 2U, @@ -86,7 +86,7 @@ class UnfoldOp : public framework::OperatorWithKernel { in_dims.size(), kernel_sizes.size()); PADDLE_ENFORCE_EQ( strides.size(), kernel_sizes.size(), - "The dims of strides shold be the same with that of kernel_sizes. " + "The dims of strides should be the same with that of kernel_sizes. " "But recieved dims(strides: %u) != dims(kernel_sizes: %u).", strides.size(), kernel_sizes.size()); PADDLE_ENFORCE_EQ( @@ -96,7 +96,7 @@ class UnfoldOp : public framework::OperatorWithKernel { paddings.size(), strides.size()); PADDLE_ENFORCE_EQ( strides.size(), dilations.size(), - "The dims of strides shold be the same with that of dilations. " + "The dims of strides should be the same with that of dilations. 
" "But recieved dims(strides: %u) != dims(dilations: %u).", strides.size(), dilations.size()); diff --git a/python/paddle/compat.py b/python/paddle/compat.py index f306ca7a36021dbfc1fb8e026b107285b2abb2df..1fa251a851fa923ca74fcc76d263de2bdc27c8b3 100644 --- a/python/paddle/compat.py +++ b/python/paddle/compat.py @@ -202,7 +202,7 @@ def round(x, d=0): """ if six.PY3: # The official walkaround of round in Python3 is incorrect - # we implement accroding this answer: https://www.techforgeek.info/round_python.html + # we implement according this answer: https://www.techforgeek.info/round_python.html if x > 0.0: p = 10**d return float(math.floor((x * p) + math.copysign(0.5, x))) / p diff --git a/python/paddle/dataset/cifar.py b/python/paddle/dataset/cifar.py index 2c62d1c7d1112239020f2ea9669f4729ea3c367a..16f06f2400b588a0f50e40fb313a80d3b4a99d02 100644 --- a/python/paddle/dataset/cifar.py +++ b/python/paddle/dataset/cifar.py @@ -17,7 +17,7 @@ CIFAR dataset. This module will download dataset from https://dataset.bj.bcebos.com/cifar/cifar-10-python.tar.gz and https://dataset.bj.bcebos.com/cifar/cifar-100-python.tar.gz, parse train/test set into paddle reader creators. -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, +The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. diff --git a/python/paddle/dataset/image.py b/python/paddle/dataset/image.py index 57547f1867a937d16fb2dfc9b84e1a30759a527e..09b5607252bda6ffb1a410a1cb194e4a6394abe4 100644 --- a/python/paddle/dataset/image.py +++ b/python/paddle/dataset/image.py @@ -27,7 +27,7 @@ the image layout as follows. OpenCV use BGR color format. PIL use RGB color format. Both formats can be used for training. Noted that, the format should - be keep consistent between the training and inference peroid. + be keep consistent between the training and inference period. """ from __future__ import print_function diff --git a/python/paddle/dataset/wmt16.py b/python/paddle/dataset/wmt16.py index 3e9007c8aaf6ab74dfd72bba968807bb2c0c9b95..99fab7ffceb9278505ab7dc1bfee9bdcb4e188ba 100644 --- a/python/paddle/dataset/wmt16.py +++ b/python/paddle/dataset/wmt16.py @@ -112,7 +112,7 @@ def reader_creator(tar_file, file_name, src_dict_size, trg_dict_size, src_lang): trg_dict = __load_dict(tar_file, trg_dict_size, ("de" if src_lang == "en" else "en")) - # the indice for start mark, end mark, and unk are the same in source + # the index for start mark, end mark, and unk are the same in source # language and target language. Here uses the source language # dictionary to determine their indices. start_id = src_dict[START_MARK] diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 5f703aab25454bed4957bbe0d35d2477cda2e044..1332c3f66e7a8ac50bffd836baa72539512c6fa5 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -192,7 +192,7 @@ class GradientClipByNorm(BaseGradientClipAttr): """ Convert the input multidimensional Tensor :math:`X` to a multidimensional Tensor whose L2 norm does not exceed the given two-norm maximum ( :math:`clip\_norm` ). - The tensor is not passed through this class, but passed through the parametre of ``main_program`` in ``fluid.program_guard``. + The tensor is not passed through this class, but passed through the parameter of ``main_program`` in ``fluid.program_guard``. This class limits the L2 norm of the input :math:`X` within :math:`clip\_norm`. 
diff --git a/python/paddle/fluid/contrib/layers/rnn_impl.py b/python/paddle/fluid/contrib/layers/rnn_impl.py index 603aa72a5a5f48c8b782bd23d1cee5dafc2d5bf1..d8716f08ad0d90d9816ca7c92373f93e30076e13 100644 --- a/python/paddle/fluid/contrib/layers/rnn_impl.py +++ b/python/paddle/fluid/contrib/layers/rnn_impl.py @@ -156,7 +156,7 @@ def basic_gru(input, dtype='float32', name='basic_gru'): """ - GRU implementation using basic operator, supports multiple layers and bidirection gru. + GRU implementation using basic operator, supports multiple layers and bidirectional gru. .. math:: u_t & = actGate(W_ux xu_{t} + W_uh h_{t-1} + b_u) @@ -377,7 +377,7 @@ def basic_lstm(input, dtype='float32', name='basic_lstm'): """ - LSTM implementation using basic operators, supports multiple layers and bidirection LSTM. + LSTM implementation using basic operators, supports multiple layers and bidirectional LSTM. .. math:: i_t &= \sigma(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i) diff --git a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py index 918544e1c990781df891f3cfe0af1b5bc8c0c92c..5fb1dba40a3c69bd3419640a404c580c8375f215 100644 --- a/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py +++ b/python/paddle/fluid/contrib/tests/test_image_classification_fp16.py @@ -236,7 +236,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/contrib/utils/lookup_table_utils.py b/python/paddle/fluid/contrib/utils/lookup_table_utils.py index 8552bc8fc105835ae408ae716f0172d032552cd8..7d30de565e7a41b02cbf37893f561283eef29b3a 100644 --- a/python/paddle/fluid/contrib/utils/lookup_table_utils.py +++ b/python/paddle/fluid/contrib/utils/lookup_table_utils.py @@ -420,7 +420,7 @@ def get_inference_model(main_program, feeded_var_names, target_vars): build the inference model. If is set None, the default main program will be used. Default: None. - feeded_var_names(list[str]): Names of variables that need to be feeded data + feeded_var_names(list[str]): Names of variables that need to be fed data during inference. target_vars(list[Variable]): Variables from which we can get inference results. diff --git a/python/paddle/fluid/data.py b/python/paddle/fluid/data.py index 179c3b07dbefc7c90ed2756d8b2ed98ec79764cf..14333cae1ec0cc20866d94b28b1851058b1763de 100644 --- a/python/paddle/fluid/data.py +++ b/python/paddle/fluid/data.py @@ -27,7 +27,7 @@ def data(name, shape, dtype='float32', lod_level=0): This function creates a variable on the global block. The global variable can be accessed by all the following operators in the graph. The variable - is a placeholder that could be feeded with input, such as Executor can feed + is a placeholder that could be fed with input, such as Executor can feed input into the variable. Note: @@ -35,8 +35,8 @@ def data(name, shape, dtype='float32', lod_level=0): future version. Please use this `paddle.fluid.data`. 
The `paddle.fluid.layers.data` set shape and dtype at compile time but - does NOT check the shape or the dtype of feeded data, this - `paddle.fluid.data` checks the shape and the dtype of data feeded by + does NOT check the shape or the dtype of fed data, this + `paddle.fluid.data` checks the shape and the dtype of data fed by Executor or ParallelExecutor during run time. To feed variable size inputs, users can set -1 on the variable diff --git a/python/paddle/fluid/dataset.py b/python/paddle/fluid/dataset.py index 6861d86684cde36536576d52dca40eb926630906..ea83ba40e79bfd1c84c3fcda6dfd90beb786e30b 100644 --- a/python/paddle/fluid/dataset.py +++ b/python/paddle/fluid/dataset.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""This is defination of dataset class, which is high performance IO.""" +"""This is definition of dataset class, which is high performance IO.""" from paddle.fluid.proto import data_feed_pb2 from google.protobuf import text_format diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 90979c6b839ed6650f8a553b0464b57f270ed583..af5c7e1c0e8088cc358140a6039b93278f23869a 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -196,7 +196,7 @@ def dimension_is_compatible_with(first, second): def check_feed_shape_type(var, feed, num_places=1): """ Returns True if the variable doesn't require feed check or it is compatible - with the shape and have same dtype as the feeded value. + with the shape and have same dtype as the fed value. A dimension is compatible with the other if: 1. The length of the dimensions are same. @@ -206,7 +206,7 @@ def check_feed_shape_type(var, feed, num_places=1): Args: var (Variable): the Variable object - feed (LoDTensor): the feeded value, which must be a LoDTensor + feed (LoDTensor): the fed value, which must be a LoDTensor num_places: an integer value indicating the number of places. ParallelExecutor will divide data into devices (CPU/GPU) evenly. 
Returns: @@ -225,8 +225,8 @@ def check_feed_shape_type(var, feed, num_places=1): num_places) if len(feed.lod()) == 0 else -1 if not dimension_is_compatible_with(feed_shape, var.shape): raise ValueError( - 'The feeded Variable %r should have dimensions = %d, shape = ' - '%r, but received feeded shape %r on each device' % + 'The fed Variable %r should have dimensions = %d, shape = ' + '%r, but received fed shape %r on each device' % (var.name, len(var.shape), var.shape, feed_shape)) if not dtype_is_compatible_with(feed._dtype(), var.dtype): var_dtype_format = convert_dtype(var.dtype) if isinstance( @@ -234,8 +234,8 @@ def check_feed_shape_type(var, feed, num_places=1): feed_dtype_format = convert_dtype(feed._dtype()) if isinstance( feed._dtype(), core.VarDesc.VarType) else feed._dtype() raise ValueError( - 'The data type of feeded Variable %r must be %r, but received %r' - % (var.name, var_dtype_format, feed_dtype_format)) + 'The data type of fed Variable %r must be %r, but received %r' % + (var.name, var_dtype_format, feed_dtype_format)) return True diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py index d6ea97fc57bd5957e9c87e3c101f3d4614ce84c0..40337110cfe966511050b78e3e463e7653c3caba 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/pslib/__init__.py @@ -448,7 +448,7 @@ class PSLib(Fleet): model_proto_file(str): path of program desc proto binary file, can be local or hdfs/afs file var_names(list): var name list - load_combine(bool): load from a file or splited param files + load_combine(bool): load from a file or split param files default False. Examples: @@ -502,7 +502,7 @@ class PSLib(Fleet): model_proto_file(str): path of program desc proto binary file, can be local or hdfs/afs file var_names(list): load var names - load_combine(bool): load from a file or splited param files + load_combine(bool): load from a file or split param files """ self._role_maker._barrier_worker() diff --git a/python/paddle/fluid/io.py b/python/paddle/fluid/io.py index 1830950866cf0d87a7c40470e8f5b5631e63d736..0c49982ded8cdc74418ecc44e49c11d016e4b731 100644 --- a/python/paddle/fluid/io.py +++ b/python/paddle/fluid/io.py @@ -1043,7 +1043,7 @@ def save_inference_model(dirname, Args: dirname(str): The directory path to save the inference model. - feeded_var_names(list[str]): list of string. Names of variables that need to be feeded + feeded_var_names(list[str]): list of string. Names of variables that need to be fed data during inference. target_vars(list[Variable]): list of Variable. Variables from which we can get inference results. 
diff --git a/python/paddle/fluid/layer_helper_base.py b/python/paddle/fluid/layer_helper_base.py index f6cf2a7d49c97ed7fde9490aae06d73516334ef2..3f6f59f0e03b0876116c5153ed2186afd8689acf 100644 --- a/python/paddle/fluid/layer_helper_base.py +++ b/python/paddle/fluid/layer_helper_base.py @@ -285,7 +285,7 @@ class LayerHelperBase(object): Args: attr: [ParamAttr] should be the parameter attribute for this parameter - shape: shape of the paramter + shape: shape of the parameter dtype: data type of this parameter is_bias: if this is a bias parameter default_initializer: set the default initializer for this parameter diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 180aec4d9b2c23bd0edf0879f2ccd098b250f005..9973d787360b156e19d0fc429f96b57fe9581480 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -56,8 +56,8 @@ def data(name, a later version. Please use :code:`paddle.fluid.data` . This :code:`paddle.fluid.layers.data` set shape and dtype at compile - time but does NOT check the shape or the dtype of feeded data, the - :code:`paddle.fluid.data` checks the shape and the dtype of data feeded + time but does NOT check the shape or the dtype of fed data, the + :code:`paddle.fluid.data` checks the shape and the dtype of data fed by Executor or ParallelExecutor during run time. To feed variable size inputs, users can feed variable size inputs @@ -760,7 +760,7 @@ def create_py_reader_by_data(capacity, reader.decorate_paddle_reader( paddle.reader.shuffle(paddle.batch(mnist.train(), batch_size=5), buf_size=500)) img, label = fluid.layers.read_file(reader) - loss = network(img, label) # The definition of custom network and the loss funtion + loss = network(img, label) # The definition of custom network and the loss function place = fluid.CUDAPlace(0) if USE_CUDA else fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index bef581daf095a9f72bc59b5add542bf2414aacaf..93948000d4a3de4edb1205ec56459abcbbc76ac0 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -4914,7 +4914,7 @@ def transpose(x, perm, name=None): Args: x (Variable): The input Tensor. It is a N-D Tensor of data types float32, float64, int32. - perm (list): Permute the input accoring to the data of perm. + perm (list): Permute the input according to the data of perm. name (str): The name of this layer. It is optional. Returns: @@ -5488,7 +5488,7 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): be set -1. 2. 0 means the actual dimension value is going to be copied from the - corresponding dimension of x. The indice of 0s in shape can not exceed + corresponding dimension of x. The index of 0s in shape can not exceed the dimension of x. Here are some examples to explain it. @@ -6484,7 +6484,7 @@ def image_resize(input, The input must be a 4-D Tensor of the shape (num_batches, channels, in_h, in_w) or (num_batches, in_h, in_w, channels), or a 5-D Tensor of the shape (num_batches, channels, in_d, in_h, in_w) or (num_batches, in_d, in_h, in_w, channels), - and the resizing only applies on the three dimensions(depth, hight and width). + and the resizing only applies on the three dimensions(depth, height and width). **Warning:** the parameter :attr:`actual_shape` will be deprecated in the future and only use :attr:`out_shape` instead. 
@@ -8396,7 +8396,7 @@ def pad2d(input, data_format="NCHW", name=None): """ - Pad 2-d images accordding to 'paddings' and 'mode'. + Pad 2-d images according to 'paddings' and 'mode'. If mode is 'reflect', paddings[0] and paddings[1] must be no greater than height-1. And the width dimension has the same condition. @@ -8418,7 +8418,7 @@ def pad2d(input, name (str, optional) : The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name` . - Returns: a 4-D Tensor padded accordding to paddings and mode and data type is same as input. + Returns: a 4-D Tensor padded according to paddings and mode and data type is same as input. Return Type: Variable @@ -13346,7 +13346,7 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): [padding_h, padding_w, padding_h, padding_w]. If an integer padding was given, [padding, padding, padding, padding] will be used. For default, paddings will be [0, 0, 0, 0] - dilations(int|list): the dilations of convolution kernel, shold be + dilations(int|list): the dilations of convolution kernel, should be [dilation_h, dilation_w], or an integer dilation treated as [dilation, dilation]. For default, it will be [1, 1]. name(str, optional): The default value is None. diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index dd274233a6b2eaaf5411e5b8504ef64dc4ea93c3..cb12464349606abe35dd6cad8a0667107dc2fd03 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -2469,10 +2469,10 @@ def dynamic_gru(input, See usage for details in :ref:`api_fluid_ParamAttr` . is_reverse(bool, optional): Whether to compute in the reversed order of input sequences. Default False. - gate_activation(str, optional): The activation fuction corresponding to + gate_activation(str, optional): The activation function corresponding to :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" are supported. Default "sigmoid". - candidate_activation(str, optional): The activation fuction corresponding to + candidate_activation(str, optional): The activation function corresponding to :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" are supported. Default "tanh". h_0 (Variable, optional): A Tensor representing the initial hidden state. @@ -2618,10 +2618,10 @@ def gru_unit(input, bias_attr (ParamAttr, optional): To specify the bias parameter property. Default: None, which means the default bias parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . - activation(str, optional): The activation fuction corresponding to + activation(str, optional): The activation function corresponding to :math:`act_c` in the formula. "sigmoid", "tanh", "relu" and "identity" are supported. Default "tanh". - gate_activation(str, optional): The activation fuction corresponding to + gate_activation(str, optional): The activation function corresponding to :math:`act_g` in the formula. "sigmoid", "tanh", "relu" and "identity" are supported. Default "sigmoid". @@ -2746,7 +2746,7 @@ def beam_search(pre_ids, `[batch_size * beam_size, K]`, where `K` supposed to be greater than ``beam_size`` and the first dimension size (decrease as samples reach to the end) should be same as that of ``pre_ids`` . The data type - should be int64. It can be None, which use indice in ``scores`` as + should be int64. It can be None, which use index in ``scores`` as ids. 
scores(Variable): A LodTensor variable containing the accumulated scores corresponding to ``ids`` . Both its shape and lod are same as @@ -2765,7 +2765,7 @@ def beam_search(pre_ids, to :ref:`api_guide_Name`. Usually name is no need to set and None by default. return_parent_idx(bool, optional): Whether to return an extra Tensor variable - in output, which stores the selected ids' parent indice in + in output, which stores the selected ids' parent index in ``pre_ids`` and can be used to update RNN's states by gather operator. Default False. @@ -2774,7 +2774,7 @@ def beam_search(pre_ids, representing the selected ids and the corresponding accumulated scores of \ current step, have the same shape `[batch_size, beam_size]` and lod with 2 levels, \ and have data types int64 and float32. If ``return_parent_idx`` is True, \ - an extra Tensor variable preserving the selected ids' parent indice \ + an extra Tensor variable preserving the selected ids' parent index \ is included, whose shape is `[batch_size * beam_size]` and data type \ is int64. diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 5c467f2d36df07483d5889a15dbcce05efdc9594..d065e527b38c4906e5722e970536ea799929cc97 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -668,7 +668,7 @@ def fill_constant_batch_size_like(input, output_dim_idx=0, force_cpu=False): """ - This OP creates a Tesnor accroding the shape and dtype, and initializes the + This OP creates a Tesnor according the shape and dtype, and initializes the Tensor with the constants provided in ``value``. When the input is LoDTensor and the input_dim_idx is 0, the output_dim_idx dimension is set to the value of the batch_size input by the input, the Stop_gradient attribute of the created diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index cc9d26037622824d338131a14577e4a23d09cf6e..cab3daa29a1711ebec9dff107a3dc21efe36d3ff 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -261,7 +261,7 @@ class CompositeMetric(MetricBase): Returns: list: results of all added metrics. - The shape and dtype of each result depend on the defination of its metric. + The shape and dtype of each result depend on the definition of its metric. """ ans = [] for m in self._metrics: diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 0f1d593fa17798e4383442e59658a0ec7332811a..ca498bc6a47e5b3a6516eacc2164af7e261da523 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -3378,10 +3378,10 @@ class PipelineOptimizer(object): """ Pipeline Optimizer - Train with pipeline mode. The program will be splited by cut_list. + Train with pipeline mode. The program will be split by cut_list. If the len of cut_list is k, then the whole program (including \ - backward part) will be splited to 2*k-1 sections. + backward part) will be split to 2*k-1 sections. So the length of place_list and concurrency_list must be also 2*k-1. diff --git a/python/paddle/fluid/parallel_executor.py b/python/paddle/fluid/parallel_executor.py index d2aabd8c16843426bf248e136e37a926efc6f7ef..179bac78ff8d1f684cb3ec59e9420be1b6e23ffa 100644 --- a/python/paddle/fluid/parallel_executor.py +++ b/python/paddle/fluid/parallel_executor.py @@ -287,7 +287,7 @@ class ParallelExecutor(object): loss_name=loss.name) # If the feed is a dict: - # the image will be splitted into devices. 
If there is two devices + # the image will be split into devices. If there are two devices # each device will process an image with shape (5, 1) x = numpy.random.random(size=(10, 1)).astype('float32') loss_data, = train_exe.run(feed={"X": x}, diff --git a/python/paddle/fluid/reader.py b/python/paddle/fluid/reader.py index 8b7b98b32fa502791d01b7e2f2d3aaf5f1af58bc..b6723bfe9052734d74b8e7e71e8fe776923ab692 100644 --- a/python/paddle/fluid/reader.py +++ b/python/paddle/fluid/reader.py @@ -125,7 +125,7 @@ class DataLoader(object): presented as a list. It is only valid when iterable=True. If return_list=False, the return value on each device would be a dict of str -> LoDTensor, where the key of the dict is - the name of each feeded variables. If return_list=True, the + the name of each fed variable. If return_list=True, the return value on each device would be a list(LoDTensor). It is recommended to use return_list=False in static graph mode and use return_list=True in dygraph mode. @@ -891,7 +891,7 @@ class PyReader(DataLoaderBase): presented as a list. It is only valid when iterable=True. If return_list=False, the return value on each device would be a dict of str -> LoDTensor, where the key of the dict is - the name of each feeded variables. If return_list=True, the + the name of each fed variable. If return_list=True, the return value on each device would be a list(LoDTensor). It is recommended to use return_list=False in static graph mode and use return_list=True in dygraph mode. diff --git a/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py index 6f24ec45aa6f27814e489b8dce49fe69f62d4f10..d015d4c47d667cbbaaea0e9a6cbd1691a859dcbb 100644 --- a/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py +++ b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py @@ -18,7 +18,7 @@ This module will download dataset from https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into paddle reader creators. -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, +The CIFAR-10 dataset consists of 60000 32x32 color images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. diff --git a/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py b/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py index 87f3b7502e26d3e6a437985f99d7897b060e101e..ae2fe6c389389e0e952ea2f77d07af96a389c442 100644 --- a/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py +++ b/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py @@ -121,7 +121,7 @@ def infer_by_saved_model(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). 
[inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/notest_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py index 5658bb4ec44e7319c384daed2d8b2d4c420c4160..9ce90a2bd71f8d569f2301b8460e576ea88f3c42 100644 --- a/python/paddle/fluid/tests/book/notest_understand_sentiment.py +++ b/python/paddle/fluid/tests/book/notest_understand_sentiment.py @@ -213,7 +213,7 @@ def infer(word_dict, use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_fit_a_line.py b/python/paddle/fluid/tests/book/test_fit_a_line.py index 334294ab485cf203aa0ccf680a53010322d3af3b..a7d5a0305993a637ba2ce7d59f91a0c03b700a69 100644 --- a/python/paddle/fluid/tests/book/test_fit_a_line.py +++ b/python/paddle/fluid/tests/book/test_fit_a_line.py @@ -103,7 +103,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_image_classification.py b/python/paddle/fluid/tests/book/test_image_classification.py index 95d71d72c156484eddc4eaf26aaa61bb5a93b1b1..22b74f2922887eb972806eac15904795b5a48ca7 100644 --- a/python/paddle/fluid/tests/book/test_image_classification.py +++ b/python/paddle/fluid/tests/book/test_image_classification.py @@ -210,7 +210,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py index 3d40b762281ae09d3214f2d2bc496c4966984866..ef14600e6446505228b5cd70c9d9288cdae44a39 100644 --- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py @@ -247,7 +247,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). 
[inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_recommender_system.py b/python/paddle/fluid/tests/book/test_recommender_system.py index 0e1efc8212ec2913ca3653c47bd2d9e298a772ee..433b5498de718d46395676b70b0abd0ab9240336 100644 --- a/python/paddle/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/fluid/tests/book/test_recommender_system.py @@ -254,7 +254,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py index 91c8705aa4c88dbfeea45e15c368459ba5b5ac1f..0d65513c122d3ea9effcc391f6049b9c1b462546 100644 --- a/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py +++ b/python/paddle/fluid/tests/book/test_rnn_encoder_decoder.py @@ -208,7 +208,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/book/test_word2vec.py b/python/paddle/fluid/tests/book/test_word2vec.py index cfa6b1a74006c8b0f9792eaa302f1d11a0dab4ee..c919584554b1613b6b3b125cf7beaddda931c47f 100644 --- a/python/paddle/fluid/tests/book/test_word2vec.py +++ b/python/paddle/fluid/tests/book/test_word2vec.py @@ -151,7 +151,7 @@ def infer(use_cuda, save_dirname=None): inference_scope = fluid.core.Scope() with fluid.scope_guard(inference_scope): # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded + # the feed_target_names (the names of variables that will be fed # data using feed operators), and the fetch_targets (variables that # we want to obtain data from using fetch operators). [inference_program, feed_target_names, diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index c3ff3c0feb788639d86c0df545b942abc1a36caa..6887f4519a88afc13a7ce78e9f1c5ba229f50224 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -272,7 +272,7 @@ class LearningRateScheduler(object): """ Wrapper for learning rate scheduling as described in the Transformer paper. LearningRateScheduler adapts the learning rate externally and the adapted - learning rate will be feeded into the main_program as input data. + learning rate will be fed into the main_program as input data. 
""" def __init__(self, diff --git a/python/paddle/fluid/tests/unittests/ir/pass_test.py b/python/paddle/fluid/tests/unittests/ir/pass_test.py index 65d74418ed2727475ffcbb19ed22ff9d942fc0e2..73953bd2db40495a5d3aa22533da353db37a454d 100644 --- a/python/paddle/fluid/tests/unittests/ir/pass_test.py +++ b/python/paddle/fluid/tests/unittests/ir/pass_test.py @@ -130,7 +130,7 @@ class PassTest(unittest.TestCase): if startup_on_cpu and not isinstance(place, fluid.CPUPlace): warnings.warn( - "Parameters are on CPU, and will be transfered to GPU " + "Parameters are on CPU, and will be transferred to GPU " "automatically by data transform.") outs_opt, lods_opt = self._run_program(executor, opt_program) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index da9b2a472d50f519244365bb2e36beef701ffa0c..d2e30391d258f678f1abe13464462be7ad2134ab 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -118,7 +118,7 @@ class TestBasicModel(TranspilerTest): trainer, trainer_startup = self.get_trainer() - # splited var blocks should be in startup program + # split var blocks should be in startup program self.assertTrue("fc_w.block0" in trainer_startup.global_block().vars) self.assertTrue("fc_w.block1" in trainer_startup.global_block().vars) self.assertTrue("fc_w" in trainer_startup.global_block().vars) diff --git a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py index afd5b52060a2dcd89188cab79156323c000837e9..4d7fc69058dedfc8bdd48850740fe8e7a33eda9e 100644 --- a/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py +++ b/python/paddle/fluid/tests/unittests/test_feed_data_check_shape_type.py @@ -46,7 +46,7 @@ class TestFeedData(unittest.TestCase): def _get_feed_batch_size(self, use_cuda, use_parallel_executor): """ - Returns actual feeded data size. We should multiple the number of + Returns actual fed data size. 
We should multiply it by the number of devices when it is using ParallelExecutor """ return self.data_batch_size * self._get_device_count( @@ -100,8 +100,8 @@ class TestFeedData(unittest.TestCase): use_parallel_executor) self.assertEqual( str(shape_mismatch_err.exception), - "The feeded Variable %r should have dimensions = %r, " - "shape = %r, but received feeded shape %r on each device" % + "The fed Variable %r should have dimensions = %r, " + "shape = %r, but received fed shape %r on each device" % (u'data', len(in_shape_tuple), in_shape_tuple, error_shape_list)) @@ -110,7 +110,7 @@ class TestFeedData(unittest.TestCase): use_parallel_executor) self.assertEqual( str(dtype_mismatch_err.exception), - "The data type of feeded Variable %r must be 'int64', but " + "The data type of fed Variable %r must be 'int64', but " "received 'float64'" % (u'label')) def _test_feed_data_dtype_mismatch(self, use_cuda, use_parallel_executor): diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index ad141c96bbf192c53f3473f13b565734b7669223..ac61ab756f06e5142ffae3e913b722ba6ef93dc2 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -905,7 +905,7 @@ class TestLoadFromOldInterface(unittest.TestCase): with self.assertRaises(RuntimeError): fluid.load(main_program, "test_path", exe) - # check unused paramter + # check unused parameter fluid.load(test_clone_program, "test_path", exe) diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 31177cc2c613be323d3e47983a1478b0d1fb9071..27a74ceb5fc9f73ff7c6c66e48aba587bea00dc6 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -16,11 +16,11 @@ from __future__ import print_function """ Steps to transpile trainer: 1. split variable to multiple blocks, aligned by product(dim[1:]) (width). -2. rename splited grad variables to add trainer_id suffix ".trainer_%d". +2. rename split grad variables to add trainer_id suffix ".trainer_%d". 3. modify trainer program add split_op to each grad variable. -4. append send_op to send splited variables to server and -5. add recv_op to fetch params(splited blocks or origin param) from server. -6. append concat_op to merge splited blocks to update local weights. +4. append send_op to send split variables to server and +5. add recv_op to fetch params(split blocks or origin param) from server. +6. append concat_op to merge split blocks to update local weights. Steps to transpile pserver: 1. create new program for parameter server. @@ -106,7 +106,7 @@ def slice_variable(var_list, slice_count, min_block_size): var_list (list): List of variables. slice_count (int): Numel of count that variables will be sliced, which could be the pserver services' count. - min_block_size (int): Minimum splitted block size. + min_block_size (int): Minimum split block size. Returns: blocks (list[(varname, block_id, current_block_size)]): A list of VarBlocks. Each VarBlock specifies a shard of the var. @@ -157,7 +157,7 @@ class DistributeTranspilerConfig(object): .. py:attribute:: min_block_size (int) - Minimum number of splitted elements in block, default is 8192. + Minimum number of split elements in block, default is 8192. 
According to : https://github.com/PaddlePaddle/Paddle/issues/8638#issuecomment-369912156 We can use bandwidth efficiently when data size is larger than 2MB.If you @@ -667,8 +667,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler self.origin_program._is_chief = self.trainer_id == 0 self.origin_program._distributed_lookup_table = self.table_name if self.table_name else None - # split and create vars, then put splited vars in dicts for later use. - # step 1: split and create vars, then put splited vars in dicts for later use. + # split and create vars, then put split vars in dicts for later use. + # step 1: split and create vars, then put split vars in dicts for later use. self._init_splited_vars() # step 2: insert send op to send gradient vars to parameter servers @@ -742,8 +742,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler sections = [] send_varnames = [] - # get send op_role_var, if not splited, the grad should have .trainer suffix - # if splited, grad should be the original grad var name (split_by_ref and send + # get send op_role_var, if not split, the grad should have .trainer suffix + # if split, grad should be the original grad var name (split_by_ref and send # will be on the same place). ParallelExecutor # will use op_role_var to get expected device place to run this op. program.global_block()._insert_op( @@ -860,8 +860,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler recv_dep_in = self.grad_name_to_send_dummy_out[ self.param_name_to_grad_name[param_varname]] - # get recv op_role_var, if not splited, the grad should have .trainer suffix - # if splited, grad should be the original grad var name. ParallelExecutor + # get recv op_role_var, if not split, the grad should have .trainer suffix + # if split, grad should be the original grad var name. ParallelExecutor # will use op_role_var to get expected device place to run this op. orig_grad_name = self.param_name_to_grad_name[param_varname] recv_op_role_var_name = orig_grad_name @@ -1120,7 +1120,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler for varname, splited_var in six.iteritems(self.param_var_mapping): if varname in sparse_table_names: continue - # add concat ops to merge splited parameters received from parameter servers. + # add concat ops to merge split parameters received from parameter servers. if len(splited_var) <= 1: continue # NOTE: if enable memory optimization, origin vars maybe removed. @@ -1670,8 +1670,8 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler def _init_splited_vars(self): # update these mappings for further transpile: - # 1. param_var_mapping: param var name -> [splited params vars] - # 2. grad_var_mapping: grad var name -> [splited grads vars] + # 1. param_var_mapping: param var name -> [split params vars] + # 2. grad_var_mapping: grad var name -> [split grads vars] # 3. grad_param_mapping: grad.blockx -> param.blockx # 4. 
param_grad_ep_mapping: ep -> {"params": [], "grads": []} @@ -1966,7 +1966,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler outputs={"Out": [grad_var]}, attrs={"use_mkldnn": False}) else: - # in async_mode, for table gradient, it also need to be splited to each parameter server + # in async_mode, for table gradient, it also needs to be split to each parameter server origin_grad_name = grad_var.name splited_grad_name = self.trainer_side_table_grad_list[ pserver_index].name @@ -2040,9 +2040,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler block_map[varname] = [] block_map[varname].append((int(offset), int(size))) - for varname, splited in six.iteritems(block_map): + for varname, split in six.iteritems(block_map): orig_var = program.global_block().var(varname) - if len(splited) == 1: + if len(split) == 1: if self.sync_mode and add_trainer_suffix: new_var_name = "%s.trainer_%d" % \ (orig_var.name, self.trainer_id) @@ -2059,7 +2059,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if len(orig_shape) >= 2: orig_dim1_flatten = reduce(lambda x, y: x * y, orig_shape[1:]) - for i, block in enumerate(splited): + for i, block in enumerate(split): size = block[1] rows = size // orig_dim1_flatten splited_shape = [rows] @@ -2077,7 +2077,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler persistable=False, dtype=orig_var.dtype, type=orig_var.type, - shape=splited_shape) # flattend splited var + shape=splited_shape) # flattened split var var_mapping[varname].append(var) program.global_block()._sync_with_cpp() return var_mapping @@ -2393,9 +2393,9 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if the variable is not grad/param, e.g. a@GRAD -> a@GRAD.block0 - a@GRAD -> a@GRAD (a is not splited) + a@GRAD -> a@GRAD (a is not split) fc_0.w_0 -> fc_0.w_0.block_0 - fc_0.w_0 -> fc_0.w_0 (weight is not splited) + fc_0.w_0 -> fc_0.w_0 (weight is not split) _generated_var_123 -> None """ grad_block = None @@ -2403,7 +2403,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler if self._orig_varname(g.name) == self._orig_varname(var.name): # skip per trainer vars if g.name.find(".trainer_") == -1: - # only param or grads have splited blocks + # only param or grads have split blocks if self._orig_varname(g.name) in self.grad_name_to_param_name or \ self._orig_varname(g.name) in self.param_name_to_grad_name: grad_block = g @@ -2442,7 +2442,7 @@ WIKI: https://github.com/PaddlePaddle/Fleet/blob/develop/markdown_doc/transpiler varlist = [varlist] for i in range(len(varlist)): var = varlist[i] - # for ops like clipping and weight decay, get the splited var (xxx.block0) + # for ops like clipping and weight decay, get the split var (xxx.block0) # for inputs/outputs grad_block = self._get_pserver_grad_param_var( var, program.global_block().vars) diff --git a/python/paddle/fluid/transpiler/geo_sgd_transpiler.py b/python/paddle/fluid/transpiler/geo_sgd_transpiler.py index 484f6aa5eb5b9d7d7f043e20fda23c94838768e7..702b355696de9cda3cf86c88d1c52e207ea85bae 100644 --- a/python/paddle/fluid/transpiler/geo_sgd_transpiler.py +++ b/python/paddle/fluid/transpiler/geo_sgd_transpiler.py @@ -108,15 +108,15 @@ class GeoSgdTranspiler(DistributeTranspiler): self.sparse_var_list = [] self.sparse_var_splited_list = [] - # split and create vars, then put splited vars in dicts for later use. - # step 1. 
split and create vars, then put splited vars in dicts for later use. + # split and create vars, then put split vars in dicts for later use. + # step 1. split and create vars, then put split vars in dicts for later use. self._init_splited_vars() # step 3. create send recv var (param after optimize) send_vars = [] ps_dispatcher.reset() param_var_mapping_items = list(six.iteritems(self.param_var_mapping)) - # send_vars is the parameter which splited by communicator and send to pserver,not the origin parameter + # send_vars is the parameter which is split by the communicator and sent to the pserver, not the origin parameter for _, splited_vars in param_var_mapping_items: for _, var in enumerate(splited_vars): send_vars.append(var) @@ -292,7 +292,7 @@ class GeoSgdTranspiler(DistributeTranspiler): len(self.pserver_endpoints), self.config.min_block_size) - # step 3. Create splited param from split blocks + # step 3. Create split param from split blocks # origin_param_name -> [splited_param_vars] # Todo: update _create_vars_from_blocklist self.param_var_mapping = self._create_vars_from_blocklist( diff --git a/python/paddle/utils/preprocess_util.py b/python/paddle/utils/preprocess_util.py index 76fc83acdc0ee2631576a737a0f4fea42acec47d..471cb07c84bc31a34d659e9ccc8bdd57442b8489 100644 --- a/python/paddle/utils/preprocess_util.py +++ b/python/paddle/utils/preprocess_util.py @@ -301,9 +301,9 @@ class DatasetCreater(object): Create a data set object from a path. It will use directory structure or a file list to determine dataset if self.from_list is True. Otherwise, it will uses a file list to - determine the datset. + determine the dataset. path: the path of the dataset. - return a tuple of Dataset object, and a mapping from lable set + return a tuple of a Dataset object and a mapping from label set to label id. """ if self.from_list: @@ -314,9 +314,9 @@ class DatasetCreater(object): def create_dataset_from_list(self, path): """ Create a data set object from a path. - It will uses a file list to determine the datset. + It will use a file list to determine the dataset. path: the path of the dataset. - return a tuple of Dataset object, and a mapping from lable set + return a tuple of a Dataset object and a mapping from label set to label id """ raise NotImplementedError @@ -327,7 +327,7 @@ class DatasetCreater(object): It will use directory structure or a file list to determine dataset if self.from_list is True. path: the path of the dataset. - return a tuple of Dataset object, and a mapping from lable set + return a tuple of a Dataset object and a mapping from label set to label id """ raise NotImplementedError diff --git a/tools/check_api_approvals.sh b/tools/check_api_approvals.sh index 068966c683b22077fc42789e7c9510adf7da44aa..3bc99ac17a5d335ce800d66e904e1490eb7ebd71 100644 --- a/tools/check_api_approvals.sh +++ b/tools/check_api_approvals.sh @@ -1,4 +1,5 @@ #!/bin/bash + if [ -z ${BRANCH} ]; then BRANCH="develop" fi
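For reference, the block-slicing rule that the distribute_transpiler docstrings in this patch describe (split a variable along its first dimension into blocks aligned by product(dim[1:]), subject to a minimum block size of 8192 elements) can be sketched in a few lines of Python. This is an illustrative, hypothetical snippet only, not the actual slice_variable implementation; the helper name split_to_blocks and the returned tuple layout are assumptions made for this sketch.

from functools import reduce


def split_to_blocks(var_name, shape, slice_count, min_block_size=8192):
    """Illustrative only: split a variable along dim 0 into at most
    `slice_count` blocks, each a whole number of rows and, when possible,
    at least `min_block_size` elements.
    Returns [(var_name, block_id, block_numel), ...]."""
    numel = reduce(lambda x, y: x * y, shape)
    # A "row" is everything except the first dimension.
    row_width = reduce(lambda x, y: x * y, shape[1:]) if len(shape) > 1 else 1

    # Cap the number of blocks so each one stays reasonably large.
    max_blocks = max(1, numel // max(min_block_size, row_width))
    block_count = min(slice_count, max_blocks)

    rows = shape[0]
    rows_per_block = (rows + block_count - 1) // block_count

    blocks = []
    for block_id in range(block_count):
        start_row = block_id * rows_per_block
        cur_rows = min(rows_per_block, rows - start_row)
        if cur_rows <= 0:
            break
        blocks.append((var_name, block_id, cur_rows * row_width))
    return blocks


if __name__ == "__main__":
    # A 1000 x 64 parameter sliced for 4 parameter servers:
    # four blocks of 250 rows (16000 elements) each.
    print(split_to_blocks("fc_w", [1000, 64], slice_count=4))

The transpiler then materializes such blocks as per-server variables named like fc_w.block0 and fc_w.block1, which is what the test_dist_transpiler assertions earlier in this patch check for.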