Unverified commit d2ba91aa, authored by T tianshuo78520a and committed by GitHub

fix typo words (#22653)

Parent 6e7bfe30
......@@ -126,7 +126,7 @@ class Dataset {
virtual void DestroyPreLoadReaders() = 0;
// set preload thread num
virtual void SetPreLoadThreadNum(int thread_num) = 0;
// seperate train thread and dataset thread
// separate train thread and dataset thread
virtual void DynamicAdjustChannelNum(int channel_num) = 0;
virtual void DynamicAdjustReadersNum(int thread_num) = 0;
// set fleet send sleep seconds
......
......@@ -133,10 +133,10 @@ struct BuildStrategy {
// The picture is here:
// https://github.com/PaddlePaddle/Paddle/pull/17263#discussion_r285411396
bool use_hierarchical_allreduce_{false};
// Nccl ranks in a node when use hierarchical allreduce, it's setted to gpu
// Nccl ranks in a node when using hierarchical allreduce, it's set to gpu
// cards' number in most cases.
size_t hierarchical_allreduce_inter_nranks_{0};
// Nccl ranks bewteen nodes when use hierarchical allreduce, it's setted to
// Nccl ranks between nodes when using hierarchical allreduce, it's set to
// nodes number.
size_t hierarchical_allreduce_exter_nranks_{0};
......
......@@ -33,7 +33,7 @@ namespace ir {
GET_IR_NODE(act_op); \
GET_IR_NODE(act_out);
// Inherient the basic infomation from `base_desc`, and modify some fields.
// Inherit the basic information from `base_desc`, and modify some fields.
framework::proto::OpDesc PrepareOpDesc(
const framework::proto::OpDesc& base_desc, const std::string& bias,
const std::string& bias1, const std::string& activation,
......
......@@ -31,7 +31,7 @@ namespace ir {
GET_IR_NODE(act_op); \
GET_IR_NODE(act_out);
// Inherient the basic infomation from `base_desc`, and modify some fields.
// Inherit the basic information from `base_desc`, and modify some fields.
framework::proto::OpDesc PrepareOpDesc(
const framework::proto::OpDesc& base_desc, const std::string& bias,
const std::string& activation, const std::string& output) {
......
......@@ -382,7 +382,7 @@ const VarDesc *FuseOptimizerOpPass::GetVarDescFromVarsInfo(
const std::string &var_name) const {
auto grad_iter = vars_info.find(var_name);
PADDLE_ENFORCE_EQ(grad_iter != vars_info.end(), true,
"The gradient varibale %s is not found.", var_name);
"The gradient variable %s is not found.", var_name);
PADDLE_ENFORCE_EQ(!grad_iter->second.empty(), true,
"The gradient var node %s is not found.", var_name);
PADDLE_ENFORCE_NOT_NULL(grad_iter->second.front()->Var(),
......
......@@ -131,7 +131,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
}
// The intermediate Nodes can only link to the nodes inside the pattern, or this
// subgraph will be droped.
// subgraph will be dropped.
void GraphPatternDetector::ValidateByNodeRole(
std::vector<GraphPatternDetector::subgraph_t> *subgraphs) {
std::vector<GraphPatternDetector::subgraph_t> result;
......
......@@ -179,7 +179,7 @@ void BatchMergePass::ApplyImpl(ir::Graph* graph) const {
ir::Node* var = nullptr;
auto updated_var = UpdateGradVarDesc(in_node->Var(), i, grad_names,
bn_vars_need_rename);
// should be initialized by startup, how to initilize tensor in the
// should be initialized by startup, how to initialize tensor in the
// scope?
if (node->Name() == "batch_norm" &&
bn_vars_need_rename.find(in_node->Name()) !=
......
......@@ -1041,7 +1041,7 @@ void DistSSAGraphBuilder::InsertPostprocessOps(ir::Graph *result) const {
// There are 4 conditions:
// 1. GPU && Reduce: Reduce gradient then broadcast gradient to other GPUS.
// Need to broadcast received parameters to other GPU.
// 2. GPU && AllReduce: AllReduce all graident to each GPU. Need to
// 2. GPU && AllReduce: AllReduce all gradient to each GPU. Need to
// broadcast received parameters to other GPU.
// 3. CPU && AllReduce: AllReduce all gradient to each thread. Need to
// broadcast received parameters to other scope.
......
......@@ -80,7 +80,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
PADDLE_ENFORCE_EQ(
in_var_names.size(), out_var_names.size(),
platform::errors::PreconditionNotMet(
"Op [%s]: Input var number shoule be equal with output var number",
"Op [%s]: Input var number should be equal with output var number",
op_.Type()));
for (size_t i = 0; i < in_var_names.size(); ++i) {
......@@ -663,7 +663,7 @@ void OpDesc::Flush() {
void OpDesc::CheckAttrs() {
PADDLE_ENFORCE(!Type().empty(),
"CheckAttr() can not be called before type is setted.");
"CheckAttr() can not be called before type is set.");
auto *checker = OpInfoMap::Instance().Get(Type()).Checker();
if (checker == nullptr) {
// checker is not configured. That operator could be generated by Paddle,
......@@ -706,7 +706,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
void OpDesc::InferVarType(BlockDesc *block) const {
// There are a few places that var type can be set.
// When VarDesc is created, default set to LOD_TENSOR.
// When output variable is created, default is defaut set to LOD_TENSOR.
// When output variable is created, it is set to LOD_TENSOR by default.
// We limit here to be the only place that operator defines its customized
// var type inference. Hence, we don't do any "default" setting here.
auto &info = OpInfoMap::Instance().Get(this->Type());
......
......@@ -654,7 +654,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_ENFORCE_EQ(
in_var_list.size(), out_var_list.size(),
platform::errors::PreconditionNotMet(
"Op [%s]: Input var size should be equal with ouput var size",
"Op [%s]: Input var size should be equal with output var size",
op_.Type()));
auto& out_var_names = op_.Outputs(out);
......
......@@ -53,8 +53,8 @@ constexpr char kEmptyVarName[] = "@EMPTY@";
constexpr char kTempVarName[] = "@TEMP@";
/// If a variable's name has a certain suffix, it means that the
/// variable is the gradient of another varibale.
/// e.g. Variable "x@GRAD" is the gradient of varibale "x".
/// variable is the gradient of another variable.
/// e.g. Variable "x@GRAD" is the gradient of variable "x".
constexpr char kGradVarSuffix[] = "@GRAD";
constexpr size_t kGradVarSuffixSize = 5U;
......
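The hunk above documents the "@GRAD" suffix convention: "x@GRAD" names the gradient of variable "x". A minimal Python sketch of that naming rule, with hypothetical helper names (only the suffix convention itself comes from the source):

```python
GRAD_VAR_SUFFIX = "@GRAD"  # mirrors kGradVarSuffix above

def grad_var_name(var_name):
    # "x" -> "x@GRAD"
    return var_name + GRAD_VAR_SUFFIX

def origin_var_name(grad_name):
    # "x@GRAD" -> "x"; names without the suffix are returned unchanged
    if grad_name.endswith(GRAD_VAR_SUFFIX):
        return grad_name[:-len(GRAD_VAR_SUFFIX)]
    return grad_name

assert grad_var_name("x") == "x@GRAD"
assert origin_var_name("x@GRAD") == "x"
```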
......@@ -340,7 +340,7 @@ class IndicateLoDTensorDataTypeTestProtoMaker : public OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("LoDTensor", "Input of Tensor type Variable.");
AddComment("This Op is only for IndicateVarDataType inferface test.");
AddComment("This Op is only for IndicateVarDataType interface test.");
}
};
......@@ -362,7 +362,7 @@ class IndicateSelectedRowsDataTypeTestProtoMaker
public:
void Make() {
AddInput("SelectedRows", "Input of SelectedRows type Variable.");
AddComment("This Op is only for IndicateVarDataType inferface test.");
AddComment("This Op is only for IndicateVarDataType interface test.");
}
};
......@@ -382,7 +382,7 @@ class IndicateOtherDataTypeTestProtoMaker : public OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("Other", "Input of Other type Variable");
AddComment("This Op is only for IndicateVarDataType inferface test.");
AddComment("This Op is only for IndicateVarDataType interface test.");
}
};
......@@ -572,7 +572,7 @@ class GetSetLoDLevelTestMaker : public OpProtoAndCheckerMaker {
void Make() {
AddInput("X", "(LoDTensor) Input Variable.");
AddOutput("Out", "(LoDTensor) Output Variable.");
AddComment("This Op is only for Get/SetLoDLevel inferface test.");
AddComment("This Op is only for Get/SetLoDLevel interface test.");
}
};
......
......@@ -112,7 +112,7 @@ void RenameAndGetOutputs(
std::unordered_map<std::string, std::string> *output_name_map,
const std::unordered_map<std::string, framework::ir::Node *> &graph_var_map,
bool trt_and_not_int8) {
//// In the normal case, the paddle-trt exists bug when runing the googlenet.
//// In the normal case, paddle-trt has a bug when running the googlenet.
// When there are more than two convolutions of 1 * 1 with the same input, the
// paddle-tensorrt will do the merging optimization, which fuse those conv
// into one conv, and then trigger bug. So, We should use strategy to avoid
......
......@@ -223,7 +223,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(
auto use_static_engine = Get<bool>("use_static_engine");
// TODO(NHZlX)
// There are models with the same structure but the different parameters,
// when runing in the 'use_serialize' mode, there is a bug.
// when running in the 'use_serialize' mode, there is a bug.
auto engine_key = GenerateEngineKey(input_names_with_id, output_names_with_id,
std::to_string(0));
auto predictor_id = Get<int>("predictor_id");
......
......@@ -137,7 +137,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
"model version %ld is not supported.",
main_program->Version());
// model_from_memory is false in seperate parameters.
// model_from_memory is false in separate parameters.
LoadPersistables(executor, scope, *main_program, dirname, "",
false /* model_from_memory */);
return main_program;
......
......@@ -101,7 +101,7 @@ class TRTConvertValidation {
DeclVar(name, dim_vec);
}
// Declare a parameter varaible in the scope.
// Declare a parameter variable in the scope.
void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) {
DeclVar(name, dims, true);
}
......
......@@ -104,10 +104,9 @@ void TensorRTEngine::FreezeNetwork() {
for (auto &t : all_t) {
if (!quant_dynamic_range_.count(t)) {
VLOG(3)
<< "We are in trt int8 mode(not calibration), scale not setted"
<< " for tensor " << t->getName()
<< ", this might be ok when trt does not need this range";
VLOG(3) << "We are in trt int8 mode(not calibration), scale not set"
<< " for tensor " << t->getName()
<< ", this might be ok when trt does not need this range";
}
}
std::unordered_set<std::string> all_out_t_name;
......
......@@ -172,7 +172,7 @@ class ArrayToLoDTensorOpProtoMaker : public framework::OpProtoAndCheckerMaker {
"(std::vector<LodTensor>) A vector of tensors that is going to "
"be casted to a big LoDTensor.");
AddInput("RankTable",
"(LoDRankTable) RankTable provides the coarse lod infomation to "
"(LoDRankTable) RankTable provides the coarse lod information to "
"build the output LoDTensor. See "
"'paddle/framework/lod_rank_table.h' for more details.");
AddOutput("Out", "(LoDTensor) The LoDTensor formed by input tensor array.");
......
......@@ -132,7 +132,7 @@ class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor<int64_t>), The accumulating times of previous window with "
"shape [1].");
AddInput("in_num_updates",
"(Tensor<int64_t>), The total number of batches used by trainning "
"(Tensor<int64_t>), The total number of batches used by training "
"before this batch with shape [1].");
AddOutput("out_sum_1",
......@@ -155,10 +155,9 @@ class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker {
"out_old_num_accumulates",
"(Tensor<int64_t>) The accumulating times of previous window with "
"shape [1].");
AddOutput(
"out_num_updates",
"(Tensor<int64_t>), The total number of batches used by trainning "
"before this batch with shape [1].");
AddOutput("out_num_updates",
"(Tensor<int64_t>), The total number of batches used by training "
"before this batch with shape [1].");
AddAttr<float>("average_window",
"(float, default 0) "
......
......@@ -49,7 +49,7 @@ class BilinearTensorProductKernel : public framework::OpKernel<T> {
auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
auto& dev_ctx = ctx.template device_context<DeviceContext>();
// Create the intermediate variable to caculate the result of
// Create the intermediate variable to calculate the result of
// Input(X) multiplied by Input(Weight_i), the formula is:
// left_mul = X Weight_i.
Tensor left_mul;
......
......@@ -267,7 +267,7 @@ void Conv2DTransposeOpMaker::Make() {
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
"better hardward. This size should be carefully set.")
.SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
AddComment(R"DOC(
Convolution2D Transpose Operator.
......@@ -368,7 +368,7 @@ void Conv3DTransposeOpMaker::Make() {
"workspace is a section of GPU memory which will be "
"allocated/freed each time the operator runs, larger "
"workspace size can increase performance but also requires "
"better hardward. This size should be carefully setted.")
"better hardward. This size should be carefully set.")
.SetDefault(platform::GetDefaultConvWorkspaceSizeLimitMB());
AddComment(R"DOC(
Convolution3D Transpose Operator.
......
......@@ -36,7 +36,7 @@ class CropOp : public framework::OperatorWithKernel {
auto shape = ctx->Attrs().Get<std::vector<int>>("shape");
PADDLE_ENFORCE_EQ(
int64_t(shape.size()), x_dim.size(),
"Shape size should be equal to dimention size of input tensor.");
"Shape size should be equal to dimension size of input tensor.");
std::vector<int64_t> tensor_shape(shape.size());
for (size_t i = 0; i < shape.size(); ++i) {
tensor_shape[i] = static_cast<int64_t>(shape[i]);
......
......@@ -82,7 +82,7 @@ class CropTensorOp : public framework::OperatorWithKernel {
}
PADDLE_ENFORCE_EQ(int64_t(shape.size()), x_dim.size(),
"Attr(shape)'size of Op(crop_tensor) should be equal to "
"dimention size of input tensor.");
"dimension size of input tensor.");
std::vector<int64_t> out_shape(shape.size(), -1);
for (size_t i = 0; i < shape.size(); ++i) {
if (shape[i] > 0) {
......
......@@ -157,7 +157,7 @@ void CropTensorFunction(const framework::ExecutionContext& context) {
// get shape from Input(ShapeTensor) of Input(Shape)
std::vector<int> shape = GetShape(context);
// out_dims setted by arrt(shape)
// out_dims set by attr(shape)
if (shape.size() == 0) {
for (int i = 0; i < out_dims.size(); ++i) {
shape.push_back(out_dims[i]);
......
......@@ -203,7 +203,7 @@ class CrossEntropyOpMaker : public framework::OpProtoAndCheckerMaker {
"represents the cross entropy loss.");
AddAttr<bool>("soft_label",
"(bool, default false), a flag indicating whether to "
"interpretate the given labels as soft labels.")
"interpretant the given labels as soft labels.")
.SetDefault(false);
AddAttr<int>("ignore_index",
"(int, default -100), Specifies a target value that is"
......
......@@ -63,7 +63,7 @@ class CTCAlignOpMaker : public framework::OpProtoAndCheckerMaker {
"sequence in Output.")
.AsDispensable();
AddAttr<int>("blank",
"(int, default: 0), the blank label setted in Connectionist "
"(int, default: 0), the blank label set in Connectionist "
"Temporal Classification (CTC) op.")
.SetDefault(0);
AddAttr<bool>("merge_repeated",
......
......@@ -33,8 +33,8 @@ class CumsumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "Input of cumsum operator");
AddOutput("Out", "Output of cumsum operator");
AddAttr<int>("axis",
"The dimenstion to accumulate along. -1 means the last "
"dimenstion [default -1].")
"The dimension to accumulate along. -1 means the last "
"dimension [default -1].")
.SetDefault(-1)
.EqualGreaterThan(-1);
AddAttr<bool>("exclusive",
......
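The cumsum hunk above describes an `axis` attribute (-1 meaning the last dimension) and an `exclusive` flag. A hedged pure-Python sketch of the accumulation semantics for the 1-D case (the function name is illustrative, not the operator's API):

```python
def cumsum_1d(xs, exclusive=False):
    # exclusive=True shifts the running sums right by one, starting from 0
    out, running = [], 0
    for x in xs:
        if exclusive:
            out.append(running)
            running += x
        else:
            running += x
            out.append(running)
    return out

assert cumsum_1d([1, 2, 3, 4]) == [1, 3, 6, 10]
assert cumsum_1d([1, 2, 3, 4], exclusive=True) == [0, 1, 3, 6]
```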
......@@ -67,7 +67,7 @@ class DeformablePSROIPoolOpMaker : public framework::OpProtoAndCheckerMaker {
"the number of groups which input channels are divided."
"(eg.number of input channels is k1*k2*(C+1), which k1 and k2 "
"are group width and height and C+1 is number of output "
"chanels. eg.(4, 6), which 4 is height of group and 6 is "
"channels. eg.(4, 6), which 4 is height of group and 6 is "
"width of group");
AddAttr<int>("pooled_height",
"(int), "
......
......@@ -117,7 +117,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
.InEnum({"encode_center_size", "decode_center_size"});
AddAttr<bool>("box_normalized",
"(bool, default true) "
"whether treat the priorbox as a noramlized box")
"whether treat the priorbox as a normalized box")
.SetDefault(true);
AddAttr<int>("axis",
"(int, default 0)"
......@@ -140,7 +140,7 @@ class BoxCoderOpMaker : public framework::OpProtoAndCheckerMaker {
"box_coder_op with shape [N, M, 4] representing the result of N "
"target boxes encoded with M Prior boxes and variances. When "
"code_type is 'decode_center_size', N represents the batch size "
"and M represents the number of deocded boxes.");
"and M represents the number of decoded boxes.");
AddComment(R"DOC(
......
......@@ -403,7 +403,7 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"each element is a bounding box with (xmin, ymin, xmax, ymax) format.");
AddInput("LabelsInt32",
"(LoDTensor), This intput is a 2D LoDTensor with shape [R, 1], "
"each element repersents a class label of a roi");
"each element represents a class label of a roi");
AddOutput(
"MaskRois",
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4]. "
......@@ -411,7 +411,7 @@ class GenerateMaskLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
AddOutput("RoiHasMaskInt32",
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"each element repersents the output mask rois index with regard "
"each element represents the output mask rois index with regard "
"to input rois");
AddOutput("MaskInt32",
"(LoDTensor), This output is a 4D LoDTensor with shape [P, Q], "
......
......@@ -521,11 +521,11 @@ class GenerateProposalLabelsOpMaker : public framework::OpProtoAndCheckerMaker {
"each element is a bounding box with [xmin, ymin, xmax, ymax] format.");
AddOutput("LabelsInt32",
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 1], "
"each element repersents a class label of a roi");
"each element represents a class label of a roi");
AddOutput("BboxTargets",
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
"class_nums], "
"each element repersents a box label of a roi");
"each element represents a box label of a roi");
AddOutput(
"BboxInsideWeights",
"(LoDTensor), This output is a 2D LoDTensor with shape [P, 4 * "
......
......@@ -63,7 +63,7 @@ class IOUSimilarityOpMaker : public framework::OpProtoAndCheckerMaker {
"bottom coordinate of the box.");
AddAttr<bool>("box_normalized",
"(bool, default true) "
"whether treat the priorbox as a noramlized box")
"whether treat the priorbox as a normalized box")
.SetDefault(true);
AddOutput("Out",
"(LoDTensor, the lod is same as input X) The output of "
......
......@@ -393,7 +393,7 @@ class LocalityAwareNMSOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("nms_top_k",
"(int64_t) "
"Maximum number of detections to be kept according to the "
"confidences aftern the filtering detections based on "
"confidences after the filtering detections based on "
"score_threshold");
AddAttr<float>("nms_threshold",
"(float, default: 0.3) "
......
......@@ -424,7 +424,7 @@ class MultiClassNMSOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("nms_top_k",
"(int64_t) "
"Maximum number of detections to be kept according to the "
"confidences aftern the filtering detections based on "
"confidences after the filtering detections based on "
"score_threshold");
AddAttr<float>("nms_threshold",
"(float, default: 0.3) "
......
......@@ -44,7 +44,7 @@ class TargetAssignOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_EQ(neg_dims.size(), 2,
"The rank of Input(NegIndices) must be 2.");
PADDLE_ENFORCE_EQ(neg_dims[1], 1,
"The last dimenstion of Out(NegIndices) must be 1.");
"The last dimension of Out(NegIndices) must be 1.");
}
auto n = mi_dims[0];
......
......@@ -111,15 +111,15 @@ class Yolov3LossOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X",
"The input tensor of YOLOv3 loss operator, "
"This is a 4-D tensor with shape of [N, C, H, W]."
"H and W should be same, and the second dimention(C) stores"
"H and W should be same, and the second dimension(C) stores"
"box locations, confidence score and classification one-hot"
"keys of each anchor box");
AddInput("GTBox",
"The input tensor of ground truth boxes, "
"This is a 3-D tensor with shape of [N, max_box_num, 5], "
"max_box_num is the max number of boxes in each image, "
"In the third dimention, stores x, y, w, h coordinates, "
"x, y is the center cordinate of boxes and w, h is the "
"In the third dimension, stores x, y, w, h coordinates, "
"x, y is the center coordinate of boxes and w, h is the "
"width and height and x, y, w, h should be divided by "
"input image height to scale to [0, 1].");
AddInput("GTLabel",
......
......@@ -79,7 +79,7 @@ TEST(ElementwiseMulOpTester, correct_dims) {
MainTest(test_data);
}
// Checks if AreDimsAndFormatCorrect fails when channel_num is not divisable by
// Checks if AreDimsAndFormatCorrect fails when channel_num is not divisible by
// 16
TEST(ElementwiseMulOpTester, incorrect_channel_num) {
TestData test_data;
......
......@@ -76,7 +76,7 @@ class FusionGroupOpMaker : public framework::OpProtoAndCheckerMaker {
fusion_group Operator.
It is used to execute a generated CUDA kernel which fuse the computation of
multiple operators into one. It supports serveral types:
multiple operators into one. It supports several types:
0, fused computation of elementwise operations in which all the dims of inputs
and outputs should be exactly the same.
)DOC");
......
......@@ -76,7 +76,7 @@ class TransposeFlattenConcatFusionKernel : public framework::OpKernel<T> {
}
}
// Since concat is aftern flatten, the output is 2D tensor.
// Since concat is after flatten, the output is 2D tensor.
// If concat_axis is 0, each input's permutated tensor is continuous.
// If concat_axis is 1, the stride of 0-th dim of each input's
// permutated tensor is odims()[1].
......
......@@ -84,7 +84,7 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
"Grid",
"(Tensor) The input grid of GridSampleOp generated by AffineGridOp, "
"This is a 4-D tensor with shape of [N, H, W, 2] is the concatenation "
"of x and y coordinates with shape [N, H, W] in last dimention");
"of x and y coordinates with shape [N, H, W] in last dimension");
AddOutput("Output", "(Tensor) Output tensor with shape [N, C, H, W]");
AddAttr<bool>(
"use_cudnn",
......@@ -93,11 +93,11 @@ class GridSampleOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment(R"DOC(
This operation samples input X by using bilinear interpolation based on
flow field grid, which is usually gennerated by affine_grid. The grid of
flow field grid, which is usually generated by affine_grid. The grid of
shape [N, H, W, 2] is the concatenation of (grid_x, grid_y) coordinates
with shape [N, H, W] each, where grid_x is indexing the 4th dimension
(in width dimension) of input data x and grid_y is indexng the 3rd
dimention (in height dimension), finally results is the bilinear
(in width dimension) of input data x and grid_y is indexing the 3rd
dimension (in height dimension); finally the result is the bilinear
interpolation value of 4 nearest corner points.
Step 1:
......
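The comment above says grid_sample gathers values by bilinear interpolation of the 4 nearest corner points, with grid_x indexing the width dimension and grid_y the height dimension. A small sketch of that interpolation for one channel, assuming the grid already holds in-range pixel coordinates (the grid normalization used by the real op is omitted):

```python
def bilinear_sample(img, gx, gy):
    # img is a 2-D list [H][W]; (gx, gy) are pixel coordinates inside the image
    H, W = len(img), len(img[0])
    x0, y0 = int(gx), int(gy)                        # top-left corner
    x1, y1 = min(x0 + 1, W - 1), min(y0 + 1, H - 1)
    lx, ly = gx - x0, gy - y0                        # fractional offsets
    top = img[y0][x0] * (1 - lx) + img[y0][x1] * lx
    bottom = img[y1][x0] * (1 - lx) + img[y1][x1] * lx
    return top * (1 - ly) + bottom * ly

img = [[0.0, 1.0], [2.0, 3.0]]
assert bilinear_sample(img, 0.5, 0.5) == 1.5         # average of the 4 corners
```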
......@@ -113,7 +113,7 @@ class GRUOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate();
AddOutput(
"BatchResetHiddenPrev",
"(LoDTensor) The reseted hidden state LoDTensor organized in batches. "
"(LoDTensor) The reset hidden state LoDTensor organized in batches. "
"This LoDTensor is a matrix with shape (T X D) and has the same LoD "
"with `BatchGate`.")
.AsIntermediate();
......
......@@ -97,7 +97,7 @@ class GRUUnitOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate();
AddOutput("ResetHiddenPrev",
"(Tensor) Matrix with shape [batch_size, frame_size] for the "
"reseted hidden state of previous time step.")
"reset hidden state of previous time step.")
.AsIntermediate();
AddOutput("Hidden",
"(Tensor) The GRU hidden state of the current time step "
......
......@@ -144,7 +144,7 @@ class HierarchicalSigmoidOpMaker : public framework::OpProtoAndCheckerMaker {
.AsIntermediate();
AddOutput(
"W_Out",
"(LoDTensor, optinal) using input 'W' as Output to make it mutable"
"(LoDTensor, optional) using input 'W' as Output to make it mutable"
"When we are using prefetch")
.AsIntermediate();
AddAttr<AttrType>("num_classes", "(int, optional), The number of classes")
......
......@@ -285,7 +285,7 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
interpolation.
Nearest neighbor interpolation is to perform nearest neighbor interpolation
in both the 3rd dimention(in height direction) and the 4th dimention(in width
in both the 3rd dimension(in height direction) and the 4th dimension(in width
direction) on input tensor.
Bilinear interpolation is an extension of linear interpolation for
......@@ -299,7 +299,7 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
H-direction and W-direction in this op) on a rectilinear 3D grid.
The linear interpolation is performed on three directions.
Align_corners and align_mode are optinal parameters,the calculation method
Align_corners and align_mode are optional parameters,the calculation method
of interpolation can be selected by them.
Example:
......
......@@ -296,7 +296,7 @@ $$
Function implementation:
Inputs and outpus are in NCHW or NHWC format, while input.shape.ndims() equals 4.
Inputs and outputs are in NCHW or NHWC format, while input.shape.ndims() equals 4.
If NCHW, the dimensions 0 ~ 3 represent batch size, feature maps, rows,
and columns, respectively.
......
......@@ -105,7 +105,7 @@ class SimpleCode {
SimpleCode(size_t code, size_t num_classes, const int64_t* ids)
: c_(static_cast<size_t>(ids[code]) + num_classes) {}
/**
* Here the id of root shoud be 1 rather than 0, thus the encoding of class c
* Here the id of root should be 1 rather than 0, thus the encoding of class c
* is `c + num_classes` and all siblings can get the same weight indice using
* prefixes.
* Weight index is the prefixes of encoding, thus leave out the right most
......
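The comment above states that the root id is 1, the code of class c is `c + num_classes`, the weight indices are the prefixes of that code, and the right-most bit is left out. A hedged sketch of that default binary coding (helper name and return layout are illustrative):

```python
def path_and_bits(c, num_classes):
    code = c + num_classes
    length = code.bit_length() - 1              # number of decisions from the root
    nodes, bits = [], []
    for depth in range(length, 0, -1):
        nodes.append(code >> depth)             # prefix of the encoding, root == 1
        bits.append((code >> (depth - 1)) & 1)  # decision taken below that prefix
    return nodes, bits

# With 4 classes, class 2 has code 6 = 0b110: path 1 -> 3, decisions [1, 0].
assert path_and_bits(2, 4) == ([1, 3], [1, 0])
```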
......@@ -129,19 +129,19 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"CustomDistProbs",
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled.")
"The i-th element is the probability of the i-th class being sampled.")
.AsDispensable();
AddInput(
"CustomDistAlias",
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled.")
"The i-th element is the probability of the i-th class being sampled.")
.AsDispensable();
AddInput(
"CustomDistAliasProbs",
"(Tensor) It is used in 'CostumDist' sampler. "
"It is a tensor with shape [num_total_classes]."
"The i-th element is the probsbility of the i-th class being sampled.")
"The i-th element is the probability of the i-th class being sampled.")
.AsDispensable();
AddOutput("Cost",
......
......@@ -36,7 +36,7 @@ class PadConstantLikeOp : public framework::OperatorWithKernel {
auto y_dim = ctx->GetInputDim("Y");
PADDLE_ENFORCE_EQ(x_dim.size(), y_dim.size(),
"The dimention of X and Y should be the same.");
"The dimension of X and Y should be the same.");
for (int i = 0; i < x_dim.size(); ++i) {
if ((!ctx->IsRuntime()) && ((x_dim[i] == -1) || (y_dim[i] == -1))) {
......@@ -164,7 +164,7 @@ class PadConstantLikeOpGrad : public framework::OperatorWithKernel {
auto dout_dim = ctx->GetInputDim(framework::GradVarName("Out"));
PADDLE_ENFORCE_EQ(dout_dim.size(), y_dim.size(),
"The dimention of X and Y should be the same.");
"The dimension of X and Y should be the same.");
auto y_grad_name = framework::GradVarName("Y");
if (ctx->HasOutput(y_grad_name)) {
......
......@@ -325,7 +325,7 @@ class GPUPRROIPoolGradOpKernel : public framework::OpKernel<T> {
} else {
PADDLE_ENFORCE_EQ(rois->lod().empty(), false,
platform::errors::InvalidArgument(
"the lod of Input ROIs shoule not be empty when "
"the lod of Input ROIs should not be empty when "
"BatchRoINums is None!"));
auto rois_lod = rois->lod().back();
int rois_batch_size = rois_lod.size() - 1;
......
......@@ -293,7 +293,7 @@ class CPUPRROIPoolOpKernel : public framework::OpKernel<T> {
} else {
PADDLE_ENFORCE_EQ(rois->lod().empty(), false,
platform::errors::InvalidArgument(
"the lod of Input ROIs shoule not be empty when "
"the lod of Input ROIs should not be empty when "
"BatchRoINums is None!"));
auto rois_lod = rois->lod().back();
int rois_batch_size = rois_lod.size() - 1;
......
......@@ -24,8 +24,8 @@ namespace operators {
// Returns true if the two dimensions are compatible.
// A dimension is compatible with the other if:
// 1. The length of the dimensions are same.
// 2. Each non-negative number of the two dimentions are same.
// 3. For negative number in a dimention, it means unknown so it is compatible
// 2. Each non-negative number of the two dimensions are same.
// 3. For negative number in a dimension, it means unknown so it is compatible
// with any number.
bool DimensionIsCompatibleWith(const framework::DDim& first,
const framework::DDim& second) {
......
......@@ -174,7 +174,7 @@ class ReduceOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(
dims.size(), 0,
"ShapeError: The input dim dimensions of Reduce "
"shoud be greater than 0. But received the dim dimesions of Reduce "
"should be greater than 0. But received the dim dimesions of Reduce "
" = %d",
dims.size());
......
......@@ -162,7 +162,7 @@ class ReshapeOp : public framework::OperatorWithKernel {
shape[i], 0,
platform::errors::InvalidArgument(
"Each dimension value of 'shape' in ReshapeOp must not "
"be negtive except one unknown dimension. "
"be negative except one unknown dimension. "
"But received shape = [%s], shape[%d] = %d.",
framework::make_ddim(shape), i, shape[i]));
}
......@@ -234,7 +234,7 @@ class ReshapeOpMaker : public framework::OpProtoAndCheckerMaker {
"(Tensor<int32>, optional). Target shape of reshape operator. "
"It has a higher priority than Attr(shape) but a lower priority "
"than Input(ShapeTensor). The Attr(shape) still should be "
"set correctly to gurantee shape inference in compile time.")
"set correctly to guarantee shape inference in compile time.")
.AsDispensable();
AddInput(
"ShapeTensor",
......@@ -288,7 +288,7 @@ dimension value will be copied from Input(X) at runtime. Note that the index of
[2, 3, 4], Attr(shape) = [2, 3, 2, 0] is an invalid input.
3. Input(Shape) has a higher priority than Attr(shape) if it is provided, while
Attr(shape) still should be set correctly to gurantee shape inference in
Attr(shape) still should be set correctly to guarantee shape inference in
compile-time.
)DOC");
......
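The ReshapeOp hunk above explains that a 0 in Attr(shape) copies the dimension from Input(X) at the same index and that at most one -1 (unknown) dimension is allowed. A hedged sketch of that shape-inference rule (helper name is illustrative):

```python
def infer_reshape(in_shape, target):
    out, unknown = [], None
    for i, d in enumerate(target):
        if d == 0:
            out.append(in_shape[i])   # copy the dimension from the input
        elif d == -1:
            assert unknown is None, "only one unknown (-1) dimension is allowed"
            unknown = i
            out.append(-1)
        else:
            out.append(d)
    total = 1
    for d in in_shape:
        total *= d
    known = 1
    for d in out:
        if d != -1:
            known *= d
    if unknown is not None:
        out[unknown] = total // known  # infer the single unknown dimension
    return out

assert infer_reshape([2, 3, 4], [0, -1]) == [2, 12]
```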
......@@ -86,7 +86,7 @@ class ScatterOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("Updates", "The updated value of scatter op");
AddOutput("Out", "The output of scatter op");
AddAttr<bool>("overwrite",
"(bool, defalut: True) "
"(bool, default: True) "
"The mode that updating the output when has same index,"
"If True, use the overwrite mode to update the output"
"of the same index, if False, use the accumulate mode to"
......
......@@ -67,7 +67,7 @@ class SelectInputOpProtoMaker : public framework::OpProtoAndCheckerMaker {
// Because this op is blocking whole control flow. I am implementing MVP
// (minimal viable product) here.
AddComment(R"DOC(
Merge branches of LoDTensor into a single Output with a mask interger
Merge branches of LoDTensor into a single Output with a mask integer
specifying the output branch.
)DOC");
}
......
......@@ -118,7 +118,7 @@ class SequencePadOpMaker : public framework::OpProtoAndCheckerMaker {
"sequences before padding.");
AddAttr<int>(
"padded_length",
"The length of padded sequences. It can be setted to -1 or "
"The length of padded sequences. It can be set to -1 or "
"any positive int. When it is -1, all sequences will be padded up to "
"the length of the longest one among them; when it a certain positive "
"value, it must be greater than the length of the longest original "
......
......@@ -54,7 +54,7 @@ class SequencePoolOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("X", "(LoDTensor) The variable-length input of SequencePoolOp");
AddOutput("Out",
"(Tensor) The output of SequencePoolOp does not contain LoD "
"infomation.");
"information.");
AddOutput("MaxIndex",
"(Tensor<int>) This tensor is used for the sequence max-pooling "
"to record the max indexes.")
......@@ -93,7 +93,7 @@ Assume X is a [7,M,N] LoDTensor, and X->lod()[0] = [0, 2, 5, 7], 7=2+3+2.
Besides, for the sake of simplicity, we assume M=1 and N=1,
and the value of X = [[1, 3], [2, 4, 6], [5, 1]].
Thus, Out is a [3,1,1] Tensor without LoD infomation.
Thus, Out is a [3,1,1] Tensor without LoD information.
And for different pooltype, the value of Out is as follows:
- AVERAGE: [2, 4, 3], where 2=(1+3)/2, 4=(2+4+6)/3, 3=(5+1)/2
......
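The example above pools a LoDTensor with lod [0, 2, 5, 7] and values [[1, 3], [2, 4, 6], [5, 1]] into a tensor without LoD information, e.g. AVERAGE gives [2, 4, 3]. A hedged sketch of average pooling over LoD segments, with the data flattened to one value per row as in the M=1, N=1 example:

```python
def sequence_avg_pool(data, lod):
    # lod [0, 2, 5, 7] splits the 7 rows into sequences of length 2, 3 and 2
    out = []
    for start, end in zip(lod[:-1], lod[1:]):
        seg = data[start:end]
        out.append(sum(seg) / len(seg))
    return out

data = [1, 3, 2, 4, 6, 5, 1]
assert sequence_avg_pool(data, [0, 2, 5, 7]) == [2, 4, 3]
```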
......@@ -63,7 +63,7 @@ class SequenceTopkAvgPoolingOpMaker : public framework::OpProtoAndCheckerMaker {
AddOutput(
"Out",
"(Tensor) The output of SequenceTopkPoolingOp does not contain LoD "
"infomation.");
"information.");
AddOutput("pos", "(Tensor<int>) store the topk index ").AsIntermediate();
AddAttr<std::vector<int>>("topks", "topks");
AddAttr<int>("channel_num", "channel number");
......
......@@ -96,7 +96,7 @@ class SequenceUnpadOpMaker : public framework::OpProtoAndCheckerMaker {
[ 6.0, 7.0, 8.0, 9.0, 10.0],
[11.0, 12.0, 13.0, 14.0, 15.0]],
`
in which there are 3 sequences padded to length 5, and the acutal length
in which there are 3 sequences padded to length 5, and the actual length
specified by Input(Length):
Length.data = [2, 3, 4],
......
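The SequenceUnpad example above has 3 sequences padded to length 5 with actual lengths [2, 3, 4]; unpadding keeps only the first `Length[i]` entries of each row. A minimal sketch of that step:

```python
def sequence_unpad(padded, lengths):
    # keep the first lengths[i] entries of row i and drop the padding
    return [row[:n] for row, n in zip(padded, lengths)]

padded = [[1.0, 2.0, 3.0, 4.0, 5.0],
          [6.0, 7.0, 8.0, 9.0, 10.0],
          [11.0, 12.0, 13.0, 14.0, 15.0]]
assert sequence_unpad(padded, [2, 3, 4]) == [
    [1.0, 2.0], [6.0, 7.0, 8.0], [11.0, 12.0, 13.0, 14.0]]
```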
......@@ -63,7 +63,7 @@ class ShardIndexOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("nshards",
"A positive integer to specify the number of shards.");
AddAttr<int>("shard_id", "The current shard id");
AddAttr<int>("ignore_value", "An ingeter value out of sharded range")
AddAttr<int>("ignore_value", "An integer value out of sharded range")
.SetDefault(-1);
AddComment(R"DOC(
This layer creates the sharded index for input. This layers is used in
......@@ -80,7 +80,7 @@ to
y = x % shard_size if x / shard_size == shard_id else ignore_value
We take the distributed one-hot representation to show what this layer is
used for. The distributed one-hot representation is seperated into multiple
used for. The distributed one-hot representation is separated into multiple
shards, and each shard is filling zeros except the one with the index
inside. In order to create these sharded representation in each trainer,
the original index should be recalculated (i.e. sharded) before.
......
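The shard_index comment above gives the rule `y = x % shard_size if x / shard_size == shard_id else ignore_value` for splitting a one-hot index space across shards. A hedged sketch of that rule; how `shard_size` is derived from `nshards` is not shown in the hunk, so it is passed in directly here:

```python
def shard_index(x, shard_size, shard_id, ignore_value=-1):
    # keep the local offset on the owning shard, ignore_value everywhere else
    return x % shard_size if x // shard_size == shard_id else ignore_value

# With shard_size = 10, index 23 lives on shard 2 as local index 3
assert shard_index(23, 10, shard_id=2) == 3
assert shard_index(23, 10, shard_id=0) == -1
```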
......@@ -73,12 +73,12 @@ class ShrinkRNNMemoryOp : public ArrayOp {
class ShrinkRNNMemoryOpProtoMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("X", "(LoDTensor) The RNN step memory to be shrinked.");
AddInput("X", "(LoDTensor) The RNN step memory to be shrank.");
AddInput("RankTable", "(LoDRankTable) The lod_rank_table of dynamic RNN.");
AddInput("I",
"(LoDTensor) The step index. The RNN step memory 'X' will be "
"shrinked to match the size of the input of the index'th step.");
AddOutput("Out", "(LoDTensor) The shrinked RNN step memory.");
"shrank to match the size of the input of the index'th step.");
AddOutput("Out", "(LoDTensor) The shrank RNN step memory.");
AddComment(R"DOC(
This operator is used to shrink output batch of memory defined in dynamic RNN.
......
......@@ -31,7 +31,7 @@ class SoftmaxWithCrossEntropyOpMaker
"by softmax.");
AddInput(
"Label",
"(Tensor) The input tesnor of groud truth label. If :attr:`soft_label` "
"(Tensor) The input tensor of groud truth label. If :attr:`soft_label` "
"is set to false, Label is a Tensor<int64> in same shape with "
"Input(Logits) except the shape in dimension :attr:`axis` as 1. If "
"soft_label is set to true, Label is a Tensor<float/double> in same "
......@@ -50,7 +50,7 @@ class SoftmaxWithCrossEntropyOpMaker
"entropy loss.");
AddAttr<bool>(
"soft_label",
"(bool, default: false), A flag to indicate whether to interpretate "
"(bool, default: false), A flag to indicate whether to interpretant "
"the given labels as soft labels.")
.SetDefault(false);
AddAttr<bool>(
......
......@@ -100,7 +100,7 @@ where:
Therefore, the calculation can be separated into 3 steps:
Step 1: row-wise operation to calculate max_i
Step 2: row-wise operation to calculate logDiffMaxSum_i
Step 3: caculate tmp_i_j, and finally get softmax_i_j and cross\_entropy_i
Step 3: calculate tmp_i_j, and finally get softmax_i_j and cross\_entropy_i
To save memory, we can share memory among max_i, logDiffMaxSum_i and
cross\_entropy_i.
In this way, the 3 steps should be changed to:
......
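The comment above separates the computation into a row-wise max, a row-wise logDiffMaxSum, and a final step that produces softmax and cross entropy while sharing memory among the intermediates. A hedged sketch of the three steps for a single logits row (the memory sharing itself is not modeled):

```python
import math

def softmax_cross_entropy_row(logits, label_index):
    max_i = max(logits)                                                    # Step 1
    log_diff_max_sum = math.log(sum(math.exp(x - max_i) for x in logits))  # Step 2
    softmax = [math.exp(x - max_i - log_diff_max_sum) for x in logits]     # Step 3
    cross_entropy = -(logits[label_index] - max_i - log_diff_max_sum)
    return softmax, cross_entropy

softmax, loss = softmax_cross_entropy_row([1.0, 2.0, 3.0], label_index=2)
assert abs(sum(softmax) - 1.0) < 1e-12
assert abs(loss + math.log(softmax[2])) < 1e-12
```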
......@@ -93,7 +93,7 @@ class SpectralNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("U",
"The weight_u tensor of spectral_norm operator, "
"This can be a 1-D tensor in shape [H, 1],"
"H is the 1st dimentions of Weight after reshape"
"H is the 1st dimensions of Weight after reshape"
"corresponding by Attr(dim). As for Attr(dim) = 1"
"in conv2d layer with weight shape [M, C, K1, K2]"
"Weight will be reshape to [C, M*K1*K2], U will"
......@@ -101,7 +101,7 @@ class SpectralNormOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput("V",
"The weight_v tensor of spectral_norm operator, "
"This can be a 1-D tensor in shape [W, 1], "
"W is the 2nd dimentions of Weight after reshape "
"W is the 2nd dimensions of Weight after reshape "
"corresponding by Attr(dim). As for Attr(dim) = 1 "
"in conv2d layer with weight shape [M, C, K1, K2] "
"Weight will be reshape to [C, M*K1*K2], V will "
......
......@@ -276,7 +276,7 @@ class TensorRTEngineOp : public framework::OperatorBase {
"size(%d).\n"
"There are two possible causes for this problem: \n"
"1. Check whether the runtime batch is larger than the max_batch "
"setted by EnableTensorrtEngine()\n"
"set by EnableTensorrtEngine()\n"
"2. Check whether the model you are running has multiple trt "
"subgraphs: \n "
"\tIf there are multiple trt subgraphs, you need to ensure that "
......
......@@ -51,7 +51,7 @@ class UnfoldOpMaker : public framework::OpProtoAndCheckerMaker {
This Operator is used to extract sliding local blocks from a batched input tensor, also known
as im2col when operated on batched 2D image tensor. For each block under the convolution filter,
all element will be rearranged as a column. While the convolution filter silding over the input
all elements will be rearranged as a column. While the convolution filter is sliding over the input
feature map, a series of such columns will be formed.
)DOC");
}
......
......@@ -177,7 +177,7 @@ class UniformRandomOpMaker : public framework::OpProtoAndCheckerMaker {
"according to "
"this given shape. It means that it has a higher priority than "
"the shape attribute, while the shape attribute still should be "
"set correctly to gurantee shape inference in compile time.")
"set correctly to guarantee shape inference in compile time.")
.AsDispensable();
AddInput("ShapeTensorList",
"(vector<Tensor<int64_t>> or vector<Tensor<int32_t>>, optional). "
......
......@@ -153,7 +153,7 @@ class UnsqueezeOpMaker : public framework::OpProtoAndCheckerMaker {
PADDLE_ENFORCE_LT(static_cast<int>(axes.size()), 6,
"Invalid dimensions, dynamic dimensions should be "
"within [1, 6] dimensions (Eigen limit).");
// Validity Check: the range of unsqueeze aixs.
// Validity Check: the range of unsqueeze axis.
for (int axis : axes) {
PADDLE_ENFORCE_LT(axis, 6,
"Invalid dimensions, input axis should be"
......
......@@ -123,10 +123,10 @@ An operator integrating the open-source
https://arxiv.org/pdf/1512.02595v1.pdf),
to compute Connectionist Temporal Classification (CTC) loss.
It can be aliased as softmax with ctc, since a native softmax activation is
interated to the warp-ctc library, to to normlize values for each row of the
integrated into the warp-ctc library, to normalize values for each row of the
input tensor.
More detail of CTC loss can be found by refering to
More detail of CTC loss can be found by referring to
[Connectionist Temporal Classification: Labelling Unsegmented Sequence Data with
Recurrent Neural Networks](
http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf).
......
......@@ -50,7 +50,7 @@ void PrintCuptiHint() {
static bool showed = false;
if (showed) return;
showed = true;
LOG(WARNING) << "Invalid timestamp occured. Please try increasing the "
LOG(WARNING) << "Invalid timestamp occurred. Please try increasing the "
"FLAGS_multiple_of_cupti_buffer_size.";
}
......
......@@ -226,7 +226,7 @@ void BindImperative(py::module *m_ptr) {
BackwardStrategy is a descriptor of how to run the backward process.
**Note**:
**This API is only avaliable in** `Dygraph <../../user_guides/howto/dygraph/DyGraph.html>`_ **Mode**
**This API is only available in** `Dygraph <../../user_guides/howto/dygraph/DyGraph.html>`_ **Mode**
Attribute:
**sort_sum_gradient**:
......@@ -339,7 +339,7 @@ void BindImperative(py::module *m_ptr) {
},
R"DOC(
**Notes**:
**This API is ONLY avaliable in Dygraph mode**
**This API is ONLY available in Dygraph mode**
Returns a numpy array shows the value of current :ref:`api_guide_Variable_en`
......@@ -375,7 +375,7 @@ void BindImperative(py::module *m_ptr) {
},
py::return_value_policy::copy, R"DOC(
**Notes**:
**This API is ONLY avaliable in Dygraph mode**
**This API is ONLY available in Dygraph mode**
Returns a new Variable, detached from the current graph.
......@@ -402,7 +402,7 @@ void BindImperative(py::module *m_ptr) {
.def("clear_gradient", &imperative::VarBase::ClearGradient, R"DOC(
**Notes**:
**1. This API is ONLY avaliable in Dygraph mode**
**1. This API is ONLY available in Dygraph mode**
**2. Use it only Variable has gradient, normally we use this for Parameters since other temporal Variable will be deleted by Python's GC**
......
......@@ -224,7 +224,7 @@ def max_job_id():
def movie_categories():
"""
Get movie categoriges dictionary.
Get movie categories dictionary.
"""
__initialize_meta_info__()
return CATEGORIES_DICT
......
......@@ -150,7 +150,7 @@ def gen_plain_txt(querylist):
gen plain text in list for other usage
Parameters:
--------
querylist : querylist, one query match many docment pairs in list, see QueryList
querylist : querylist, one query match many document pairs in list, see QueryList
return :
------
......@@ -171,7 +171,7 @@ def gen_point(querylist):
gen item in list for point-wise learning to rank algorithm
Parameters:
--------
querylist : querylist, one query match many docment pairs in list, see QueryList
querylist : querylist, one query match many document pairs in list, see QueryList
return :
------
......@@ -190,9 +190,9 @@ def gen_pair(querylist, partial_order="full"):
gen pair for pair-wise learning to rank algorithm
Parameters:
--------
querylist : querylist, one query match many docment pairs in list, see QueryList
querylist : querylist, one query match many document pairs in list, see QueryList
partial_order : "full" or "neighbour"
there is redudant in all possiable pair combinations, which can be simplifed
there is redundancy in all possible pair combinations, which can be simplified
gen pairs for neighbour items or the full partial order pairs
return :
......@@ -233,7 +233,7 @@ def gen_list(querylist):
gen item in list for list-wise learning to rank algorithm
Parameters:
--------
querylist : querylist, one query match many docment pairs in list, see QueryList
querylist : querylist, one query match many document pairs in list, see QueryList
return :
------
......@@ -268,7 +268,7 @@ def query_filter(querylists):
def load_from_text(filepath, shuffle=False, fill_missing=-1):
"""
parse data file into querys
parse data file into queries
"""
prev_query_id = -1
querylists = []
......
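gen_pair above notes that the full set of partial-order pairs is redundant and can be restricted to neighbour pairs. A hedged sketch of the two modes over (doc, score) items (function and variable names are illustrative, not the dataset module's API):

```python
from itertools import combinations

def gen_pairs(scored_docs, partial_order="full"):
    # "full": every ordered pair with the higher-scored doc first;
    # "neighbour": only adjacent items after sorting by score
    ranked = sorted(scored_docs, key=lambda d: d[1], reverse=True)
    if partial_order == "full":
        return [(a, b) for a, b in combinations(ranked, 2) if a[1] > b[1]]
    return [(a, b) for a, b in zip(ranked, ranked[1:]) if a[1] > b[1]]

docs = [("d1", 3), ("d2", 1), ("d3", 2)]
assert len(gen_pairs(docs, "full")) == 3
assert len(gen_pairs(docs, "neighbour")) == 2
```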
......@@ -13,18 +13,18 @@
# limitations under the License.
"""
paddle.distributed.launch is a module that spawns multiple distributed
process on each trainning node for gpu trainning.
process on each training node for gpu training.
Usage:
In both of single node training or multiple node training, this module
launch a process on each of the given gpu card.
1. for single node trainning with all visible gpu cards:
1. for single node training with all visible gpu cards:
python -m paddle.distributed.launch \
your_training_py (arg1 arg2 and all others)
2. for single node trainning with [0,4) cards
2. for single node training with [0,4) cards
python -m paddle.distributed.launch --selected_gpus="0,1,2,3" \
your_training_py (arg1 arg2 and all others)
3. for mulitple node training such as two node:192.168.0.16, 192.168.0.17
3. for multiple node training such as two node:192.168.0.16, 192.168.0.17
on 192.168.0.16:
python -m paddle.distributed.launch --cluster_node_ips="192.168.0.16,192.168.0.17" \
--node_ip=192.168.0.16 \
......@@ -114,14 +114,14 @@ POD_IP (current node ip address, not needed for local training)
"--selected_gpus",
type=str,
default=None,
help="It's for gpu trainning and the trainning process will run on the selected_gpus,"
"each process is bound to a single GPU. And if it's not setted, this module will use all the gpu cards for training."
help="It's for gpu training and the training process will run on the selected_gpus,"
"each process is bound to a single GPU. And if it's not set, this module will use all the gpu cards for training."
)
parser.add_argument(
"--log_dir",
type=str,
help="The path for each process's log.If it's not setted, the log will printed to default pipe."
help="The path for each process's log.If it's not set, the log will printed to default pipe."
)
#positional
......
......@@ -61,7 +61,7 @@ def parse_args():
"--log_dir",
default="logs",
type=str,
help="The path for each process's log.If it's not setted, the log will printed to default pipe."
help="The path for each process's log.If it's not set, the log will printed to default pipe."
)
# positional
......
......@@ -832,7 +832,7 @@ def _append_backward_ops_(block,
target_block(Block): the block which is going to hold new generated grad ops
no_grad_dict(dict):
key(int) block index
val(set) a set of varibale names. These varibales have no gradient
val(set) a set of variable names. These variables have no gradient
grad_to_var(dict)(output argument):
key(str): grad variable name
val(str): corresponding forward variable name
......
......@@ -116,7 +116,7 @@ def var_conv_2d(input,
"""
The var_conv_2d layer calculates the output base on the :attr:`input` with variable length,
row, col, input channel, filter size and strides. Both :attr:`input`, :attr:`row`,
and :attr:`col` are 1-level LodTensor. The covolution operation is same as conv2d layer with
and :attr:`col` are 1-level LodTensor. The convolution operation is same as conv2d layer with
padding. Besides, input.dims[1] should be 1.
.. code-block:: text
......@@ -133,9 +133,9 @@ def var_conv_2d(input,
output.dims = [174, 1] # where 174 = 90 + 84
Args:
input (Variable): The input shoud be 1-level LodTensor with dims[1] equals 1.
row (Variable): The row shoud be 1-level LodTensor to provide height information.
col (Variable): The col shoud be 1-level LodTensor to provide width information.
input (Variable): The input should be 1-level LodTensor with dims[1] equals 1.
row (Variable): The row should be 1-level LodTensor to provide height information.
col (Variable): The col should be 1-level LodTensor to provide width information.
input_channel (int): The number of input channel.
output_channel (int): The number of output channel.
filter_size (int|tuple|None): The filter size. If filter_size is a tuple,
......@@ -325,9 +325,9 @@ def sequence_topk_avg_pooling(input, row, col, topks, channel_num):
Args:
input (Variable): The input should be 2D LodTensor with dims[1] equals 1.
row (Variable): The row shoud be 1-level LodTensor to provide the height information
row (Variable): The row should be 1-level LodTensor to provide the height information
of the input tensor data.
col (Variable): The col shoud be 1-level LodTensor to provide the width information
col (Variable): The col should be 1-level LodTensor to provide the width information
of the input tensor data.
topks (list): A list of incremental value to average the topk feature.
channel_num (int): The number of input channel.
......@@ -555,7 +555,7 @@ def multiclass_nms2(bboxes,
low confidence score. If not provided,
consider all boxes.
nms_top_k (int): Maximum number of detections to be kept according to
the confidences aftern the filtering detections based
the confidences after the filtering detections based
on score_threshold.
nms_threshold (float): The threshold to be used in NMS. Default: 0.3
nms_eta (float): The threshold to be used in NMS. Default: 1.0
......
......@@ -181,7 +181,7 @@ def basic_gru(input,
sequence_length (Variabe|None): A Tensor (shape [batch_size]) stores each real length of each instance,
This tensor will be convert to a mask to mask the padding ids
If it's None means NO padding ids
dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of earch layers,
dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
NOT between time steps
bidirectional (bool|False): If it is bidirectional
batch_first (bool|True): The shape format of the input and output tensors. If true,
......@@ -411,7 +411,7 @@ def basic_lstm(input,
sequence_length (Variabe|None): A tensor (shape [batch_size]) stores each real length of each instance,
This tensor will be convert to a mask to mask the padding ids
If it's None means NO padding ids
dropout_prob(float|0.0): Dropout prob, dropout ONLY work after rnn output of earch layers,
dropout_prob(float|0.0): Dropout prob, dropout ONLY works after rnn output of each layer,
NOT between time steps
bidirectional (bool|False): If it is bidirectional
batch_first (bool|True): The shape format of the input and output tensors. If true,
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This module privides a memory usage calculate function for user.
This module provides a memory usage calculate function for user.
The purpose of this API is to allow users to estimate memory usage of
a program under a special batch size, then user can set appropriate
batch size to fully utilize a GPU.
......@@ -91,8 +91,9 @@ def memory_usage(program, batch_size):
for x in var.shape:
if x < 0:
if neg_dim_count >= 1:
raise ValueError("Var %s has more than one negtive dim."
% (var_name))
raise ValueError(
"Var %s has more than one negative dim." %
(var_name))
neg_dim_count += 1
data_count *= batch_size * (-x)
else:
......
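The memory_usage hunk above counts elements per variable, replacing a single negative (batch) dimension with the given batch size and rejecting shapes with more than one negative dimension. A hedged sketch of that counting rule (byte sizes per dtype are omitted):

```python
def count_elements(shape, batch_size):
    count, neg_dim_count = 1, 0
    for x in shape:
        if x < 0:
            if neg_dim_count >= 1:
                raise ValueError("shape %s has more than one negative dim" % (shape,))
            neg_dim_count += 1
            count *= batch_size * (-x)   # treat the negative dim as the batch dim
        else:
            count *= x
    return count

assert count_elements([-1, 3, 224, 224], batch_size=32) == 32 * 3 * 224 * 224
```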
......@@ -147,7 +147,7 @@ class QuantizeTranspiler(object):
"""Rewrites a training input program in place for simulated
quantization. Insert fake quantization and de-quantization ops into
program to simulate the error introduced by quantization. And change
the graident ops' input by using the faked quantization weights and
the gradient ops' input by using the faked quantization weights and
activation. Since the program is transformed in place, the graph
connection will change.
......
......@@ -302,7 +302,7 @@ class Compressor(object):
this optimizer is used to minimize the combined loss of student-net and
teacher-net while train_optimizer is used to minimize loss of
student-net in fine-tune stage.
search_space(slim.nas.SearchSpace): The instance that define the searching space. It must inherite
search_space(slim.nas.SearchSpace): The instance that define the searching space. It must inherit
slim.nas.SearchSpace class and overwrite the abstract methods.
log_period(int): The period of print log of training.
......@@ -551,7 +551,7 @@ class Compressor(object):
def run(self):
"""
Execute compressiong pass.
Execute compressing pass.
"""
context = Context(
place=self.place,
......
......@@ -63,7 +63,7 @@ class VarWrapper(object):
def shape(self):
"""
Get the shape of the varibale.
Get the shape of the variable.
"""
return self._var.shape
......@@ -152,13 +152,13 @@ class OpWrapper(object):
def inputs(self, name):
"""
Get all the varibales by the input name.
Get all the variables by the input name.
"""
return [self._graph.var(var_name) for var_name in self._op.input(name)]
def outputs(self, name):
"""
Get all the varibales by the output name.
Get all the variables by the output name.
"""
return [self._graph.var(var_name) for var_name in self._op.output(name)]
......@@ -233,7 +233,7 @@ class GraphWrapper(object):
"""
Whether the given variable is parameter.
Args:
var(VarWrapper): The given varibale.
var(VarWrapper): The given variable.
"""
return isinstance(var._var, Parameter)
......@@ -241,7 +241,7 @@ class GraphWrapper(object):
"""
Whether the given variable is persistable.
Args:
var(VarWrapper): The given varibale.
var(VarWrapper): The given variable.
"""
return var._var.persistable
......@@ -397,7 +397,7 @@ class GraphWrapper(object):
"""
Get a new graph for training by appending some backward operators and optimization operators.
Args:
optimizer: The optimzier used to generate training graph.
optimizer: The optimizer used to generate training graph.
place: The place to run the graph.
scope: The scope used to run the graph. Some new variable will be added into this scope.
no_grad_var_names(list<str>): Names of variables that should be ignored while computing gradients. default: [].
......
......@@ -27,7 +27,7 @@ _logger = get_logger(
class ControllerServer(object):
"""
The controller wrapper with a socket server to handle the request of search agentt.
The controller wrapper with a socket server to handle the request of search agent.
"""
def __init__(self,
......
......@@ -53,7 +53,7 @@ class AutoPruneStrategy(PruneStrategy):
metric_name(str): The metric used to evaluate the model.
It should be one of keys in out_nodes of graph wrapper. Default: 'top1_acc'
pruned_params(str): The pattern str to match the parameter names to be pruned. Default: 'conv.*_weights'
retrain_epoch(int): The training epochs in each seaching step. Default: 0
retrain_epoch(int): The training epochs in each searching step. Default: 0
uniform_range(int): The token range in each position of tokens generated by controller. None means getting the range automatically. Default: None.
init_tokens(list<int>): The initial tokens. None means getting the initial tokens automatically. Default: None.
"""
......
......@@ -741,7 +741,7 @@ class SensitivePruneStrategy(PruneStrategy):
def _format_sensitivities(self, sensitivities):
"""
Print formated sensitivities in debug log level.
Print formatted sensitivities in debug log level.
"""
tb = pt.PrettyTable()
tb.field_names = ["parameter", "size"] + [
......
......@@ -42,7 +42,7 @@ class StructurePruner(Pruner):
pruning_axis(dict): The key is the name of parameter to be pruned,
'*' means all the parameters.
The value is the axis to be used. Given a parameter
with shape [3, 4], the result of pruning 50% on aixs 1
with shape [3, 4], the result of pruning 50% on axis 1
is a parameter with shape [3, 2].
criterions(dict): The key is the name of parameter to be pruned,
'*' means all the parameters.
......
......@@ -666,10 +666,10 @@ class QuantizationFreezePass(object):
quantizable_op_type=['conv2d', 'depthwise_conv2d', 'mul']):
"""
The freeze pass is used to adjust the quantize operator order, for example:
1) `activation -> quant -> dequant -> conv2d` will be freezed into
1) `activation -> quant -> dequant -> conv2d` will be frozen into
`activation -> quant -> conv2d -> dequant`
2) `weight -> quant -> dequant -> conv2d` will be freezed into `weight -> conv2d`,
and weight will be sacled offline.
2) `weight -> quant -> dequant -> conv2d` will be frozen into `weight -> conv2d`,
and weight will be scaled offline.
Args:
scope(fluid.Scope): scope is used to get the weight tensor values.
......@@ -994,8 +994,8 @@ class ConvertToInt8Pass(object):
def apply(self, graph):
"""
Convert weights' tpye of the graph. After that, the data type of the
graph weigths is int8_t.
Convert weights' type of the graph. After that, the data type of the
graph weights is int8_t.
Args:
graph(IrGraph): the applied graph.
......@@ -1065,7 +1065,7 @@ class ConvertToInt8Pass(object):
class TransformForMobilePass(object):
def __init__(self):
"""
This pass is used to convert the freezed graph for paddle-mobile execution.
This pass is used to convert the frozen graph for paddle-mobile execution.
"""
self._fake_quant_op_names = _fake_quant_op_list
self._fake_dequant_op_names = _fake_dequant_op_list
......
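The passes above reorder quant/dequant ops and convert weights to int8_t with an offline scale. A hedged sketch of symmetric per-tensor int8 quantization, only to illustrate the arithmetic the frozen weights go through; it is not the pass implementation:

```python
def quantize_to_int8(weights):
    scale = max(abs(w) for w in weights) / 127.0 or 1.0   # per-tensor abs-max scale
    q = [min(127, max(-127, round(w / scale))) for w in weights]
    return q, scale

def dequantize(q, scale):
    return [v * scale for v in q]

q, scale = quantize_to_int8([-0.5, 0.25, 1.27])
assert q == [-50, 25, 127]
assert all(abs(a - b) < scale
           for a, b in zip(dequantize(q, scale), [-0.5, 0.25, 1.27]))
```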
......@@ -673,11 +673,11 @@ def save_checkpoint(executor,
main_program and then saves these variables to the `checkpoint_dir`
directory.
In the training precess, we generally save a checkpoint in each
In the training process, we generally save a checkpoint in each
iteration. So there might be a lot of checkpoints in the
`checkpoint_dir`. To avoid them taking too much disk space, the
`max_num_checkpoints` are introduced to limit the total number of
checkpoints. If the number of existing checkpints is greater than
checkpoints. If the number of existing checkpoints is greater than
the `max_num_checkpoints`, the oldest ones will be deleted.
A variable is a checkpoint variable and will be saved if it meets
......@@ -689,7 +689,7 @@ def save_checkpoint(executor,
Args:
executor(Executor): The executor to run for save checkpoint.
checkpoint_dir(str): The folder where to save checkpoints.
trainer_id(int): currect trainer id, if id is equal to 0, the trainer
trainer_id(int): current trainer id, if id is equal to 0, the trainer
is chief.
trainer_args(dict|None): Current training arguments. Such as 'epoch_id'
and 'step_id'.
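The rotation behaviour described above (keep at most `max_num_checkpoints`, drop the oldest) can be sketched as follows; the helper name and the integer-serial folder layout are assumptions, not the actual fluid implementation.

```python
import os
import shutil

def keep_latest_checkpoints(checkpoint_dir, max_num_checkpoints):
    # Hypothetical helper: checkpoint sub-folders are assumed to be named
    # by an integer serial; everything but the newest ones is removed.
    serials = sorted(int(d) for d in os.listdir(checkpoint_dir)
                     if d.isdigit() and os.path.isdir(os.path.join(checkpoint_dir, d)))
    for serial in serials[:-max_num_checkpoints]:
        shutil.rmtree(os.path.join(checkpoint_dir, str(serial)))
```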
......@@ -772,7 +772,7 @@ def load_checkpoint(executor,
main_program and then try to load these variables from the
`checkpoint_dir` directory.
In the training precess, we generally save a checkpoint in each
In the training process, we generally save a checkpoint in each
iteration. So there are more than one checkpoint in the
`checkpoint_dir` (each checkpoint has its own sub folder), use
`serial` to specify which serial of checkpoint you would like to
......@@ -867,7 +867,7 @@ def _load_persist_vars_without_grad(executor,
has_model_dir=False):
"""
This function filters out all checkpoint variables from the given
program and then trys to load these variables from the given directory.
program and then tries to load these variables from the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
......@@ -898,7 +898,7 @@ def _load_persist_vars_without_grad(executor,
# In this example, `_load_persist_vars_without_grad` function
# will first filter out all checkpoint variables in the default
# main program, and then trys to load these variables form the
# main program, and then tries to load these variables form the
# folder "./my_paddle_model/__model__".
"""
......@@ -1135,12 +1135,12 @@ def _is_checkpoint_var(var):
def _make_chekcpoint_dirs(dirs):
"""
_make_chekcpoint_dirs will makdir local directory directly, when the directory is exist, it will igore it.
_make_chekcpoint_dirs will makedir local directory directly, when the directory is exist, it will ignore it.
"""
assert dirs is not None
if os.path.isfile(dirs):
raise OSError(errno.ENOTDIR, "dirs path shoule be a Directory.", dirs)
raise OSError(errno.ENOTDIR, "dirs path should be a Directory.", dirs)
if not os.path.isdir(dirs):
try:
......
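A self-contained sketch of the directory-creation behaviour documented above (raise if the path is an existing file, otherwise create it and ignore an already-existing directory); the function name here is hypothetical.

```python
import errno
import os

def make_checkpoint_dirs(dirs):
    # Hypothetical stand-in for the helper above.
    assert dirs is not None
    if os.path.isfile(dirs):
        raise OSError(errno.ENOTDIR, "dirs path should be a Directory.", dirs)
    if not os.path.isdir(dirs):
        try:
            os.makedirs(dirs)
        except OSError as err:
            if err.errno != errno.EEXIST:
                raise
```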
......@@ -312,9 +312,9 @@ class HDFSClient(object):
@staticmethod
def make_local_dirs(local_path):
"""
create a directiory local, is same to mkdir
create a directory local, is same to mkdir
Args:
local_path: local path that wants to create a directiory.
local_path: local path that wants to create a directory.
"""
try:
os.makedirs(local_path)
......
......@@ -137,7 +137,7 @@ def load_persistables_for_increment(dirname, executor, program,
lookup_table_var, lookup_table_var_path):
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
for increment trainning, the pserver will not only load dense variables,
for increment training, the pserver will not only load dense variables,
but also load the suitable lookup table var. Because of sliced lookup table
var with HASH, we must load the correct sliced var.
......@@ -417,7 +417,7 @@ def get_inference_model(main_program, feeded_var_names, target_vars):
Args:
main_program(Program|None): The original program, which will be pruned to
build the inference model. If is setted None,
build the inference model. If is set None,
the default main program will be used.
Default: None.
feeded_var_names(list[str]): Names of variables that need to be feeded data
......
......@@ -54,7 +54,7 @@ def data(name, shape, dtype='float32', lod_level=0):
for more details.
shape (list|tuple): List|Tuple of integers declaring the shape. You can
set "None" at a dimension to indicate the dimension can be of any
size. For example, it is useful to set changable batch size as "None"
size. For example, it is useful to set changeable batch size as "None"
dtype (np.dtype|VarType|str, optional): The type of the data. Supported
dtype: bool, float16, float32, float64, int8, int16, int32, int64,
uint8. Default: float32
......@@ -75,7 +75,7 @@ def data(name, shape, dtype='float32', lod_level=0):
# User can only feed data of the same shape to x
x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32')
# Creates a variable with changable batch size.
# Creates a variable with changeable batch size.
# Users can feed data of any batch size into y,
# but size of each data sample has to be [2, 1]
y = fluid.data(name='y', shape=[None, 2, 1], dtype='float32')
......
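A minimal end-to-end sketch of the changeable batch size described above, assuming the fluid 1.x static-graph API: the same placeholder `y` accepts batches of different sizes.

```python
import numpy as np
import paddle.fluid as fluid

y = fluid.data(name='y', shape=[None, 2, 1], dtype='float32')
out = fluid.layers.reduce_sum(y)
exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())
for batch_size in (2, 5):          # different batch sizes, same placeholder
    feed = {'y': np.random.rand(batch_size, 2, 1).astype('float32')}
    print(exe.run(feed=feed, fetch_list=[out]))
```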
......@@ -53,7 +53,7 @@ class DataFeedDesc(object):
data_feed = fluid.DataFeedDesc('data.proto')
However, users usually shouldn't care about the message format; instead,
they are encouragd to use :code:`Data Generator` as a tool to generate a
they are encouraged to use :code:`Data Generator` as a tool to generate a
valid data description, in the process of converting their raw log files to
training files acceptable to AsyncExecutor.
......
......@@ -339,10 +339,10 @@ class DataFeeder(object):
"""
Similar to the feed function, feed_parallel is used with multiple devices (CPU|GPU).
Here :code:`iterable` is a list of python generators. The data returned by each
generator in the list will be fed into a seperate device.
generator in the list will be fed into a separate device.
Parameters:
iterable (list|tuple): list of user-defined python geneators. The element
iterable (list|tuple): list of user-defined python generators. The element
number should match the :code:`num_places`.
num_places (int, optional): the number of devices. If not provided (None),
all available devices on the machine will be used. Default None.
......@@ -379,7 +379,7 @@ class DataFeeder(object):
exe.run(fluid.default_startup_program())
program = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(places=places)
# print sample feed_parallel r resultt
# print sample feed_parallel r result
# for item in list(feeder.feed_parallel([generate_reader(5, 0, 1), generate_reader(3, 10, 2)], 2)):
# print(item['x'])
# print(item['y'])
......@@ -433,7 +433,7 @@ class DataFeeder(object):
Parameters:
reader(generator): a user defined python generator used to get :code:`mini-batch` of data.
A :code:`mini-batch` can be regarded as a python generator that returns batchs of input
A :code:`mini-batch` can be regarded as a python generator that returns batches of input
entities, just like the below :code:`_mini_batch` in the code example.
multi_devices(bool): indicate whether to use multiple devices or not.
num_places(int, optional): if :code:`multi_devices` is True, you can specify the number
......
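For context, a minimal `DataFeeder.feed` sketch (assuming the classic fluid 1.x `layers.data` usage); `feed_parallel` follows the same idea but takes one generator per device.

```python
import paddle.fluid as fluid

x = fluid.layers.data(name='x', shape=[2], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='int64')
feeder = fluid.DataFeeder(feed_list=[x, y], place=fluid.CPUPlace())
mini_batch = [([0.1, 0.2], [1]), ([0.3, 0.4], [0])]   # two samples
feed_dict = feeder.feed(mini_batch)
print(sorted(feed_dict.keys()))  # ['x', 'y']
```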
......@@ -100,7 +100,7 @@ class DatasetBase(object):
Args:
record_candidate_size(int): size of instances candidate to shuffle
one slot
fea_eval(bool): wheather enable fea eval mode to enable slots shuffle.
fea_eval(bool): whether enable fea eval mode to enable slots shuffle.
default is True.
Examples:
......@@ -822,7 +822,7 @@ class BoxPSDataset(InMemoryDataset):
def wait_preload_done(self):
"""
Wait async proload done
Wait async preload done
Wait Until Feed Pass Done
Examples:
.. code-block:: python
......
......@@ -338,7 +338,7 @@ def run_fast_nan_inf_debug(executor,
use_program_cache=False,
dump_core=True):
"""
Run a program by the given executor. Catch the exception of NAN and INF, and save persistbales into the dumped core.
Run a program by the given executor. Catch the exception of NAN and INF, and save persistables into the dumped core.
"""
assert (executor is not None)
......
......@@ -59,7 +59,7 @@ class DownpourSGD(object):
"""
DownpourSGD is a distributed optimizer so
that user can call minimize to generate backward
operators and optimization operators within minmize function
operators and optimization operators within minimize function
Args:
loss(Variable): loss variable defined by user
startup_program(Program): startup program that defined by user
......
......@@ -110,7 +110,7 @@ class PaddlePSInstance(object):
def gather_ips(self):
"""
Return all servers and workers ip throught mpi allgather
Return all servers and workers ip through mpi allgather
"""
self._ips = self.dh.comm.allgather(self._ip)
return self._ips
......
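The allgather idea behind `gather_ips` can be sketched with mpi4py (an assumption; the actual class uses its own MPI helper): every rank contributes its IP and receives the complete list.

```python
from mpi4py import MPI  # assumed to be available

comm = MPI.COMM_WORLD
my_ip = "10.0.0.%d" % comm.Get_rank()   # hypothetical per-rank IP
all_ips = comm.allgather(my_ip)         # identical list on every rank
```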
......@@ -88,9 +88,9 @@ class PiecewiseDecay(LearningRateDecay):
boundaries(list): A list of steps numbers. The type of element in the list is python int.
values(list): A list of learning rate values that will be picked during
different step boundaries. The type of element in the list is python float.
begin(int): The begin step to initilize the global_step in the description above.
begin(int): The begin step to initialize the global_step in the description above.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
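The piecewise rule documented above reduces to a few lines of plain Python; this is only a sketch of the schedule, not the LearningRateDecay implementation.

```python
def piecewise_lr(global_step, boundaries, values):
    # values[i] applies while global_step < boundaries[i];
    # afterwards the last value is used.
    for boundary, value in zip(boundaries, values):
        if global_step < boundary:
            return value
    return values[-1]

boundaries, values = [10000, 20000], [1.0, 0.5, 0.1]
assert piecewise_lr(500, boundaries, values) == 1.0
assert piecewise_lr(12000, boundaries, values) == 0.5
assert piecewise_lr(30000, boundaries, values) == 0.1
```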
......@@ -158,7 +158,7 @@ class NaturalExpDecay(LearningRateDecay):
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
......@@ -238,7 +238,7 @@ class ExponentialDecay(LearningRateDecay):
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
......@@ -312,7 +312,7 @@ class InverseTimeDecay(LearningRateDecay):
default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be
'float32', 'float64'. The default value is 'float32'.
......@@ -393,7 +393,7 @@ class PolynomialDecay(LearningRateDecay):
cycle(bool, optional): If set true, decay the learning rate every decay_steps. The default value is False.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
......@@ -471,7 +471,7 @@ class CosineDecay(LearningRateDecay):
epochs(int): The number of epochs.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
......@@ -528,7 +528,7 @@ class NoamDecay(LearningRateDecay):
it's a tensor with shape [1] and the data type can be int32 or int64. The type can also be python int.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
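For reference, the Noam schedule is usually written as d_model^(-0.5) * min(step^(-0.5), step * warmup_steps^(-1.5)); the sketch below assumes that form and that global_step starts from 1.

```python
def noam_lr(d_model, warmup_steps, global_step):
    # global_step is assumed to start from 1 (step 0 would divide by zero).
    return d_model ** -0.5 * min(global_step ** -0.5,
                                 global_step * warmup_steps ** -1.5)

print(noam_lr(512, 4000, 1))      # small lr at the start of warmup
print(noam_lr(512, 4000, 4000))   # peak around the end of warmup
```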
......@@ -592,7 +592,7 @@ class LinearLrWarmup(LearningRateDecay):
end_lr (float): Final learning rate of warm up.
begin(int, optional): The begin step. The initial value of global_step described above. The default value is 0.
step(int, optional): The step size used to calculate the new global_step in the description above.
The defalult value is 1.
The default value is 1.
dtype(str, optional): The data type used to create the learning rate variable. The data type can be set as
'float32', 'float64'. The default value is 'float32'.
......
......@@ -50,7 +50,7 @@ class Conv2D(layers.Layer):
C will equal the number of input feature map divided by the groups.
Please refer to UFLDL's `convolution
<http://ufldl.stanford.edu/tutorial/supervised/FeatureExtractionUsingConvolution/>`_
for more detials.
for more details.
If bias attribution and activation type are provided, bias is added to the
output of the convolution, and the corresponding activation function is
applied to the final result.
......@@ -1003,7 +1003,7 @@ class BatchNorm(layers.Layer):
Parameters:
num_channels(int): Indicate the number of channels of the input ``Tensor``.
act(str, optional): Activation to be applied to the output of batch normalizaiton. Default: None.
act(str, optional): Activation to be applied to the output of batch normalization. Default: None.
is_test (bool, optional): A flag indicating whether it is in test phase or not. Default: False.
momentum(float, optional): The value used for the moving_mean and moving_var computation. Default: 0.9.
epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-5.
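A minimal dygraph sketch, assuming the BatchNorm signature documented above (num_channels first, optional act):

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    bn = fluid.dygraph.BatchNorm(num_channels=8, act='relu')
    x = fluid.dygraph.to_variable(
        np.random.rand(4, 8, 16, 16).astype('float32'))
    y = bn(x)
    print(y.shape)  # [4, 8, 16, 16]
```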
......@@ -1242,7 +1242,7 @@ class Embedding(layers.Layer):
default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition,
user-defined or pre-trained word vectors can be loaded with the :attr:`param_attr` parameter.
The local word vector needs to be transformed into numpy format, and the shape of local word
vector shoud be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
vector should be consistent with :attr:`size` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer`
is used to load custom or pre-trained word vectors. See code example 2 for details.
dtype(np.dtype|core.VarDesc.VarType|str): It refers to the data type of output Tensor.
It must be "float32" or "float64". Default: "float32".
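A hedged sketch of loading pre-trained word vectors through NumpyArrayInitializer, as the docstring above suggests; the vocabulary size, dimensions, and the random "pre-trained" table are placeholders, and the ids layout assumes the v2 embedding (no trailing dimension of 1).

```python
import numpy as np
import paddle.fluid as fluid

pretrained = np.random.rand(128, 16).astype('float32')   # stand-in word vectors
with fluid.dygraph.guard():
    emb = fluid.dygraph.Embedding(
        size=[128, 16],
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(pretrained)))
    ids = fluid.dygraph.to_variable(np.array([[3, 7, 1]], dtype='int64'))
    vectors = emb(ids)    # expected shape: [1, 3, 16]
```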
......@@ -1382,7 +1382,7 @@ class LayerNorm(layers.Layer):
omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default: None.
act(str, optional): Activation to be applied to the output of layer normalizaiton.
act(str, optional): Activation to be applied to the output of layer normalization.
act(str, optional): Activation to be applied to the output of layer normalization.
Default: None.
dtype (str, optional): Data type, it can be "float32" or "float64". Default: "float32".
......@@ -1435,7 +1435,7 @@ class LayerNorm(layers.Layer):
default_initializer=Constant(1.0))
else:
if self._param_attr:
logging.warn("param_attr are only avaliable with scale is True")
logging.warn("param_attr are only available with scale is True")
if self._shift:
assert self._bias_attr is not False
......@@ -1446,7 +1446,7 @@ class LayerNorm(layers.Layer):
is_bias=True)
else:
if self._bias_attr:
logging.warn("bias_attr are only avaliable with shift is True")
logging.warn("bias_attr are only available with shift is True")
def forward(self, input):
input_shape = list(input.shape)
......@@ -1702,7 +1702,7 @@ class NCE(layers.Layer):
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
num_neg_samples (int, optional): The number of negative classes. The default value is 10.
sampler (str, optional): The sampler used to sample class from negtive classes.
sampler (str, optional): The sampler used to sample class from negative classes.
It can be 'uniform', 'log_uniform' or 'custom_dist'.
default: 'uniform'.
custom_dist (float[], optional): A float[] with size=num_total_classes.
......@@ -2544,7 +2544,7 @@ class GroupNorm(layers.Layer):
bias_attr(ParamAttr, optional): The parameter attribute for the learnable
bias :math:`b`. If it is set to False, no bias will be added to the output units.
If it is set to None, the bias is initialized zero. Default: None.
act(str, optional): Activation to be applied to the output of group normalizaiton. Default: None.
act(str, optional): Activation to be applied to the output of group normalization. Default: None.
data_layout(str, optional): Specify the input data format. Only NCHW is supported. Default: NCHW.
Returns:
......@@ -2640,7 +2640,7 @@ class SpectralNorm(layers.Layer):
and W is the product result of remaining dimensions.
Step 2:
:attr:`power_iters` shoule be a positive interger, do following
:attr:`power_iters` should be a positive integer, do following
calculations with U and V for :attr:`power_iters` rounds.
.. math::
......
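The power iteration referred to above can be written out in numpy; this sketch only estimates the largest singular value sigma of a reshaped weight W, which is what the spectral normalization divides by.

```python
import numpy as np

np.random.seed(0)
W = np.random.randn(5, 7)          # weight reshaped to [H, W] as described
u = np.random.randn(5)
for _ in range(10):                # `power_iters` rounds
    v = W.T @ u
    v /= np.linalg.norm(v) + 1e-12
    u = W @ v
    u /= np.linalg.norm(u) + 1e-12
sigma = u @ W @ v
print(sigma, np.linalg.svd(W, compute_uv=False)[0])  # nearly equal
W_sn = W / sigma                   # spectrally normalized weight
```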
......@@ -27,7 +27,7 @@ def monkey_patch_varbase():
def set_value(self, value):
"""
**Notes**:
**This API is ONLY avaliable in Dygraph mode**
**This API is ONLY available in Dygraph mode**
Set a new value for this Variable.
......@@ -76,7 +76,7 @@ def monkey_patch_varbase():
def backward(self, backward_strategy=None):
"""
**Notes**:
**This API is ONLY avaliable in Dygraph mode**
**This API is ONLY available in Dygraph mode**
Run backward of current Graph which starts from current Variable
......@@ -116,13 +116,13 @@ def monkey_patch_varbase():
self._run_backward(backward_strategy, framework._dygraph_tracer())
else:
raise ValueError(
"Variable.backward() is only avaliable in DyGraph mode")
"Variable.backward() is only available in DyGraph mode")
@framework.dygraph_only
def gradient(self):
"""
**Notes**:
**This API is ONLY avaliable in Dygraph mode**
**This API is ONLY available in Dygraph mode**
Get the Gradient of Current Variable
......
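A minimal dygraph sketch of the `backward` / `gradient` calls documented above (fluid 1.x dygraph API, with stop_gradient switched off explicitly so the input gradient is kept):

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    x = fluid.dygraph.to_variable(
        np.array([[1.0, 2.0], [3.0, 4.0]], dtype='float32'))
    x.stop_gradient = False
    y = fluid.layers.reduce_sum(x * x)
    y.backward()
    print(x.gradient())   # 2 * x -> [[2., 4.], [6., 8.]]
```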