Run pre-commit checks on the changed files and fix code style problems

test=develop

Run pre-commit checks on the changed files and fix code style problems
test=develop
45eb6f07 · dongdaxiang · e57ac5ed · 45eb6f07 · 45eb6f07 · 45eb6f07
12 changed files
--- a/paddle/fluid/framework/data_feed.cc
+++ b/paddle/fluid/framework/data_feed.cc
@@ -246,8 +246,8 @@ void InMemoryDataFeed<T>::FillMemoryDataToChannel() {
  VLOG(3) << "FillMemoryDataToChannel, thread_id=" << thread_id_;
  auto interval = GetMemoryDataInterval();
  VLOG(3) << "memory data size=" << memory_data_->size()
-          << ", fill data from  [" << interval.first << ", "
+          << ", fill data from  [" << interval.first << ", " << interval.second
-          << interval.second << "), thread_id=" << thread_id_;
+          << "), thread_id=" << thread_id_;
  for (int64_t i = interval.first; i < interval.second; ++i) {
    T& t = (*memory_data_)[i];
    shuffled_ins_->Push(std::move(t));
@@ -275,7 +275,7 @@ void InMemoryDataFeed<T>::FillChannelToMemoryData() {
    channel->Pop(&local_vec[i]);
  }
  VLOG(3) << "local_vec size=" << local_vec.size()
-          <<", thread_id=" << thread_id_;
+          << ", thread_id=" << thread_id_;
  {
    std::lock_guard<std::mutex> g(*mutex_for_update_memory_data_);
    VLOG(3) << "before insert, memory_data_ size=" << memory_data_->size()
@@ -308,8 +308,8 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
      local_vec.push_back(instance);
    }
    timeline.Pause();
-    VLOG(3) << "LoadIntoMemory() read all lines, file="
+    VLOG(3) << "LoadIntoMemory() read all lines, file=" << filename
-            << filename << ", cost time=" << timeline.ElapsedSec()
+            << ", cost time=" << timeline.ElapsedSec()
            << " seconds, thread_id=" << thread_id_;
    {
      std::lock_guard<std::mutex> lock(*mutex_for_update_memory_data_);
@@ -319,8 +319,7 @@ void InMemoryDataFeed<T>::LoadIntoMemory() {
                           std::make_move_iterator(local_vec.end()));
      timeline.Pause();
      VLOG(3) << "LoadIntoMemory() memory_data insert, cost time="
-              << timeline.ElapsedSec() << " seconds, thread_id="
+              << timeline.ElapsedSec() << " seconds, thread_id=" << thread_id_;
-              << thread_id_;
    }
    local_vec.clear();
  }
@@ -358,8 +357,8 @@ void InMemoryDataFeed<T>::GlobalShuffle() {
        std::string send_str;
        SerializeIns(send_vec[j], &send_str);
        VLOG(3) << "send str_length=" << send_str.length()
-                << ", ins num=" << send_vec[j].size() << " to node_id="
+                << ", ins num=" << send_vec[j].size() << " to node_id=" << j
-                << j << ", thread_id=" << thread_id_;
+                << ", thread_id=" << thread_id_;
        auto ret = fleet_ptr->SendClientToClientMsg(0, j, send_str);
        VLOG(3) << "end send, thread_id=" << thread_id_;
        send_vec[j].clear();
@@ -371,8 +370,8 @@ void InMemoryDataFeed<T>::GlobalShuffle() {
    if (send_vec[j].size() != 0) {
      std::string send_str;
      SerializeIns(send_vec[j], &send_str);
-      VLOG(3) << "send str_length=" << send_str.length()
+      VLOG(3) << "send str_length=" << send_str.length() << " to node_id=" << j
-              << " to node_id=" << j << ", thread_id=" << thread_id_;
+              << ", thread_id=" << thread_id_;
      auto ret = fleet_ptr->SendClientToClientMsg(0, j, send_str);
      VLOG(3) << "end send, thread_id=" << thread_id_;
      total_status.push_back(std::move(ret));
@@ -888,15 +887,13 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
 // todo serialize ins in global shuffle
 void MultiSlotInMemoryDataFeed::SerializeIns(
-    const std::vector<std::vector<MultiSlotType>*>& ins,
+    const std::vector<std::vector<MultiSlotType>*>& ins, std::string* str) {
-    std::string* str) {
  auto fleet_ptr = FleetWrapper::GetInstance();
  fleet_ptr->Serialize(ins, str);
 }
 // todo deserialize ins in global shuffle
 void MultiSlotInMemoryDataFeed::DeserializeIns(
-    std::vector<std::vector<MultiSlotType>>* ins,
+    std::vector<std::vector<MultiSlotType>>* ins, const std::string& str) {
-    const std::string& str) {
  auto fleet_ptr = FleetWrapper::GetInstance();
  fleet_ptr->Deserialize(ins, str);
 }

--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -15,23 +15,23 @@ limitations under the License. */
 #pragma once
 #include <fstream>
+#include <future>  // NOLINT
 #include <memory>
 #include <mutex>  // NOLINT
+#include <sstream>
 #include <string>
 #include <thread>  // NOLINT
-#include <vector>
-#include <sstream>
-#include <future> // NOLINT
 #include <utility>
+#include <vector>
+#include "paddle/fluid/framework/blocking_queue.h"
 #include "paddle/fluid/framework/data_feed.pb.h"
+#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/variable.h"
 #include "paddle/fluid/operators/reader/blocking_queue.h"
 #include "paddle/fluid/string/string_helper.h"
-#include "paddle/fluid/framework/blocking_queue.h"
-#include "paddle/fluid/framework/fleet/fleet_wrapper.h"
 namespace paddle {
 namespace framework {
@@ -85,21 +85,19 @@ class DataFeed {
  virtual void AddFeedVar(Variable* var, const std::string& name);
  // This function will do nothing at default
-  virtual void SetMemoryData(void* memory_data) { }
+  virtual void SetMemoryData(void* memory_data) {}
  // This function will do nothing at default
-  virtual void SetMemoryDataMutex(std::mutex* mutex) { }
+  virtual void SetMemoryDataMutex(std::mutex* mutex) {}
  // This function will do nothing at default
-  virtual void SetThreadId(int thread_id) { }
+  virtual void SetThreadId(int thread_id) {}
  // This function will do nothing at default
-  virtual void SetThreadNum(int thread_num) { }
+  virtual void SetThreadNum(int thread_num) {}
  // This function will do nothing at default
-  virtual void SetTrainerNum(int trainer_num) { }
+  virtual void SetTrainerNum(int trainer_num) {}
  virtual void SetFileListMutex(std::mutex* mutex) {
    mutex_for_pick_file_ = mutex;
  }
-  virtual void SetFileListIndex(size_t* file_index) {
+  virtual void SetFileListIndex(size_t* file_index) { file_idx_ = file_index; }
-    file_idx_ = file_index;
-  }
  virtual void LoadIntoMemory() {
    PADDLE_THROW("This function(LoadIntoMemory) is not implemented.");
  }
@@ -110,11 +108,11 @@ class DataFeed {
    PADDLE_THROW("This function(GlobalShuffle) is not implemented.");
  }
  // This function will do nothing at default
-  virtual void FillMemoryDataToChannel() { }
+  virtual void FillMemoryDataToChannel() {}
  // This function will do nothing at default
-  virtual void FillChannelToMemoryData() { }
+  virtual void FillChannelToMemoryData() {}
  // This function will do nothing at default
-  virtual void PutInsToChannel(const std::string& ins_str) { }
+  virtual void PutInsToChannel(const std::string& ins_str) {}
 protected:
  // The following three functions are used to check if it is executed in this
@@ -222,8 +220,7 @@ class InMemoryDataFeed : public PrivateQueueDataFeed<T> {
  virtual void GlobalShuffle();
 protected:
-  virtual void AddInstanceToInsVec(T* vec_ins,
+  virtual void AddInstanceToInsVec(T* vec_ins, const T& instance,
-                                   const T& instance,
                                   int index) = 0;
  virtual bool ParseOneInstance(T* instance) = 0;
  virtual bool ParseOneInstanceFromPipe(T* instance) = 0;
@@ -363,6 +360,7 @@ class MultiSlotInMemoryDataFeed
  MultiSlotInMemoryDataFeed() {}
  virtual ~MultiSlotInMemoryDataFeed() {}
  virtual void Init(const paddle::framework::DataFeedDesc& data_feed_desc);
 protected:
  virtual void AddInstanceToInsVec(std::vector<MultiSlotType>* vec_ins,
                                   const std::vector<MultiSlotType>& instance,

--- a/paddle/fluid/framework/data_set.cc
+++ b/paddle/fluid/framework/data_set.cc
@@ -18,8 +18,8 @@
 #include "google/protobuf/message.h"
 #include "google/protobuf/text_format.h"
 #include "paddle/fluid/framework/data_feed_factory.h"
-#include "paddle/fluid/platform/timer.h"
 #include "paddle/fluid/framework/io/fs.h"
+#include "paddle/fluid/platform/timer.h"
 namespace paddle {
 namespace framework {
@@ -248,8 +248,7 @@ template <typename T>
 int DatasetImpl<T>::ReceiveFromClient(int msg_type, int client_id,
                                      const std::string& msg) {
  VLOG(3) << "ReceiveFromClient msg_type=" << msg_type
-          << ", client_id=" << client_id << ", msg length="
+          << ", client_id=" << client_id << ", msg length=" << msg.length();
-          << msg.length();
  auto fleet_ptr = FleetWrapper::GetInstance();
  int64_t index = fleet_ptr->LocalRandomEngine()() % thread_num_;
  VLOG(3) << "ramdom index=" << index;

--- a/paddle/fluid/framework/data_set.h
+++ b/paddle/fluid/framework/data_set.h
@@ -19,8 +19,8 @@
 #include <mutex>  // NOLINT
 #include <string>
 #include <thread>  // NOLINT
-#include <vector>
 #include <utility>
+#include <vector>
 #include "paddle/fluid/framework/data_feed.h"

--- a/paddle/fluid/framework/dataset_factory.cc
+++ b/paddle/fluid/framework/dataset_factory.cc
@@ -41,8 +41,7 @@ datasetMap g_dataset_map;
 std::string DatasetFactory::DatasetTypeList() {
  std::string dataset_types;
-  for (auto iter = g_dataset_map.begin(); iter != g_dataset_map.end();
+  for (auto iter = g_dataset_map.begin(); iter != g_dataset_map.end(); ++iter) {
-       ++iter) {
    if (iter != g_dataset_map.begin()) {
      dataset_types += ", ";
    }

--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@@ -113,8 +113,7 @@ class Executor {
  void EnableMKLDNN(const ProgramDesc& program);
  void RunFromDataset(const ProgramDesc& main_program, Scope* scope,
-                      Dataset* dataset,
+                      Dataset* dataset, const std::string& trainer_desc_str);
-                      const std::string& trainer_desc_str);
 private:
  const platform::Place place_;

--- a/paddle/fluid/framework/io/fs.h
+++ b/paddle/fluid/framework/io/fs.h
@@ -15,9 +15,9 @@
 #pragma once
 #include <stdio.h>
+#include <memory>
 #include <string>
 #include <vector>
-#include <memory>
 #include "glog/logging.h"
 #include "paddle/fluid/framework/io/shell.h"
 #include "paddle/fluid/string/string_helper.h"

--- a/paddle/fluid/framework/pull_dense_worker.cc
+++ b/paddle/fluid/framework/pull_dense_worker.cc
--- a/paddle/fluid/pybind/async_executor_py.cc
+++ b/paddle/fluid/pybind/async_executor_py.cc
@@ -21,9 +21,9 @@ limitations under the License. */
 #ifdef _XOPEN_SOURCE
 #undef _XOPEN_SOURCE
 #endif
+#include <memory>
 #include <string>
 #include <vector>
-#include <memory>
 #include "google/protobuf/io/zero_copy_stream_impl.h"
 #include "google/protobuf/text_format.h"

--- a/paddle/fluid/pybind/data_set_py.cc
+++ b/paddle/fluid/pybind/data_set_py.cc
@@ -19,21 +19,21 @@ limitations under the License. */
 #ifdef _XOPEN_SOURCE
 #undef _XOPEN_SOURCE
 #endif
+#include <memory>
 #include <string>
 #include <vector>
-#include <memory>
 #include "google/protobuf/io/zero_copy_stream_impl.h"
 #include "google/protobuf/text_format.h"
 #include "paddle/fluid/framework/async_executor.h"
 #include "paddle/fluid/framework/data_feed.h"
 #include "paddle/fluid/framework/data_feed.pb.h"
 #include "paddle/fluid/framework/data_set.h"
+#include "paddle/fluid/framework/dataset_factory.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/inference/io.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/variant.h"
 #include "paddle/fluid/pybind/data_set_py.h"
-#include "paddle/fluid/framework/dataset_factory.h"
 namespace py = pybind11;
 namespace pd = paddle::framework;
@@ -42,8 +42,8 @@ namespace paddle {
 namespace pybind {
 void BindDataset(py::module* m) {
-  py::class_<framework::Dataset,
+  py::class_<framework::Dataset, std::shared_ptr<framework::Dataset>>(*m,
-    std::shared_ptr<framework::Dataset>>(*m, "Dataset")
+                                                                      "Dataset")
      .def(py::init([](const std::string& name = "MultiSlotDataset") {
        return framework::DatasetFactory::CreateDataset(name);
      }))

--- a/paddle/fluid/string/string_helper.h
+++ b/paddle/fluid/string/string_helper.h
@@ -18,8 +18,8 @@
 #include <stdio.h>
 #include <cstring>
 #include <string>
-#include <vector>
 #include <utility>
+#include <vector>
 #include "boost/lexical_cast.hpp"
 #include "glog/logging.h"

--- a/python/paddle/fluid/tests/unittests/test_dataset.py
+++ b/python/paddle/fluid/tests/unittests/test_dataset.py
@@ -80,18 +80,20 @@ class TestDataset(unittest.TestCase):
            data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
            f.write(data)
-        slots = ["slot1","slot2","slot3","slot4"]
+        slots = ["slot1", "slot2", "slot3", "slot4"]
        slots_vars = []
        for slot in slots:
-            var = fluid.layers.data(name=slot, shape=[1],
+            var = fluid.layers.data(
-                                    dtype="int64", lod_level=1)
+                name=slot, shape=[1], dtype="int64", lod_level=1)
            slots_vars.append(var)
        dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
        dataset.set_batch_size(32)
        dataset.set_thread(3)
-        dataset.set_filelist(["test_in_memory_dataset_run_a.txt",
+        dataset.set_filelist([
-                              "test_in_memory_dataset_run_b.txt"])
+            "test_in_memory_dataset_run_a.txt",
+            "test_in_memory_dataset_run_b.txt"
+        ])
        dataset.set_pipe_command("cat")
        dataset.set_use_var(slots_vars)
        dataset.load_into_memory()
@@ -124,18 +126,18 @@ class TestDataset(unittest.TestCase):
            data += "1 7 2 3 6 4 8 8 8 8 1 7\n"
            f.write(data)
-        slots = ["slot1","slot2","slot3","slot4"]
+        slots = ["slot1", "slot2", "slot3", "slot4"]
        slots_vars = []
        for slot in slots:
-            var = fluid.layers.data(name=slot, shape=[1],
+            var = fluid.layers.data(
-                                    dtype="int64", lod_level=1)
+                name=slot, shape=[1], dtype="int64", lod_level=1)
            slots_vars.append(var)
        dataset = fluid.DatasetFactory().create_dataset("QueueDataset")
        dataset.set_batch_size(32)
        dataset.set_thread(3)
-        dataset.set_filelist(["test_queue_dataset_run_a.txt",
+        dataset.set_filelist(
-                              "test_queue_dataset_run_b.txt"])
+            ["test_queue_dataset_run_a.txt", "test_queue_dataset_run_b.txt"])
        dataset.set_pipe_command("cat")
        dataset.set_use_var(slots_vars)