Paddle commit 0589ed21 (unverified)
Authored Apr 01, 2021 by tangwei12; committed via GitHub on Apr 01, 2021
Parent: b807e408

LOG CLEAN (#31819)

* upgrade vlog
* train from dataset fetch optimize
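The heart of the change is swapping unconditional LOG(INFO)/LOG(WARNING) calls for leveled VLOG calls, so routine status lines appear only when a verbosity threshold is requested. A minimal standalone sketch of that gating, assuming plain glog (Paddle wraps these macros, so this is illustrative rather than the commit's code):

// vlog_demo.cc -- illustrative only; build with: g++ vlog_demo.cc -lglog
#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;

  LOG(INFO) << "always emitted";                                 // the old style
  VLOG(0) << "emitted at the default verbosity";                 // important status
  VLOG(3) << "emitted only with --v=3 (or GLOG_v=3) or higher";  // routine detail
  return 0;
}

Run with GLOG_v=3 to see the third line; by default only the first two appear.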
Showing 17 changed files with 97 additions and 59 deletions (+97 / -59).
cmake/external/brpc.cmake  (+1 / -1)
paddle/fluid/distributed/service/brpc_ps_server.cc  (+3 / -2)
paddle/fluid/distributed/service/brpc_utils.cc  (+1 / -1)
paddle/fluid/distributed/service/env.h  (+4 / -6)
paddle/fluid/distributed/service/ps_client.cc  (+1 / -2)
paddle/fluid/distributed/service/service.cc  (+1 / -1)
paddle/fluid/distributed/table/depends/dense.h  (+0 / -2)
paddle/fluid/distributed/table/depends/sparse.h  (+0 / -2)
paddle/fluid/framework/details/build_strategy.cc  (+12 / -16)
paddle/fluid/framework/device_worker.h  (+1 / -1)
paddle/fluid/framework/hogwild_worker.cc  (+25 / -6)
paddle/fluid/platform/lodtensor_printer.cc  (+22 / -8)
paddle/fluid/platform/lodtensor_printer.h  (+1 / -1)
paddle/fluid/platform/lodtensor_printer_test.cc  (+2 / -1)
python/paddle/distributed/fleet/base/fleet_base.py  (+7 / -6)
python/paddle/distributed/fleet/runtime/the_one_ps.py  (+1 / -1)
python/paddle/fluid/tests/unittests/test_monitor.py  (+15 / -2)
cmake/external/brpc.cmake
@@ -41,7 +41,7 @@ ExternalProject_Add(
     ${EXTERNAL_PROJECT_LOG_ARGS}
     # TODO(gongwb): change to de newst repo when they changed.
     GIT_REPOSITORY  "https://github.com/wangjiawei04/brpc"
-    GIT_TAG         "6d79e0b17f25107c35b705ea58d888083f59ff47"
+    GIT_TAG         "e203afb794caf027da0f1e0776443e7d20c0c28e"
     PREFIX          ${BRPC_SOURCES_DIR}
     UPDATE_COMMAND  ""
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
paddle/fluid/distributed/service/brpc_ps_server.cc
@@ -60,7 +60,8 @@ uint64_t BrpcPsServer::start(const std::string &ip, uint32_t port) {
   std::unique_lock<std::mutex> lock(mutex_);
   std::string ip_port = ip + ":" + std::to_string(port);
-  VLOG(3) << "server of rank " << _rank << " starts at " << ip_port;
+  VLOG(0) << "running server with rank id: " << _rank
+          << ", endpoint: " << ip_port;
   brpc::ServerOptions options;
   int num_threads = std::thread::hardware_concurrency();
@@ -538,7 +539,7 @@ int32_t BrpcPsService::stop_server(Table *table,
   auto *p_server = _server;
   std::thread t_stop([p_server]() {
     p_server->stop();
-    LOG(INFO) << "Server Stoped";
+    VLOG(3) << "Server Stoped";
   });
   t_stop.detach();
   return 0;
paddle/fluid/distributed/service/brpc_utils.cc
@@ -324,7 +324,7 @@ std::string GetIntTypeEndpoint(const std::string& ip, const uint32_t& port) {
   while (hp->h_addr_list[i] != NULL) {
     int_ip = inet_ntoa(*(struct in_addr *)hp->h_addr_list[i]);
-    VLOG(0) << "Brpc Get host by name, host:" << ip << " -> ip: " << int_ip;
+    VLOG(3) << "Brpc Get host by name, host:" << ip << " -> ip: " << int_ip;
     break;
   }
paddle/fluid/distributed/service/env.h
@@ -39,7 +39,7 @@ struct PSHost {
   // |---ip---|---port---|--rank--|
   // |-32bit--|--20bit---|--12bit-|
   // for pslib
   uint64_t serialize_to_uint64() {
     uint64_t host_label = 0;
     host_label = inet_addr(ip.c_str());
@@ -175,14 +175,12 @@ class PSEnvironment {
     host.ip = ip;
     host.port = port;
     host.rank = rank;
-    if (sign_set.count(rank) > 0) {
-      LOG(WARNING) << "ps-host :" << host.ip << ":" << host.port
-                   << ", rank:" << host.rank
-                   << " already register, ignore register";
-    } else {
+    if (sign_set.count(rank) == 0) {
       host_list.push_back(host);
       sign_set.insert(rank);
     }
     return 0;
   }
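The comment above serialize_to_uint64 documents a packed layout of |ip:32|port:20|rank:12| in one 64-bit label. A hedged, self-contained sketch of that packing for clarity only: PackHost and the sample values are hypothetical, and this is not Paddle's actual implementation (the diff shows only the inet_addr(ip.c_str()) step).

// Illustrative sketch of the |ip(32)|port(20)|rank(12)| layout from the comment.
#include <cstdint>
#include <cstdio>

uint64_t PackHost(uint32_t ip, uint32_t port, uint32_t rank) {
  return (static_cast<uint64_t>(ip) << 32) |
         (static_cast<uint64_t>(port & 0xFFFFFu) << 12) |
         static_cast<uint64_t>(rank & 0xFFFu);
}

int main() {
  // 0x7F000001 is 127.0.0.1 in host byte order, used here only as sample data.
  uint64_t label = PackHost(0x7F000001u, 8500, 3);
  std::printf("ip=%#x port=%u rank=%u\n",
              static_cast<uint32_t>(label >> 32),
              static_cast<uint32_t>((label >> 12) & 0xFFFFFu),
              static_cast<uint32_t>(label & 0xFFFu));
  return 0;
}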
paddle/fluid/distributed/service/ps_client.cc
@@ -78,8 +78,7 @@ PSClient *PSClientFactory::create(const PSParameter &ps_config) {
   }
   TableManager::instance().initialize();
-  LOG(INFO) << "Create PSClient[" << service_param.client_class()
-            << "] success";
+  VLOG(3) << "Create PSClient[" << service_param.client_class() << "] success";
   return client;
 }
 }  // namespace distributed
paddle/fluid/distributed/service/service.cc
@@ -47,7 +47,7 @@ paddle::distributed::PSParameter load_from_prototxt(
 }
 void PSCore::init_gflag(const std::string& gflags) {
-  LOG(INFO) << "Init With Gflags:" << gflags;
+  VLOG(3) << "Init With Gflags:" << gflags;
   std::vector<std::string> flags = paddle::string::split_string(gflags);
   if (flags.size() < 1) {
     flags.push_back("-max_body_size=314217728");
paddle/fluid/distributed/table/depends/dense.h
@@ -89,7 +89,6 @@ class DSGD : public DenseOptimizer {
     auto blas = GetBlas<float>();
     float lr = *(global_learning_rate_) * (*learning_rate);
-    VLOG(4) << "DSGD LearningRate: " << lr;
     blas.VCOPY(update_numel, update_values + begin, grads.data());
     blas.SCAL(update_numel, lr, grads.data());
     blas.VSUB(update_numel, param + begin, grads.data(), param + begin);
@@ -157,7 +156,6 @@ class DAdam : public DenseOptimizer {
     beta2_pow[0] = beta2_pow[0] * beta2;
     float lr_ = *(global_learning_rate_) * learning_rate[0];
-    VLOG(4) << "DAdam LearningRate: " << lr_;
     lr_ *= sqrt(1 - beta2_pow[0]) / (1 - beta1_pow[0]);
     float* tmp_ = tmp.data();
paddle/fluid/distributed/table/depends/sparse.h
@@ -110,7 +110,6 @@ class SSGD : public SparseOptimizer {
       auto* value = block->Get(id);
       float learning_rate = *(global_learning_rate_) * (value + lr_offset)[0];
-      VLOG(4) << "SSGD LearningRate: " << learning_rate;
       float* param = value + param_offset;
       std::vector<float> grads;
@@ -166,7 +165,6 @@ class SAdam : public SparseOptimizer {
       if (!block->GetEntry(id)) continue;
       auto* values = block->Get(id);
       float lr_ = *(global_learning_rate_) * (values + lr_offset)[0];
-      VLOG(4) << "SAdam LearningRate: " << lr_;
       float* param = values + param_offset;
       float* moment1 = values + m1_offset;
       float* moment2 = values + m2_offset;
paddle/fluid/framework/details/build_strategy.cc
@@ -161,9 +161,6 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
 #if (defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)) && \
     !defined(_WIN32) && !defined(__APPLE__)
     AppendPassWithCheck(strategy_.enable_auto_fusion_, "fusion_group_pass");
-#else
-    LOG(WARNING) << "fusion_group is not enabled for Windows/MacOS now, and "
-                    "only effective when running with CUDA GPU.";
 #endif
     AppendPassWithCheck(strategy_.fuse_elewise_add_act_ops_,
                         "fuse_elewise_add_act_pass");
@@ -265,12 +262,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
     if (FLAGS_use_mkldnn) {
       AppendPass(pass_name);
     } else if (!strategy_.mkldnn_enabled_op_types_.empty()) {
-      LOG(WARNING)
-          << "mkldnn_enabled_op_types specify the operator type list to "
-             "use MKLDNN acceleration. It is null in default, means "
-             "that all the operators supported by MKLDNN will be "
-             "accelerated. And it should not be set when "
-             "FLAGS_use_mkldnn=false.";
+      VLOG(1) << "mkldnn_enabled_op_types specify the operator type list to "
+                 "use MKLDNN acceleration. It is null in default, means "
+                 "that all the operators supported by MKLDNN will be "
+                 "accelerated. And it should not be set when "
+                 "FLAGS_use_mkldnn=false.";
     }
 #else
     PADDLE_ENFORCE_NE(FLAGS_use_mkldnn, true,
@@ -403,26 +399,26 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
               << ", num_trainers:" << num_trainers_;
     } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") {
       if (use_device != p::kCUDA) {
-        LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on "
-                        "GPU, skipped.";
+        VLOG(1) << "fuse_relu_depthwise_conv_pass is only supported on "
+                   "GPU, skipped.";
         continue;
       }
     } else if (pass->Type() == "fusion_group_pass") {
       pass->Set<bool>("use_gpu", new bool((use_device == p::kCUDA)));
       if (use_device != p::kCUDA) {
-        LOG(WARNING) << "fusion_group_pass is only supported on GPU, skipped.";
+        VLOG(1) << "fusion_group_pass is only supported on GPU, skipped.";
         continue;
       }
     } else if (pass->Type() == "fuse_bn_act_pass") {
       if (use_device != p::kCUDA) {
-        LOG(WARNING) << "fuse_bn_act_pass is only supported on "
-                        "GPU, skipped.";
+        VLOG(1) << "fuse_bn_act_pass is only supported on "
+                   "GPU, skipped.";
         continue;
       }
     } else if (pass->Type() == "fuse_bn_add_act_pass") {
       if (use_device != p::kCUDA) {
-        LOG(WARNING) << "fuse_bn_add_act_pass is only supported on "
-                        "GPU, skipped.";
+        VLOG(1) << "fuse_bn_add_act_pass is only supported on "
+                   "GPU, skipped.";
         continue;
       }
     } else if (pass->Type() == "mkldnn_placement_pass") {
paddle/fluid/framework/device_worker.h
@@ -205,7 +205,7 @@ class DeviceWorker {
   Scope* root_scope_ = nullptr;
   Scope* thread_scope_;
   paddle::platform::Place place_;
-  int64_t batch_num_;
+  int64_t batch_num_ = 0;
   FetchConfig fetch_config_;
   bool use_cvm_;
   bool no_cvm_;
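This one-line change adds a default member initializer so the batch counter starts at zero rather than an indeterminate value, which the reworked PrintFetchVars below presumably relies on for its batch_num_ % print_period check. A tiny illustrative sketch (the Counter struct is hypothetical, not Paddle code):

// Illustrative only: a default member initializer keeps the counter well-defined.
#include <cstdint>
#include <iostream>

struct Counter {
  int64_t batch_num_ = 0;  // without "= 0", a default-constructed Counter reads garbage here
};

int main() {
  Counter c;
  ++c.batch_num_;
  std::cout << c.batch_num_ << std::endl;  // reliably prints 1
  return 0;
}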
paddle/fluid/framework/hogwild_worker.cc
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include <ctime>
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/framework/device_worker.h"
 #include "paddle/fluid/operators/controlflow/conditional_block_op_helper.h"
@@ -226,14 +227,32 @@ void HogwildWorker::PrintFetchVars() {
   // call count
   batch_num_++;
   int batch_per_print = fetch_config_.print_period();
-  if (thread_id_ == 0) {
-    if (batch_num_ % batch_per_print == 0) {
-      int fetch_var_num = fetch_config_.fetch_var_names_size();
-      for (int i = 0; i < fetch_var_num; ++i) {
-        platform::PrintVar(thread_scope_, fetch_config_.fetch_var_names(i),
-                           fetch_config_.fetch_var_str_format(i));
-      }
-    }
-  }
+  int fetch_var_num = fetch_config_.fetch_var_names_size();
+  if (fetch_var_num == 0) {
+    return;
+  }
+  if (thread_id_ == 0 && batch_num_ % batch_per_print == 0) {
+    time_t curtime;
+    time(&curtime);
+    char mbstr[80];
+    std::strftime(mbstr, sizeof(mbstr), "%Y-%m-%d %H:%M:%S",
+                  std::localtime(&curtime));
+    std::stringstream ss;
+    ss << "time: [" << mbstr << "], ";
+    ss << "batch: [" << batch_num_ << "], ";
+    for (int i = 0; i < fetch_var_num; ++i) {
+      platform::PrintVar(thread_scope_, fetch_config_.fetch_var_names(i),
+                         fetch_config_.fetch_var_str_format(i), &ss);
+      if (i < fetch_var_num - 1) {
+        ss << ", ";
+      }
+    }
+    std::cout << ss.str() << std::endl;
+  }
 }
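The rewritten PrintFetchVars prefixes each fetch report with a wall-clock timestamp and the batch count before delegating to platform::PrintVar. Below is that formatting step in isolation as a runnable sketch; only the timestamp and stringstream handling is reproduced, and the fetch loop is omitted.

// Standalone sketch of the timestamp prefix built in PrintFetchVars.
#include <ctime>
#include <iostream>
#include <sstream>

int main() {
  time_t curtime;
  time(&curtime);
  char mbstr[80];
  std::strftime(mbstr, sizeof(mbstr), "%Y-%m-%d %H:%M:%S",
                std::localtime(&curtime));

  std::stringstream ss;
  ss << "time: [" << mbstr << "], ";
  ss << "batch: [" << 42 << "], ";     // 42 stands in for batch_num_
  std::cout << ss.str() << std::endl;  // e.g. "time: [2021-04-01 12:00:00], batch: [42], "
  return 0;
}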
paddle/fluid/platform/lodtensor_printer.cc
@@ -27,24 +27,38 @@ namespace paddle {
 namespace platform {
 void PrintVar(framework::Scope* scope, const std::string& var_name,
-              const std::string& print_info) {
+              const std::string& print_info, std::stringstream* sstream) {
   framework::Variable* var = scope->FindVar(var_name);
   if (var == nullptr) {
-    VLOG(1) << "Variable Name " << var_name << " does not exist in your scope";
+    VLOG(0) << "Variable Name " << var_name << " does not exist in your scope";
     return;
   }
   framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
   if (tensor == nullptr) {
-    VLOG(1) << "tensor of variable " << var_name
+    VLOG(0) << "tensor of variable " << var_name
             << " does not exist in your scope";
     return;
   }
-  std::ostringstream sstream;
-  sstream << print_info << "\t";
-  sstream << var_name << "\t";
-  sstream << *tensor << "\t";
-  std::cout << sstream.str() << std::endl;
+  *sstream << print_info << ": ";
+#define PrintTensorCallback(cpp_type, proto_type) \
+  do {                                            \
+    if (tensor->type() == proto_type) {           \
+      *sstream << "[";                            \
+      auto* data = tensor->data<cpp_type>();      \
+      auto element_num = tensor->numel();         \
+      if (element_num > 0) {                      \
+        *sstream << data[0];                      \
+        for (int j = 1; j < element_num; ++j) {   \
+          *sstream << " " << data[j];             \
+        }                                         \
+      }                                           \
+      *sstream << "]";                            \
+    }                                             \
+  } while (0)
+  _ForEachDataType_(PrintTensorCallback);
 }
 }  // end namespace platform
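PrintVar now streams tensor contents through a PrintTensorCallback macro that Paddle's _ForEachDataType_ helper expands once per supported (cpp_type, proto_type) pair. Below is a simplified, self-contained sketch of that X-macro dispatch pattern; MiniTensor, DataType, and FOR_EACH_TYPE are illustrative stand-ins rather than Paddle's definitions.

// Simplified X-macro type dispatch in the spirit of PrintTensorCallback /
// _ForEachDataType_; all names here are illustrative, not Paddle's.
#include <cstdint>
#include <iostream>
#include <sstream>
#include <vector>

enum class DataType { kFloat32, kInt64 };

struct MiniTensor {
  DataType type;
  std::vector<float> f32;
  std::vector<int64_t> i64;
  const void* data() const {
    return type == DataType::kFloat32 ? static_cast<const void*>(f32.data())
                                      : static_cast<const void*>(i64.data());
  }
  size_t numel() const {
    return type == DataType::kFloat32 ? f32.size() : i64.size();
  }
};

// The per-type callback, expanded once for every (cpp_type, enum_value) pair.
#define PRINT_TENSOR_CALLBACK(cpp_type, enum_value)             \
  do {                                                          \
    if (tensor.type == enum_value) {                            \
      auto* data = static_cast<const cpp_type*>(tensor.data()); \
      ss << "[";                                                \
      for (size_t j = 0; j < tensor.numel(); ++j) {             \
        ss << (j ? " " : "") << data[j];                        \
      }                                                         \
      ss << "]";                                                \
    }                                                           \
  } while (0)

// The type list a helper like _ForEachDataType_ would iterate over.
#define FOR_EACH_TYPE(callback)        \
  callback(float, DataType::kFloat32); \
  callback(int64_t, DataType::kInt64)

int main() {
  MiniTensor tensor{DataType::kFloat32, {1.5f, 2.5f}, {}};
  std::stringstream ss;
  ss << "emb0: ";
  FOR_EACH_TYPE(PRINT_TENSOR_CALLBACK);
  std::cout << ss.str() << std::endl;  // prints "emb0: [1.5 2.5]"
  return 0;
}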
paddle/fluid/platform/lodtensor_printer.h
@@ -26,6 +26,6 @@ class Scope;
 namespace paddle {
 namespace platform {
 void PrintVar(framework::Scope* scope, const std::string& var_name,
-              const std::string& print_info);
+              const std::string& print_info, std::stringstream* out);
 }  // end namespace platform
 }  // end namespace paddle
paddle/fluid/platform/lodtensor_printer_test.cc
@@ -18,5 +18,6 @@
 TEST(LodTensorPrinter, PrintVar) {
   paddle::framework::Scope scope;
-  paddle::platform::PrintVar(&scope, "NotAVar", "We don't have var");
+  std::stringstream ss;
+  paddle::platform::PrintVar(&scope, "NotAVar", "We don't have var", &ss);
 }
python/paddle/distributed/fleet/base/fleet_base.py
@@ -628,12 +628,13 @@ class Fleet(object):
         self.user_defined_optimizer = optimizer
         if strategy is not None:
-            warnings.warn(
-                "It is recommended to use DistributedStrategy "
-                "in fleet.init(). The strategy here is only for compatibility. "
-                "If the strategy in fleet.distributed_optimizer() is "
-                "not None, then it will overwrite the DistributedStrategy in fleet.init(), "
-                "which will take effect in distributed training.")
+            if self._is_collective:
+                warnings.warn(
+                    "It is recommended to use DistributedStrategy "
+                    "in fleet.init(). The strategy here is only for compatibility. "
+                    "If the strategy in fleet.distributed_optimizer() is "
+                    "not None, then it will overwrite the DistributedStrategy in fleet.init(), "
+                    "which will take effect in distributed training.")
             self._user_defined_strategy = copy.deepcopy(strategy)
         self._context = {}
python/paddle/distributed/fleet/runtime/the_one_ps.py
@@ -768,7 +768,7 @@ class TheOnePSRuntime(RuntimeBase):
         server = self._get_fleet_proto(is_server=True, is_sync=is_sync)
         proto_txt = str(server)
-        debug = bool(os.getenv("PSERVER_DEBUG", "0"))
+        debug = bool(int(os.getenv("PSERVER_DEBUG", "0")))
         if debug:
             print("server: \n{}".format(proto_txt))
python/paddle/fluid/tests/unittests/test_monitor.py
@@ -17,6 +17,8 @@ TestCases for Monitor
 from __future__ import print_function
+import paddle
+paddle.enable_static()
 import paddle.fluid as fluid
 import paddle.fluid.core as core
 import numpy as np
@@ -52,6 +54,11 @@ class TestDatasetWithStat(unittest.TestCase):
                 name=slot, shape=[1], dtype="int64", lod_level=1)
             slots_vars.append(var)
+        embs = []
+        for x in slots_vars:
+            emb = fluid.layers.embedding(x, is_sparse=True, size=[100001, 4])
+            embs.append(emb)
         dataset = paddle.distributed.InMemoryDataset()
         dataset._set_batch_size(32)
         dataset._set_thread(3)
@@ -74,11 +81,17 @@ class TestDatasetWithStat(unittest.TestCase):
             for i in range(self.epoch_num):
                 for data in data_loader():
                     exe.run(fluid.default_main_program(), feed=data)
         else:
             for i in range(self.epoch_num):
                 try:
-                    exe.train_from_dataset(fluid.default_main_program(),
-                                           dataset)
+                    exe.train_from_dataset(
+                        fluid.default_main_program(),
+                        dataset,
+                        fetch_list=[embs[0], embs[1]],
+                        fetch_info=["emb0", "emb1"],
+                        print_period=1)
                 except Exception as e:
                     self.assertTrue(False)