Unverified commit 6162cf2f authored by qingqing01, committed by GitHub

Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO. (#23426)

* Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO
Parent 118e585b
@@ -378,8 +378,8 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
     pass->Set<bool>(kUseHierarchicalAllReduce,
                     new bool(use_hierarchical_allreduce_));
 #endif
-    LOG(INFO) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
-              << ", num_trainers:" << num_trainers_;
+    VLOG(1) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
+            << ", num_trainers:" << num_trainers_;
   } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") {
     if (!use_cuda) {
       LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on "
......
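Context for the logging hunks in this commit: `LOG(INFO)` and `LOG_FIRST_N(INFO, 1)` print unconditionally, while `VLOG(n)` is suppressed unless glog's verbosity is at least `n`, so the demoted messages stop appearing in default output. A minimal sketch of how to surface them again, assuming the standard glog `GLOG_v` environment variable (it must be set before the C++ runtime initializes):

```python
import os

# Raise glog verbosity before the first paddle import; "1" re-enables the
# VLOG(1) messages demoted in this commit, while the default hides them.
os.environ["GLOG_v"] = "1"

import paddle.fluid as fluid  # e.g. "SeqOnlyAllReduceOps:..." is printed again
```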
@@ -330,8 +330,8 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_inplace_pass";
     graph = inplace_pass->Apply(graph);
     VLOG(10) << "buffer_shared_inplace_pass Applied";
-    LOG_FIRST_N(INFO, 1) << "Inplace strategy is enabled, when "
-                            "build_strategy.enable_inplace = True";
+    VLOG(1) << "Inplace strategy is enabled, when "
+               "build_strategy.enable_inplace = True";
   }
   if (build_strategy_.memory_optimize_.get()) {
@@ -398,9 +398,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     eager_deletion_pass->SetNotOwned(ir::kAllPlaces, &places_);
     graph = eager_deletion_pass->Apply(graph);
     VLOG(10) << "EagerDeletionPass Applied";
-    LOG_FIRST_N(INFO, 1) << "Garbage collection strategy is enabled, when "
-                         << "FLAGS_eager_delete_tensor_gb = "
-                         << FLAGS_eager_delete_tensor_gb;
+    VLOG(1) << "Garbage collection strategy is enabled, when "
+            << "FLAGS_eager_delete_tensor_gb = "
+            << FLAGS_eager_delete_tensor_gb;
   }
   return graph;
 }
@@ -478,7 +478,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
         "Please recompile and turn on the WITH_NCCL option."));
 #endif
-  LOG(INFO) << string::Sprintf(
+  VLOG(1) << string::Sprintf(
       "The Program will be executed on %s using ParallelExecutor, %lu "
       "cards are used, so %lu programs are executed in parallel.",
       (member_->use_cuda_ ? "CUDA" : "CPU"), places.size(), places.size());
......
@@ -601,7 +601,7 @@ void AsyncGRPCServer::HandleRequest(
   while (true) {
     VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      LOG(WARNING) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(4) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
......
@@ -104,7 +104,7 @@ class RPCClient {
   // Init is called by GetInstance.
   template <typename T>
   static void Init(int trainer_id) {
-    VLOG(0) << "init rpc client with trainer_id " << trainer_id;
+    VLOG(1) << "init rpc client with trainer_id " << trainer_id;
     trainer_id_ = trainer_id;
     if (rpc_client_.get() == nullptr) {
       rpc_client_.reset(new T());
......
@@ -25,7 +25,7 @@ namespace operators {
 namespace distributed {
 void RPCServer::ShutDown() {
-  LOG(INFO) << "RPCServer ShutDown ";
+  VLOG(3) << "RPCServer ShutDown ";
   ShutDownImpl();
   exit_flag_ = true;
......
@@ -27,7 +27,6 @@ from __future__ import print_function
 import os
 import functools
-import rarfile
 from .common import download
 import numpy as np
@@ -40,6 +39,8 @@ def __initialize_meta_info__():
     """
     download and extract the MQ2007 dataset
     """
+    import rarfile
     fn = fetch()
     rar = rarfile.RarFile(fn)
     dirpath = os.path.dirname(fn)
......
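The two hunks above move `import rarfile` from module scope into `__initialize_meta_info__`, so `rarfile` becomes an optional dependency that is only resolved when the MQ2007 dataset is actually downloaded, rather than whenever the dataset package is imported. A minimal sketch of the same lazy-import pattern (the module name `optional_dep` is hypothetical):

```python
def load_archive(path):
    # Deferred import: merely importing this module never fails on machines
    # that lack the optional package; only calling this function does.
    import optional_dep  # hypothetical optional dependency
    return optional_dep.open(path)
```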
@@ -65,7 +65,7 @@ class Optimizer(object):
                  parameter_list=None,
                  regularization=None,
                  name=None):
-        self._parameter_list = None
+        self._parameter_list = parameter_list
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, LearningRateDecay):
@@ -76,9 +76,7 @@ class Optimizer(object):
                 self._name = unique_name.generate(name)
             else:
                 self._name = unique_name.generate(self.__class__.__name__)
-            if parameter_list is not None:
-                self._parameter_list = parameter_list
-            else:
+            if self._parameter_list is None:
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
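After the two `__init__` hunks above, `parameter_list` is stored on the optimizer in both modes, and dygraph mode only validates that it was supplied. A hedged sketch of the dygraph requirement that the `AttributeError` enforces (the layer and its sizes are illustrative, using the fluid dygraph API of this era):

```python
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(4, 1)
    # dygraph still requires an explicit parameter_list; omitting it
    # raises the AttributeError shown in the hunk above.
    sgd = fluid.optimizer.SGD(learning_rate=0.01,
                              parameter_list=linear.parameters())
```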
@@ -662,6 +660,8 @@ class Optimizer(object):
                 "The loss.shape should be (1L,), but the current loss.shape is {}. " \
                 "Maybe that you should call fluid.layers.mean to process the current loss.".format(
                     loss.shape)
+            parameter_list = parameter_list if parameter_list \
+                else self._parameter_list
             with program_guard(program, startup_program):
                 params_grads = append_backward(loss, parameter_list,
                                                act_no_grad_set, callbacks)
@@ -826,7 +826,8 @@ class Optimizer(object):
                 "'grad_clip' should be an instance of GradientClipBase's derived class"
             )
             self._grad_clip = grad_clip
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         params_grads = self.backward(
             loss,
             startup_program=startup_program,
......
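These last two hunks make `backward()` and the grad-clip path fall back to the stored `self._parameter_list` whenever the `parameter_list` argument is omitted, which is what makes static-graph behavior match dygraph. A hedged usage sketch (static-graph fluid API of this era; the network shape is illustrative):

```python
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[4], dtype="float32")
loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))

params = fluid.default_main_program().global_block().all_parameters()
opt = fluid.optimizer.SGD(learning_rate=0.01, parameter_list=params)

# No parameter_list passed here: minimize() now falls back to the list
# stored by __init__, instead of silently ignoring the constructor argument.
opt.minimize(loss)
```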