From 6162cf2f2e5907c8dbd72adee3808994b52de735 Mon Sep 17 00:00:00 2001
From: qingqing01
Date: Tue, 7 Apr 2020 15:06:26 +0800
Subject: [PATCH] Make optimizer consistent in dygraph and static-graph and
 remove some LOG-INFO. (#23426)

* Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO
---
 paddle/fluid/framework/details/build_strategy.cc     |  4 ++--
 paddle/fluid/framework/parallel_executor.cc          | 12 ++++++------
 .../fluid/operators/distributed/grpc/grpc_server.cc  |  2 +-
 paddle/fluid/operators/distributed/rpc_client.h      |  2 +-
 paddle/fluid/operators/distributed/rpc_server.cc     |  2 +-
 python/paddle/dataset/mq2007.py                      |  3 ++-
 python/paddle/fluid/optimizer.py                     | 11 ++++++-----
 7 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc
index 2dc34f1795e..eb9accaed8b 100644
--- a/paddle/fluid/framework/details/build_strategy.cc
+++ b/paddle/fluid/framework/details/build_strategy.cc
@@ -378,8 +378,8 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
       pass->Set(kUseHierarchicalAllReduce,
                 new bool(use_hierarchical_allreduce_));
 #endif
-      LOG(INFO) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
-                << ", num_trainers:" << num_trainers_;
+      VLOG(1) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
+              << ", num_trainers:" << num_trainers_;
     } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") {
       if (!use_cuda) {
         LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on "
diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc
index f2cc9d12ee3..18049293c37 100644
--- a/paddle/fluid/framework/parallel_executor.cc
+++ b/paddle/fluid/framework/parallel_executor.cc
@@ -330,8 +330,8 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_inplace_pass";
     graph = inplace_pass->Apply(graph);
     VLOG(10) << "buffer_shared_inplace_pass Applied";
-    LOG_FIRST_N(INFO, 1) << "Inplace strategy is enabled, when "
-                            "build_strategy.enable_inplace = True";
+    VLOG(1) << "Inplace strategy is enabled, when "
+               "build_strategy.enable_inplace = True";
   }
 
   if (build_strategy_.memory_optimize_.get()) {
@@ -398,9 +398,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     eager_deletion_pass->SetNotOwned(ir::kAllPlaces, &places_);
     graph = eager_deletion_pass->Apply(graph);
     VLOG(10) << "EagerDeletionPass Applied";
-    LOG_FIRST_N(INFO, 1) << "Garbage collection strategy is enabled, when "
-                         << "FLAGS_eager_delete_tensor_gb = "
-                         << FLAGS_eager_delete_tensor_gb;
+    VLOG(1) << "Garbage collection strategy is enabled, when "
+            << "FLAGS_eager_delete_tensor_gb = "
+            << FLAGS_eager_delete_tensor_gb;
   }
   return graph;
 }
@@ -478,7 +478,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
                             "Please recompile and turn on the WITH_NCCL option."));
 #endif
 
-  LOG(INFO) << string::Sprintf(
+  VLOG(1) << string::Sprintf(
       "The Program will be executed on %s using ParallelExecutor, %lu "
       "cards are used, so %lu programs are executed in parallel.",
       (member_->use_cuda_ ? "CUDA" : "CPU"), places.size(), places.size());
diff --git a/paddle/fluid/operators/distributed/grpc/grpc_server.cc b/paddle/fluid/operators/distributed/grpc/grpc_server.cc
index d4354d0f651..784749bc910 100644
--- a/paddle/fluid/operators/distributed/grpc/grpc_server.cc
+++ b/paddle/fluid/operators/distributed/grpc/grpc_server.cc
@@ -601,7 +601,7 @@ void AsyncGRPCServer::HandleRequest(
   while (true) {
     VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      LOG(WARNING) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(4) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
 
diff --git a/paddle/fluid/operators/distributed/rpc_client.h b/paddle/fluid/operators/distributed/rpc_client.h
index 2071afcfd02..9f06b168f80 100644
--- a/paddle/fluid/operators/distributed/rpc_client.h
+++ b/paddle/fluid/operators/distributed/rpc_client.h
@@ -104,7 +104,7 @@ class RPCClient {
   // Init is called by GetInstance.
   template <typename T>
   static void Init(int trainer_id) {
-    VLOG(0) << "init rpc client with trainer_id " << trainer_id;
+    VLOG(1) << "init rpc client with trainer_id " << trainer_id;
     trainer_id_ = trainer_id;
     if (rpc_client_.get() == nullptr) {
       rpc_client_.reset(new T());
diff --git a/paddle/fluid/operators/distributed/rpc_server.cc b/paddle/fluid/operators/distributed/rpc_server.cc
index c3a46e348c6..691c2c1048f 100644
--- a/paddle/fluid/operators/distributed/rpc_server.cc
+++ b/paddle/fluid/operators/distributed/rpc_server.cc
@@ -25,7 +25,7 @@ namespace operators {
 namespace distributed {
 
 void RPCServer::ShutDown() {
-  LOG(INFO) << "RPCServer ShutDown ";
+  VLOG(3) << "RPCServer ShutDown ";
   ShutDownImpl();
 
   exit_flag_ = true;
diff --git a/python/paddle/dataset/mq2007.py b/python/paddle/dataset/mq2007.py
index cfabd09705b..c499b901dcc 100644
--- a/python/paddle/dataset/mq2007.py
+++ b/python/paddle/dataset/mq2007.py
@@ -27,7 +27,6 @@ from __future__ import print_function
 
 import os
 import functools
-import rarfile
 from .common import download
 import numpy as np
 
@@ -40,6 +39,8 @@ def __initialize_meta_info__():
     """
     download and extract the MQ2007 dataset
     """
+    import rarfile
+
     fn = fetch()
     rar = rarfile.RarFile(fn)
     dirpath = os.path.dirname(fn)
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index da464cb462e..9fdbaf64e1f 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -65,7 +65,7 @@ class Optimizer(object):
                  parameter_list=None,
                  regularization=None,
                  name=None):
-        self._parameter_list = None
+        self._parameter_list = parameter_list
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, LearningRateDecay):
@@ -76,9 +76,7 @@ class Optimizer(object):
                 self._name = unique_name.generate(name)
             else:
                 self._name = unique_name.generate(self.__class__.__name__)
-            if parameter_list is not None:
-                self._parameter_list = parameter_list
-            else:
+            if self._parameter_list is None:
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
@@ -662,6 +660,8 @@ class Optimizer(object):
                 "The loss.shape should be (1L,), but the current loss.shape is {}. " \
                 "Maybe that you should call fluid.layers.mean to process the current loss.".format(
                     loss.shape)
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         with program_guard(program, startup_program):
             params_grads = append_backward(loss, parameter_list,
                                            act_no_grad_set, callbacks)
@@ -826,7 +826,8 @@ class Optimizer(object):
                 "'grad_clip' should be an instance of GradientClipBase's derived class"
             )
             self._grad_clip = grad_clip
-
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         params_grads = self.backward(
             loss,
             startup_program=startup_program,
--
GitLab