Unverified commit 6162cf2f, authored by qingqing01, committed by GitHub

Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO. (#23426)

* Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO
Parent 118e585b
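
This commit makes `parameter_list` behave the same way in dygraph and static-graph mode: the optimizer now stores the list it is constructed with and falls back to it in `backward()` and `minimize()` when no list is passed explicitly. A minimal usage sketch of that behaviour, assuming the fluid 1.x dygraph API of that time (the Linear layer, its sizes, and the random input are illustrative only):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        linear = fluid.dygraph.Linear(10, 1)
        # parameter_list is given once, at construction time ...
        adam = fluid.optimizer.AdamOptimizer(
            learning_rate=0.01, parameter_list=linear.parameters())
        x = fluid.dygraph.to_variable(
            np.random.rand(4, 10).astype('float32'))
        loss = fluid.layers.reduce_mean(linear(x))
        loss.backward()
        # ... so minimize() no longer needs it; it falls back to the
        # stored self._parameter_list.
        adam.minimize(loss)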
@@ -378,8 +378,8 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
       pass->Set<bool>(kUseHierarchicalAllReduce,
                       new bool(use_hierarchical_allreduce_));
 #endif
-      LOG(INFO) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
-                << ", num_trainers:" << num_trainers_;
+      VLOG(1) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
+              << ", num_trainers:" << num_trainers_;
     } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") {
       if (!use_cuda) {
         LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on "
......
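The logging hunks in this commit (the one above and several below) downgrade LOG(INFO)/LOG(WARNING) lines to VLOG, so they are silent by default. A small sketch of how to bring them back when debugging, assuming Paddle honours the standard glog GLOG_v verbosity switch:

    import os

    # Raise the glog verbosity before the framework is imported so that
    # VLOG(1) sites (e.g. "SeqOnlyAllReduceOps:...") print again when the
    # corresponding code paths run.
    os.environ["GLOG_v"] = "1"

    import paddle.fluid as fluid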
@@ -330,8 +330,8 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_inplace_pass";
     graph = inplace_pass->Apply(graph);
     VLOG(10) << "buffer_shared_inplace_pass Applied";
-    LOG_FIRST_N(INFO, 1) << "Inplace strategy is enabled, when "
-                            "build_strategy.enable_inplace = True";
+    VLOG(1) << "Inplace strategy is enabled, when "
+               "build_strategy.enable_inplace = True";
   }
   if (build_strategy_.memory_optimize_.get()) {
@@ -398,9 +398,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     eager_deletion_pass->SetNotOwned(ir::kAllPlaces, &places_);
     graph = eager_deletion_pass->Apply(graph);
     VLOG(10) << "EagerDeletionPass Applied";
-    LOG_FIRST_N(INFO, 1) << "Garbage collection strategy is enabled, when "
-                         << "FLAGS_eager_delete_tensor_gb = "
-                         << FLAGS_eager_delete_tensor_gb;
+    VLOG(1) << "Garbage collection strategy is enabled, when "
+            << "FLAGS_eager_delete_tensor_gb = "
+            << FLAGS_eager_delete_tensor_gb;
   }
   return graph;
 }
@@ -478,7 +478,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
           "Please recompile and turn on the WITH_NCCL option."));
 #endif
-  LOG(INFO) << string::Sprintf(
+  VLOG(1) << string::Sprintf(
       "The Program will be executed on %s using ParallelExecutor, %lu "
       "cards are used, so %lu programs are executed in parallel.",
       (member_->use_cuda_ ? "CUDA" : "CPU"), places.size(), places.size());
......
@@ -601,7 +601,7 @@ void AsyncGRPCServer::HandleRequest(
   while (true) {
     VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      LOG(WARNING) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(4) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
......
@@ -104,7 +104,7 @@ class RPCClient {
   // Init is called by GetInstance.
   template <typename T>
   static void Init(int trainer_id) {
-    VLOG(0) << "init rpc client with trainer_id " << trainer_id;
+    VLOG(1) << "init rpc client with trainer_id " << trainer_id;
     trainer_id_ = trainer_id;
     if (rpc_client_.get() == nullptr) {
       rpc_client_.reset(new T());
......
@@ -25,7 +25,7 @@ namespace operators {
 namespace distributed {
 void RPCServer::ShutDown() {
-  LOG(INFO) << "RPCServer ShutDown ";
+  VLOG(3) << "RPCServer ShutDown ";
   ShutDownImpl();
   exit_flag_ = true;
......
@@ -27,7 +27,6 @@ from __future__ import print_function
 import os
 import functools
-import rarfile
 from .common import download
 import numpy as np
@@ -40,6 +39,8 @@ def __initialize_meta_info__():
     """
     download and extract the MQ2007 dataset
     """
+    import rarfile
     fn = fetch()
     rar = rarfile.RarFile(fn)
     dirpath = os.path.dirname(fn)
......
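Moving `import rarfile` from module level into `__initialize_meta_info__` makes it a lazy, optional dependency: importing the MQ2007 dataset module no longer fails when rarfile is not installed; only actually downloading and extracting the data requires it. The same pattern in isolation (the function name here is hypothetical):

    def _open_archive(path):
        # Deferred import: the optional dependency is only needed when the
        # archive is actually opened, not when this module is imported.
        import rarfile
        return rarfile.RarFile(path)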
@@ -65,7 +65,7 @@ class Optimizer(object):
                  parameter_list=None,
                  regularization=None,
                  name=None):
-        self._parameter_list = None
+        self._parameter_list = parameter_list
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, LearningRateDecay):
@@ -76,9 +76,7 @@ class Optimizer(object):
                 self._name = unique_name.generate(name)
             else:
                 self._name = unique_name.generate(self.__class__.__name__)
-            if parameter_list is not None:
-                self._parameter_list = parameter_list
-            else:
+            if self._parameter_list is None:
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
@@ -662,6 +660,8 @@ class Optimizer(object):
             "The loss.shape should be (1L,), but the current loss.shape is {}. " \
             "Maybe that you should call fluid.layers.mean to process the current loss.".format(
                 loss.shape)
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         with program_guard(program, startup_program):
             params_grads = append_backward(loss, parameter_list,
                                            act_no_grad_set, callbacks)
@@ -826,7 +826,8 @@ class Optimizer(object):
                 "'grad_clip' should be an instance of GradientClipBase's derived class"
             )
             self._grad_clip = grad_clip
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         params_grads = self.backward(
             loss,
             startup_program=startup_program,
......
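The two added `parameter_list = parameter_list if parameter_list else self._parameter_list` blocks give `backward()` and `minimize()` an explicit-argument-wins fallback to the list captured in `__init__`. A stripped-down sketch of that logic outside Paddle (class and method names are illustrative, not the real API):

    class OptimizerSketch(object):
        def __init__(self, parameter_list=None):
            # Stored unconditionally, mirroring the __init__ change above.
            self._parameter_list = parameter_list

        def minimize(self, loss, parameter_list=None):
            # An explicitly passed list takes precedence; otherwise fall
            # back to the one given at construction time.
            parameter_list = parameter_list if parameter_list \
                else self._parameter_list
            return loss, parameter_list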