Unverified commit 6162cf2f authored by qingqing01, committed by GitHub

Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO. (#23426)

* Make optimizer consistent in dygraph and static-graph and remove some LOG-INFO
Parent 118e585b
@@ -378,8 +378,8 @@ ir::Graph *BuildStrategy::Apply(ir::Graph *graph,
     pass->Set<bool>(kUseHierarchicalAllReduce,
                     new bool(use_hierarchical_allreduce_));
 #endif
-    LOG(INFO) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
-              << ", num_trainers:" << num_trainers_;
+    VLOG(1) << "SeqOnlyAllReduceOps:" << SeqOnlyAllReduceOps(*this)
+            << ", num_trainers:" << num_trainers_;
   } else if (pass->Type() == "fuse_relu_depthwise_conv_pass") {
     if (!use_cuda) {
       LOG(WARNING) << "fuse_relu_depthwise_conv_pass is only supported on "
......
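Context for the logging hunks in this commit: `LOG(INFO)` and `LOG_FIRST_N(INFO, 1)` print unconditionally, while `VLOG(n)` is suppressed unless glog's verbosity is at least `n`, so the demoted messages stop appearing in default output. A minimal sketch of how to surface them again, assuming the standard glog `GLOG_v` environment variable (it must be set before the C++ runtime initializes):

```python
import os

# Raise glog verbosity before the first paddle import; "1" re-enables the
# VLOG(1) messages demoted in this commit, while the default hides them.
os.environ["GLOG_v"] = "1"

import paddle.fluid as fluid  # e.g. "SeqOnlyAllReduceOps:..." is printed again
```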
@@ -330,8 +330,8 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     VLOG(10) << "Start to apply buffer_shared_inplace_pass";
     graph = inplace_pass->Apply(graph);
     VLOG(10) << "buffer_shared_inplace_pass Applied";
-    LOG_FIRST_N(INFO, 1) << "Inplace strategy is enabled, when "
-                            "build_strategy.enable_inplace = True";
+    VLOG(1) << "Inplace strategy is enabled, when "
+               "build_strategy.enable_inplace = True";
   }
   if (build_strategy_.memory_optimize_.get()) {
@@ -398,9 +398,9 @@ ir::Graph *ParallelExecutorPrivate::ApplyMemoryOptimizePass(ir::Graph *graph) {
     eager_deletion_pass->SetNotOwned(ir::kAllPlaces, &places_);
     graph = eager_deletion_pass->Apply(graph);
     VLOG(10) << "EagerDeletionPass Applied";
-    LOG_FIRST_N(INFO, 1) << "Garbage collection strategy is enabled, when "
-                         << "FLAGS_eager_delete_tensor_gb = "
-                         << FLAGS_eager_delete_tensor_gb;
+    VLOG(1) << "Garbage collection strategy is enabled, when "
+            << "FLAGS_eager_delete_tensor_gb = "
+            << FLAGS_eager_delete_tensor_gb;
   }
   return graph;
 }
@@ -478,7 +478,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
         "Please recompile and turn on the WITH_NCCL option."));
 #endif
-  LOG(INFO) << string::Sprintf(
+  VLOG(1) << string::Sprintf(
       "The Program will be executed on %s using ParallelExecutor, %lu "
       "cards are used, so %lu programs are executed in parallel.",
       (member_->use_cuda_ ? "CUDA" : "CPU"), places.size(), places.size());
......
@@ -601,7 +601,7 @@ void AsyncGRPCServer::HandleRequest(
   while (true) {
     VLOG(4) << "HandleRequest " << rpc_name << " wait next";
     if (!cq->Next(&tag, &ok)) {
-      LOG(WARNING) << "CompletionQueue " << rpc_name << " shutdown!";
+      VLOG(4) << "CompletionQueue " << rpc_name << " shutdown!";
       break;
     }
......
@@ -104,7 +104,7 @@ class RPCClient {
   // Init is called by GetInstance.
   template <typename T>
   static void Init(int trainer_id) {
-    VLOG(0) << "init rpc client with trainer_id " << trainer_id;
+    VLOG(1) << "init rpc client with trainer_id " << trainer_id;
     trainer_id_ = trainer_id;
     if (rpc_client_.get() == nullptr) {
       rpc_client_.reset(new T());
......
@@ -25,7 +25,7 @@ namespace operators {
 namespace distributed {
 void RPCServer::ShutDown() {
-  LOG(INFO) << "RPCServer ShutDown ";
+  VLOG(3) << "RPCServer ShutDown ";
   ShutDownImpl();
   exit_flag_ = true;
......
@@ -27,7 +27,6 @@ from __future__ import print_function
 import os
 import functools
-import rarfile
 from .common import download
 import numpy as np
@@ -40,6 +39,8 @@ def __initialize_meta_info__():
     """
     download and extract the MQ2007 dataset
     """
+    import rarfile
     fn = fetch()
     rar = rarfile.RarFile(fn)
     dirpath = os.path.dirname(fn)
......
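The two hunks above move `import rarfile` from module scope into `__initialize_meta_info__`, so `rarfile` becomes an optional dependency that is only resolved when the MQ2007 dataset is actually downloaded, rather than whenever the dataset package is imported. A minimal sketch of the same lazy-import pattern (the module name `optional_dep` is hypothetical):

```python
def load_archive(path):
    # Deferred import: merely importing this module never fails on machines
    # that lack the optional package; only calling this function does.
    import optional_dep  # hypothetical optional dependency
    return optional_dep.open(path)
```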
@@ -65,7 +65,7 @@ class Optimizer(object):
                  parameter_list=None,
                  regularization=None,
                  name=None):
-        self._parameter_list = None
+        self._parameter_list = parameter_list
         if framework.in_dygraph_mode():
             if not isinstance(learning_rate, float) and \
                     not isinstance(learning_rate, LearningRateDecay):
@@ -76,9 +76,7 @@ class Optimizer(object):
                 self._name = unique_name.generate(name)
             else:
                 self._name = unique_name.generate(self.__class__.__name__)
-            if parameter_list is not None:
-                self._parameter_list = parameter_list
-            else:
+            if self._parameter_list is None:
                 raise AttributeError(
                     "parameter_list argument given to the Optimizer should not be None in dygraph mode."
                 )
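After the two `__init__` hunks above, `parameter_list` is stored on the optimizer in both modes, and dygraph mode only validates that it was supplied. A hedged sketch of the dygraph requirement that the `AttributeError` enforces (the layer and its sizes are illustrative, using the fluid dygraph API of this era):

```python
import paddle.fluid as fluid

with fluid.dygraph.guard():
    linear = fluid.dygraph.Linear(4, 1)
    # dygraph still requires an explicit parameter_list; omitting it
    # raises the AttributeError shown in the hunk above.
    sgd = fluid.optimizer.SGD(learning_rate=0.01,
                              parameter_list=linear.parameters())
```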
@@ -662,6 +660,8 @@ class Optimizer(object):
                 "The loss.shape should be (1L,), but the current loss.shape is {}. " \
                 "Maybe that you should call fluid.layers.mean to process the current loss.".format(
                     loss.shape)
+            parameter_list = parameter_list if parameter_list \
+                else self._parameter_list
             with program_guard(program, startup_program):
                 params_grads = append_backward(loss, parameter_list,
                                                act_no_grad_set, callbacks)
@@ -826,7 +826,8 @@ class Optimizer(object):
                 "'grad_clip' should be an instance of GradientClipBase's derived class"
             )
             self._grad_clip = grad_clip
+        parameter_list = parameter_list if parameter_list \
+            else self._parameter_list
         params_grads = self.backward(
             loss,
             startup_program=startup_program,
......
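These last two hunks make `backward()` and the grad-clip path fall back to the stored `self._parameter_list` whenever the `parameter_list` argument is omitted, which is what makes static-graph behavior match dygraph. A hedged usage sketch (static-graph fluid API of this era; the network shape is illustrative):

```python
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[4], dtype="float32")
loss = fluid.layers.mean(fluid.layers.fc(input=x, size=1))

params = fluid.default_main_program().global_block().all_parameters()
opt = fluid.optimizer.SGD(learning_rate=0.01, parameter_list=params)

# No parameter_list passed here: minimize() now falls back to the list
# stored by __init__, instead of silently ignoring the constructor argument.
opt.minimize(loss)
```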