未验证 提交 dfd4a111 编写于 作者: X Xin Pan 提交者: GitHub

Merge pull request #14592 from velconia/revert_vlog

Revert the changes of VLOG
...@@ -18,7 +18,7 @@ namespace framework { ...@@ -18,7 +18,7 @@ namespace framework {
void TransDataDevice(const Tensor &in, const platform::Place &dst_place, void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
Tensor *out) { Tensor *out) {
VLOG(30) << "DeviceTransform in, src_place " << in.place() VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place; << " dst_place: " << dst_place;
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NE(
......
...@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel { ...@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
OpKernelType GetExpectedKernelType( OpKernelType GetExpectedKernelType(
const ExecutionContext& ctx) const override { const ExecutionContext& ctx) const override {
if (Attr<bool>("use_gpu")) { if (Attr<bool>("use_gpu")) {
VLOG(30) << "force use gpu kernel"; VLOG(3) << "force use gpu kernel";
return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0)); return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
} else { } else {
VLOG(30) << "use default kernel"; VLOG(3) << "use default kernel";
return OpKernelType(proto::VarType::FP32, return OpKernelType(proto::VarType::FP32,
ctx.Input<Tensor>("input")->place()); ctx.Input<Tensor>("input")->place());
} }
...@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) { ...@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
// get output // get output
auto* output2 = scope.Var("OUT2"); auto* output2 = scope.Var("OUT2");
gpu_op->Run(scope, cuda_place); gpu_op->Run(scope, cuda_place);
VLOG(30) << "after gpu_op run"; VLOG(3) << "after gpu_op run";
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>(); // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
paddle::platform::DeviceContextPool& pool = paddle::platform::DeviceContextPool& pool =
......
...@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar( ...@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
PADDLE_ENFORCE_NOT_NULL(in_var); PADDLE_ENFORCE_NOT_NULL(in_var);
Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
if (UNLIKELY(!in_tensor.IsInitialized())) { if (UNLIKELY(!in_tensor.IsInitialized())) {
VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!"; VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
return; return;
} }
......
...@@ -45,7 +45,7 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl( ...@@ -45,7 +45,7 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view); IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free); compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
if (is_lock_and_record_event_free) { if (is_lock_and_record_event_free) {
VLOG(100) << "Set is_lock_and_record_event_free be true in op " VLOG(10) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString(); << compute_op->DebugString();
} }
} }
......
...@@ -399,7 +399,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl( ...@@ -399,7 +399,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
for (size_t i = 0; i < backward_vars.size(); i += 2) { for (size_t i = 0; i < backward_vars.size(); i += 2) {
auto &p_name = backward_vars[i]; auto &p_name = backward_vars[i];
auto &g_name = backward_vars[i + 1]; auto &g_name = backward_vars[i + 1];
VLOG(100) << "Bcast " << g_name << " for parameter " << p_name; VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
switch (strategy_.reduce_) { switch (strategy_.reduce_) {
case BuildStrategy::ReduceStrategy::kReduce: case BuildStrategy::ReduceStrategy::kReduce:
...@@ -809,7 +809,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp( ...@@ -809,7 +809,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U); PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
op_dev_id = GetAppropriateDeviceID({send_param_grad[1]}); op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
VLOG(100) << "send grad " << input_var_names[0] << " origin " VLOG(10) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id; << send_param_grad[1] << " place: " << op_dev_id;
for (auto &varname : input_var_names) { for (auto &varname : input_var_names) {
sharded_var_device->emplace(varname, op_dev_id); sharded_var_device->emplace(varname, op_dev_id);
...@@ -826,7 +826,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp( ...@@ -826,7 +826,7 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
if (recv_param_grad.size() == 2U) { if (recv_param_grad.size() == 2U) {
op_dev_id = op_dev_id =
GetVarDeviceID(*result, recv_param_grad[1], *sharded_var_device); GetVarDeviceID(*result, recv_param_grad[1], *sharded_var_device);
VLOG(100) << "recv param " << recv_param_grad[0] VLOG(10) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1] << " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id; << " place: " << op_dev_id;
} else { } else {
......
...@@ -140,7 +140,7 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl( ...@@ -140,7 +140,7 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
if (next_compute_op != nullptr) { if (next_compute_op != nullptr) {
if (compute_ref_cnt_map.count(next_compute_op)) { if (compute_ref_cnt_map.count(next_compute_op)) {
compute_ref_cnt_map[next_compute_op]->AddVar(var_name); compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
VLOG(50) << "Add reference count of " << var_name << " to Operator " VLOG(5) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name(); << next_compute_op->Name();
} else { } else {
// Create new reference_count_op_handle // Create new reference_count_op_handle
......
...@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() { ...@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
->stream(); ->stream();
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp, memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
platform::CPUPlace(), &coeff_, sizeof(float), stream); platform::CPUPlace(), &coeff_, sizeof(float), stream);
VLOG(100) << place_ << "RUN Scale loss grad op"; VLOG(10) << place_ << "RUN Scale loss grad op";
}); });
#endif #endif
} }
......
...@@ -94,7 +94,7 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl( ...@@ -94,7 +94,7 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
op_node_list[i - 1]->outputs.push_back(dep_var); op_node_list[i - 1]->outputs.push_back(dep_var);
dep_var->outputs.push_back(op_node_list[i]); dep_var->outputs.push_back(op_node_list[i]);
dep_var->inputs.push_back(op_node_list[i - 1]); dep_var->inputs.push_back(op_node_list[i - 1]);
VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name() VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name(); << " and " << op_node_list[i]->Name();
} }
return graph; return graph;
......
...@@ -210,16 +210,16 @@ void ThreadedSSAGraphExecutor::RunOp( ...@@ -210,16 +210,16 @@ void ThreadedSSAGraphExecutor::RunOp(
details::OpHandleBase *op) { details::OpHandleBase *op) {
auto op_run = [ready_var_q, op, this] { auto op_run = [ready_var_q, op, this] {
try { try {
if (VLOG_IS_ON(100)) { if (VLOG_IS_ON(10)) {
VLOG(100) << op << " " << op->Name() << " : " << op->DebugString(); VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
} }
if (LIKELY(!strategy_.dry_run_)) { if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_); op->Run(strategy_.use_cuda_);
} }
VLOG(100) << op << " " << op->Name() << " Done "; VLOG(10) << op << " " << op->Name() << " Done ";
running_ops_--; running_ops_--;
ready_var_q->Extend(op->Outputs()); ready_var_q->Extend(op->Outputs());
VLOG(100) << op << " " << op->Name() << "Signal posted"; VLOG(10) << op << " " << op->Name() << "Signal posted";
} catch (...) { } catch (...) {
exception_holder_.Catch(std::current_exception()); exception_holder_.Catch(std::current_exception());
} }
......
...@@ -46,7 +46,7 @@ ExecutorPrepareContext::ExecutorPrepareContext( ...@@ -46,7 +46,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
} }
ExecutorPrepareContext::~ExecutorPrepareContext() { ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG(50) << "destroy ExecutorPrepareContext"; VLOG(5) << "destroy ExecutorPrepareContext";
} }
template <typename RefCntMap> template <typename RefCntMap>
...@@ -63,7 +63,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op, ...@@ -63,7 +63,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
if ((it->second)-- == 1) { if ((it->second)-- == 1) {
auto* var = scope.FindVar(name); auto* var = scope.FindVar(name);
if (var != nullptr) { if (var != nullptr) {
VLOG(100) << "Erase tensor \'" << name << "\'"; VLOG(10) << "Erase tensor \'" << name << "\'";
if (var->IsType<LoDTensor>()) { if (var->IsType<LoDTensor>()) {
erase_tensors.insert(var->GetMutable<LoDTensor>()); erase_tensors.insert(var->GetMutable<LoDTensor>());
} else if (var->IsType<SelectedRows>()) { } else if (var->IsType<SelectedRows>()) {
...@@ -162,12 +162,12 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, ...@@ -162,12 +162,12 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
if (var->Persistable()) { if (var->Persistable()) {
auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name()); auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name() VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr; << " global, which pointer is " << ptr;
} else { } else {
auto* ptr = scope->Var(var->Name()); auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name() VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr; << " locally, which pointer is " << ptr;
} }
} }
...@@ -175,7 +175,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, ...@@ -175,7 +175,7 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
for (auto& var : global_block.AllVars()) { for (auto& var : global_block.AllVars()) {
auto* ptr = scope->Var(var->Name()); auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create variable " << var->Name() << ", which pointer is " VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr; << ptr;
} }
} }
...@@ -307,7 +307,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -307,7 +307,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0; int i = 0;
for (auto& feed_target : (*feed_targets)) { for (auto& feed_target : (*feed_targets)) {
std::string var_name = feed_target.first; std::string var_name = feed_target.first;
VLOG(30) << "feed target's name: " << var_name; VLOG(3) << "feed target's name: " << var_name;
// prepend feed op // prepend feed op
auto* op = global_block->PrependOp(); auto* op = global_block->PrependOp();
...@@ -330,7 +330,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -330,7 +330,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0; int i = 0;
for (auto& fetch_target : (*fetch_targets)) { for (auto& fetch_target : (*fetch_targets)) {
std::string var_name = fetch_target.first; std::string var_name = fetch_target.first;
VLOG(30) << "fetch target's name: " << var_name; VLOG(3) << "fetch target's name: " << var_name;
// append fetch op // append fetch op
auto* op = global_block->AppendOp(); auto* op = global_block->AppendOp();
...@@ -482,7 +482,7 @@ void Executor::RunPreparedContext( ...@@ -482,7 +482,7 @@ void Executor::RunPreparedContext(
void Executor::EnableMKLDNN(const ProgramDesc& program) { void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
VLOG(30) << "use_mkldnn=True"; VLOG(3) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) { for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid); auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) { for (auto* op : block->AllOps()) {
......
...@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input, ...@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) { const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will // If var_name Variable is not found in GlobalScope, a new variable will
// be created. // be created.
VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index; VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name); Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>()); auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
if (index >= feed_inputs.size()) { if (index >= feed_inputs.size()) {
...@@ -47,7 +47,7 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name, ...@@ -47,7 +47,7 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
typeid(FeedFetchList).name()); typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>(); auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index]; auto& tensor = fetch_outputs[index];
VLOG(30) << "Fetch " << var_name << " with index " << index VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims(); << " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size()); PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor; return tensor;
......
...@@ -152,11 +152,11 @@ void PrepareParameters(Graph* graph, const Param& param) { ...@@ -152,11 +152,11 @@ void PrepareParameters(Graph* graph, const Param& param) {
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \ auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \ auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \ CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(40) << #name__ "_w0" \ VLOG(4) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_w1" \ VLOG(4) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_b0" \ VLOG(4) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \ auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \ auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
...@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0, ...@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
int D = W_forget_w0.dims()[0]; int D = W_forget_w0.dims()[0];
int M = W_forget_w1.dims()[0]; int M = W_forget_w1.dims()[0];
out->Resize(make_ddim({D + M, 4 * D})); out->Resize(make_ddim({D + M, 4 * D}));
VLOG(30) << "LSTMWeight resized to " << out->dims(); VLOG(3) << "LSTMWeight resized to " << out->dims();
float* out_data = out->mutable_data<float>(platform::CPUPlace()); float* out_data = out->mutable_data<float>(platform::CPUPlace());
std::array<const float*, 4> tensors{ std::array<const float*, 4> tensors{
......
...@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl( ...@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
int found_conv_bias_count = 0; int found_conv_bias_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle ConvBias fuse"; VLOG(4) << "handle ConvBias fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_bias_pattern); // Filter conv_bias_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp
...@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl( ...@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used // check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *eltwise); FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) { if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
VLOG(30) << "do not perform conv+bias fuse"; VLOG(3) << "do not perform conv+bias fuse";
return; return;
} }
......
...@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl( ...@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
int found_conv_bn_count = 0; int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle ConvBN fuse"; VLOG(4) << "handle ConvBN fuse";
// conv, batch_norm, // conv, batch_norm,
// conv_weight, conv_out, // conv_weight, conv_out,
...@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl( ...@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used // check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm); FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
if (fuse_option == DO_NOT_FUSE) { if (fuse_option == DO_NOT_FUSE) {
VLOG(30) << "do not perform conv+bn fuse"; VLOG(3) << "do not perform conv+bn fuse";
return; return;
} }
...@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl( ...@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
int found_conv_bn_count = 0; int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle ConvBN fuse"; VLOG(4) << "handle ConvBN fuse";
// conv, batch_norm, // conv, batch_norm,
// conv_weight, conv_out, // conv_weight, conv_out,
......
...@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
int found_conv_relu_count = 0; int found_conv_relu_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle ConvReLU fuse"; VLOG(4) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
...@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
FuseOptions fuse_option = FindFuseOption(*conv, *relu); FuseOptions fuse_option = FindFuseOption(*conv, *relu);
if (fuse_option == DO_NOT_FUSE) { if (fuse_option == DO_NOT_FUSE) {
VLOG(30) << "do not perform conv+relu fuse"; VLOG(3) << "do not perform conv+relu fuse";
return; return;
} }
......
...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl( ...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
int found_depthwise_conv_mkldnn_count = 0; int found_depthwise_conv_mkldnn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(30) << "handle DepthwiseConvMKLDNN fuse"; VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
GET_NODE(depthwise_conv, (*pattern)); GET_NODE(depthwise_conv, (*pattern));
depthwise_conv->Op()->SetType("conv2d"); depthwise_conv->Op()->SetType("conv2d");
found_depthwise_conv_mkldnn_count++; found_depthwise_conv_mkldnn_count++;
......
...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl( ...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
int found_fc_count = 0; int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle FC fuse"; VLOG(4) << "handle FC fuse";
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
......
...@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct( ...@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(40) << "handle FuseElewiseAddAct fuse"; VLOG(4) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern); GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out, GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
elewise_add_act_pattern); elewise_add_act_pattern);
...@@ -77,7 +77,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct( ...@@ -77,7 +77,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode( Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n); g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> " VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n" << ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> " << "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n; << act_out_n;
...@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd( ...@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(40) << "handle FuseElewiseAddAct fuse"; VLOG(4) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out, GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
...@@ -129,7 +129,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd( ...@@ -129,7 +129,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode( Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n); g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> " << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n; << ele_add->Name() << " -> " << elewise_add_out_n;
...@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( ...@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(40) << "handle FuseElewiseAddActGrad1 fuse"; VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out, GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
...@@ -208,7 +208,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( ...@@ -208,7 +208,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto fused_node = g->CreateOpNode(&desc); auto fused_node = g->CreateOpNode(&desc);
VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> " VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t " << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> " << d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n; << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
......
...@@ -90,7 +90,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) { ...@@ -90,7 +90,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram( std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
const ProgramDesc &program) { const ProgramDesc &program) {
VLOG(30) << "block in program:" << program_.Size(); VLOG(3) << "block in program:" << program_.Size();
std::unordered_map<std::string, VarDesc *> all_vars; std::unordered_map<std::string, VarDesc *> all_vars;
// var nodes for each var name, will have multiple versions in SSA // var nodes for each var name, will have multiple versions in SSA
std::map<std::string, std::vector<ir::Node *>> var_nodes; std::map<std::string, std::vector<ir::Node *>> var_nodes;
...@@ -158,7 +158,7 @@ void Graph::ResolveHazard( ...@@ -158,7 +158,7 @@ void Graph::ResolveHazard(
auto it_old = versions.rbegin(); auto it_old = versions.rbegin();
++it_old; ++it_old;
for (; it_old != versions.rend(); it_new = it_old, ++it_old) { for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
VLOG(30) << "deal with var: " << (*it_new)->Name(); VLOG(3) << "deal with var: " << (*it_new)->Name();
ir::Node *write_op = ir::Node *write_op =
(*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0]; (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
const auto &read_ops = (*it_old)->outputs; const auto &read_ops = (*it_old)->outputs;
......
...@@ -89,7 +89,7 @@ class Graph { ...@@ -89,7 +89,7 @@ class Graph {
attr_name); attr_name);
attrs_[attr_name] = attr; attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() { attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(30) << "deleting " << attr_name; VLOG(3) << "deleting " << attr_name;
delete attr; delete attr;
}; };
} }
......
...@@ -40,9 +40,8 @@ void SortHelper( ...@@ -40,9 +40,8 @@ void SortHelper(
} }
} }
VLOG(30) << "topology sort insert: " << node->Name() VLOG(3) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input " << reinterpret_cast<void *>(node) << " input " << node->inputs.size();
<< node->inputs.size();
ret->push_back(node); ret->push_back(node);
} }
...@@ -111,7 +110,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList( ...@@ -111,7 +110,7 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
for (auto &var : n->inputs) { for (auto &var : n->inputs) {
for (auto &adj_n : var->inputs) { for (auto &adj_n : var->inputs) {
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation); PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n) VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n) << " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var); << " via " << var->Name() << reinterpret_cast<void *>(var);
adj_list[n].insert(adj_n); adj_list[n].insert(adj_n);
......
...@@ -92,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph, ...@@ -92,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size()); PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size());
int id = 0; int id = 0;
for (auto &g : subgraphs) { for (auto &g : subgraphs) {
VLOG(30) << "optimizing #" << id++ << " subgraph"; VLOG(3) << "optimizing #" << id++ << " subgraph";
handler(g, graph); handler(g, graph);
} }
} }
bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
VLOG(30) << "mark pdnodes in graph"; VLOG(3) << "mark pdnodes in graph";
if (graph.Nodes().empty()) return false; if (graph.Nodes().empty()) return false;
for (auto &node : GraphTraits::DFS(graph)) { for (auto &node : GraphTraits::DFS(graph)) {
for (const auto &pdnode : pattern_.nodes()) { for (const auto &pdnode : pattern_.nodes()) {
if (pdnode->Tell(&node)) { if (pdnode->Tell(&node)) {
VLOG(40) << "pdnode " << pdnode->name() << " marked"; VLOG(4) << "pdnode " << pdnode->name() << " marked";
pdnodes2nodes_[pdnode.get()].insert(&node); pdnodes2nodes_[pdnode.get()].insert(&node);
} }
} }
...@@ -112,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { ...@@ -112,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
// Check to early stop if some PDNode can't find matched Node. // Check to early stop if some PDNode can't find matched Node.
for (auto &pdnode : pattern_.nodes()) { for (auto &pdnode : pattern_.nodes()) {
if (!pdnodes2nodes_.count(pdnode.get())) { if (!pdnodes2nodes_.count(pdnode.get())) {
VLOG(40) << pdnode->name() << " can't find matched Node, early stop"; VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
// return false; // return false;
} }
} }
...@@ -121,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { ...@@ -121,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n); GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
} }
} }
VLOG(30) << pdnodes2nodes_.size() << " nodes marked"; VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
return !pdnodes2nodes_.empty(); return !pdnodes2nodes_.empty();
} }
...@@ -215,7 +215,7 @@ GraphPatternDetector::DetectPatterns() { ...@@ -215,7 +215,7 @@ GraphPatternDetector::DetectPatterns() {
// Extend a PDNode to subgraphs by deducing the connection relations defined // Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes. // in edges of PDNodes.
for (const auto &edge : pattern_.edges()) { for (const auto &edge : pattern_.edges()) {
VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name(); VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
// TODO(Superjomn) Fix bug here, the groups might be duplicate here. // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
// Each role has two PDNodes, which indicates two roles. // Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected. // Detect two Nodes that can match these two roles and they are connected.
...@@ -226,7 +226,7 @@ GraphPatternDetector::DetectPatterns() { ...@@ -226,7 +226,7 @@ GraphPatternDetector::DetectPatterns() {
// source -> target // source -> target
for (Node *source : pdnodes2nodes_[edge.first]) { for (Node *source : pdnodes2nodes_[edge.first]) {
for (Node *target : pdnodes2nodes_[edge.second]) { for (Node *target : pdnodes2nodes_[edge.second]) {
VLOG(80) << "check " << source->id() << " -- " << target->id(); VLOG(8) << "check " << source->id() << " -- " << target->id();
// TODO(Superjomn) add some prune strategies. // TODO(Superjomn) add some prune strategies.
for (const auto &group : pre_groups) { for (const auto &group : pre_groups) {
if (IsNodesLink(source, target)) { if (IsNodesLink(source, target)) {
...@@ -243,13 +243,12 @@ GraphPatternDetector::DetectPatterns() { ...@@ -243,13 +243,12 @@ GraphPatternDetector::DetectPatterns() {
} }
} }
} }
VLOG(30) << "step " << step << " get records: " << cur_groups.size(); VLOG(3) << "step " << step << " get records: " << cur_groups.size();
for (auto &group : cur_groups) { for (auto &group : cur_groups) {
for (auto &item : group.roles) { for (auto &item : group.roles) {
VLOG(40) << "node " << item.second->id() << " as " VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
<< item.first->name();
} }
VLOG(40) << "========================================================="; VLOG(4) << "=========================================================";
} }
} }
......
...@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) { ...@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl( std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const { std::unique_ptr<ir::Graph> graph) const {
const std::string graph_viz_path = Get<std::string>(kGraphVizPath); const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
VLOG(30) << "draw IR graph viz to " << graph_viz_path; VLOG(3) << "draw IR graph viz to " << graph_viz_path;
std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path)); std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
PADDLE_ENFORCE(fout->good()); PADDLE_ENFORCE(fout->good());
std::ostream& sout = *fout; std::ostream& sout = *fout;
......
...@@ -20,7 +20,7 @@ namespace ir { ...@@ -20,7 +20,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl( std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const { std::unique_ptr<ir::Graph> graph) const {
VLOG(30) << "Aplies MKL-DNN placement strategy."; VLOG(3) << "Aplies MKL-DNN placement strategy.";
for (const Node* n : graph->Nodes()) { for (const Node* n : graph->Nodes()) {
if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) { if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
n->Op()->SetAttr("use_mkldnn", true); n->Op()->SetAttr("use_mkldnn", true);
......
...@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc( ...@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat); string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
VarDesc repeated_var = CopyVarDesc(var_desc); VarDesc repeated_var = CopyVarDesc(var_desc);
repeated_var.SetName(new_gname); repeated_var.SetName(new_gname);
VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat; VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
return repeated_var; return repeated_var;
} }
return *var_desc; return *var_desc;
...@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl( ...@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
std::vector<ir::Node*> nodes = TopologySortOperations(*graph); std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
auto origin_nodes = graph->ReleaseNodes(); auto origin_nodes = graph->ReleaseNodes();
VLOG(30) << "origin nodes count: " << origin_nodes.size(); VLOG(3) << "origin nodes count: " << origin_nodes.size();
ir::Graph& result = *graph; ir::Graph& result = *graph;
// 1. record op nodes of different roles // 1. record op nodes of different roles
...@@ -137,7 +137,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl( ...@@ -137,7 +137,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
"%s.repeat.%d", repeated_op.Input("Variance")[0], i); "%s.repeat.%d", repeated_op.Input("Variance")[0], i);
bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]); bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]); bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to " VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name; << new_mean_name;
repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name); repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name); repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
......
...@@ -76,7 +76,7 @@ class Pass { ...@@ -76,7 +76,7 @@ class Pass {
attr_name); attr_name);
attrs_[attr_name] = attr; attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() { attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(30) << "deleting " << attr_name; VLOG(3) << "deleting " << attr_name;
delete attr; delete attr;
}; };
} }
......
...@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl( ...@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph, detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* graph) { Graph* graph) {
VLOG(40) << "get one concat pattern"; VLOG(4) << "get one concat pattern";
// fc // fc
GET_NODE(fc_w, detector.pattern()); GET_NODE(fc_w, detector.pattern());
GET_NODE(fc_bias, detector.pattern()); GET_NODE(fc_bias, detector.pattern());
......
...@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) { ...@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(40) << "handle SeqConv EltAdd Relu fuse"; VLOG(4) << "handle SeqConv EltAdd Relu fuse";
GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
......
...@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) { ...@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
TableItem item; TableItem item;
item.index = i; item.index = i;
item.length = vec[i + 1] - vec[i]; item.length = vec[i + 1] - vec[i];
VLOG(100) << "Add item to rank table " << item.index << " " << item.length; VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
items_.emplace_back(item); items_.emplace_back(item);
} }
// NOTE(yuyang18): // NOTE(yuyang18):
......
...@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) { ...@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
TEST(mixed_vector, ForEach) { TEST(mixed_vector, ForEach) {
vec<int> tmp; vec<int> tmp;
for (auto& v : tmp) { for (auto& v : tmp) {
VLOG(30) << v; VLOG(3) << v;
} }
} }
......
...@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR) { if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
VLOG(30) << "input " << in << " is not LodTensor"; VLOG(3) << "input " << in << " is not LodTensor";
return; return;
} }
out_var->SetLoDLevel(in_var->GetLoDLevel()); out_var->SetLoDLevel(in_var->GetLoDLevel());
...@@ -241,37 +241,37 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) { ...@@ -241,37 +241,37 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
const proto::OpProto::Attr &attr = GetProtoAttr(name); const proto::OpProto::Attr &attr = GetProtoAttr(name);
switch (attr.type()) { switch (attr.type()) {
case proto::AttrType::BOOLEANS: { case proto::AttrType::BOOLEANS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS"; << " from INTS to BOOLEANS";
this->attrs_[name] = std::vector<bool>(); this->attrs_[name] = std::vector<bool>();
break; break;
} }
case proto::AttrType::INTS: { case proto::AttrType::INTS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS"; << " from INTS to INTS";
this->attrs_[name] = std::vector<int>(); this->attrs_[name] = std::vector<int>();
break; break;
} }
case proto::AttrType::LONGS: { case proto::AttrType::LONGS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from LONGS to LONGS"; << " from LONGS to LONGS";
this->attrs_[name] = std::vector<int64_t>(); this->attrs_[name] = std::vector<int64_t>();
break; break;
} }
case proto::AttrType::FLOATS: { case proto::AttrType::FLOATS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS"; << " from INTS to FLOATS";
this->attrs_[name] = std::vector<float>(); this->attrs_[name] = std::vector<float>();
break; break;
} }
case proto::AttrType::STRINGS: { case proto::AttrType::STRINGS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS"; << " from INTS to STRINGS";
this->attrs_[name] = std::vector<std::string>(); this->attrs_[name] = std::vector<std::string>();
break; break;
} }
case proto::AttrType::BLOCKS: { case proto::AttrType::BLOCKS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS"; << " from INTS to BLOCKS";
this->SetBlocksAttr(name, std::vector<BlockDesc *>()); this->SetBlocksAttr(name, std::vector<BlockDesc *>());
return; return;
...@@ -505,13 +505,13 @@ void OpDesc::CheckAttrs() { ...@@ -505,13 +505,13 @@ void OpDesc::CheckAttrs() {
} }
void OpDesc::InferShape(const BlockDesc &block) const { void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(30) << "CompileTime infer shape on " << Type(); VLOG(3) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs(); InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_; auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
PADDLE_ENFORCE(static_cast<bool>(infer_shape), PADDLE_ENFORCE(static_cast<bool>(infer_shape),
"%s's infer_shape has not been registered", this->Type()); "%s's infer_shape has not been registered", this->Type());
CompileTimeInferShapeContext ctx(*this, block); CompileTimeInferShapeContext ctx(*this, block);
if (VLOG_IS_ON(100)) { if (VLOG_IS_ON(10)) {
std::ostringstream sout; std::ostringstream sout;
auto inames = this->InputArgumentNames(); auto inames = this->InputArgumentNames();
sout << " From ["; sout << " From [";
...@@ -522,7 +522,7 @@ void OpDesc::InferShape(const BlockDesc &block) const { ...@@ -522,7 +522,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
std::copy(onames.begin(), onames.end(), std::copy(onames.begin(), onames.end(),
std::ostream_iterator<std::string>(sout, ", ")); std::ostream_iterator<std::string>(sout, ", "));
sout << "]"; sout << "]";
VLOG(100) << sout.str(); VLOG(10) << sout.str();
} }
infer_shape(&ctx); infer_shape(&ctx);
} }
...@@ -613,7 +613,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const { ...@@ -613,7 +613,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto shape = var->GetShape(); auto shape = var->GetShape();
res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape); res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
} catch (...) { } catch (...) {
VLOG(50) << "GetDim of variable " << name << " error"; VLOG(5) << "GetDim of variable " << name << " error";
std::rethrow_exception(std::current_exception()); std::rethrow_exception(std::current_exception());
} }
return res; return res;
...@@ -630,7 +630,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims( ...@@ -630,7 +630,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s)); res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
} }
} catch (...) { } catch (...) {
VLOG(50) << "GetRepeatedDim of variable " << name << " error."; VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
std::rethrow_exception(std::current_exception()); std::rethrow_exception(std::current_exception());
} }
return res; return res;
......
...@@ -46,7 +46,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap( ...@@ -46,7 +46,7 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp( std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) { const proto::OpDesc& op_desc) {
VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be" VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) " "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead."; "instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
......
...@@ -139,7 +139,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { ...@@ -139,7 +139,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
} }
void OperatorBase::Run(const Scope& scope, const platform::Place& place) { void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(40) << place << " " << DebugStringEx(&scope); VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place); PADDLE_THROW("Cannot run operator on place %s", place);
...@@ -159,7 +159,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -159,7 +159,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
} else { } else {
RunImpl(scope, place); RunImpl(scope, place);
} }
VLOG(30) << place << " " << DebugStringEx(&scope); VLOG(3) << place << " " << DebugStringEx(&scope);
} }
bool OperatorBase::HasInputs(const std::string& name) const { bool OperatorBase::HasInputs(const std::string& name) const {
...@@ -716,14 +716,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -716,14 +716,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto expected_kernel_key = auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx)); this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(30) << "expected_kernel_key:" << expected_kernel_key; VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key); auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if (kernel_iter == kernels.end() && if (kernel_iter == kernels.end() &&
expected_kernel_key.library_type_ == LibraryType::kMKLDNN) { expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one"; VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
expected_kernel_key.library_type_ = LibraryType::kPlain; expected_kernel_key.library_type_ = LibraryType::kPlain;
expected_kernel_key.data_layout_ = DataLayout::kAnyLayout; expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
...@@ -775,8 +775,7 @@ void OperatorWithKernel::TransferInplaceVarsBack( ...@@ -775,8 +775,7 @@ void OperatorWithKernel::TransferInplaceVarsBack(
const Scope& scope, const std::vector<std::string>& inplace_vars, const Scope& scope, const std::vector<std::string>& inplace_vars,
const Scope& transfer_scope) const { const Scope& transfer_scope) const {
for (auto& var_name : inplace_vars) { for (auto& var_name : inplace_vars) {
VLOG(30) << "share inplace var " + var_name + VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
" back to it's original scope";
auto* original_tensor = auto* original_tensor =
GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name)); GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name));
auto* var = transfer_scope.FindVar(var_name); auto* var = transfer_scope.FindVar(var_name);
...@@ -817,7 +816,7 @@ Scope* OperatorWithKernel::TryTransferData( ...@@ -817,7 +816,7 @@ Scope* OperatorWithKernel::TryTransferData(
transfered_inplace_vars->emplace_back(var_name); transfered_inplace_vars->emplace_back(var_name);
} }
VLOG(30) << "Transform Variable " << var_name << " from " VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key; << kernel_type_for_var << " to " << expected_kernel_key;
// In the inference scenerio, the scopes will be reused across the // In the inference scenerio, the scopes will be reused across the
......
...@@ -208,7 +208,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -208,7 +208,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto &main_tensor = main_var->Get<LoDTensor>(); auto &main_tensor = main_var->Get<LoDTensor>();
if (!main_tensor.IsInitialized()) { if (!main_tensor.IsInitialized()) {
VLOG(30) << "one in var not inited, return!"; VLOG(3) << "one in var not inited, return!";
continue; continue;
} }
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
......
...@@ -162,7 +162,7 @@ Variable* Scope::VarInternal(const std::string& name) { ...@@ -162,7 +162,7 @@ Variable* Scope::VarInternal(const std::string& name) {
v = new Variable(); v = new Variable();
vars_[name].reset(v); vars_[name].reset(v);
VLOG(30) << "Create variable " << name; VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first); v->name_ = &(vars_.find(name)->first);
return v; return v;
} }
......
...@@ -206,7 +206,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, ...@@ -206,7 +206,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE(value->IsInitialized(), PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized."); "The value tensor should be initialized.");
if (ids.numel() == 0) { if (ids.numel() == 0) {
VLOG(30) << "keys is empty, please check data!"; VLOG(3) << "keys is empty, please check data!";
} else { } else {
int64_t value_width = value_->numel() / value_->dims()[0]; int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
......
...@@ -22,7 +22,7 @@ namespace framework { ...@@ -22,7 +22,7 @@ namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) { const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to " VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place; << dst_place;
src.check_memory_size(); src.check_memory_size();
...@@ -37,7 +37,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -37,7 +37,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
...@@ -77,7 +77,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -77,7 +77,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) { if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to " VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
...@@ -114,7 +114,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -114,7 +114,7 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
void TensorCopySync(const Tensor& src, const platform::Place& dst_place, void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) { Tensor* dst) {
VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place() VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place; << " to " << dst_place;
src.check_memory_size(); src.check_memory_size();
dst->Resize(src.dims()); dst->Resize(src.dims());
...@@ -125,7 +125,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -125,7 +125,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto size = src.numel() * SizeOfType(src.type()); auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data from " << src_place << " to " VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
...@@ -146,7 +146,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -146,7 +146,7 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_gpu_place(src_place) && } else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) { platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) { if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(30) << "Skip copy the same data from " << src_place << " to " VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
......
...@@ -39,7 +39,7 @@ void ThreadPool::Init() { ...@@ -39,7 +39,7 @@ void ThreadPool::Init() {
int num_threads = std::thread::hardware_concurrency(); int num_threads = std::thread::hardware_concurrency();
if (FLAGS_dist_threadpool_size > 0) { if (FLAGS_dist_threadpool_size > 0) {
num_threads = FLAGS_dist_threadpool_size; num_threads = FLAGS_dist_threadpool_size;
VLOG(10) << "set dist_threadpool_size to " << num_threads; VLOG(1) << "set dist_threadpool_size to " << num_threads;
} }
PADDLE_ENFORCE_GT(num_threads, 0); PADDLE_ENFORCE_GT(num_threads, 0);
threadpool_.reset(new ThreadPool(num_threads)); threadpool_.reset(new ThreadPool(num_threads));
......
...@@ -61,7 +61,7 @@ size_t VarDesc::GetTensorDescNum() const { ...@@ -61,7 +61,7 @@ size_t VarDesc::GetTensorDescNum() const {
void VarDesc::SetShapes( void VarDesc::SetShapes(
const std::vector<std::vector<int64_t>> &multiple_dims) { const std::vector<std::vector<int64_t>> &multiple_dims) {
if (multiple_dims.size() != GetTensorDescNum()) { if (multiple_dims.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size() VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
<< "). The Reader is going to be reinitialized."; << "). The Reader is going to be reinitialized.";
...@@ -94,7 +94,7 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) { ...@@ -94,7 +94,7 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
void VarDesc::SetDataTypes( void VarDesc::SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type) { const std::vector<proto::VarType::Type> &multiple_data_type) {
if (multiple_data_type.size() != GetTensorDescNum()) { if (multiple_data_type.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given data types(" VLOG(3) << "WARNING: The number of given data types("
<< multiple_data_type.size() << multiple_data_type.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
...@@ -139,7 +139,7 @@ void VarDesc::SetLoDLevel(int32_t lod_level) { ...@@ -139,7 +139,7 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) { void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
if (multiple_lod_level.size() != GetTensorDescNum()) { if (multiple_lod_level.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given lod_levels(" VLOG(3) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size() << multiple_lod_level.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
......
...@@ -55,7 +55,7 @@ bool IsPersistable(const framework::VarDesc *var) { ...@@ -55,7 +55,7 @@ bool IsPersistable(const framework::VarDesc *var) {
bool AnalysisPredictor::Init( bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope, const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) { const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(30) << "Predictor::init()"; VLOG(3) << "Predictor::init()";
if (FLAGS_profile) { if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'"; LOG(INFO) << "You can turn off by set gflags '-profile false'";
...@@ -169,7 +169,7 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) { ...@@ -169,7 +169,7 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, std::vector<PaddleTensor> *output_data,
int batch_size) { int batch_size) {
VLOG(30) << "Predictor::predict"; VLOG(3) << "Predictor::predict";
inference::Timer timer; inference::Timer timer;
timer.tic(); timer.tic();
// set feed variable // set feed variable
...@@ -188,7 +188,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -188,7 +188,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches"; LOG(ERROR) << "fail to get fetches";
return false; return false;
} }
VLOG(30) << "predict cost: " << timer.toc() << "ms"; VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
...@@ -198,7 +198,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -198,7 +198,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs, bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(30) << "Predictor::set_feed"; VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) { if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get " LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size(); << inputs.size();
...@@ -275,7 +275,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch, ...@@ -275,7 +275,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs, bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(30) << "Predictor::get_fetch"; VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetchs_.size()); outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) { for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col")); int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
...@@ -339,7 +339,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { ...@@ -339,7 +339,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <> template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(30) << "create AnalysisConfig"; VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memeroy // 1. GPU memeroy
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
...@@ -353,7 +353,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -353,7 +353,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" + std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory); std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag); flags.push_back(flag);
VLOG(30) << "set flag: " << flag; VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags); framework::InitGflags(flags);
} }
} }
......
...@@ -152,7 +152,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -152,7 +152,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches"; LOG(ERROR) << "fail to get fetches";
return false; return false;
} }
VLOG(30) << "predict cost: " << timer.toc() << "ms"; VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......
...@@ -44,7 +44,7 @@ void Main() { ...@@ -44,7 +44,7 @@ void Main() {
config.fraction_of_gpu_memory = 0.1; // set by yourself config.fraction_of_gpu_memory = 0.1; // set by yourself
predictor = CreatePaddlePredictor(config); predictor = CreatePaddlePredictor(config);
VLOG(30) << "begin to process data"; VLOG(3) << "begin to process data";
// Just a single batch of data. // Just a single batch of data.
std::string line; std::string line;
std::ifstream file(FLAGS_data); std::ifstream file(FLAGS_data);
...@@ -59,13 +59,13 @@ void Main() { ...@@ -59,13 +59,13 @@ void Main() {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32; input.dtype = PaddleDType::FLOAT32;
VLOG(30) << "run executor"; VLOG(3) << "run executor";
std::vector<PaddleTensor> output; std::vector<PaddleTensor> output;
predictor->Run({input}, &output, 1); predictor->Run({input}, &output, 1);
VLOG(30) << "output.size " << output.size(); VLOG(3) << "output.size " << output.size();
auto& tensor = output.front(); auto& tensor = output.front();
VLOG(30) << "output: " << SummaryTensor(tensor); VLOG(3) << "output: " << SummaryTensor(tensor);
// compare with reference result // compare with reference result
CheckOutput(FLAGS_refer, tensor); CheckOutput(FLAGS_refer, tensor);
......
...@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep, ...@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
} }
Record ProcessALine(const std::string& line) { Record ProcessALine(const std::string& line) {
VLOG(30) << "process a line"; VLOG(3) << "process a line";
std::vector<std::string> columns; std::vector<std::string> columns;
split(line, '\t', &columns); split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL) CHECK_EQ(columns.size(), 2UL)
...@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) { ...@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
for (auto& s : shape_strs) { for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s)); record.shape.push_back(std::stoi(s));
} }
VLOG(30) << "data size " << record.data.size(); VLOG(3) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size(); VLOG(3) << "data shape size " << record.shape.size();
return record; return record;
} }
...@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) { ...@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
file.close(); file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
VLOG(30) << "predictor output numel " << numel; VLOG(3) << "predictor output numel " << numel;
VLOG(30) << "reference output numel " << refer.data.size(); VLOG(3) << "reference output numel " << refer.data.size();
CHECK_EQ(numel, refer.data.size()); CHECK_EQ(numel, refer.data.size());
switch (output.dtype) { switch (output.dtype) {
case PaddleDType::INT64: { case PaddleDType::INT64: {
......
...@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) { ...@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// parameter. // parameter.
if (var_name == "feed" || var_name == "fetch") continue; if (var_name == "feed" || var_name == "fetch") continue;
if (var->Type() == typeid(framework::LoDTensorArray)) { if (var->Type() == typeid(framework::LoDTensorArray)) {
VLOG(40) << "collect " << var_name; VLOG(4) << "collect " << var_name;
arrays_.push_back(var->GetMutable<framework::LoDTensorArray>()); arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
} }
} }
...@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) { ...@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
CollectTensorArrays(kid); CollectTensorArrays(kid);
} }
VLOG(30) << "Collect " << arrays_.size() << " arrays"; VLOG(3) << "Collect " << arrays_.size() << " arrays";
flag_ = false; flag_ = false;
} }
} }
......
...@@ -78,7 +78,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope, ...@@ -78,7 +78,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
for (auto* var : global_block.AllVars()) { for (auto* var : global_block.AllVars()) {
if (IsPersistable(var)) { if (IsPersistable(var)) {
VLOG(30) << "persistable variable's name: " << var->Name(); VLOG(3) << "persistable variable's name: " << var->Name();
framework::VarDesc* new_var = load_block->Var(var->Name()); framework::VarDesc* new_var = load_block->Var(var->Name());
new_var->SetShape(var->GetShape()); new_var->SetShape(var->GetShape());
...@@ -121,7 +121,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor, ...@@ -121,7 +121,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
const std::string& dirname) { const std::string& dirname) {
std::string model_filename = dirname + "/__model__"; std::string model_filename = dirname + "/__model__";
std::string program_desc_str; std::string program_desc_str;
VLOG(30) << "loading model from " << model_filename; VLOG(3) << "loading model from " << model_filename;
ReadBinaryFile(model_filename, &program_desc_str); ReadBinaryFile(model_filename, &program_desc_str);
std::unique_ptr<framework::ProgramDesc> main_program( std::unique_ptr<framework::ProgramDesc> main_program(
......
...@@ -53,7 +53,7 @@ class Pool2dOpConverter : public OpConverter { ...@@ -53,7 +53,7 @@ class Pool2dOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc &op, void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) override { const framework::Scope &scope, bool test_mode) override {
VLOG(40) VLOG(4)
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias"; << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -27,7 +27,7 @@ struct Record { ...@@ -27,7 +27,7 @@ struct Record {
}; };
Record ProcessALine(const std::string &line) { Record ProcessALine(const std::string &line) {
VLOG(30) << "process a line"; VLOG(3) << "process a line";
std::vector<std::string> columns; std::vector<std::string> columns;
split(line, '\t', &columns); split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL) CHECK_EQ(columns.size(), 2UL)
...@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) { ...@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
for (auto &s : shape_strs) { for (auto &s : shape_strs) {
record.shape.push_back(std::stoi(s)); record.shape.push_back(std::stoi(s));
} }
VLOG(30) << "data size " << record.data.size(); VLOG(3) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size(); VLOG(3) << "data shape size " << record.shape.size();
return record; return record;
} }
......
...@@ -76,11 +76,11 @@ class ChunkedAllocator : public Allocator { ...@@ -76,11 +76,11 @@ class ChunkedAllocator : public Allocator {
default_allocator_ = raw_allocator_; default_allocator_ = raw_allocator_;
} else { } else {
if (capacity == 1) { if (capacity == 1) {
VLOG(10) << "Create BestFitAllocator with chunk_size " VLOG(1) << "Create BestFitAllocator with chunk_size "
<< max_chunk_size_; << max_chunk_size_;
default_allocator_ = CreateAllocatorWithChunk(); default_allocator_ = CreateAllocatorWithChunk();
} else { } else {
VLOG(10) << "Create AutoIncrementAllocator with chunk_size " VLOG(1) << "Create AutoIncrementAllocator with chunk_size "
<< max_chunk_size_ << " and capacity " << capacity; << max_chunk_size_ << " and capacity " << capacity;
default_allocator_ = std::make_shared<AutoIncrementAllocator>( default_allocator_ = std::make_shared<AutoIncrementAllocator>(
[this] { return std::move(CreateAllocatorWithChunk()); }, capacity); [this] { return std::move(CreateAllocatorWithChunk()); }, capacity);
......
...@@ -86,18 +86,18 @@ struct NaiveAllocator { ...@@ -86,18 +86,18 @@ struct NaiveAllocator {
template <> template <>
void *Alloc<platform::CPUPlace>(const platform::CPUPlace &place, size_t size) { void *Alloc<platform::CPUPlace>(const platform::CPUPlace &place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); VLOG(1) << "Allocate " << size << " bytes on " << platform::Place(place);
void *p = GetCPUBuddyAllocator()->Alloc(size); void *p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) { if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size); memset(p, 0xEF, size);
} }
VLOG(100) << " pointer=" << p; VLOG(10) << " pointer=" << p;
return p; return p;
} }
template <> template <>
void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p) { void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); VLOG(1) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p); GetCPUBuddyAllocator()->Free(p);
} }
...@@ -124,7 +124,7 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) { ...@@ -124,7 +124,7 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)), std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(100) << "\n\nNOTE: each GPU device use " VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100 << FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n" << "% of GPU memory.\n"
<< "You can set GFlags environment variable '" << "You can set GFlags environment variable '"
......
...@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator( ...@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
system_allocator_(std::move(system_allocator)) {} system_allocator_(std::move(system_allocator)) {}
BuddyAllocator::~BuddyAllocator() { BuddyAllocator::~BuddyAllocator() {
VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these " VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed"; "have actually been freed";
while (!pool_.empty()) { while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin())); auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")"; VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// acquire the allocator lock // acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size " VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size; << size;
// if the allocation is huge, send directly to the system allocator // if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) { if (size > max_chunk_size_) {
VLOG(100) << "Allocate from system allocator."; VLOG(10) << "Allocate from system allocator.";
return SystemAlloc(size); return SystemAlloc(size);
} }
...@@ -77,7 +77,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -77,7 +77,7 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
return nullptr; return nullptr;
} }
} else { } else {
VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it) VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address " << " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data(); << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
} }
...@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) { ...@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
// Acquire the allocator lock // Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
VLOG(100) << "Free from address " << block; VLOG(10) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) { if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
VLOG(100) << "Free directly from system allocator"; VLOG(10) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_), system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_)); block->index(cache_));
...@@ -116,7 +116,7 @@ void BuddyAllocator::Free(void* p) { ...@@ -116,7 +116,7 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the right buddy // Trying to merge the right buddy
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
VLOG(100) << "Merging this block " << block << " with its right buddy " VLOG(10) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_); << block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_); auto right_buddy = block->right_buddy(cache_);
...@@ -134,7 +134,7 @@ void BuddyAllocator::Free(void* p) { ...@@ -134,7 +134,7 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the left buddy // Trying to merge the left buddy
if (block->has_left_buddy(cache_)) { if (block->has_left_buddy(cache_)) {
VLOG(100) << "Merging this block " << block << " with its left buddy " VLOG(10) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_); << block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_); auto left_buddy = block->left_buddy(cache_);
...@@ -151,7 +151,7 @@ void BuddyAllocator::Free(void* p) { ...@@ -151,7 +151,7 @@ void BuddyAllocator::Free(void* p) {
} }
// Dumping this block into pool // Dumping this block into pool
VLOG(100) << "Inserting free block (" << block << ", " VLOG(10) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")"; << block->total_size(cache_) << ")";
pool_.insert( pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block)); IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
...@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) { ...@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0; size_t index = 0;
void* p = system_allocator_->Alloc(&index, size); void* p = system_allocator_->Alloc(&index, size);
VLOG(100) << "Allocated " << p << " from system allocator."; VLOG(10) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr; if (p == nullptr) return nullptr;
...@@ -200,7 +200,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { ...@@ -200,7 +200,7 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
if (p == nullptr) return pool_.end(); if (p == nullptr) return pool_.end();
VLOG(100) << "Creating and inserting new block " << p VLOG(10) << "Creating and inserting new block " << p
<< " from system allocator"; << " from system allocator";
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index, static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
...@@ -245,18 +245,18 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, ...@@ -245,18 +245,18 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
auto block = static_cast<MemoryBlock*>(std::get<2>(*it)); auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it); pool_.erase(it);
VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_) VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into"; << ") into";
block->split(&cache_, size); block->split(&cache_, size);
VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_) VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")"; << ")";
block->set_type(&cache_, MemoryBlock::ARENA_CHUNK); block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist // the rest of memory if exist
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", " VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")"; << block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert( pool_.insert(
...@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() { ...@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
return; return;
} }
VLOG(100) << "Return block " << block << " to fallback allocator."; VLOG(10) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() { ...@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool)); MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
VLOG(100) << "Return block " << block << " to base allocator."; VLOG(10) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
......
...@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const { ...@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
return existing_desc->second; return existing_desc->second;
} else { } else {
auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block); auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type; VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
PADDLE_ASSERT(desc->check_guards()); PADDLE_ASSERT(desc->check_guards());
return *reinterpret_cast<const MemoryBlock::Desc*>(block); return *reinterpret_cast<const MemoryBlock::Desc*>(block);
} }
......
...@@ -100,7 +100,7 @@ class ActivationGradKernel ...@@ -100,7 +100,7 @@ class ActivationGradKernel
auto x = framework::EigenVector<T>::Flatten(*X); auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} else { } else {
VLOG(100) << " Inplace activation "; VLOG(10) << " Inplace activation ";
auto x = framework::EigenVector<T>::Flatten(*dX); auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} }
......
...@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase { ...@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
} else { } else {
offset = static_cast<size_t>(*i_tensor.data<int64_t>()); offset = static_cast<size_t>(*i_tensor.data<int64_t>());
} }
VLOG(100) << " Offset = " << offset; VLOG(10) << " Offset = " << offset;
return offset; return offset;
} }
}; };
......
...@@ -148,7 +148,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { ...@@ -148,7 +148,7 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
size_t start_offset = lod_and_offset.second.first; size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second; size_t end_offset = lod_and_offset.second.second;
VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " [" VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]"; << ", " << end_offset << "]";
// Copy data // Copy data
PADDLE_ENFORCE_GE(end_offset, start_offset); PADDLE_ENFORCE_GE(end_offset, start_offset);
......
...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
VLOG(30) << "Setting descriptors."; VLOG(3) << "Setting descriptors.";
std::vector<int> dims; std::vector<int> dims;
std::vector<int> strides; std::vector<int> strides;
if (data_layout == DataLayout::kNCHW) { if (data_layout == DataLayout::kNCHW) {
......
...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, ...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores); auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
auto selected_items = ToMap(items, high_level.back()); auto selected_items = ToMap(items, high_level.back());
VLOG(30) << "selected_items:"; VLOG(3) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) { for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(30) << "offset:" << i; VLOG(3) << "offset:" << i;
for (auto &item : selected_items[i]) { for (auto &item : selected_items[i]) {
VLOG(30) << ItemToString(item); VLOG(3) << ItemToString(item);
} }
} }
...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems( ...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
} }
result.emplace_back(items); result.emplace_back(items);
} }
VLOG(30) << "SelectTopBeamSizeItems result size " << result.size(); VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) { for (auto &items : result) {
VLOG(30) << "item set:"; VLOG(3) << "item set:";
for (auto &item : items) { for (auto &item : items) {
VLOG(30) << ItemToString(item); VLOG(3) << ItemToString(item);
} }
} }
......
...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0."); PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
if (n == 1) { if (n == 1) {
VLOG(30) << "Warning: concat op have only one input, may waste memory"; VLOG(3) << "Warning: concat op have only one input, may waste memory";
} }
auto out_dims = ins[0]; auto out_dims = ins[0];
......
...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase { ...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
auto col = Attr<int>("col"); auto col = Attr<int>("col");
VLOG(30) << "Feed Var " << feed_var_name << "'s " << col VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var "
<< " column to var " << out_name; << out_name;
auto &feed_list = feed_var->Get<framework::FeedFetchList>(); auto &feed_list = feed_var->Get<framework::FeedFetchList>();
auto &feed_item = feed_list.at(static_cast<size_t>(col)); auto &feed_item = feed_list.at(static_cast<size_t>(col));
......
...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase { ...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
TensorCopySync(src_item, platform::CPUPlace(), &dst_item); TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
dst_item.set_lod(src_item.lod()); dst_item.set_lod(src_item.lod());
VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name; VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;
} }
}; };
......
...@@ -48,7 +48,7 @@ static void SplitTensorAndMoveTensorToScopes( ...@@ -48,7 +48,7 @@ static void SplitTensorAndMoveTensorToScopes(
auto lod_tensors = tensor.SplitLoDTensor(places); auto lod_tensors = tensor.SplitLoDTensor(places);
for (auto &lod : lod_tensors) { for (auto &lod : lod_tensors) {
VLOG(30) << lod.dims(); VLOG(3) << lod.dims();
} }
if (num_sub_scopes == 0) { if (num_sub_scopes == 0) {
num_sub_scopes = lod_tensors.size(); num_sub_scopes = lod_tensors.size();
...@@ -263,7 +263,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -263,7 +263,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
if (s == framework::kEmptyVarName) { if (s == framework::kEmptyVarName) {
continue; continue;
} }
VLOG(30) << "Moving " << s; VLOG(3) << "Moving " << s;
CopyOrShare(*sub_scopes[0]->FindVar(s), place, scope.FindVar(s)); CopyOrShare(*sub_scopes[0]->FindVar(s), place, scope.FindVar(s));
} }
WaitOnPlaces(places); WaitOnPlaces(places);
...@@ -277,7 +277,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -277,7 +277,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
if (s == framework::kEmptyVarName) { if (s == framework::kEmptyVarName) {
continue; continue;
} }
VLOG(30) << "Accumulating " << s; VLOG(3) << "Accumulating " << s;
if (s == framework::kEmptyVarName) continue; if (s == framework::kEmptyVarName) continue;
std::string tmp_name; std::string tmp_name;
auto *tmp = sub_scopes[0]->Var(&tmp_name); auto *tmp = sub_scopes[0]->Var(&tmp_name);
...@@ -289,7 +289,7 @@ class ParallelDoGradOp : public framework::OperatorBase { ...@@ -289,7 +289,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp( auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}}, "sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{{"use_mkldnn", {false}}}); framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(100) << sum_op->DebugStringEx(sub_scopes[0]); VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]); sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]); WaitOnPlace(places[0]);
} }
...@@ -316,7 +316,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -316,7 +316,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
auto *grad = new framework::OpDesc(); auto *grad = new framework::OpDesc();
grad->SetType("parallel_do_grad"); grad->SetType("parallel_do_grad");
for (auto &input_param : this->InputNames()) { for (auto &input_param : this->InputNames()) {
VLOG(30) << input_param; VLOG(3) << input_param;
grad->SetInput(input_param, this->Input(input_param)); grad->SetInput(input_param, this->Input(input_param));
if (input_param != kPlaces) { if (input_param != kPlaces) {
grad->SetOutput(framework::GradVarName(input_param), grad->SetOutput(framework::GradVarName(input_param),
......
...@@ -34,7 +34,7 @@ class WriteToArrayOp : public ArrayOp { ...@@ -34,7 +34,7 @@ class WriteToArrayOp : public ArrayOp {
auto *out = auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>(); scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>();
if (offset >= out->size()) { if (offset >= out->size()) {
VLOG(100) << "Resize " << Output("Out") << " from " << out->size() VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
<< " to " << offset + 1; << " to " << offset + 1;
out->resize(offset + 1); out->resize(offset + 1);
} }
...@@ -47,7 +47,7 @@ class WriteToArrayOp : public ArrayOp { ...@@ -47,7 +47,7 @@ class WriteToArrayOp : public ArrayOp {
TensorCopy(x_tensor, place, dev_ctx, out_tensor); TensorCopy(x_tensor, place, dev_ctx, out_tensor);
} else { } else {
VLOG(100) << "WARNING: The input tensor 'x_tensor' holds no memory, so " VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
"nothing has been written to output array[" "nothing has been written to output array["
<< offset << "]."; << offset << "].";
} }
...@@ -104,7 +104,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference { ...@@ -104,7 +104,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
framework::BlockDesc *block) const override { framework::BlockDesc *block) const override {
auto x_name = op_desc.Input("X")[0]; auto x_name = op_desc.Input("X")[0];
auto out_name = op_desc.Output("Out")[0]; auto out_name = op_desc.Output("Out")[0];
VLOG(100) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY"; VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
auto &out = block->FindRecursiveOrCreateVar(out_name); auto &out = block->FindRecursiveOrCreateVar(out_name);
out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY); out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
auto *x = block->FindVarRecursive(x_name); auto *x = block->FindVarRecursive(x_name);
...@@ -139,7 +139,7 @@ class ReadFromArrayOp : public ArrayOp { ...@@ -139,7 +139,7 @@ class ReadFromArrayOp : public ArrayOp {
framework::TensorCopy(x_array[offset], place, dev_ctx, out_tensor); framework::TensorCopy(x_array[offset], place, dev_ctx, out_tensor);
out_tensor->set_lod(x_array[offset].lod()); out_tensor->set_lod(x_array[offset].lod());
} else { } else {
VLOG(100) << "offset " << offset << " >= " << x_array.size(); VLOG(10) << "offset " << offset << " >= " << x_array.size();
} }
} }
}; };
......
...@@ -132,14 +132,14 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -132,14 +132,14 @@ class WhileGradOp : public framework::OperatorBase {
for (auto cur_scope_iter = step_scopes->rbegin(); for (auto cur_scope_iter = step_scopes->rbegin();
cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) { cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) {
VLOG(30) << "Start backward at time_step " VLOG(3) << "Start backward at time_step "
<< cur_scope_iter - step_scopes->rbegin(); << cur_scope_iter - step_scopes->rbegin();
framework::Scope &cur_scope = **cur_scope_iter; framework::Scope &cur_scope = **cur_scope_iter;
// Link OG from outside to inside // Link OG from outside to inside
for (size_t i = 0; i < outside_og_names.size(); ++i) { for (size_t i = 0; i < outside_og_names.size(); ++i) {
auto outside_og_name = outside_og_names[i]; auto outside_og_name = outside_og_names[i];
auto inside_og_name = inside_og_names[i]; auto inside_og_name = inside_og_names[i];
VLOG(80) << "Linking outside " << outside_og_name << " --> inside " VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
<< inside_og_name; << inside_og_name;
if (scope.FindVar(outside_og_name) == nullptr) { if (scope.FindVar(outside_og_name) == nullptr) {
continue; continue;
...@@ -162,11 +162,11 @@ class WhileGradOp : public framework::OperatorBase { ...@@ -162,11 +162,11 @@ class WhileGradOp : public framework::OperatorBase {
auto &outside_array = og_outside.Get<framework::LoDTensorArray>(); auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
auto &inside_array = auto &inside_array =
detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>()); detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
VLOG(80) << outside_og_name << " size = " << outside_array.size(); VLOG(8) << outside_og_name << " size = " << outside_array.size();
inside_array.resize(outside_array.size()); inside_array.resize(outside_array.size());
for (size_t j = 0; j < inside_array.size(); ++j) { for (size_t j = 0; j < inside_array.size(); ++j) {
VLOG(80) << j << " " << outside_array[j].numel(); VLOG(8) << j << " " << outside_array[j].numel();
if (outside_array[j].numel() != 0) { if (outside_array[j].numel() != 0) {
inside_array[j].set_lod(outside_array[j].lod()); inside_array[j].set_lod(outside_array[j].lod());
inside_array[j].ShareDataWith(outside_array[j]); inside_array[j].ShareDataWith(outside_array[j]);
...@@ -292,7 +292,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker { ...@@ -292,7 +292,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
auto igs = InputGrad(kX, /*do not drop empty gradient*/ false); auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
for (auto &each_ig : igs) { for (auto &each_ig : igs) {
if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) { if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
VLOG(80) << "Ignore " << each_ig; VLOG(8) << "Ignore " << each_ig;
each_ig = framework::kEmptyVarName; each_ig = framework::kEmptyVarName;
} }
} }
...@@ -356,7 +356,7 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference { ...@@ -356,7 +356,7 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i])); auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
auto *g_var = block->FindVarRecursive(pg_ig_names[i]); auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
if (g_var != nullptr) { // Gradient could be @EMPTY@ if (g_var != nullptr) { // Gradient could be @EMPTY@
VLOG(50) << "Setting " << pg_ig_names[i] << " following " << p_names[i] VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
<< " type: " << p_var.GetType(); << " type: " << p_var.GetType();
g_var->SetType(p_var.GetType()); g_var->SetType(p_var.GetType());
g_var->SetDataType(p_var.GetDataType()); g_var->SetDataType(p_var.GetDataType());
......
...@@ -151,11 +151,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { ...@@ -151,11 +151,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
// Currently tensor core is only enabled using this algo // Currently tensor core is only enabled using this algo
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
half_float = true; half_float = true;
VLOG(50) << "use cudnn_tensor_op_math"; VLOG(5) << "use cudnn_tensor_op_math";
} else { } else {
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType( CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH)); cudnn_conv_desc, CUDNN_DEFAULT_MATH));
VLOG(50) << "NOT use cudnn_tensor_op_math"; VLOG(5) << "NOT use cudnn_tensor_op_math";
} }
#endif #endif
......
...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() { ...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); } void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
void AsyncBRPCServer::WaitServerReady() { void AsyncBRPCServer::WaitServerReady() {
VLOG(30) << "AsyncGRPCServer is wait server ready"; VLOG(3) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(30) << "AsyncGRPCServer WaitSeverReady"; VLOG(3) << "AsyncGRPCServer WaitSeverReady";
} }
}; // namespace distributed }; // namespace distributed
......
...@@ -40,7 +40,7 @@ void GRPCClient::SendComplete() { ...@@ -40,7 +40,7 @@ void GRPCClient::SendComplete() {
std::unique_lock<std::mutex> lk(completed_mutex_); std::unique_lock<std::mutex> lk(completed_mutex_);
if (!completed_) { if (!completed_) {
for (auto& it : channels_) { for (auto& it : channels_) {
VLOG(30) << "send complete message to " << it.first; VLOG(3) << "send complete message to " << it.first;
this->AsyncSendComplete(it.first); this->AsyncSendComplete(it.first);
} }
PADDLE_ENFORCE(this->Wait(), "internal grpc error"); PADDLE_ENFORCE(this->Wait(), "internal grpc error");
...@@ -83,7 +83,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep, ...@@ -83,7 +83,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_); SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin"; VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = nullptr; s->response_call_back_ = nullptr;
...@@ -144,7 +144,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep, ...@@ -144,7 +144,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
::grpc::ByteBuffer buf; ::grpc::ByteBuffer buf;
RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf); RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin"; VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -192,7 +192,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep, ...@@ -192,7 +192,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val); SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin"; VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -330,14 +330,14 @@ void GRPCClient::Proceed() { ...@@ -330,14 +330,14 @@ void GRPCClient::Proceed() {
void* tag = nullptr; void* tag = nullptr;
bool ok = false; bool ok = false;
VLOG(30) << "GRPCClient Proceed begin"; VLOG(3) << "GRPCClient Proceed begin";
while (!stopped_ && cq_.Next(&tag, &ok)) { while (!stopped_ && cq_.Next(&tag, &ok)) {
BaseProcessor* c = static_cast<BaseProcessor*>(tag); BaseProcessor* c = static_cast<BaseProcessor*>(tag);
GPR_ASSERT(ok); GPR_ASSERT(ok);
PADDLE_ENFORCE(c); PADDLE_ENFORCE(c);
if (c->status_.ok()) { if (c->status_.ok()) {
VLOG(30) << c->GetVarHandlePtr()->String() << " process"; VLOG(3) << c->GetVarHandlePtr()->String() << " process";
c->Process(); c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) { } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
// FIXME(gongwb): parse error_details? // FIXME(gongwb): parse error_details?
...@@ -372,7 +372,7 @@ void GRPCClient::Proceed() { ...@@ -372,7 +372,7 @@ void GRPCClient::Proceed() {
sync_cond_.notify_all(); sync_cond_.notify_all();
} }
} }
VLOG(30) << "GRPCClient Proceed end"; VLOG(3) << "GRPCClient Proceed end";
} }
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) { std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
......
...@@ -100,7 +100,7 @@ class RequestSend final : public RequestBase { ...@@ -100,7 +100,7 @@ class RequestSend final : public RequestBase {
void Process() override { void Process() override {
std::string varname = GetReqName(); std::string varname = GetReqName();
VLOG(40) << "RequestSend var_name:" << varname; VLOG(4) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar(); auto invar = request_->GetVar();
...@@ -137,7 +137,7 @@ class RequestGet final : public RequestBase { ...@@ -137,7 +137,7 @@ class RequestGet final : public RequestBase {
// proc request. // proc request.
std::string varname = request_.varname(); std::string varname = request_.varname();
int trainer_id = request_.trainer_id(); int trainer_id = request_.trainer_id();
VLOG(40) << "RequestGet " << varname; VLOG(4) << "RequestGet " << varname;
auto scope = request_handler_->scope(); auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname); auto invar = scope->FindVar(varname);
...@@ -184,7 +184,7 @@ class RequestPrefetch final : public RequestBase { ...@@ -184,7 +184,7 @@ class RequestPrefetch final : public RequestBase {
std::string in_var_name = request_->Varname(); std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname(); std::string out_var_name = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name; << " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
...@@ -233,7 +233,7 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -233,7 +233,7 @@ class RequestCheckpointNotify final : public RequestBase {
std::string checkpoint_dir = request_->OutVarname(); std::string checkpoint_dir = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir; << ", dir: " << checkpoint_dir;
request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr, request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
...@@ -248,10 +248,10 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -248,10 +248,10 @@ class RequestCheckpointNotify final : public RequestBase {
}; };
void AsyncGRPCServer::WaitServerReady() { void AsyncGRPCServer::WaitServerReady() {
VLOG(40) << "AsyncGRPCServer is wait server ready"; VLOG(4) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(40) << "AsyncGRPCServer WaitSeverReady"; VLOG(4) << "AsyncGRPCServer WaitSeverReady";
} }
// Define an option subclass in order to disable SO_REUSEPORT for the // Define an option subclass in order to disable SO_REUSEPORT for the
...@@ -302,15 +302,14 @@ void AsyncGRPCServer::StartServer() { ...@@ -302,15 +302,14 @@ void AsyncGRPCServer::StartServer() {
reqs.reserve(kRequestBufSize); reqs.reserve(kRequestBufSize);
for (int i = 0; i < kRequestBufSize; i++) { for (int i = 0; i < kRequestBufSize; i++) {
VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i;
<< " I: " << i;
TryToRegisterNewOne(rpc_name, i); TryToRegisterNewOne(rpc_name, i);
} }
for (int i = 0; i < threadnum; i++) { for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(40) << t.first << " creates threads!"; VLOG(4) << t.first << " creates threads!";
} }
} }
...@@ -327,7 +326,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -327,7 +326,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second; auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) { for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join(); threads[i]->join();
VLOG(40) << t.first << " threads ends!"; VLOG(4) << t.first << " threads ends!";
} }
} }
} }
...@@ -335,7 +334,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -335,7 +334,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() { void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) { for (auto& t : rpc_cq_) {
t.second->Shutdown(); t.second->Shutdown();
VLOG(40) << t.first << " queue shutdown!"; VLOG(4) << t.first << " queue shutdown!";
} }
} }
...@@ -344,7 +343,7 @@ void AsyncGRPCServer::ShutDownImpl() { ...@@ -344,7 +343,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true; is_shut_down_ = true;
ShutdownQueue(); ShutdownQueue();
VLOG(40) << "server_ shutdown!"; VLOG(4) << "server_ shutdown!";
server_->Shutdown(); server_->Shutdown();
} }
...@@ -352,11 +351,11 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -352,11 +351,11 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) { int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_); std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) { if (is_shut_down_) {
VLOG(40) << "shutdown, do not TryToRegisterNewSendOne"; VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
return; return;
} }
VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id; << " REQ ID: " << req_id;
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
...@@ -378,7 +377,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -378,7 +377,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
reqs[req_id] = b; reqs[req_id] = b;
VLOG(40) << "Create RequestSend status:" << b->Status(); VLOG(4) << "Create RequestSend status:" << b->Status();
} }
void AsyncGRPCServer::HandleRequest( void AsyncGRPCServer::HandleRequest(
...@@ -388,14 +387,14 @@ void AsyncGRPCServer::HandleRequest( ...@@ -388,14 +387,14 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false; bool ok = false;
while (true) { while (true) {
VLOG(40) << "HandleRequest " << rpc_name << " wait next"; VLOG(4) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) { if (!cq->Next(&tag, &ok)) {
VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!"; VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!";
break; break;
} }
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag)); int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next"; << " get next";
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
...@@ -406,7 +405,7 @@ void AsyncGRPCServer::HandleRequest( ...@@ -406,7 +405,7 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id]; base = reqs[req_id];
} }
VLOG(30) << base->Status2String(rpc_name); VLOG(3) << base->Status2String(rpc_name);
// reference: // reference:
// https://github.com/tensorflow/tensorflow/issues/5596 // https://github.com/tensorflow/tensorflow/issues/5596
......
...@@ -75,7 +75,7 @@ class VarHandle { ...@@ -75,7 +75,7 @@ class VarHandle {
wait_cond_.wait(lk, [this] { return status_ != kDefaultState; }); wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
ret = status_; ret = status_;
} }
VLOG(70) << "VarHandle wait:" << ret; VLOG(7) << "VarHandle wait:" << ret;
return ret != kErrorState; return ret != kErrorState;
} }
...@@ -84,7 +84,7 @@ class VarHandle { ...@@ -84,7 +84,7 @@ class VarHandle {
std::unique_lock<std::mutex> lk(sync_mutex_); std::unique_lock<std::mutex> lk(sync_mutex_);
status_ = ok ? kFinishState : kErrorState; status_ = ok ? kFinishState : kErrorState;
} }
VLOG(70) << "VarHandle finish:" << ok; VLOG(7) << "VarHandle finish:" << ok;
wait_cond_.notify_all(); wait_cond_.notify_all();
} }
......
...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(40) << "RequestSendHandler:" << varname; VLOG(4) << "RequestSendHandler:" << varname;
// Sync // Sync
if (varname == BATCH_BARRIER_MESSAGE) { if (varname == BATCH_BARRIER_MESSAGE) {
VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE"; VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE";
rpc_server_->IncreaseBatchBarrier(kRequestSend); rpc_server_->IncreaseBatchBarrier(kRequestSend);
} else if (varname == COMPLETE_MESSAGE) { } else if (varname == COMPLETE_MESSAGE) {
VLOG(30) << "sync: recv complete message"; VLOG(3) << "sync: recv complete message";
rpc_server_->Complete(); rpc_server_->Complete();
} else { } else {
// Async // Async
if (!sync_mode_) { if (!sync_mode_) {
VLOG(30) << "async process var: " << varname; VLOG(3) << "async process var: " << varname;
try { try {
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
scope); scope);
...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
return true; return true;
} else { // sync } else { // sync
rpc_server_->WaitCond(kRequestSend); rpc_server_->WaitCond(kRequestSend);
VLOG(30) << "sync: processing received var: " << varname; VLOG(3) << "sync: processing received var: " << varname;
if (invar == nullptr) { if (invar == nullptr) {
LOG(FATAL) << "sync: Can not find server side var: " << varname; LOG(FATAL) << "sync: Can not find server side var: " << varname;
...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(40) << "RequestGetHandler:" << varname; VLOG(4) << "RequestGetHandler:" << varname;
if (sync_mode_) { if (sync_mode_) {
if (varname == FETCH_BARRIER_MESSAGE) { if (varname == FETCH_BARRIER_MESSAGE) {
VLOG(30) << "sync: recv fetch barrier message"; VLOG(3) << "sync: recv fetch barrier message";
rpc_server_->IncreaseBatchBarrier(kRequestGet); rpc_server_->IncreaseBatchBarrier(kRequestGet);
} else { } else {
rpc_server_->WaitCond(kRequestGet); rpc_server_->WaitCond(kRequestGet);
...@@ -93,14 +93,13 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -93,14 +93,13 @@ bool RequestGetHandler::Handle(const std::string& varname,
// NOTE: the format is determined by distributed_transpiler.py // NOTE: the format is determined by distributed_transpiler.py
std::string param_bak_name = std::string param_bak_name =
string::Sprintf("%s.trainer_%d_bak", varname, trainer_id); string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
VLOG(30) << "getting " << param_bak_name << " trainer_id " VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
<< trainer_id;
auto var = scope_->FindVar(varname); auto var = scope_->FindVar(varname);
auto t_orig = var->Get<framework::LoDTensor>(); auto t_orig = var->Get<framework::LoDTensor>();
auto param_bak = scope_->Var(param_bak_name); auto param_bak = scope_->Var(param_bak_name);
auto t = param_bak->GetMutable<framework::LoDTensor>(); auto t = param_bak->GetMutable<framework::LoDTensor>();
t->mutable_data(dev_ctx_->GetPlace(), t_orig.type()); t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
VLOG(30) << "copying " << varname << " to " << param_bak_name; VLOG(3) << "copying " << varname << " to " << param_bak_name;
framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t); framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
} }
*outvar = scope_->FindVar(varname); *outvar = scope_->FindVar(varname);
...@@ -115,7 +114,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname, ...@@ -115,7 +114,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(40) << "RequestPrefetchHandler " << varname; VLOG(4) << "RequestPrefetchHandler " << varname;
auto var_desc = program_->Block(0).FindVar(out_var_name); auto var_desc = program_->Block(0).FindVar(out_var_name);
InitializeVariable(*outvar, var_desc->GetType()); InitializeVariable(*outvar, var_desc->GetType());
...@@ -139,7 +138,7 @@ bool RequestCheckpointHandler::Handle(const std::string& varname, ...@@ -139,7 +138,7 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>(); auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
lt_var->clear(); lt_var->clear();
lt_var->append(out_var_name); lt_var->append(out_var_name);
VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: " VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name; << out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_); executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
return true; return true;
......
...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const { ...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
port_file.open(file_path); port_file.open(file_path);
port_file << selected_port_; port_file << selected_port_;
port_file.close(); port_file.close();
VLOG(40) << "selected port written to " << file_path; VLOG(4) << "selected port written to " << file_path;
} }
void RPCServer::WaitBarrier(const std::string& rpc_name) { void RPCServer::WaitBarrier(const std::string& rpc_name) {
...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { ...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
exit_flag_.load()); exit_flag_.load());
}); });
VLOG(30) << "batch_barrier_: " << rpc_name << " " VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name]; << barrier_counter_[rpc_name];
} }
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0; int b = 0;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name]; b = ++barrier_counter_[rpc_name];
...@@ -71,7 +71,7 @@ void RPCServer::Complete() { ...@@ -71,7 +71,7 @@ void RPCServer::Complete() {
client_num_--; client_num_--;
need_reset_all_vars_ = true; need_reset_all_vars_ = true;
VLOG(40) << "decrease client_num to: " << client_num_; VLOG(4) << "decrease client_num to: " << client_num_;
if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) { if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
barrier_counter_[kRequestGet]--; barrier_counter_[kRequestGet]--;
} }
...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() { ...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
} }
void RPCServer::ResetBarrierCounter() { void RPCServer::ResetBarrierCounter() {
VLOG(30) << "RPCServer ResetBarrierCounter "; VLOG(3) << "RPCServer ResetBarrierCounter ";
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
for (auto& t : barrier_counter_) { for (auto& t : barrier_counter_) {
t.second = 0; t.second = 0;
...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name, ...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
static int cond = -1; static int cond = -1;
rpc_cond_map_[rpc_name] = ++cond; rpc_cond_map_[rpc_name] = ++cond;
VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name]; << ", cond:" << rpc_cond_map_[rpc_name];
} }
void RPCServer::SetCond(const std::string& rpc_name) { void RPCServer::SetCond(const std::string& rpc_name) {
VLOG(30) << "RPCServer SetCond " << rpc_name; VLOG(3) << "RPCServer SetCond " << rpc_name;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
cur_cond_ = rpc_cond_map_[rpc_name]; cur_cond_ = rpc_cond_map_[rpc_name];
...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { ...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
} }
void RPCServer::WaitCond(const std::string& rpc_name) { void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(40) << "RPCServer WaitCond " << rpc_name; VLOG(4) << "RPCServer WaitCond " << rpc_name;
int cond = 0; int cond = 0;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
......
...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
size_to_write = length - total_written; size_to_write = length - total_written;
} }
// This log is useful to see how long a internal block size is of rpc. // This log is useful to see how long a internal block size is of rpc.
VLOG(70) << "copy " << size_to_write << " data to CUDAPlace"; VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place), memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write, reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream()); gpu_dev_ctx.stream());
...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
// TODO(gongwb): can we avoid copy? // TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu; platform::CPUPlace cpu;
// This log is useful to see how long a internal block size is of rpc. // This log is useful to see how long a internal block size is of rpc.
VLOG(70) << "copy " << size_to_write << " data to CPUPlace"; VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write); memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write; p += size_to_write;
...@@ -198,7 +198,7 @@ bool VariableResponse::ProcSerializedField( ...@@ -198,7 +198,7 @@ bool VariableResponse::ProcSerializedField(
#endif #endif
} }
VLOG(70) << "ProcSerializedField:" << meta_.varname() VLOG(7) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl; << ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims()); framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) { if (meta_.type() == sendrecv::LOD_TENSOR) {
......
...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase { ...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
auto lookup_table_save_dir = auto lookup_table_save_dir =
string::Sprintf("%s/%s_%d", dir, lookup_table_name, i); string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir); rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
VLOG(30) << "checkpoint notify sending lookup table: " VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name
<< lookup_table_name << " and dir:" << dir << " to " << epmap[i]; << " and dir:" << dir << " to " << epmap[i];
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
} }
......
...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase { ...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) { for (auto& ep : eps) {
VLOG(30) << "fetch barrier, ep: " << ep; VLOG(3) << "fetch barrier, ep: " << ep;
rpc_client->AsyncSendFetchBarrier(ep); rpc_client->AsyncSendFetchBarrier(ep);
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0); distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
for (auto& ep : endpoint_list) { for (auto& ep : endpoint_list) {
VLOG(30) << "sending nccl id to " << ep; VLOG(3) << "sending nccl id to " << ep;
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME); client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
} }
client->Wait(); client->Wait();
...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendBatchBarrier(ep); client->AsyncSendBatchBarrier(ep);
} }
client->Wait(); client->Wait();
VLOG(30) << "sending completed..."; VLOG(3) << "sending completed...";
} }
void GetIdByServer(framework::Scope* scope, void GetIdByServer(framework::Scope* scope,
...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
std::bind(&distributed::RPCServer::StartServer, rpc_service.get())); std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
rpc_service->SetCond(distributed::kRequestSend); rpc_service->SetCond(distributed::kRequestSend);
VLOG(30) << "start getting nccl id from trainer 0..."; VLOG(3) << "start getting nccl id from trainer 0...";
rpc_service->WaitBarrier(distributed::kRequestSend); rpc_service->WaitBarrier(distributed::kRequestSend);
VLOG(30) << "got nccl id and stop server..."; VLOG(3) << "got nccl id and stop server...";
rpc_service->ShutDown(); rpc_service->ShutDown();
VLOG(30) << "rpc server stopped"; VLOG(3) << "rpc server stopped";
server_thread.join(); server_thread.join();
} }
}; };
......
...@@ -36,7 +36,7 @@ namespace operators { ...@@ -36,7 +36,7 @@ namespace operators {
void RunServer(std::shared_ptr<distributed::RPCServer> service) { void RunServer(std::shared_ptr<distributed::RPCServer> service) {
service->StartServer(); service->StartServer();
VLOG(40) << "RunServer thread end"; VLOG(4) << "RunServer thread end";
} }
static void split(const std::string &str, char sep, static void split(const std::string &str, char sep,
std::vector<std::string> *pieces) { std::vector<std::string> *pieces) {
...@@ -66,7 +66,7 @@ static void ParallelExecuteBlocks( ...@@ -66,7 +66,7 @@ static void ParallelExecuteBlocks(
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() { fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
int run_block = idx; // thread local int run_block = idx; // thread local
try { try {
VLOG(30) << "running server block: " << run_block VLOG(3) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get(); << "pointer: " << prepared[run_block].get();
executor->RunPreparedContext(prepared[run_block].get(), scope); executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (const std::exception &e) { } catch (const std::exception &e) {
...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, platform::DeviceContext *dev_ctx, framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
const std::vector<int> &prefetch_block_id_list, const std::vector<int> &prefetch_block_id_list,
const int checkpoint_point_block_id) const { const int checkpoint_point_block_id) const {
VLOG(20) << "RunSyncLoop"; VLOG(2) << "RunSyncLoop";
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
auto optimize_blocks = auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks); Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
} }
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
recv_scope); recv_scope);
VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)"; VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars()); ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : sparse_vars_) { for (auto &varname : sparse_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(20) << "can not find var " << varname << " in received scope"; VLOG(2) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::SelectedRows>()) { if (var->IsType<framework::SelectedRows>()) {
VLOG(30) << "reset sparse var: " << varname; VLOG(3) << "reset sparse var: " << varname;
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear(); var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
} else { } else {
PADDLE_THROW("The type of sparse var should be SelectedRows"); PADDLE_THROW("The type of sparse var should be SelectedRows");
...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : dense_vars_) { for (auto &varname : dense_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(20) << "can not find var " << varname << " in received scope"; VLOG(2) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::LoDTensor>()) { if (var->IsType<framework::LoDTensor>()) {
...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program, framework::ProgramDesc *program,
framework::Scope *recv_scope) const { framework::Scope *recv_scope) const {
VLOG(20) << "RunAsyncLoop"; VLOG(2) << "RunAsyncLoop";
auto grad_to_block_id_str = auto grad_to_block_id_str =
Attr<std::vector<std::string>>("grad_to_block_id"); Attr<std::vector<std::string>>("grad_to_block_id");
DoubleFindMap<std::string, int32_t> grad_to_block_id; DoubleFindMap<std::string, int32_t> grad_to_block_id;
...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
const std::string &grad_and_id) { const std::string &grad_and_id) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(grad_and_id, ':', &pieces); split(grad_and_id, ':', &pieces);
VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1]; VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0); PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
while (true) { while (true) {
if (rpc_service_->IsExit()) { if (rpc_service_->IsExit()) {
VLOG(40) << "get exit!rpc_processor break!"; VLOG(4) << "get exit!rpc_processor break!";
break; break;
} }
...@@ -332,7 +332,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -332,7 +332,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
std::string endpoint = Attr<std::string>("endpoint"); std::string endpoint = Attr<std::string>("endpoint");
int checkpoint_block_id = Attr<int>(kCheckpointBlockId); int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint << ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id; << ", checkpoint_block_id: " << checkpoint_block_id;
...@@ -383,7 +383,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -383,7 +383,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
prefetch_var_name_to_block_id_str) { prefetch_var_name_to_block_id_str) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(prefetch_var_name_and_id, ':', &pieces); split(prefetch_var_name_and_id, ':', &pieces);
VLOG(30) << "after split, prefetch_var = " << pieces[0] VLOG(3) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1]; << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// start the server listening after all member initialized. // start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_)); server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(30) << "wait server thread to become ready..."; VLOG(3) << "wait server thread to become ready...";
rpc_service_->WaitServerReady(); rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
......
...@@ -48,12 +48,12 @@ class PrefetchOp : public framework::OperatorBase { ...@@ -48,12 +48,12 @@ class PrefetchOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets; std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < ins.size(); i++) { for (size_t i = 0; i < ins.size(); i++) {
if (NeedSend(scope, ins[i])) { if (NeedSend(scope, ins[i])) {
VLOG(30) << "sending " << ins[i] << " to " << epmap[i] << " to get " VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
<< outs[i] << " back"; << outs[i] << " back";
rets.push_back(rpc_client->AsyncPrefetchVar(epmap[i], ctx, scope, rets.push_back(rpc_client->AsyncPrefetchVar(epmap[i], ctx, scope,
ins[i], outs[i])); ins[i], outs[i]));
} else { } else {
VLOG(30) << "don't send no-initialied variable: " << ins[i]; VLOG(3) << "don't send no-initialied variable: " << ins[i];
} }
} }
for (size_t i = 0; i < rets.size(); i++) { for (size_t i = 0; i < rets.size(); i++) {
......
...@@ -47,7 +47,7 @@ class RecvOp : public framework::OperatorBase { ...@@ -47,7 +47,7 @@ class RecvOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets; std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < outs.size(); i++) { for (size_t i = 0; i < outs.size(); i++) {
VLOG(30) << "getting " << outs[i] << " from " << epmap[i]; VLOG(3) << "getting " << outs[i] << " from " << epmap[i];
rets.push_back(rpc_client->AsyncGetVar(epmap[i], ctx, scope, outs[i])); rets.push_back(rpc_client->AsyncGetVar(epmap[i], ctx, scope, outs[i]));
} }
if (sync_mode) { if (sync_mode) {
......
...@@ -42,12 +42,12 @@ class SendBarrierOp : public framework::OperatorBase { ...@@ -42,12 +42,12 @@ class SendBarrierOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>( distributed::RPCClient::GetInstance<RPCCLIENT_T>(
Attr<int>("trainer_id")); Attr<int>("trainer_id"));
VLOG(30) << "SendBarrierOp sync"; VLOG(3) << "SendBarrierOp sync";
// need to wait before sending send_barrier message // need to wait before sending send_barrier message
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) { for (auto& ep : eps) {
VLOG(30) << "send barrier, ep: " << ep; VLOG(3) << "send barrier, ep: " << ep;
rpc_client->AsyncSendBatchBarrier(ep); rpc_client->AsyncSendBatchBarrier(ep);
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
...@@ -50,10 +50,10 @@ class SendOp : public framework::OperatorBase { ...@@ -50,10 +50,10 @@ class SendOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets; std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < ins.size(); i++) { for (size_t i = 0; i < ins.size(); i++) {
if (NeedSend(scope, ins[i])) { if (NeedSend(scope, ins[i])) {
VLOG(30) << "sending " << ins[i] << " to " << epmap[i]; VLOG(3) << "sending " << ins[i] << " to " << epmap[i];
rets.push_back(rpc_client->AsyncSendVar(epmap[i], ctx, scope, ins[i])); rets.push_back(rpc_client->AsyncSendVar(epmap[i], ctx, scope, ins[i]));
} else { } else {
VLOG(30) << "don't send no-initialied variable: " << ins[i]; VLOG(3) << "don't send no-initialied variable: " << ins[i];
} }
} }
if (sync_send) { if (sync_send) {
......
...@@ -120,7 +120,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs, ...@@ -120,7 +120,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
f::Scope scope; f::Scope scope;
p::CPUPlace place; p::CPUPlace place;
VLOG(40) << "before init tensor"; VLOG(4) << "before init tensor";
if (is_sparse) { if (is_sparse) {
InitSelectedRowsInScope(place, &scope); InitSelectedRowsInScope(place, &scope);
} else { } else {
...@@ -146,7 +146,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) { ...@@ -146,7 +146,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
attrs.insert({"PrefetchBlock", prefetch_block}); attrs.insert({"PrefetchBlock", prefetch_block});
attrs.insert({"grad_to_block_id", std::vector<std::string>({""})}); attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
attrs.insert({"sync_mode", true}); attrs.insert({"sync_mode", true});
VLOG(40) << "before init op"; VLOG(4) << "before init op";
listen_and_serv_op = listen_and_serv_op =
f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs); f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
*initialized = true; *initialized = true;
......
...@@ -32,7 +32,7 @@ class SplitByrefOpKernel : public framework::OpKernel<T> { ...@@ -32,7 +32,7 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < outs.size(); ++i) { for (size_t i = 0; i < outs.size(); ++i) {
// NOTE: no need to call mutable_data here to allocate memory. // NOTE: no need to call mutable_data here to allocate memory.
auto* out = outs[i]; auto* out = outs[i];
VLOG(30) << "spliting by ref: " << row_offset << " " << out->dims()[0]; VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0];
*out = in->Slice(row_offset, row_offset + out->dims()[0]); *out = in->Slice(row_offset, row_offset + out->dims()[0]);
row_offset += out->dims()[0]; row_offset += out->dims()[0];
} }
......
...@@ -44,7 +44,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> { ...@@ -44,7 +44,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < ids_tensors.size(); ++i) { for (size_t i = 0; i < ids_tensors.size(); ++i) {
batch_size += ids_tensors[i]->dims()[0]; batch_size += ids_tensors[i]->dims()[0];
} }
VLOG(40) << "Get Total BatchSize is: " << batch_size; VLOG(4) << "Get Total BatchSize is: " << batch_size;
std::vector<T> all_ids(batch_size); std::vector<T> all_ids(batch_size);
int offset = 0; int offset = 0;
......
...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase { ...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out = auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>(); scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level")); VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level"))); out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
VLOG(100) << Input("X") << "'s lod information is " << *out; VLOG(10) << Input("X") << "'s lod information is " << *out;
} }
}; };
......
...@@ -134,12 +134,12 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -134,12 +134,12 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse"); auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(out_var_name) block->Var(out_var_name)
->SetType(framework::proto::VarType::SELECTED_ROWS); ->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
} }
......
...@@ -96,7 +96,7 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt, ...@@ -96,7 +96,7 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
} }
auto et = GetCurrentUS(); auto et = GetCurrentUS();
VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat; << " us, tgt takes: " << (mt - st) / repeat;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3); EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
......
...@@ -86,7 +86,7 @@ TEST(JitKernel, vrelu) { ...@@ -86,7 +86,7 @@ TEST(JitKernel, vrelu) {
vrelu_intri8(d, x_data, zref_data); vrelu_intri8(d, x_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us"; VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us";
} }
#endif #endif
auto ttgts = GetCurrentUS(); auto ttgts = GetCurrentUS();
...@@ -94,8 +94,7 @@ TEST(JitKernel, vrelu) { ...@@ -94,8 +94,7 @@ TEST(JitKernel, vrelu) {
ker->Compute(x_data, ztgt_data, d); ker->Compute(x_data, ztgt_data, d);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us"; << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
...@@ -127,8 +126,7 @@ TEST(JitKernel, vaddbias) { ...@@ -127,8 +126,7 @@ TEST(JitKernel, vaddbias) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us"; << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
...@@ -175,13 +173,13 @@ TEST(JitKernel, vexp) { ...@@ -175,13 +173,13 @@ TEST(JitKernel, vexp) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us"; << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
...@@ -235,8 +233,7 @@ TEST(JitKernel, vsigmoid) { ...@@ -235,8 +233,7 @@ TEST(JitKernel, vsigmoid) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us"; << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
...@@ -296,8 +293,7 @@ TEST(JitKernel, vtanh) { ...@@ -296,8 +293,7 @@ TEST(JitKernel, vtanh) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us"; << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
...@@ -397,8 +393,7 @@ TEST(JitKernel, lstm) { ...@@ -397,8 +393,7 @@ TEST(JitKernel, lstm) {
ker->ComputeCtHt(&step, &attr); ker->ComputeCtHt(&step, &attr);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us"; << " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
} }
...@@ -473,7 +468,7 @@ TEST(JitKernel, vscal) { ...@@ -473,7 +468,7 @@ TEST(JitKernel, vscal) {
vscal_inp_intri8(d, a, y_data); vscal_inp_intri8(d, a, y_data);
} }
auto si3 = GetCurrentUS(); auto si3 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat << " us"; << " us, inplace: " << (si3 - si2) / repeat << " us";
} }
#endif #endif
...@@ -488,18 +483,15 @@ TEST(JitKernel, vscal) { ...@@ -488,18 +483,15 @@ TEST(JitKernel, vscal) {
ker->Compute(&a, y_data, y_data, d); ker->Compute(&a, y_data, y_data, d);
} }
auto ttgte1 = GetCurrentUS(); auto ttgte1 = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat << " us, inplace takes: " << (trefe1 - trefs1) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat << " us";
<< " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -557,7 +549,7 @@ TEST(JitKernel, vmul) { ...@@ -557,7 +549,7 @@ TEST(JitKernel, vmul) {
vmul_intri8(d, x_data, y_data, zref_data); vmul_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -567,8 +559,7 @@ TEST(JitKernel, vmul) { ...@@ -567,8 +559,7 @@ TEST(JitKernel, vmul) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
...@@ -632,7 +623,7 @@ TEST(JitKernel, vadd) { ...@@ -632,7 +623,7 @@ TEST(JitKernel, vadd) {
vadd_intri8(d, x_data, y_data, zref_data); vadd_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -642,8 +633,7 @@ TEST(JitKernel, vadd) { ...@@ -642,8 +633,7 @@ TEST(JitKernel, vadd) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
...@@ -699,8 +689,7 @@ TEST(JitKernel, vaddrelu) { ...@@ -699,8 +689,7 @@ TEST(JitKernel, vaddrelu) {
ker->Compute(x_data, y_data, ztgt_data, d); ker->Compute(x_data, y_data, ztgt_data, d);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, " << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us"; << "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
......
...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(30) << "no input! return"; VLOG(3) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(30) << "no input has value! just return" << std::endl; VLOG(3) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(30) << "no input! return"; VLOG(3) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(30) << "no input has value! just return" << std::endl; VLOG(3) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
...@@ -38,7 +38,7 @@ class MulOp : public framework::OperatorWithKernel { ...@@ -38,7 +38,7 @@ class MulOp : public framework::OperatorWithKernel {
int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims"); int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims");
int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims"); int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims");
VLOG(30) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims VLOG(3) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
<< " x_num_col_dims=" << x_num_col_dims << " x_num_col_dims=" << x_num_col_dims
<< " y_num_col_dims=" << y_num_col_dims; << " y_num_col_dims=" << y_num_col_dims;
......
...@@ -63,14 +63,14 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> { ...@@ -63,14 +63,14 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
// device id // device id
int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId(); int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId();
int idx = comm->GetCommId(gpu_id); int idx = comm->GetCommId(gpu_id);
VLOG(30) << "gpu : " VLOG(3) << "gpu : "
<< " invoke allreduce. send " << x->numel() << " recv " << " invoke allreduce. send " << x->numel() << " recv "
<< out->numel(); << out->numel();
PADDLE_ENFORCE(platform::dynload::ncclAllReduce( PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(), x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx), NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx),
ctx.cuda_device_context().stream())); ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " VLOG(3) << "gpu : "
<< " finished allreduce. send " << x->numel() << " recv " << " finished allreduce. send " << x->numel() << " recv "
<< out->numel(); << out->numel();
} }
...@@ -109,13 +109,13 @@ class NCCLReduceKernel : public framework::OpKernel<T> { ...@@ -109,13 +109,13 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
} else { } else {
out->Resize(framework::make_ddim({0})); out->Resize(framework::make_ddim({0}));
} }
VLOG(30) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel() VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
<< " recv " << out->numel(); << " recv " << out->numel();
PADDLE_ENFORCE(platform::dynload::ncclReduce( PADDLE_ENFORCE(platform::dynload::ncclReduce(
x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type, x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type,
reduction_op_, root, comm->comms().at(idx), reduction_op_, root, comm->comms().at(idx),
ctx.cuda_device_context().stream())); ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished reduce. send " << x->numel() VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
<< " recv " << out->numel(); << " recv " << out->numel();
} }
}; };
...@@ -133,22 +133,21 @@ class NCCLBcastKernel : public framework::OpKernel<T> { ...@@ -133,22 +133,21 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
int idx = comm->GetCommId(gpu_id); int idx = comm->GetCommId(gpu_id);
if (idx == root) { if (idx == root) {
auto* x = ctx.Input<LoDTensor>("X"); auto* x = ctx.Input<LoDTensor>("X");
VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel(); VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
PADDLE_ENFORCE(platform::dynload::ncclBcast( PADDLE_ENFORCE(platform::dynload::ncclBcast(
reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(), reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(),
NCCLTypeWrapper<T>::type, root, comm->comms().at(idx), NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
ctx.cuda_device_context().stream())); ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished Bcast."; VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
} else { } else {
auto* out = ctx.Output<LoDTensor>("Out"); auto* out = ctx.Output<LoDTensor>("Out");
VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. recv buffer " VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
<< framework::product(out->dims()); << framework::product(out->dims());
PADDLE_ENFORCE(platform::dynload::ncclBcast( PADDLE_ENFORCE(platform::dynload::ncclBcast(
out->mutable_data<T>(ctx.GetPlace()), out->numel(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
NCCLTypeWrapper<T>::type, root, comm->comms().at(idx), NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
ctx.cuda_device_context().stream())); ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished Bcast. recv " VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel();
<< out->numel();
} }
} }
}; };
......
...@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test { ...@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test {
(*p_scopes).resize(gpu_list_.size()); (*p_scopes).resize(gpu_list_.size());
auto op = f::OpRegistry::CreateOp(*op1); auto op = f::OpRegistry::CreateOp(*op1);
VLOG(10) << "invoke NCCLInitOp."; VLOG(1) << "invoke NCCLInitOp.";
op->Run(g_scope_, cpu_place); op->Run(g_scope_, cpu_place);
VLOG(10) << "NCCLInitOp finished."; VLOG(1) << "NCCLInitOp finished.";
} }
int GetGPUData(int gpu_id) { return gpu_id + 42; } int GetGPUData(int gpu_id) { return gpu_id + 42; }
...@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test { ...@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test {
std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id)); std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id));
paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor); paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor);
VLOG(10) << "Send Tensor filled with elements " << send_tensor->numel(); VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel();
} }
lk.unlock(); lk.unlock();
...@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test { ...@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test {
auto op = f::OpRegistry::CreateOp(*op1); auto op = f::OpRegistry::CreateOp(*op1);
VLOG(10) << "Device : " << gpu_id << " invoke " << op_desc.Type(); VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type();
VLOG(10) << " send_tensor : " << send_tensor->numel() VLOG(1) << " send_tensor : " << send_tensor->numel()
<< " recv_tensor : " << recv_tensor->numel(); << " recv_tensor : " << recv_tensor->numel();
op->Run(*scope, place); op->Run(*scope, place);
VLOG(10) << "Device : " << gpu_id << " finished " << op_desc.Type(); VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type();
} }
public: public:
......
...@@ -230,13 +230,13 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -230,13 +230,13 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse"); auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(weight_grad) block->Var(weight_grad)
->SetType(framework::proto::VarType::SELECTED_ROWS); ->SetType(framework::proto::VarType::SELECTED_ROWS);
block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS); block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
......
...@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto& grad = auto& grad =
Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad"); Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
if (grad.rows().size() == 0) { if (grad.rows().size() == 0) {
VLOG(30) << "grad row size is 0!!"; VLOG(3) << "grad row size is 0!!";
return; return;
} }
......
...@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> { ...@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
// sparse update maybe empty. // sparse update maybe empty.
if (grad->rows().size() == 0) { if (grad->rows().size() == 0) {
VLOG(30) << "Grad SelectedRows contains no data!"; VLOG(3) << "Grad SelectedRows contains no data!";
return; return;
} }
auto* merged_grad = const_cast<framework::Scope&>(ctx.scope()) auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
......
...@@ -98,7 +98,7 @@ class SGDOpKernel : public framework::OpKernel<T> { ...@@ -98,7 +98,7 @@ class SGDOpKernel : public framework::OpKernel<T> {
auto param_row_width = param.value().dims()[1]; auto param_row_width = param.value().dims()[1];
auto grad_row_width = grad.value().dims()[1]; auto grad_row_width = grad.value().dims()[1];
VLOG(40) << " param rows: " << param.rows().size() VLOG(4) << " param rows: " << param.rows().size()
<< " param memory rows: " << param.value().dims()[0] << " param memory rows: " << param.value().dims()[0]
<< " grad rows: " << grad.rows().size() << " grad rows: " << grad.rows().size()
<< " grad memory rows: " << grad.value().dims()[0]; << " grad memory rows: " << grad.value().dims()[0];
......
...@@ -155,7 +155,7 @@ class RandomCropKernel : public framework::OpKernel<T> { ...@@ -155,7 +155,7 @@ class RandomCropKernel : public framework::OpKernel<T> {
seed = *cpu_seed.data<int64_t>(); seed = *cpu_seed.data<int64_t>();
} }
} else { } else {
VLOG(50) << "WARNING: The input 'Seed' is not initialized, use attribute " VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
"'startup_seed' instead."; "'startup_seed' instead.";
seed = ctx.Attr<int>("startup_seed"); seed = ctx.Attr<int>("startup_seed");
} }
......
...@@ -42,7 +42,7 @@ class BlockingQueue { ...@@ -42,7 +42,7 @@ class BlockingQueue {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; }); send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
if (closed_) { if (closed_) {
VLOG(50) VLOG(5)
<< "WARNING: Sending an element to a closed reader::BlokcingQueue."; << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
return false; return false;
} }
...@@ -56,7 +56,7 @@ class BlockingQueue { ...@@ -56,7 +56,7 @@ class BlockingQueue {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; }); send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
if (closed_) { if (closed_) {
VLOG(50) VLOG(5)
<< "WARNING: Sending an element to a closed reader::BlokcingQueue."; << "WARNING: Sending an element to a closed reader::BlokcingQueue.";
return false; return false;
} }
......
...@@ -26,7 +26,7 @@ class ShuffleReader : public framework::DecoratedReader { ...@@ -26,7 +26,7 @@ class ShuffleReader : public framework::DecoratedReader {
ShuffleReader(const std::shared_ptr<ReaderBase>& reader, size_t buffer_size, ShuffleReader(const std::shared_ptr<ReaderBase>& reader, size_t buffer_size,
size_t seed = 0) size_t seed = 0)
: DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) { : DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) {
VLOG(100) << "Create shuffle reader of " << reader_; VLOG(10) << "Create shuffle reader of " << reader_;
if (seed_ == 0) { if (seed_ == 0) {
std::random_device device; std::random_device device;
seed_ = device(); seed_ = device();
...@@ -37,7 +37,7 @@ class ShuffleReader : public framework::DecoratedReader { ...@@ -37,7 +37,7 @@ class ShuffleReader : public framework::DecoratedReader {
void ReadNextImpl(std::vector<framework::LoDTensor>* out) override { void ReadNextImpl(std::vector<framework::LoDTensor>* out) override {
out->clear(); out->clear();
if (iteration_pos_ >= buffer_.size()) { if (iteration_pos_ >= buffer_.size()) {
VLOG(100) << "Resetting shuffle buffer"; VLOG(10) << "Resetting shuffle buffer";
ReloadBuffer(); ReloadBuffer();
if (buffer_.empty()) { if (buffer_.empty()) {
return; return;
...@@ -73,7 +73,7 @@ class ShuffleReader : public framework::DecoratedReader { ...@@ -73,7 +73,7 @@ class ShuffleReader : public framework::DecoratedReader {
std::mt19937 g(seed_); std::mt19937 g(seed_);
std::shuffle(buffer_.begin(), buffer_.end(), g); std::shuffle(buffer_.begin(), buffer_.end(), g);
seed_ = g(); // update seed_; seed_ = g(); // update seed_;
VLOG(100) << "random buffer size = " << buffer_.size(); VLOG(10) << "random buffer size = " << buffer_.size();
} }
size_t buffer_size_; size_t buffer_size_;
......
...@@ -160,7 +160,7 @@ class RecurrentBase : public framework::OperatorBase { ...@@ -160,7 +160,7 @@ class RecurrentBase : public framework::OperatorBase {
Callback callback) { Callback callback) {
PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size()); PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
for (size_t i = 0; i < dst_vars.size(); ++i) { for (size_t i = 0; i < dst_vars.size(); ++i) {
VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i]; VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback); AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
} }
} }
...@@ -176,7 +176,7 @@ class RecurrentBase : public framework::OperatorBase { ...@@ -176,7 +176,7 @@ class RecurrentBase : public framework::OperatorBase {
Callback callback) { Callback callback) {
PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size()); PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
for (size_t i = 0; i < dst_vars.size(); ++i) { for (size_t i = 0; i < dst_vars.size(); ++i) {
VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i]; VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback); AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
} }
} }
...@@ -230,7 +230,7 @@ class RecurrentOp : public RecurrentBase { ...@@ -230,7 +230,7 @@ class RecurrentOp : public RecurrentBase {
void RunImpl(const framework::Scope &scope, void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override { const platform::Place &place) const override {
auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope)); auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope));
VLOG(30) << "Static RNN input sequence length = " << seq_len; VLOG(3) << "Static RNN input sequence length = " << seq_len;
StepScopes scopes = CreateStepScopes(scope, seq_len); StepScopes scopes = CreateStepScopes(scope, seq_len);
auto reverse = Attr<bool>(kReverse); auto reverse = Attr<bool>(kReverse);
...@@ -241,7 +241,7 @@ class RecurrentOp : public RecurrentBase { ...@@ -241,7 +241,7 @@ class RecurrentOp : public RecurrentBase {
for (size_t i = 0; i < seq_len; ++i) { for (size_t i = 0; i < seq_len; ++i) {
size_t seq_offset = reverse ? seq_len - i - 1 : i; size_t seq_offset = reverse ? seq_len - i - 1 : i;
VLOG(30) << "Recurrent operate at the time step " << seq_offset; VLOG(3) << "Recurrent operate at the time step " << seq_offset;
auto &cur_scope = scopes.CurScope(); auto &cur_scope = scopes.CurScope();
...@@ -334,7 +334,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -334,7 +334,7 @@ class RecurrentGradOp : public RecurrentBase {
for (size_t step_id = 0; step_id < seq_len; ++step_id) { for (size_t step_id = 0; step_id < seq_len; ++step_id) {
size_t seq_offset = reverse ? step_id : seq_len - step_id - 1; size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
VLOG(30) << "Recurrent backward operate at the time step " << seq_offset; VLOG(3) << "Recurrent backward operate at the time step " << seq_offset;
auto &cur_scope = scopes.CurScope(); auto &cur_scope = scopes.CurScope();
// Link outside::output_grads --> inside::output_grads // Link outside::output_grads --> inside::output_grads
// inside::output_grad = outside::output_grad[seq_offset:seq_offset+1] // inside::output_grad = outside::output_grad[seq_offset:seq_offset+1]
...@@ -348,11 +348,11 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -348,11 +348,11 @@ class RecurrentGradOp : public RecurrentBase {
}); });
auto og_set = List2Set(Inputs(kOutputGrads)); auto og_set = List2Set(Inputs(kOutputGrads));
if (VLOG_IS_ON(100)) { if (VLOG_IS_ON(10)) {
std::ostringstream sout; std::ostringstream sout;
std::copy(og_set.begin(), og_set.end(), std::copy(og_set.begin(), og_set.end(),
std::ostream_iterator<std::string>(sout, ",")); std::ostream_iterator<std::string>(sout, ","));
VLOG(100) << " RNN output gradients = [" << sout.str() << "]"; VLOG(10) << " RNN output gradients = [" << sout.str() << "]";
} }
// Link states // Link states
...@@ -374,7 +374,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -374,7 +374,7 @@ class RecurrentGradOp : public RecurrentBase {
auto &ex_tensor = auto &ex_tensor =
ex_scope.FindVar(ex_grad)->Get<framework::LoDTensor>(); ex_scope.FindVar(ex_grad)->Get<framework::LoDTensor>();
VLOG(100) << " RNN link " << cur_grad << " from " << ex_grad; VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad;
auto *cur_grad_var = cur_scope.Var(cur_grad); auto *cur_grad_var = cur_scope.Var(cur_grad);
auto cur_grad_tensor = auto cur_grad_tensor =
cur_grad_var->GetMutable<framework::LoDTensor>(); cur_grad_var->GetMutable<framework::LoDTensor>();
...@@ -382,12 +382,12 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -382,12 +382,12 @@ class RecurrentGradOp : public RecurrentBase {
} }
} }
VLOG(50) << "Recurrent memory linking finished "; VLOG(5) << "Recurrent memory linking finished ";
// Run step block with cur_scope // Run step block with cur_scope
executor.Run(*program, &cur_scope, block->ID(), executor.Run(*program, &cur_scope, block->ID(),
false /*create_local_scope*/); false /*create_local_scope*/);
VLOG(50) << "executor.Run finished "; VLOG(5) << "executor.Run finished ";
auto local_var_names = LocalVarNames(cur_scope); auto local_var_names = LocalVarNames(cur_scope);
...@@ -436,7 +436,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -436,7 +436,7 @@ class RecurrentGradOp : public RecurrentBase {
cur_scope.Rename(new_inside_name, inside_grad_name); cur_scope.Rename(new_inside_name, inside_grad_name);
} }
} }
VLOG(50) << "Accumulate Parameter finished "; VLOG(5) << "Accumulate Parameter finished ";
// Copy input gradient from inside to outside // Copy input gradient from inside to outside
// outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad // outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad
...@@ -455,7 +455,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -455,7 +455,7 @@ class RecurrentGradOp : public RecurrentBase {
auto dst = outside->Slice(seq_offset, seq_offset + 1); auto dst = outside->Slice(seq_offset, seq_offset + 1);
framework::TensorCopy(inside, place, dev_ctx, &dst); framework::TensorCopy(inside, place, dev_ctx, &dst);
}); });
VLOG(50) << "Link outside gradient finished "; VLOG(5) << "Link outside gradient finished ";
if (step_id + 1 == seq_len) { // at_end if (step_id + 1 == seq_len) { // at_end
// copy initialize states gradient from inside to outside // copy initialize states gradient from inside to outside
...@@ -468,7 +468,7 @@ class RecurrentGradOp : public RecurrentBase { ...@@ -468,7 +468,7 @@ class RecurrentGradOp : public RecurrentBase {
outside->mutable_data(place, inside.type()); outside->mutable_data(place, inside.type());
framework::TensorCopy(inside, place, dev_ctx, outside); framework::TensorCopy(inside, place, dev_ctx, outside);
}); });
VLOG(50) << "Link initialize state gradient finished "; VLOG(5) << "Link initialize state gradient finished ";
} }
scopes.Next(); scopes.Next();
} }
......
...@@ -93,7 +93,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase { ...@@ -93,7 +93,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
in_grad_var_name); in_grad_var_name);
if (out_grad_var == nullptr) { if (out_grad_var == nullptr) {
VLOG(50) << "Using fill constant 0 as starting gradient"; VLOG(5) << "Using fill constant 0 as starting gradient";
auto in_var_name = Input("X"); auto in_var_name = Input("X");
auto *in_var = scope.FindVar(in_var_name); auto *in_var = scope.FindVar(in_var_name);
auto &in_var_tensor = in_var->Get<framework::LoDTensor>(); auto &in_var_tensor = in_var->Get<framework::LoDTensor>();
......
...@@ -110,7 +110,7 @@ class SaveOp : public framework::OperatorBase { ...@@ -110,7 +110,7 @@ class SaveOp : public framework::OperatorBase {
lt_var != nullptr, lt_var != nullptr,
"Can not find variable kLookupTablePath for SaveSelectedRows"); "Can not find variable kLookupTablePath for SaveSelectedRows");
std::string filename = lt_var->data(); std::string filename = lt_var->data();
VLOG(40) << "SaveSelectedRows get File name: " << filename; VLOG(4) << "SaveSelectedRows get File name: " << filename;
MkDirRecursively(DirName(filename).c_str()); MkDirRecursively(DirName(filename).c_str());
......
...@@ -127,7 +127,7 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> { ...@@ -127,7 +127,7 @@ class SequenceMaskKernel : public framework::OpKernel<Tx> {
auto x_numel = x->numel(); auto x_numel = x->numel();
if (maxlen < 0) { if (maxlen < 0) {
#ifdef __NVCC__ #ifdef __NVCC__
VLOG(100) VLOG(10)
<< "SequenceMaskOp on GPU may be slow when maxlen is not provided."; << "SequenceMaskOp on GPU may be slow when maxlen is not provided.";
maxlen = static_cast<int>( maxlen = static_cast<int>(
thrust::reduce(thrust::device_pointer_cast(x_data), thrust::reduce(thrust::device_pointer_cast(x_data),
......
...@@ -186,7 +186,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> { ...@@ -186,7 +186,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
} }
if (in_dim.empty()) { if (in_dim.empty()) {
VLOG(30) << "WARNING: all the inputs are empty"; VLOG(3) << "WARNING: all the inputs are empty";
in_dim = framework::vectorize(get_selected_row(N - 1).value().dims()); in_dim = framework::vectorize(get_selected_row(N - 1).value().dims());
} else { } else {
in_dim[0] = static_cast<int64_t>(first_dim); in_dim[0] = static_cast<int64_t>(first_dim);
......
...@@ -45,7 +45,7 @@ class SumOp : public framework::OperatorWithKernel { ...@@ -45,7 +45,7 @@ class SumOp : public framework::OperatorWithKernel {
size_t N = x_dims.size(); size_t N = x_dims.size();
PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0."); PADDLE_ENFORCE_GT(N, 0, "Input tensors count should > 0.");
if (N == 1) { if (N == 1) {
VLOG(30) << "Warning: sum have only one input, may waste memory"; VLOG(3) << "Warning: sum have only one input, may waste memory";
} }
framework::DDim in_dim({0}); framework::DDim in_dim({0});
...@@ -157,7 +157,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference { ...@@ -157,7 +157,7 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
auto& inputs = op_desc.Input("X"); auto& inputs = op_desc.Input("X");
auto var_type = framework::proto::VarType::SELECTED_ROWS; auto var_type = framework::proto::VarType::SELECTED_ROWS;
for (auto& name : op_desc.Input("X")) { for (auto& name : op_desc.Input("X")) {
VLOG(100) << name << " " VLOG(10) << name << " "
<< block->FindRecursiveOrCreateVar(name).GetType(); << block->FindRecursiveOrCreateVar(name).GetType();
} }
......
...@@ -127,9 +127,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -127,9 +127,9 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
// Convert output tensor from engine to fluid // Convert output tensor from engine to fluid
int output_index = 0; int output_index = 0;
VLOG(40) << "TensorRT Engine Op Outputs:"; VLOG(4) << "TensorRT Engine Op Outputs:";
for (const auto& y : context.Outputs("Ys")) { for (const auto& y : context.Outputs("Ys")) {
VLOG(40) << y; VLOG(4) << y;
// convert output and copy to fluid. // convert output and copy to fluid.
nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]); nvinfer1::ITensor* trt_t = engine->GetITensor(output_maps[output_index]);
auto dims = trt_t->getDimensions(); auto dims = trt_t->getDimensions();
...@@ -167,7 +167,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -167,7 +167,7 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
protected: protected:
void Prepare(const framework::ExecutionContext& context) const { void Prepare(const framework::ExecutionContext& context) const {
VLOG(40) << "Prepare engine"; VLOG(4) << "Prepare engine";
// Get the ProgramDesc and pass to convert. // Get the ProgramDesc and pass to convert.
framework::proto::BlockDesc block_desc; framework::proto::BlockDesc block_desc;
block_desc.ParseFromString(context.Attr<std::string>("subgraph")); block_desc.ParseFromString(context.Attr<std::string>("subgraph"));
...@@ -192,12 +192,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> { ...@@ -192,12 +192,12 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
engine->InitNetwork(); engine->InitNetwork();
framework::BlockDesc block(nullptr /*programdesc*/, &block_desc); framework::BlockDesc block(nullptr /*programdesc*/, &block_desc);
VLOG(40) << "parsed var size " << block.AllVars().size(); VLOG(4) << "parsed var size " << block.AllVars().size();
// Add inputs // Add inputs
VLOG(40) << "declare inputs"; VLOG(4) << "declare inputs";
for (auto& input : context.Inputs("Xs")) { for (auto& input : context.Inputs("Xs")) {
if (parameters.count(input)) continue; if (parameters.count(input)) continue;
VLOG(40) << "declare input " << input; VLOG(4) << "declare input " << input;
auto* var = block.FindVar(input); auto* var = block.FindVar(input);
// TensorRT engine need to create parameters. The parameter's description // TensorRT engine need to create parameters. The parameter's description
// should be set in // should be set in
......
...@@ -203,7 +203,7 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -203,7 +203,7 @@ class DeviceTracerImpl : public DeviceTracer {
void AddCPURecords(const std::string &anno, uint64_t start_ns, void AddCPURecords(const std::string &anno, uint64_t start_ns,
uint64_t end_ns, int64_t device_id, int64_t thread_id) { uint64_t end_ns, int64_t device_id, int64_t thread_id) {
if (anno.empty()) { if (anno.empty()) {
VLOG(10) << "Empty timeline annotation."; VLOG(1) << "Empty timeline annotation.";
return; return;
} }
std::lock_guard<std::mutex> l(trace_mu_); std::lock_guard<std::mutex> l(trace_mu_);
...@@ -216,7 +216,7 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -216,7 +216,7 @@ class DeviceTracerImpl : public DeviceTracer {
uint32_t correlation_id, uint64_t bytes) { uint32_t correlation_id, uint64_t bytes) {
// 0 means timestamp information could not be collected for the kernel. // 0 means timestamp information could not be collected for the kernel.
if (start_ns == 0 || end_ns == 0) { if (start_ns == 0 || end_ns == 0) {
VLOG(30) << name << " cannot be traced"; VLOG(3) << name << " cannot be traced";
return; return;
} }
std::lock_guard<std::mutex> l(trace_mu_); std::lock_guard<std::mutex> l(trace_mu_);
...@@ -228,7 +228,7 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -228,7 +228,7 @@ class DeviceTracerImpl : public DeviceTracer {
int64_t stream_id, uint32_t correlation_id) { int64_t stream_id, uint32_t correlation_id) {
// 0 means timestamp information could not be collected for the kernel. // 0 means timestamp information could not be collected for the kernel.
if (start == 0 || end == 0) { if (start == 0 || end == 0) {
VLOG(30) << correlation_id << " cannot be traced"; VLOG(3) << correlation_id << " cannot be traced";
return; return;
} }
std::lock_guard<std::mutex> l(trace_mu_); std::lock_guard<std::mutex> l(trace_mu_);
...@@ -347,7 +347,7 @@ class DeviceTracerImpl : public DeviceTracer { ...@@ -347,7 +347,7 @@ class DeviceTracerImpl : public DeviceTracer {
tracer->AddAnnotation(cbInfo->correlationId, anno); tracer->AddAnnotation(cbInfo->correlationId, anno);
} }
} else { } else {
VLOG(10) << "Unhandled API Callback for " << domain << " " << cbid; VLOG(1) << "Unhandled API Callback for " << domain << " " << cbid;
} }
} }
CUpti_SubscriberHandle subscriber_; CUpti_SubscriberHandle subscriber_;
......
...@@ -72,7 +72,7 @@ static inline std::string join(const std::string& part1, ...@@ -72,7 +72,7 @@ static inline std::string join(const std::string& part1,
static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path, static inline void* GetDsoHandleFromDefaultPath(const std::string& dso_path,
int dynload_flags) { int dynload_flags) {
VLOG(30) << "Try to find library: " << dso_path VLOG(3) << "Try to find library: " << dso_path
<< " from default system path."; << " from default system path.";
// default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH // default search from LD_LIBRARY_PATH/DYLD_LIBRARY_PATH
// and /usr/local/lib path // and /usr/local/lib path
......
...@@ -153,7 +153,7 @@ size_t GpuMaxChunkSize() { ...@@ -153,7 +153,7 @@ size_t GpuMaxChunkSize() {
size_t available = 0; size_t available = 0;
GpuMemoryUsage(&available, &total); GpuMemoryUsage(&available, &total);
VLOG(100) << "GPU Usage " << available / 1024 / 1024 << "M/" VLOG(10) << "GPU Usage " << available / 1024 / 1024 << "M/"
<< total / 1024 / 1024 << "M"; << total / 1024 / 1024 << "M";
size_t reserving = static_cast<size_t>(0.05 * total); size_t reserving = static_cast<size_t>(0.05 * total);
// If available less than minimum chunk size, no usable memory exists. // If available less than minimum chunk size, no usable memory exists.
......
...@@ -49,7 +49,7 @@ void InitGflags(std::vector<std::string> argv) { ...@@ -49,7 +49,7 @@ void InitGflags(std::vector<std::string> argv) {
line += ' '; line += ' ';
} }
google::ParseCommandLineFlags(&argc, &arr, true); google::ParseCommandLineFlags(&argc, &arr, true);
VLOG(10) << "Init commandline: " << line; VLOG(1) << "Init commandline: " << line;
}); });
} }
......
...@@ -113,7 +113,7 @@ struct NCCLContextMap { ...@@ -113,7 +113,7 @@ struct NCCLContextMap {
NCCLGroupGuard gurad; NCCLGroupGuard gurad;
for (auto &gpu_id : order_) { for (auto &gpu_id : order_) {
int rank = trainer_id * order_.size() + gpu_id; int rank = trainer_id * order_.size() + gpu_id;
VLOG(30) << "init nccl rank: " << rank << " nranks: " << nranks; VLOG(3) << "init nccl rank: " << rank << " nranks: " << nranks;
PADDLE_ENFORCE(cudaSetDevice(gpu_id)); PADDLE_ENFORCE(cudaSetDevice(gpu_id));
PADDLE_ENFORCE(platform::dynload::ncclCommInitRank( PADDLE_ENFORCE(platform::dynload::ncclCommInitRank(
comms.get() + gpu_id, nranks, *nccl_id, rank)); comms.get() + gpu_id, nranks, *nccl_id, rank));
......
...@@ -70,7 +70,7 @@ struct paddle_variant_caster<V<Ts...>> { ...@@ -70,7 +70,7 @@ struct paddle_variant_caster<V<Ts...>> {
if (std::is_same<T, std::vector<float>>::value) { if (std::is_same<T, std::vector<float>>::value) {
auto caster_ints = make_caster<std::vector<int64_t>>(); auto caster_ints = make_caster<std::vector<int64_t>>();
if (caster_ints.load(src, convert)) { if (caster_ints.load(src, convert)) {
VLOG(40) << "This value are floats and int64_ts satisfy " VLOG(4) << "This value are floats and int64_ts satisfy "
"simultaneously, will set it's type to " "simultaneously, will set it's type to "
"std::vector<int64_t>"; "std::vector<int64_t>";
value = cast_op<std::vector<int64_t>>(caster_ints); value = cast_op<std::vector<int64_t>>(caster_ints);
......
...@@ -40,7 +40,7 @@ void ReadBinaryFile(const std::string& filename, std::string* contents) { ...@@ -40,7 +40,7 @@ void ReadBinaryFile(const std::string& filename, std::string* contents) {
std::unique_ptr<paddle::framework::ProgramDesc> Load( std::unique_ptr<paddle::framework::ProgramDesc> Load(
paddle::framework::Executor* executor, const std::string& model_filename) { paddle::framework::Executor* executor, const std::string& model_filename) {
VLOG(30) << "loading model from " << model_filename; VLOG(3) << "loading model from " << model_filename;
std::string program_desc_str; std::string program_desc_str;
ReadBinaryFile(model_filename, &program_desc_str); ReadBinaryFile(model_filename, &program_desc_str);
......
...@@ -224,8 +224,8 @@ void ParameterClient2::prepareSendData( ...@@ -224,8 +224,8 @@ void ParameterClient2::prepareSendData(
request.set_cost(cost); request.set_cost(cost);
request.set_batch_status(batchStatus); request.set_batch_status(batchStatus);
CHECK_EQ(request.blocks_size(), 0); CHECK_EQ(request.blocks_size(), 0);
VLOG(10) << "request: trainer_id: " << request.trainer_id() VLOG(1) << "request: trainer_id: " << request.trainer_id() << " update_mode"
<< " update_mode" << request.update_mode() << request.update_mode()
<< " send_back_parameter: " << request.send_back_parameter() << " send_back_parameter: " << request.send_back_parameter()
<< " send_back_parameter_type: " << " send_back_parameter_type: "
<< request.send_back_parameter_type() << request.send_back_parameter_type()
......
...@@ -118,7 +118,7 @@ void generateSequenceStartPositions(size_t batchSize, ...@@ -118,7 +118,7 @@ void generateSequenceStartPositions(size_t batchSize,
} }
buf[i] = pos; buf[i] = pos;
pos += len; pos += len;
VLOG(10) << " len=" << len; VLOG(1) << " len=" << len;
} }
buf[numSeqs] = batchSize; buf[numSeqs] = batchSize;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册