提交 8ebde595 编写于 作者: Q Qiao Longfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into refactor-prefetch

test=develop
......@@ -18,8 +18,8 @@ namespace framework {
void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
Tensor *out) {
VLOG(30) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
PADDLE_ENFORCE_NE(
in.place().which(), dst_place.which(),
......
......@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
OpKernelType GetExpectedKernelType(
const ExecutionContext& ctx) const override {
if (Attr<bool>("use_gpu")) {
VLOG(30) << "force use gpu kernel";
VLOG(3) << "force use gpu kernel";
return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
} else {
VLOG(30) << "use default kernel";
VLOG(3) << "use default kernel";
return OpKernelType(proto::VarType::FP32,
ctx.Input<Tensor>("input")->place());
}
......@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
// get output
auto* output2 = scope.Var("OUT2");
gpu_op->Run(scope, cuda_place);
VLOG(30) << "after gpu_op run";
VLOG(3) << "after gpu_op run";
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
paddle::platform::DeviceContextPool& pool =
......
......@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
PADDLE_ENFORCE_NOT_NULL(in_var);
Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
if (UNLIKELY(!in_tensor.IsInitialized())) {
VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
return;
}
......
......@@ -45,8 +45,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
if (is_lock_and_record_event_free) {
VLOG(100) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString();
VLOG(10) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString();
}
}
return ir_graph;
......
......@@ -399,7 +399,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
for (size_t i = 0; i < backward_vars.size(); i += 2) {
auto &p_name = backward_vars[i];
auto &g_name = backward_vars[i + 1];
VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
switch (strategy_.reduce_) {
case BuildStrategy::ReduceStrategy::kReduce:
......@@ -809,8 +809,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
VLOG(100) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id;
VLOG(10) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id;
for (auto &varname : input_var_names) {
sharded_var_device->emplace(varname, op_dev_id);
}
......@@ -826,9 +826,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
if (recv_param_grad.size() == 2U) {
op_dev_id =
GetVarDeviceID(*result, recv_param_grad[1], *sharded_var_device);
VLOG(100) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id;
VLOG(10) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id;
} else {
op_dev_id = GetAppropriateDeviceID(output_var_names);
}
......
......@@ -140,8 +140,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
if (next_compute_op != nullptr) {
if (compute_ref_cnt_map.count(next_compute_op)) {
compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
VLOG(50) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name();
VLOG(5) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name();
} else {
// Create new reference_count_op_handle
ir::Node *ref_cnt_node = graph->CreateEmptyNode(
......
......@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
->stream();
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
platform::CPUPlace(), &coeff_, sizeof(float), stream);
VLOG(100) << place_ << "RUN Scale loss grad op";
VLOG(10) << place_ << "RUN Scale loss grad op";
});
#endif
}
......
......@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
op_node_list[i - 1]->outputs.push_back(dep_var);
dep_var->outputs.push_back(op_node_list[i]);
dep_var->inputs.push_back(op_node_list[i - 1]);
VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name();
VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name();
}
return graph;
}
......
......@@ -210,16 +210,16 @@ void ThreadedSSAGraphExecutor::RunOp(
details::OpHandleBase *op) {
auto op_run = [ready_var_q, op, this] {
try {
if (VLOG_IS_ON(100)) {
VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
if (VLOG_IS_ON(10)) {
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
}
if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_);
}
VLOG(100) << op << " " << op->Name() << " Done ";
VLOG(10) << op << " " << op->Name() << " Done ";
running_ops_--;
ready_var_q->Extend(op->Outputs());
VLOG(100) << op << " " << op->Name() << "Signal posted";
VLOG(10) << op << " " << op->Name() << "Signal posted";
} catch (...) {
exception_holder_.Catch(std::current_exception());
}
......
......@@ -46,7 +46,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
}
ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG(50) << "destroy ExecutorPrepareContext";
VLOG(5) << "destroy ExecutorPrepareContext";
}
template <typename RefCntMap>
......@@ -63,7 +63,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
if ((it->second)-- == 1) {
auto* var = scope.FindVar(name);
if (var != nullptr) {
VLOG(100) << "Erase tensor \'" << name << "\'";
VLOG(10) << "Erase tensor \'" << name << "\'";
if (var->IsType<LoDTensor>()) {
erase_tensors.insert(var->GetMutable<LoDTensor>());
} else if (var->IsType<SelectedRows>()) {
......@@ -162,21 +162,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
if (var->Persistable()) {
auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else {
auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto& var : global_block.AllVars()) {
auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}
......@@ -307,7 +307,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0;
for (auto& feed_target : (*feed_targets)) {
std::string var_name = feed_target.first;
VLOG(30) << "feed target's name: " << var_name;
VLOG(3) << "feed target's name: " << var_name;
// prepend feed op
auto* op = global_block->PrependOp();
......@@ -330,7 +330,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0;
for (auto& fetch_target : (*fetch_targets)) {
std::string var_name = fetch_target.first;
VLOG(30) << "fetch target's name: " << var_name;
VLOG(3) << "fetch target's name: " << var_name;
// append fetch op
auto* op = global_block->AppendOp();
......@@ -482,7 +482,7 @@ void Executor::RunPreparedContext(
void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(30) << "use_mkldnn=True";
VLOG(3) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) {
......
......@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
if (index >= feed_inputs.size()) {
......@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(30) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor;
}
......
......@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) {
scope->Var(param.LSTMX)->GetMutable<LoDTensor>();
scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>();
#define GATE_W(name__) \
auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(40) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
#define GATE_W(name__) \
auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(4) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>();
GATE_W(forget);
......@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
int D = W_forget_w0.dims()[0];
int M = W_forget_w1.dims()[0];
out->Resize(make_ddim({D + M, 4 * D}));
VLOG(30) << "LSTMWeight resized to " << out->dims();
VLOG(3) << "LSTMWeight resized to " << out->dims();
float* out_data = out->mutable_data<float>(platform::CPUPlace());
std::array<const float*, 4> tensors{
......
......@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
int found_conv_bias_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle ConvBias fuse";
VLOG(4) << "handle ConvBias fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_bias_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp
......@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
VLOG(30) << "do not perform conv+bias fuse";
VLOG(3) << "do not perform conv+bias fuse";
return;
}
......
......@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle ConvBN fuse";
VLOG(4) << "handle ConvBN fuse";
// conv, batch_norm,
// conv_weight, conv_out,
......@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
if (fuse_option == DO_NOT_FUSE) {
VLOG(30) << "do not perform conv+bn fuse";
VLOG(3) << "do not perform conv+bn fuse";
return;
}
......@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle ConvBN fuse";
VLOG(4) << "handle ConvBN fuse";
// conv, batch_norm,
// conv_weight, conv_out,
......
......@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
int found_conv_relu_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle ConvReLU fuse";
VLOG(4) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
......@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
FuseOptions fuse_option = FindFuseOption(*conv, *relu);
if (fuse_option == DO_NOT_FUSE) {
VLOG(30) << "do not perform conv+relu fuse";
VLOG(3) << "do not perform conv+relu fuse";
return;
}
......
......@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
int found_depthwise_conv_mkldnn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
GET_NODE(depthwise_conv, (*pattern));
depthwise_conv->Op()->SetType("conv2d");
found_depthwise_conv_mkldnn_count++;
......
......@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle FC fuse";
VLOG(4) << "handle FC fuse";
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
......
......@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(40) << "handle FuseElewiseAddAct fuse";
VLOG(4) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
elewise_add_act_pattern);
......@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n;
VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n;
ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node);
found_elewise_add_act_count++;
......@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(40) << "handle FuseElewiseAddAct fuse";
VLOG(4) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
......@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n;
VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n;
ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node);
found_elewise_add_act_count++;
......@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
......@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto fused_node = g->CreateOpNode(&desc);
VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n;
VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n;
ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node);
found_elewise_add_act_count++;
......
......@@ -90,7 +90,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
const ProgramDesc &program) {
VLOG(30) << "block in program:" << program_.Size();
VLOG(3) << "block in program:" << program_.Size();
std::unordered_map<std::string, VarDesc *> all_vars;
// var nodes for each var name, will have multiple versions in SSA
std::map<std::string, std::vector<ir::Node *>> var_nodes;
......@@ -158,7 +158,7 @@ void Graph::ResolveHazard(
auto it_old = versions.rbegin();
++it_old;
for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
VLOG(30) << "deal with var: " << (*it_new)->Name();
VLOG(3) << "deal with var: " << (*it_new)->Name();
ir::Node *write_op =
(*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
const auto &read_ops = (*it_old)->outputs;
......
......@@ -89,7 +89,7 @@ class Graph {
attr_name);
attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(30) << "deleting " << attr_name;
VLOG(3) << "deleting " << attr_name;
delete attr;
};
}
......
......@@ -40,9 +40,8 @@ void SortHelper(
}
}
VLOG(30) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input "
<< node->inputs.size();
VLOG(3) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input " << node->inputs.size();
ret->push_back(node);
}
......@@ -111,9 +110,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
for (auto &var : n->inputs) {
for (auto &adj_n : var->inputs) {
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var);
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var);
adj_list[n].insert(adj_n);
}
}
......
......@@ -92,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size());
int id = 0;
for (auto &g : subgraphs) {
VLOG(30) << "optimizing #" << id++ << " subgraph";
VLOG(3) << "optimizing #" << id++ << " subgraph";
handler(g, graph);
}
}
bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
VLOG(30) << "mark pdnodes in graph";
VLOG(3) << "mark pdnodes in graph";
if (graph.Nodes().empty()) return false;
for (auto &node : GraphTraits::DFS(graph)) {
for (const auto &pdnode : pattern_.nodes()) {
if (pdnode->Tell(&node)) {
VLOG(40) << "pdnode " << pdnode->name() << " marked";
VLOG(4) << "pdnode " << pdnode->name() << " marked";
pdnodes2nodes_[pdnode.get()].insert(&node);
}
}
......@@ -112,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
// Check to early stop if some PDNode can't find matched Node.
for (auto &pdnode : pattern_.nodes()) {
if (!pdnodes2nodes_.count(pdnode.get())) {
VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
// return false;
}
}
......@@ -121,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
}
}
VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
return !pdnodes2nodes_.empty();
}
......@@ -215,7 +215,7 @@ GraphPatternDetector::DetectPatterns() {
// Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes.
for (const auto &edge : pattern_.edges()) {
VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
// TODO(Superjomn) Fix bug here, the groups might be duplicate here.
// Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected.
......@@ -226,7 +226,7 @@ GraphPatternDetector::DetectPatterns() {
// source -> target
for (Node *source : pdnodes2nodes_[edge.first]) {
for (Node *target : pdnodes2nodes_[edge.second]) {
VLOG(80) << "check " << source->id() << " -- " << target->id();
VLOG(8) << "check " << source->id() << " -- " << target->id();
// TODO(Superjomn) add some prune strategies.
for (const auto &group : pre_groups) {
if (IsNodesLink(source, target)) {
......@@ -243,13 +243,12 @@ GraphPatternDetector::DetectPatterns() {
}
}
}
VLOG(30) << "step " << step << " get records: " << cur_groups.size();
VLOG(3) << "step " << step << " get records: " << cur_groups.size();
for (auto &group : cur_groups) {
for (auto &item : group.roles) {
VLOG(40) << "node " << item.second->id() << " as "
<< item.first->name();
VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
}
VLOG(40) << "=========================================================";
VLOG(4) << "=========================================================";
}
}
......
......@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
VLOG(30) << "draw IR graph viz to " << graph_viz_path;
VLOG(3) << "draw IR graph viz to " << graph_viz_path;
std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
PADDLE_ENFORCE(fout->good());
std::ostream& sout = *fout;
......
......@@ -20,7 +20,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(30) << "Aplies MKL-DNN placement strategy.";
VLOG(3) << "Aplies MKL-DNN placement strategy.";
for (const Node* n : graph->Nodes()) {
if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
n->Op()->SetAttr("use_mkldnn", true);
......
......@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
VarDesc repeated_var = CopyVarDesc(var_desc);
repeated_var.SetName(new_gname);
VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
return repeated_var;
}
return *var_desc;
......@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
auto origin_nodes = graph->ReleaseNodes();
VLOG(30) << "origin nodes count: " << origin_nodes.size();
VLOG(3) << "origin nodes count: " << origin_nodes.size();
ir::Graph& result = *graph;
// 1. record op nodes of different roles
......@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
"%s.repeat.%d", repeated_op.Input("Variance")[0], i);
bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name;
VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name;
repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0],
......
......@@ -76,7 +76,7 @@ class Pass {
attr_name);
attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(30) << "deleting " << attr_name;
VLOG(3) << "deleting " << attr_name;
delete attr;
};
}
......
......@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* graph) {
VLOG(40) << "get one concat pattern";
VLOG(4) << "get one concat pattern";
// fc
GET_NODE(fc_w, detector.pattern());
GET_NODE(fc_bias, detector.pattern());
......
......@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(40) << "handle SeqConv EltAdd Relu fuse";
VLOG(4) << "handle SeqConv EltAdd Relu fuse";
GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
......
......@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
TableItem item;
item.index = i;
item.length = vec[i + 1] - vec[i];
VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
items_.emplace_back(item);
}
// NOTE(yuyang18):
......
......@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
TEST(mixed_vector, ForEach) {
vec<int> tmp;
for (auto& v : tmp) {
VLOG(30) << v;
VLOG(3) << v;
}
}
......
......@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
VLOG(30) << "input " << in << " is not LodTensor";
VLOG(3) << "input " << in << " is not LodTensor";
return;
}
out_var->SetLoDLevel(in_var->GetLoDLevel());
......@@ -241,38 +241,38 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
const proto::OpProto::Attr &attr = GetProtoAttr(name);
switch (attr.type()) {
case proto::AttrType::BOOLEANS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS";
this->attrs_[name] = std::vector<bool>();
break;
}
case proto::AttrType::INTS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS";
this->attrs_[name] = std::vector<int>();
break;
}
case proto::AttrType::LONGS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from LONGS to LONGS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from LONGS to LONGS";
this->attrs_[name] = std::vector<int64_t>();
break;
}
case proto::AttrType::FLOATS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS";
this->attrs_[name] = std::vector<float>();
break;
}
case proto::AttrType::STRINGS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS";
this->attrs_[name] = std::vector<std::string>();
break;
}
case proto::AttrType::BLOCKS: {
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS";
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS";
this->SetBlocksAttr(name, std::vector<BlockDesc *>());
return;
}
......@@ -505,13 +505,13 @@ void OpDesc::CheckAttrs() {
}
void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(30) << "CompileTime infer shape on " << Type();
VLOG(3) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
PADDLE_ENFORCE(static_cast<bool>(infer_shape),
"%s's infer_shape has not been registered", this->Type());
CompileTimeInferShapeContext ctx(*this, block);
if (VLOG_IS_ON(100)) {
if (VLOG_IS_ON(10)) {
std::ostringstream sout;
auto inames = this->InputArgumentNames();
sout << " From [";
......@@ -522,7 +522,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
std::copy(onames.begin(), onames.end(),
std::ostream_iterator<std::string>(sout, ", "));
sout << "]";
VLOG(100) << sout.str();
VLOG(10) << sout.str();
}
infer_shape(&ctx);
}
......@@ -613,7 +613,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto shape = var->GetShape();
res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
} catch (...) {
VLOG(50) << "GetDim of variable " << name << " error";
VLOG(5) << "GetDim of variable " << name << " error";
std::rethrow_exception(std::current_exception());
}
return res;
......@@ -630,7 +630,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
}
} catch (...) {
VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
std::rethrow_exception(std::current_exception());
}
return res;
......
......@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) {
VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead.";
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
......
......@@ -139,7 +139,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(40) << place << " " << DebugStringEx(&scope);
VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
......@@ -159,7 +159,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
} else {
RunImpl(scope, place);
}
VLOG(30) << place << " " << DebugStringEx(&scope);
VLOG(3) << place << " " << DebugStringEx(&scope);
}
bool OperatorBase::HasInputs(const std::string& name) const {
......@@ -716,14 +716,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if (kernel_iter == kernels.end() &&
expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
expected_kernel_key.library_type_ = LibraryType::kPlain;
expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
kernel_iter = kernels.find(expected_kernel_key);
......@@ -775,8 +775,7 @@ void OperatorWithKernel::TransferInplaceVarsBack(
const Scope& scope, const std::vector<std::string>& inplace_vars,
const Scope& transfer_scope) const {
for (auto& var_name : inplace_vars) {
VLOG(30) << "share inplace var " + var_name +
" back to it's original scope";
VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
auto* original_tensor =
GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name));
auto* var = transfer_scope.FindVar(var_name);
......@@ -817,8 +816,8 @@ Scope* OperatorWithKernel::TryTransferData(
transfered_inplace_vars->emplace_back(var_name);
}
VLOG(30) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
// In the inference scenerio, the scopes will be reused across the
// batches, so the `new_scope` here will result in GPU memroy explosion
......
......@@ -208,7 +208,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto &main_tensor = main_var->Get<LoDTensor>();
if (!main_tensor.IsInitialized()) {
VLOG(30) << "one in var not inited, return!";
VLOG(3) << "one in var not inited, return!";
continue;
}
auto &dims = main_tensor.dims();
......
......@@ -162,7 +162,7 @@ Variable* Scope::VarInternal(const std::string& name) {
v = new Variable();
vars_[name].reset(v);
VLOG(30) << "Create variable " << name;
VLOG(3) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
......
......@@ -206,7 +206,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
if (ids.numel() == 0) {
VLOG(30) << "keys is empty, please check data!";
VLOG(3) << "keys is empty, please check data!";
} else {
int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
......
......@@ -22,8 +22,8 @@ namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
src.check_memory_size();
dst->Resize(src.dims());
......@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
......@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
......@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
......
......@@ -39,7 +39,7 @@ void ThreadPool::Init() {
int num_threads = std::thread::hardware_concurrency();
if (FLAGS_dist_threadpool_size > 0) {
num_threads = FLAGS_dist_threadpool_size;
VLOG(10) << "set dist_threadpool_size to " << num_threads;
VLOG(1) << "set dist_threadpool_size to " << num_threads;
}
PADDLE_ENFORCE_GT(num_threads, 0);
threadpool_.reset(new ThreadPool(num_threads));
......
......@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const {
void VarDesc::SetShapes(
const std::vector<std::vector<int64_t>> &multiple_dims) {
if (multiple_dims.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_dims.size());
}
std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs();
......@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
void VarDesc::SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type) {
if (multiple_data_type.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given data types("
<< multiple_data_type.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(3) << "WARNING: The number of given data types("
<< multiple_data_type.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_data_type.size());
}
std::vector<proto::VarType::TensorDesc *> tensor_descs =
......@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
if (multiple_lod_level.size() != GetTensorDescNum()) {
VLOG(30) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(3) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_lod_level.size());
}
switch (desc_.type().type()) {
......
......@@ -55,7 +55,7 @@ bool IsPersistable(const framework::VarDesc *var) {
bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(30) << "Predictor::init()";
VLOG(3) << "Predictor::init()";
if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance";
LOG(INFO) << "You can turn off by set gflags '-profile false'";
......@@ -169,7 +169,7 @@ void AnalysisPredictor::SetMkldnnThreadID(int tid) {
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
VLOG(30) << "Predictor::predict";
VLOG(3) << "Predictor::predict";
inference::Timer timer;
timer.tic();
// set feed variable
......@@ -188,7 +188,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches";
return false;
}
VLOG(30) << "predict cost: " << timer.toc() << "ms";
VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......@@ -198,7 +198,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(30) << "Predictor::set_feed";
VLOG(3) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size();
......@@ -275,7 +275,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) {
VLOG(30) << "Predictor::get_fetch";
VLOG(3) << "Predictor::get_fetch";
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
......@@ -339,7 +339,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(30) << "create AnalysisConfig";
VLOG(3) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memeroy
PADDLE_ENFORCE_GT(
......@@ -353,7 +353,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(30) << "set flag: " << flag;
VLOG(3) << "set flag: " << flag;
framework::InitGflags(flags);
}
}
......
......@@ -152,7 +152,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches";
return false;
}
VLOG(30) << "predict cost: " << timer.toc() << "ms";
VLOG(3) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......
......@@ -44,7 +44,7 @@ void Main() {
config.fraction_of_gpu_memory = 0.1; // set by yourself
predictor = CreatePaddlePredictor(config);
VLOG(30) << "begin to process data";
VLOG(3) << "begin to process data";
// Just a single batch of data.
std::string line;
std::ifstream file(FLAGS_data);
......@@ -59,13 +59,13 @@ void Main() {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32;
VLOG(30) << "run executor";
VLOG(3) << "run executor";
std::vector<PaddleTensor> output;
predictor->Run({input}, &output, 1);
VLOG(30) << "output.size " << output.size();
VLOG(3) << "output.size " << output.size();
auto& tensor = output.front();
VLOG(30) << "output: " << SummaryTensor(tensor);
VLOG(3) << "output: " << SummaryTensor(tensor);
// compare with reference result
CheckOutput(FLAGS_refer, tensor);
......
......@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
}
Record ProcessALine(const std::string& line) {
VLOG(30) << "process a line";
VLOG(3) << "process a line";
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
......@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
VLOG(30) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size();
VLOG(3) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size();
return record;
}
......@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
VLOG(30) << "predictor output numel " << numel;
VLOG(30) << "reference output numel " << refer.data.size();
VLOG(3) << "predictor output numel " << numel;
VLOG(3) << "reference output numel " << refer.data.size();
CHECK_EQ(numel, refer.data.size());
switch (output.dtype) {
case PaddleDType::INT64: {
......
......@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// parameter.
if (var_name == "feed" || var_name == "fetch") continue;
if (var->Type() == typeid(framework::LoDTensorArray)) {
VLOG(40) << "collect " << var_name;
VLOG(4) << "collect " << var_name;
arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
}
}
......@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
CollectTensorArrays(kid);
}
VLOG(30) << "Collect " << arrays_.size() << " arrays";
VLOG(3) << "Collect " << arrays_.size() << " arrays";
flag_ = false;
}
}
......
......@@ -78,7 +78,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
for (auto* var : global_block.AllVars()) {
if (IsPersistable(var)) {
VLOG(30) << "persistable variable's name: " << var->Name();
VLOG(3) << "persistable variable's name: " << var->Name();
framework::VarDesc* new_var = load_block->Var(var->Name());
new_var->SetShape(var->GetShape());
......@@ -121,7 +121,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
const std::string& dirname) {
std::string model_filename = dirname + "/__model__";
std::string program_desc_str;
VLOG(30) << "loading model from " << model_filename;
VLOG(3) << "loading model from " << model_filename;
ReadBinaryFile(model_filename, &program_desc_str);
std::unique_ptr<framework::ProgramDesc> main_program(
......
......@@ -53,7 +53,7 @@ class Pool2dOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc &op,
const framework::Scope &scope, bool test_mode) override {
VLOG(40)
VLOG(4)
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -27,7 +27,7 @@ struct Record {
};
Record ProcessALine(const std::string &line) {
VLOG(30) << "process a line";
VLOG(3) << "process a line";
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
......@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
for (auto &s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
VLOG(30) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size();
VLOG(3) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size();
return record;
}
......
......@@ -76,12 +76,12 @@ class ChunkedAllocator : public Allocator {
default_allocator_ = raw_allocator_;
} else {
if (capacity == 1) {
VLOG(10) << "Create BestFitAllocator with chunk_size "
<< max_chunk_size_;
VLOG(1) << "Create BestFitAllocator with chunk_size "
<< max_chunk_size_;
default_allocator_ = CreateAllocatorWithChunk();
} else {
VLOG(10) << "Create AutoIncrementAllocator with chunk_size "
<< max_chunk_size_ << " and capacity " << capacity;
VLOG(1) << "Create AutoIncrementAllocator with chunk_size "
<< max_chunk_size_ << " and capacity " << capacity;
default_allocator_ = std::make_shared<AutoIncrementAllocator>(
[this] { return std::move(CreateAllocatorWithChunk()); }, capacity);
}
......
......@@ -86,18 +86,18 @@ struct NaiveAllocator {
template <>
void *Alloc<platform::CPUPlace>(const platform::CPUPlace &place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
VLOG(1) << "Allocate " << size << " bytes on " << platform::Place(place);
void *p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(100) << " pointer=" << p;
VLOG(10) << " pointer=" << p;
return p;
}
template <>
void Free<platform::CPUPlace>(const platform::CPUPlace &place, void *p) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
VLOG(1) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}
......@@ -124,12 +124,12 @@ BuddyAllocator *GetGPUBuddyAllocator(int gpu_id) {
std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(100) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
}
});
......
......@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
system_allocator_(std::move(system_allocator)) {}
BuddyAllocator::~BuddyAllocator() {
VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed";
VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed";
while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
// if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) {
VLOG(100) << "Allocate from system allocator.";
VLOG(10) << "Allocate from system allocator.";
return SystemAlloc(size);
}
......@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
return nullptr;
}
} else {
VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
}
total_used_ += size;
......@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
// Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
VLOG(100) << "Free from address " << block;
VLOG(10) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
VLOG(100) << "Free directly from system allocator";
VLOG(10) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_));
......@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the right buddy
if (block->has_right_buddy(cache_)) {
VLOG(100) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_);
VLOG(10) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_);
......@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the left buddy
if (block->has_left_buddy(cache_)) {
VLOG(100) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_);
VLOG(10) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_);
......@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) {
}
// Dumping this block into pool
VLOG(100) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")";
VLOG(10) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
......@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0;
void* p = system_allocator_->Alloc(&index, size);
VLOG(100) << "Allocated " << p << " from system allocator.";
VLOG(10) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr;
......@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
if (p == nullptr) return pool_.end();
VLOG(100) << "Creating and inserting new block " << p
<< " from system allocator";
VLOG(10) << "Creating and inserting new block " << p
<< " from system allocator";
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr);
......@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it);
VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
block->split(&cache_, size);
VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")";
VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")";
block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist
if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")";
VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
......@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
return;
}
VLOG(100) << "Return block " << block << " to fallback allocator.";
VLOG(10) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
VLOG(100) << "Return block " << block << " to base allocator.";
VLOG(10) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......
......@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
return existing_desc->second;
} else {
auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
PADDLE_ASSERT(desc->check_guards());
return *reinterpret_cast<const MemoryBlock::Desc*>(block);
}
......
......@@ -100,7 +100,7 @@ class ActivationGradKernel
auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx);
} else {
VLOG(100) << " Inplace activation ";
VLOG(10) << " Inplace activation ";
auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx);
}
......
......@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
} else {
offset = static_cast<size_t>(*i_tensor.data<int64_t>());
}
VLOG(100) << " Offset = " << offset;
VLOG(10) << " Offset = " << offset;
return offset;
}
};
......
......@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second;
VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]";
VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]";
// Copy data
PADDLE_ENFORCE_GE(end_offset, start_offset);
size_t len = end_offset - start_offset;
......
......@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif
VLOG(30) << "Setting descriptors.";
VLOG(3) << "Setting descriptors.";
std::vector<int> dims;
std::vector<int> strides;
if (data_layout == DataLayout::kNCHW) {
......
......@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
auto selected_items = ToMap(items, high_level.back());
VLOG(30) << "selected_items:";
VLOG(3) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(30) << "offset:" << i;
VLOG(3) << "offset:" << i;
for (auto &item : selected_items[i]) {
VLOG(30) << ItemToString(item);
VLOG(3) << ItemToString(item);
}
}
......@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
}
result.emplace_back(items);
}
VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) {
VLOG(30) << "item set:";
VLOG(3) << "item set:";
for (auto &item : items) {
VLOG(30) << ItemToString(item);
VLOG(3) << ItemToString(item);
}
}
......
......@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
if (n == 1) {
VLOG(30) << "Warning: concat op have only one input, may waste memory";
VLOG(3) << "Warning: concat op have only one input, may waste memory";
}
auto out_dims = ins[0];
......
......@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
auto col = Attr<int>("col");
VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
<< " column to var " << out_name;
VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var "
<< out_name;
auto &feed_list = feed_var->Get<framework::FeedFetchList>();
auto &feed_item = feed_list.at(static_cast<size_t>(col));
......
......@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
dst_item.set_lod(src_item.lod());
VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;
}
};
......
......@@ -48,7 +48,7 @@ static void SplitTensorAndMoveTensorToScopes(
auto lod_tensors = tensor.SplitLoDTensor(places);
for (auto &lod : lod_tensors) {
VLOG(30) << lod.dims();
VLOG(3) << lod.dims();
}
if (num_sub_scopes == 0) {
num_sub_scopes = lod_tensors.size();
......@@ -263,7 +263,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
if (s == framework::kEmptyVarName) {
continue;
}
VLOG(30) << "Moving " << s;
VLOG(3) << "Moving " << s;
CopyOrShare(*sub_scopes[0]->FindVar(s), place, scope.FindVar(s));
}
WaitOnPlaces(places);
......@@ -277,7 +277,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
if (s == framework::kEmptyVarName) {
continue;
}
VLOG(30) << "Accumulating " << s;
VLOG(3) << "Accumulating " << s;
if (s == framework::kEmptyVarName) continue;
std::string tmp_name;
auto *tmp = sub_scopes[0]->Var(&tmp_name);
......@@ -289,7 +289,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto sum_op = framework::OpRegistry::CreateOp(
"sum", {{"X", {s, tmp_name}}}, {{"Out", {s}}},
framework::AttributeMap{{"use_mkldnn", {false}}});
VLOG(100) << sum_op->DebugStringEx(sub_scopes[0]);
VLOG(10) << sum_op->DebugStringEx(sub_scopes[0]);
sum_op->Run(*sub_scopes[0], places[0]);
WaitOnPlace(places[0]);
}
......@@ -316,7 +316,7 @@ class ParallelDoGradOpDescMaker : public framework::SingleGradOpDescMaker {
auto *grad = new framework::OpDesc();
grad->SetType("parallel_do_grad");
for (auto &input_param : this->InputNames()) {
VLOG(30) << input_param;
VLOG(3) << input_param;
grad->SetInput(input_param, this->Input(input_param));
if (input_param != kPlaces) {
grad->SetOutput(framework::GradVarName(input_param),
......
......@@ -34,8 +34,8 @@ class WriteToArrayOp : public ArrayOp {
auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDTensorArray>();
if (offset >= out->size()) {
VLOG(100) << "Resize " << Output("Out") << " from " << out->size()
<< " to " << offset + 1;
VLOG(10) << "Resize " << Output("Out") << " from " << out->size()
<< " to " << offset + 1;
out->resize(offset + 1);
}
auto *out_tensor = &out->at(offset);
......@@ -47,9 +47,9 @@ class WriteToArrayOp : public ArrayOp {
TensorCopy(x_tensor, place, dev_ctx, out_tensor);
} else {
VLOG(100) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
"nothing has been written to output array["
<< offset << "].";
VLOG(10) << "WARNING: The input tensor 'x_tensor' holds no memory, so "
"nothing has been written to output array["
<< offset << "].";
}
}
};
......@@ -104,7 +104,7 @@ class WriteToArrayInferVarType : public framework::VarTypeInference {
framework::BlockDesc *block) const override {
auto x_name = op_desc.Input("X")[0];
auto out_name = op_desc.Output("Out")[0];
VLOG(100) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
VLOG(10) << "Set Variable " << out_name << " as LOD_TENSOR_ARRAY";
auto &out = block->FindRecursiveOrCreateVar(out_name);
out.SetType(framework::proto::VarType::LOD_TENSOR_ARRAY);
auto *x = block->FindVarRecursive(x_name);
......@@ -139,7 +139,7 @@ class ReadFromArrayOp : public ArrayOp {
framework::TensorCopy(x_array[offset], place, dev_ctx, out_tensor);
out_tensor->set_lod(x_array[offset].lod());
} else {
VLOG(100) << "offset " << offset << " >= " << x_array.size();
VLOG(10) << "offset " << offset << " >= " << x_array.size();
}
}
};
......
......@@ -132,15 +132,15 @@ class WhileGradOp : public framework::OperatorBase {
for (auto cur_scope_iter = step_scopes->rbegin();
cur_scope_iter != step_scopes->rend(); ++cur_scope_iter) {
VLOG(30) << "Start backward at time_step "
<< cur_scope_iter - step_scopes->rbegin();
VLOG(3) << "Start backward at time_step "
<< cur_scope_iter - step_scopes->rbegin();
framework::Scope &cur_scope = **cur_scope_iter;
// Link OG from outside to inside
for (size_t i = 0; i < outside_og_names.size(); ++i) {
auto outside_og_name = outside_og_names[i];
auto inside_og_name = inside_og_names[i];
VLOG(80) << "Linking outside " << outside_og_name << " --> inside "
<< inside_og_name;
VLOG(8) << "Linking outside " << outside_og_name << " --> inside "
<< inside_og_name;
if (scope.FindVar(outside_og_name) == nullptr) {
continue;
}
......@@ -162,11 +162,11 @@ class WhileGradOp : public framework::OperatorBase {
auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
auto &inside_array =
detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
VLOG(80) << outside_og_name << " size = " << outside_array.size();
VLOG(8) << outside_og_name << " size = " << outside_array.size();
inside_array.resize(outside_array.size());
for (size_t j = 0; j < inside_array.size(); ++j) {
VLOG(80) << j << " " << outside_array[j].numel();
VLOG(8) << j << " " << outside_array[j].numel();
if (outside_array[j].numel() != 0) {
inside_array[j].set_lod(outside_array[j].lod());
inside_array[j].ShareDataWith(outside_array[j]);
......@@ -292,7 +292,7 @@ class WhileGradOpDescMaker : public framework::SingleGradOpDescMaker {
auto igs = InputGrad(kX, /*do not drop empty gradient*/ false);
for (auto &each_ig : igs) {
if (inner_op_outputs.find(each_ig) == inner_op_outputs.end()) {
VLOG(80) << "Ignore " << each_ig;
VLOG(8) << "Ignore " << each_ig;
each_ig = framework::kEmptyVarName;
}
}
......@@ -356,8 +356,8 @@ class WhileGradOpVarTypeInference : public framework::VarTypeInference {
auto &p_var = detail::Ref(block->FindVarRecursive(p_names[i]));
auto *g_var = block->FindVarRecursive(pg_ig_names[i]);
if (g_var != nullptr) { // Gradient could be @EMPTY@
VLOG(50) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
<< " type: " << p_var.GetType();
VLOG(5) << "Setting " << pg_ig_names[i] << " following " << p_names[i]
<< " type: " << p_var.GetType();
g_var->SetType(p_var.GetType());
g_var->SetDataType(p_var.GetDataType());
}
......
......@@ -151,11 +151,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
// Currently tensor core is only enabled using this algo
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
half_float = true;
VLOG(50) << "use cudnn_tensor_op_math";
VLOG(5) << "use cudnn_tensor_op_math";
} else {
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH));
VLOG(50) << "NOT use cudnn_tensor_op_math";
VLOG(5) << "NOT use cudnn_tensor_op_math";
}
#endif
......
......@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
void AsyncBRPCServer::WaitServerReady() {
VLOG(30) << "AsyncGRPCServer is wait server ready";
VLOG(3) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(30) << "AsyncGRPCServer WaitSeverReady";
VLOG(3) << "AsyncGRPCServer WaitSeverReady";
}
}; // namespace distributed
......
......@@ -40,7 +40,7 @@ void GRPCClient::SendComplete() {
std::unique_lock<std::mutex> lk(completed_mutex_);
if (!completed_) {
for (auto& it : channels_) {
VLOG(30) << "send complete message to " << it.first;
VLOG(3) << "send complete message to " << it.first;
this->AsyncSendComplete(it.first);
}
PADDLE_ENFORCE(this->Wait(), "internal grpc error");
......@@ -83,7 +83,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
::grpc::ByteBuffer req;
SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = nullptr;
......@@ -144,7 +144,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
::grpc::ByteBuffer buf;
RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetResponse;
......@@ -195,7 +195,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val,
0, table_name_val);
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetResponse;
......@@ -333,14 +333,14 @@ void GRPCClient::Proceed() {
void* tag = nullptr;
bool ok = false;
VLOG(30) << "GRPCClient Proceed begin";
VLOG(3) << "GRPCClient Proceed begin";
while (!stopped_ && cq_.Next(&tag, &ok)) {
BaseProcessor* c = static_cast<BaseProcessor*>(tag);
GPR_ASSERT(ok);
PADDLE_ENFORCE(c);
if (c->status_.ok()) {
VLOG(30) << c->GetVarHandlePtr()->String() << " process";
VLOG(3) << c->GetVarHandlePtr()->String() << " process";
c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
// FIXME(gongwb): parse error_details?
......@@ -375,7 +375,7 @@ void GRPCClient::Proceed() {
sync_cond_.notify_all();
}
}
VLOG(30) << "GRPCClient Proceed end";
VLOG(3) << "GRPCClient Proceed end";
}
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
......
......@@ -100,7 +100,7 @@ class RequestSend final : public RequestBase {
void Process() override {
std::string varname = GetReqName();
VLOG(40) << "RequestSend var_name:" << varname;
VLOG(4) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar();
......@@ -137,7 +137,7 @@ class RequestGet final : public RequestBase {
// proc request.
std::string varname = request_.varname();
int trainer_id = request_.trainer_id();
VLOG(40) << "RequestGet " << varname;
VLOG(4) << "RequestGet " << varname;
auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname);
......@@ -185,8 +185,8 @@ class RequestPrefetch final : public RequestBase {
std::string out_var_name = request_->OutVarname();
std::string table_name = request_->TableName();
int trainer_id = request_->GetTrainerId();
VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name;
VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name);
......@@ -234,8 +234,8 @@ class RequestCheckpointNotify final : public RequestBase {
std::string checkpoint_dir = request_->OutVarname();
int trainer_id = request_->GetTrainerId();
VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir;
VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir;
request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
trainer_id, checkpoint_dir);
......@@ -249,10 +249,10 @@ class RequestCheckpointNotify final : public RequestBase {
};
void AsyncGRPCServer::WaitServerReady() {
VLOG(40) << "AsyncGRPCServer is wait server ready";
VLOG(4) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(40) << "AsyncGRPCServer WaitSeverReady";
VLOG(4) << "AsyncGRPCServer WaitSeverReady";
}
// Define an option subclass in order to disable SO_REUSEPORT for the
......@@ -303,15 +303,14 @@ void AsyncGRPCServer::StartServer() {
reqs.reserve(kRequestBufSize);
for (int i = 0; i < kRequestBufSize; i++) {
VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " I: " << i;
VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i;
TryToRegisterNewOne(rpc_name, i);
}
for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(40) << t.first << " creates threads!";
VLOG(4) << t.first << " creates threads!";
}
}
......@@ -328,7 +327,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join();
VLOG(40) << t.first << " threads ends!";
VLOG(4) << t.first << " threads ends!";
}
}
}
......@@ -336,7 +335,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) {
t.second->Shutdown();
VLOG(40) << t.first << " queue shutdown!";
VLOG(4) << t.first << " queue shutdown!";
}
}
......@@ -345,7 +344,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true;
ShutdownQueue();
VLOG(40) << "server_ shutdown!";
VLOG(4) << "server_ shutdown!";
server_->Shutdown();
}
......@@ -353,12 +352,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) {
VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
return;
}
VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id;
VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id;
auto& reqs = rpc_reqs_[rpc_name];
auto& handler = rpc_call_map_[rpc_name];
......@@ -379,7 +378,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
reqs[req_id] = b;
VLOG(40) << "Create RequestSend status:" << b->Status();
VLOG(4) << "Create RequestSend status:" << b->Status();
}
void AsyncGRPCServer::HandleRequest(
......@@ -389,15 +388,15 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false;
while (true) {
VLOG(40) << "HandleRequest " << rpc_name << " wait next";
VLOG(4) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) {
VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!";
break;
}
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next";
VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next";
auto& reqs = rpc_reqs_[rpc_name];
RequestBase* base = nullptr;
......@@ -407,7 +406,7 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id];
}
VLOG(30) << base->Status2String(rpc_name);
VLOG(3) << base->Status2String(rpc_name);
// reference:
// https://github.com/tensorflow/tensorflow/issues/5596
......
......@@ -75,7 +75,7 @@ class VarHandle {
wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
ret = status_;
}
VLOG(70) << "VarHandle wait:" << ret;
VLOG(7) << "VarHandle wait:" << ret;
return ret != kErrorState;
}
......@@ -84,7 +84,7 @@ class VarHandle {
std::unique_lock<std::mutex> lk(sync_mutex_);
status_ = ok ? kFinishState : kErrorState;
}
VLOG(70) << "VarHandle finish:" << ok;
VLOG(7) << "VarHandle finish:" << ok;
wait_cond_.notify_all();
}
......
......@@ -39,19 +39,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
const int trainer_id,
const std::string& out_var_name,
const std::string& table_name) {
VLOG(40) << "RequestSendHandler:" << varname;
VLOG(4) << "RequestSendHandler:" << varname;
// Sync
if (varname == BATCH_BARRIER_MESSAGE) {
VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE";
rpc_server_->IncreaseBatchBarrier(kRequestSend);
} else if (varname == COMPLETE_MESSAGE) {
VLOG(30) << "sync: recv complete message";
VLOG(3) << "sync: recv complete message";
rpc_server_->Complete();
} else {
// Async
if (!sync_mode_) {
VLOG(30) << "async process var: " << varname;
VLOG(3) << "async process var: " << varname;
try {
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
scope);
......@@ -62,7 +62,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
return true;
} else { // sync
rpc_server_->WaitCond(kRequestSend);
VLOG(30) << "sync: processing received var: " << varname;
VLOG(3) << "sync: processing received var: " << varname;
if (invar == nullptr) {
LOG(FATAL) << "sync: Can not find server side var: " << varname;
......@@ -80,10 +80,11 @@ bool RequestGetHandler::Handle(const std::string& varname,
const int trainer_id,
const std::string& out_var_name,
const std::string& table_name) {
VLOG(40) << "RequestGetHandler:" << varname;
VLOG(4) << "RequestGetHandler:" << varname;
if (sync_mode_) {
if (varname == FETCH_BARRIER_MESSAGE) {
VLOG(30) << "sync: recv fetch barrier message";
VLOG(3) << "sync: recv fetch barrier message";
rpc_server_->IncreaseBatchBarrier(kRequestGet);
} else {
rpc_server_->WaitCond(kRequestGet);
......@@ -95,14 +96,13 @@ bool RequestGetHandler::Handle(const std::string& varname,
// NOTE: the format is determined by distributed_transpiler.py
std::string param_bak_name =
string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
VLOG(30) << "getting " << param_bak_name << " trainer_id "
<< trainer_id;
VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
auto var = scope_->FindVar(varname);
auto t_orig = var->Get<framework::LoDTensor>();
auto param_bak = scope_->Var(param_bak_name);
auto t = param_bak->GetMutable<framework::LoDTensor>();
t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
VLOG(30) << "copying " << varname << " to " << param_bak_name;
VLOG(3) << "copying " << varname << " to " << param_bak_name;
framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
}
*outvar = scope_->FindVar(varname);
......@@ -118,7 +118,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
const int trainer_id,
const std::string& out_var_name,
const std::string& table_name) {
VLOG(40) << "RequestPrefetchHandler " << varname;
VLOG(4) << "RequestPrefetchHandler " << varname;
if (table_name.empty()) {
auto var_desc = program_->Block(0).FindVar(out_var_name);
......@@ -150,8 +150,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
lt_var->clear();
lt_var->append(out_var_name);
VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name;
VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
return true;
}
......
......@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
port_file.open(file_path);
port_file << selected_port_;
port_file.close();
VLOG(40) << "selected port written to " << file_path;
VLOG(4) << "selected port written to " << file_path;
}
void RPCServer::WaitBarrier(const std::string& rpc_name) {
......@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
exit_flag_.load());
});
VLOG(30) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
}
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0;
std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name];
......@@ -71,7 +71,7 @@ void RPCServer::Complete() {
client_num_--;
need_reset_all_vars_ = true;
VLOG(40) << "decrease client_num to: " << client_num_;
VLOG(4) << "decrease client_num to: " << client_num_;
if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
barrier_counter_[kRequestGet]--;
}
......@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
}
void RPCServer::ResetBarrierCounter() {
VLOG(30) << "RPCServer ResetBarrierCounter ";
VLOG(3) << "RPCServer ResetBarrierCounter ";
std::unique_lock<std::mutex> lock(mutex_);
for (auto& t : barrier_counter_) {
t.second = 0;
......@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
static int cond = -1;
rpc_cond_map_[rpc_name] = ++cond;
VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name];
VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name];
}
void RPCServer::SetCond(const std::string& rpc_name) {
VLOG(30) << "RPCServer SetCond " << rpc_name;
VLOG(3) << "RPCServer SetCond " << rpc_name;
{
std::unique_lock<std::mutex> lock(mutex_);
cur_cond_ = rpc_cond_map_[rpc_name];
......@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
}
void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(40) << "RPCServer WaitCond " << rpc_name;
VLOG(4) << "RPCServer WaitCond " << rpc_name;
int cond = 0;
{
std::unique_lock<std::mutex> lock(mutex_);
......
......@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
size_to_write = length - total_written;
}
// This log is useful to see how long a internal block size is of rpc.
VLOG(70) << "copy " << size_to_write << " data to CUDAPlace";
VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream());
......@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
// TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu;
// This log is useful to see how long a internal block size is of rpc.
VLOG(70) << "copy " << size_to_write << " data to CPUPlace";
VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write;
......@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField(
#endif
}
VLOG(70) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl;
VLOG(7) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) {
PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!");
......
......@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
auto lookup_table_save_dir =
string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
VLOG(30) << "checkpoint notify sending lookup table: "
<< lookup_table_name << " and dir:" << dir << " to " << epmap[i];
VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name
<< " and dir:" << dir << " to " << epmap[i];
}
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
}
......
......@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) {
VLOG(30) << "fetch barrier, ep: " << ep;
VLOG(3) << "fetch barrier, ep: " << ep;
rpc_client->AsyncSendFetchBarrier(ep);
}
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
......@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
for (auto& ep : endpoint_list) {
VLOG(30) << "sending nccl id to " << ep;
VLOG(3) << "sending nccl id to " << ep;
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
}
client->Wait();
......@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendBatchBarrier(ep);
}
client->Wait();
VLOG(30) << "sending completed...";
VLOG(3) << "sending completed...";
}
void GetIdByServer(framework::Scope* scope,
......@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
rpc_service->SetCond(distributed::kRequestSend);
VLOG(30) << "start getting nccl id from trainer 0...";
VLOG(3) << "start getting nccl id from trainer 0...";
rpc_service->WaitBarrier(distributed::kRequestSend);
VLOG(30) << "got nccl id and stop server...";
VLOG(3) << "got nccl id and stop server...";
rpc_service->ShutDown();
VLOG(30) << "rpc server stopped";
VLOG(3) << "rpc server stopped";
server_thread.join();
}
};
......
......@@ -36,7 +36,7 @@ namespace operators {
void RunServer(std::shared_ptr<distributed::RPCServer> service) {
service->StartServer();
VLOG(40) << "RunServer thread end";
VLOG(4) << "RunServer thread end";
}
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces) {
......@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks(
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
int run_block = idx; // thread local
try {
VLOG(30) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get();
VLOG(3) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get();
executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (const std::exception &e) {
LOG(FATAL) << "run sub program:" << idx << " error " << e.what();
......@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
const std::vector<int> &prefetch_block_id_list,
const int checkpoint_point_block_id) const {
VLOG(20) << "RunSyncLoop";
VLOG(2) << "RunSyncLoop";
size_t num_blocks = program->Size();
auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
......@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
}
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
recv_scope);
VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
......@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : sparse_vars_) {
auto var = recv_scope->FindVar(varname);
if (var == nullptr) {
VLOG(20) << "can not find var " << varname << " in received scope";
VLOG(2) << "can not find var " << varname << " in received scope";
continue;
}
if (var->IsType<framework::SelectedRows>()) {
VLOG(30) << "reset sparse var: " << varname;
VLOG(3) << "reset sparse var: " << varname;
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
} else {
PADDLE_THROW("The type of sparse var should be SelectedRows");
......@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : dense_vars_) {
auto var = recv_scope->FindVar(varname);
if (var == nullptr) {
VLOG(20) << "can not find var " << varname << " in received scope";
VLOG(2) << "can not find var " << varname << " in received scope";
continue;
}
if (var->IsType<framework::LoDTensor>()) {
......@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program,
framework::Scope *recv_scope) const {
VLOG(20) << "RunAsyncLoop";
VLOG(2) << "RunAsyncLoop";
auto grad_to_block_id_str =
Attr<std::vector<std::string>>("grad_to_block_id");
DoubleFindMap<std::string, int32_t> grad_to_block_id;
......@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
const std::string &grad_and_id) {
std::vector<std::string> pieces;
split(grad_and_id, ':', &pieces);
VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2);
PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
......@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
while (true) {
if (rpc_service_->IsExit()) {
VLOG(40) << "get exit!rpc_processor break!";
VLOG(4) << "get exit!rpc_processor break!";
break;
}
......@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
std::string endpoint = Attr<std::string>("endpoint");
int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id;
VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id;
rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));
......@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
prefetch_var_name_to_block_id_str) {
std::vector<std::string> pieces;
split(prefetch_var_name_and_id, ':', &pieces);
VLOG(30) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1];
VLOG(3) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2);
int block_id = std::stoi(pieces[1]);
......@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(30) << "wait server thread to become ready...";
VLOG(3) << "wait server thread to become ready...";
rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
......
......@@ -48,12 +48,12 @@ class PrefetchOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < ins.size(); i++) {
if (NeedSend(scope, ins[i])) {
VLOG(30) << "sending " << ins[i] << " to " << epmap[i] << " to get "
<< outs[i] << " back";
VLOG(3) << "sending " << ins[i] << " to " << epmap[i] << " to get "
<< outs[i] << " back";
rets.push_back(rpc_client->AsyncPrefetchVar(epmap[i], ctx, scope,
ins[i], outs[i]));
} else {
VLOG(30) << "don't send no-initialied variable: " << ins[i];
VLOG(3) << "don't send no-initialied variable: " << ins[i];
}
}
for (size_t i = 0; i < rets.size(); i++) {
......
......@@ -47,7 +47,7 @@ class RecvOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < outs.size(); i++) {
VLOG(30) << "getting " << outs[i] << " from " << epmap[i];
VLOG(3) << "getting " << outs[i] << " from " << epmap[i];
rets.push_back(rpc_client->AsyncGetVar(epmap[i], ctx, scope, outs[i]));
}
if (sync_mode) {
......
......@@ -42,12 +42,12 @@ class SendBarrierOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(
Attr<int>("trainer_id"));
VLOG(30) << "SendBarrierOp sync";
VLOG(3) << "SendBarrierOp sync";
// need to wait before sending send_barrier message
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) {
VLOG(30) << "send barrier, ep: " << ep;
VLOG(3) << "send barrier, ep: " << ep;
rpc_client->AsyncSendBatchBarrier(ep);
}
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
......@@ -50,10 +50,10 @@ class SendOp : public framework::OperatorBase {
std::vector<distributed::VarHandlePtr> rets;
for (size_t i = 0; i < ins.size(); i++) {
if (NeedSend(scope, ins[i])) {
VLOG(30) << "sending " << ins[i] << " to " << epmap[i];
VLOG(3) << "sending " << ins[i] << " to " << epmap[i];
rets.push_back(rpc_client->AsyncSendVar(epmap[i], ctx, scope, ins[i]));
} else {
VLOG(30) << "don't send no-initialied variable: " << ins[i];
VLOG(3) << "don't send no-initialied variable: " << ins[i];
}
}
if (sync_send) {
......
......@@ -120,7 +120,7 @@ void AddOp(const std::string &type, const f::VariableNameMap &inputs,
void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
f::Scope scope;
p::CPUPlace place;
VLOG(40) << "before init tensor";
VLOG(4) << "before init tensor";
if (is_sparse) {
InitSelectedRowsInScope(place, &scope);
} else {
......@@ -146,7 +146,7 @@ void StartServerNet(bool is_sparse, std::atomic<bool> *initialized) {
attrs.insert({"PrefetchBlock", prefetch_block});
attrs.insert({"grad_to_block_id", std::vector<std::string>({""})});
attrs.insert({"sync_mode", true});
VLOG(40) << "before init op";
VLOG(4) << "before init op";
listen_and_serv_op =
f::OpRegistry::CreateOp("listen_and_serv", {{"X", {"x1"}}}, {}, attrs);
*initialized = true;
......
......@@ -32,7 +32,7 @@ class SplitByrefOpKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < outs.size(); ++i) {
// NOTE: no need to call mutable_data here to allocate memory.
auto* out = outs[i];
VLOG(30) << "spliting by ref: " << row_offset << " " << out->dims()[0];
VLOG(3) << "spliting by ref: " << row_offset << " " << out->dims()[0];
*out = in->Slice(row_offset, row_offset + out->dims()[0]);
row_offset += out->dims()[0];
}
......
......@@ -44,7 +44,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
for (size_t i = 0; i < ids_tensors.size(); ++i) {
batch_size += ids_tensors[i]->dims()[0];
}
VLOG(40) << "Get Total BatchSize is: " << batch_size;
VLOG(4) << "Get Total BatchSize is: " << batch_size;
std::vector<T> all_ids(batch_size);
int offset = 0;
......
......@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
VLOG(100) << Input("X") << "'s lod information is " << *out;
VLOG(10) << Input("X") << "'s lod information is " << *out;
}
};
......
......@@ -153,13 +153,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr);
if (is_sparse) {
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows";
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows";
block->Var(out_var_name)
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else {
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor";
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
}
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
......
......@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
}
auto et = GetCurrentUS();
VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat;
VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat;
for (int i = 0; i < n; ++i) {
EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
}
......
......@@ -86,7 +86,7 @@ TEST(JitKernel, vrelu) {
vrelu_intri8(d, x_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us";
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat << " us";
}
#endif
auto ttgts = GetCurrentUS();
......@@ -94,9 +94,8 @@ TEST(JitKernel, vrelu) {
ker->Compute(x_data, ztgt_data, d);
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -127,9 +126,8 @@ TEST(JitKernel, vaddbias) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -175,14 +173,14 @@ TEST(JitKernel, vexp) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -235,10 +233,9 @@ TEST(JitKernel, vsigmoid) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -296,10 +293,9 @@ TEST(JitKernel, vtanh) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -397,10 +393,9 @@ TEST(JitKernel, lstm) {
ker->ComputeCtHt(&step, &attr);
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat << " us";
}
}
......@@ -473,8 +468,8 @@ TEST(JitKernel, vscal) {
vscal_inp_intri8(d, a, y_data);
}
auto si3 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat << " us";
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat << " us";
}
#endif
......@@ -488,18 +483,15 @@ TEST(JitKernel, vscal) {
ker->Compute(&a, y_data, y_data, d);
}
auto ttgte1 = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat
<< " us, "
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat
<< " us";
<< "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -557,7 +549,7 @@ TEST(JitKernel, vmul) {
vmul_intri8(d, x_data, y_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
}
#endif
......@@ -567,14 +559,13 @@ TEST(JitKernel, vmul) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -632,7 +623,7 @@ TEST(JitKernel, vadd) {
vadd_intri8(d, x_data, y_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
}
#endif
......@@ -642,14 +633,13 @@ TEST(JitKernel, vadd) {
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -699,10 +689,9 @@ TEST(JitKernel, vaddrelu) {
ker->Compute(x_data, y_data, ztgt_data, d);
}
auto ttgte = GetCurrentUS();
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat << " us";
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......
......@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) {
if (inputs.size() == 0) {
VLOG(30) << "no input! return";
VLOG(3) << "no input! return";
return;
}
const framework::SelectedRows* has_value_input = nullptr;
......@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
}
}
if (has_value_input == nullptr) {
VLOG(30) << "no input has value! just return" << std::endl;
VLOG(3) << "no input has value! just return" << std::endl;
return;
}
auto input_width = has_value_input->value().dims()[1];
......
......@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) {
if (inputs.size() == 0) {
VLOG(30) << "no input! return";
VLOG(3) << "no input! return";
return;
}
const framework::SelectedRows* has_value_input = nullptr;
......@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
}
}
if (has_value_input == nullptr) {
VLOG(30) << "no input has value! just return" << std::endl;
VLOG(3) << "no input has value! just return" << std::endl;
return;
}
auto input_width = has_value_input->value().dims()[1];
......
......@@ -38,9 +38,9 @@ class MulOp : public framework::OperatorWithKernel {
int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims");
int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims");
VLOG(30) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
<< " x_num_col_dims=" << x_num_col_dims
<< " y_num_col_dims=" << y_num_col_dims;
VLOG(3) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
<< " x_num_col_dims=" << x_num_col_dims
<< " y_num_col_dims=" << y_num_col_dims;
PADDLE_ENFORCE_GT(
x_dims.size(), x_num_col_dims,
......
......@@ -63,16 +63,16 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
// device id
int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId();
int idx = comm->GetCommId(gpu_id);
VLOG(30) << "gpu : "
<< " invoke allreduce. send " << x->numel() << " recv "
<< out->numel();
VLOG(3) << "gpu : "
<< " invoke allreduce. send " << x->numel() << " recv "
<< out->numel();
PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx),
ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : "
<< " finished allreduce. send " << x->numel() << " recv "
<< out->numel();
VLOG(3) << "gpu : "
<< " finished allreduce. send " << x->numel() << " recv "
<< out->numel();
}
};
......@@ -109,14 +109,14 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
} else {
out->Resize(framework::make_ddim({0}));
}
VLOG(30) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
<< " recv " << out->numel();
VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
<< " recv " << out->numel();
PADDLE_ENFORCE(platform::dynload::ncclReduce(
x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type,
reduction_op_, root, comm->comms().at(idx),
ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
<< " recv " << out->numel();
VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
<< " recv " << out->numel();
}
};
......@@ -133,22 +133,21 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
int idx = comm->GetCommId(gpu_id);
if (idx == root) {
auto* x = ctx.Input<LoDTensor>("X");
VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
PADDLE_ENFORCE(platform::dynload::ncclBcast(
reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(),
NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished Bcast.";
VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
} else {
auto* out = ctx.Output<LoDTensor>("Out");
VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
<< framework::product(out->dims());
VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
<< framework::product(out->dims());
PADDLE_ENFORCE(platform::dynload::ncclBcast(
out->mutable_data<T>(ctx.GetPlace()), out->numel(),
NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
ctx.cuda_device_context().stream()));
VLOG(30) << "gpu : " << gpu_id << " finished Bcast. recv "
<< out->numel();
VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel();
}
}
};
......
......@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test {
(*p_scopes).resize(gpu_list_.size());
auto op = f::OpRegistry::CreateOp(*op1);
VLOG(10) << "invoke NCCLInitOp.";
VLOG(1) << "invoke NCCLInitOp.";
op->Run(g_scope_, cpu_place);
VLOG(10) << "NCCLInitOp finished.";
VLOG(1) << "NCCLInitOp finished.";
}
int GetGPUData(int gpu_id) { return gpu_id + 42; }
......@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test {
std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id));
paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor);
VLOG(10) << "Send Tensor filled with elements " << send_tensor->numel();
VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel();
}
lk.unlock();
......@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test {
auto op = f::OpRegistry::CreateOp(*op1);
VLOG(10) << "Device : " << gpu_id << " invoke " << op_desc.Type();
VLOG(10) << " send_tensor : " << send_tensor->numel()
<< " recv_tensor : " << recv_tensor->numel();
VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type();
VLOG(1) << " send_tensor : " << send_tensor->numel()
<< " recv_tensor : " << recv_tensor->numel();
op->Run(*scope, place);
VLOG(10) << "Device : " << gpu_id << " finished " << op_desc.Type();
VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type();
}
public:
......
......@@ -162,9 +162,9 @@ class NCEOpMaker : public framework::OpProtoAndCheckerMaker {
"user should avoid setting this attribute.")
.SetDefault({});
AddComment(R"DOC(
Compute and return the noise-contrastive estimation training loss. See
`Noise-contrastive estimation: A new estimation principle for unnormalized
statistical models
Compute and return the noise-contrastive estimation training loss. See
`Noise-contrastive estimation: A new estimation principle for unnormalized
statistical models
<http://www.jmlr.org/proceedings/papers/v9/gutmann10a/gutmann10a.pdf>`_.
By default this operator uses a uniform distribution for sampling.
)DOC");
......@@ -230,14 +230,14 @@ class NCEOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr);
if (is_sparse) {
VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to SelectedRows";
VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to SelectedRows";
block->Var(weight_grad)
->SetType(framework::proto::VarType::SELECTED_ROWS);
block->Var(bias_grad)->SetType(framework::proto::VarType::SELECTED_ROWS);
} else {
VLOG(30) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to LoDTensor";
VLOG(3) << "nce_op_grad op " << weight_grad << " and " << bias_grad
<< " is set to LoDTensor";
block->Var(weight_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
block->Var(bias_grad)->SetType(framework::proto::VarType::LOD_TENSOR);
}
......
......@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto& grad =
Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
if (grad.rows().size() == 0) {
VLOG(30) << "grad row size is 0!!";
VLOG(3) << "grad row size is 0!!";
return;
}
......
......@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
// sparse update maybe empty.
if (grad->rows().size() == 0) {
VLOG(30) << "Grad SelectedRows contains no data!";
VLOG(3) << "Grad SelectedRows contains no data!";
return;
}
auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
......
......@@ -98,10 +98,10 @@ class SGDOpKernel : public framework::OpKernel<T> {
auto param_row_width = param.value().dims()[1];
auto grad_row_width = grad.value().dims()[1];
VLOG(40) << " param rows: " << param.rows().size()
<< " param memory rows: " << param.value().dims()[0]
<< " grad rows: " << grad.rows().size()
<< " grad memory rows: " << grad.value().dims()[0];
VLOG(4) << " param rows: " << param.rows().size()
<< " param memory rows: " << param.value().dims()[0]
<< " grad rows: " << grad.rows().size()
<< " grad memory rows: " << grad.value().dims()[0];
PADDLE_ENFORCE_EQ(param_row_width, grad_row_width,
"param_row should have the same size with grad_row");
......
......@@ -155,8 +155,8 @@ class RandomCropKernel : public framework::OpKernel<T> {
seed = *cpu_seed.data<int64_t>();
}
} else {
VLOG(50) << "WARNING: The input 'Seed' is not initialized, use attribute "
"'startup_seed' instead.";
VLOG(5) << "WARNING: The input 'Seed' is not initialized, use attribute "
"'startup_seed' instead.";
seed = ctx.Attr<int>("startup_seed");
}
auto shape = ctx.Attr<std::vector<int>>("shape");
......
......@@ -42,7 +42,7 @@ class BlockingQueue {
std::unique_lock<std::mutex> lock(mutex_);
send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
if (closed_) {
VLOG(50)
VLOG(5)
<< "WARNING: Sending an element to a closed reader::BlokcingQueue.";
return false;
}
......@@ -56,7 +56,7 @@ class BlockingQueue {
std::unique_lock<std::mutex> lock(mutex_);
send_cv_.wait(lock, [&] { return queue_.size() < capacity_ || closed_; });
if (closed_) {
VLOG(50)
VLOG(5)
<< "WARNING: Sending an element to a closed reader::BlokcingQueue.";
return false;
}
......
......@@ -26,7 +26,7 @@ class ShuffleReader : public framework::DecoratedReader {
ShuffleReader(const std::shared_ptr<ReaderBase>& reader, size_t buffer_size,
size_t seed = 0)
: DecoratedReader(reader), buffer_size_(buffer_size), seed_(seed) {
VLOG(100) << "Create shuffle reader of " << reader_;
VLOG(10) << "Create shuffle reader of " << reader_;
if (seed_ == 0) {
std::random_device device;
seed_ = device();
......@@ -37,7 +37,7 @@ class ShuffleReader : public framework::DecoratedReader {
void ReadNextImpl(std::vector<framework::LoDTensor>* out) override {
out->clear();
if (iteration_pos_ >= buffer_.size()) {
VLOG(100) << "Resetting shuffle buffer";
VLOG(10) << "Resetting shuffle buffer";
ReloadBuffer();
if (buffer_.empty()) {
return;
......@@ -73,7 +73,7 @@ class ShuffleReader : public framework::DecoratedReader {
std::mt19937 g(seed_);
std::shuffle(buffer_.begin(), buffer_.end(), g);
seed_ = g(); // update seed_;
VLOG(100) << "random buffer size = " << buffer_.size();
VLOG(10) << "random buffer size = " << buffer_.size();
}
size_t buffer_size_;
......
......@@ -160,7 +160,7 @@ class RecurrentBase : public framework::OperatorBase {
Callback callback) {
PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
for (size_t i = 0; i < dst_vars.size(); ++i) {
VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
}
}
......@@ -176,7 +176,7 @@ class RecurrentBase : public framework::OperatorBase {
Callback callback) {
PADDLE_ENFORCE_EQ(src_vars.size(), dst_vars.size());
for (size_t i = 0; i < dst_vars.size(); ++i) {
VLOG(100) << "Link " << src_vars[i] << " to " << dst_vars[i];
VLOG(10) << "Link " << src_vars[i] << " to " << dst_vars[i];
AccessTensor(src_scope, src_vars[i], dst_scope, dst_vars[i], callback);
}
}
......@@ -230,7 +230,7 @@ class RecurrentOp : public RecurrentBase {
void RunImpl(const framework::Scope &scope,
const platform::Place &place) const override {
auto seq_len = static_cast<size_t>(this->GetSequenceLength(scope));
VLOG(30) << "Static RNN input sequence length = " << seq_len;
VLOG(3) << "Static RNN input sequence length = " << seq_len;
StepScopes scopes = CreateStepScopes(scope, seq_len);
auto reverse = Attr<bool>(kReverse);
......@@ -241,7 +241,7 @@ class RecurrentOp : public RecurrentBase {
for (size_t i = 0; i < seq_len; ++i) {
size_t seq_offset = reverse ? seq_len - i - 1 : i;
VLOG(30) << "Recurrent operate at the time step " << seq_offset;
VLOG(3) << "Recurrent operate at the time step " << seq_offset;
auto &cur_scope = scopes.CurScope();
......@@ -334,7 +334,7 @@ class RecurrentGradOp : public RecurrentBase {
for (size_t step_id = 0; step_id < seq_len; ++step_id) {
size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
VLOG(30) << "Recurrent backward operate at the time step " << seq_offset;
VLOG(3) << "Recurrent backward operate at the time step " << seq_offset;
auto &cur_scope = scopes.CurScope();
// Link outside::output_grads --> inside::output_grads
// inside::output_grad = outside::output_grad[seq_offset:seq_offset+1]
......@@ -348,11 +348,11 @@ class RecurrentGradOp : public RecurrentBase {
});
auto og_set = List2Set(Inputs(kOutputGrads));
if (VLOG_IS_ON(100)) {
if (VLOG_IS_ON(10)) {
std::ostringstream sout;
std::copy(og_set.begin(), og_set.end(),
std::ostream_iterator<std::string>(sout, ","));
VLOG(100) << " RNN output gradients = [" << sout.str() << "]";
VLOG(10) << " RNN output gradients = [" << sout.str() << "]";
}
// Link states
......@@ -374,7 +374,7 @@ class RecurrentGradOp : public RecurrentBase {
auto &ex_tensor =
ex_scope.FindVar(ex_grad)->Get<framework::LoDTensor>();
VLOG(100) << " RNN link " << cur_grad << " from " << ex_grad;
VLOG(10) << " RNN link " << cur_grad << " from " << ex_grad;
auto *cur_grad_var = cur_scope.Var(cur_grad);
auto cur_grad_tensor =
cur_grad_var->GetMutable<framework::LoDTensor>();
......@@ -382,12 +382,12 @@ class RecurrentGradOp : public RecurrentBase {
}
}
VLOG(50) << "Recurrent memory linking finished ";
VLOG(5) << "Recurrent memory linking finished ";
// Run step block with cur_scope
executor.Run(*program, &cur_scope, block->ID(),
false /*create_local_scope*/);
VLOG(50) << "executor.Run finished ";
VLOG(5) << "executor.Run finished ";
auto local_var_names = LocalVarNames(cur_scope);
......@@ -436,7 +436,7 @@ class RecurrentGradOp : public RecurrentBase {
cur_scope.Rename(new_inside_name, inside_grad_name);
}
}
VLOG(50) << "Accumulate Parameter finished ";
VLOG(5) << "Accumulate Parameter finished ";
// Copy input gradient from inside to outside
// outside::input_grad[seq_offset: seq_offset + 1] = inside::input_grad
......@@ -455,7 +455,7 @@ class RecurrentGradOp : public RecurrentBase {
auto dst = outside->Slice(seq_offset, seq_offset + 1);
framework::TensorCopy(inside, place, dev_ctx, &dst);
});
VLOG(50) << "Link outside gradient finished ";
VLOG(5) << "Link outside gradient finished ";
if (step_id + 1 == seq_len) { // at_end
// copy initialize states gradient from inside to outside
......@@ -468,7 +468,7 @@ class RecurrentGradOp : public RecurrentBase {
outside->mutable_data(place, inside.type());
framework::TensorCopy(inside, place, dev_ctx, outside);
});
VLOG(50) << "Link initialize state gradient finished ";
VLOG(5) << "Link initialize state gradient finished ";
}
scopes.Next();
}
......
......@@ -93,7 +93,7 @@ class RNNMemoryHelperGradOp : public framework::OperatorBase {
in_grad_var_name);
if (out_grad_var == nullptr) {
VLOG(50) << "Using fill constant 0 as starting gradient";
VLOG(5) << "Using fill constant 0 as starting gradient";
auto in_var_name = Input("X");
auto *in_var = scope.FindVar(in_var_name);
auto &in_var_tensor = in_var->Get<framework::LoDTensor>();
......
......@@ -110,7 +110,7 @@ class SaveOp : public framework::OperatorBase {
lt_var != nullptr,
"Can not find variable kLookupTablePath for SaveSelectedRows");
std::string filename = lt_var->data();
VLOG(40) << "SaveSelectedRows get File name: " << filename;
VLOG(4) << "SaveSelectedRows get File name: " << filename;
MkDirRecursively(DirName(filename).c_str());
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册