提交 0c3227a5 编写于 作者: M minqiyang

Change the origin VLOG level to 10 times

Fix code to support cpplint syntax check

test=develop
上级 5b7a9dd7
......@@ -18,8 +18,8 @@ namespace framework {
void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
Tensor *out) {
VLOG(3) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
VLOG(30) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place;
PADDLE_ENFORCE_NE(
in.place().which(), dst_place.which(),
......
......@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
OpKernelType GetExpectedKernelType(
const ExecutionContext& ctx) const override {
if (Attr<bool>("use_gpu")) {
VLOG(3) << "force use gpu kernel";
VLOG(30) << "force use gpu kernel";
return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
} else {
VLOG(3) << "use default kernel";
VLOG(30) << "use default kernel";
return OpKernelType(proto::VarType::FP32,
ctx.Input<Tensor>("input")->place());
}
......@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
// get output
auto* output2 = scope.Var("OUT2");
gpu_op->Run(scope, cuda_place);
VLOG(3) << "after gpu_op run";
VLOG(30) << "after gpu_op run";
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
paddle::platform::DeviceContextPool& pool =
......
......@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
PADDLE_ENFORCE_NOT_NULL(in_var);
Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
if (UNLIKELY(!in_tensor.IsInitialized())) {
VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
return;
}
......
......@@ -44,8 +44,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
if (is_lock_and_record_event_free) {
VLOG(10) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString();
VLOG(100) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString();
}
}
return ir_graph;
......
......@@ -392,7 +392,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
for (size_t i = 0; i < backward_vars.size(); i += 2) {
auto &p_name = backward_vars[i];
auto &g_name = backward_vars[i + 1];
VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
switch (strategy_.reduce_) {
case BuildStrategy::ReduceStrategy::kReduce:
......@@ -794,8 +794,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
VLOG(10) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id;
VLOG(100) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id;
for (auto &varname : input_var_names) {
result->Get<ShardedVarDevice>(kShardedVarDevice)
.emplace(varname, op_dev_id);
......@@ -812,9 +812,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
if (recv_param_grad.size() == 2U) {
op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]);
VLOG(10) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id;
VLOG(100) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id;
} else {
op_dev_id = GetAppropriateDeviceID(output_var_names);
}
......
......@@ -141,8 +141,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
if (next_compute_op != nullptr) {
if (compute_ref_cnt_map.count(next_compute_op)) {
compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
VLOG(5) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name();
VLOG(50) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name();
} else {
// Create new reference_count_op_handle
ir::Node *ref_cnt_node = graph->CreateEmptyNode(
......
......@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
->stream();
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
platform::CPUPlace(), &coeff_, sizeof(float), stream);
VLOG(10) << place_ << "RUN Scale loss grad op";
VLOG(100) << place_ << "RUN Scale loss grad op";
});
#endif
}
......
......@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
op_node_list[i - 1]->outputs.push_back(dep_var);
dep_var->outputs.push_back(op_node_list[i]);
dep_var->inputs.push_back(op_node_list[i - 1]);
VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name();
VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name();
}
return graph;
}
......
......@@ -208,16 +208,16 @@ void ThreadedSSAGraphExecutor::RunOp(
details::OpHandleBase *op) {
auto op_run = [ready_var_q, op, this] {
try {
if (VLOG_IS_ON(10)) {
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
if (VLOG_IS_ON(100)) {
VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
}
if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_);
}
VLOG(10) << op << " " << op->Name() << " Done ";
VLOG(100) << op << " " << op->Name() << " Done ";
running_ops_--;
ready_var_q->Extend(op->Outputs());
VLOG(10) << op << " " << op->Name() << "Signal posted";
VLOG(100) << op << " " << op->Name() << "Signal posted";
} catch (...) {
exception_holder_.Catch(std::current_exception());
}
......
......@@ -43,7 +43,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
}
ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG(5) << "destroy ExecutorPrepareContext";
VLOG(50) << "destroy ExecutorPrepareContext";
}
template <typename RefCntMap>
......@@ -60,7 +60,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
if ((it->second)-- == 1) {
auto* var = scope.FindVar(name);
if (var != nullptr) {
VLOG(10) << "Erase tensor \'" << name << "\'";
VLOG(100) << "Erase tensor \'" << name << "\'";
if (var->IsType<LoDTensor>()) {
erase_tensors.insert(var->GetMutable<LoDTensor>());
} else if (var->IsType<SelectedRows>()) {
......@@ -141,21 +141,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
if (var->Persistable()) {
auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else {
auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto& var : global_block.AllVars()) {
auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}
......@@ -286,7 +286,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0;
for (auto& feed_target : (*feed_targets)) {
std::string var_name = feed_target.first;
VLOG(3) << "feed target's name: " << var_name;
VLOG(30) << "feed target's name: " << var_name;
// prepend feed op
auto* op = global_block->PrependOp();
......@@ -309,7 +309,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0;
for (auto& fetch_target : (*fetch_targets)) {
std::string var_name = fetch_target.first;
VLOG(3) << "fetch target's name: " << var_name;
VLOG(30) << "fetch target's name: " << var_name;
// append fetch op
auto* op = global_block->AppendOp();
......@@ -398,8 +398,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
VLOG(20) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_);
}
}
......@@ -424,10 +424,10 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
if (FLAGS_benchmark) {
VLOG(2) << "-------------------------------------------------------";
VLOG(2) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_);
VLOG(2) << "-------------------------------------------------------";
VLOG(20) << "-------------------------------------------------------";
VLOG(20) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_);
VLOG(20) << "-------------------------------------------------------";
}
}
......@@ -471,7 +471,7 @@ void Executor::RunPreparedContext(
void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN
VLOG(3) << "use_mkldnn=True";
VLOG(30) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) {
......
......@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will
// be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
if (index >= feed_inputs.size()) {
......@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
VLOG(30) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor;
}
......
......@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) {
scope->Var(param.LSTMX)->GetMutable<LoDTensor>();
scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>();
#define GATE_W(name__) \
auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(4) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
#define GATE_W(name__) \
auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(40) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(40) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>();
GATE_W(forget);
......@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
int D = W_forget_w0.dims()[0];
int M = W_forget_w1.dims()[0];
out->Resize(make_ddim({D + M, 4 * D}));
VLOG(3) << "LSTMWeight resized to " << out->dims();
VLOG(30) << "LSTMWeight resized to " << out->dims();
float* out_data = out->mutable_data<float>(platform::CPUPlace());
std::array<const float*, 4> tensors(
......
......@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
int found_conv_bias_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle ConvBias fuse";
VLOG(40) << "handle ConvBias fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_bias_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp
......@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
VLOG(3) << "do not perform conv+bias fuse";
VLOG(30) << "do not perform conv+bias fuse";
return;
}
......
......@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle ConvBN fuse";
VLOG(40) << "handle ConvBN fuse";
// conv, batch_norm,
// conv_weight, conv_out,
......@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+bn fuse";
VLOG(30) << "do not perform conv+bn fuse";
return;
}
......@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle ConvBN fuse";
VLOG(40) << "handle ConvBN fuse";
// conv, batch_norm,
// conv_weight, conv_out,
......
......@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
int found_conv_relu_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle ConvReLU fuse";
VLOG(40) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
......@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
FuseOptions fuse_option = FindFuseOption(*conv, *relu);
if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+relu fuse";
VLOG(30) << "do not perform conv+relu fuse";
return;
}
......
......@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
int found_depthwise_conv_mkldnn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
GET_NODE(depthwise_conv, (*pattern));
depthwise_conv->Op()->SetType("conv2d");
found_depthwise_conv_mkldnn_count++;
......
......@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle FC fuse";
VLOG(40) << "handle FC fuse";
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
......
......@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(4) << "handle FuseElewiseAddAct fuse";
VLOG(40) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
elewise_add_act_pattern);
......@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n;
VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n;
ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node);
found_elewise_add_act_count++;
......@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(4) << "handle FuseElewiseAddAct fuse";
VLOG(40) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
......@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n;
VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n;
ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node);
found_elewise_add_act_count++;
......@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) {
VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
......@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto fused_node = g->CreateOpNode(&desc);
VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n;
VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n;
ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node);
found_elewise_add_act_count++;
......
......@@ -92,7 +92,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
const ProgramDesc &program) {
VLOG(3) << "block in program:" << program_.Size();
VLOG(30) << "block in program:" << program_.Size();
std::unordered_map<std::string, VarDesc *> all_vars;
// var nodes for each var name, will have multiple versions in SSA
std::map<std::string, std::vector<ir::Node *>> var_nodes;
......@@ -160,7 +160,7 @@ void Graph::ResolveHazard(
auto it_old = versions.rbegin();
++it_old;
for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
VLOG(3) << "deal with var: " << (*it_new)->Name();
VLOG(30) << "deal with var: " << (*it_new)->Name();
ir::Node *write_op =
(*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
const auto &read_ops = (*it_old)->outputs;
......
......@@ -89,7 +89,7 @@ class Graph {
attr_name);
attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(3) << "deleting " << attr_name;
VLOG(30) << "deleting " << attr_name;
delete attr;
};
}
......
......@@ -33,8 +33,9 @@ void SortHelper(
}
}
VLOG(3) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input " << node->inputs.size();
VLOG(30) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input "
<< node->inputs.size();
ret->push_back(node);
}
......@@ -103,9 +104,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
for (auto &var : n->inputs) {
for (auto &adj_n : var->inputs) {
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var);
VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var);
adj_list[n].insert(adj_n);
}
}
......@@ -163,10 +164,10 @@ size_t GraphNum(const Graph &graph) {
graph_nodes.emplace_back(g_nodes);
}
if (VLOG_IS_ON(10)) {
VLOG(10) << "graph_num: " << graph_nodes.size();
if (VLOG_IS_ON(100)) {
VLOG(100) << "graph_num: " << graph_nodes.size();
for (auto &g_n : graph_nodes) {
VLOG(10) << "graph_nodes: " << g_n.size();
VLOG(100) << "graph_nodes: " << g_n.size();
if (g_n.size() < 10) {
std::stringstream out;
for (auto &node : g_n) {
......@@ -180,7 +181,7 @@ size_t GraphNum(const Graph &graph) {
}
out << "]";
}
VLOG(10) << out.str();
VLOG(100) << out.str();
}
}
}
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <algorithm>
#include <array>
#include <string>
#include <vector>
......@@ -91,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size());
int id = 0;
for (auto &g : subgraphs) {
VLOG(3) << "optimizing #" << id++ << " subgraph";
VLOG(30) << "optimizing #" << id++ << " subgraph";
handler(g, graph);
}
}
bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
VLOG(3) << "mark pdnodes in graph";
VLOG(30) << "mark pdnodes in graph";
if (graph.Nodes().empty()) return false;
for (auto &node : GraphTraits::DFS(graph)) {
for (const auto &pdnode : pattern_.nodes()) {
if (pdnode->Tell(&node)) {
VLOG(4) << "pdnode " << pdnode->name() << " marked";
VLOG(40) << "pdnode " << pdnode->name() << " marked";
pdnodes2nodes_[pdnode.get()].insert(&node);
}
}
......@@ -111,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
// Check to early stop if some PDNode can't find matched Node.
for (auto &pdnode : pattern_.nodes()) {
if (!pdnodes2nodes_.count(pdnode.get())) {
VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
// return false;
}
}
......@@ -120,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
}
}
VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
return !pdnodes2nodes_.empty();
}
......@@ -213,7 +214,7 @@ GraphPatternDetector::DetectPatterns() {
// Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes.
for (const auto &edge : pattern_.edges()) {
VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
// TODO(Superjomn) Fix bug here, the groups might be duplicate here.
// Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected.
......@@ -224,7 +225,7 @@ GraphPatternDetector::DetectPatterns() {
// source -> target
for (Node *source : pdnodes2nodes_[edge.first]) {
for (Node *target : pdnodes2nodes_[edge.second]) {
VLOG(8) << "check " << source->id() << " -- " << target->id();
VLOG(80) << "check " << source->id() << " -- " << target->id();
// TODO(Superjomn) add some prune strategies.
for (const auto &group : pre_groups) {
HitGroup new_group = group;
......@@ -240,12 +241,13 @@ GraphPatternDetector::DetectPatterns() {
}
}
}
VLOG(3) << "step " << step << " get records: " << cur_groups.size();
VLOG(30) << "step " << step << " get records: " << cur_groups.size();
for (auto &group : cur_groups) {
for (auto &item : group.roles) {
VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
VLOG(40) << "node " << item.second->id() << " as "
<< item.first->name();
}
VLOG(4) << "=========================================================";
VLOG(40) << "=========================================================";
}
}
......
......@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
VLOG(3) << "draw IR graph viz to " << graph_viz_path;
VLOG(30) << "draw IR graph viz to " << graph_viz_path;
std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
PADDLE_ENFORCE(fout->good());
std::ostream& sout = *fout;
......
......@@ -20,7 +20,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Aplies MKL-DNN placement strategy.";
VLOG(30) << "Aplies MKL-DNN placement strategy.";
for (const Node* n : graph->Nodes()) {
if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
n->Op()->SetAttr("use_mkldnn", true);
......
......@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
VarDesc repeated_var = CopyVarDesc(var_desc);
repeated_var.SetName(new_gname);
VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
return repeated_var;
}
return *var_desc;
......@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
auto origin_nodes = graph->ReleaseNodes();
VLOG(3) << "origin nodes count: " << origin_nodes.size();
VLOG(30) << "origin nodes count: " << origin_nodes.size();
ir::Graph& result = *graph;
// 1. record op nodes of different roles
......@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
"%s.repeat.%d", repeated_op.Input("Variance")[0], i);
bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name;
VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name;
repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0],
......
......@@ -76,7 +76,7 @@ class Pass {
attr_name);
attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(3) << "deleting " << attr_name;
VLOG(30) << "deleting " << attr_name;
delete attr;
};
}
......
......@@ -12,10 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
#include <set>
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
#include "paddle/fluid/framework/lod_tensor.h"
namespace paddle {
......@@ -159,10 +162,7 @@ PDNode* BuildFCPattern(PDPattern* pattern, PDNode* fc_x) {
std::set<std::string> acts({"sigmoid", "tanh", "relu", "identity"});
PDNode* act = pattern->NewNode(
[=](Node* x) {
return x && x->IsOp() && acts.count(x->Op()->Type());
},
[=](Node* x) { return x && x->IsOp() && acts.count(x->Op()->Type()); },
"act");
PDNode* fc_out = pattern->NewNode(
......@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* graph) {
VLOG(4) << "get one concat pattern";
VLOG(40) << "get one concat pattern";
// fc
GET_NODE(fc_w, detector.pattern());
GET_NODE(fc_bias, detector.pattern());
......
......@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) {
VLOG(4) << "handle SeqConv EltAdd Relu fuse";
VLOG(40) << "handle SeqConv EltAdd Relu fuse";
GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
......
......@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
TableItem item;
item.index = i;
item.length = vec[i + 1] - vec[i];
VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
items_.emplace_back(item);
}
// NOTE(yuyang18):
......
......@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
TEST(mixed_vector, ForEach) {
vec<int> tmp;
for (auto& v : tmp) {
VLOG(3) << v;
VLOG(30) << v;
}
}
......
......@@ -71,7 +71,7 @@ void NaiveExecutor::Prepare(Scope *parent_scope,
void NaiveExecutor::Run() {
for (auto &op : ops_) {
VLOG(4) << "run " << op->Type();
VLOG(40) << "run " << op->Type();
op->Run(*scope_, place_);
}
}
......@@ -95,21 +95,21 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr;
}
}
} else {
for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr;
}
}
}
......
......@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
VLOG(3) << "input " << in << " is not LodTensor";
VLOG(30) << "input " << in << " is not LodTensor";
return;
}
out_var->SetLoDLevel(in_var->GetLoDLevel());
......@@ -241,32 +241,32 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
const proto::OpProto::Attr &attr = GetProtoAttr(name);
switch (attr.type()) {
case proto::AttrType::BOOLEANS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS";
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS";
this->attrs_[name] = std::vector<bool>();
break;
}
case proto::AttrType::INTS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS";
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS";
this->attrs_[name] = std::vector<int>();
break;
}
case proto::AttrType::FLOATS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS";
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS";
this->attrs_[name] = std::vector<float>();
break;
}
case proto::AttrType::STRINGS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS";
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS";
this->attrs_[name] = std::vector<std::string>();
break;
}
case proto::AttrType::BLOCKS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS";
VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS";
this->SetBlocksAttr(name, std::vector<BlockDesc *>());
return;
}
......@@ -499,13 +499,13 @@ void OpDesc::CheckAttrs() {
}
void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(3) << "CompileTime infer shape on " << Type();
VLOG(30) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
PADDLE_ENFORCE(static_cast<bool>(infer_shape),
"%s's infer_shape has not been registered", this->Type());
CompileTimeInferShapeContext ctx(*this, block);
if (VLOG_IS_ON(10)) {
if (VLOG_IS_ON(100)) {
std::ostringstream sout;
auto inames = this->InputArgumentNames();
sout << " From [";
......@@ -516,7 +516,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
std::copy(onames.begin(), onames.end(),
std::ostream_iterator<std::string>(sout, ", "));
sout << "]";
VLOG(10) << sout.str();
VLOG(100) << sout.str();
}
infer_shape(&ctx);
}
......@@ -607,7 +607,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto shape = var->GetShape();
res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
} catch (...) {
VLOG(5) << "GetDim of variable " << name << " error";
VLOG(50) << "GetDim of variable " << name << " error";
std::rethrow_exception(std::current_exception());
}
return res;
......@@ -624,7 +624,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
}
} catch (...) {
VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
std::rethrow_exception(std::current_exception());
}
return res;
......
......@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) {
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead.";
VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs;
......
......@@ -140,7 +140,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(4) << place << " " << DebugStringEx(&scope);
VLOG(40) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place);
......@@ -160,7 +160,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
} else {
RunImpl(scope, place);
}
VLOG(3) << place << " " << DebugStringEx(&scope);
VLOG(30) << place << " " << DebugStringEx(&scope);
}
bool OperatorBase::HasInputs(const std::string& name) const {
......@@ -708,14 +708,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if (kernel_iter == kernels.end() &&
expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
expected_kernel_key.library_type_ = LibraryType::kPlain;
expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
kernel_iter = kernels.find(expected_kernel_key);
......@@ -767,7 +767,8 @@ void OperatorWithKernel::TransferInplaceVarsBack(
const Scope& scope, const std::vector<std::string>& inplace_vars,
const Scope& transfer_scope) const {
for (auto& var_name : inplace_vars) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
VLOG(30) << "share inplace var " + var_name +
" back to it's original scope";
auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name));
auto* var = transfer_scope.FindVar(var_name);
PADDLE_ENFORCE(var != nullptr, "The var[%s] should not be nullptr",
......@@ -807,8 +808,8 @@ Scope* OperatorWithKernel::TryTransferData(
transfered_inplace_vars->emplace_back(var_name);
}
VLOG(3) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
VLOG(30) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key;
if (new_scope == nullptr) {
new_scope = &scope.NewScope();
......
......@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto &main_tensor = main_var->Get<LoDTensor>();
if (!main_tensor.IsInitialized()) {
VLOG(3) << "one in var not inited, return!";
VLOG(30) << "one in var not inited, return!";
continue;
}
auto &dims = main_tensor.dims();
......
......@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) {
v = new Variable();
vars_[name].reset(v);
VLOG(3) << "Create variable " << name;
VLOG(30) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first);
return v;
}
......
......@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized.");
if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!";
VLOG(30) << "keys is empty, please check data!";
} else {
int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
......
......@@ -22,8 +22,8 @@ namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
src.check_memory_size();
dst->Resize(src.dims());
......@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
......@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
......@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
......@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(3) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
......
tensor_util.cc
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <limits>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle {
namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
if (platform::is_same_place(ctx_place, src_place)) {
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
} else if (platform::is_same_place(ctx_place, dst_place)) {
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
}
}
}
#endif
}
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(dst_place)) {
dev_ctx = pool.Get(dst_place);
} else {
dev_ctx = pool.Get(src.place());
}
TensorCopy(src, dst_place, *dev_ctx, dst);
}
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
} else if (platform::is_cuda_pinned_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_pinned_place = boost::get<platform::CUDAPinnedPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_pinned_place, src_ptr, size,
nullptr);
}
#endif
}
template <typename Predicate, typename DevCtx>
struct AnyDTypeVisitor {
Predicate predicate_;
const Tensor& tensor_;
const DevCtx& ctx_;
Tensor* out_;
AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
Tensor* out)
: predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
template <typename T>
void apply() const {
auto t = EigenVector<T>::Flatten(tensor_);
auto o = EigenScalar<bool>::From(*out_);
// return any of predicate_(t) is true.
o.device(*ctx_.eigen_device()) = predicate_(t).any();
}
};
template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
}
template <typename Predicate>
class AnyVisitor : public boost::static_visitor<bool> {
private:
const framework::Tensor& tensor_;
Predicate predicate_;
public:
AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
: tensor_(tensor), predicate_(std::move(predicate)) {}
template <typename Place>
bool operator()(const Place& place) const {
framework::Tensor out;
out.Resize({1});
out.mutable_data<bool>(place);
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
AnyImpl(predicate_, tensor_, *ctx, &out);
return this->GetResult(out, place);
}
bool GetResult(const framework::Tensor& out,
const platform::CUDAPlace& gpu) const {
platform::CPUPlace cpu;
framework::Tensor tmp;
tmp.Resize({1});
tmp.mutable_data<bool>(cpu);
auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
gpuctx->Wait();
TensorCopy(out, cpu, *gpuctx, &tmp);
gpuctx->Wait();
return GetResult(tmp, cpu);
}
bool GetResult(const framework::Tensor& out,
const platform::CPUPlace& cpu) const {
return *out.data<bool>();
}
bool GetResult(const framework::Tensor& out,
const platform::CUDAPinnedPlace& cpu) const {
return *out.data<bool>();
}
};
template <typename Predicate>
class AnyOutVisitor : public boost::static_visitor<> {
private:
const framework::Tensor& tensor_;
mutable framework::Tensor* out_;
Predicate predicate_;
public:
AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out)
: tensor_(tensor), out_(out), predicate_(std::move(predicate)) {}
template <typename Place>
void operator()(const Place& place) const {
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
out_->Resize({1});
out_->mutable_data<bool>(place);
AnyImpl(predicate_, tensor_, *ctx, out_);
}
};
template <typename Predicate>
inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
AnyVisitor<Predicate> visitor(tensor, predicate);
auto place = tensor.place();
return platform::VisitPlace(place, visitor);
}
template <typename Predicate>
inline void Any(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out) {
AnyOutVisitor<Predicate> visitor(tensor, predicate, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
struct ContainsNANPredicate {
template <typename T>
auto operator()(const T& eigen_vec) const
-> decltype(std::declval<T>().isnan()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isnan();
}
};
bool TensorContainsNAN(const framework::Tensor& tensor) {
ContainsNANPredicate predicate;
return Any(tensor, predicate);
}
void TensorContainsNAN(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsNANPredicate predicate;
Any(tensor, predicate, out);
}
struct ContainsInfPredicate {
template <typename T>
auto operator()(const T& eigen_vec) const
-> decltype(std::declval<T>().isinf()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isinf();
}
};
bool TensorContainsInf(const framework::Tensor& tensor) {
ContainsInfPredicate predicate;
return Any(tensor, predicate);
}
void TensorContainsInf(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsInfPredicate predicate;
Any(tensor, predicate, out);
}
// NOTE(dzhwinter):
// Isfinite need a AllVisitor to loop through all the elements.
// We choose two cuda call instead of one allvisitor. The AllVisitor
// should be implemented if the performance hurts.
bool TensorIsfinite(const framework::Tensor& tensor) {
ContainsInfPredicate pred_inf;
ContainsNANPredicate pred_nan;
return !Any(tensor, pred_inf) && !Any(tensor, pred_nan);
}
#ifdef PADDLE_WITH_CUDA
template <typename T>
static inline void __global__ BothFalse(const T* cmp, T* out) {
out[0] = (!cmp[0]) && (!out[0]);
}
#endif
struct BothFalseVisitor : public boost::static_visitor<> {
const framework::Tensor& in_;
mutable framework::Tensor* out_;
BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out)
: in_(in), out_(out) {}
template <typename Place>
void operator()(const Place& place) const {
VisitorImpl(place);
}
void VisitorImpl(const platform::CUDAPlace& gpu) const {
#ifdef PADDLE_WITH_CUDA
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
out_->mutable_data<bool>(gpu));
#endif
}
void VisitorImpl(const platform::CPUPlace& cpu) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
void VisitorImpl(
const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
};
void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
framework::Tensor tmp;
TensorContainsInf(tensor, &tmp);
TensorContainsNAN(tensor, out);
BothFalseVisitor visitor(tmp, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
void TensorToStream(std::ostream& os, const Tensor& tensor,
const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char*>(&version), sizeof(version));
}
{ // the 2nd field, tensor description
// int32_t size
// void* protobuf message
proto::VarType::TensorDesc desc;
desc.set_data_type(framework::ToDataType(tensor.type()));
auto dims = framework::vectorize(tensor.dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
os.write(reinterpret_cast<const char*>(&size), sizeof(size));
auto out = desc.SerializeAsString();
os.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
auto* data_ptr = tensor.data<void>();
PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
"Index overflow when writing tensor");
if (platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB
std::unique_ptr<char[]> buf(new char[kBufSize]);
auto& gpu_dev_ctx =
static_cast<const platform::CUDADeviceContext&>(dev_ctx);
platform::CPUPlace cpu;
uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
while (size != 0) {
size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
memory::Copy(cpu, buf.get(),
boost::get<platform::CUDAPlace>(tensor.place()),
reinterpret_cast<const void*>(data), size_to_write,
gpu_dev_ctx.stream());
gpu_dev_ctx.Wait();
os.write(buf.get(), size_to_write);
data += size_to_write;
size -= size_to_write;
}
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
os.write(static_cast<const char*>(data_ptr),
static_cast<std::streamsize>(size));
}
}
}
struct DeserializedDataFunctor {
DeserializedDataFunctor(void** buf, Tensor* tensor,
const platform::Place& place)
: buf_(buf), tensor_(tensor), place_(place) {}
template <typename T>
void apply() {
*buf_ = tensor_->mutable_data<T>(place_);
}
void** buf_;
Tensor* tensor_;
platform::Place place_;
};
void TensorFromStream(std::istream& is, Tensor* tensor,
const platform::DeviceContext& dev_ctx) {
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
proto::VarType::TensorDesc desc;
{ // int32_t size
// proto buffer
int32_t size;
is.read(reinterpret_cast<char*>(&size), sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char*>(buf.get()), size);
PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
"Cannot parse tensor desc");
}
{ // read tensor
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims));
void* buf;
auto ctx = platform::CPUDeviceContext();
size_t size =
tensor->numel() *
framework::SizeOfType(framework::ToTypeIndex(desc.data_type()));
if (platform::is_gpu_place(dev_ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
Tensor cpu_tensor;
cpu_tensor.Resize(framework::make_ddim(dims));
framework::VisitDataType(
desc.data_type(),
DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), size);
auto dst_place = dev_ctx.GetPlace();
framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
framework::VisitDataType(
desc.data_type(),
DeserializedDataFunctor(&buf, tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), size);
}
}
}
} // namespace framework
} // namespace paddle
......@@ -39,7 +39,7 @@ void ThreadPool::Init() {
int num_threads = std::thread::hardware_concurrency();
if (FLAGS_dist_threadpool_size > 0) {
num_threads = FLAGS_dist_threadpool_size;
VLOG(1) << "set dist_threadpool_size to " << num_threads;
VLOG(10) << "set dist_threadpool_size to " << num_threads;
}
PADDLE_ENFORCE_GT(num_threads, 0);
threadpool_.reset(new ThreadPool(num_threads));
......
......@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const {
void VarDesc::SetShapes(
const std::vector<std::vector<int64_t>> &multiple_dims) {
if (multiple_dims.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_dims.size());
}
std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs();
......@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
void VarDesc::SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type) {
if (multiple_data_type.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given data types("
<< multiple_data_type.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(30) << "WARNING: The number of given data types("
<< multiple_data_type.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_data_type.size());
}
std::vector<proto::VarType::TensorDesc *> tensor_descs =
......@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
if (multiple_lod_level.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
VLOG(30) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size()
<< ") doesn't match the existing tensor number("
<< GetTensorDescNum()
<< "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_lod_level.size());
}
switch (desc_.type().type()) {
......
......@@ -60,7 +60,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
private:
void AddPass(const std::string& name, AnalysisPass* pass) {
VLOG(3) << "Adding pass " << name;
VLOG(30) << "Adding pass " << name;
Register(name, pass);
AddGraphvizDebugerPass(pass);
}
......@@ -103,7 +103,7 @@ void Analyzer::Run(Argument* argument) {
std::vector<std::string> passes;
#ifdef PADDLE_WITH_MKLDNN
if (use_mkldnn_) {
VLOG(3) << "Adding MKL-DNN placement pass";
VLOG(30) << "Adding MKL-DNN placement pass";
passes.push_back("mkldnn_placement_pass");
}
#endif
......
......@@ -68,8 +68,8 @@ struct Argument {
key);
attrs_[key] = data;
attr_deleters_[key] = [data, key]() {
VLOG(3) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
VLOG(3) << "argument delete attr: " << key;
VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
VLOG(30) << "argument delete attr: " << key;
delete data;
};
}
......
......@@ -132,7 +132,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
Node *x{nullptr};
if (ir_node->IsOp()) {
PADDLE_ENFORCE(ir_node->Op());
VLOG(4) << "get op " << ir_node << " " << ir_node->Name();
VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
x = nodes.Create(Node::Type::kFunction);
x->attr("ir_node").Pointer() = ir_node;
PADDLE_ENFORCE(ir_node->Op()->Proto());
......@@ -141,7 +141,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
} else if (ir_node->IsVar()) {
// Not create a Node for IR ControlDepVar, considering Inference currently
// just used in single thread scenerio.
VLOG(4) << "get var " << ir_node->Name();
VLOG(40) << "get var " << ir_node->Name();
x = nodes.Create(Node::Type::kValue);
x->attr("ir_node").Pointer() = ir_node;
x->SetName(ir_node->Name());
......@@ -151,9 +151,9 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
}
ir_node_map.emplace(ir_node, x);
}
VLOG(4) << "finish creating Nodes";
VLOG(40) << "finish creating Nodes";
VLOG(4) << "to create edge";
VLOG(40) << "to create edge";
// Create links
for (auto *ir_node : graph.Nodes()) {
auto it = ir_node_map.find(ir_node);
......@@ -175,7 +175,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
"Can't deduce any inputs from the graph, Is the graph empty?");
ir_graph = &graph;
VLOG(3) << "finished build from IR";
VLOG(30) << "finished build from IR";
}
void DataFlowGraph::Clean() {
......
......@@ -239,9 +239,10 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
framework::BlockDesc block_desc(nullptr, &proto);
block_desc.Proto()->set_parent_idx(-1);
block_desc.Proto()->set_idx(0);
VLOG(4) << "origin variable size: "
<< argument_->origin_program_desc->blocks(0).vars().size();
VLOG(4) << "transformed variable size: " << block_desc.Proto()->vars().size();
VLOG(40) << "origin variable size: "
<< argument_->origin_program_desc->blocks(0).vars().size();
VLOG(40) << "transformed variable size: "
<< block_desc.Proto()->vars().size();
// copy ops.
for (auto *node : block_node->subgraph) {
......
......@@ -29,7 +29,7 @@ void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png";
std::string message;
VLOG(3) << "draw to " << png_path;
VLOG(30) << "draw to " << png_path;
ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message);
}
......
......@@ -29,7 +29,7 @@ void FluidToIrPass::EnableParamModify(const std::string &model_dir,
PADDLE_ENFORCE(argument_);
argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope);
// Load parameters.
VLOG(3) << "Loading parameters from " << model_dir;
VLOG(30) << "Loading parameters from " << model_dir;
LoadParams(&argument_->Get<framework::Scope>(framework::ir::kParamScopeAttr),
model_dir, prog_file, param_file);
}
......
......@@ -35,21 +35,21 @@ void ModelStorePass::Run(DataFlowGraph *x) {
std::stringstream ss;
// NOTE these commands only works on linux.
ss << "mkdir -p " << *argument_->model_output_store_path;
VLOG(3) << "run command: " << ss.str();
VLOG(30) << "run command: " << ss.str();
PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
ss.str("");
ss << "cp " << *argument_->fluid_model_dir << "/*"
<< " " << *argument_->model_output_store_path;
VLOG(3) << "run command: " << ss.str();
VLOG(30) << "run command: " << ss.str();
PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
// Store program
PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
"program desc is not transformed, should call "
"DataFlowGraphToFluidPass first.");
VLOG(3) << "store analyzed program to "
<< *argument_->model_output_store_path;
VLOG(30) << "store analyzed program to "
<< *argument_->model_output_store_path;
const std::string program_output_path =
*argument_->model_output_store_path + "/__model__";
std::ofstream file(program_output_path, std::ios::binary);
......
......@@ -23,7 +23,7 @@ namespace analysis {
bool PassManager::Initialize(Argument* argument) {
argument_ = argument;
for (auto& pass : data_) {
VLOG(3) << "Initializing pass [" << pass->repr() << "]";
VLOG(30) << "Initializing pass [" << pass->repr() << "]";
if (!pass->Initialize(argument)) {
LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
return false;
......@@ -34,7 +34,7 @@ bool PassManager::Initialize(Argument* argument) {
void DfgPassManager::RunAll() {
PADDLE_ENFORCE(argument_);
VLOG(3) << "Total " << data_.size() << " Analysys passes";
VLOG(30) << "Total " << data_.size() << " Analysys passes";
for (auto& pass : data_) {
string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
pass->repr());
......
......@@ -232,7 +232,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
BriefNode *brief_node = itr.second;
if (!brief_node->node->attr(kMarkerAttrName).Bool()) {
VLOG(4) << brief_node->node->id() << " node not a trt candicate.";
VLOG(40) << brief_node->node->id() << " node not a trt candicate.";
continue;
}
......
......@@ -25,9 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
VLOG(4) << "debug info "
<< graph->HumanReadableInfo(false /*show_values*/,
true /*show_functions*/);
VLOG(40) << "debug info "
<< graph->HumanReadableInfo(false /*show_values*/,
true /*show_functions*/);
}
} // namespace analysis
......
......@@ -38,7 +38,7 @@ using contrib::AnalysisConfig;
bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(3) << "Predictor::init()";
VLOG(30) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance";
......@@ -89,7 +89,7 @@ bool AnalysisPredictor::Init(
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
VLOG(3) << "Predictor::predict";
VLOG(30) << "Predictor::predict";
inference::Timer timer;
timer.tic();
// set feed variable
......@@ -109,7 +109,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches";
return false;
}
VLOG(3) << "predict cost: " << timer.toc() << "ms";
VLOG(30) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......@@ -119,7 +119,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
VLOG(30) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size();
......@@ -184,7 +184,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
VLOG(30) << "Predictor::get_fetch";
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
......@@ -246,7 +246,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
}
CHECK(argument_.transformed_program_desc);
VLOG(5) << "to prepare executor";
VLOG(50) << "to prepare executor";
inference_program_.reset(
new framework::ProgramDesc(*argument_.transformed_program_desc));
if (argument_.Has(framework::ir::kParamScopeAttr)) {
......@@ -260,7 +260,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(3) << "create AnalysisConfig";
VLOG(30) << "create AnalysisConfig";
if (config.use_gpu) {
// 1. GPU memeroy
PADDLE_ENFORCE_GT(
......@@ -274,7 +274,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags);
}
}
......
......@@ -50,7 +50,7 @@ template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init(
const contrib::AnakinConfig &config) {
if (!(graph_.load(config.model_file))) {
VLOG(3) << "fail to load graph from " << config.model_file;
VLOG(30) << "fail to load graph from " << config.model_file;
return false;
}
auto inputs = graph_.get_ins();
......@@ -76,15 +76,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
std::vector<PaddleTensor> *output_data, int batch_size) {
for (const auto &input : inputs) {
if (input.dtype != PaddleDType::FLOAT32) {
VLOG(3) << "Only support float type inputs. " << input.name
<< "'s type is not float";
VLOG(30) << "Only support float type inputs. " << input.name
<< "'s type is not float";
return false;
}
auto d_tensor_in_p = executor_p_->get_in(input.name);
auto net_shape = d_tensor_in_p->shape();
if (net_shape.size() != input.shape.size()) {
VLOG(3) << " input " << input.name
<< "'s shape size should be equal to that of net";
VLOG(30) << " input " << input.name
<< "'s shape size should be equal to that of net";
return false;
}
int sum = 1;
......@@ -105,15 +105,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (input.lod.size() > 0) {
if (input.lod.size() > 1) {
VLOG(3) << " input lod first dim should <=1, but you set "
<< input.lod.size();
VLOG(30) << " input lod first dim should <=1, but you set "
<< input.lod.size();
return false;
}
std::vector<int> offset(input.lod[0].begin(), input.lod[0].end());
d_tensor_in_p->set_seq_offset(offset);
VLOG(3) << "offset.size(): " << offset.size();
VLOG(30) << "offset.size(): " << offset.size();
for (int i = 0; i < offset.size(); i++) {
VLOG(3) << offset[i];
VLOG(30) << offset[i];
}
}
......@@ -124,7 +124,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
d_tensor_in_p->valid_size() * sizeof(float),
cudaMemcpyHostToDevice) != 0) {
VLOG(3) << "copy data from CPU to GPU error";
VLOG(30) << "copy data from CPU to GPU error";
return false;
}
}
......@@ -141,7 +141,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
#endif
if (output_data->empty()) {
VLOG(3) << "At least one output should be set with tensors' names.";
VLOG(30) << "At least one output should be set with tensors' names.";
return false;
}
for (auto &output : *output_data) {
......@@ -157,7 +157,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
tensor->valid_size() * sizeof(float),
cudaMemcpyDeviceToHost) != 0) {
VLOG(3) << "copy data from GPU to CPU error";
VLOG(30) << "copy data from GPU to CPU error";
return false;
}
}
......@@ -181,14 +181,14 @@ anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
template <typename Target>
std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<Target>::Clone() {
VLOG(3) << "Anakin Predictor::clone";
VLOG(30) << "Anakin Predictor::clone";
std::unique_ptr<PaddlePredictor> cls(
new PaddleInferenceAnakinPredictor<Target>());
// construct executer from other graph
auto anakin_predictor_p =
dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
if (!anakin_predictor_p) {
VLOG(3) << "fail to call Init";
VLOG(30) << "fail to call Init";
return nullptr;
}
anakin_predictor_p->get_executer().init(graph_);
......@@ -206,10 +206,10 @@ template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
const contrib::AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create.";
VLOG(30) << "Anakin Predictor create.";
if (config.target_type == contrib::AnakinConfig::NVGPU) {
#ifdef PADDLE_WITH_CUDA
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
VLOG(30) << "Anakin Predictor create on [ NVIDIA GPU ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::NV>(config));
return x;
......@@ -218,12 +218,12 @@ CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
return nullptr;
#endif
} else if (config.target_type == contrib::AnakinConfig::X86) {
VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
VLOG(30) << "Anakin Predictor create on [ Intel X86 ].";
std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::X86>(config));
return x;
} else {
VLOG(3) << "Anakin Predictor create on unknown platform.";
VLOG(30) << "Anakin Predictor create on unknown platform.";
return nullptr;
}
}
......
......@@ -63,7 +63,7 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool NativePaddlePredictor::Init(
std::shared_ptr<framework::Scope> parent_scope) {
VLOG(3) << "Predictor::init()";
VLOG(30) << "Predictor::init()";
#if !defined(_WIN32)
if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance";
......@@ -135,7 +135,7 @@ NativePaddlePredictor::~NativePaddlePredictor() {
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size) {
VLOG(3) << "Predictor::predict";
VLOG(30) << "Predictor::predict";
Timer timer;
timer.tic();
// set feed variable
......@@ -147,17 +147,17 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
}
// Run the inference program
// if share variables, we need not create variables
VLOG(4) << "Run prepared context";
VLOG(40) << "Run prepared context";
executor_->RunPreparedContext(ctx_.get(), scope,
false, /* don't create local scope each time*/
false /* don't create variable each time */);
VLOG(4) << "Finish prepared context";
VLOG(40) << "Finish prepared context";
// get fetch variable
if (!GetFetch(output_data, scope)) {
LOG(ERROR) << "fail to get fetches";
return false;
}
VLOG(3) << "predict cost: " << timer.toc() << "ms";
VLOG(30) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......@@ -166,7 +166,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
}
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
VLOG(3) << "Predictor::clone";
VLOG(30) << "Predictor::clone";
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
......@@ -184,7 +184,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed";
VLOG(30) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size();
......@@ -244,7 +244,7 @@ void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch";
VLOG(30) << "Predictor::get_fetch";
outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
......@@ -269,7 +269,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor";
VLOG(30) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memeroy
PADDLE_ENFORCE_GT(
......@@ -283,7 +283,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags);
}
}
......
......@@ -34,7 +34,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true;
VLOG(3) << "Predictor::init()";
VLOG(30) << "Predictor::init()";
if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device);
} else {
......@@ -70,7 +70,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
OptimizeInferenceProgram();
ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(5) << "to create variables";
VLOG(50) << "to create variables";
executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names
......@@ -114,9 +114,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
new ProgramDesc(*inference_program_->Proto()));
Singleton<Analyzer>::Global().Run(&argument);
CHECK(argument.transformed_program_desc);
VLOG(5) << "transformed program:\n"
<< argument.transformed_program_desc->SerializeAsString();
VLOG(5) << "to prepare executor";
VLOG(50) << "transformed program:\n"
<< argument.transformed_program_desc->SerializeAsString();
VLOG(50) << "to prepare executor";
inference_program_.reset(
new framework::ProgramDesc(*argument.transformed_program_desc));
}
......@@ -129,7 +129,7 @@ template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const MixedRTConfig& config) {
VLOG(3) << "create TensorRTSubgraphPredictor";
VLOG(30) << "create TensorRTSubgraphPredictor";
if (config.use_gpu) {
// 1. GPU memeroy
PADDLE_ENFORCE_GT(
......@@ -143,7 +143,7 @@ CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag);
VLOG(3) << "set flag: " << flag;
VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags);
}
}
......
......@@ -45,7 +45,7 @@ void Main() {
config.fraction_of_gpu_memory = 0.1; // set by yourself
predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config);
VLOG(3) << "begin to process data";
VLOG(30) << "begin to process data";
// Just a single batch of data.
std::string line;
std::ifstream file(FLAGS_data);
......@@ -60,13 +60,13 @@ void Main() {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32;
VLOG(3) << "run executor";
VLOG(30) << "run executor";
std::vector<PaddleTensor> output;
predictor->Run({input}, &output, 1);
VLOG(3) << "output.size " << output.size();
VLOG(30) << "output.size " << output.size();
auto& tensor = output.front();
VLOG(3) << "output: " << SummaryTensor(tensor);
VLOG(30) << "output: " << SummaryTensor(tensor);
// compare with reference result
CheckOutput(FLAGS_refer, tensor);
......
......@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
}
Record ProcessALine(const std::string& line) {
VLOG(3) << "process a line";
VLOG(30) << "process a line";
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
......@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
VLOG(3) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size();
VLOG(30) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size();
return record;
}
......@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
VLOG(3) << "predictor output numel " << numel;
VLOG(3) << "reference output numel " << refer.data.size();
VLOG(30) << "predictor output numel " << numel;
VLOG(30) << "reference output numel " << refer.data.size();
CHECK_EQ(numel, refer.data.size());
switch (output.dtype) {
case PaddleDType::INT64: {
......
......@@ -49,11 +49,11 @@ void Main(bool use_gpu) {
config.fraction_of_gpu_memory = 0.1; // set by yourself
}
VLOG(3) << "init predictor";
VLOG(30) << "init predictor";
predictor = CreatePaddlePredictor<NativeConfig>(config);
analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
VLOG(3) << "begin to process data";
VLOG(30) << "begin to process data";
// Just a single batch of data.
std::string line;
std::ifstream file(FLAGS_data);
......@@ -68,13 +68,13 @@ void Main(bool use_gpu) {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32;
VLOG(3) << "run executor";
VLOG(30) << "run executor";
std::vector<PaddleTensor> output, analysis_output;
predictor->Run({input}, &output, 1);
VLOG(3) << "output.size " << output.size();
VLOG(30) << "output.size " << output.size();
auto& tensor = output.front();
VLOG(3) << "output: " << SummaryTensor(tensor);
VLOG(30) << "output: " << SummaryTensor(tensor);
// compare with reference result
CheckOutput(FLAGS_refer, tensor);
......
......@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// parameter.
if (var_name == "feed" || var_name == "fetch") continue;
if (var->Type() == typeid(framework::LoDTensorArray)) {
VLOG(4) << "collect " << var_name;
VLOG(40) << "collect " << var_name;
arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
}
}
......@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
CollectTensorArrays(kid);
}
VLOG(3) << "Collect " << arrays_.size() << " arrays";
VLOG(30) << "Collect " << arrays_.size() << " arrays";
flag_ = false;
}
}
......
......@@ -77,7 +77,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
for (auto* var : global_block.AllVars()) {
if (IsPersistable(var)) {
VLOG(3) << "persistable variable's name: " << var->Name();
VLOG(30) << "persistable variable's name: " << var->Name();
framework::VarDesc* new_var = load_block->Var(var->Name());
new_var->SetShape(var->GetShape());
......@@ -120,7 +120,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
const std::string& dirname) {
std::string model_filename = dirname + "/__model__";
std::string program_desc_str;
VLOG(3) << "loading model from " << model_filename;
VLOG(30) << "loading model from " << model_filename;
ReadBinaryFile(model_filename, &program_desc_str);
std::unique_ptr<framework::ProgramDesc> main_program(
......
......@@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid dropout op to tensorrt dropout layer";
VLOG(40) << "convert a fluid dropout op to tensorrt dropout layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
......
......@@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias";
VLOG(40) << "convert a fluid fc op to tensorrt fc layer without bias";
framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
......
......@@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid transpose op to tensorrt tranpose layer";
VLOG(40) << "convert a fluid transpose op to tensorrt tranpose layer";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4)
VLOG(40)
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter {
public:
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
VLOG(4)
VLOG(40)
<< "convert a fluid softmax op to tensorrt softmax layer without bias";
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
......
......@@ -217,9 +217,9 @@ void single_test() {
LOG(INFO) << "sequence_length = " << seq_offset[seq_offset.size() - 1];
float* data_o = static_cast<float*>(outputs[0].data.data());
VLOG(3) << "outputs[0].data.length() = " << outputs[0].data.length();
VLOG(30) << "outputs[0].data.length() = " << outputs[0].data.length();
for (size_t j = 0; j < outputs[0].data.length(); ++j) {
VLOG(3) << "output[" << j << "]: " << data_o[j];
VLOG(30) << "output[" << j << "]: " << data_o[j];
}
}
}
......
......@@ -27,7 +27,7 @@ struct Record {
};
Record ProcessALine(const std::string &line) {
VLOG(3) << "process a line";
VLOG(30) << "process a line";
std::vector<std::string> columns;
split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL)
......@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
for (auto &s : shape_strs) {
record.shape.push_back(std::stoi(s));
}
VLOG(3) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size();
VLOG(30) << "data size " << record.data.size();
VLOG(30) << "data shape size " << record.shape.size();
return record;
}
......
......@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
system_allocator_(std::move(system_allocator)) {}
BuddyAllocator::~BuddyAllocator() {
VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed";
VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed";
while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size;
// if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) {
VLOG(10) << "Allocate from system allocator.";
VLOG(100) << "Allocate from system allocator.";
return SystemAlloc(size);
}
......@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
return nullptr;
}
} else {
VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
}
total_used_ += size;
......@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
// Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_);
VLOG(10) << "Free from address " << block;
VLOG(100) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
VLOG(10) << "Free directly from system allocator";
VLOG(100) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_));
......@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the right buddy
if (block->has_right_buddy(cache_)) {
VLOG(10) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_);
VLOG(100) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_);
......@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the left buddy
if (block->has_left_buddy(cache_)) {
VLOG(10) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_);
VLOG(100) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_);
......@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) {
}
// Dumping this block into pool
VLOG(10) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")";
VLOG(100) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
......@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0;
void* p = system_allocator_->Alloc(&index, size);
VLOG(10) << "Allocated " << p << " from system allocator.";
VLOG(100) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr;
......@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
if (p == nullptr) return pool_.end();
VLOG(10) << "Creating and inserting new block " << p
<< " from system allocator";
VLOG(100) << "Creating and inserting new block " << p
<< " from system allocator";
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr);
......@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it);
VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into";
block->split(&cache_, size);
VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")";
VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")";
block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist
if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")";
VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert(
IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
......@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
return;
}
VLOG(10) << "Return block " << block << " to fallback allocator.";
VLOG(100) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
VLOG(10) << "Return block " << block << " to base allocator.";
VLOG(100) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block);
......
......@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
return existing_desc->second;
} else {
auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
PADDLE_ASSERT(desc->check_guards());
return *reinterpret_cast<const MemoryBlock::Desc*>(block);
}
......
......@@ -71,18 +71,18 @@ struct NaiveAllocator {
template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
VLOG(100) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size);
}
VLOG(10) << " pointer=" << p;
VLOG(100) << " pointer=" << p;
return p;
}
template <>
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
VLOG(100) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p);
}
......@@ -110,12 +110,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
VLOG(100) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n"
<< "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n";
}
});
......
......@@ -95,7 +95,7 @@ class ActivationGradKernel
auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx);
} else {
VLOG(10) << " Inplace activation ";
VLOG(100) << " Inplace activation ";
auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx);
}
......
......@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto& grad =
Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
if (grad.rows().size() == 0) {
VLOG(3) << "grad row size is 0!!";
VLOG(30) << "grad row size is 0!!";
return;
}
......
......@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
} else {
offset = static_cast<size_t>(*i_tensor.data<int64_t>());
}
VLOG(10) << " Offset = " << offset;
VLOG(100) << " Offset = " << offset;
return offset;
}
};
......
......@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second;
VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]";
VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]";
// Copy data
PADDLE_ENFORCE_GE(end_offset, start_offset);
size_t len = end_offset - start_offset;
......
......@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif
VLOG(3) << "Setting descriptors.";
VLOG(30) << "Setting descriptors.";
std::vector<int> dims;
std::vector<int> strides;
if (data_layout == DataLayout::kNCHW) {
......
......@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
auto selected_items = ToMap(items, high_level.back());
VLOG(3) << "selected_items:";
VLOG(30) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(3) << "offset:" << i;
VLOG(30) << "offset:" << i;
for (auto &item : selected_items[i]) {
VLOG(3) << ItemToString(item);
VLOG(30) << ItemToString(item);
}
}
......@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
}
result.emplace_back(items);
}
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size();
VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) {
VLOG(3) << "item set:";
VLOG(30) << "item set:";
for (auto &item : items) {
VLOG(3) << ItemToString(item);
VLOG(30) << ItemToString(item);
}
}
......
......@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
auto lookup_table_save_dir =
string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name
<< " and dir:" << dir << " to " << epmap[i];
VLOG(30) << "checkpoint notify sending lookup table: "
<< lookup_table_name << " and dir:" << dir << " to " << epmap[i];
}
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
}
......
......@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
if (n == 1) {
VLOG(3) << "Warning: concat op have only one input, may waste memory";
VLOG(30) << "Warning: concat op have only one input, may waste memory";
}
auto out_dims = ins[0];
......
......@@ -143,11 +143,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
// Currently tensor core is only enabled using this algo
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
VLOG(5) << "use cudnn_tensor_op_math";
VLOG(50) << "use cudnn_tensor_op_math";
} else {
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH));
VLOG(5) << "NOT use cudnn_tensor_op_math";
VLOG(50) << "NOT use cudnn_tensor_op_math";
}
#endif
......
......@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
void AsyncBRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready";
VLOG(30) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady";
VLOG(30) << "AsyncGRPCServer WaitSeverReady";
}
}; // namespace distributed
......
......@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() {
std::unique_lock<std::mutex> lk(completed_mutex_);
if (!completed_) {
for (auto& it : channels_) {
VLOG(3) << "send complete message to " << it.first;
VLOG(30) << "send complete message to " << it.first;
this->AsyncSendComplete(it.first);
}
PADDLE_ENFORCE(this->Wait(), "internal grpc error");
......@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
::grpc::ByteBuffer req;
SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = nullptr;
......@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
::grpc::ByteBuffer buf;
RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetResponse;
......@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
::grpc::ByteBuffer req;
SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetResponse;
......@@ -328,14 +328,14 @@ void GRPCClient::Proceed() {
void* tag = nullptr;
bool ok = false;
VLOG(3) << "GRPCClient Proceed begin";
VLOG(30) << "GRPCClient Proceed begin";
while (!stopped_ && cq_.Next(&tag, &ok)) {
BaseProcessor* c = static_cast<BaseProcessor*>(tag);
GPR_ASSERT(ok);
PADDLE_ENFORCE(c);
if (c->status_.ok()) {
VLOG(3) << c->GetVarHandlePtr()->String() << " process";
VLOG(30) << c->GetVarHandlePtr()->String() << " process";
c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
// FIXME(gongwb): parse error_details?
......@@ -370,7 +370,7 @@ void GRPCClient::Proceed() {
sync_cond_.notify_all();
}
}
VLOG(3) << "GRPCClient Proceed end";
VLOG(30) << "GRPCClient Proceed end";
}
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
......
......@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase {
void Process() override {
std::string varname = GetReqName();
VLOG(4) << "RequestSend var_name:" << varname;
VLOG(40) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar();
......@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase {
// proc request.
std::string varname = request_.varname();
int trainer_id = request_.trainer_id();
VLOG(4) << "RequestGet " << varname;
VLOG(40) << "RequestGet " << varname;
auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname);
......@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase {
std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname();
int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name;
VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name);
......@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase {
std::string checkpoint_dir = request_->OutVarname();
int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir;
VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir;
request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
trainer_id, checkpoint_dir);
......@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase {
};
void AsyncGRPCServer::WaitServerReady() {
VLOG(4) << "AsyncGRPCServer is wait server ready";
VLOG(40) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(4) << "AsyncGRPCServer WaitSeverReady";
VLOG(40) << "AsyncGRPCServer WaitSeverReady";
}
void AsyncGRPCServer::StartServer() {
......@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() {
reqs.reserve(kRequestBufSize);
for (int i = 0; i < kRequestBufSize; i++) {
VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i;
VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " I: " << i;
TryToRegisterNewOne(rpc_name, i);
}
for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(4) << t.first << " creates threads!";
VLOG(40) << t.first << " creates threads!";
}
}
......@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join();
VLOG(4) << t.first << " threads ends!";
VLOG(40) << t.first << " threads ends!";
}
}
}
......@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) {
t.second->Shutdown();
VLOG(4) << t.first << " queue shutdown!";
VLOG(40) << t.first << " queue shutdown!";
}
}
......@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true;
ShutdownQueue();
VLOG(4) << "server_ shutdown!";
VLOG(40) << "server_ shutdown!";
server_->Shutdown();
}
......@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) {
VLOG(4) << "shutdown, do not TryToRegisterNewSendOne";
VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
return;
}
VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id;
VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id;
auto& reqs = rpc_reqs_[rpc_name];
auto& handler = rpc_call_map_[rpc_name];
......@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
reqs[req_id] = b;
VLOG(4) << "Create RequestSend status:" << b->Status();
VLOG(40) << "Create RequestSend status:" << b->Status();
}
void AsyncGRPCServer::HandleRequest(
......@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false;
while (true) {
VLOG(4) << "HandleRequest " << rpc_name << " wait next";
VLOG(40) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) {
VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!";
VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
break;
}
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next";
VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next";
auto& reqs = rpc_reqs_[rpc_name];
RequestBase* base = nullptr;
......@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id];
}
VLOG(3) << base->Status2String(rpc_name);
VLOG(30) << base->Status2String(rpc_name);
// reference:
// https://github.com/tensorflow/tensorflow/issues/5596
......
......@@ -75,7 +75,7 @@ class VarHandle {
wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
ret = status_;
}
VLOG(7) << "VarHandle wait:" << ret;
VLOG(70) << "VarHandle wait:" << ret;
return ret != kErrorState;
}
......@@ -84,7 +84,7 @@ class VarHandle {
std::unique_lock<std::mutex> lk(sync_mutex_);
status_ = ok ? kFinishState : kErrorState;
}
VLOG(7) << "VarHandle finish:" << ok;
VLOG(70) << "VarHandle finish:" << ok;
wait_cond_.notify_all();
}
......
......@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
framework::Variable** outvar,
const int trainer_id,
const std::string& out_var_name) {
VLOG(4) << "RequestSendHandler:" << varname;
VLOG(40) << "RequestSendHandler:" << varname;
// Sync
if (varname == BATCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE";
VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
rpc_server_->IncreaseBatchBarrier(kRequestSend);
} else if (varname == COMPLETE_MESSAGE) {
VLOG(3) << "sync: recv complete message";
VLOG(30) << "sync: recv complete message";
rpc_server_->Complete();
} else {
// Async
if (!sync_mode_) {
VLOG(3) << "async process var: " << varname;
VLOG(30) << "async process var: " << varname;
try {
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
scope);
......@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
return true;
} else { // sync
rpc_server_->WaitCond(kRequestSend);
VLOG(3) << "sync: processing received var: " << varname;
VLOG(30) << "sync: processing received var: " << varname;
if (invar == nullptr) {
LOG(FATAL) << "sync: Can not find server side var: " << varname;
......@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
framework::Variable** outvar,
const int trainer_id,
const std::string& out_var_name) {
VLOG(4) << "RequestGetHandler:" << varname;
VLOG(40) << "RequestGetHandler:" << varname;
if (sync_mode_) {
if (varname == FETCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv fetch barrier message";
VLOG(30) << "sync: recv fetch barrier message";
rpc_server_->IncreaseBatchBarrier(kRequestGet);
} else {
rpc_server_->WaitCond(kRequestGet);
......@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname,
// NOTE: the format is determined by distributed_transpiler.py
std::string param_bak_name =
string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id;
VLOG(30) << "getting " << param_bak_name << " trainer_id "
<< trainer_id;
auto var = scope_->FindVar(varname);
auto t_orig = var->Get<framework::LoDTensor>();
auto param_bak = scope_->Var(param_bak_name);
auto t = param_bak->GetMutable<framework::LoDTensor>();
t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
VLOG(3) << "copying " << varname << " to " << param_bak_name;
VLOG(30) << "copying " << varname << " to " << param_bak_name;
framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
}
*outvar = scope_->FindVar(varname);
......@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
framework::Variable** outvar,
const int trainer_id,
const std::string& out_var_name) {
VLOG(4) << "RequestPrefetchHandler " << varname;
VLOG(40) << "RequestPrefetchHandler " << varname;
auto var_desc = program_->Block(0).FindVar(out_var_name);
InitializeVariable(*outvar, var_desc->GetType());
......@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
lt_var->clear();
lt_var->append(out_var_name);
VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name;
VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
return true;
}
......
......@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
port_file.open(file_path);
port_file << selected_port_;
port_file.close();
VLOG(4) << "selected port written to " << file_path;
VLOG(40) << "selected port written to " << file_path;
}
void RPCServer::WaitBarrier(const std::string& rpc_name) {
......@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
exit_flag_.load());
});
VLOG(3) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
VLOG(30) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name];
}
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0;
std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name];
......@@ -71,7 +71,7 @@ void RPCServer::Complete() {
client_num_--;
need_reset_all_vars_ = true;
VLOG(4) << "decrease client_num to: " << client_num_;
VLOG(40) << "decrease client_num to: " << client_num_;
if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
barrier_counter_[kRequestGet]--;
}
......@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
}
void RPCServer::ResetBarrierCounter() {
VLOG(3) << "RPCServer ResetBarrierCounter ";
VLOG(30) << "RPCServer ResetBarrierCounter ";
std::unique_lock<std::mutex> lock(mutex_);
for (auto& t : barrier_counter_) {
t.second = 0;
......@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
static int cond = -1;
rpc_cond_map_[rpc_name] = ++cond;
VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name];
VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name];
}
void RPCServer::SetCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer SetCond " << rpc_name;
VLOG(30) << "RPCServer SetCond " << rpc_name;
{
std::unique_lock<std::mutex> lock(mutex_);
cur_cond_ = rpc_cond_map_[rpc_name];
......@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
}
void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(4) << "RPCServer WaitCond " << rpc_name;
VLOG(40) << "RPCServer WaitCond " << rpc_name;
int cond = 0;
{
std::unique_lock<std::mutex> lock(mutex_);
......
......@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
size_to_write = length - total_written;
}
// This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CUDAPlace";
VLOG(70) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream());
......@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
// TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu;
// This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CPUPlace";
VLOG(70) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write;
......@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField(
#endif
}
VLOG(7) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl;
VLOG(70) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) {
PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!");
......
......@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
auto col = Attr<int>("col");
VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var "
<< out_name;
VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
<< " column to var " << out_name;
auto &feed_list = feed_var->Get<framework::FeedFetchList>();
auto &feed_item = feed_list.at(static_cast<size_t>(col));
......
......@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) {
VLOG(3) << "fetch barrier, ep: " << ep;
VLOG(30) << "fetch barrier, ep: " << ep;
rpc_client->AsyncSendFetchBarrier(ep);
}
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
......@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
dst_item.set_lod(src_item.lod());
VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name;
VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
}
};
......
......@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
for (auto& ep : endpoint_list) {
VLOG(3) << "sending nccl id to " << ep;
VLOG(30) << "sending nccl id to " << ep;
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
}
client->Wait();
......@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendBatchBarrier(ep);
}
client->Wait();
VLOG(3) << "sending completed...";
VLOG(30) << "sending completed...";
}
void GetIdByServer(framework::Scope* scope,
......@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
rpc_service->SetCond(distributed::kRequestSend);
VLOG(3) << "start getting nccl id from trainer 0...";
VLOG(30) << "start getting nccl id from trainer 0...";
rpc_service->WaitBarrier(distributed::kRequestSend);
VLOG(3) << "got nccl id and stop server...";
VLOG(30) << "got nccl id and stop server...";
rpc_service->ShutDown();
VLOG(3) << "rpc server stopped";
VLOG(30) << "rpc server stopped";
server_thread.join();
}
};
......
......@@ -36,7 +36,7 @@ namespace operators {
void RunServer(std::shared_ptr<distributed::RPCServer> service) {
service->StartServer();
VLOG(4) << "RunServer thread end";
VLOG(40) << "RunServer thread end";
}
static void split(const std::string &str, char sep,
std::vector<std::string> *pieces) {
......@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks(
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
int run_block = idx; // thread local
try {
VLOG(3) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get();
VLOG(30) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get();
executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (const std::exception &e) {
LOG(FATAL) << "run sub program:" << idx << " error " << e.what();
......@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
const std::vector<int> &prefetch_block_id_list,
const int checkpoint_point_block_id) const {
VLOG(2) << "RunSyncLoop";
VLOG(20) << "RunSyncLoop";
size_t num_blocks = program->Size();
auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
......@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
}
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
recv_scope);
VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
......@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : sparse_vars_) {
auto var = recv_scope->FindVar(varname);
if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope";
VLOG(20) << "can not find var " << varname << " in received scope";
continue;
}
if (var->IsType<framework::SelectedRows>()) {
VLOG(3) << "reset sparse var: " << varname;
VLOG(30) << "reset sparse var: " << varname;
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
} else {
PADDLE_THROW("The type of sparse var should be SelectedRows");
......@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : dense_vars_) {
auto var = recv_scope->FindVar(varname);
if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope";
VLOG(20) << "can not find var " << varname << " in received scope";
continue;
}
if (var->IsType<framework::LoDTensor>()) {
......@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program,
framework::Scope *recv_scope) const {
VLOG(2) << "RunAsyncLoop";
VLOG(20) << "RunAsyncLoop";
auto grad_to_block_id_str =
Attr<std::vector<std::string>>("grad_to_block_id");
DoubleFindMap<std::string, int32_t> grad_to_block_id;
......@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
const std::string &grad_and_id) {
std::vector<std::string> pieces;
split(grad_and_id, ':', &pieces);
VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2);
PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
......@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
while (true) {
if (rpc_service_->IsExit()) {
VLOG(4) << "get exit!rpc_processor break!";
VLOG(40) << "get exit!rpc_processor break!";
break;
}
......@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
std::string endpoint = Attr<std::string>("endpoint");
int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id;
VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id;
rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));
......@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
prefetch_var_name_to_block_id_str) {
std::vector<std::string> pieces;
split(prefetch_var_name_and_id, ':', &pieces);
VLOG(3) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1];
VLOG(30) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2);
int block_id = std::stoi(pieces[1]);
......@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(3) << "wait server thread to become ready...";
VLOG(30) << "wait server thread to become ready...";
rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
......
......@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level"));
VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
VLOG(10) << Input("X") << "'s lod information is " << *out;
VLOG(100) << Input("X") << "'s lod information is " << *out;
}
};
......
......@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr);
if (is_sparse) {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows";
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows";
block->Var(out_var_name)
->SetType(framework::proto::VarType::SELECTED_ROWS);
} else {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor";
VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
}
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
......
......@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
}
auto et = GetCurrentUS();
VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat;
VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat;
for (int i = 0; i < n; ++i) {
EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
}
......
......@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) {
vrelu_intri8(d, x_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
}
#endif
auto ttgts = GetCurrentUS();
......@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) {
ker->Compute(x_data, ztgt_data);
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat;
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -320,9 +324,10 @@ TEST(JitKernel, vtanh) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -440,9 +445,10 @@ TEST(JitKernel, lstm) {
ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data);
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
}
}
......@@ -524,8 +530,8 @@ TEST(JitKernel, vscal) {
vscal_inp_intri8(d, a, y_data);
}
auto si3 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat;
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat;
}
#endif
......@@ -539,15 +545,17 @@ TEST(JitKernel, vscal) {
ker->Compute(a, y_data);
}
auto ttgte1 = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat
<< " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
<< "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -610,7 +618,7 @@ TEST(JitKernel, vmul) {
vmul_intri8(d, x_data, y_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
}
#endif
......@@ -620,13 +628,14 @@ TEST(JitKernel, vmul) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat;
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -689,7 +698,7 @@ TEST(JitKernel, vadd) {
vadd_intri8(d, x_data, y_data, zref_data);
}
auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
}
#endif
......@@ -699,13 +708,14 @@ TEST(JitKernel, vadd) {
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else
<< " us, "
<< " us, "
#endif
<< "tgt takes: " << (ttgte - ttgts) / repeat;
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......@@ -760,9 +770,10 @@ TEST(JitKernel, vaddrelu) {
ker->Compute(x_data, y_data, ztgt_data, d);
}
auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat;
VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
}
......
......@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) {
if (inputs.size() == 0) {
VLOG(3) << "no input! return";
VLOG(30) << "no input! return";
return;
}
const framework::SelectedRows* has_value_input = nullptr;
......@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
}
}
if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl;
VLOG(30) << "no input has value! just return" << std::endl;
return;
}
auto input_width = has_value_input->value().dims()[1];
......
......@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) {
if (inputs.size() == 0) {
VLOG(3) << "no input! return";
VLOG(30) << "no input! return";
return;
}
const framework::SelectedRows* has_value_input = nullptr;
......@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
}
}
if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl;
VLOG(30) << "no input has value! just return" << std::endl;
return;
}
auto input_width = has_value_input->value().dims()[1];
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册