Unverified commit 200c4102, authored by Qiyang Min and committed by GitHub

Merge pull request #14324 from velconia/fix_vlog

Change the original VLOG levels to 10 times their values
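For context, these macros follow glog's verbose-logging semantics: VLOG(n) emits its message only when the runtime verbosity (the --v flag, or the GLOG_v environment variable) is at least n, and VLOG_IS_ON(n) guards more expensive logging work. Multiplying every level by 10 makes the existing messages harder to enable by accident and leaves room for finer-grained levels in between. A minimal standalone sketch of the behavior this rescaling relies on (a toy program, not part of the patch):

#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  FLAGS_logtostderr = true;  // send log output to stderr for the demo
  FLAGS_v = 30;              // same effect as passing --v=30 or GLOG_v=30

  VLOG(30) << "visible: 30 <= current verbosity";   // printed
  VLOG(100) << "hidden: 100 > current verbosity";   // skipped

  if (VLOG_IS_ON(100)) {
    // Guard expensive debug work behind the check, as the diff does
    // around op->DebugString().
    VLOG(100) << "very verbose diagnostics";
  }
  return 0;
}

Under the old scheme a user running with --v=3 saw every VLOG(3) message; after this change the same messages require --v=30.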
@@ -18,8 +18,8 @@ namespace framework {
 void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
                      Tensor *out) {
-  VLOG(3) << "DeviceTransform in, src_place " << in.place()
-          << " dst_place: " << dst_place;
+  VLOG(30) << "DeviceTransform in, src_place " << in.place()
+           << " dst_place: " << dst_place;
   PADDLE_ENFORCE_NE(
       in.place().which(), dst_place.which(),
...
@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
   OpKernelType GetExpectedKernelType(
       const ExecutionContext& ctx) const override {
     if (Attr<bool>("use_gpu")) {
-      VLOG(3) << "force use gpu kernel";
+      VLOG(30) << "force use gpu kernel";
       return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
     } else {
-      VLOG(3) << "use default kernel";
+      VLOG(30) << "use default kernel";
       return OpKernelType(proto::VarType::FP32,
                           ctx.Input<Tensor>("input")->place());
     }
@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
   // get output
   auto* output2 = scope.Var("OUT2");
   gpu_op->Run(scope, cuda_place);
-  VLOG(3) << "after gpu_op run";
+  VLOG(30) << "after gpu_op run";
   // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
   paddle::platform::DeviceContextPool& pool =
...
@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
   PADDLE_ENFORCE_NOT_NULL(in_var);
   Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
   if (UNLIKELY(!in_tensor.IsInitialized())) {
-    VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!";
+    VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
     return;
   }
...
@@ -45,8 +45,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
         IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
     compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
     if (is_lock_and_record_event_free) {
-      VLOG(10) << "Set is_lock_and_record_event_free be true in op "
-               << compute_op->DebugString();
+      VLOG(100) << "Set is_lock_and_record_event_free be true in op "
+                << compute_op->DebugString();
     }
   }
   return ir_graph;
...
@@ -399,7 +399,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
       for (size_t i = 0; i < backward_vars.size(); i += 2) {
         auto &p_name = backward_vars[i];
         auto &g_name = backward_vars[i + 1];
-        VLOG(10) << "Bcast " << g_name << " for parameter " << p_name;
+        VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
         switch (strategy_.reduce_) {
           case BuildStrategy::ReduceStrategy::kReduce:
@@ -809,8 +809,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
         node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
     PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
     op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
-    VLOG(10) << "send grad " << input_var_names[0] << " origin "
-             << send_param_grad[1] << " place: " << op_dev_id;
+    VLOG(100) << "send grad " << input_var_names[0] << " origin "
+              << send_param_grad[1] << " place: " << op_dev_id;
     for (auto &varname : input_var_names) {
       sharded_var_device->emplace(varname, op_dev_id);
     }
@@ -826,9 +826,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(
     if (recv_param_grad.size() == 2U) {
       op_dev_id =
           GetVarDeviceID(*result, recv_param_grad[1], *sharded_var_device);
-      VLOG(10) << "recv param " << recv_param_grad[0]
-               << " get grad place: " << recv_param_grad[1]
-               << " place: " << op_dev_id;
+      VLOG(100) << "recv param " << recv_param_grad[0]
+                << " get grad place: " << recv_param_grad[1]
+                << " place: " << op_dev_id;
     } else {
       op_dev_id = GetAppropriateDeviceID(output_var_names);
     }
...
@@ -140,8 +140,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
       if (next_compute_op != nullptr) {
         if (compute_ref_cnt_map.count(next_compute_op)) {
           compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
-          VLOG(5) << "Add reference count of " << var_name << " to Operator "
-                  << next_compute_op->Name();
+          VLOG(50) << "Add reference count of " << var_name << " to Operator "
+                   << next_compute_op->Name();
         } else {
           // Create new reference_count_op_handle
           ir::Node *ref_cnt_node = graph->CreateEmptyNode(
...
@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
                       ->stream();
     memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
                  platform::CPUPlace(), &coeff_, sizeof(float), stream);
-    VLOG(10) << place_ << "RUN Scale loss grad op";
+    VLOG(100) << place_ << "RUN Scale loss grad op";
   });
 #endif
 }
...
@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
     op_node_list[i - 1]->outputs.push_back(dep_var);
     dep_var->outputs.push_back(op_node_list[i]);
     dep_var->inputs.push_back(op_node_list[i - 1]);
-    VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name()
-             << " and " << op_node_list[i]->Name();
+    VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
+              << " and " << op_node_list[i]->Name();
   }
   return graph;
 }
...
@@ -210,16 +210,16 @@ void ThreadedSSAGraphExecutor::RunOp(
     details::OpHandleBase *op) {
   auto op_run = [ready_var_q, op, this] {
     try {
-      if (VLOG_IS_ON(10)) {
-        VLOG(10) << op << " " << op->Name() << " : " << op->DebugString();
+      if (VLOG_IS_ON(100)) {
+        VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
       }
       if (LIKELY(!strategy_.dry_run_)) {
         op->Run(strategy_.use_cuda_);
       }
-      VLOG(10) << op << " " << op->Name() << " Done ";
+      VLOG(100) << op << " " << op->Name() << " Done ";
       running_ops_--;
       ready_var_q->Extend(op->Outputs());
-      VLOG(10) << op << " " << op->Name() << "Signal posted";
+      VLOG(100) << op << " " << op->Name() << "Signal posted";
     } catch (...) {
       exception_holder_.Catch(std::current_exception());
     }
...
@@ -43,7 +43,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
 }
 ExecutorPrepareContext::~ExecutorPrepareContext() {
-  VLOG(5) << "destroy ExecutorPrepareContext";
+  VLOG(50) << "destroy ExecutorPrepareContext";
 }
 template <typename RefCntMap>
@@ -60,7 +60,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
     if ((it->second)-- == 1) {
       auto* var = scope.FindVar(name);
       if (var != nullptr) {
-        VLOG(10) << "Erase tensor \'" << name << "\'";
+        VLOG(100) << "Erase tensor \'" << name << "\'";
         if (var->IsType<LoDTensor>()) {
           erase_tensors.insert(var->GetMutable<LoDTensor>());
         } else if (var->IsType<SelectedRows>()) {
@@ -141,21 +141,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
       if (var->Persistable()) {
         auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " global, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " global, which pointer is " << ptr;
       } else {
         auto* ptr = scope->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " locally, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " locally, which pointer is " << ptr;
       }
     }
   } else {
     for (auto& var : global_block.AllVars()) {
       auto* ptr = scope->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
-      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
-              << ptr;
+      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
+               << ptr;
     }
   }
 }
@@ -286,7 +286,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& feed_target : (*feed_targets)) {
       std::string var_name = feed_target.first;
-      VLOG(3) << "feed target's name: " << var_name;
+      VLOG(30) << "feed target's name: " << var_name;
       // prepend feed op
       auto* op = global_block->PrependOp();
@@ -309,7 +309,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
     int i = 0;
     for (auto& fetch_target : (*fetch_targets)) {
       std::string var_name = fetch_target.first;
-      VLOG(3) << "fetch target's name: " << var_name;
+      VLOG(30) << "fetch target's name: " << var_name;
       // append fetch op
       auto* op = global_block->AppendOp();
@@ -398,8 +398,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     }
     if (FLAGS_benchmark) {
-      VLOG(2) << "Memory used after operator " + op->Type() + " running: "
-              << memory::memory_usage(place_);
+      VLOG(20) << "Memory used after operator " + op->Type() + " running: "
+               << memory::memory_usage(place_);
     }
   }
@@ -424,10 +424,10 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
   }
   if (FLAGS_benchmark) {
-    VLOG(2) << "-------------------------------------------------------";
-    VLOG(2) << "Memory used after deleting local scope: "
-            << memory::memory_usage(place_);
-    VLOG(2) << "-------------------------------------------------------";
+    VLOG(20) << "-------------------------------------------------------";
+    VLOG(20) << "Memory used after deleting local scope: "
+             << memory::memory_usage(place_);
+    VLOG(20) << "-------------------------------------------------------";
   }
 }
@@ -471,7 +471,7 @@ void Executor::RunPreparedContext(
 void Executor::EnableMKLDNN(const ProgramDesc& program) {
 #ifdef PADDLE_WITH_MKLDNN
-  VLOG(3) << "use_mkldnn=True";
+  VLOG(30) << "use_mkldnn=True";
   for (size_t bid = 0; bid < program.Size(); ++bid) {
     auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
     for (auto* op : block->AllOps()) {
...
@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
                      const std::string& var_name, size_t index) {
   // If var_name Variable is not found in GlobalScope, a new variable will
   // be created.
-  VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index;
+  VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
   Variable* g_feed_value = scope->Var(var_name);
   auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
   if (index >= feed_inputs.size()) {
@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
                  typeid(FeedFetchList).name());
   auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
   auto& tensor = fetch_outputs[index];
-  VLOG(3) << "Fetch " << var_name << " with index " << index
-          << " shape= " << tensor.dims();
+  VLOG(30) << "Fetch " << var_name << " with index " << index
+           << " shape= " << tensor.dims();
   PADDLE_ENFORCE_LT(index, fetch_outputs.size());
   return tensor;
 }
...
@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) {
   scope->Var(param.LSTMX)->GetMutable<LoDTensor>();
   scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>();
 #define GATE_W(name__)                                                \
   auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0");             \
   auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1");             \
   auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0");             \
   CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0);        \
-  VLOG(4) << #name__ "_w0"                                            \
-          << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims();  \
-  VLOG(4) << #name__ "_w1"                                            \
-          << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims();  \
-  VLOG(4) << #name__ "_b0"                                            \
-          << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims();  \
+  VLOG(40) << #name__ "_w0"                                           \
+           << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
+  VLOG(40) << #name__ "_w1"                                           \
+           << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
+  VLOG(40) << #name__ "_b0"                                           \
+           << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
   auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>();        \
   auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>();        \
   auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>();
   GATE_W(forget);
@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
   int D = W_forget_w0.dims()[0];
   int M = W_forget_w1.dims()[0];
   out->Resize(make_ddim({D + M, 4 * D}));
-  VLOG(3) << "LSTMWeight resized to " << out->dims();
+  VLOG(30) << "LSTMWeight resized to " << out->dims();
   float* out_data = out->mutable_data<float>(platform::CPUPlace());
   std::array<const float*, 4> tensors(
...
@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
   int found_conv_bias_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBias fuse";
+    VLOG(40) << "handle ConvBias fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_bias_pattern);                     // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern);  // tmp
@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
     if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
-      VLOG(3) << "do not perform conv+bias fuse";
+      VLOG(30) << "do not perform conv+bias fuse";
       return;
     }
...
@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBN fuse";
+    VLOG(40) << "handle ConvBN fuse";
     // conv, batch_norm,
     // conv_weight, conv_out,
@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
     // check if fuse can be done and if MKL-DNN should be used
     FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(3) << "do not perform conv+bn fuse";
+      VLOG(30) << "do not perform conv+bn fuse";
       return;
     }
@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
   int found_conv_bn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvBN fuse";
+    VLOG(40) << "handle ConvBN fuse";
     // conv, batch_norm,
     // conv_weight, conv_out,
...
@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
   int found_conv_relu_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle ConvReLU fuse";
+    VLOG(40) << "handle ConvReLU fuse";
     GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
                               conv_relu_pattern);                     // Filter
     GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern);  // tmp
@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
     FuseOptions fuse_option = FindFuseOption(*conv, *relu);
     if (fuse_option == DO_NOT_FUSE) {
-      VLOG(3) << "do not perform conv+relu fuse";
+      VLOG(30) << "do not perform conv+relu fuse";
       return;
     }
...
@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
   int found_depthwise_conv_mkldnn_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(3) << "handle DepthwiseConvMKLDNN fuse";
+    VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
     GET_NODE(depthwise_conv, (*pattern));
     depthwise_conv->Op()->SetType("conv2d");
     found_depthwise_conv_mkldnn_count++;
...
@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
   int found_fc_count = 0;
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle FC fuse";
+    VLOG(40) << "handle FC fuse";
     GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
...
@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddAct fuse";
+    VLOG(40) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
                               elewise_add_act_pattern);
@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
-    VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
-            << ele_add->Name() << " -> " << ele_out_n << "\n"
-            << "\t " << ele_out_n << " -> " << act->Name() << " -> "
-            << act_out_n;
+    VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
+             << ele_add->Name() << " -> " << ele_out_n << "\n"
+             << "\t " << ele_out_n << " -> " << act->Name() << " -> "
+             << act_out_n;
     ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node);
     found_elewise_add_act_count++;
@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddAct fuse";
+    VLOG(40) << "handle FuseElewiseAddAct fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
     Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
         g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
-    VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
-            << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
-            << ele_add->Name() << " -> " << elewise_add_out_n;
+    VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
+             << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
+             << ele_add->Name() << " -> " << elewise_add_out_n;
     ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node);
     found_elewise_add_act_count++;
@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
   auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
                      Graph *g) {
-    VLOG(4) << "handle FuseElewiseAddActGrad1 fuse";
+    VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
     GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
     auto fused_node = g->CreateOpNode(&desc);
-    VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
-            << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
-            << d_itermediate_out_n << " and " << act_out_n << " -> "
-            << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
+    VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
+             << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
+             << d_itermediate_out_n << " and " << act_out_n << " -> "
+             << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
     ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node);
     found_elewise_add_act_count++;
...
@@ -92,7 +92,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
 std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
     const ProgramDesc &program) {
-  VLOG(3) << "block in program:" << program_.Size();
+  VLOG(30) << "block in program:" << program_.Size();
   std::unordered_map<std::string, VarDesc *> all_vars;
   // var nodes for each var name, will have multiple versions in SSA
   std::map<std::string, std::vector<ir::Node *>> var_nodes;
@@ -160,7 +160,7 @@ void Graph::ResolveHazard(
     auto it_old = versions.rbegin();
     ++it_old;
     for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
-      VLOG(3) << "deal with var: " << (*it_new)->Name();
+      VLOG(30) << "deal with var: " << (*it_new)->Name();
       ir::Node *write_op =
           (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
       const auto &read_ops = (*it_old)->outputs;
...
@@ -89,7 +89,7 @@ class Graph {
                      attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(3) << "deleting " << attr_name;
+      VLOG(30) << "deleting " << attr_name;
       delete attr;
     };
   }
...
@@ -33,8 +33,9 @@ void SortHelper(
     }
   }
-  VLOG(3) << "topology sort insert: " << node->Name()
-          << reinterpret_cast<void *>(node) << " input " << node->inputs.size();
+  VLOG(30) << "topology sort insert: " << node->Name()
+           << reinterpret_cast<void *>(node) << " input "
+           << node->inputs.size();
   ret->push_back(node);
 }
@@ -103,9 +104,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
     for (auto &var : n->inputs) {
       for (auto &adj_n : var->inputs) {
         PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
-        VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
-                << " -> " << n->Name() << reinterpret_cast<void *>(n)
-                << " via " << var->Name() << reinterpret_cast<void *>(var);
+        VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
+                 << " -> " << n->Name() << reinterpret_cast<void *>(n)
+                 << " via " << var->Name() << reinterpret_cast<void *>(var);
         adj_list[n].insert(adj_n);
       }
     }
@@ -163,10 +164,10 @@ size_t GraphNum(const Graph &graph) {
       graph_nodes.emplace_back(g_nodes);
   }
-  if (VLOG_IS_ON(10)) {
-    VLOG(10) << "graph_num: " << graph_nodes.size();
+  if (VLOG_IS_ON(100)) {
+    VLOG(100) << "graph_num: " << graph_nodes.size();
     for (auto &g_n : graph_nodes) {
-      VLOG(10) << "graph_nodes: " << g_n.size();
+      VLOG(100) << "graph_nodes: " << g_n.size();
       if (g_n.size() < 10) {
         std::stringstream out;
         for (auto &node : g_n) {
@@ -180,7 +181,7 @@ size_t GraphNum(const Graph &graph) {
           }
           out << "]";
         }
-        VLOG(10) << out.str();
+        VLOG(100) << out.str();
       }
     }
   }
...
@@ -92,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
   PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size());
   int id = 0;
   for (auto &g : subgraphs) {
-    VLOG(3) << "optimizing #" << id++ << " subgraph";
+    VLOG(30) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }
 bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
-  VLOG(3) << "mark pdnodes in graph";
+  VLOG(30) << "mark pdnodes in graph";
   if (graph.Nodes().empty()) return false;
   for (auto &node : GraphTraits::DFS(graph)) {
     for (const auto &pdnode : pattern_.nodes()) {
       if (pdnode->Tell(&node)) {
-        VLOG(4) << "pdnode " << pdnode->name() << " marked";
+        VLOG(40) << "pdnode " << pdnode->name() << " marked";
         pdnodes2nodes_[pdnode.get()].insert(&node);
       }
     }
@@ -112,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
   // Check to early stop if some PDNode can't find matched Node.
   for (auto &pdnode : pattern_.nodes()) {
     if (!pdnodes2nodes_.count(pdnode.get())) {
-      VLOG(4) << pdnode->name() << " can't find matched Node, early stop";
+      VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
       // return false;
     }
   }
@@ -121,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
       GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
     }
   }
-  VLOG(3) << pdnodes2nodes_.size() << " nodes marked";
+  VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
   return !pdnodes2nodes_.empty();
 }
@@ -214,7 +214,7 @@ GraphPatternDetector::DetectPatterns() {
   // Extend a PDNode to subgraphs by deducing the connection relations defined
   // in edges of PDNodes.
   for (const auto &edge : pattern_.edges()) {
-    VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name();
+    VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
     // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
     // Each role has two PDNodes, which indicates two roles.
     // Detect two Nodes that can match these two roles and they are connected.
@@ -225,7 +225,7 @@ GraphPatternDetector::DetectPatterns() {
     // source -> target
     for (Node *source : pdnodes2nodes_[edge.first]) {
       for (Node *target : pdnodes2nodes_[edge.second]) {
-        VLOG(8) << "check " << source->id() << " -- " << target->id();
+        VLOG(80) << "check " << source->id() << " -- " << target->id();
         // TODO(Superjomn) add some prune strategies.
         for (const auto &group : pre_groups) {
           HitGroup new_group = group;
@@ -241,12 +241,13 @@ GraphPatternDetector::DetectPatterns() {
         }
       }
     }
-    VLOG(3) << "step " << step << " get records: " << cur_groups.size();
+    VLOG(30) << "step " << step << " get records: " << cur_groups.size();
     for (auto &group : cur_groups) {
       for (auto &item : group.roles) {
-        VLOG(4) << "node " << item.second->id() << " as " << item.first->name();
+        VLOG(40) << "node " << item.second->id() << " as "
+                 << item.first->name();
       }
-      VLOG(4) << "=========================================================";
+      VLOG(40) << "=========================================================";
     }
   }
...
@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
 std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
   const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
-  VLOG(3) << "draw IR graph viz to " << graph_viz_path;
+  VLOG(30) << "draw IR graph viz to " << graph_viz_path;
   std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
   PADDLE_ENFORCE(fout->good());
   std::ostream& sout = *fout;
...
@@ -20,7 +20,7 @@ namespace ir {
 std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
     std::unique_ptr<ir::Graph> graph) const {
-  VLOG(3) << "Aplies MKL-DNN placement strategy.";
+  VLOG(30) << "Aplies MKL-DNN placement strategy.";
   for (const Node* n : graph->Nodes()) {
     if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
       n->Op()->SetAttr("use_mkldnn", true);
...
@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
         string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
     VarDesc repeated_var = CopyVarDesc(var_desc);
     repeated_var.SetName(new_gname);
-    VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat;
+    VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
     return repeated_var;
   }
   return *var_desc;
@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
   std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
   auto origin_nodes = graph->ReleaseNodes();
-  VLOG(3) << "origin nodes count: " << origin_nodes.size();
+  VLOG(30) << "origin nodes count: " << origin_nodes.size();
   ir::Graph& result = *graph;
   // 1. record op nodes of different roles
@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
             "%s.repeat.%d", repeated_op.Input("Variance")[0], i);
         bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
         bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
-        VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to "
-                << new_mean_name;
+        VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
+                 << new_mean_name;
         repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
         repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
         repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0],
...
@@ -76,7 +76,7 @@ class Pass {
                      attr_name);
     attrs_[attr_name] = attr;
     attr_dels_[attr_name] = [attr, attr_name]() {
-      VLOG(3) << "deleting " << attr_name;
+      VLOG(30) << "deleting " << attr_name;
       delete attr;
     };
   }
...
@@ -12,10 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
+#include <set>
+#include <string>
 #include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/framework/ir/graph_viz_pass.h"
+#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 namespace paddle {
@@ -159,10 +162,7 @@ PDNode* BuildFCPattern(PDPattern* pattern, PDNode* fc_x) {
   std::set<std::string> acts({"sigmoid", "tanh", "relu", "identity"});
   PDNode* act = pattern->NewNode(
-      [=](Node* x) {
-        return x && x->IsOp() && acts.count(x->Op()->Type());
-      },
+      [=](Node* x) { return x && x->IsOp() && acts.count(x->Op()->Type()); },
       "act");
   PDNode* fc_out = pattern->NewNode(
@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
   detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
                             Graph* graph) {
-    VLOG(4) << "get one concat pattern";
+    VLOG(40) << "get one concat pattern";
     // fc
     GET_NODE(fc_w, detector.pattern());
     GET_NODE(fc_bias, detector.pattern());
...
@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
   auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                      Graph* g) {
-    VLOG(4) << "handle SeqConv EltAdd Relu fuse";
+    VLOG(40) << "handle SeqConv EltAdd Relu fuse";
     GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
     GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
...
@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
     TableItem item;
     item.index = i;
     item.length = vec[i + 1] - vec[i];
-    VLOG(10) << "Add item to rank table " << item.index << " " << item.length;
+    VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
     items_.emplace_back(item);
   }
   // NOTE(yuyang18):
...
@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
 TEST(mixed_vector, ForEach) {
   vec<int> tmp;
   for (auto& v : tmp) {
-    VLOG(3) << v;
+    VLOG(30) << v;
   }
 }
...
@@ -71,7 +71,7 @@ void NaiveExecutor::Prepare(Scope *parent_scope,
 void NaiveExecutor::Run() {
   for (auto &op : ops_) {
-    VLOG(4) << "run " << op->Type();
+    VLOG(40) << "run " << op->Type();
     op->Run(*scope_, place_);
   }
 }
@@ -95,21 +95,21 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
       if (var->Persistable()) {
         auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " global, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " global, which pointer is " << ptr;
       } else {  // Create temporary variables in local scope.
         auto *ptr = scope->Var(var->Name());
         InitializeVariable(ptr, var->GetType());
-        VLOG(3) << "Create Variable " << var->Name()
-                << " locally, which pointer is " << ptr;
+        VLOG(30) << "Create Variable " << var->Name()
+                 << " locally, which pointer is " << ptr;
       }
     }
   } else {
     for (auto &var : global_block.AllVars()) {
       auto *ptr = scope->Var(var->Name());
       InitializeVariable(ptr, var->GetType());
-      VLOG(3) << "Create variable " << var->Name() << ", which pointer is "
-              << ptr;
+      VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
+               << ptr;
     }
   }
 }
...
@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
     auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
     auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
     if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
-      VLOG(3) << "input " << in << " is not LodTensor";
+      VLOG(30) << "input " << in << " is not LodTensor";
       return;
     }
     out_var->SetLoDLevel(in_var->GetLoDLevel());
@@ -241,32 +241,32 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
     const proto::OpProto::Attr &attr = GetProtoAttr(name);
     switch (attr.type()) {
       case proto::AttrType::BOOLEANS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to BOOLEANS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to BOOLEANS";
         this->attrs_[name] = std::vector<bool>();
         break;
       }
       case proto::AttrType::INTS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to INTS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to INTS";
         this->attrs_[name] = std::vector<int>();
         break;
       }
       case proto::AttrType::FLOATS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to FLOATS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to FLOATS";
         this->attrs_[name] = std::vector<float>();
         break;
       }
       case proto::AttrType::STRINGS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to STRINGS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to STRINGS";
         this->attrs_[name] = std::vector<std::string>();
         break;
       }
       case proto::AttrType::BLOCKS: {
-        VLOG(11) << "SetAttr: " << Type() << ", " << name
-                 << " from INTS to BLOCKS";
+        VLOG(110) << "SetAttr: " << Type() << ", " << name
+                  << " from INTS to BLOCKS";
         this->SetBlocksAttr(name, std::vector<BlockDesc *>());
         return;
       }
@@ -499,13 +499,13 @@ void OpDesc::CheckAttrs() {
 }
 void OpDesc::InferShape(const BlockDesc &block) const {
-  VLOG(3) << "CompileTime infer shape on " << Type();
+  VLOG(30) << "CompileTime infer shape on " << Type();
   InitInferShapeFuncs();
   auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
   PADDLE_ENFORCE(static_cast<bool>(infer_shape),
                  "%s's infer_shape has not been registered", this->Type());
   CompileTimeInferShapeContext ctx(*this, block);
-  if (VLOG_IS_ON(10)) {
+  if (VLOG_IS_ON(100)) {
    std::ostringstream sout;
    auto inames = this->InputArgumentNames();
    sout << " From [";
@@ -516,7 +516,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
    std::copy(onames.begin(), onames.end(),
              std::ostream_iterator<std::string>(sout, ", "));
    sout << "]";
-    VLOG(10) << sout.str();
+    VLOG(100) << sout.str();
   }
   infer_shape(&ctx);
 }
@@ -607,7 +607,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
     auto shape = var->GetShape();
     res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
   } catch (...) {
-    VLOG(5) << "GetDim of variable " << name << " error";
+    VLOG(50) << "GetDim of variable " << name << " error";
     std::rethrow_exception(std::current_exception());
   }
   return res;
@@ -624,7 +624,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
       res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
     }
   } catch (...) {
-    VLOG(5) << "GetRepeatedDim of variable " << name << " error.";
+    VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
     std::rethrow_exception(std::current_exception());
   }
   return res;
...
@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
 std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
     const proto::OpDesc& op_desc) {
-  VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be"
-             "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
-             "instead.";
+  VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
+              "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
+              "instead.";
   VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
   VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
   AttributeMap attrs;
...
@@ -140,7 +140,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
 }
 
 void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
-  VLOG(4) << place << " " << DebugStringEx(&scope);
+  VLOG(40) << place << " " << DebugStringEx(&scope);
   if (platform::is_gpu_place(place)) {
 #ifndef PADDLE_WITH_CUDA
     PADDLE_THROW("Cannot run operator on place %s", place);
@@ -160,7 +160,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   } else {
     RunImpl(scope, place);
   }
-  VLOG(3) << place << " " << DebugStringEx(&scope);
+  VLOG(30) << place << " " << DebugStringEx(&scope);
 }
 
 bool OperatorBase::HasInputs(const std::string& name) const {
@@ -715,14 +715,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   auto expected_kernel_key =
       this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
-  VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
+  VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
 
   auto kernel_iter = kernels.find(expected_kernel_key);
 #ifdef PADDLE_WITH_MKLDNN
   // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
   if (kernel_iter == kernels.end() &&
       expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
-    VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
+    VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
     expected_kernel_key.library_type_ = LibraryType::kPlain;
     expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
     kernel_iter = kernels.find(expected_kernel_key);
@@ -774,7 +774,8 @@ void OperatorWithKernel::TransferInplaceVarsBack(
     const Scope& scope, const std::vector<std::string>& inplace_vars,
     const Scope& transfer_scope) const {
   for (auto& var_name : inplace_vars) {
-    VLOG(3) << "share inplace var " + var_name + " back to it's original scope";
+    VLOG(30) << "share inplace var " + var_name +
+                " back to it's original scope";
     auto* original_tensor =
         GetMutableLoDTensorOrSelectedRowsValueFromVar(scope.FindVar(var_name));
     auto* var = transfer_scope.FindVar(var_name);
@@ -815,8 +816,8 @@ Scope* OperatorWithKernel::TryTransferData(
         transfered_inplace_vars->emplace_back(var_name);
       }
 
-      VLOG(3) << "Transform Variable " << var_name << " from "
-              << kernel_type_for_var << " to " << expected_kernel_key;
+      VLOG(30) << "Transform Variable " << var_name << " from "
+               << kernel_type_for_var << " to " << expected_kernel_key;
 
       if (new_scope == nullptr) {
         new_scope = &scope.NewScope();
......
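The operator.cc hunks above also document the kernel-dispatch fallback: when no kernel is registered under the expected MKLDNN key, the key is downgraded to a plain-layout key and the lookup is retried. Schematically (a simplified, hypothetical table; the real OpKernelType key and kernel map differ):

  #include <map>
  #include <string>

  // Retry-on-miss lookup, the same shape as the MKLDNN workaround above.
  const int* FindKernelWithFallback(const std::map<std::string, int>& kernels,
                                    std::string key) {
    auto it = kernels.find(key);
    if (it == kernels.end() && key.find("MKLDNN") != std::string::npos) {
      key = "PLAIN";  // drop the MKLDNN-specific part of the key
      it = kernels.find(key);
    }
    return it == kernels.end() ? nullptr : &it->second;
  }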
...@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto &main_tensor = main_var->Get<LoDTensor>(); auto &main_tensor = main_var->Get<LoDTensor>();
if (!main_tensor.IsInitialized()) { if (!main_tensor.IsInitialized()) {
VLOG(3) << "one in var not inited, return!"; VLOG(30) << "one in var not inited, return!";
continue; continue;
} }
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
......
...@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) { ...@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) {
v = new Variable(); v = new Variable();
vars_[name].reset(v); vars_[name].reset(v);
VLOG(3) << "Create variable " << name; VLOG(30) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first); v->name_ = &(vars_.find(name)->first);
return v; return v;
} }
......
...@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, ...@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE(value->IsInitialized(), PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized."); "The value tensor should be initialized.");
if (ids.numel() == 0) { if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!"; VLOG(30) << "keys is empty, please check data!";
} else { } else {
int64_t value_width = value_->numel() / value_->dims()[0]; int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
......
@@ -22,8 +22,8 @@ namespace framework {
 
 void TensorCopy(const Tensor& src, const platform::Place& dst_place,
                 const platform::DeviceContext& ctx, Tensor* dst) {
-  VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
-          << dst_place;
+  VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
+           << dst_place;
   src.check_memory_size();
 
   dst->Resize(src.dims());
@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data async from " << src_place << " to "
+               << dst_place;
       return;
     }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
         reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
     if (platform::is_same_place(src_place, dst_place)) {
       if (src_ptr == dst_ptr) {
-        VLOG(3) << "Skip copy the same data async from " << src_place << " to "
-                << dst_place;
+        VLOG(30) << "Skip copy the same data async from " << src_place
+                 << " to " << dst_place;
         return;
       }
       memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
 
 void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
                     Tensor* dst) {
-  VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place()
-          << " to " << dst_place;
+  VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
+           << " to " << dst_place;
   src.check_memory_size();
   dst->Resize(src.dims());
   dst->set_layout(src.layout());
@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   auto size = src.numel() * SizeOfType(src.type());
   if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
     if (src_ptr == dst_ptr) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
       return;
     }
     memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
   } else if (platform::is_gpu_place(src_place) &&
             platform::is_gpu_place(dst_place)) {
     if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
-      VLOG(3) << "Skip copy the same data from " << src_place << " to "
-              << dst_place;
+      VLOG(30) << "Skip copy the same data from " << src_place << " to "
+               << dst_place;
       return;
     }
     auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
......
@@ -39,7 +39,7 @@ void ThreadPool::Init() {
     int num_threads = std::thread::hardware_concurrency();
     if (FLAGS_dist_threadpool_size > 0) {
       num_threads = FLAGS_dist_threadpool_size;
-      VLOG(1) << "set dist_threadpool_size to " << num_threads;
+      VLOG(10) << "set dist_threadpool_size to " << num_threads;
     }
     PADDLE_ENFORCE_GT(num_threads, 0);
     threadpool_.reset(new ThreadPool(num_threads));
......
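The ThreadPool hunk shows the sizing rule: default to the hardware thread count and let FLAGS_dist_threadpool_size override it when positive. The same pattern as a standalone sketch (flag name kept from the source, everything else illustrative):

  #include <thread>
  #include <gflags/gflags.h>

  DEFINE_int32(dist_threadpool_size, 0, "overrides the worker count when > 0");

  int PickThreadCount() {
    int n = static_cast<int>(std::thread::hardware_concurrency());
    if (FLAGS_dist_threadpool_size > 0) n = FLAGS_dist_threadpool_size;
    return n > 0 ? n : 1;  // hardware_concurrency() may report 0
  }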
@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const {
 void VarDesc::SetShapes(
     const std::vector<std::vector<int64_t>> &multiple_dims) {
   if (multiple_dims.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_dims.size());
   }
   std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs();
@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
 void VarDesc::SetDataTypes(
     const std::vector<proto::VarType::Type> &multiple_data_type) {
   if (multiple_data_type.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given data types("
-            << multiple_data_type.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given data types("
+             << multiple_data_type.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_data_type.size());
   }
   std::vector<proto::VarType::TensorDesc *> tensor_descs =
@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
 void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
   if (multiple_lod_level.size() != GetTensorDescNum()) {
-    VLOG(3) << "WARNING: The number of given lod_levels("
-            << multiple_lod_level.size()
-            << ") doesn't match the existing tensor number("
-            << GetTensorDescNum()
-            << "). The Reader is going to be reinitialized.";
+    VLOG(30) << "WARNING: The number of given lod_levels("
+             << multiple_lod_level.size()
+             << ") doesn't match the existing tensor number("
+             << GetTensorDescNum()
+             << "). The Reader is going to be reinitialized.";
     SetTensorDescNum(multiple_lod_level.size());
   }
   switch (desc_.type().type()) {
......
@@ -60,7 +60,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
  private:
   void AddPass(const std::string& name, AnalysisPass* pass) {
-    VLOG(3) << "Adding pass " << name;
+    VLOG(30) << "Adding pass " << name;
     Register(name, pass);
     AddGraphvizDebugerPass(pass);
   }
@@ -104,7 +104,7 @@ void Analyzer::Run(Argument* argument) {
   passes.push_back("graph_viz_pass");  // add graphviz for debug.
 #ifdef PADDLE_WITH_MKLDNN
   if (use_mkldnn_) {
-    VLOG(3) << "Adding MKL-DNN placement pass";
+    VLOG(30) << "Adding MKL-DNN placement pass";
     passes.push_back("mkldnn_placement_pass");
   }
 #endif
......
@@ -68,8 +68,8 @@ struct Argument {
                    key);
     attrs_[key] = data;
     attr_deleters_[key] = [data, key]() {
-      VLOG(3) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
-      VLOG(3) << "argument delete attr: " << key;
+      VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
+      VLOG(30) << "argument delete attr: " << key;
       delete data;
     };
   }
......
@@ -132,7 +132,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     Node *x{nullptr};
     if (ir_node->IsOp()) {
       PADDLE_ENFORCE(ir_node->Op());
-      VLOG(4) << "get op " << ir_node << " " << ir_node->Name();
+      VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
       x = nodes.Create(Node::Type::kFunction);
       x->attr("ir_node").Pointer() = ir_node;
       PADDLE_ENFORCE(ir_node->Op()->Proto());
@@ -141,7 +141,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     } else if (ir_node->IsVar()) {
       // Not create a Node for IR ControlDepVar, considering Inference currently
       // just used in single thread scenerio.
-      VLOG(4) << "get var " << ir_node->Name();
+      VLOG(40) << "get var " << ir_node->Name();
       x = nodes.Create(Node::Type::kValue);
       x->attr("ir_node").Pointer() = ir_node;
       x->SetName(ir_node->Name());
@@ -151,9 +151,9 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
     }
     ir_node_map.emplace(ir_node, x);
   }
 
-  VLOG(4) << "finish creating Nodes";
-  VLOG(4) << "to create edge";
+  VLOG(40) << "finish creating Nodes";
+  VLOG(40) << "to create edge";
   // Create links
   for (auto *ir_node : graph.Nodes()) {
     auto it = ir_node_map.find(ir_node);
@@ -175,7 +175,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
       "Can't deduce any inputs from the graph, Is the graph empty?");
 
   ir_graph = &graph;
-  VLOG(3) << "finished build from IR";
+  VLOG(30) << "finished build from IR";
 }
 
 void DataFlowGraph::Clean() {
......
@@ -239,9 +239,10 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
   framework::BlockDesc block_desc(nullptr, &proto);
   block_desc.Proto()->set_parent_idx(-1);
   block_desc.Proto()->set_idx(0);
-  VLOG(4) << "origin variable size: "
-          << argument_->origin_program_desc->blocks(0).vars().size();
-  VLOG(4) << "transformed variable size: " << block_desc.Proto()->vars().size();
+  VLOG(40) << "origin variable size: "
+           << argument_->origin_program_desc->blocks(0).vars().size();
+  VLOG(40) << "transformed variable size: "
+           << block_desc.Proto()->vars().size();
   // copy ops.
   for (auto *node : block_node->subgraph) {
......
@@ -29,7 +29,7 @@ void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
   auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png";
   std::string message;
-  VLOG(3) << "draw to " << png_path;
+  VLOG(30) << "draw to " << png_path;
   ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message);
 }
......
@@ -29,7 +29,7 @@ void FluidToIrPass::EnableParamModify(const std::string &model_dir,
   PADDLE_ENFORCE(argument_);
   argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope);
   // Load parameters.
-  VLOG(3) << "Loading parameters from " << model_dir;
+  VLOG(30) << "Loading parameters from " << model_dir;
   LoadParams(&argument_->Get<framework::Scope>(framework::ir::kParamScopeAttr),
              model_dir, prog_file, param_file);
 }
......
@@ -35,21 +35,21 @@ void ModelStorePass::Run(DataFlowGraph *x) {
   std::stringstream ss;
   // NOTE these commands only works on linux.
   ss << "mkdir -p " << *argument_->model_output_store_path;
-  VLOG(3) << "run command: " << ss.str();
+  VLOG(30) << "run command: " << ss.str();
   PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
 
   ss.str("");
   ss << "cp " << *argument_->fluid_model_dir << "/*"
      << " " << *argument_->model_output_store_path;
-  VLOG(3) << "run command: " << ss.str();
+  VLOG(30) << "run command: " << ss.str();
   PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
 
   // Store program
   PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
                           "program desc is not transformed, should call "
                           "DataFlowGraphToFluidPass first.");
-  VLOG(3) << "store analyzed program to "
-          << *argument_->model_output_store_path;
+  VLOG(30) << "store analyzed program to "
+           << *argument_->model_output_store_path;
   const std::string program_output_path =
       *argument_->model_output_store_path + "/__model__";
   std::ofstream file(program_output_path, std::ios::binary);
......
@@ -23,7 +23,7 @@ namespace analysis {
 bool PassManager::Initialize(Argument* argument) {
   argument_ = argument;
   for (auto& pass : data_) {
-    VLOG(3) << "Initializing pass [" << pass->repr() << "]";
+    VLOG(30) << "Initializing pass [" << pass->repr() << "]";
     if (!pass->Initialize(argument)) {
       LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
       return false;
@@ -34,7 +34,7 @@ bool PassManager::Initialize(Argument* argument) {
 
 void DfgPassManager::RunAll() {
   PADDLE_ENFORCE(argument_);
-  VLOG(3) << "Total " << data_.size() << " Analysys passes";
+  VLOG(30) << "Total " << data_.size() << " Analysys passes";
   for (auto& pass : data_) {
     string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
                           pass->repr());
......
@@ -232,7 +232,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
     BriefNode *brief_node = itr.second;
 
     if (!brief_node->node->attr(kMarkerAttrName).Bool()) {
-      VLOG(4) << brief_node->node->id() << " node not a trt candicate.";
+      VLOG(40) << brief_node->node->id() << " node not a trt candicate.";
       continue;
     }
......
@@ -25,9 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
 
 void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
   SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
-  VLOG(4) << "debug info "
-          << graph->HumanReadableInfo(false /*show_values*/,
-                                      true /*show_functions*/);
+  VLOG(40) << "debug info "
+           << graph->HumanReadableInfo(false /*show_values*/,
+                                       true /*show_functions*/);
 }
 
 }  // namespace analysis
......
@@ -38,7 +38,7 @@ using contrib::AnalysisConfig;
 bool AnalysisPredictor::Init(
     const std::shared_ptr<framework::Scope> &parent_scope,
     const std::shared_ptr<framework::ProgramDesc> &program) {
-  VLOG(3) << "Predictor::init()";
+  VLOG(30) << "Predictor::init()";
 #if !defined(_WIN32)
   if (FLAGS_profile) {
     LOG(WARNING) << "Profiler is actived, might affect the performance";
@@ -89,7 +89,7 @@ bool AnalysisPredictor::Init(
 bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
                             std::vector<PaddleTensor> *output_data,
                             int batch_size) {
-  VLOG(3) << "Predictor::predict";
+  VLOG(30) << "Predictor::predict";
   inference::Timer timer;
   timer.tic();
   // set feed variable
@@ -109,7 +109,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  VLOG(30) << "predict cost: " << timer.toc() << "ms";
 
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
@@ -119,7 +119,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
 
 bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                 framework::Scope *scope) {
-  VLOG(3) << "Predictor::set_feed";
+  VLOG(30) << "Predictor::set_feed";
   if (inputs.size() != feeds_.size()) {
     LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
                << inputs.size();
@@ -184,7 +184,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
 
 bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                  framework::Scope *scope) {
-  VLOG(3) << "Predictor::get_fetch";
+  VLOG(30) << "Predictor::get_fetch";
   outputs->resize(fetchs_.size());
   for (size_t i = 0; i < fetchs_.size(); ++i) {
     int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
@@ -246,7 +246,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
   }
 
   CHECK(argument_.transformed_program_desc);
-  VLOG(5) << "to prepare executor";
+  VLOG(50) << "to prepare executor";
   inference_program_.reset(
       new framework::ProgramDesc(*argument_.transformed_program_desc));
   if (argument_.Has(framework::ir::kParamScopeAttr)) {
@@ -260,7 +260,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
 template <>
 std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
-  VLOG(3) << "create AnalysisConfig";
+  VLOG(30) << "create AnalysisConfig";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -274,7 +274,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     std::string flag = "--fraction_of_gpu_memory_to_use=" +
                        std::to_string(config.fraction_of_gpu_memory);
     flags.push_back(flag);
-    VLOG(3) << "set flag: " << flag;
+    VLOG(30) << "set flag: " << flag;
     framework::InitGflags(flags);
   }
 }
......
@@ -157,7 +157,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
     LOG(ERROR) << "fail to get fetches";
     return false;
   }
-  VLOG(3) << "predict cost: " << timer.toc() << "ms";
+  VLOG(30) << "predict cost: " << timer.toc() << "ms";
 
   // Fix TensorArray reuse not cleaned bug.
   tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
......
@@ -34,7 +34,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
 
   bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
     FLAGS_IA_enable_tensorrt_subgraph_engine = true;
-    VLOG(3) << "Predictor::init()";
+    VLOG(30) << "Predictor::init()";
     if (config_.use_gpu) {
       place_ = paddle::platform::CUDAPlace(config_.device);
     } else {
@@ -70,7 +70,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
 
     OptimizeInferenceProgram();
     ctx_ = executor_->Prepare(*inference_program_, 0);
-    VLOG(5) << "to create variables";
+    VLOG(50) << "to create variables";
     executor_->CreateVariables(*inference_program_,
                                sub_scope_ ? sub_scope_ : scope_.get(), 0);
     // Get the feed_target_names and fetch_target_names
@@ -114,9 +114,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
         new ProgramDesc(*inference_program_->Proto()));
     Singleton<Analyzer>::Global().Run(&argument);
     CHECK(argument.transformed_program_desc);
-    VLOG(5) << "transformed program:\n"
-            << argument.transformed_program_desc->SerializeAsString();
-    VLOG(5) << "to prepare executor";
+    VLOG(50) << "transformed program:\n"
+             << argument.transformed_program_desc->SerializeAsString();
+    VLOG(50) << "to prepare executor";
     inference_program_.reset(
         new framework::ProgramDesc(*argument.transformed_program_desc));
   }
@@ -129,7 +129,7 @@ template <>
 std::unique_ptr<PaddlePredictor>
 CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
     const MixedRTConfig& config) {
-  VLOG(3) << "create TensorRTSubgraphPredictor";
+  VLOG(30) << "create TensorRTSubgraphPredictor";
   if (config.use_gpu) {
     // 1. GPU memeroy
     PADDLE_ENFORCE_GT(
@@ -143,7 +143,7 @@ CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
     std::string flag = "--fraction_of_gpu_memory_to_use=" +
                        std::to_string(config.fraction_of_gpu_memory);
     flags.push_back(flag);
-    VLOG(3) << "set flag: " << flag;
+    VLOG(30) << "set flag: " << flag;
     framework::InitGflags(flags);
   }
 }
......
@@ -45,7 +45,7 @@ void Main() {
   config.fraction_of_gpu_memory = 0.1;  // set by yourself
   predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config);
 
-  VLOG(3) << "begin to process data";
+  VLOG(30) << "begin to process data";
   // Just a single batch of data.
   std::string line;
   std::ifstream file(FLAGS_data);
@@ -60,13 +60,13 @@ void Main() {
       PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;
 
-  VLOG(3) << "run executor";
+  VLOG(30) << "run executor";
   std::vector<PaddleTensor> output;
   predictor->Run({input}, &output, 1);
 
-  VLOG(3) << "output.size " << output.size();
+  VLOG(30) << "output.size " << output.size();
   auto& tensor = output.front();
-  VLOG(3) << "output: " << SummaryTensor(tensor);
+  VLOG(30) << "output: " << SummaryTensor(tensor);
 
   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
......
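Usage note (editorial): because the demo's progress messages moved from VLOG(3) to VLOG(30), they only appear once verbosity is raised to match, e.g. with GLOG_v=30 in the environment or --v=30 on the command line, depending on how the binary wires up gflags/glog.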
@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
 }
 
 Record ProcessALine(const std::string& line) {
-  VLOG(3) << "process a line";
+  VLOG(30) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
   for (auto& s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(3) << "data size " << record.data.size();
-  VLOG(3) << "data shape size " << record.shape.size();
+  VLOG(30) << "data size " << record.data.size();
+  VLOG(30) << "data shape size " << record.shape.size();
   return record;
 }
 
@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
   file.close();
 
   size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
-  VLOG(3) << "predictor output numel " << numel;
-  VLOG(3) << "reference output numel " << refer.data.size();
+  VLOG(30) << "predictor output numel " << numel;
+  VLOG(30) << "reference output numel " << refer.data.size();
   CHECK_EQ(numel, refer.data.size());
   switch (output.dtype) {
     case PaddleDType::INT64: {
......
@@ -49,11 +49,11 @@ void Main(bool use_gpu) {
     config.fraction_of_gpu_memory = 0.1;  // set by yourself
   }
 
-  VLOG(3) << "init predictor";
+  VLOG(30) << "init predictor";
   predictor = CreatePaddlePredictor<NativeConfig>(config);
   analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
 
-  VLOG(3) << "begin to process data";
+  VLOG(30) << "begin to process data";
   // Just a single batch of data.
   std::string line;
   std::ifstream file(FLAGS_data);
@@ -68,13 +68,13 @@ void Main(bool use_gpu) {
       PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
   input.dtype = PaddleDType::FLOAT32;
 
-  VLOG(3) << "run executor";
+  VLOG(30) << "run executor";
   std::vector<PaddleTensor> output, analysis_output;
   predictor->Run({input}, &output, 1);
 
-  VLOG(3) << "output.size " << output.size();
+  VLOG(30) << "output.size " << output.size();
   auto& tensor = output.front();
-  VLOG(3) << "output: " << SummaryTensor(tensor);
+  VLOG(30) << "output: " << SummaryTensor(tensor);
 
   // compare with reference result
   CheckOutput(FLAGS_refer, tensor);
......
@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       // parameter.
       if (var_name == "feed" || var_name == "fetch") continue;
       if (var->Type() == typeid(framework::LoDTensorArray)) {
-        VLOG(4) << "collect " << var_name;
+        VLOG(40) << "collect " << var_name;
         arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
       }
     }
@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       CollectTensorArrays(kid);
     }
 
-    VLOG(3) << "Collect " << arrays_.size() << " arrays";
+    VLOG(30) << "Collect " << arrays_.size() << " arrays";
     flag_ = false;
   }
 }
......
@@ -78,7 +78,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
   for (auto* var : global_block.AllVars()) {
     if (IsPersistable(var)) {
-      VLOG(3) << "persistable variable's name: " << var->Name();
+      VLOG(30) << "persistable variable's name: " << var->Name();
 
       framework::VarDesc* new_var = load_block->Var(var->Name());
       new_var->SetShape(var->GetShape());
@@ -121,7 +121,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
                                              const std::string& dirname) {
   std::string model_filename = dirname + "/__model__";
   std::string program_desc_str;
-  VLOG(3) << "loading model from " << model_filename;
+  VLOG(30) << "loading model from " << model_filename;
   ReadBinaryFile(model_filename, &program_desc_str);
 
   std::unique_ptr<framework::ProgramDesc> main_program(
......
@@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
+    VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
......
@@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid dropout op to tensorrt dropout layer";
+    VLOG(40) << "convert a fluid dropout op to tensorrt dropout layer";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
......
@@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias";
+    VLOG(40) << "convert a fluid fc op to tensorrt fc layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
......
@@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias";
+    VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
......
@@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4) << "convert a fluid transpose op to tensorrt tranpose layer";
+    VLOG(40) << "convert a fluid transpose op to tensorrt tranpose layer";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
......
@@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4)
-        << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
+    VLOG(40)
+        << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
......
@@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter {
  public:
   void operator()(const framework::proto::OpDesc& op,
                   const framework::Scope& scope, bool test_mode) override {
-    VLOG(4)
-        << "convert a fluid softmax op to tensorrt softmax layer without bias";
+    VLOG(40)
+        << "convert a fluid softmax op to tensorrt softmax layer without bias";
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
......
@@ -27,7 +27,7 @@ struct Record {
 };
 
 Record ProcessALine(const std::string &line) {
-  VLOG(3) << "process a line";
+  VLOG(30) << "process a line";
   std::vector<std::string> columns;
   split(line, '\t', &columns);
   CHECK_EQ(columns.size(), 2UL)
@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
   for (auto &s : shape_strs) {
     record.shape.push_back(std::stoi(s));
   }
-  VLOG(3) << "data size " << record.data.size();
-  VLOG(3) << "data shape size " << record.shape.size();
+  VLOG(30) << "data size " << record.data.size();
+  VLOG(30) << "data shape size " << record.shape.size();
   return record;
 }
......
@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
       system_allocator_(std::move(system_allocator)) {}
 
 BuddyAllocator::~BuddyAllocator() {
-  VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these "
-              "have actually been freed";
+  VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
+               "have actually been freed";
   while (!pool_.empty()) {
     auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
-    VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
+    VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
 
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
   // acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);
 
-  VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size "
-           << size;
+  VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
+            << size;
 
   // if the allocation is huge, send directly to the system allocator
   if (size > max_chunk_size_) {
-    VLOG(10) << "Allocate from system allocator.";
+    VLOG(100) << "Allocate from system allocator.";
     return SystemAlloc(size);
   }
@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
       return nullptr;
     }
   } else {
-    VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it)
-             << " at address "
-             << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
+    VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
+              << " at address "
+              << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
   }
 
   total_used_ += size;
@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
   // Acquire the allocator lock
   std::lock_guard<std::mutex> lock(mutex_);
 
-  VLOG(10) << "Free from address " << block;
+  VLOG(100) << "Free from address " << block;
 
   if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
-    VLOG(10) << "Free directly from system allocator";
+    VLOG(100) << "Free directly from system allocator";
     system_allocator_->Free(block, block->total_size(cache_),
                             block->index(cache_));
@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) {
   // Trying to merge the right buddy
   if (block->has_right_buddy(cache_)) {
-    VLOG(10) << "Merging this block " << block << " with its right buddy "
-             << block->right_buddy(cache_);
+    VLOG(100) << "Merging this block " << block << " with its right buddy "
+              << block->right_buddy(cache_);
 
     auto right_buddy = block->right_buddy(cache_);
@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) {
   // Trying to merge the left buddy
   if (block->has_left_buddy(cache_)) {
-    VLOG(10) << "Merging this block " << block << " with its left buddy "
-             << block->left_buddy(cache_);
+    VLOG(100) << "Merging this block " << block << " with its left buddy "
+              << block->left_buddy(cache_);
 
     auto left_buddy = block->left_buddy(cache_);
@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) {
   }
 
   // Dumping this block into pool
-  VLOG(10) << "Inserting free block (" << block << ", "
-           << block->total_size(cache_) << ")";
+  VLOG(100) << "Inserting free block (" << block << ", "
+            << block->total_size(cache_) << ")";
   pool_.insert(
       IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
   size_t index = 0;
   void* p = system_allocator_->Alloc(&index, size);
 
-  VLOG(10) << "Allocated " << p << " from system allocator.";
+  VLOG(100) << "Allocated " << p << " from system allocator.";
 
   if (p == nullptr) return nullptr;
@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
   if (p == nullptr) return pool_.end();
 
-  VLOG(10) << "Creating and inserting new block " << p
-           << " from system allocator";
+  VLOG(100) << "Creating and inserting new block " << p
+            << " from system allocator";
 
   static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
                                      max_chunk_size_, nullptr, nullptr);
@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
   auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
   pool_.erase(it);
 
-  VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_)
-           << ") into";
+  VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
+            << ") into";
   block->split(&cache_, size);
 
-  VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_)
-           << ")";
+  VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
+            << ")";
   block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
 
   // the rest of memory if exist
   if (block->has_right_buddy(cache_)) {
     if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
-      VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", "
-               << block->right_buddy(cache_)->total_size(cache_) << ")";
+      VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
+                << block->right_buddy(cache_)->total_size(cache_) << ")";
 
       pool_.insert(
           IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
       return;
     }
 
-    VLOG(10) << "Return block " << block << " to fallback allocator.";
+    VLOG(100) << "Return block " << block << " to fallback allocator.";
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
     MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
 
-    VLOG(10) << "Return block " << block << " to base allocator.";
+    VLOG(100) << "Return block " << block << " to base allocator.";
     system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
     cache_.invalidate(block);
......
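The buddy_allocator.cc messages above trace the allocator's lifecycle: a free chunk is split until it fits the request, and a freed block tries to merge with its left or right buddy before going back into the pool. As an illustration of the general buddy technique only (Paddle's MemoryBlock metadata and pool keys differ):

  #include <cstdint>

  // For power-of-two blocks of size (1 << order) carved out of one arena,
  // a block's buddy sits at the offset obtained by flipping a single bit.
  inline uint64_t BuddyOffset(uint64_t offset, unsigned order) {
    return offset ^ (uint64_t{1} << order);
  }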
@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
     return existing_desc->second;
   } else {
     auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
-    VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type;
+    VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
     PADDLE_ASSERT(desc->check_guards());
     return *reinterpret_cast<const MemoryBlock::Desc*>(block);
   }
......
@@ -71,18 +71,18 @@ struct NaiveAllocator {
 template <>
 void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
-  VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place);
+  VLOG(100) << "Allocate " << size << " bytes on " << platform::Place(place);
   void* p = GetCPUBuddyAllocator()->Alloc(size);
   if (FLAGS_init_allocated_mem) {
     memset(p, 0xEF, size);
   }
-  VLOG(10) << " pointer=" << p;
+  VLOG(100) << " pointer=" << p;
   return p;
 }
 
 template <>
 void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
-  VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place);
+  VLOG(100) << "Free pointer=" << p << " on " << platform::Place(place);
   GetCPUBuddyAllocator()->Free(p);
 }
@@ -110,12 +110,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
         std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
         platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
 
-    VLOG(10) << "\n\nNOTE: each GPU device use "
-             << FLAGS_fraction_of_gpu_memory_to_use * 100
-             << "% of GPU memory.\n"
-             << "You can set GFlags environment variable '"
-             << "FLAGS_fraction_of_gpu_memory_to_use"
-             << "' to change the fraction of GPU usage.\n\n";
+    VLOG(100) << "\n\nNOTE: each GPU device use "
+              << FLAGS_fraction_of_gpu_memory_to_use * 100
+              << "% of GPU memory.\n"
+              << "You can set GFlags environment variable '"
+              << "FLAGS_fraction_of_gpu_memory_to_use"
+              << "' to change the fraction of GPU usage.\n\n";
   }
 });
......
@@ -95,7 +95,7 @@ class ActivationGradKernel
       auto x = framework::EigenVector<T>::Flatten(*X);
       functor(*place, x, out, dout, dx);
     } else {
-      VLOG(10) << " Inplace activation ";
+      VLOG(100) << " Inplace activation ";
       auto x = framework::EigenVector<T>::Flatten(*dX);
       functor(*place, x, out, dout, dx);
     }
......
@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
       auto& grad =
           Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
       if (grad.rows().size() == 0) {
-        VLOG(3) << "grad row size is 0!!";
+        VLOG(30) << "grad row size is 0!!";
         return;
       }
......
@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
     } else {
       offset = static_cast<size_t>(*i_tensor.data<int64_t>());
     }
-    VLOG(10) << " Offset = " << offset;
+    VLOG(100) << " Offset = " << offset;
     return offset;
   }
 };
}; };
......
...@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { ...@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
size_t start_offset = lod_and_offset.second.first; size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second; size_t end_offset = lod_and_offset.second.second;
VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " [" VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]"; << ", " << end_offset << "]";
// Copy data // Copy data
PADDLE_ENFORCE_GE(end_offset, start_offset); PADDLE_ENFORCE_GE(end_offset, start_offset);
size_t len = end_offset - start_offset; size_t len = end_offset - start_offset;
......
...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
VLOG(3) << "Setting descriptors."; VLOG(30) << "Setting descriptors.";
std::vector<int> dims; std::vector<int> dims;
std::vector<int> strides; std::vector<int> strides;
if (data_layout == DataLayout::kNCHW) { if (data_layout == DataLayout::kNCHW) {
......
...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, ...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores); auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
auto selected_items = ToMap(items, high_level.back()); auto selected_items = ToMap(items, high_level.back());
VLOG(3) << "selected_items:"; VLOG(30) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) { for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(3) << "offset:" << i; VLOG(30) << "offset:" << i;
for (auto &item : selected_items[i]) { for (auto &item : selected_items[i]) {
VLOG(3) << ItemToString(item); VLOG(30) << ItemToString(item);
} }
} }
...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems( ...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
} }
result.emplace_back(items); result.emplace_back(items);
} }
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size(); VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) { for (auto &items : result) {
VLOG(3) << "item set:"; VLOG(30) << "item set:";
for (auto &item : items) { for (auto &item : items) {
VLOG(3) << ItemToString(item); VLOG(30) << ItemToString(item);
} }
} }
......
...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase { ...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
auto lookup_table_save_dir = auto lookup_table_save_dir =
string::Sprintf("%s/%s_%d", dir, lookup_table_name, i); string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir); rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name VLOG(30) << "checkpoint notify sending lookup table: "
<< " and dir:" << dir << " to " << epmap[i]; << lookup_table_name << " and dir:" << dir << " to " << epmap[i];
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
} }
......
...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should be > 0."); PADDLE_ENFORCE_GT(n, 0, "Input tensors count should be > 0.");
if (n == 1) { if (n == 1) {
VLOG(3) << "Warning: concat op have only one input, may waste memory"; VLOG(30) << "Warning: concat op have only one input, may waste memory";
} }
auto out_dims = ins[0]; auto out_dims = ins[0];
......
...@@ -165,11 +165,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { ...@@ -165,11 +165,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
// Currently tensor core is only enabled using this algo // Currently tensor core is only enabled using this algo
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
half_float = true; half_float = true;
VLOG(5) << "use cudnn_tensor_op_math"; VLOG(50) << "use cudnn_tensor_op_math";
} else { } else {
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType( CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH)); cudnn_conv_desc, CUDNN_DEFAULT_MATH));
VLOG(5) << "NOT use cudnn_tensor_op_math"; VLOG(50) << "NOT use cudnn_tensor_op_math";
} }
#endif #endif
......
...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() { ...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); } void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
void AsyncBRPCServer::WaitServerReady() { void AsyncBRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready"; VLOG(30) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady"; VLOG(30) << "AsyncGRPCServer WaitSeverReady";
} }
}; // namespace distributed }; // namespace distributed
......
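The WaitServerReady implementations above (and the gRPC one later in this diff) are instances of the standard condition-variable handshake: the serving thread sets a ready flag under a mutex and notifies, while callers block on a predicate. A self-contained sketch of the pattern, illustrative only and not the Paddle class:

    #include <condition_variable>
    #include <mutex>

    class ReadyFlag {
     public:
      // Blocks until SetReady() has been called; the predicate form of
      // wait() is immune to spurious wakeups.
      void WaitReady() {
        std::unique_lock<std::mutex> lock(mutex_);
        cond_.wait(lock, [this] { return ready_; });
      }
      void SetReady() {
        {
          std::lock_guard<std::mutex> lock(mutex_);
          ready_ = true;
        }
        cond_.notify_all();
      }

     private:
      std::mutex mutex_;
      std::condition_variable cond_;
      bool ready_ = false;
    };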
...@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() { ...@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() {
std::unique_lock<std::mutex> lk(completed_mutex_); std::unique_lock<std::mutex> lk(completed_mutex_);
if (!completed_) { if (!completed_) {
for (auto& it : channels_) { for (auto& it : channels_) {
VLOG(3) << "send complete message to " << it.first; VLOG(30) << "send complete message to " << it.first;
this->AsyncSendComplete(it.first); this->AsyncSendComplete(it.first);
} }
PADDLE_ENFORCE(this->Wait(), "internal grpc error"); PADDLE_ENFORCE(this->Wait(), "internal grpc error");
...@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep, ...@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_); SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = nullptr; s->response_call_back_ = nullptr;
...@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep, ...@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
::grpc::ByteBuffer buf; ::grpc::ByteBuffer buf;
RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf); RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep, ...@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val); SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -328,14 +328,14 @@ void GRPCClient::Proceed() { ...@@ -328,14 +328,14 @@ void GRPCClient::Proceed() {
void* tag = nullptr; void* tag = nullptr;
bool ok = false; bool ok = false;
VLOG(3) << "GRPCClient Proceed begin"; VLOG(30) << "GRPCClient Proceed begin";
while (!stopped_ && cq_.Next(&tag, &ok)) { while (!stopped_ && cq_.Next(&tag, &ok)) {
BaseProcessor* c = static_cast<BaseProcessor*>(tag); BaseProcessor* c = static_cast<BaseProcessor*>(tag);
GPR_ASSERT(ok); GPR_ASSERT(ok);
PADDLE_ENFORCE(c); PADDLE_ENFORCE(c);
if (c->status_.ok()) { if (c->status_.ok()) {
VLOG(3) << c->GetVarHandlePtr()->String() << " process"; VLOG(30) << c->GetVarHandlePtr()->String() << " process";
c->Process(); c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) { } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
// FIXME(gongwb): parse error_details? // FIXME(gongwb): parse error_details?
...@@ -370,7 +370,7 @@ void GRPCClient::Proceed() { ...@@ -370,7 +370,7 @@ void GRPCClient::Proceed() {
sync_cond_.notify_all(); sync_cond_.notify_all();
} }
} }
VLOG(3) << "GRPCClient Proceed end"; VLOG(30) << "GRPCClient Proceed end";
} }
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) { std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
......
...@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase { ...@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase {
void Process() override { void Process() override {
std::string varname = GetReqName(); std::string varname = GetReqName();
VLOG(4) << "RequestSend var_name:" << varname; VLOG(40) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar(); auto invar = request_->GetVar();
...@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase { ...@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase {
// proc request. // proc request.
std::string varname = request_.varname(); std::string varname = request_.varname();
int trainer_id = request_.trainer_id(); int trainer_id = request_.trainer_id();
VLOG(4) << "RequestGet " << varname; VLOG(40) << "RequestGet " << varname;
auto scope = request_handler_->scope(); auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname); auto invar = scope->FindVar(varname);
...@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase { ...@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase {
std::string in_var_name = request_->Varname(); std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname(); std::string out_var_name = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name; << " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name); auto invar = scope->FindVar(in_var_name);
...@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase {
std::string checkpoint_dir = request_->OutVarname(); std::string checkpoint_dir = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir; << ", dir: " << checkpoint_dir;
request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr, request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
trainer_id, checkpoint_dir); trainer_id, checkpoint_dir);
...@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase {
}; };
void AsyncGRPCServer::WaitServerReady() { void AsyncGRPCServer::WaitServerReady() {
VLOG(4) << "AsyncGRPCServer is wait server ready"; VLOG(40) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(4) << "AsyncGRPCServer WaitSeverReady"; VLOG(40) << "AsyncGRPCServer WaitSeverReady";
} }
void AsyncGRPCServer::StartServer() { void AsyncGRPCServer::StartServer() {
...@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() { ...@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() {
reqs.reserve(kRequestBufSize); reqs.reserve(kRequestBufSize);
for (int i = 0; i < kRequestBufSize; i++) { for (int i = 0; i < kRequestBufSize; i++) {
VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i; VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " I: " << i;
TryToRegisterNewOne(rpc_name, i); TryToRegisterNewOne(rpc_name, i);
} }
for (int i = 0; i < threadnum; i++) { for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(4) << t.first << " creates threads!"; VLOG(40) << t.first << " creates threads!";
} }
} }
...@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second; auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) { for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join(); threads[i]->join();
VLOG(4) << t.first << " threads end!"; VLOG(40) << t.first << " threads end!";
} }
} }
} }
...@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() { void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) { for (auto& t : rpc_cq_) {
t.second->Shutdown(); t.second->Shutdown();
VLOG(4) << t.first << " queue shutdown!"; VLOG(40) << t.first << " queue shutdown!";
} }
} }
...@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() { ...@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true; is_shut_down_ = true;
ShutdownQueue(); ShutdownQueue();
VLOG(4) << "server_ shutdown!"; VLOG(40) << "server_ shutdown!";
server_->Shutdown(); server_->Shutdown();
} }
...@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) { int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_); std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) { if (is_shut_down_) {
VLOG(4) << "shutdown, do not TryToRegisterNewSendOne"; VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
return; return;
} }
VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id; << " REQ ID: " << req_id;
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
auto& handler = rpc_call_map_[rpc_name]; auto& handler = rpc_call_map_[rpc_name];
...@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
reqs[req_id] = b; reqs[req_id] = b;
VLOG(4) << "Create RequestSend status:" << b->Status(); VLOG(40) << "Create RequestSend status:" << b->Status();
} }
void AsyncGRPCServer::HandleRequest( void AsyncGRPCServer::HandleRequest(
...@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest( ...@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false; bool ok = false;
while (true) { while (true) {
VLOG(4) << "HandleRequest " << rpc_name << " wait next"; VLOG(40) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) { if (!cq->Next(&tag, &ok)) {
VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!"; VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
break; break;
} }
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag)); int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next"; << " get next";
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
RequestBase* base = nullptr; RequestBase* base = nullptr;
...@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest( ...@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id]; base = reqs[req_id];
} }
VLOG(3) << base->Status2String(rpc_name); VLOG(30) << base->Status2String(rpc_name);
// reference: // reference:
// https://github.com/tensorflow/tensorflow/issues/5596 // https://github.com/tensorflow/tensorflow/issues/5596
......
...@@ -75,7 +75,7 @@ class VarHandle { ...@@ -75,7 +75,7 @@ class VarHandle {
wait_cond_.wait(lk, [this] { return status_ != kDefaultState; }); wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
ret = status_; ret = status_;
} }
VLOG(7) << "VarHandle wait:" << ret; VLOG(70) << "VarHandle wait:" << ret;
return ret != kErrorState; return ret != kErrorState;
} }
...@@ -84,7 +84,7 @@ class VarHandle { ...@@ -84,7 +84,7 @@ class VarHandle {
std::unique_lock<std::mutex> lk(sync_mutex_); std::unique_lock<std::mutex> lk(sync_mutex_);
status_ = ok ? kFinishState : kErrorState; status_ = ok ? kFinishState : kErrorState;
} }
VLOG(7) << "VarHandle finish:" << ok; VLOG(70) << "VarHandle finish:" << ok;
wait_cond_.notify_all(); wait_cond_.notify_all();
} }
......
...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestSendHandler:" << varname; VLOG(40) << "RequestSendHandler:" << varname;
// Sync // Sync
if (varname == BATCH_BARRIER_MESSAGE) { if (varname == BATCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE"; VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
rpc_server_->IncreaseBatchBarrier(kRequestSend); rpc_server_->IncreaseBatchBarrier(kRequestSend);
} else if (varname == COMPLETE_MESSAGE) { } else if (varname == COMPLETE_MESSAGE) {
VLOG(3) << "sync: recv complete message"; VLOG(30) << "sync: recv complete message";
rpc_server_->Complete(); rpc_server_->Complete();
} else { } else {
// Async // Async
if (!sync_mode_) { if (!sync_mode_) {
VLOG(3) << "async process var: " << varname; VLOG(30) << "async process var: " << varname;
try { try {
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
scope); scope);
...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
return true; return true;
} else { // sync } else { // sync
rpc_server_->WaitCond(kRequestSend); rpc_server_->WaitCond(kRequestSend);
VLOG(3) << "sync: processing received var: " << varname; VLOG(30) << "sync: processing received var: " << varname;
if (invar == nullptr) { if (invar == nullptr) {
LOG(FATAL) << "sync: Can not find server side var: " << varname; LOG(FATAL) << "sync: Can not find server side var: " << varname;
...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestGetHandler:" << varname; VLOG(40) << "RequestGetHandler:" << varname;
if (sync_mode_) { if (sync_mode_) {
if (varname == FETCH_BARRIER_MESSAGE) { if (varname == FETCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv fetch barrier message"; VLOG(30) << "sync: recv fetch barrier message";
rpc_server_->IncreaseBatchBarrier(kRequestGet); rpc_server_->IncreaseBatchBarrier(kRequestGet);
} else { } else {
rpc_server_->WaitCond(kRequestGet); rpc_server_->WaitCond(kRequestGet);
...@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname,
// NOTE: the format is determined by distributed_transpiler.py // NOTE: the format is determined by distributed_transpiler.py
std::string param_bak_name = std::string param_bak_name =
string::Sprintf("%s.trainer_%d_bak", varname, trainer_id); string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id; VLOG(30) << "getting " << param_bak_name << " trainer_id "
<< trainer_id;
auto var = scope_->FindVar(varname); auto var = scope_->FindVar(varname);
auto t_orig = var->Get<framework::LoDTensor>(); auto t_orig = var->Get<framework::LoDTensor>();
auto param_bak = scope_->Var(param_bak_name); auto param_bak = scope_->Var(param_bak_name);
auto t = param_bak->GetMutable<framework::LoDTensor>(); auto t = param_bak->GetMutable<framework::LoDTensor>();
t->mutable_data(dev_ctx_->GetPlace(), t_orig.type()); t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
VLOG(3) << "copying " << varname << " to " << param_bak_name; VLOG(30) << "copying " << varname << " to " << param_bak_name;
framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t); framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
} }
*outvar = scope_->FindVar(varname); *outvar = scope_->FindVar(varname);
...@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname, ...@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestPrefetchHandler " << varname; VLOG(40) << "RequestPrefetchHandler " << varname;
auto var_desc = program_->Block(0).FindVar(out_var_name); auto var_desc = program_->Block(0).FindVar(out_var_name);
InitializeVariable(*outvar, var_desc->GetType()); InitializeVariable(*outvar, var_desc->GetType());
...@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname, ...@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>(); auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
lt_var->clear(); lt_var->clear();
lt_var->append(out_var_name); lt_var->append(out_var_name);
VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: " VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name; << out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_); executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
return true; return true;
} }
......
...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const { ...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
port_file.open(file_path); port_file.open(file_path);
port_file << selected_port_; port_file << selected_port_;
port_file.close(); port_file.close();
VLOG(4) << "selected port written to " << file_path; VLOG(40) << "selected port written to " << file_path;
} }
void RPCServer::WaitBarrier(const std::string& rpc_name) { void RPCServer::WaitBarrier(const std::string& rpc_name) {
...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { ...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
exit_flag_.load()); exit_flag_.load());
}); });
VLOG(3) << "batch_barrier_: " << rpc_name << " " VLOG(30) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name]; << barrier_counter_[rpc_name];
} }
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0; int b = 0;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name]; b = ++barrier_counter_[rpc_name];
...@@ -71,7 +71,7 @@ void RPCServer::Complete() { ...@@ -71,7 +71,7 @@ void RPCServer::Complete() {
client_num_--; client_num_--;
need_reset_all_vars_ = true; need_reset_all_vars_ = true;
VLOG(4) << "decrease client_num to: " << client_num_; VLOG(40) << "decrease client_num to: " << client_num_;
if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) { if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
barrier_counter_[kRequestGet]--; barrier_counter_[kRequestGet]--;
} }
...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() { ...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
} }
void RPCServer::ResetBarrierCounter() { void RPCServer::ResetBarrierCounter() {
VLOG(3) << "RPCServer ResetBarrierCounter "; VLOG(30) << "RPCServer ResetBarrierCounter ";
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
for (auto& t : barrier_counter_) { for (auto& t : barrier_counter_) {
t.second = 0; t.second = 0;
...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name, ...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
static int cond = -1; static int cond = -1;
rpc_cond_map_[rpc_name] = ++cond; rpc_cond_map_[rpc_name] = ++cond;
VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name]; << ", cond:" << rpc_cond_map_[rpc_name];
} }
void RPCServer::SetCond(const std::string& rpc_name) { void RPCServer::SetCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer SetCond " << rpc_name; VLOG(30) << "RPCServer SetCond " << rpc_name;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
cur_cond_ = rpc_cond_map_[rpc_name]; cur_cond_ = rpc_cond_map_[rpc_name];
...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { ...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
} }
void RPCServer::WaitCond(const std::string& rpc_name) { void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(4) << "RPCServer WaitCond " << rpc_name; VLOG(40) << "RPCServer WaitCond " << rpc_name;
int cond = 0; int cond = 0;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
......
...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
size_to_write = length - total_written; size_to_write = length - total_written;
} }
// This log is useful to see how large an internal RPC block is. // This log is useful to see how large an internal RPC block is.
VLOG(7) << "copy " << size_to_write << " bytes to CUDAPlace"; VLOG(70) << "copy " << size_to_write << " bytes to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place), memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write, reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream()); gpu_dev_ctx.stream());
...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
// TODO(gongwb): can we avoid copy? // TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu; platform::CPUPlace cpu;
// This log is useful to see how large an internal RPC block is. // This log is useful to see how large an internal RPC block is.
VLOG(7) << "copy " << size_to_write << " bytes to CPUPlace"; VLOG(70) << "copy " << size_to_write << " bytes to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write); memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write; p += size_to_write;
...@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField( ...@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField(
#endif #endif
} }
VLOG(7) << "ProcSerializedField:" << meta_.varname() VLOG(70) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl; << ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims()); framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) { if (meta_.type() == sendrecv::LOD_TENSOR) {
PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!"); PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!");
......
...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase { ...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
auto col = Attr<int>("col"); auto col = Attr<int>("col");
VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var " VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
<< out_name; << " column to var " << out_name;
auto &feed_list = feed_var->Get<framework::FeedFetchList>(); auto &feed_list = feed_var->Get<framework::FeedFetchList>();
auto &feed_item = feed_list.at(static_cast<size_t>(col)); auto &feed_item = feed_list.at(static_cast<size_t>(col));
......
...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase { ...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) { for (auto& ep : eps) {
VLOG(3) << "fetch barrier, ep: " << ep; VLOG(30) << "fetch barrier, ep: " << ep;
rpc_client->AsyncSendFetchBarrier(ep); rpc_client->AsyncSendFetchBarrier(ep);
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase { ...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
TensorCopySync(src_item, platform::CPUPlace(), &dst_item); TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
dst_item.set_lod(src_item.lod()); dst_item.set_lod(src_item.lod());
VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name; VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
} }
}; };
......
...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0); distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
for (auto& ep : endpoint_list) { for (auto& ep : endpoint_list) {
VLOG(3) << "sending nccl id to " << ep; VLOG(30) << "sending nccl id to " << ep;
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME); client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
} }
client->Wait(); client->Wait();
...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendBatchBarrier(ep); client->AsyncSendBatchBarrier(ep);
} }
client->Wait(); client->Wait();
VLOG(3) << "sending completed..."; VLOG(30) << "sending completed...";
} }
void GetIdByServer(framework::Scope* scope, void GetIdByServer(framework::Scope* scope,
...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
std::bind(&distributed::RPCServer::StartServer, rpc_service.get())); std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
rpc_service->SetCond(distributed::kRequestSend); rpc_service->SetCond(distributed::kRequestSend);
VLOG(3) << "start getting nccl id from trainer 0..."; VLOG(30) << "start getting nccl id from trainer 0...";
rpc_service->WaitBarrier(distributed::kRequestSend); rpc_service->WaitBarrier(distributed::kRequestSend);
VLOG(3) << "got nccl id and stop server..."; VLOG(30) << "got nccl id and stop server...";
rpc_service->ShutDown(); rpc_service->ShutDown();
VLOG(3) << "rpc server stopped"; VLOG(30) << "rpc server stopped";
server_thread.join(); server_thread.join();
} }
}; };
......
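For orientation, the handshake these logs trace is: trainer 0 generates the NCCL unique id, pushes it to every endpoint over RPC, and each trainer then builds its communicator from that id. A heavily simplified sketch against the raw NCCL API; the rank/nranks wiring is assumed, not taken from this file:

    #include <nccl.h>

    // Hypothetical wiring: `rank` and `nranks` would come from trainer config.
    void InitNcclComm(int rank, int nranks, ncclComm_t* comm) {
      ncclUniqueId id;
      if (rank == 0) {
        ncclGetUniqueId(&id);  // trainer 0 generates the id
        // ... send `id` to every peer (AsyncSendVar + batch barrier) ...
      } else {
        // ... run a temporary RPC server and block until the id arrives ...
      }
      ncclCommInitRank(comm, nranks, id, rank);  // every trainer joins
    }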
...@@ -36,7 +36,7 @@ namespace operators { ...@@ -36,7 +36,7 @@ namespace operators {
void RunServer(std::shared_ptr<distributed::RPCServer> service) { void RunServer(std::shared_ptr<distributed::RPCServer> service) {
service->StartServer(); service->StartServer();
VLOG(4) << "RunServer thread end"; VLOG(40) << "RunServer thread end";
} }
static void split(const std::string &str, char sep, static void split(const std::string &str, char sep,
std::vector<std::string> *pieces) { std::vector<std::string> *pieces) {
...@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks( ...@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks(
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() { fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
int run_block = idx; // thread local int run_block = idx; // thread local
try { try {
VLOG(3) << "running server block: " << run_block VLOG(30) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get(); << "pointer: " << prepared[run_block].get();
executor->RunPreparedContext(prepared[run_block].get(), scope); executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (const std::exception &e) { } catch (const std::exception &e) {
LOG(FATAL) << "run sub program:" << idx << " error " << e.what(); LOG(FATAL) << "run sub program:" << idx << " error " << e.what();
...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, platform::DeviceContext *dev_ctx, framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
const std::vector<int> &prefetch_block_id_list, const std::vector<int> &prefetch_block_id_list,
const int checkpoint_point_block_id) const { const int checkpoint_point_block_id) const {
VLOG(2) << "RunSyncLoop"; VLOG(20) << "RunSyncLoop";
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
auto optimize_blocks = auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks); Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
} }
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
recv_scope); recv_scope);
VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)"; VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars()); ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : sparse_vars_) { for (auto &varname : sparse_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope"; VLOG(20) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::SelectedRows>()) { if (var->IsType<framework::SelectedRows>()) {
VLOG(3) << "reset sparse var: " << varname; VLOG(30) << "reset sparse var: " << varname;
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear(); var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
} else { } else {
PADDLE_THROW("The type of sparse var should be SelectedRows"); PADDLE_THROW("The type of sparse var should be SelectedRows");
...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : dense_vars_) { for (auto &varname : dense_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope"; VLOG(20) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::LoDTensor>()) { if (var->IsType<framework::LoDTensor>()) {
...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program, framework::ProgramDesc *program,
framework::Scope *recv_scope) const { framework::Scope *recv_scope) const {
VLOG(2) << "RunAsyncLoop"; VLOG(20) << "RunAsyncLoop";
auto grad_to_block_id_str = auto grad_to_block_id_str =
Attr<std::vector<std::string>>("grad_to_block_id"); Attr<std::vector<std::string>>("grad_to_block_id");
DoubleFindMap<std::string, int32_t> grad_to_block_id; DoubleFindMap<std::string, int32_t> grad_to_block_id;
...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
const std::string &grad_and_id) { const std::string &grad_and_id) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(grad_and_id, ':', &pieces); split(grad_and_id, ':', &pieces);
VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1]; VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0); PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
while (true) { while (true) {
if (rpc_service_->IsExit()) { if (rpc_service_->IsExit()) {
VLOG(4) << "get exit!rpc_processor break!"; VLOG(40) << "get exit!rpc_processor break!";
break; break;
} }
...@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
std::string endpoint = Attr<std::string>("endpoint"); std::string endpoint = Attr<std::string>("endpoint");
int checkpoint_block_id = Attr<int>(kCheckpointBlockId); int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint << ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id; << ", checkpoint_block_id: " << checkpoint_block_id;
rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in)); rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));
...@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
prefetch_var_name_to_block_id_str) { prefetch_var_name_to_block_id_str) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(prefetch_var_name_and_id, ':', &pieces); split(prefetch_var_name_and_id, ':', &pieces);
VLOG(3) << "after split, prefetch_var = " << pieces[0] VLOG(30) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1]; << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
int block_id = std::stoi(pieces[1]); int block_id = std::stoi(pieces[1]);
...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// start the server listening after all member initialized. // start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_)); server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(3) << "wait server thread to become ready..."; VLOG(30) << "wait server thread to become ready...";
rpc_service_->WaitServerReady(); rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
......
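The `split` helper declared above and used to parse the `grad:block_id` and `prefetch_var:block_id` attribute strings is a plain single-character separator split. A sketch consistent with the call sites in this file (the empty-input behaviour is an assumption):

    #include <string>
    #include <vector>

    static void split(const std::string &str, char sep,
                      std::vector<std::string> *pieces) {
      pieces->clear();
      if (str.empty()) return;
      size_t pos = 0;
      size_t next = str.find(sep, pos);
      while (next != std::string::npos) {
        pieces->push_back(str.substr(pos, next - pos));
        pos = next + 1;
        next = str.find(sep, pos);
      }
      if (!str.substr(pos).empty()) {
        pieces->push_back(str.substr(pos));
      }
    }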
...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase { ...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out = auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>(); scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level")); VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level"))); out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
VLOG(10) << Input("X") << "'s lod information is " << *out; VLOG(100) << Input("X") << "'s lod information is " << *out;
} }
}; };
......
...@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse"); auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(out_var_name) block->Var(out_var_name)
->SetType(framework::proto::VarType::SELECTED_ROWS); ->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
} }
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
......
...@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt, ...@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
} }
auto et = GetCurrentUS(); auto et = GetCurrentUS();
VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat; << " us, tgt takes: " << (mt - st) / repeat;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3); EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
} }
......
...@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) { ...@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) {
vrelu_intri8(d, x_data, zref_data); vrelu_intri8(d, x_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
auto ttgts = GetCurrentUS(); auto ttgts = GetCurrentUS();
...@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) { ...@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) {
ker->Compute(x_data, ztgt_data); ker->Compute(x_data, ztgt_data);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) { ...@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) { ...@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) { ...@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -321,9 +325,10 @@ TEST(JitKernel, vtanh) { ...@@ -321,9 +325,10 @@ TEST(JitKernel, vtanh) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -441,9 +446,10 @@ TEST(JitKernel, lstm) { ...@@ -441,9 +446,10 @@ TEST(JitKernel, lstm) {
ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data); ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
} }
} }
...@@ -525,8 +531,8 @@ TEST(JitKernel, vscal) { ...@@ -525,8 +531,8 @@ TEST(JitKernel, vscal) {
vscal_inp_intri8(d, a, y_data); vscal_inp_intri8(d, a, y_data);
} }
auto si3 = GetCurrentUS(); auto si3 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat; << " us, inplace: " << (si3 - si2) / repeat;
} }
#endif #endif
...@@ -540,15 +546,17 @@ TEST(JitKernel, vscal) { ...@@ -540,15 +546,17 @@ TEST(JitKernel, vscal) {
ker->Compute(&a, y_data, y_data, d); ker->Compute(&a, y_data, y_data, d);
} }
auto ttgte1 = GetCurrentUS(); auto ttgte1 = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat
<< " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat; << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -611,7 +619,7 @@ TEST(JitKernel, vmul) { ...@@ -611,7 +619,7 @@ TEST(JitKernel, vmul) {
vmul_intri8(d, x_data, y_data, zref_data); vmul_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -621,13 +629,14 @@ TEST(JitKernel, vmul) { ...@@ -621,13 +629,14 @@ TEST(JitKernel, vmul) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -690,7 +699,7 @@ TEST(JitKernel, vadd) { ...@@ -690,7 +699,7 @@ TEST(JitKernel, vadd) {
vadd_intri8(d, x_data, y_data, zref_data); vadd_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -700,13 +709,14 @@ TEST(JitKernel, vadd) { ...@@ -700,13 +709,14 @@ TEST(JitKernel, vadd) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -761,9 +771,10 @@ TEST(JitKernel, vaddrelu) { ...@@ -761,9 +771,10 @@ TEST(JitKernel, vaddrelu) {
ker->Compute(x_data, y_data, ztgt_data, d); ker->Compute(x_data, y_data, ztgt_data, d);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, " << ": refer takes: " << (trefe - trefs) / repeat
<< "tgt takes: " << (ttgte - ttgts) / repeat; << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
......
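Every benchmark log in this test file follows the same idiom: take a microsecond timestamp before and after `repeat` iterations and report the per-call average. A sketch of a `GetCurrentUS`-style helper; the std::chrono implementation here is an assumption, not the test's actual definition:

    #include <chrono>

    // Wall-clock time in microseconds; only differences are meaningful.
    static double GetCurrentUS() {
      auto now = std::chrono::steady_clock::now().time_since_epoch();
      return std::chrono::duration<double, std::micro>(now).count();
    }

    // Usage, mirroring the tests above:
    //   auto t0 = GetCurrentUS();
    //   for (int i = 0; i < repeat; ++i) ker->Compute(x_data, ztgt_data);
    //   auto t1 = GetCurrentUS();
    //   VLOG(30) << "tgt takes: " << (t1 - t0) / repeat << " us";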
...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(3) << "no input! return"; VLOG(30) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl; VLOG(30) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(3) << "no input! return"; VLOG(30) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl; VLOG(30) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
...@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> { ...@@ -346,7 +346,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
// sparse update maybe empty. // sparse update maybe empty.
if (grad->rows().size() == 0) { if (grad->rows().size() == 0) {
VLOG(3) << "Grad SelectedRows contains no data!"; VLOG(30) << "Grad SelectedRows contains no data!";
return; return;
} }
auto* merged_grad = const_cast<framework::Scope&>(ctx.scope()) auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
......
...@@ -38,9 +38,9 @@ class MulOp : public framework::OperatorWithKernel { ...@@ -38,9 +38,9 @@ class MulOp : public framework::OperatorWithKernel {
int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims"); int x_num_col_dims = ctx->Attrs().Get<int>("x_num_col_dims");
int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims"); int y_num_col_dims = ctx->Attrs().Get<int>("y_num_col_dims");
VLOG(3) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims VLOG(30) << "mul operator x.shape=" << x_dims << " y.shape=" << y_dims
<< " x_num_col_dims=" << x_num_col_dims << " x_num_col_dims=" << x_num_col_dims
<< " y_num_col_dims=" << y_num_col_dims; << " y_num_col_dims=" << y_num_col_dims;
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
x_dims.size(), x_num_col_dims, x_dims.size(), x_num_col_dims,
......
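The shape log above is worth having because `mul` flattens both inputs to matrices before multiplying: the first `num_col_dims` dimensions fold into rows and the remainder into columns. A hypothetical standalone helper (not the framework's `flatten_to_2d`) showing the rule:

    #include <cstdint>
    #include <utility>
    #include <vector>

    std::pair<int64_t, int64_t> FlattenTo2D(const std::vector<int64_t>& dims,
                                            int num_col_dims) {
      int64_t rows = 1, cols = 1;
      for (int i = 0; i < num_col_dims; ++i) rows *= dims[i];
      for (int i = num_col_dims; i < static_cast<int>(dims.size()); ++i)
        cols *= dims[i];
      return {rows, cols};
    }

    // e.g. x.shape = [2, 3, 4] with x_num_col_dims = 2 -> a [6, 4] matrix.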
@@ -63,16 +63,16 @@ class NCCLAllReduceKernel : public framework::OpKernel<T> {
     // device id
     int gpu_id = boost::get<platform::CUDAPlace>(ctx.GetPlace()).GetDeviceId();
     int idx = comm->GetCommId(gpu_id);
-    VLOG(3) << "gpu : "
-            << " invoke allreduce. send " << x->numel() << " recv "
-            << out->numel();
+    VLOG(30) << "gpu : "
+             << " invoke allreduce. send " << x->numel() << " recv "
+             << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclAllReduce(
         x->data<T>(), out->mutable_data<T>(ctx.GetPlace()), out->numel(),
         NCCLTypeWrapper<T>::type, reduction_op_, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(3) << "gpu : "
-            << " finished allreduce. send " << x->numel() << " recv "
-            << out->numel();
+    VLOG(30) << "gpu : "
+             << " finished allreduce. send " << x->numel() << " recv "
+             << out->numel();
   }
 };
@@ -109,14 +109,14 @@ class NCCLReduceKernel : public framework::OpKernel<T> {
     } else {
       out->Resize(framework::make_ddim({0}));
     }
-    VLOG(3) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
-            << " recv " << out->numel();
+    VLOG(30) << "gpu : " << gpu_id << " invoke reduce. send " << x->numel()
+             << " recv " << out->numel();
     PADDLE_ENFORCE(platform::dynload::ncclReduce(
         x->data<T>(), recvbuffer, x->numel(), NCCLTypeWrapper<T>::type,
         reduction_op_, root, comm->comms().at(idx),
         ctx.cuda_device_context().stream()));
-    VLOG(3) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
-            << " recv " << out->numel();
+    VLOG(30) << "gpu : " << gpu_id << " finished reduce. send " << x->numel()
+             << " recv " << out->numel();
   }
 };
@@ -133,21 +133,22 @@ class NCCLBcastKernel : public framework::OpKernel<T> {
     int idx = comm->GetCommId(gpu_id);
     if (idx == root) {
       auto* x = ctx.Input<LoDTensor>("X");
-      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
+      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. send " << x->numel();
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           reinterpret_cast<void*>(const_cast<T*>(x->data<T>())), x->numel(),
           NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
           ctx.cuda_device_context().stream()));
-      VLOG(3) << "gpu : " << gpu_id << " finished Bcast.";
+      VLOG(30) << "gpu : " << gpu_id << " finished Bcast.";
     } else {
       auto* out = ctx.Output<LoDTensor>("Out");
-      VLOG(3) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
-              << framework::product(out->dims());
+      VLOG(30) << "gpu : " << gpu_id << " invoke Bcast. recv buffer "
+               << framework::product(out->dims());
       PADDLE_ENFORCE(platform::dynload::ncclBcast(
           out->mutable_data<T>(ctx.GetPlace()), out->numel(),
           NCCLTypeWrapper<T>::type, root, comm->comms().at(idx),
          ctx.cuda_device_context().stream()));
-      VLOG(3) << "gpu : " << gpu_id << " finished Bcast. recv " << out->numel();
+      VLOG(30) << "gpu : " << gpu_id << " finished Bcast. recv "
+               << out->numel();
     }
   }
 };
...
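The paired invoke/finished traces around each NCCL call above make it easy to bracket a hang, but raising GLOG_v to 30 globally is noisy. glog also supports per-module thresholds; the "nccl*" pattern below is an assumption about the relevant source file names, not something this patch configures:

// Sketch (assumes glog): raise verbosity for matching files only.
#include <glog/logging.h>

int main(int argc, char* argv[]) {
  google::InitGoogleLogging(argv[0]);
  // Equivalent to GLOG_vmodule="nccl*=30": translation units whose basenames
  // match the pattern log VLOG(30) and below; all other files keep the
  // default level set by --v / GLOG_v.
  google::SetVLOGLevel("nccl*", 30);
  LOG(INFO) << "per-module verbosity configured";
  return 0;
}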
@@ -86,9 +86,9 @@ class NCCLTester : public ::testing::Test {
     (*p_scopes).resize(gpu_list_.size());
     auto op = f::OpRegistry::CreateOp(*op1);
-    VLOG(1) << "invoke NCCLInitOp.";
+    VLOG(10) << "invoke NCCLInitOp.";
     op->Run(g_scope_, cpu_place);
-    VLOG(1) << "NCCLInitOp finished.";
+    VLOG(10) << "NCCLInitOp finished.";
   }
   int GetGPUData(int gpu_id) { return gpu_id + 42; }
@@ -109,7 +109,7 @@ class NCCLTester : public ::testing::Test {
     std::vector<T> send_vector(f::product(kDims), GetGPUData(gpu_id));
     paddle::framework::TensorFromVector<T>(send_vector, *ctx, send_tensor);
-    VLOG(1) << "Send Tensor filled with elements " << send_tensor->numel();
+    VLOG(10) << "Send Tensor filled with elements " << send_tensor->numel();
   }
   lk.unlock();
@@ -119,11 +119,11 @@ class NCCLTester : public ::testing::Test {
     auto op = f::OpRegistry::CreateOp(*op1);
-    VLOG(1) << "Device : " << gpu_id << " invoke " << op_desc.Type();
-    VLOG(1) << " send_tensor : " << send_tensor->numel()
-            << " recv_tensor : " << recv_tensor->numel();
+    VLOG(10) << "Device : " << gpu_id << " invoke " << op_desc.Type();
+    VLOG(10) << " send_tensor : " << send_tensor->numel()
+             << " recv_tensor : " << recv_tensor->numel();
     op->Run(*scope, place);
-    VLOG(1) << "Device : " << gpu_id << " finished " << op_desc.Type();
+    VLOG(10) << "Device : " << gpu_id << " finished " << op_desc.Type();
   }
 public:
...
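Every hunk in this change is the same mechanical substitution, multiplying the VLOG level by 10, which lends itself to scripting. One possible sketch, reading source text on stdin and writing the rewritten text to stdout; illustrative only, not necessarily how this patch was produced:

// Sketch: replace every VLOG(<n>) with VLOG(<10*n>).
// std::regex_replace cannot do arithmetic on capture groups, so the matches
// are spliced by hand.
#include <iostream>
#include <regex>
#include <sstream>
#include <string>

std::string ScaleVlogLevels(const std::string& src) {
  static const std::regex kVlog(R"(VLOG\((\d+)\))");
  std::string out;
  std::size_t last = 0;
  for (auto it = std::sregex_iterator(src.begin(), src.end(), kVlog);
       it != std::sregex_iterator(); ++it) {
    out.append(src, last, it->position(0) - last);  // copy text before match
    out += "VLOG(" + std::to_string(std::stoi((*it)[1].str()) * 10) + ")";
    last = it->position(0) + it->length(0);
  }
  out.append(src, last, std::string::npos);  // copy the tail
  return out;
}

int main() {
  std::ostringstream buf;
  buf << std::cin.rdbuf();  // slurp stdin
  std::cout << ScaleVlogLevels(buf.str());
  return 0;
}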
(11 additional file diffs in this merge request are collapsed and not shown.)