提交 0c3227a5 编写于 作者: M minqiyang

Change the origin VLOG level to 10 times

Fix code to support cpplint syntax check

test=develop
上级 5b7a9dd7
...@@ -18,8 +18,8 @@ namespace framework { ...@@ -18,8 +18,8 @@ namespace framework {
void TransDataDevice(const Tensor &in, const platform::Place &dst_place, void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
Tensor *out) { Tensor *out) {
VLOG(3) << "DeviceTransform in, src_place " << in.place() VLOG(30) << "DeviceTransform in, src_place " << in.place()
<< " dst_place: " << dst_place; << " dst_place: " << dst_place;
PADDLE_ENFORCE_NE( PADDLE_ENFORCE_NE(
in.place().which(), dst_place.which(), in.place().which(), dst_place.which(),
......
...@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel { ...@@ -49,10 +49,10 @@ class TestOpWithKernel : public OperatorWithKernel {
OpKernelType GetExpectedKernelType( OpKernelType GetExpectedKernelType(
const ExecutionContext& ctx) const override { const ExecutionContext& ctx) const override {
if (Attr<bool>("use_gpu")) { if (Attr<bool>("use_gpu")) {
VLOG(3) << "force use gpu kernel"; VLOG(30) << "force use gpu kernel";
return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0)); return OpKernelType(proto::VarType::FP32, platform::CUDAPlace(0));
} else { } else {
VLOG(3) << "use default kernel"; VLOG(30) << "use default kernel";
return OpKernelType(proto::VarType::FP32, return OpKernelType(proto::VarType::FP32,
ctx.Input<Tensor>("input")->place()); ctx.Input<Tensor>("input")->place());
} }
...@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) { ...@@ -148,7 +148,7 @@ TEST(Operator, CPUtoGPU) {
// get output // get output
auto* output2 = scope.Var("OUT2"); auto* output2 = scope.Var("OUT2");
gpu_op->Run(scope, cuda_place); gpu_op->Run(scope, cuda_place);
VLOG(3) << "after gpu_op run"; VLOG(30) << "after gpu_op run";
// auto* output2_ptr = output2->Get<LoDTensor>().data<float>(); // auto* output2_ptr = output2->Get<LoDTensor>().data<float>();
paddle::platform::DeviceContextPool& pool = paddle::platform::DeviceContextPool& pool =
......
...@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar( ...@@ -60,7 +60,7 @@ void BroadcastOpHandle::BroadcastOneVar(
PADDLE_ENFORCE_NOT_NULL(in_var); PADDLE_ENFORCE_NOT_NULL(in_var);
Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var); Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
if (UNLIKELY(!in_tensor.IsInitialized())) { if (UNLIKELY(!in_tensor.IsInitialized())) {
VLOG(3) << "in var " << in_var_handle.name_ << "not inited, return!"; VLOG(30) << "in var " << in_var_handle.name_ << "not inited, return!";
return; return;
} }
......
...@@ -44,8 +44,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl( ...@@ -44,8 +44,8 @@ std::unique_ptr<ir::Graph> ModifyOpLockAndRecordEventPass::ApplyImpl(
IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view); IsLockAndRecordEventFreeComputationOpHandle(compute_op, graph_view);
compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free); compute_op->SetLockAndRecordEventFree(is_lock_and_record_event_free);
if (is_lock_and_record_event_free) { if (is_lock_and_record_event_free) {
VLOG(10) << "Set is_lock_and_record_event_free be true in op " VLOG(100) << "Set is_lock_and_record_event_free be true in op "
<< compute_op->DebugString(); << compute_op->DebugString();
} }
} }
return ir_graph; return ir_graph;
......
...@@ -392,7 +392,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl( ...@@ -392,7 +392,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
for (size_t i = 0; i < backward_vars.size(); i += 2) { for (size_t i = 0; i < backward_vars.size(); i += 2) {
auto &p_name = backward_vars[i]; auto &p_name = backward_vars[i];
auto &g_name = backward_vars[i + 1]; auto &g_name = backward_vars[i + 1];
VLOG(10) << "Bcast " << g_name << " for parameter " << p_name; VLOG(100) << "Bcast " << g_name << " for parameter " << p_name;
switch (strategy_.reduce_) { switch (strategy_.reduce_) {
case BuildStrategy::ReduceStrategy::kReduce: case BuildStrategy::ReduceStrategy::kReduce:
...@@ -794,8 +794,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ...@@ -794,8 +794,8 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U); PADDLE_ENFORCE_EQ(send_param_grad.size(), 2U);
op_dev_id = GetAppropriateDeviceID({send_param_grad[1]}); op_dev_id = GetAppropriateDeviceID({send_param_grad[1]});
VLOG(10) << "send grad " << input_var_names[0] << " origin " VLOG(100) << "send grad " << input_var_names[0] << " origin "
<< send_param_grad[1] << " place: " << op_dev_id; << send_param_grad[1] << " place: " << op_dev_id;
for (auto &varname : input_var_names) { for (auto &varname : input_var_names) {
result->Get<ShardedVarDevice>(kShardedVarDevice) result->Get<ShardedVarDevice>(kShardedVarDevice)
.emplace(varname, op_dev_id); .emplace(varname, op_dev_id);
...@@ -812,9 +812,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result, ...@@ -812,9 +812,9 @@ int MultiDevSSAGraphBuilder::CreateRPCOp(ir::Graph *result,
node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName())); node->Op()->GetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName()));
if (recv_param_grad.size() == 2U) { if (recv_param_grad.size() == 2U) {
op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]); op_dev_id = GetVarDeviceID(*result, recv_param_grad[1]);
VLOG(10) << "recv param " << recv_param_grad[0] VLOG(100) << "recv param " << recv_param_grad[0]
<< " get grad place: " << recv_param_grad[1] << " get grad place: " << recv_param_grad[1]
<< " place: " << op_dev_id; << " place: " << op_dev_id;
} else { } else {
op_dev_id = GetAppropriateDeviceID(output_var_names); op_dev_id = GetAppropriateDeviceID(output_var_names);
} }
......
...@@ -141,8 +141,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl( ...@@ -141,8 +141,8 @@ std::unique_ptr<ir::Graph> ReferenceCountPass::ApplyImpl(
if (next_compute_op != nullptr) { if (next_compute_op != nullptr) {
if (compute_ref_cnt_map.count(next_compute_op)) { if (compute_ref_cnt_map.count(next_compute_op)) {
compute_ref_cnt_map[next_compute_op]->AddVar(var_name); compute_ref_cnt_map[next_compute_op]->AddVar(var_name);
VLOG(5) << "Add reference count of " << var_name << " to Operator " VLOG(50) << "Add reference count of " << var_name << " to Operator "
<< next_compute_op->Name(); << next_compute_op->Name();
} else { } else {
// Create new reference_count_op_handle // Create new reference_count_op_handle
ir::Node *ref_cnt_node = graph->CreateEmptyNode( ir::Node *ref_cnt_node = graph->CreateEmptyNode(
......
...@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() { ...@@ -51,7 +51,7 @@ void ScaleLossGradOpHandle::RunImpl() {
->stream(); ->stream();
memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp, memory::Copy(boost::get<platform::CUDAPlace>(place_), tmp,
platform::CPUPlace(), &coeff_, sizeof(float), stream); platform::CPUPlace(), &coeff_, sizeof(float), stream);
VLOG(10) << place_ << "RUN Scale loss grad op"; VLOG(100) << place_ << "RUN Scale loss grad op";
}); });
#endif #endif
} }
......
...@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl( ...@@ -94,8 +94,8 @@ std::unique_ptr<ir::Graph> SequentialExecutionPass::ApplyImpl(
op_node_list[i - 1]->outputs.push_back(dep_var); op_node_list[i - 1]->outputs.push_back(dep_var);
dep_var->outputs.push_back(op_node_list[i]); dep_var->outputs.push_back(op_node_list[i]);
dep_var->inputs.push_back(op_node_list[i - 1]); dep_var->inputs.push_back(op_node_list[i - 1]);
VLOG(10) << "Add dependencies between " << op_node_list[i - 1]->Name() VLOG(100) << "Add dependencies between " << op_node_list[i - 1]->Name()
<< " and " << op_node_list[i]->Name(); << " and " << op_node_list[i]->Name();
} }
return graph; return graph;
} }
......
...@@ -208,16 +208,16 @@ void ThreadedSSAGraphExecutor::RunOp( ...@@ -208,16 +208,16 @@ void ThreadedSSAGraphExecutor::RunOp(
details::OpHandleBase *op) { details::OpHandleBase *op) {
auto op_run = [ready_var_q, op, this] { auto op_run = [ready_var_q, op, this] {
try { try {
if (VLOG_IS_ON(10)) { if (VLOG_IS_ON(100)) {
VLOG(10) << op << " " << op->Name() << " : " << op->DebugString(); VLOG(100) << op << " " << op->Name() << " : " << op->DebugString();
} }
if (LIKELY(!strategy_.dry_run_)) { if (LIKELY(!strategy_.dry_run_)) {
op->Run(strategy_.use_cuda_); op->Run(strategy_.use_cuda_);
} }
VLOG(10) << op << " " << op->Name() << " Done "; VLOG(100) << op << " " << op->Name() << " Done ";
running_ops_--; running_ops_--;
ready_var_q->Extend(op->Outputs()); ready_var_q->Extend(op->Outputs());
VLOG(10) << op << " " << op->Name() << "Signal posted"; VLOG(100) << op << " " << op->Name() << "Signal posted";
} catch (...) { } catch (...) {
exception_holder_.Catch(std::current_exception()); exception_holder_.Catch(std::current_exception());
} }
......
...@@ -43,7 +43,7 @@ ExecutorPrepareContext::ExecutorPrepareContext( ...@@ -43,7 +43,7 @@ ExecutorPrepareContext::ExecutorPrepareContext(
} }
ExecutorPrepareContext::~ExecutorPrepareContext() { ExecutorPrepareContext::~ExecutorPrepareContext() {
VLOG(5) << "destroy ExecutorPrepareContext"; VLOG(50) << "destroy ExecutorPrepareContext";
} }
template <typename RefCntMap> template <typename RefCntMap>
...@@ -60,7 +60,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op, ...@@ -60,7 +60,7 @@ static void DeleteUnusedTensors(const Scope& scope, const OperatorBase* op,
if ((it->second)-- == 1) { if ((it->second)-- == 1) {
auto* var = scope.FindVar(name); auto* var = scope.FindVar(name);
if (var != nullptr) { if (var != nullptr) {
VLOG(10) << "Erase tensor \'" << name << "\'"; VLOG(100) << "Erase tensor \'" << name << "\'";
if (var->IsType<LoDTensor>()) { if (var->IsType<LoDTensor>()) {
erase_tensors.insert(var->GetMutable<LoDTensor>()); erase_tensors.insert(var->GetMutable<LoDTensor>());
} else if (var->IsType<SelectedRows>()) { } else if (var->IsType<SelectedRows>()) {
...@@ -141,21 +141,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope, ...@@ -141,21 +141,21 @@ void Executor::CreateVariables(const ProgramDesc& pdesc, Scope* scope,
if (var->Persistable()) { if (var->Persistable()) {
auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name()); auto* ptr = const_cast<Scope*>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name() VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr; << " global, which pointer is " << ptr;
} else { } else {
auto* ptr = scope->Var(var->Name()); auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name() VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr; << " locally, which pointer is " << ptr;
} }
} }
} else { } else {
for (auto& var : global_block.AllVars()) { for (auto& var : global_block.AllVars()) {
auto* ptr = scope->Var(var->Name()); auto* ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is " VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr; << ptr;
} }
} }
} }
...@@ -286,7 +286,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -286,7 +286,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0; int i = 0;
for (auto& feed_target : (*feed_targets)) { for (auto& feed_target : (*feed_targets)) {
std::string var_name = feed_target.first; std::string var_name = feed_target.first;
VLOG(3) << "feed target's name: " << var_name; VLOG(30) << "feed target's name: " << var_name;
// prepend feed op // prepend feed op
auto* op = global_block->PrependOp(); auto* op = global_block->PrependOp();
...@@ -309,7 +309,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope, ...@@ -309,7 +309,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
int i = 0; int i = 0;
for (auto& fetch_target : (*fetch_targets)) { for (auto& fetch_target : (*fetch_targets)) {
std::string var_name = fetch_target.first; std::string var_name = fetch_target.first;
VLOG(3) << "fetch target's name: " << var_name; VLOG(30) << "fetch target's name: " << var_name;
// append fetch op // append fetch op
auto* op = global_block->AppendOp(); auto* op = global_block->AppendOp();
...@@ -398,8 +398,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -398,8 +398,8 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "Memory used after operator " + op->Type() + " running: " VLOG(20) << "Memory used after operator " + op->Type() + " running: "
<< memory::memory_usage(place_); << memory::memory_usage(place_);
} }
} }
...@@ -424,10 +424,10 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope, ...@@ -424,10 +424,10 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
} }
if (FLAGS_benchmark) { if (FLAGS_benchmark) {
VLOG(2) << "-------------------------------------------------------"; VLOG(20) << "-------------------------------------------------------";
VLOG(2) << "Memory used after deleting local scope: " VLOG(20) << "Memory used after deleting local scope: "
<< memory::memory_usage(place_); << memory::memory_usage(place_);
VLOG(2) << "-------------------------------------------------------"; VLOG(20) << "-------------------------------------------------------";
} }
} }
...@@ -471,7 +471,7 @@ void Executor::RunPreparedContext( ...@@ -471,7 +471,7 @@ void Executor::RunPreparedContext(
void Executor::EnableMKLDNN(const ProgramDesc& program) { void Executor::EnableMKLDNN(const ProgramDesc& program) {
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
VLOG(3) << "use_mkldnn=True"; VLOG(30) << "use_mkldnn=True";
for (size_t bid = 0; bid < program.Size(); ++bid) { for (size_t bid = 0; bid < program.Size(); ++bid) {
auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid); auto* block = const_cast<ProgramDesc&>(program).MutableBlock(bid);
for (auto* op : block->AllOps()) { for (auto* op : block->AllOps()) {
......
...@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input, ...@@ -25,7 +25,7 @@ void SetFeedVariable(Scope* scope, const LoDTensor& input,
const std::string& var_name, size_t index) { const std::string& var_name, size_t index) {
// If var_name Variable is not found in GlobalScope, a new variable will // If var_name Variable is not found in GlobalScope, a new variable will
// be created. // be created.
VLOG(3) << "SetFeedVariable name=" << var_name << " index=" << index; VLOG(30) << "SetFeedVariable name=" << var_name << " index=" << index;
Variable* g_feed_value = scope->Var(var_name); Variable* g_feed_value = scope->Var(var_name);
auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>()); auto& feed_inputs = *(g_feed_value->GetMutable<FeedFetchList>());
if (index >= feed_inputs.size()) { if (index >= feed_inputs.size()) {
...@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name, ...@@ -47,8 +47,8 @@ LoDTensor& GetFetchVariable(const Scope& scope, const std::string& var_name,
typeid(FeedFetchList).name()); typeid(FeedFetchList).name());
auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>(); auto& fetch_outputs = *g_fetch_value->GetMutable<FeedFetchList>();
auto& tensor = fetch_outputs[index]; auto& tensor = fetch_outputs[index];
VLOG(3) << "Fetch " << var_name << " with index " << index VLOG(30) << "Fetch " << var_name << " with index " << index
<< " shape= " << tensor.dims(); << " shape= " << tensor.dims();
PADDLE_ENFORCE_LT(index, fetch_outputs.size()); PADDLE_ENFORCE_LT(index, fetch_outputs.size());
return tensor; return tensor;
} }
......
...@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) { ...@@ -147,19 +147,19 @@ void PrepareParameters(Graph* graph, const Param& param) {
scope->Var(param.LSTMX)->GetMutable<LoDTensor>(); scope->Var(param.LSTMX)->GetMutable<LoDTensor>();
scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>(); scope->Var(param.LSTMOUT)->GetMutable<LoDTensor>();
#define GATE_W(name__) \ #define GATE_W(name__) \
auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \ auto* W_##name__##_w0 = scope->FindVar(#name__ ".w_0"); \
auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \ auto* W_##name__##_w1 = scope->FindVar(#name__ ".w_1"); \
auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \ auto* W_##name__##_b0 = scope->FindVar(#name__ ".b_0"); \
CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \ CHECK_P3(W_##name__##_w0, W_##name__##_w1, W_##name__##_b0); \
VLOG(4) << #name__ "_w0" \ VLOG(40) << #name__ "_w0" \
<< " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_w0->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_w1" \ VLOG(40) << #name__ "_w1" \
<< " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_w1->Get<LoDTensor>().dims(); \
VLOG(4) << #name__ "_b0" \ VLOG(40) << #name__ "_b0" \
<< " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \ << " shape: " << W_##name__##_b0->Get<LoDTensor>().dims(); \
auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \ auto& W_##name__##_w0_t = W_##name__##_w0->Get<LoDTensor>(); \
auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \ auto& W_##name__##_w1_t = W_##name__##_w1->Get<LoDTensor>(); \
auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>(); auto& W_##name__##_b0_t = W_##name__##_b0->Get<LoDTensor>();
GATE_W(forget); GATE_W(forget);
...@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0, ...@@ -208,7 +208,7 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
int D = W_forget_w0.dims()[0]; int D = W_forget_w0.dims()[0];
int M = W_forget_w1.dims()[0]; int M = W_forget_w1.dims()[0];
out->Resize(make_ddim({D + M, 4 * D})); out->Resize(make_ddim({D + M, 4 * D}));
VLOG(3) << "LSTMWeight resized to " << out->dims(); VLOG(30) << "LSTMWeight resized to " << out->dims();
float* out_data = out->mutable_data<float>(platform::CPUPlace()); float* out_data = out->mutable_data<float>(platform::CPUPlace());
std::array<const float*, 4> tensors( std::array<const float*, 4> tensors(
......
...@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl( ...@@ -57,7 +57,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
int found_conv_bias_count = 0; int found_conv_bias_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle ConvBias fuse"; VLOG(40) << "handle ConvBias fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_bias_pattern); // Filter conv_bias_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_bias_pattern); // tmp
...@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl( ...@@ -74,7 +74,7 @@ std::unique_ptr<ir::Graph> ConvBiasFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used // check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *eltwise); FuseOptions fuse_option = FindFuseOption(*conv, *eltwise);
if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) { if (fuse_option == DO_NOT_FUSE || fuse_option == FUSE_NATIVE) {
VLOG(3) << "do not perform conv+bias fuse"; VLOG(30) << "do not perform conv+bias fuse";
return; return;
} }
......
...@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl( ...@@ -121,7 +121,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
int found_conv_bn_count = 0; int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle ConvBN fuse"; VLOG(40) << "handle ConvBN fuse";
// conv, batch_norm, // conv, batch_norm,
// conv_weight, conv_out, // conv_weight, conv_out,
...@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl( ...@@ -133,7 +133,7 @@ std::unique_ptr<ir::Graph> ConvBNFusePass::ApplyImpl(
// check if fuse can be done and if MKL-DNN should be used // check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm); FuseOptions fuse_option = FindFuseOption(*conv, *batch_norm);
if (fuse_option == DO_NOT_FUSE) { if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+bn fuse"; VLOG(30) << "do not perform conv+bn fuse";
return; return;
} }
...@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl( ...@@ -241,7 +241,7 @@ std::unique_ptr<ir::Graph> ConvEltwiseAddBNFusePass::ApplyImpl(
int found_conv_bn_count = 0; int found_conv_bn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle ConvBN fuse"; VLOG(40) << "handle ConvBN fuse";
// conv, batch_norm, // conv, batch_norm,
// conv_weight, conv_out, // conv_weight, conv_out,
......
...@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -38,7 +38,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
int found_conv_relu_count = 0; int found_conv_relu_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle ConvReLU fuse"; VLOG(40) << "handle ConvReLU fuse";
GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight,
conv_relu_pattern); // Filter conv_relu_pattern); // Filter
GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, conv_relu_pattern); // tmp
...@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl( ...@@ -48,7 +48,7 @@ std::unique_ptr<ir::Graph> ConvReLUFusePass::ApplyImpl(
FuseOptions fuse_option = FindFuseOption(*conv, *relu); FuseOptions fuse_option = FindFuseOption(*conv, *relu);
if (fuse_option == DO_NOT_FUSE) { if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+relu fuse"; VLOG(30) << "do not perform conv+relu fuse";
return; return;
} }
......
...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl( ...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> DepthwiseConvMKLDNNPass::ApplyImpl(
int found_depthwise_conv_mkldnn_count = 0; int found_depthwise_conv_mkldnn_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(3) << "handle DepthwiseConvMKLDNN fuse"; VLOG(30) << "handle DepthwiseConvMKLDNN fuse";
GET_NODE(depthwise_conv, (*pattern)); GET_NODE(depthwise_conv, (*pattern));
depthwise_conv->Op()->SetType("conv2d"); depthwise_conv->Op()->SetType("conv2d");
found_depthwise_conv_mkldnn_count++; found_depthwise_conv_mkldnn_count++;
......
...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl( ...@@ -39,7 +39,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
int found_fc_count = 0; int found_fc_count = 0;
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle FC fuse"; VLOG(40) << "handle FC fuse";
GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(w, w, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_bias, bias, fc_pattern);
GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern); GET_IR_NODE_FROM_SUBGRAPH(fc_out, Out, fc_pattern);
......
...@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct( ...@@ -61,7 +61,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(4) << "handle FuseElewiseAddAct fuse"; VLOG(40) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern); GET_IR_NODE_FROM_SUBGRAPH(ele_y, ele_y, elewise_add_act_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out, GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
elewise_add_act_pattern); elewise_add_act_pattern);
...@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct( ...@@ -77,10 +77,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddAct(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode( Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n); g, act, ele_add, ele_x_n, ele_y_n, ele_out_n, act_out_n);
VLOG(4) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> " VLOG(40) << "\n\t " << ele_x_n << " and " << ele_y_n << " -> "
<< ele_add->Name() << " -> " << ele_out_n << "\n" << ele_add->Name() << " -> " << ele_out_n << "\n"
<< "\t " << ele_out_n << " -> " << act->Name() << " -> " << "\t " << ele_out_n << " -> " << act->Name() << " -> "
<< act_out_n; << act_out_n;
ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node); ReLinkNodes(g, ele_out, ele_add, act, elewise_add_act_node);
found_elewise_add_act_count++; found_elewise_add_act_count++;
...@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd( ...@@ -113,7 +113,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(4) << "handle FuseElewiseAddAct fuse"; VLOG(40) << "handle FuseElewiseAddAct fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern); GET_IR_NODE_FROM_SUBGRAPH(ele_x, ele_x, act_elewise_add_pattern);
GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out, GET_IR_NODE_FROM_SUBGRAPH(ele_out, elewise_add_out,
...@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd( ...@@ -129,9 +129,9 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseActElewiseAdd(
Node *elewise_add_act_node = CreateFuseElewiseAddActNode( Node *elewise_add_act_node = CreateFuseElewiseAddActNode(
g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n); g, ele_add, act, elewise_add_x_n, act_i_n, act_o_n, elewise_add_out_n);
VLOG(4) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n VLOG(40) << "\n\t " << act_i_n << " -> " << act->Name() << " -> " << act_o_n
<< "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> " << "\n\t " << act_o_n << " and " << elewise_add_x_n << " -> "
<< ele_add->Name() << " -> " << elewise_add_out_n; << ele_add->Name() << " -> " << elewise_add_out_n;
ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node); ReLinkNodes(g, act_out, act, ele_add, elewise_add_act_node);
found_elewise_add_act_count++; found_elewise_add_act_count++;
...@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( ...@@ -165,7 +165,7 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph,
Graph *g) { Graph *g) {
VLOG(4) << "handle FuseElewiseAddActGrad1 fuse"; VLOG(40) << "handle FuseElewiseAddActGrad1 fuse";
GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern); GET_IR_NODE_FROM_SUBGRAPH(act_grad, act_grad, elewise_add_act_grad_pattern);
GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out, GET_IR_NODE_FROM_SUBGRAPH(d_itermediate_out, d_itermediate_out,
...@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad( ...@@ -208,10 +208,10 @@ std::unique_ptr<ir::Graph> FuseElewiseAddActPass::FuseElewiseAddActInplaceGrad(
auto fused_node = g->CreateOpNode(&desc); auto fused_node = g->CreateOpNode(&desc);
VLOG(4) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> " VLOG(40) << "\n\t " << d_act_out_n << " and " << act_out_n << " -> "
<< act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t " << act_grad->Name() << " -> " << d_itermediate_out_n << "\n\t "
<< d_itermediate_out_n << " and " << act_out_n << " -> " << d_itermediate_out_n << " and " << act_out_n << " -> "
<< ele_add_grad->Name() << " -> " << d_itermediate_out_n; << ele_add_grad->Name() << " -> " << d_itermediate_out_n;
ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node); ReLinkNodes(g, d_itermediate_out, act_grad, ele_add_grad, fused_node);
found_elewise_add_act_count++; found_elewise_add_act_count++;
......
...@@ -92,7 +92,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) { ...@@ -92,7 +92,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram( std::map<std::string, std::vector<ir::Node *>> Graph::InitFromProgram(
const ProgramDesc &program) { const ProgramDesc &program) {
VLOG(3) << "block in program:" << program_.Size(); VLOG(30) << "block in program:" << program_.Size();
std::unordered_map<std::string, VarDesc *> all_vars; std::unordered_map<std::string, VarDesc *> all_vars;
// var nodes for each var name, will have multiple versions in SSA // var nodes for each var name, will have multiple versions in SSA
std::map<std::string, std::vector<ir::Node *>> var_nodes; std::map<std::string, std::vector<ir::Node *>> var_nodes;
...@@ -160,7 +160,7 @@ void Graph::ResolveHazard( ...@@ -160,7 +160,7 @@ void Graph::ResolveHazard(
auto it_old = versions.rbegin(); auto it_old = versions.rbegin();
++it_old; ++it_old;
for (; it_old != versions.rend(); it_new = it_old, ++it_old) { for (; it_old != versions.rend(); it_new = it_old, ++it_old) {
VLOG(3) << "deal with var: " << (*it_new)->Name(); VLOG(30) << "deal with var: " << (*it_new)->Name();
ir::Node *write_op = ir::Node *write_op =
(*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0]; (*it_new)->inputs.empty() ? nullptr : (*it_new)->inputs[0];
const auto &read_ops = (*it_old)->outputs; const auto &read_ops = (*it_old)->outputs;
......
...@@ -89,7 +89,7 @@ class Graph { ...@@ -89,7 +89,7 @@ class Graph {
attr_name); attr_name);
attrs_[attr_name] = attr; attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() { attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(3) << "deleting " << attr_name; VLOG(30) << "deleting " << attr_name;
delete attr; delete attr;
}; };
} }
......
...@@ -33,8 +33,9 @@ void SortHelper( ...@@ -33,8 +33,9 @@ void SortHelper(
} }
} }
VLOG(3) << "topology sort insert: " << node->Name() VLOG(30) << "topology sort insert: " << node->Name()
<< reinterpret_cast<void *>(node) << " input " << node->inputs.size(); << reinterpret_cast<void *>(node) << " input "
<< node->inputs.size();
ret->push_back(node); ret->push_back(node);
} }
...@@ -103,9 +104,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList( ...@@ -103,9 +104,9 @@ std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
for (auto &var : n->inputs) { for (auto &var : n->inputs) {
for (auto &adj_n : var->inputs) { for (auto &adj_n : var->inputs) {
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation); PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n) VLOG(40) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n) << " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var); << " via " << var->Name() << reinterpret_cast<void *>(var);
adj_list[n].insert(adj_n); adj_list[n].insert(adj_n);
} }
} }
...@@ -163,10 +164,10 @@ size_t GraphNum(const Graph &graph) { ...@@ -163,10 +164,10 @@ size_t GraphNum(const Graph &graph) {
graph_nodes.emplace_back(g_nodes); graph_nodes.emplace_back(g_nodes);
} }
if (VLOG_IS_ON(10)) { if (VLOG_IS_ON(100)) {
VLOG(10) << "graph_num: " << graph_nodes.size(); VLOG(100) << "graph_num: " << graph_nodes.size();
for (auto &g_n : graph_nodes) { for (auto &g_n : graph_nodes) {
VLOG(10) << "graph_nodes: " << g_n.size(); VLOG(100) << "graph_nodes: " << g_n.size();
if (g_n.size() < 10) { if (g_n.size() < 10) {
std::stringstream out; std::stringstream out;
for (auto &node : g_n) { for (auto &node : g_n) {
...@@ -180,7 +181,7 @@ size_t GraphNum(const Graph &graph) { ...@@ -180,7 +181,7 @@ size_t GraphNum(const Graph &graph) {
} }
out << "]"; out << "]";
} }
VLOG(10) << out.str(); VLOG(100) << out.str();
} }
} }
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include <algorithm>
#include <array> #include <array>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -91,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph, ...@@ -91,19 +92,19 @@ void GraphPatternDetector::operator()(Graph *graph,
PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size()); PrettyLogEndl(Style::detail(), "--- detect %d subgraphs", subgraphs.size());
int id = 0; int id = 0;
for (auto &g : subgraphs) { for (auto &g : subgraphs) {
VLOG(3) << "optimizing #" << id++ << " subgraph"; VLOG(30) << "optimizing #" << id++ << " subgraph";
handler(g, graph); handler(g, graph);
} }
} }
bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
VLOG(3) << "mark pdnodes in graph"; VLOG(30) << "mark pdnodes in graph";
if (graph.Nodes().empty()) return false; if (graph.Nodes().empty()) return false;
for (auto &node : GraphTraits::DFS(graph)) { for (auto &node : GraphTraits::DFS(graph)) {
for (const auto &pdnode : pattern_.nodes()) { for (const auto &pdnode : pattern_.nodes()) {
if (pdnode->Tell(&node)) { if (pdnode->Tell(&node)) {
VLOG(4) << "pdnode " << pdnode->name() << " marked"; VLOG(40) << "pdnode " << pdnode->name() << " marked";
pdnodes2nodes_[pdnode.get()].insert(&node); pdnodes2nodes_[pdnode.get()].insert(&node);
} }
} }
...@@ -111,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { ...@@ -111,7 +112,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
// Check to early stop if some PDNode can't find matched Node. // Check to early stop if some PDNode can't find matched Node.
for (auto &pdnode : pattern_.nodes()) { for (auto &pdnode : pattern_.nodes()) {
if (!pdnodes2nodes_.count(pdnode.get())) { if (!pdnodes2nodes_.count(pdnode.get())) {
VLOG(4) << pdnode->name() << " can't find matched Node, early stop"; VLOG(40) << pdnode->name() << " can't find matched Node, early stop";
// return false; // return false;
} }
} }
...@@ -120,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) { ...@@ -120,7 +121,7 @@ bool GraphPatternDetector::MarkPDNodesInGraph(const ir::Graph &graph) {
GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n); GetMarkedNodes(const_cast<Graph *>(&graph)).insert(n);
} }
} }
VLOG(3) << pdnodes2nodes_.size() << " nodes marked"; VLOG(30) << pdnodes2nodes_.size() << " nodes marked";
return !pdnodes2nodes_.empty(); return !pdnodes2nodes_.empty();
} }
...@@ -213,7 +214,7 @@ GraphPatternDetector::DetectPatterns() { ...@@ -213,7 +214,7 @@ GraphPatternDetector::DetectPatterns() {
// Extend a PDNode to subgraphs by deducing the connection relations defined // Extend a PDNode to subgraphs by deducing the connection relations defined
// in edges of PDNodes. // in edges of PDNodes.
for (const auto &edge : pattern_.edges()) { for (const auto &edge : pattern_.edges()) {
VLOG(4) << "check " << edge.first->name() << " -> " << edge.second->name(); VLOG(40) << "check " << edge.first->name() << " -> " << edge.second->name();
// TODO(Superjomn) Fix bug here, the groups might be duplicate here. // TODO(Superjomn) Fix bug here, the groups might be duplicate here.
// Each role has two PDNodes, which indicates two roles. // Each role has two PDNodes, which indicates two roles.
// Detect two Nodes that can match these two roles and they are connected. // Detect two Nodes that can match these two roles and they are connected.
...@@ -224,7 +225,7 @@ GraphPatternDetector::DetectPatterns() { ...@@ -224,7 +225,7 @@ GraphPatternDetector::DetectPatterns() {
// source -> target // source -> target
for (Node *source : pdnodes2nodes_[edge.first]) { for (Node *source : pdnodes2nodes_[edge.first]) {
for (Node *target : pdnodes2nodes_[edge.second]) { for (Node *target : pdnodes2nodes_[edge.second]) {
VLOG(8) << "check " << source->id() << " -- " << target->id(); VLOG(80) << "check " << source->id() << " -- " << target->id();
// TODO(Superjomn) add some prune strategies. // TODO(Superjomn) add some prune strategies.
for (const auto &group : pre_groups) { for (const auto &group : pre_groups) {
HitGroup new_group = group; HitGroup new_group = group;
...@@ -240,12 +241,13 @@ GraphPatternDetector::DetectPatterns() { ...@@ -240,12 +241,13 @@ GraphPatternDetector::DetectPatterns() {
} }
} }
} }
VLOG(3) << "step " << step << " get records: " << cur_groups.size(); VLOG(30) << "step " << step << " get records: " << cur_groups.size();
for (auto &group : cur_groups) { for (auto &group : cur_groups) {
for (auto &item : group.roles) { for (auto &item : group.roles) {
VLOG(4) << "node " << item.second->id() << " as " << item.first->name(); VLOG(40) << "node " << item.second->id() << " as "
<< item.first->name();
} }
VLOG(4) << "========================================================="; VLOG(40) << "=========================================================";
} }
} }
......
...@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) { ...@@ -41,7 +41,7 @@ std::string FormatName(const Node* node) {
std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl( std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const { std::unique_ptr<ir::Graph> graph) const {
const std::string graph_viz_path = Get<std::string>(kGraphVizPath); const std::string graph_viz_path = Get<std::string>(kGraphVizPath);
VLOG(3) << "draw IR graph viz to " << graph_viz_path; VLOG(30) << "draw IR graph viz to " << graph_viz_path;
std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path)); std::unique_ptr<std::ostream> fout(new std::ofstream(graph_viz_path));
PADDLE_ENFORCE(fout->good()); PADDLE_ENFORCE(fout->good());
std::ostream& sout = *fout; std::ostream& sout = *fout;
......
...@@ -20,7 +20,7 @@ namespace ir { ...@@ -20,7 +20,7 @@ namespace ir {
std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl( std::unique_ptr<ir::Graph> MKLDNNPlacementPass::ApplyImpl(
std::unique_ptr<ir::Graph> graph) const { std::unique_ptr<ir::Graph> graph) const {
VLOG(3) << "Aplies MKL-DNN placement strategy."; VLOG(30) << "Aplies MKL-DNN placement strategy.";
for (const Node* n : graph->Nodes()) { for (const Node* n : graph->Nodes()) {
if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) { if (n->IsOp() && n->Op()->HasAttr("use_mkldnn")) {
n->Op()->SetAttr("use_mkldnn", true); n->Op()->SetAttr("use_mkldnn", true);
......
...@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc( ...@@ -62,7 +62,7 @@ VarDesc UpdateGradVarDesc(
string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat); string::Sprintf("%s.repeat.%d", var_desc->Name(), repeat);
VarDesc repeated_var = CopyVarDesc(var_desc); VarDesc repeated_var = CopyVarDesc(var_desc);
repeated_var.SetName(new_gname); repeated_var.SetName(new_gname);
VLOG(3) << "update " << var_desc->Name() << " to repeat " << repeat; VLOG(30) << "update " << var_desc->Name() << " to repeat " << repeat;
return repeated_var; return repeated_var;
} }
return *var_desc; return *var_desc;
...@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl( ...@@ -78,7 +78,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
std::vector<ir::Node*> nodes = TopologySortOperations(*graph); std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
auto origin_nodes = graph->ReleaseNodes(); auto origin_nodes = graph->ReleaseNodes();
VLOG(3) << "origin nodes count: " << origin_nodes.size(); VLOG(30) << "origin nodes count: " << origin_nodes.size();
ir::Graph& result = *graph; ir::Graph& result = *graph;
// 1. record op nodes of different roles // 1. record op nodes of different roles
...@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl( ...@@ -137,8 +137,8 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
"%s.repeat.%d", repeated_op.Input("Variance")[0], i); "%s.repeat.%d", repeated_op.Input("Variance")[0], i);
bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]); bn_vars_need_rename.insert(repeated_op.Input("Mean")[0]);
bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]); bn_vars_need_rename.insert(repeated_op.Input("Variance")[0]);
VLOG(3) << "renaming " << repeated_op.Input("Mean")[0] << " to " VLOG(30) << "renaming " << repeated_op.Input("Mean")[0] << " to "
<< new_mean_name; << new_mean_name;
repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name); repeated_op.RenameInput(repeated_op.Input("Mean")[0], new_mean_name);
repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name); repeated_op.RenameInput(repeated_op.Input("Variance")[0], new_var_name);
repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0], repeated_op.RenameOutput(repeated_op.Output("MeanOut")[0],
......
...@@ -76,7 +76,7 @@ class Pass { ...@@ -76,7 +76,7 @@ class Pass {
attr_name); attr_name);
attrs_[attr_name] = attr; attrs_[attr_name] = attr;
attr_dels_[attr_name] = [attr, attr_name]() { attr_dels_[attr_name] = [attr, attr_name]() {
VLOG(3) << "deleting " << attr_name; VLOG(30) << "deleting " << attr_name;
delete attr; delete attr;
}; };
} }
......
...@@ -12,10 +12,13 @@ ...@@ -12,10 +12,13 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h" #include <set>
#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
namespace paddle { namespace paddle {
...@@ -159,10 +162,7 @@ PDNode* BuildFCPattern(PDPattern* pattern, PDNode* fc_x) { ...@@ -159,10 +162,7 @@ PDNode* BuildFCPattern(PDPattern* pattern, PDNode* fc_x) {
std::set<std::string> acts({"sigmoid", "tanh", "relu", "identity"}); std::set<std::string> acts({"sigmoid", "tanh", "relu", "identity"});
PDNode* act = pattern->NewNode( PDNode* act = pattern->NewNode(
[=](Node* x) { [=](Node* x) { return x && x->IsOp() && acts.count(x->Op()->Type()); },
return x && x->IsOp() && acts.count(x->Op()->Type());
},
"act"); "act");
PDNode* fc_out = pattern->NewNode( PDNode* fc_out = pattern->NewNode(
...@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl( ...@@ -196,7 +196,7 @@ std::unique_ptr<ir::Graph> SeqConcatFcFusePass::ApplyImpl(
detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph, detector(graph.get(), [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* graph) { Graph* graph) {
VLOG(4) << "get one concat pattern"; VLOG(40) << "get one concat pattern";
// fc // fc
GET_NODE(fc_w, detector.pattern()); GET_NODE(fc_w, detector.pattern());
GET_NODE(fc_bias, detector.pattern()); GET_NODE(fc_bias, detector.pattern());
......
...@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) { ...@@ -60,7 +60,7 @@ int BuildFusion(Graph* graph, const std::string& name_scope, Scope* scope) {
auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph, auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
Graph* g) { Graph* g) {
VLOG(4) << "handle SeqConv EltAdd Relu fuse"; VLOG(40) << "handle SeqConv EltAdd Relu fuse";
GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv, seqconv, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv_weight, seqconv_weight, fuse_pattern);
GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern); GET_IR_NODE_FROM_SUBGRAPH(seqconv_out, seqconv_out, fuse_pattern);
......
...@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) { ...@@ -31,7 +31,7 @@ void LoDRankTable::Reset(const LoD& lod, size_t level) {
TableItem item; TableItem item;
item.index = i; item.index = i;
item.length = vec[i + 1] - vec[i]; item.length = vec[i + 1] - vec[i];
VLOG(10) << "Add item to rank table " << item.index << " " << item.length; VLOG(100) << "Add item to rank table " << item.index << " " << item.length;
items_.emplace_back(item); items_.emplace_back(item);
} }
// NOTE(yuyang18): // NOTE(yuyang18):
......
...@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) { ...@@ -51,7 +51,7 @@ TEST(mixed_vector, InitWithCount) {
TEST(mixed_vector, ForEach) { TEST(mixed_vector, ForEach) {
vec<int> tmp; vec<int> tmp;
for (auto& v : tmp) { for (auto& v : tmp) {
VLOG(3) << v; VLOG(30) << v;
} }
} }
......
...@@ -71,7 +71,7 @@ void NaiveExecutor::Prepare(Scope *parent_scope, ...@@ -71,7 +71,7 @@ void NaiveExecutor::Prepare(Scope *parent_scope,
void NaiveExecutor::Run() { void NaiveExecutor::Run() {
for (auto &op : ops_) { for (auto &op : ops_) {
VLOG(4) << "run " << op->Type(); VLOG(40) << "run " << op->Type();
op->Run(*scope_, place_); op->Run(*scope_, place_);
} }
} }
...@@ -95,21 +95,21 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope, ...@@ -95,21 +95,21 @@ void NaiveExecutor::CreateVariables(const ProgramDesc &desc, Scope *scope,
if (var->Persistable()) { if (var->Persistable()) {
auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name()); auto *ptr = const_cast<Scope *>(ancestor_scope)->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name() VLOG(30) << "Create Variable " << var->Name()
<< " global, which pointer is " << ptr; << " global, which pointer is " << ptr;
} else { // Create temporary variables in local scope. } else { // Create temporary variables in local scope.
auto *ptr = scope->Var(var->Name()); auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create Variable " << var->Name() VLOG(30) << "Create Variable " << var->Name()
<< " locally, which pointer is " << ptr; << " locally, which pointer is " << ptr;
} }
} }
} else { } else {
for (auto &var : global_block.AllVars()) { for (auto &var : global_block.AllVars()) {
auto *ptr = scope->Var(var->Name()); auto *ptr = scope->Var(var->Name());
InitializeVariable(ptr, var->GetType()); InitializeVariable(ptr, var->GetType());
VLOG(3) << "Create variable " << var->Name() << ", which pointer is " VLOG(30) << "Create variable " << var->Name() << ", which pointer is "
<< ptr; << ptr;
} }
} }
} }
......
...@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext { ...@@ -82,7 +82,7 @@ class CompileTimeInferShapeContext : public InferShapeContext {
auto *in_var = block_.FindVarRecursive(Inputs(in)[i]); auto *in_var = block_.FindVarRecursive(Inputs(in)[i]);
auto *out_var = block_.FindVarRecursive(Outputs(out)[j]); auto *out_var = block_.FindVarRecursive(Outputs(out)[j]);
if (in_var->GetType() != proto::VarType::LOD_TENSOR) { if (in_var->GetType() != proto::VarType::LOD_TENSOR) {
VLOG(3) << "input " << in << " is not LodTensor"; VLOG(30) << "input " << in << " is not LodTensor";
return; return;
} }
out_var->SetLoDLevel(in_var->GetLoDLevel()); out_var->SetLoDLevel(in_var->GetLoDLevel());
...@@ -241,32 +241,32 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) { ...@@ -241,32 +241,32 @@ void OpDesc::SetAttr(const std::string &name, const Attribute &v) {
const proto::OpProto::Attr &attr = GetProtoAttr(name); const proto::OpProto::Attr &attr = GetProtoAttr(name);
switch (attr.type()) { switch (attr.type()) {
case proto::AttrType::BOOLEANS: { case proto::AttrType::BOOLEANS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BOOLEANS"; << " from INTS to BOOLEANS";
this->attrs_[name] = std::vector<bool>(); this->attrs_[name] = std::vector<bool>();
break; break;
} }
case proto::AttrType::INTS: { case proto::AttrType::INTS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to INTS"; << " from INTS to INTS";
this->attrs_[name] = std::vector<int>(); this->attrs_[name] = std::vector<int>();
break; break;
} }
case proto::AttrType::FLOATS: { case proto::AttrType::FLOATS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to FLOATS"; << " from INTS to FLOATS";
this->attrs_[name] = std::vector<float>(); this->attrs_[name] = std::vector<float>();
break; break;
} }
case proto::AttrType::STRINGS: { case proto::AttrType::STRINGS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to STRINGS"; << " from INTS to STRINGS";
this->attrs_[name] = std::vector<std::string>(); this->attrs_[name] = std::vector<std::string>();
break; break;
} }
case proto::AttrType::BLOCKS: { case proto::AttrType::BLOCKS: {
VLOG(11) << "SetAttr: " << Type() << ", " << name VLOG(110) << "SetAttr: " << Type() << ", " << name
<< " from INTS to BLOCKS"; << " from INTS to BLOCKS";
this->SetBlocksAttr(name, std::vector<BlockDesc *>()); this->SetBlocksAttr(name, std::vector<BlockDesc *>());
return; return;
} }
...@@ -499,13 +499,13 @@ void OpDesc::CheckAttrs() { ...@@ -499,13 +499,13 @@ void OpDesc::CheckAttrs() {
} }
void OpDesc::InferShape(const BlockDesc &block) const { void OpDesc::InferShape(const BlockDesc &block) const {
VLOG(3) << "CompileTime infer shape on " << Type(); VLOG(30) << "CompileTime infer shape on " << Type();
InitInferShapeFuncs(); InitInferShapeFuncs();
auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_; auto &infer_shape = OpInfoMap::Instance().Get(this->Type()).infer_shape_;
PADDLE_ENFORCE(static_cast<bool>(infer_shape), PADDLE_ENFORCE(static_cast<bool>(infer_shape),
"%s's infer_shape has not been registered", this->Type()); "%s's infer_shape has not been registered", this->Type());
CompileTimeInferShapeContext ctx(*this, block); CompileTimeInferShapeContext ctx(*this, block);
if (VLOG_IS_ON(10)) { if (VLOG_IS_ON(100)) {
std::ostringstream sout; std::ostringstream sout;
auto inames = this->InputArgumentNames(); auto inames = this->InputArgumentNames();
sout << " From ["; sout << " From [";
...@@ -516,7 +516,7 @@ void OpDesc::InferShape(const BlockDesc &block) const { ...@@ -516,7 +516,7 @@ void OpDesc::InferShape(const BlockDesc &block) const {
std::copy(onames.begin(), onames.end(), std::copy(onames.begin(), onames.end(),
std::ostream_iterator<std::string>(sout, ", ")); std::ostream_iterator<std::string>(sout, ", "));
sout << "]"; sout << "]";
VLOG(10) << sout.str(); VLOG(100) << sout.str();
} }
infer_shape(&ctx); infer_shape(&ctx);
} }
...@@ -607,7 +607,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const { ...@@ -607,7 +607,7 @@ DDim CompileTimeInferShapeContext::GetDim(const std::string &name) const {
auto shape = var->GetShape(); auto shape = var->GetShape();
res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape); res = shape.empty() ? make_ddim({0UL}) : make_ddim(shape);
} catch (...) { } catch (...) {
VLOG(5) << "GetDim of variable " << name << " error"; VLOG(50) << "GetDim of variable " << name << " error";
std::rethrow_exception(std::current_exception()); std::rethrow_exception(std::current_exception());
} }
return res; return res;
...@@ -624,7 +624,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims( ...@@ -624,7 +624,7 @@ std::vector<DDim> CompileTimeInferShapeContext::GetRepeatedDims(
res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s)); res.push_back(s.empty() ? make_ddim({0UL}) : make_ddim(s));
} }
} catch (...) { } catch (...) {
VLOG(5) << "GetRepeatedDim of variable " << name << " error."; VLOG(50) << "GetRepeatedDim of variable " << name << " error.";
std::rethrow_exception(std::current_exception()); std::rethrow_exception(std::current_exception());
} }
return res; return res;
......
...@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap( ...@@ -46,9 +46,9 @@ static VariableNameMap ConvertOpDescVarsToVarNameMap(
std::unique_ptr<OperatorBase> OpRegistry::CreateOp( std::unique_ptr<OperatorBase> OpRegistry::CreateOp(
const proto::OpDesc& op_desc) { const proto::OpDesc& op_desc) {
VLOG(1) << "CreateOp directly from OpDesc is deprecated. It should only be" VLOG(10) << "CreateOp directly from OpDesc is deprecated. It should only be"
"used in unit tests. Use CreateOp(const OpDesc& op_desc) " "used in unit tests. Use CreateOp(const OpDesc& op_desc) "
"instead."; "instead.";
VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs()); VariableNameMap inputs = ConvertOpDescVarsToVarNameMap(op_desc.inputs());
VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs()); VariableNameMap outputs = ConvertOpDescVarsToVarNameMap(op_desc.outputs());
AttributeMap attrs; AttributeMap attrs;
......
...@@ -140,7 +140,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { ...@@ -140,7 +140,7 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
} }
void OperatorBase::Run(const Scope& scope, const platform::Place& place) { void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(4) << place << " " << DebugStringEx(&scope); VLOG(40) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) { if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place); PADDLE_THROW("Cannot run operator on place %s", place);
...@@ -160,7 +160,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { ...@@ -160,7 +160,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
} else { } else {
RunImpl(scope, place); RunImpl(scope, place);
} }
VLOG(3) << place << " " << DebugStringEx(&scope); VLOG(30) << place << " " << DebugStringEx(&scope);
} }
bool OperatorBase::HasInputs(const std::string& name) const { bool OperatorBase::HasInputs(const std::string& name) const {
...@@ -708,14 +708,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope, ...@@ -708,14 +708,14 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
auto expected_kernel_key = auto expected_kernel_key =
this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx)); this->GetExpectedKernelType(ExecutionContext(*this, scope, *dev_ctx));
VLOG(3) << "expected_kernel_key:" << expected_kernel_key; VLOG(30) << "expected_kernel_key:" << expected_kernel_key;
auto kernel_iter = kernels.find(expected_kernel_key); auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if (kernel_iter == kernels.end() && if (kernel_iter == kernels.end() &&
expected_kernel_key.library_type_ == LibraryType::kMKLDNN) { expected_kernel_key.library_type_ == LibraryType::kMKLDNN) {
VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one"; VLOG(30) << "missing MKLDNN kernel: fallbacking to PLAIN one";
expected_kernel_key.library_type_ = LibraryType::kPlain; expected_kernel_key.library_type_ = LibraryType::kPlain;
expected_kernel_key.data_layout_ = DataLayout::kAnyLayout; expected_kernel_key.data_layout_ = DataLayout::kAnyLayout;
kernel_iter = kernels.find(expected_kernel_key); kernel_iter = kernels.find(expected_kernel_key);
...@@ -767,7 +767,8 @@ void OperatorWithKernel::TransferInplaceVarsBack( ...@@ -767,7 +767,8 @@ void OperatorWithKernel::TransferInplaceVarsBack(
const Scope& scope, const std::vector<std::string>& inplace_vars, const Scope& scope, const std::vector<std::string>& inplace_vars,
const Scope& transfer_scope) const { const Scope& transfer_scope) const {
for (auto& var_name : inplace_vars) { for (auto& var_name : inplace_vars) {
VLOG(3) << "share inplace var " + var_name + " back to it's original scope"; VLOG(30) << "share inplace var " + var_name +
" back to it's original scope";
auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name)); auto* original_tensor = GetMutableTensorFromVar(scope.FindVar(var_name));
auto* var = transfer_scope.FindVar(var_name); auto* var = transfer_scope.FindVar(var_name);
PADDLE_ENFORCE(var != nullptr, "The var[%s] should not be nullptr", PADDLE_ENFORCE(var != nullptr, "The var[%s] should not be nullptr",
...@@ -807,8 +808,8 @@ Scope* OperatorWithKernel::TryTransferData( ...@@ -807,8 +808,8 @@ Scope* OperatorWithKernel::TryTransferData(
transfered_inplace_vars->emplace_back(var_name); transfered_inplace_vars->emplace_back(var_name);
} }
VLOG(3) << "Transform Variable " << var_name << " from " VLOG(30) << "Transform Variable " << var_name << " from "
<< kernel_type_for_var << " to " << expected_kernel_key; << kernel_type_for_var << " to " << expected_kernel_key;
if (new_scope == nullptr) { if (new_scope == nullptr) {
new_scope = &scope.NewScope(); new_scope = &scope.NewScope();
......
...@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices( ...@@ -199,7 +199,7 @@ void ParallelExecutor::BCastParamsToDevices(
auto &main_tensor = main_var->Get<LoDTensor>(); auto &main_tensor = main_var->Get<LoDTensor>();
if (!main_tensor.IsInitialized()) { if (!main_tensor.IsInitialized()) {
VLOG(3) << "one in var not inited, return!"; VLOG(30) << "one in var not inited, return!";
continue; continue;
} }
auto &dims = main_tensor.dims(); auto &dims = main_tensor.dims();
......
...@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) { ...@@ -149,7 +149,7 @@ Variable* Scope::VarInternal(const std::string& name) {
v = new Variable(); v = new Variable();
vars_[name].reset(v); vars_[name].reset(v);
VLOG(3) << "Create variable " << name; VLOG(30) << "Create variable " << name;
v->name_ = &(vars_.find(name)->first); v->name_ = &(vars_.find(name)->first);
return v; return v;
} }
......
...@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value, ...@@ -176,7 +176,7 @@ void SelectedRows::Get(const framework::Tensor& ids, framework::Tensor* value,
PADDLE_ENFORCE(value->IsInitialized(), PADDLE_ENFORCE(value->IsInitialized(),
"The value tensor should be initialized."); "The value tensor should be initialized.");
if (ids.numel() == 0) { if (ids.numel() == 0) {
VLOG(3) << "keys is empty, please check data!"; VLOG(30) << "keys is empty, please check data!";
} else { } else {
int64_t value_width = value_->numel() / value_->dims()[0]; int64_t value_width = value_->numel() / value_->dims()[0];
PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0], PADDLE_ENFORCE_EQ(value_width, value->numel() / value->dims()[0],
......
...@@ -22,8 +22,8 @@ namespace framework { ...@@ -22,8 +22,8 @@ namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place, void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) { const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(3) << "TensorCopy " << src.dims() << " from " << src.place() << " to " VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place; << dst_place;
src.check_memory_size(); src.check_memory_size();
dst->Resize(src.dims()); dst->Resize(src.dims());
...@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -37,8 +37,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
...@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -77,8 +77,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream(); reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) { if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data async from " << src_place << " to " VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
...@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place, ...@@ -114,8 +114,8 @@ void TensorCopy(const Tensor& src, const platform::Place& dst_place,
void TensorCopySync(const Tensor& src, const platform::Place& dst_place, void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) { Tensor* dst) {
VLOG(3) << "TensorCopySync " << src.dims() << " from " << src.place() VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place; << " to " << dst_place;
src.check_memory_size(); src.check_memory_size();
dst->Resize(src.dims()); dst->Resize(src.dims());
dst->set_layout(src.layout()); dst->set_layout(src.layout());
...@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -125,8 +125,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
auto size = src.numel() * SizeOfType(src.type()); auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) { if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) { if (src_ptr == dst_ptr) {
VLOG(3) << "Skip copy the same data from " << src_place << " to " VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr, memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
...@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place, ...@@ -146,8 +146,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
} else if (platform::is_gpu_place(src_place) && } else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) { platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) { if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(3) << "Skip copy the same data from " << src_place << " to " VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place; << dst_place;
return; return;
} }
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place); auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
......
tensor_util.cc
\ No newline at end of file
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <limits>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/tensor_util.h"
namespace paddle {
namespace framework {
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
const platform::DeviceContext& ctx, Tensor* dst) {
VLOG(30) << "TensorCopy " << src.dims() << " from " << src.place() << " to "
<< dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
PADDLE_ENFORCE_EQ(src_gpu_place, ctx_gpu_place);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, stream);
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto ctx_gpu_place = boost::get<platform::CUDAPlace>(ctx_place);
PADDLE_ENFORCE_EQ(dst_gpu_place, ctx_gpu_place);
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, stream);
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
auto ctx_place = ctx.GetPlace();
PADDLE_ENFORCE(platform::is_gpu_place(ctx_place));
auto stream =
reinterpret_cast<const platform::CUDADeviceContext&>(ctx).stream();
if (platform::is_same_place(src_place, dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data async from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
if (platform::is_same_place(ctx_place, src_place)) {
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
} else if (platform::is_same_place(ctx_place, dst_place)) {
platform::DeviceContextPool::Instance().Get(src.place())->Wait();
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size,
stream);
} else {
PADDLE_THROW("ctx is not belong to dst_gpu_place or src_gpu_place.");
}
}
}
#endif
}
void TensorCopy(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
const platform::DeviceContext* dev_ctx;
if (platform::is_gpu_place(dst_place)) {
dev_ctx = pool.Get(dst_place);
} else {
dev_ctx = pool.Get(src.place());
}
TensorCopy(src, dst_place, *dev_ctx, dst);
}
void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
Tensor* dst) {
VLOG(30) << "TensorCopySync " << src.dims() << " from " << src.place()
<< " to " << dst_place;
src.check_memory_size();
dst->Resize(src.dims());
dst->set_layout(src.layout());
auto src_place = src.place();
auto src_ptr = src.data<void>();
auto dst_ptr = dst->mutable_data(dst_place, src.type());
auto size = src.numel() * SizeOfType(src.type());
if (platform::is_cpu_place(src_place) && platform::is_cpu_place(dst_place)) {
if (src_ptr == dst_ptr) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
boost::get<platform::CPUPlace>(src_place), src_ptr, size);
}
#ifdef PADDLE_WITH_CUDA
else if (platform::is_gpu_place(src_place) && // NOLINT
platform::is_cpu_place(dst_place)) {
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_cpu_place = boost::get<platform::CPUPlace>(dst_place);
memory::Copy(dst_cpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
} else if (platform::is_cpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_cpu_place = boost::get<platform::CPUPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_cpu_place, src_ptr, size, nullptr);
} else if (platform::is_gpu_place(src_place) &&
platform::is_gpu_place(dst_place)) {
if (src_ptr == dst_ptr && platform::is_same_place(src_place, dst_place)) {
VLOG(30) << "Skip copy the same data from " << src_place << " to "
<< dst_place;
return;
}
auto src_gpu_place = boost::get<platform::CUDAPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_gpu_place, src_ptr, size, nullptr);
} else if (platform::is_cuda_pinned_place(src_place) &&
platform::is_gpu_place(dst_place)) {
auto src_pinned_place = boost::get<platform::CUDAPinnedPlace>(src_place);
auto dst_gpu_place = boost::get<platform::CUDAPlace>(dst_place);
memory::Copy(dst_gpu_place, dst_ptr, src_pinned_place, src_ptr, size,
nullptr);
}
#endif
}
template <typename Predicate, typename DevCtx>
struct AnyDTypeVisitor {
Predicate predicate_;
const Tensor& tensor_;
const DevCtx& ctx_;
Tensor* out_;
AnyDTypeVisitor(Predicate predicate, const Tensor& tensor, const DevCtx& ctx,
Tensor* out)
: predicate_(predicate), tensor_(tensor), ctx_(ctx), out_(out) {}
template <typename T>
void apply() const {
auto t = EigenVector<T>::Flatten(tensor_);
auto o = EigenScalar<bool>::From(*out_);
// return any of predicate_(t) is true.
o.device(*ctx_.eigen_device()) = predicate_(t).any();
}
};
template <typename Predicate, typename DevCtx>
inline void AnyImpl(Predicate predicate, const framework::Tensor& tensor,
const DevCtx& ctx, framework::Tensor* out) {
VisitDataType(ToDataType(tensor.type()), AnyDTypeVisitor<Predicate, DevCtx>(
predicate, tensor, ctx, out));
}
template <typename Predicate>
class AnyVisitor : public boost::static_visitor<bool> {
private:
const framework::Tensor& tensor_;
Predicate predicate_;
public:
AnyVisitor(const framework::Tensor& tensor, Predicate predicate)
: tensor_(tensor), predicate_(std::move(predicate)) {}
template <typename Place>
bool operator()(const Place& place) const {
framework::Tensor out;
out.Resize({1});
out.mutable_data<bool>(place);
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
AnyImpl(predicate_, tensor_, *ctx, &out);
return this->GetResult(out, place);
}
bool GetResult(const framework::Tensor& out,
const platform::CUDAPlace& gpu) const {
platform::CPUPlace cpu;
framework::Tensor tmp;
tmp.Resize({1});
tmp.mutable_data<bool>(cpu);
auto gpuctx = platform::DeviceContextPool::Instance().Get(gpu);
gpuctx->Wait();
TensorCopy(out, cpu, *gpuctx, &tmp);
gpuctx->Wait();
return GetResult(tmp, cpu);
}
bool GetResult(const framework::Tensor& out,
const platform::CPUPlace& cpu) const {
return *out.data<bool>();
}
bool GetResult(const framework::Tensor& out,
const platform::CUDAPinnedPlace& cpu) const {
return *out.data<bool>();
}
};
template <typename Predicate>
class AnyOutVisitor : public boost::static_visitor<> {
private:
const framework::Tensor& tensor_;
mutable framework::Tensor* out_;
Predicate predicate_;
public:
AnyOutVisitor(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out)
: tensor_(tensor), out_(out), predicate_(std::move(predicate)) {}
template <typename Place>
void operator()(const Place& place) const {
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(place);
out_->Resize({1});
out_->mutable_data<bool>(place);
AnyImpl(predicate_, tensor_, *ctx, out_);
}
};
template <typename Predicate>
inline bool Any(const framework::Tensor& tensor, Predicate predicate) {
AnyVisitor<Predicate> visitor(tensor, predicate);
auto place = tensor.place();
return platform::VisitPlace(place, visitor);
}
template <typename Predicate>
inline void Any(const framework::Tensor& tensor, Predicate predicate,
framework::Tensor* out) {
AnyOutVisitor<Predicate> visitor(tensor, predicate, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
struct ContainsNANPredicate {
template <typename T>
auto operator()(const T& eigen_vec) const
-> decltype(std::declval<T>().isnan()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isnan();
}
};
bool TensorContainsNAN(const framework::Tensor& tensor) {
ContainsNANPredicate predicate;
return Any(tensor, predicate);
}
void TensorContainsNAN(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsNANPredicate predicate;
Any(tensor, predicate, out);
}
struct ContainsInfPredicate {
template <typename T>
auto operator()(const T& eigen_vec) const
-> decltype(std::declval<T>().isinf()) {
// Cast eigen_vector to vector of bool. true if is inf.
return eigen_vec.isinf();
}
};
bool TensorContainsInf(const framework::Tensor& tensor) {
ContainsInfPredicate predicate;
return Any(tensor, predicate);
}
void TensorContainsInf(const framework::Tensor& tensor,
framework::Tensor* out) {
ContainsInfPredicate predicate;
Any(tensor, predicate, out);
}
// NOTE(dzhwinter):
// Isfinite need a AllVisitor to loop through all the elements.
// We choose two cuda call instead of one allvisitor. The AllVisitor
// should be implemented if the performance hurts.
bool TensorIsfinite(const framework::Tensor& tensor) {
ContainsInfPredicate pred_inf;
ContainsNANPredicate pred_nan;
return !Any(tensor, pred_inf) && !Any(tensor, pred_nan);
}
#ifdef PADDLE_WITH_CUDA
template <typename T>
static inline void __global__ BothFalse(const T* cmp, T* out) {
out[0] = (!cmp[0]) && (!out[0]);
}
#endif
struct BothFalseVisitor : public boost::static_visitor<> {
const framework::Tensor& in_;
mutable framework::Tensor* out_;
BothFalseVisitor(const framework::Tensor& in, framework::Tensor* out)
: in_(in), out_(out) {}
template <typename Place>
void operator()(const Place& place) const {
VisitorImpl(place);
}
void VisitorImpl(const platform::CUDAPlace& gpu) const {
#ifdef PADDLE_WITH_CUDA
auto* ctx = platform::DeviceContextPool::Instance().GetByPlace(gpu);
BothFalse<bool><<<1, 1, 0, ctx->stream()>>>(in_.data<bool>(),
out_->mutable_data<bool>(gpu));
#endif
}
void VisitorImpl(const platform::CPUPlace& cpu) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
void VisitorImpl(
const platform::CUDAPinnedPlace& cpu /* equals to cpu*/) const {
bool lhs = !in_.data<bool>()[0];
bool rhs = !out_->mutable_data<bool>(cpu)[0];
out_->mutable_data<bool>(cpu)[0] = lhs && rhs;
}
};
void TensorIsfinite(const framework::Tensor& tensor, framework::Tensor* out) {
framework::Tensor tmp;
TensorContainsInf(tensor, &tmp);
TensorContainsNAN(tensor, out);
BothFalseVisitor visitor(tmp, out);
auto place = tensor.place();
platform::VisitPlace(place, visitor);
}
void TensorToStream(std::ostream& os, const Tensor& tensor,
const platform::DeviceContext& dev_ctx) {
{ // the 1st field, uint32_t version
constexpr uint32_t version = 0;
os.write(reinterpret_cast<const char*>(&version), sizeof(version));
}
{ // the 2nd field, tensor description
// int32_t size
// void* protobuf message
proto::VarType::TensorDesc desc;
desc.set_data_type(framework::ToDataType(tensor.type()));
auto dims = framework::vectorize(tensor.dims());
auto* pb_dims = desc.mutable_dims();
pb_dims->Resize(static_cast<int>(dims.size()), 0);
std::copy(dims.begin(), dims.end(), pb_dims->begin());
int32_t size = desc.ByteSize();
os.write(reinterpret_cast<const char*>(&size), sizeof(size));
auto out = desc.SerializeAsString();
os.write(out.data(), size);
}
{ // the 3rd field, tensor data
uint64_t size = tensor.numel() * framework::SizeOfType(tensor.type());
auto* data_ptr = tensor.data<void>();
PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
"Index overflow when writing tensor");
if (platform::is_gpu_place(tensor.place())) {
#ifdef PADDLE_WITH_CUDA
constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB
std::unique_ptr<char[]> buf(new char[kBufSize]);
auto& gpu_dev_ctx =
static_cast<const platform::CUDADeviceContext&>(dev_ctx);
platform::CPUPlace cpu;
uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
while (size != 0) {
size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
memory::Copy(cpu, buf.get(),
boost::get<platform::CUDAPlace>(tensor.place()),
reinterpret_cast<const void*>(data), size_to_write,
gpu_dev_ctx.stream());
gpu_dev_ctx.Wait();
os.write(buf.get(), size_to_write);
data += size_to_write;
size -= size_to_write;
}
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
os.write(static_cast<const char*>(data_ptr),
static_cast<std::streamsize>(size));
}
}
}
struct DeserializedDataFunctor {
DeserializedDataFunctor(void** buf, Tensor* tensor,
const platform::Place& place)
: buf_(buf), tensor_(tensor), place_(place) {}
template <typename T>
void apply() {
*buf_ = tensor_->mutable_data<T>(place_);
}
void** buf_;
Tensor* tensor_;
platform::Place place_;
};
void TensorFromStream(std::istream& is, Tensor* tensor,
const platform::DeviceContext& dev_ctx) {
uint32_t version;
is.read(reinterpret_cast<char*>(&version), sizeof(version));
PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
proto::VarType::TensorDesc desc;
{ // int32_t size
// proto buffer
int32_t size;
is.read(reinterpret_cast<char*>(&size), sizeof(size));
std::unique_ptr<char[]> buf(new char[size]);
is.read(reinterpret_cast<char*>(buf.get()), size);
PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
"Cannot parse tensor desc");
}
{ // read tensor
std::vector<int64_t> dims;
dims.reserve(static_cast<size_t>(desc.dims().size()));
std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
tensor->Resize(framework::make_ddim(dims));
void* buf;
auto ctx = platform::CPUDeviceContext();
size_t size =
tensor->numel() *
framework::SizeOfType(framework::ToTypeIndex(desc.data_type()));
if (platform::is_gpu_place(dev_ctx.GetPlace())) {
#ifdef PADDLE_WITH_CUDA
Tensor cpu_tensor;
cpu_tensor.Resize(framework::make_ddim(dims));
framework::VisitDataType(
desc.data_type(),
DeserializedDataFunctor(&buf, &cpu_tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), size);
auto dst_place = dev_ctx.GetPlace();
framework::TensorCopy(cpu_tensor, dst_place, dev_ctx, tensor);
#else
PADDLE_THROW("Unexpected branch");
#endif
} else {
framework::VisitDataType(
desc.data_type(),
DeserializedDataFunctor(&buf, tensor, ctx.GetPlace()));
is.read(static_cast<char*>(buf), size);
}
}
}
} // namespace framework
} // namespace paddle
...@@ -39,7 +39,7 @@ void ThreadPool::Init() { ...@@ -39,7 +39,7 @@ void ThreadPool::Init() {
int num_threads = std::thread::hardware_concurrency(); int num_threads = std::thread::hardware_concurrency();
if (FLAGS_dist_threadpool_size > 0) { if (FLAGS_dist_threadpool_size > 0) {
num_threads = FLAGS_dist_threadpool_size; num_threads = FLAGS_dist_threadpool_size;
VLOG(1) << "set dist_threadpool_size to " << num_threads; VLOG(10) << "set dist_threadpool_size to " << num_threads;
} }
PADDLE_ENFORCE_GT(num_threads, 0); PADDLE_ENFORCE_GT(num_threads, 0);
threadpool_.reset(new ThreadPool(num_threads)); threadpool_.reset(new ThreadPool(num_threads));
......
...@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const { ...@@ -61,10 +61,10 @@ size_t VarDesc::GetTensorDescNum() const {
void VarDesc::SetShapes( void VarDesc::SetShapes(
const std::vector<std::vector<int64_t>> &multiple_dims) { const std::vector<std::vector<int64_t>> &multiple_dims) {
if (multiple_dims.size() != GetTensorDescNum()) { if (multiple_dims.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given shapes(" << multiple_dims.size() VLOG(30) << "WARNING: The number of given shapes(" << multiple_dims.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
<< "). The Reader is going to be reinitialized."; << "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_dims.size()); SetTensorDescNum(multiple_dims.size());
} }
std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs(); std::vector<proto::VarType::TensorDesc *> tensors = mutable_tensor_descs();
...@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) { ...@@ -94,11 +94,11 @@ void VarDesc::SetDataType(proto::VarType::Type data_type) {
void VarDesc::SetDataTypes( void VarDesc::SetDataTypes(
const std::vector<proto::VarType::Type> &multiple_data_type) { const std::vector<proto::VarType::Type> &multiple_data_type) {
if (multiple_data_type.size() != GetTensorDescNum()) { if (multiple_data_type.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given data types(" VLOG(30) << "WARNING: The number of given data types("
<< multiple_data_type.size() << multiple_data_type.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
<< "). The Reader is going to be reinitialized."; << "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_data_type.size()); SetTensorDescNum(multiple_data_type.size());
} }
std::vector<proto::VarType::TensorDesc *> tensor_descs = std::vector<proto::VarType::TensorDesc *> tensor_descs =
...@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) { ...@@ -139,11 +139,11 @@ void VarDesc::SetLoDLevel(int32_t lod_level) {
void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) { void VarDesc::SetLoDLevels(const std::vector<int32_t> &multiple_lod_level) {
if (multiple_lod_level.size() != GetTensorDescNum()) { if (multiple_lod_level.size() != GetTensorDescNum()) {
VLOG(3) << "WARNING: The number of given lod_levels(" VLOG(30) << "WARNING: The number of given lod_levels("
<< multiple_lod_level.size() << multiple_lod_level.size()
<< ") doesn't match the existing tensor number(" << ") doesn't match the existing tensor number("
<< GetTensorDescNum() << GetTensorDescNum()
<< "). The Reader is going to be reinitialized."; << "). The Reader is going to be reinitialized.";
SetTensorDescNum(multiple_lod_level.size()); SetTensorDescNum(multiple_lod_level.size());
} }
switch (desc_.type().type()) { switch (desc_.type().type()) {
......
...@@ -60,7 +60,7 @@ class DfgPassManagerImpl final : public DfgPassManager { ...@@ -60,7 +60,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
private: private:
void AddPass(const std::string& name, AnalysisPass* pass) { void AddPass(const std::string& name, AnalysisPass* pass) {
VLOG(3) << "Adding pass " << name; VLOG(30) << "Adding pass " << name;
Register(name, pass); Register(name, pass);
AddGraphvizDebugerPass(pass); AddGraphvizDebugerPass(pass);
} }
...@@ -103,7 +103,7 @@ void Analyzer::Run(Argument* argument) { ...@@ -103,7 +103,7 @@ void Analyzer::Run(Argument* argument) {
std::vector<std::string> passes; std::vector<std::string> passes;
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
if (use_mkldnn_) { if (use_mkldnn_) {
VLOG(3) << "Adding MKL-DNN placement pass"; VLOG(30) << "Adding MKL-DNN placement pass";
passes.push_back("mkldnn_placement_pass"); passes.push_back("mkldnn_placement_pass");
} }
#endif #endif
......
...@@ -68,8 +68,8 @@ struct Argument { ...@@ -68,8 +68,8 @@ struct Argument {
key); key);
attrs_[key] = data; attrs_[key] = data;
attr_deleters_[key] = [data, key]() { attr_deleters_[key] = [data, key]() {
VLOG(3) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; VLOG(30) << "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
VLOG(3) << "argument delete attr: " << key; VLOG(30) << "argument delete attr: " << key;
delete data; delete data;
}; };
} }
......
...@@ -132,7 +132,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) { ...@@ -132,7 +132,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
Node *x{nullptr}; Node *x{nullptr};
if (ir_node->IsOp()) { if (ir_node->IsOp()) {
PADDLE_ENFORCE(ir_node->Op()); PADDLE_ENFORCE(ir_node->Op());
VLOG(4) << "get op " << ir_node << " " << ir_node->Name(); VLOG(40) << "get op " << ir_node << " " << ir_node->Name();
x = nodes.Create(Node::Type::kFunction); x = nodes.Create(Node::Type::kFunction);
x->attr("ir_node").Pointer() = ir_node; x->attr("ir_node").Pointer() = ir_node;
PADDLE_ENFORCE(ir_node->Op()->Proto()); PADDLE_ENFORCE(ir_node->Op()->Proto());
...@@ -141,7 +141,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) { ...@@ -141,7 +141,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
} else if (ir_node->IsVar()) { } else if (ir_node->IsVar()) {
// Not create a Node for IR ControlDepVar, considering Inference currently // Not create a Node for IR ControlDepVar, considering Inference currently
// just used in single thread scenerio. // just used in single thread scenerio.
VLOG(4) << "get var " << ir_node->Name(); VLOG(40) << "get var " << ir_node->Name();
x = nodes.Create(Node::Type::kValue); x = nodes.Create(Node::Type::kValue);
x->attr("ir_node").Pointer() = ir_node; x->attr("ir_node").Pointer() = ir_node;
x->SetName(ir_node->Name()); x->SetName(ir_node->Name());
...@@ -151,9 +151,9 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) { ...@@ -151,9 +151,9 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
} }
ir_node_map.emplace(ir_node, x); ir_node_map.emplace(ir_node, x);
} }
VLOG(4) << "finish creating Nodes"; VLOG(40) << "finish creating Nodes";
VLOG(4) << "to create edge"; VLOG(40) << "to create edge";
// Create links // Create links
for (auto *ir_node : graph.Nodes()) { for (auto *ir_node : graph.Nodes()) {
auto it = ir_node_map.find(ir_node); auto it = ir_node_map.find(ir_node);
...@@ -175,7 +175,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) { ...@@ -175,7 +175,7 @@ void DataFlowGraph::Build(const framework::ir::Graph &graph) {
"Can't deduce any inputs from the graph, Is the graph empty?"); "Can't deduce any inputs from the graph, Is the graph empty?");
ir_graph = &graph; ir_graph = &graph;
VLOG(3) << "finished build from IR"; VLOG(30) << "finished build from IR";
} }
void DataFlowGraph::Clean() { void DataFlowGraph::Clean() {
......
...@@ -239,9 +239,10 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) { ...@@ -239,9 +239,10 @@ void DataFlowGraphToFluidPass::AddEngineOp(Node *node) {
framework::BlockDesc block_desc(nullptr, &proto); framework::BlockDesc block_desc(nullptr, &proto);
block_desc.Proto()->set_parent_idx(-1); block_desc.Proto()->set_parent_idx(-1);
block_desc.Proto()->set_idx(0); block_desc.Proto()->set_idx(0);
VLOG(4) << "origin variable size: " VLOG(40) << "origin variable size: "
<< argument_->origin_program_desc->blocks(0).vars().size(); << argument_->origin_program_desc->blocks(0).vars().size();
VLOG(4) << "transformed variable size: " << block_desc.Proto()->vars().size(); VLOG(40) << "transformed variable size: "
<< block_desc.Proto()->vars().size();
// copy ops. // copy ops.
for (auto *node : block_node->subgraph) { for (auto *node : block_node->subgraph) {
......
...@@ -29,7 +29,7 @@ void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) { ...@@ -29,7 +29,7 @@ void DFG_GraphvizDrawPass::Run(DataFlowGraph *graph) {
auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png"; auto png_path = dot_path.substr(0, dot_path.size() - 4) + ".png";
std::string message; std::string message;
VLOG(3) << "draw to " << png_path; VLOG(30) << "draw to " << png_path;
ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message); ExecShellCommand("dot -Tpng " + dot_path + " -o " + png_path, &message);
} }
......
...@@ -29,7 +29,7 @@ void FluidToIrPass::EnableParamModify(const std::string &model_dir, ...@@ -29,7 +29,7 @@ void FluidToIrPass::EnableParamModify(const std::string &model_dir,
PADDLE_ENFORCE(argument_); PADDLE_ENFORCE(argument_);
argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope); argument_->Set(framework::ir::kParamScopeAttr, new framework::Scope);
// Load parameters. // Load parameters.
VLOG(3) << "Loading parameters from " << model_dir; VLOG(30) << "Loading parameters from " << model_dir;
LoadParams(&argument_->Get<framework::Scope>(framework::ir::kParamScopeAttr), LoadParams(&argument_->Get<framework::Scope>(framework::ir::kParamScopeAttr),
model_dir, prog_file, param_file); model_dir, prog_file, param_file);
} }
......
...@@ -35,21 +35,21 @@ void ModelStorePass::Run(DataFlowGraph *x) { ...@@ -35,21 +35,21 @@ void ModelStorePass::Run(DataFlowGraph *x) {
std::stringstream ss; std::stringstream ss;
// NOTE these commands only works on linux. // NOTE these commands only works on linux.
ss << "mkdir -p " << *argument_->model_output_store_path; ss << "mkdir -p " << *argument_->model_output_store_path;
VLOG(3) << "run command: " << ss.str(); VLOG(30) << "run command: " << ss.str();
PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0); PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
ss.str(""); ss.str("");
ss << "cp " << *argument_->fluid_model_dir << "/*" ss << "cp " << *argument_->fluid_model_dir << "/*"
<< " " << *argument_->model_output_store_path; << " " << *argument_->model_output_store_path;
VLOG(3) << "run command: " << ss.str(); VLOG(30) << "run command: " << ss.str();
PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0); PADDLE_ENFORCE_EQ(system(ss.str().c_str()), 0);
// Store program // Store program
PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc, PADDLE_ENFORCE_NOT_NULL(argument_->transformed_program_desc,
"program desc is not transformed, should call " "program desc is not transformed, should call "
"DataFlowGraphToFluidPass first."); "DataFlowGraphToFluidPass first.");
VLOG(3) << "store analyzed program to " VLOG(30) << "store analyzed program to "
<< *argument_->model_output_store_path; << *argument_->model_output_store_path;
const std::string program_output_path = const std::string program_output_path =
*argument_->model_output_store_path + "/__model__"; *argument_->model_output_store_path + "/__model__";
std::ofstream file(program_output_path, std::ios::binary); std::ofstream file(program_output_path, std::ios::binary);
......
...@@ -23,7 +23,7 @@ namespace analysis { ...@@ -23,7 +23,7 @@ namespace analysis {
bool PassManager::Initialize(Argument* argument) { bool PassManager::Initialize(Argument* argument) {
argument_ = argument; argument_ = argument;
for (auto& pass : data_) { for (auto& pass : data_) {
VLOG(3) << "Initializing pass [" << pass->repr() << "]"; VLOG(30) << "Initializing pass [" << pass->repr() << "]";
if (!pass->Initialize(argument)) { if (!pass->Initialize(argument)) {
LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]"; LOG(ERROR) << "Failed to initialize pass [" << pass->repr() << "]";
return false; return false;
...@@ -34,7 +34,7 @@ bool PassManager::Initialize(Argument* argument) { ...@@ -34,7 +34,7 @@ bool PassManager::Initialize(Argument* argument) {
void DfgPassManager::RunAll() { void DfgPassManager::RunAll() {
PADDLE_ENFORCE(argument_); PADDLE_ENFORCE(argument_);
VLOG(3) << "Total " << data_.size() << " Analysys passes"; VLOG(30) << "Total " << data_.size() << " Analysys passes";
for (auto& pass : data_) { for (auto& pass : data_) {
string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]", string::PrettyLogEndl(string::Style::H1(), "* Running Analysis pass [%s]",
pass->repr()); pass->repr());
......
...@@ -232,7 +232,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() { ...@@ -232,7 +232,7 @@ std::vector<std::vector<Node *>> SubGraphSplitter::ExtractSubGraphs() {
BriefNode *brief_node = itr.second; BriefNode *brief_node = itr.second;
if (!brief_node->node->attr(kMarkerAttrName).Bool()) { if (!brief_node->node->attr(kMarkerAttrName).Bool()) {
VLOG(4) << brief_node->node->id() << " node not a trt candicate."; VLOG(40) << brief_node->node->id() << " node not a trt candicate.";
continue; continue;
} }
......
...@@ -25,9 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass( ...@@ -25,9 +25,9 @@ TensorRTSubGraphPass::TensorRTSubGraphPass(
void TensorRTSubGraphPass::Run(DataFlowGraph *graph) { void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)(); SubGraphFuse(graph, node_inside_subgraph_teller_, argument_)();
VLOG(4) << "debug info " VLOG(40) << "debug info "
<< graph->HumanReadableInfo(false /*show_values*/, << graph->HumanReadableInfo(false /*show_values*/,
true /*show_functions*/); true /*show_functions*/);
} }
} // namespace analysis } // namespace analysis
......
...@@ -38,7 +38,7 @@ using contrib::AnalysisConfig; ...@@ -38,7 +38,7 @@ using contrib::AnalysisConfig;
bool AnalysisPredictor::Init( bool AnalysisPredictor::Init(
const std::shared_ptr<framework::Scope> &parent_scope, const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program) { const std::shared_ptr<framework::ProgramDesc> &program) {
VLOG(3) << "Predictor::init()"; VLOG(30) << "Predictor::init()";
#if !defined(_WIN32) #if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(WARNING) << "Profiler is actived, might affect the performance";
...@@ -89,7 +89,7 @@ bool AnalysisPredictor::Init( ...@@ -89,7 +89,7 @@ bool AnalysisPredictor::Init(
bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, std::vector<PaddleTensor> *output_data,
int batch_size) { int batch_size) {
VLOG(3) << "Predictor::predict"; VLOG(30) << "Predictor::predict";
inference::Timer timer; inference::Timer timer;
timer.tic(); timer.tic();
// set feed variable // set feed variable
...@@ -109,7 +109,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -109,7 +109,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
LOG(ERROR) << "fail to get fetches"; LOG(ERROR) << "fail to get fetches";
return false; return false;
} }
VLOG(3) << "predict cost: " << timer.toc() << "ms"; VLOG(30) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
...@@ -119,7 +119,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -119,7 +119,7 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs, bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed"; VLOG(30) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) { if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get " LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size(); << inputs.size();
...@@ -184,7 +184,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch, ...@@ -184,7 +184,7 @@ void AnalysisPredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs, bool AnalysisPredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch"; VLOG(30) << "Predictor::get_fetch";
outputs->resize(fetchs_.size()); outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) { for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col")); int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
...@@ -246,7 +246,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { ...@@ -246,7 +246,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
} }
CHECK(argument_.transformed_program_desc); CHECK(argument_.transformed_program_desc);
VLOG(5) << "to prepare executor"; VLOG(50) << "to prepare executor";
inference_program_.reset( inference_program_.reset(
new framework::ProgramDesc(*argument_.transformed_program_desc)); new framework::ProgramDesc(*argument_.transformed_program_desc));
if (argument_.Has(framework::ir::kParamScopeAttr)) { if (argument_.Has(framework::ir::kParamScopeAttr)) {
...@@ -260,7 +260,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() { ...@@ -260,7 +260,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <> template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
VLOG(3) << "create AnalysisConfig"; VLOG(30) << "create AnalysisConfig";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memeroy // 1. GPU memeroy
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
...@@ -274,7 +274,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -274,7 +274,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" + std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory); std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag); flags.push_back(flag);
VLOG(3) << "set flag: " << flag; VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags); framework::InitGflags(flags);
} }
} }
......
...@@ -50,7 +50,7 @@ template <typename Target> ...@@ -50,7 +50,7 @@ template <typename Target>
bool PaddleInferenceAnakinPredictor<Target>::Init( bool PaddleInferenceAnakinPredictor<Target>::Init(
const contrib::AnakinConfig &config) { const contrib::AnakinConfig &config) {
if (!(graph_.load(config.model_file))) { if (!(graph_.load(config.model_file))) {
VLOG(3) << "fail to load graph from " << config.model_file; VLOG(30) << "fail to load graph from " << config.model_file;
return false; return false;
} }
auto inputs = graph_.get_ins(); auto inputs = graph_.get_ins();
...@@ -76,15 +76,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run( ...@@ -76,15 +76,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
std::vector<PaddleTensor> *output_data, int batch_size) { std::vector<PaddleTensor> *output_data, int batch_size) {
for (const auto &input : inputs) { for (const auto &input : inputs) {
if (input.dtype != PaddleDType::FLOAT32) { if (input.dtype != PaddleDType::FLOAT32) {
VLOG(3) << "Only support float type inputs. " << input.name VLOG(30) << "Only support float type inputs. " << input.name
<< "'s type is not float"; << "'s type is not float";
return false; return false;
} }
auto d_tensor_in_p = executor_p_->get_in(input.name); auto d_tensor_in_p = executor_p_->get_in(input.name);
auto net_shape = d_tensor_in_p->shape(); auto net_shape = d_tensor_in_p->shape();
if (net_shape.size() != input.shape.size()) { if (net_shape.size() != input.shape.size()) {
VLOG(3) << " input " << input.name VLOG(30) << " input " << input.name
<< "'s shape size should be equal to that of net"; << "'s shape size should be equal to that of net";
return false; return false;
} }
int sum = 1; int sum = 1;
...@@ -105,15 +105,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run( ...@@ -105,15 +105,15 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (input.lod.size() > 0) { if (input.lod.size() > 0) {
if (input.lod.size() > 1) { if (input.lod.size() > 1) {
VLOG(3) << " input lod first dim should <=1, but you set " VLOG(30) << " input lod first dim should <=1, but you set "
<< input.lod.size(); << input.lod.size();
return false; return false;
} }
std::vector<int> offset(input.lod[0].begin(), input.lod[0].end()); std::vector<int> offset(input.lod[0].begin(), input.lod[0].end());
d_tensor_in_p->set_seq_offset(offset); d_tensor_in_p->set_seq_offset(offset);
VLOG(3) << "offset.size(): " << offset.size(); VLOG(30) << "offset.size(): " << offset.size();
for (int i = 0; i < offset.size(); i++) { for (int i = 0; i < offset.size(); i++) {
VLOG(3) << offset[i]; VLOG(30) << offset[i];
} }
} }
...@@ -124,7 +124,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run( ...@@ -124,7 +124,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()), if (cudaMemcpy(d_data_p, static_cast<float *>(input.data.data()),
d_tensor_in_p->valid_size() * sizeof(float), d_tensor_in_p->valid_size() * sizeof(float),
cudaMemcpyHostToDevice) != 0) { cudaMemcpyHostToDevice) != 0) {
VLOG(3) << "copy data from CPU to GPU error"; VLOG(30) << "copy data from CPU to GPU error";
return false; return false;
} }
} }
...@@ -141,7 +141,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run( ...@@ -141,7 +141,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
#endif #endif
if (output_data->empty()) { if (output_data->empty()) {
VLOG(3) << "At least one output should be set with tensors' names."; VLOG(30) << "At least one output should be set with tensors' names.";
return false; return false;
} }
for (auto &output : *output_data) { for (auto &output : *output_data) {
...@@ -157,7 +157,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run( ...@@ -157,7 +157,7 @@ bool PaddleInferenceAnakinPredictor<Target>::Run(
if (cudaMemcpy(output.data.data(), tensor->mutable_data(), if (cudaMemcpy(output.data.data(), tensor->mutable_data(),
tensor->valid_size() * sizeof(float), tensor->valid_size() * sizeof(float),
cudaMemcpyDeviceToHost) != 0) { cudaMemcpyDeviceToHost) != 0) {
VLOG(3) << "copy data from GPU to CPU error"; VLOG(30) << "copy data from GPU to CPU error";
return false; return false;
} }
} }
...@@ -181,14 +181,14 @@ anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32> ...@@ -181,14 +181,14 @@ anakin::Net<Target, anakin::saber::AK_FLOAT, anakin::Precision::FP32>
template <typename Target> template <typename Target>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor>
PaddleInferenceAnakinPredictor<Target>::Clone() { PaddleInferenceAnakinPredictor<Target>::Clone() {
VLOG(3) << "Anakin Predictor::clone"; VLOG(30) << "Anakin Predictor::clone";
std::unique_ptr<PaddlePredictor> cls( std::unique_ptr<PaddlePredictor> cls(
new PaddleInferenceAnakinPredictor<Target>()); new PaddleInferenceAnakinPredictor<Target>());
// construct executer from other graph // construct executer from other graph
auto anakin_predictor_p = auto anakin_predictor_p =
dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get()); dynamic_cast<PaddleInferenceAnakinPredictor<Target> *>(cls.get());
if (!anakin_predictor_p) { if (!anakin_predictor_p) {
VLOG(3) << "fail to call Init"; VLOG(30) << "fail to call Init";
return nullptr; return nullptr;
} }
anakin_predictor_p->get_executer().init(graph_); anakin_predictor_p->get_executer().init(graph_);
...@@ -206,10 +206,10 @@ template <> ...@@ -206,10 +206,10 @@ template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>( CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
const contrib::AnakinConfig &config) { const contrib::AnakinConfig &config) {
VLOG(3) << "Anakin Predictor create."; VLOG(30) << "Anakin Predictor create.";
if (config.target_type == contrib::AnakinConfig::NVGPU) { if (config.target_type == contrib::AnakinConfig::NVGPU) {
#ifdef PADDLE_WITH_CUDA #ifdef PADDLE_WITH_CUDA
VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ]."; VLOG(30) << "Anakin Predictor create on [ NVIDIA GPU ].";
std::unique_ptr<PaddlePredictor> x( std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::NV>(config)); new PaddleInferenceAnakinPredictor<anakin::NV>(config));
return x; return x;
...@@ -218,12 +218,12 @@ CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>( ...@@ -218,12 +218,12 @@ CreatePaddlePredictor<contrib::AnakinConfig, PaddleEngineKind::kAnakin>(
return nullptr; return nullptr;
#endif #endif
} else if (config.target_type == contrib::AnakinConfig::X86) { } else if (config.target_type == contrib::AnakinConfig::X86) {
VLOG(3) << "Anakin Predictor create on [ Intel X86 ]."; VLOG(30) << "Anakin Predictor create on [ Intel X86 ].";
std::unique_ptr<PaddlePredictor> x( std::unique_ptr<PaddlePredictor> x(
new PaddleInferenceAnakinPredictor<anakin::X86>(config)); new PaddleInferenceAnakinPredictor<anakin::X86>(config));
return x; return x;
} else { } else {
VLOG(3) << "Anakin Predictor create on unknown platform."; VLOG(30) << "Anakin Predictor create on unknown platform.";
return nullptr; return nullptr;
} }
} }
......
...@@ -63,7 +63,7 @@ void NativePaddlePredictor::PrepareFeedFetch() { ...@@ -63,7 +63,7 @@ void NativePaddlePredictor::PrepareFeedFetch() {
bool NativePaddlePredictor::Init( bool NativePaddlePredictor::Init(
std::shared_ptr<framework::Scope> parent_scope) { std::shared_ptr<framework::Scope> parent_scope) {
VLOG(3) << "Predictor::init()"; VLOG(30) << "Predictor::init()";
#if !defined(_WIN32) #if !defined(_WIN32)
if (FLAGS_profile) { if (FLAGS_profile) {
LOG(WARNING) << "Profiler is actived, might affect the performance"; LOG(WARNING) << "Profiler is actived, might affect the performance";
...@@ -135,7 +135,7 @@ NativePaddlePredictor::~NativePaddlePredictor() { ...@@ -135,7 +135,7 @@ NativePaddlePredictor::~NativePaddlePredictor() {
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data, std::vector<PaddleTensor> *output_data,
int batch_size) { int batch_size) {
VLOG(3) << "Predictor::predict"; VLOG(30) << "Predictor::predict";
Timer timer; Timer timer;
timer.tic(); timer.tic();
// set feed variable // set feed variable
...@@ -147,17 +147,17 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -147,17 +147,17 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
} }
// Run the inference program // Run the inference program
// if share variables, we need not create variables // if share variables, we need not create variables
VLOG(4) << "Run prepared context"; VLOG(40) << "Run prepared context";
executor_->RunPreparedContext(ctx_.get(), scope, executor_->RunPreparedContext(ctx_.get(), scope,
false, /* don't create local scope each time*/ false, /* don't create local scope each time*/
false /* don't create variable each time */); false /* don't create variable each time */);
VLOG(4) << "Finish prepared context"; VLOG(40) << "Finish prepared context";
// get fetch variable // get fetch variable
if (!GetFetch(output_data, scope)) { if (!GetFetch(output_data, scope)) {
LOG(ERROR) << "fail to get fetches"; LOG(ERROR) << "fail to get fetches";
return false; return false;
} }
VLOG(3) << "predict cost: " << timer.toc() << "ms"; VLOG(30) << "predict cost: " << timer.toc() << "ms";
// Fix TensorArray reuse not cleaned bug. // Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get()); tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
...@@ -166,7 +166,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs, ...@@ -166,7 +166,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
} }
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() { std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
VLOG(3) << "Predictor::clone"; VLOG(30) << "Predictor::clone";
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_)); std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) { if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
...@@ -184,7 +184,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() { ...@@ -184,7 +184,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs, bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(3) << "Predictor::set_feed"; VLOG(30) << "Predictor::set_feed";
if (inputs.size() != feeds_.size()) { if (inputs.size() != feeds_.size()) {
LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get " LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
<< inputs.size(); << inputs.size();
...@@ -244,7 +244,7 @@ void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch, ...@@ -244,7 +244,7 @@ void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs, bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
framework::Scope *scope) { framework::Scope *scope) {
VLOG(3) << "Predictor::get_fetch"; VLOG(30) << "Predictor::get_fetch";
outputs->resize(fetchs_.size()); outputs->resize(fetchs_.size());
for (size_t i = 0; i < fetchs_.size(); ++i) { for (size_t i = 0; i < fetchs_.size(); ++i) {
int idx = boost::get<int>(fetchs_[i]->GetAttr("col")); int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
...@@ -269,7 +269,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs, ...@@ -269,7 +269,7 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
template <> template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) { NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
VLOG(3) << "create NativePaddlePredictor"; VLOG(30) << "create NativePaddlePredictor";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memeroy // 1. GPU memeroy
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
...@@ -283,7 +283,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor< ...@@ -283,7 +283,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
std::string flag = "--fraction_of_gpu_memory_to_use=" + std::string flag = "--fraction_of_gpu_memory_to_use=" +
num2str<float>(config.fraction_of_gpu_memory); num2str<float>(config.fraction_of_gpu_memory);
flags.push_back(flag); flags.push_back(flag);
VLOG(3) << "set flag: " << flag; VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags); framework::InitGflags(flags);
} }
} }
......
...@@ -34,7 +34,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -34,7 +34,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
bool Init(const std::shared_ptr<framework::Scope>& parent_scope) { bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
FLAGS_IA_enable_tensorrt_subgraph_engine = true; FLAGS_IA_enable_tensorrt_subgraph_engine = true;
VLOG(3) << "Predictor::init()"; VLOG(30) << "Predictor::init()";
if (config_.use_gpu) { if (config_.use_gpu) {
place_ = paddle::platform::CUDAPlace(config_.device); place_ = paddle::platform::CUDAPlace(config_.device);
} else { } else {
...@@ -70,7 +70,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -70,7 +70,7 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
OptimizeInferenceProgram(); OptimizeInferenceProgram();
ctx_ = executor_->Prepare(*inference_program_, 0); ctx_ = executor_->Prepare(*inference_program_, 0);
VLOG(5) << "to create variables"; VLOG(50) << "to create variables";
executor_->CreateVariables(*inference_program_, executor_->CreateVariables(*inference_program_,
sub_scope_ ? sub_scope_ : scope_.get(), 0); sub_scope_ ? sub_scope_ : scope_.get(), 0);
// Get the feed_target_names and fetch_target_names // Get the feed_target_names and fetch_target_names
...@@ -114,9 +114,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor { ...@@ -114,9 +114,9 @@ class TensorRTSubgraphPredictor : public NativePaddlePredictor {
new ProgramDesc(*inference_program_->Proto())); new ProgramDesc(*inference_program_->Proto()));
Singleton<Analyzer>::Global().Run(&argument); Singleton<Analyzer>::Global().Run(&argument);
CHECK(argument.transformed_program_desc); CHECK(argument.transformed_program_desc);
VLOG(5) << "transformed program:\n" VLOG(50) << "transformed program:\n"
<< argument.transformed_program_desc->SerializeAsString(); << argument.transformed_program_desc->SerializeAsString();
VLOG(5) << "to prepare executor"; VLOG(50) << "to prepare executor";
inference_program_.reset( inference_program_.reset(
new framework::ProgramDesc(*argument.transformed_program_desc)); new framework::ProgramDesc(*argument.transformed_program_desc));
} }
...@@ -129,7 +129,7 @@ template <> ...@@ -129,7 +129,7 @@ template <>
std::unique_ptr<PaddlePredictor> std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>( CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
const MixedRTConfig& config) { const MixedRTConfig& config) {
VLOG(3) << "create TensorRTSubgraphPredictor"; VLOG(30) << "create TensorRTSubgraphPredictor";
if (config.use_gpu) { if (config.use_gpu) {
// 1. GPU memeroy // 1. GPU memeroy
PADDLE_ENFORCE_GT( PADDLE_ENFORCE_GT(
...@@ -143,7 +143,7 @@ CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>( ...@@ -143,7 +143,7 @@ CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
std::string flag = "--fraction_of_gpu_memory_to_use=" + std::string flag = "--fraction_of_gpu_memory_to_use=" +
std::to_string(config.fraction_of_gpu_memory); std::to_string(config.fraction_of_gpu_memory);
flags.push_back(flag); flags.push_back(flag);
VLOG(3) << "set flag: " << flag; VLOG(30) << "set flag: " << flag;
framework::InitGflags(flags); framework::InitGflags(flags);
} }
} }
......
...@@ -45,7 +45,7 @@ void Main() { ...@@ -45,7 +45,7 @@ void Main() {
config.fraction_of_gpu_memory = 0.1; // set by yourself config.fraction_of_gpu_memory = 0.1; // set by yourself
predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config); predictor = CreatePaddlePredictor<paddle::contrib::MixedRTConfig>(config);
VLOG(3) << "begin to process data"; VLOG(30) << "begin to process data";
// Just a single batch of data. // Just a single batch of data.
std::string line; std::string line;
std::ifstream file(FLAGS_data); std::ifstream file(FLAGS_data);
...@@ -60,13 +60,13 @@ void Main() { ...@@ -60,13 +60,13 @@ void Main() {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32; input.dtype = PaddleDType::FLOAT32;
VLOG(3) << "run executor"; VLOG(30) << "run executor";
std::vector<PaddleTensor> output; std::vector<PaddleTensor> output;
predictor->Run({input}, &output, 1); predictor->Run({input}, &output, 1);
VLOG(3) << "output.size " << output.size(); VLOG(30) << "output.size " << output.size();
auto& tensor = output.front(); auto& tensor = output.front();
VLOG(3) << "output: " << SummaryTensor(tensor); VLOG(30) << "output: " << SummaryTensor(tensor);
// compare with reference result // compare with reference result
CheckOutput(FLAGS_refer, tensor); CheckOutput(FLAGS_refer, tensor);
......
...@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep, ...@@ -47,7 +47,7 @@ static void split(const std::string& str, char sep,
} }
Record ProcessALine(const std::string& line) { Record ProcessALine(const std::string& line) {
VLOG(3) << "process a line"; VLOG(30) << "process a line";
std::vector<std::string> columns; std::vector<std::string> columns;
split(line, '\t', &columns); split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL) CHECK_EQ(columns.size(), 2UL)
...@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) { ...@@ -65,8 +65,8 @@ Record ProcessALine(const std::string& line) {
for (auto& s : shape_strs) { for (auto& s : shape_strs) {
record.shape.push_back(std::stoi(s)); record.shape.push_back(std::stoi(s));
} }
VLOG(3) << "data size " << record.data.size(); VLOG(30) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size(); VLOG(30) << "data shape size " << record.shape.size();
return record; return record;
} }
...@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) { ...@@ -78,8 +78,8 @@ void CheckOutput(const std::string& referfile, const PaddleTensor& output) {
file.close(); file.close();
size_t numel = output.data.length() / PaddleDtypeSize(output.dtype); size_t numel = output.data.length() / PaddleDtypeSize(output.dtype);
VLOG(3) << "predictor output numel " << numel; VLOG(30) << "predictor output numel " << numel;
VLOG(3) << "reference output numel " << refer.data.size(); VLOG(30) << "reference output numel " << refer.data.size();
CHECK_EQ(numel, refer.data.size()); CHECK_EQ(numel, refer.data.size());
switch (output.dtype) { switch (output.dtype) {
case PaddleDType::INT64: { case PaddleDType::INT64: {
......
...@@ -49,11 +49,11 @@ void Main(bool use_gpu) { ...@@ -49,11 +49,11 @@ void Main(bool use_gpu) {
config.fraction_of_gpu_memory = 0.1; // set by yourself config.fraction_of_gpu_memory = 0.1; // set by yourself
} }
VLOG(3) << "init predictor"; VLOG(30) << "init predictor";
predictor = CreatePaddlePredictor<NativeConfig>(config); predictor = CreatePaddlePredictor<NativeConfig>(config);
analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config); analysis_predictor = CreatePaddlePredictor<AnalysisConfig>(config);
VLOG(3) << "begin to process data"; VLOG(30) << "begin to process data";
// Just a single batch of data. // Just a single batch of data.
std::string line; std::string line;
std::ifstream file(FLAGS_data); std::ifstream file(FLAGS_data);
...@@ -68,13 +68,13 @@ void Main(bool use_gpu) { ...@@ -68,13 +68,13 @@ void Main(bool use_gpu) {
PaddleBuf(record.data.data(), record.data.size() * sizeof(float)); PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
input.dtype = PaddleDType::FLOAT32; input.dtype = PaddleDType::FLOAT32;
VLOG(3) << "run executor"; VLOG(30) << "run executor";
std::vector<PaddleTensor> output, analysis_output; std::vector<PaddleTensor> output, analysis_output;
predictor->Run({input}, &output, 1); predictor->Run({input}, &output, 1);
VLOG(3) << "output.size " << output.size(); VLOG(30) << "output.size " << output.size();
auto& tensor = output.front(); auto& tensor = output.front();
VLOG(3) << "output: " << SummaryTensor(tensor); VLOG(30) << "output: " << SummaryTensor(tensor);
// compare with reference result // compare with reference result
CheckOutput(FLAGS_refer, tensor); CheckOutput(FLAGS_refer, tensor);
......
...@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) { ...@@ -26,7 +26,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
// parameter. // parameter.
if (var_name == "feed" || var_name == "fetch") continue; if (var_name == "feed" || var_name == "fetch") continue;
if (var->Type() == typeid(framework::LoDTensorArray)) { if (var->Type() == typeid(framework::LoDTensorArray)) {
VLOG(4) << "collect " << var_name; VLOG(40) << "collect " << var_name;
arrays_.push_back(var->GetMutable<framework::LoDTensorArray>()); arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
} }
} }
...@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) { ...@@ -34,7 +34,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
CollectTensorArrays(kid); CollectTensorArrays(kid);
} }
VLOG(3) << "Collect " << arrays_.size() << " arrays"; VLOG(30) << "Collect " << arrays_.size() << " arrays";
flag_ = false; flag_ = false;
} }
} }
......
...@@ -77,7 +77,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope, ...@@ -77,7 +77,7 @@ void LoadPersistables(framework::Executor* executor, framework::Scope* scope,
for (auto* var : global_block.AllVars()) { for (auto* var : global_block.AllVars()) {
if (IsPersistable(var)) { if (IsPersistable(var)) {
VLOG(3) << "persistable variable's name: " << var->Name(); VLOG(30) << "persistable variable's name: " << var->Name();
framework::VarDesc* new_var = load_block->Var(var->Name()); framework::VarDesc* new_var = load_block->Var(var->Name());
new_var->SetShape(var->GetShape()); new_var->SetShape(var->GetShape());
...@@ -120,7 +120,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor, ...@@ -120,7 +120,7 @@ std::unique_ptr<framework::ProgramDesc> Load(framework::Executor* executor,
const std::string& dirname) { const std::string& dirname) {
std::string model_filename = dirname + "/__model__"; std::string model_filename = dirname + "/__model__";
std::string program_desc_str; std::string program_desc_str;
VLOG(3) << "loading model from " << model_filename; VLOG(30) << "loading model from " << model_filename;
ReadBinaryFile(model_filename, &program_desc_str); ReadBinaryFile(model_filename, &program_desc_str);
std::unique_ptr<framework::ProgramDesc> main_program( std::unique_ptr<framework::ProgramDesc> main_program(
......
...@@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class ConcatOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias"; VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class DropoutOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid dropout op to tensorrt dropout layer"; VLOG(40) << "convert a fluid dropout op to tensorrt dropout layer";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]);
......
...@@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter { ...@@ -52,7 +52,7 @@ class FcOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid fc op to tensorrt fc layer without bias"; VLOG(40) << "convert a fluid fc op to tensorrt fc layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
......
...@@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class MulOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid mul op to tensorrt mul layer without bias"; VLOG(40) << "convert a fluid mul op to tensorrt mul layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class PadOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) << "convert a fluid transpose op to tensorrt tranpose layer"; VLOG(40) << "convert a fluid transpose op to tensorrt tranpose layer";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class Pool2dOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) VLOG(40)
<< "convert a fluid pool2d op to tensorrt pool2d layer without bias"; << "convert a fluid pool2d op to tensorrt pool2d layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter { ...@@ -25,7 +25,7 @@ class SoftMaxOpConverter : public OpConverter {
public: public:
void operator()(const framework::proto::OpDesc& op, void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override { const framework::Scope& scope, bool test_mode) override {
VLOG(4) VLOG(40)
<< "convert a fluid softmax op to tensorrt softmax layer without bias"; << "convert a fluid softmax op to tensorrt softmax layer without bias";
framework::OpDesc op_desc(op, nullptr); framework::OpDesc op_desc(op, nullptr);
// Declare inputs // Declare inputs
......
...@@ -217,9 +217,9 @@ void single_test() { ...@@ -217,9 +217,9 @@ void single_test() {
LOG(INFO) << "sequence_length = " << seq_offset[seq_offset.size() - 1]; LOG(INFO) << "sequence_length = " << seq_offset[seq_offset.size() - 1];
float* data_o = static_cast<float*>(outputs[0].data.data()); float* data_o = static_cast<float*>(outputs[0].data.data());
VLOG(3) << "outputs[0].data.length() = " << outputs[0].data.length(); VLOG(30) << "outputs[0].data.length() = " << outputs[0].data.length();
for (size_t j = 0; j < outputs[0].data.length(); ++j) { for (size_t j = 0; j < outputs[0].data.length(); ++j) {
VLOG(3) << "output[" << j << "]: " << data_o[j]; VLOG(30) << "output[" << j << "]: " << data_o[j];
} }
} }
} }
......
...@@ -27,7 +27,7 @@ struct Record { ...@@ -27,7 +27,7 @@ struct Record {
}; };
Record ProcessALine(const std::string &line) { Record ProcessALine(const std::string &line) {
VLOG(3) << "process a line"; VLOG(30) << "process a line";
std::vector<std::string> columns; std::vector<std::string> columns;
split(line, '\t', &columns); split(line, '\t', &columns);
CHECK_EQ(columns.size(), 2UL) CHECK_EQ(columns.size(), 2UL)
...@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) { ...@@ -45,8 +45,8 @@ Record ProcessALine(const std::string &line) {
for (auto &s : shape_strs) { for (auto &s : shape_strs) {
record.shape.push_back(std::stoi(s)); record.shape.push_back(std::stoi(s));
} }
VLOG(3) << "data size " << record.data.size(); VLOG(30) << "data size " << record.data.size();
VLOG(3) << "data shape size " << record.shape.size(); VLOG(30) << "data shape size " << record.shape.size();
return record; return record;
} }
......
...@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator( ...@@ -32,11 +32,11 @@ BuddyAllocator::BuddyAllocator(
system_allocator_(std::move(system_allocator)) {} system_allocator_(std::move(system_allocator)) {}
BuddyAllocator::~BuddyAllocator() { BuddyAllocator::~BuddyAllocator() {
VLOG(10) << "BuddyAllocator Disconstructor makes sure that all of these " VLOG(100) << "BuddyAllocator Disconstructor makes sure that all of these "
"have actually been freed"; "have actually been freed";
while (!pool_.empty()) { while (!pool_.empty()) {
auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin())); auto block = static_cast<MemoryBlock*>(std::get<2>(*pool_.begin()));
VLOG(10) << "Free from block (" << block << ", " << max_chunk_size_ << ")"; VLOG(100) << "Free from block (" << block << ", " << max_chunk_size_ << ")";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -57,12 +57,12 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
// acquire the allocator lock // acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
VLOG(10) << "Allocate " << unaligned_size << " bytes from chunk size " VLOG(100) << "Allocate " << unaligned_size << " bytes from chunk size "
<< size; << size;
// if the allocation is huge, send directly to the system allocator // if the allocation is huge, send directly to the system allocator
if (size > max_chunk_size_) { if (size > max_chunk_size_) {
VLOG(10) << "Allocate from system allocator."; VLOG(100) << "Allocate from system allocator.";
return SystemAlloc(size); return SystemAlloc(size);
} }
...@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) { ...@@ -77,9 +77,9 @@ void* BuddyAllocator::Alloc(size_t unaligned_size) {
return nullptr; return nullptr;
} }
} else { } else {
VLOG(10) << "Allocation from existing memory block " << std::get<2>(*it) VLOG(100) << "Allocation from existing memory block " << std::get<2>(*it)
<< " at address " << " at address "
<< reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data(); << reinterpret_cast<MemoryBlock*>(std::get<2>(*it))->data();
} }
total_used_ += size; total_used_ += size;
...@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) { ...@@ -96,10 +96,10 @@ void BuddyAllocator::Free(void* p) {
// Acquire the allocator lock // Acquire the allocator lock
std::lock_guard<std::mutex> lock(mutex_); std::lock_guard<std::mutex> lock(mutex_);
VLOG(10) << "Free from address " << block; VLOG(100) << "Free from address " << block;
if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) { if (block->type(cache_) == MemoryBlock::HUGE_CHUNK) {
VLOG(10) << "Free directly from system allocator"; VLOG(100) << "Free directly from system allocator";
system_allocator_->Free(block, block->total_size(cache_), system_allocator_->Free(block, block->total_size(cache_),
block->index(cache_)); block->index(cache_));
...@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -116,8 +116,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the right buddy // Trying to merge the right buddy
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
VLOG(10) << "Merging this block " << block << " with its right buddy " VLOG(100) << "Merging this block " << block << " with its right buddy "
<< block->right_buddy(cache_); << block->right_buddy(cache_);
auto right_buddy = block->right_buddy(cache_); auto right_buddy = block->right_buddy(cache_);
...@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -134,8 +134,8 @@ void BuddyAllocator::Free(void* p) {
// Trying to merge the left buddy // Trying to merge the left buddy
if (block->has_left_buddy(cache_)) { if (block->has_left_buddy(cache_)) {
VLOG(10) << "Merging this block " << block << " with its left buddy " VLOG(100) << "Merging this block " << block << " with its left buddy "
<< block->left_buddy(cache_); << block->left_buddy(cache_);
auto left_buddy = block->left_buddy(cache_); auto left_buddy = block->left_buddy(cache_);
...@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) { ...@@ -151,8 +151,8 @@ void BuddyAllocator::Free(void* p) {
} }
// Dumping this block into pool // Dumping this block into pool
VLOG(10) << "Inserting free block (" << block << ", " VLOG(100) << "Inserting free block (" << block << ", "
<< block->total_size(cache_) << ")"; << block->total_size(cache_) << ")";
pool_.insert( pool_.insert(
IndexSizeAddress(block->index(cache_), block->total_size(cache_), block)); IndexSizeAddress(block->index(cache_), block->total_size(cache_), block));
...@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) { ...@@ -174,7 +174,7 @@ void* BuddyAllocator::SystemAlloc(size_t size) {
size_t index = 0; size_t index = 0;
void* p = system_allocator_->Alloc(&index, size); void* p = system_allocator_->Alloc(&index, size);
VLOG(10) << "Allocated " << p << " from system allocator."; VLOG(100) << "Allocated " << p << " from system allocator.";
if (p == nullptr) return nullptr; if (p == nullptr) return nullptr;
...@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() { ...@@ -200,8 +200,8 @@ BuddyAllocator::PoolSet::iterator BuddyAllocator::RefillPool() {
if (p == nullptr) return pool_.end(); if (p == nullptr) return pool_.end();
VLOG(10) << "Creating and inserting new block " << p VLOG(100) << "Creating and inserting new block " << p
<< " from system allocator"; << " from system allocator";
static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index, static_cast<MemoryBlock*>(p)->init(&cache_, MemoryBlock::FREE_CHUNK, index,
max_chunk_size_, nullptr, nullptr); max_chunk_size_, nullptr, nullptr);
...@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it, ...@@ -245,19 +245,19 @@ void* BuddyAllocator::SplitToAlloc(BuddyAllocator::PoolSet::iterator it,
auto block = static_cast<MemoryBlock*>(std::get<2>(*it)); auto block = static_cast<MemoryBlock*>(std::get<2>(*it));
pool_.erase(it); pool_.erase(it);
VLOG(10) << "Split block (" << block << ", " << block->total_size(cache_) VLOG(100) << "Split block (" << block << ", " << block->total_size(cache_)
<< ") into"; << ") into";
block->split(&cache_, size); block->split(&cache_, size);
VLOG(10) << "Left block (" << block << ", " << block->total_size(cache_) VLOG(100) << "Left block (" << block << ", " << block->total_size(cache_)
<< ")"; << ")";
block->set_type(&cache_, MemoryBlock::ARENA_CHUNK); block->set_type(&cache_, MemoryBlock::ARENA_CHUNK);
// the rest of memory if exist // the rest of memory if exist
if (block->has_right_buddy(cache_)) { if (block->has_right_buddy(cache_)) {
if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) { if (block->right_buddy(cache_)->type(cache_) == MemoryBlock::FREE_CHUNK) {
VLOG(10) << "Insert right block (" << block->right_buddy(cache_) << ", " VLOG(100) << "Insert right block (" << block->right_buddy(cache_) << ", "
<< block->right_buddy(cache_)->total_size(cache_) << ")"; << block->right_buddy(cache_)->total_size(cache_) << ")";
pool_.insert( pool_.insert(
IndexSizeAddress(block->right_buddy(cache_)->index(cache_), IndexSizeAddress(block->right_buddy(cache_)->index(cache_),
...@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() { ...@@ -284,7 +284,7 @@ void BuddyAllocator::CleanIdleFallBackAlloc() {
return; return;
} }
VLOG(10) << "Return block " << block << " to fallback allocator."; VLOG(100) << "Return block " << block << " to fallback allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
...@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() { ...@@ -320,7 +320,7 @@ void BuddyAllocator::CleanIdleNormalAlloc() {
MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool)); MemoryBlock* block = static_cast<MemoryBlock*>(std::get<2>(*pool));
VLOG(10) << "Return block " << block << " to base allocator."; VLOG(100) << "Return block " << block << " to base allocator.";
system_allocator_->Free(block, max_chunk_size_, block->index(cache_)); system_allocator_->Free(block, max_chunk_size_, block->index(cache_));
cache_.invalidate(block); cache_.invalidate(block);
......
...@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const { ...@@ -29,7 +29,7 @@ MemoryBlock::Desc MetadataCache::load(const MemoryBlock* block) const {
return existing_desc->second; return existing_desc->second;
} else { } else {
auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block); auto* desc = reinterpret_cast<const MemoryBlock::Desc*>(block);
VLOG(10) << "Load MemoryBlock::Desc type=" << desc->type; VLOG(100) << "Load MemoryBlock::Desc type=" << desc->type;
PADDLE_ASSERT(desc->check_guards()); PADDLE_ASSERT(desc->check_guards());
return *reinterpret_cast<const MemoryBlock::Desc*>(block); return *reinterpret_cast<const MemoryBlock::Desc*>(block);
} }
......
...@@ -71,18 +71,18 @@ struct NaiveAllocator { ...@@ -71,18 +71,18 @@ struct NaiveAllocator {
template <> template <>
void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) { void* Alloc<platform::CPUPlace>(platform::CPUPlace place, size_t size) {
VLOG(10) << "Allocate " << size << " bytes on " << platform::Place(place); VLOG(100) << "Allocate " << size << " bytes on " << platform::Place(place);
void* p = GetCPUBuddyAllocator()->Alloc(size); void* p = GetCPUBuddyAllocator()->Alloc(size);
if (FLAGS_init_allocated_mem) { if (FLAGS_init_allocated_mem) {
memset(p, 0xEF, size); memset(p, 0xEF, size);
} }
VLOG(10) << " pointer=" << p; VLOG(100) << " pointer=" << p;
return p; return p;
} }
template <> template <>
void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) { void Free<platform::CPUPlace>(platform::CPUPlace place, void* p) {
VLOG(10) << "Free pointer=" << p << " on " << platform::Place(place); VLOG(100) << "Free pointer=" << p << " on " << platform::Place(place);
GetCPUBuddyAllocator()->Free(p); GetCPUBuddyAllocator()->Free(p);
} }
...@@ -110,12 +110,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) { ...@@ -110,12 +110,12 @@ BuddyAllocator* GetGPUBuddyAllocator(int gpu_id) {
std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)), std::unique_ptr<detail::SystemAllocator>(new detail::GPUAllocator(i)),
platform::GpuMinChunkSize(), platform::GpuMaxChunkSize()); platform::GpuMinChunkSize(), platform::GpuMaxChunkSize());
VLOG(10) << "\n\nNOTE: each GPU device use " VLOG(100) << "\n\nNOTE: each GPU device use "
<< FLAGS_fraction_of_gpu_memory_to_use * 100 << FLAGS_fraction_of_gpu_memory_to_use * 100
<< "% of GPU memory.\n" << "% of GPU memory.\n"
<< "You can set GFlags environment variable '" << "You can set GFlags environment variable '"
<< "FLAGS_fraction_of_gpu_memory_to_use" << "FLAGS_fraction_of_gpu_memory_to_use"
<< "' to change the fraction of GPU usage.\n\n"; << "' to change the fraction of GPU usage.\n\n";
} }
}); });
......
...@@ -95,7 +95,7 @@ class ActivationGradKernel ...@@ -95,7 +95,7 @@ class ActivationGradKernel
auto x = framework::EigenVector<T>::Flatten(*X); auto x = framework::EigenVector<T>::Flatten(*X);
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} else { } else {
VLOG(10) << " Inplace activation "; VLOG(100) << " Inplace activation ";
auto x = framework::EigenVector<T>::Flatten(*dX); auto x = framework::EigenVector<T>::Flatten(*dX);
functor(*place, x, out, dout, dx); functor(*place, x, out, dout, dx);
} }
......
...@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> { ...@@ -297,7 +297,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
auto& grad = auto& grad =
Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad"); Ref(ctx.Input<framework::SelectedRows>("Grad"), "Must set Grad");
if (grad.rows().size() == 0) { if (grad.rows().size() == 0) {
VLOG(3) << "grad row size is 0!!"; VLOG(30) << "grad row size is 0!!";
return; return;
} }
......
...@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase { ...@@ -49,7 +49,7 @@ class ArrayOp : public framework::OperatorBase {
} else { } else {
offset = static_cast<size_t>(*i_tensor.data<int64_t>()); offset = static_cast<size_t>(*i_tensor.data<int64_t>());
} }
VLOG(10) << " Offset = " << offset; VLOG(100) << " Offset = " << offset;
return offset; return offset;
} }
}; };
......
...@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase { ...@@ -148,8 +148,8 @@ class ArrayToLoDTensorOp : public framework::OperatorBase {
size_t start_offset = lod_and_offset.second.first; size_t start_offset = lod_and_offset.second.first;
size_t end_offset = lod_and_offset.second.second; size_t end_offset = lod_and_offset.second.second;
VLOG(10) << "idx=" << idx << " x_idx=" << x_idx << " [" VLOG(100) << "idx=" << idx << " x_idx=" << x_idx << " ["
<< ", " << end_offset << "]"; << ", " << end_offset << "]";
// Copy data // Copy data
PADDLE_ENFORCE_GE(end_offset, start_offset); PADDLE_ENFORCE_GE(end_offset, start_offset);
size_t len = end_offset - start_offset; size_t len = end_offset - start_offset;
......
...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T> ...@@ -96,7 +96,7 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
mode_ = CUDNN_BATCHNORM_SPATIAL; mode_ = CUDNN_BATCHNORM_SPATIAL;
#endif #endif
VLOG(3) << "Setting descriptors."; VLOG(30) << "Setting descriptors.";
std::vector<int> dims; std::vector<int> dims;
std::vector<int> strides; std::vector<int> strides;
if (data_layout == DataLayout::kNCHW) { if (data_layout == DataLayout::kNCHW) {
......
...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids, ...@@ -33,11 +33,11 @@ void BeamSearch::operator()(const framework::LoDTensor &pre_ids,
auto items = SelectTopBeamSizeItems(pre_ids, pre_scores); auto items = SelectTopBeamSizeItems(pre_ids, pre_scores);
auto selected_items = ToMap(items, high_level.back()); auto selected_items = ToMap(items, high_level.back());
VLOG(3) << "selected_items:"; VLOG(30) << "selected_items:";
for (size_t i = 0; i < selected_items.size(); ++i) { for (size_t i = 0; i < selected_items.size(); ++i) {
VLOG(3) << "offset:" << i; VLOG(30) << "offset:" << i;
for (auto &item : selected_items[i]) { for (auto &item : selected_items[i]) {
VLOG(3) << ItemToString(item); VLOG(30) << ItemToString(item);
} }
} }
...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems( ...@@ -138,11 +138,11 @@ std::vector<std::vector<BeamSearch::Item>> BeamSearch::SelectTopBeamSizeItems(
} }
result.emplace_back(items); result.emplace_back(items);
} }
VLOG(3) << "SelectTopBeamSizeItems result size " << result.size(); VLOG(30) << "SelectTopBeamSizeItems result size " << result.size();
for (auto &items : result) { for (auto &items : result) {
VLOG(3) << "item set:"; VLOG(30) << "item set:";
for (auto &item : items) { for (auto &item : items) {
VLOG(3) << ItemToString(item); VLOG(30) << ItemToString(item);
} }
} }
......
...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase { ...@@ -46,8 +46,8 @@ class CheckpointNotifyOp : public framework::OperatorBase {
auto lookup_table_save_dir = auto lookup_table_save_dir =
string::Sprintf("%s/%s_%d", dir, lookup_table_name, i); string::Sprintf("%s/%s_%d", dir, lookup_table_name, i);
rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir); rpc_client->AsyncCheckpointNotify(epmap[i], lookup_table_save_dir);
VLOG(3) << "checkpoint notify sending lookup table: " << lookup_table_name VLOG(30) << "checkpoint notify sending lookup table: "
<< " and dir:" << dir << " to " << epmap[i]; << lookup_table_name << " and dir:" << dir << " to " << epmap[i];
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
} }
......
...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel { ...@@ -37,7 +37,7 @@ class ConcatOp : public framework::OperatorWithKernel {
PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0."); PADDLE_ENFORCE_GT(n, 0, "Input tensors count should > 0.");
if (n == 1) { if (n == 1) {
VLOG(3) << "Warning: concat op have only one input, may waste memory"; VLOG(30) << "Warning: concat op have only one input, may waste memory";
} }
auto out_dims = ins[0]; auto out_dims = ins[0];
......
...@@ -143,11 +143,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> { ...@@ -143,11 +143,11 @@ class CUDNNConvOpKernel : public framework::OpKernel<T> {
cudnn_conv_desc, CUDNN_TENSOR_OP_MATH)); cudnn_conv_desc, CUDNN_TENSOR_OP_MATH));
// Currently tensor core is only enabled using this algo // Currently tensor core is only enabled using this algo
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM;
VLOG(5) << "use cudnn_tensor_op_math"; VLOG(50) << "use cudnn_tensor_op_math";
} else { } else {
CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType( CUDNN_ENFORCE(platform::dynload::cudnnSetConvolutionMathType(
cudnn_conv_desc, CUDNN_DEFAULT_MATH)); cudnn_conv_desc, CUDNN_DEFAULT_MATH));
VLOG(5) << "NOT use cudnn_tensor_op_math"; VLOG(50) << "NOT use cudnn_tensor_op_math";
} }
#endif #endif
......
...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() { ...@@ -133,10 +133,10 @@ void AsyncBRPCServer::StartServer() {
void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); } void AsyncBRPCServer::ShutDownImpl() { server_.Stop(1000); }
void AsyncBRPCServer::WaitServerReady() { void AsyncBRPCServer::WaitServerReady() {
VLOG(3) << "AsyncGRPCServer is wait server ready"; VLOG(30) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(3) << "AsyncGRPCServer WaitSeverReady"; VLOG(30) << "AsyncGRPCServer WaitSeverReady";
} }
}; // namespace distributed }; // namespace distributed
......
...@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() { ...@@ -38,7 +38,7 @@ void GRPCClient::SendComplete() {
std::unique_lock<std::mutex> lk(completed_mutex_); std::unique_lock<std::mutex> lk(completed_mutex_);
if (!completed_) { if (!completed_) {
for (auto& it : channels_) { for (auto& it : channels_) {
VLOG(3) << "send complete message to " << it.first; VLOG(30) << "send complete message to " << it.first;
this->AsyncSendComplete(it.first); this->AsyncSendComplete(it.first);
} }
PADDLE_ENFORCE(this->Wait(), "internal grpc error"); PADDLE_ENFORCE(this->Wait(), "internal grpc error");
...@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep, ...@@ -81,7 +81,7 @@ VarHandlePtr GRPCClient::AsyncSendVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_); SerializeToByteBuffer(var_name_val, var, *p_ctx, &req, "", trainer_id_);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = nullptr; s->response_call_back_ = nullptr;
...@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep, ...@@ -142,7 +142,7 @@ VarHandlePtr GRPCClient::AsyncGetVar(const std::string& ep,
::grpc::ByteBuffer buf; ::grpc::ByteBuffer buf;
RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf); RequestToByteBuffer<sendrecv::VariableMessage>(req, &buf);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep, ...@@ -190,7 +190,7 @@ VarHandlePtr GRPCClient::AsyncPrefetchVar(const std::string& ep,
::grpc::ByteBuffer req; ::grpc::ByteBuffer req;
SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val); SerializeToByteBuffer(in_var_name_val, var, *p_ctx, &req, out_var_name_val);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin"; VLOG(30) << s->GetVarHandlePtr()->String() << " begin";
// stub context // stub context
s->response_call_back_ = ProcGetResponse; s->response_call_back_ = ProcGetResponse;
...@@ -328,14 +328,14 @@ void GRPCClient::Proceed() { ...@@ -328,14 +328,14 @@ void GRPCClient::Proceed() {
void* tag = nullptr; void* tag = nullptr;
bool ok = false; bool ok = false;
VLOG(3) << "GRPCClient Proceed begin"; VLOG(30) << "GRPCClient Proceed begin";
while (!stopped_ && cq_.Next(&tag, &ok)) { while (!stopped_ && cq_.Next(&tag, &ok)) {
BaseProcessor* c = static_cast<BaseProcessor*>(tag); BaseProcessor* c = static_cast<BaseProcessor*>(tag);
GPR_ASSERT(ok); GPR_ASSERT(ok);
PADDLE_ENFORCE(c); PADDLE_ENFORCE(c);
if (c->status_.ok()) { if (c->status_.ok()) {
VLOG(3) << c->GetVarHandlePtr()->String() << " process"; VLOG(30) << c->GetVarHandlePtr()->String() << " process";
c->Process(); c->Process();
} else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) { } else if (c->status_.error_code() == grpc::StatusCode::DEADLINE_EXCEEDED) {
// FIXME(gongwb): parse error_details? // FIXME(gongwb): parse error_details?
...@@ -370,7 +370,7 @@ void GRPCClient::Proceed() { ...@@ -370,7 +370,7 @@ void GRPCClient::Proceed() {
sync_cond_.notify_all(); sync_cond_.notify_all();
} }
} }
VLOG(3) << "GRPCClient Proceed end"; VLOG(30) << "GRPCClient Proceed end";
} }
std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) { std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
......
...@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase { ...@@ -98,7 +98,7 @@ class RequestSend final : public RequestBase {
void Process() override { void Process() override {
std::string varname = GetReqName(); std::string varname = GetReqName();
VLOG(4) << "RequestSend var_name:" << varname; VLOG(40) << "RequestSend var_name:" << varname;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = request_->GetVar(); auto invar = request_->GetVar();
...@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase { ...@@ -135,7 +135,7 @@ class RequestGet final : public RequestBase {
// proc request. // proc request.
std::string varname = request_.varname(); std::string varname = request_.varname();
int trainer_id = request_.trainer_id(); int trainer_id = request_.trainer_id();
VLOG(4) << "RequestGet " << varname; VLOG(40) << "RequestGet " << varname;
auto scope = request_handler_->scope(); auto scope = request_handler_->scope();
auto invar = scope->FindVar(varname); auto invar = scope->FindVar(varname);
...@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase { ...@@ -182,8 +182,8 @@ class RequestPrefetch final : public RequestBase {
std::string in_var_name = request_->Varname(); std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname(); std::string out_var_name = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestPrefetch, in_var_name: " << in_var_name VLOG(40) << "RequestPrefetch, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name; << " out_var_name: " << out_var_name;
auto scope = request_->GetMutableLocalScope(); auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name); auto invar = scope->FindVar(in_var_name);
...@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -231,8 +231,8 @@ class RequestCheckpointNotify final : public RequestBase {
std::string checkpoint_dir = request_->OutVarname(); std::string checkpoint_dir = request_->OutVarname();
int trainer_id = request_->GetTrainerId(); int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestCheckpointNotify notify: " << checkpoint_notify VLOG(40) << "RequestCheckpointNotify notify: " << checkpoint_notify
<< ", dir: " << checkpoint_dir; << ", dir: " << checkpoint_dir;
request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr, request_handler_->Handle(checkpoint_notify, scope, nullptr, nullptr,
trainer_id, checkpoint_dir); trainer_id, checkpoint_dir);
...@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase { ...@@ -246,10 +246,10 @@ class RequestCheckpointNotify final : public RequestBase {
}; };
void AsyncGRPCServer::WaitServerReady() { void AsyncGRPCServer::WaitServerReady() {
VLOG(4) << "AsyncGRPCServer is wait server ready"; VLOG(40) << "AsyncGRPCServer is wait server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_); std::unique_lock<std::mutex> lock(this->mutex_ready_);
condition_ready_.wait(lock, [=] { return this->ready_ == 1; }); condition_ready_.wait(lock, [=] { return this->ready_ == 1; });
VLOG(4) << "AsyncGRPCServer WaitSeverReady"; VLOG(40) << "AsyncGRPCServer WaitSeverReady";
} }
void AsyncGRPCServer::StartServer() { void AsyncGRPCServer::StartServer() {
...@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() { ...@@ -282,14 +282,15 @@ void AsyncGRPCServer::StartServer() {
reqs.reserve(kRequestBufSize); reqs.reserve(kRequestBufSize);
for (int i = 0; i < kRequestBufSize; i++) { for (int i = 0; i < kRequestBufSize; i++) {
VLOG(6) << "TryToRegisterNewOne on RPC NAME: " << rpc_name << " I: " << i; VLOG(60) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " I: " << i;
TryToRegisterNewOne(rpc_name, i); TryToRegisterNewOne(rpc_name, i);
} }
for (int i = 0; i < threadnum; i++) { for (int i = 0; i < threadnum; i++) {
rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind( rpc_threads_[rpc_name].emplace_back(new std::thread(std::bind(
&AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f))); &AsyncGRPCServer::HandleRequest, this, cq.get(), rpc_name, f)));
VLOG(4) << t.first << " creates threads!"; VLOG(40) << t.first << " creates threads!";
} }
} }
...@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -306,7 +307,7 @@ void AsyncGRPCServer::StartServer() {
auto& threads = t.second; auto& threads = t.second;
for (size_t i = 0; i < threads.size(); ++i) { for (size_t i = 0; i < threads.size(); ++i) {
threads[i]->join(); threads[i]->join();
VLOG(4) << t.first << " threads ends!"; VLOG(40) << t.first << " threads ends!";
} }
} }
} }
...@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() { ...@@ -314,7 +315,7 @@ void AsyncGRPCServer::StartServer() {
void AsyncGRPCServer::ShutdownQueue() { void AsyncGRPCServer::ShutdownQueue() {
for (auto& t : rpc_cq_) { for (auto& t : rpc_cq_) {
t.second->Shutdown(); t.second->Shutdown();
VLOG(4) << t.first << " queue shutdown!"; VLOG(40) << t.first << " queue shutdown!";
} }
} }
...@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() { ...@@ -323,7 +324,7 @@ void AsyncGRPCServer::ShutDownImpl() {
is_shut_down_ = true; is_shut_down_ = true;
ShutdownQueue(); ShutdownQueue();
VLOG(4) << "server_ shutdown!"; VLOG(40) << "server_ shutdown!";
server_->Shutdown(); server_->Shutdown();
} }
...@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -331,12 +332,12 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
int req_id) { int req_id) {
std::unique_lock<std::mutex> lock(cq_mutex_); std::unique_lock<std::mutex> lock(cq_mutex_);
if (is_shut_down_) { if (is_shut_down_) {
VLOG(4) << "shutdown, do not TryToRegisterNewSendOne"; VLOG(40) << "shutdown, do not TryToRegisterNewSendOne";
return; return;
} }
VLOG(4) << "TryToRegisterNewOne on RPC NAME: " << rpc_name VLOG(40) << "TryToRegisterNewOne on RPC NAME: " << rpc_name
<< " REQ ID: " << req_id; << " REQ ID: " << req_id;
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
auto& handler = rpc_call_map_[rpc_name]; auto& handler = rpc_call_map_[rpc_name];
...@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name, ...@@ -357,7 +358,7 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
reqs[req_id] = b; reqs[req_id] = b;
VLOG(4) << "Create RequestSend status:" << b->Status(); VLOG(40) << "Create RequestSend status:" << b->Status();
} }
void AsyncGRPCServer::HandleRequest( void AsyncGRPCServer::HandleRequest(
...@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest( ...@@ -367,15 +368,15 @@ void AsyncGRPCServer::HandleRequest(
bool ok = false; bool ok = false;
while (true) { while (true) {
VLOG(4) << "HandleRequest " << rpc_name << " wait next"; VLOG(40) << "HandleRequest " << rpc_name << " wait next";
if (!cq->Next(&tag, &ok)) { if (!cq->Next(&tag, &ok)) {
VLOG(3) << "CompletionQueue " << rpc_name << " shutdown!"; VLOG(30) << "CompletionQueue " << rpc_name << " shutdown!";
break; break;
} }
int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag)); int req_id = static_cast<int>(reinterpret_cast<intptr_t>(tag));
VLOG(4) << "HandleRequest " << rpc_name << ", req_id:" << req_id VLOG(40) << "HandleRequest " << rpc_name << ", req_id:" << req_id
<< " get next"; << " get next";
auto& reqs = rpc_reqs_[rpc_name]; auto& reqs = rpc_reqs_[rpc_name];
RequestBase* base = nullptr; RequestBase* base = nullptr;
...@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest( ...@@ -385,7 +386,7 @@ void AsyncGRPCServer::HandleRequest(
base = reqs[req_id]; base = reqs[req_id];
} }
VLOG(3) << base->Status2String(rpc_name); VLOG(30) << base->Status2String(rpc_name);
// reference: // reference:
// https://github.com/tensorflow/tensorflow/issues/5596 // https://github.com/tensorflow/tensorflow/issues/5596
......
...@@ -75,7 +75,7 @@ class VarHandle { ...@@ -75,7 +75,7 @@ class VarHandle {
wait_cond_.wait(lk, [this] { return status_ != kDefaultState; }); wait_cond_.wait(lk, [this] { return status_ != kDefaultState; });
ret = status_; ret = status_;
} }
VLOG(7) << "VarHandle wait:" << ret; VLOG(70) << "VarHandle wait:" << ret;
return ret != kErrorState; return ret != kErrorState;
} }
...@@ -84,7 +84,7 @@ class VarHandle { ...@@ -84,7 +84,7 @@ class VarHandle {
std::unique_lock<std::mutex> lk(sync_mutex_); std::unique_lock<std::mutex> lk(sync_mutex_);
status_ = ok ? kFinishState : kErrorState; status_ = ok ? kFinishState : kErrorState;
} }
VLOG(7) << "VarHandle finish:" << ok; VLOG(70) << "VarHandle finish:" << ok;
wait_cond_.notify_all(); wait_cond_.notify_all();
} }
......
...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -38,19 +38,19 @@ bool RequestSendHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestSendHandler:" << varname; VLOG(40) << "RequestSendHandler:" << varname;
// Sync // Sync
if (varname == BATCH_BARRIER_MESSAGE) { if (varname == BATCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv BATCH_BARRIER_MESSAGE"; VLOG(30) << "sync: recv BATCH_BARRIER_MESSAGE";
rpc_server_->IncreaseBatchBarrier(kRequestSend); rpc_server_->IncreaseBatchBarrier(kRequestSend);
} else if (varname == COMPLETE_MESSAGE) { } else if (varname == COMPLETE_MESSAGE) {
VLOG(3) << "sync: recv complete message"; VLOG(30) << "sync: recv complete message";
rpc_server_->Complete(); rpc_server_->Complete();
} else { } else {
// Async // Async
if (!sync_mode_) { if (!sync_mode_) {
VLOG(3) << "async process var: " << varname; VLOG(30) << "async process var: " << varname;
try { try {
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(),
scope); scope);
...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname, ...@@ -61,7 +61,7 @@ bool RequestSendHandler::Handle(const std::string& varname,
return true; return true;
} else { // sync } else { // sync
rpc_server_->WaitCond(kRequestSend); rpc_server_->WaitCond(kRequestSend);
VLOG(3) << "sync: processing received var: " << varname; VLOG(30) << "sync: processing received var: " << varname;
if (invar == nullptr) { if (invar == nullptr) {
LOG(FATAL) << "sync: Can not find server side var: " << varname; LOG(FATAL) << "sync: Can not find server side var: " << varname;
...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -78,10 +78,10 @@ bool RequestGetHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestGetHandler:" << varname; VLOG(40) << "RequestGetHandler:" << varname;
if (sync_mode_) { if (sync_mode_) {
if (varname == FETCH_BARRIER_MESSAGE) { if (varname == FETCH_BARRIER_MESSAGE) {
VLOG(3) << "sync: recv fetch barrier message"; VLOG(30) << "sync: recv fetch barrier message";
rpc_server_->IncreaseBatchBarrier(kRequestGet); rpc_server_->IncreaseBatchBarrier(kRequestGet);
} else { } else {
rpc_server_->WaitCond(kRequestGet); rpc_server_->WaitCond(kRequestGet);
...@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname, ...@@ -93,13 +93,14 @@ bool RequestGetHandler::Handle(const std::string& varname,
// NOTE: the format is determined by distributed_transpiler.py // NOTE: the format is determined by distributed_transpiler.py
std::string param_bak_name = std::string param_bak_name =
string::Sprintf("%s.trainer_%d_bak", varname, trainer_id); string::Sprintf("%s.trainer_%d_bak", varname, trainer_id);
VLOG(3) << "getting " << param_bak_name << " trainer_id " << trainer_id; VLOG(30) << "getting " << param_bak_name << " trainer_id "
<< trainer_id;
auto var = scope_->FindVar(varname); auto var = scope_->FindVar(varname);
auto t_orig = var->Get<framework::LoDTensor>(); auto t_orig = var->Get<framework::LoDTensor>();
auto param_bak = scope_->Var(param_bak_name); auto param_bak = scope_->Var(param_bak_name);
auto t = param_bak->GetMutable<framework::LoDTensor>(); auto t = param_bak->GetMutable<framework::LoDTensor>();
t->mutable_data(dev_ctx_->GetPlace(), t_orig.type()); t->mutable_data(dev_ctx_->GetPlace(), t_orig.type());
VLOG(3) << "copying " << varname << " to " << param_bak_name; VLOG(30) << "copying " << varname << " to " << param_bak_name;
framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t); framework::TensorCopy(t_orig, dev_ctx_->GetPlace(), t);
} }
*outvar = scope_->FindVar(varname); *outvar = scope_->FindVar(varname);
...@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname, ...@@ -114,7 +115,7 @@ bool RequestPrefetchHandler::Handle(const std::string& varname,
framework::Variable** outvar, framework::Variable** outvar,
const int trainer_id, const int trainer_id,
const std::string& out_var_name) { const std::string& out_var_name) {
VLOG(4) << "RequestPrefetchHandler " << varname; VLOG(40) << "RequestPrefetchHandler " << varname;
auto var_desc = program_->Block(0).FindVar(out_var_name); auto var_desc = program_->Block(0).FindVar(out_var_name);
InitializeVariable(*outvar, var_desc->GetType()); InitializeVariable(*outvar, var_desc->GetType());
...@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname, ...@@ -138,8 +139,8 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>(); auto* lt_var = scope_->FindVar(LOOKUP_TABLE_PATH)->GetMutable<std::string>();
lt_var->clear(); lt_var->clear();
lt_var->append(out_var_name); lt_var->append(out_var_name);
VLOG(4) << "RequestCheckpointHandler update var kLookupTablePath to: " VLOG(40) << "RequestCheckpointHandler update var kLookupTablePath to: "
<< out_var_name; << out_var_name;
executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_); executor_->RunPreparedContext(checkpoint_prepared_ctx_.get(), scope_);
return true; return true;
} }
......
...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const { ...@@ -39,7 +39,7 @@ void RPCServer::SavePort() const {
port_file.open(file_path); port_file.open(file_path);
port_file << selected_port_; port_file << selected_port_;
port_file.close(); port_file.close();
VLOG(4) << "selected port written to " << file_path; VLOG(40) << "selected port written to " << file_path;
} }
void RPCServer::WaitBarrier(const std::string& rpc_name) { void RPCServer::WaitBarrier(const std::string& rpc_name) {
...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) { ...@@ -49,12 +49,12 @@ void RPCServer::WaitBarrier(const std::string& rpc_name) {
exit_flag_.load()); exit_flag_.load());
}); });
VLOG(3) << "batch_barrier_: " << rpc_name << " " VLOG(30) << "batch_barrier_: " << rpc_name << " "
<< barrier_counter_[rpc_name]; << barrier_counter_[rpc_name];
} }
void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) { void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
VLOG(4) << "RPCServer begin IncreaseBatchBarrier " << rpc_name; VLOG(40) << "RPCServer begin IncreaseBatchBarrier " << rpc_name;
int b = 0; int b = 0;
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
b = ++barrier_counter_[rpc_name]; b = ++barrier_counter_[rpc_name];
...@@ -71,7 +71,7 @@ void RPCServer::Complete() { ...@@ -71,7 +71,7 @@ void RPCServer::Complete() {
client_num_--; client_num_--;
need_reset_all_vars_ = true; need_reset_all_vars_ = true;
VLOG(4) << "decrease client_num to: " << client_num_; VLOG(40) << "decrease client_num to: " << client_num_;
if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) { if (cur_cond_.load() == rpc_cond_map_[kRequestGet]) {
barrier_counter_[kRequestGet]--; barrier_counter_[kRequestGet]--;
} }
...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() { ...@@ -90,7 +90,7 @@ int RPCServer::GetClientNum() {
} }
void RPCServer::ResetBarrierCounter() { void RPCServer::ResetBarrierCounter() {
VLOG(3) << "RPCServer ResetBarrierCounter "; VLOG(30) << "RPCServer ResetBarrierCounter ";
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
for (auto& t : barrier_counter_) { for (auto& t : barrier_counter_) {
t.second = 0; t.second = 0;
...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name, ...@@ -105,12 +105,12 @@ void RPCServer::RegisterRPC(const std::string& rpc_name,
static int cond = -1; static int cond = -1;
rpc_cond_map_[rpc_name] = ++cond; rpc_cond_map_[rpc_name] = ++cond;
VLOG(4) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler VLOG(40) << "RegisterRPC rpc_name:" << rpc_name << ", handler:" << handler
<< ", cond:" << rpc_cond_map_[rpc_name]; << ", cond:" << rpc_cond_map_[rpc_name];
} }
void RPCServer::SetCond(const std::string& rpc_name) { void RPCServer::SetCond(const std::string& rpc_name) {
VLOG(3) << "RPCServer SetCond " << rpc_name; VLOG(30) << "RPCServer SetCond " << rpc_name;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
cur_cond_ = rpc_cond_map_[rpc_name]; cur_cond_ = rpc_cond_map_[rpc_name];
...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) { ...@@ -120,7 +120,7 @@ void RPCServer::SetCond(const std::string& rpc_name) {
} }
void RPCServer::WaitCond(const std::string& rpc_name) { void RPCServer::WaitCond(const std::string& rpc_name) {
VLOG(4) << "RPCServer WaitCond " << rpc_name; VLOG(40) << "RPCServer WaitCond " << rpc_name;
int cond = 0; int cond = 0;
{ {
std::unique_lock<std::mutex> lock(mutex_); std::unique_lock<std::mutex> lock(mutex_);
......
...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -50,7 +50,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
size_to_write = length - total_written; size_to_write = length - total_written;
} }
// This log is useful to see how long a internal block size is of rpc. // This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CUDAPlace"; VLOG(70) << "copy " << size_to_write << " data to CUDAPlace";
memory::Copy(boost::get<platform::CUDAPlace>(place), memory::Copy(boost::get<platform::CUDAPlace>(place),
reinterpret_cast<void*>(p), cpu, data, size_to_write, reinterpret_cast<void*>(p), cpu, data, size_to_write,
gpu_dev_ctx.stream()); gpu_dev_ctx.stream());
...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, ...@@ -79,7 +79,7 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input,
// TODO(gongwb): can we avoid copy? // TODO(gongwb): can we avoid copy?
platform::CPUPlace cpu; platform::CPUPlace cpu;
// This log is useful to see how long a internal block size is of rpc. // This log is useful to see how long a internal block size is of rpc.
VLOG(7) << "copy " << size_to_write << " data to CPUPlace"; VLOG(70) << "copy " << size_to_write << " data to CPUPlace";
memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write); memory::Copy(cpu, reinterpret_cast<void*>(p), cpu, data, size_to_write);
p += size_to_write; p += size_to_write;
...@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField( ...@@ -198,8 +198,8 @@ bool VariableResponse::ProcSerializedField(
#endif #endif
} }
VLOG(7) << "ProcSerializedField:" << meta_.varname() VLOG(70) << "ProcSerializedField:" << meta_.varname()
<< ", type:" << meta_.type() << std::endl; << ", type:" << meta_.type() << std::endl;
framework::DDim dims = GetDims(meta_.dims()); framework::DDim dims = GetDims(meta_.dims());
if (meta_.type() == sendrecv::LOD_TENSOR) { if (meta_.type() == sendrecv::LOD_TENSOR) {
PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!"); PADDLE_ENFORCE(meta_.lod_size() >= 0, "lod info should be got first!");
......
...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase { ...@@ -47,8 +47,8 @@ class FeedOp : public framework::OperatorBase {
auto col = Attr<int>("col"); auto col = Attr<int>("col");
VLOG(3) << "Feed Var " << feed_var_name << "'s " << col << " column to var " VLOG(30) << "Feed Var " << feed_var_name << "'s " << col
<< out_name; << " column to var " << out_name;
auto &feed_list = feed_var->Get<framework::FeedFetchList>(); auto &feed_list = feed_var->Get<framework::FeedFetchList>();
auto &feed_item = feed_list.at(static_cast<size_t>(col)); auto &feed_item = feed_list.at(static_cast<size_t>(col));
......
...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase { ...@@ -43,7 +43,7 @@ class FetchBarrierOp : public framework::OperatorBase {
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
for (auto& ep : eps) { for (auto& ep : eps) {
VLOG(3) << "fetch barrier, ep: " << ep; VLOG(30) << "fetch barrier, ep: " << ep;
rpc_client->AsyncSendFetchBarrier(ep); rpc_client->AsyncSendFetchBarrier(ep);
} }
PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient"); PADDLE_ENFORCE(rpc_client->Wait(), "internal error in RPCClient");
......
...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase { ...@@ -57,7 +57,7 @@ class FetchOp : public framework::OperatorBase {
TensorCopySync(src_item, platform::CPUPlace(), &dst_item); TensorCopySync(src_item, platform::CPUPlace(), &dst_item);
dst_item.set_lod(src_item.lod()); dst_item.set_lod(src_item.lod());
VLOG(3) << "Fetch variable " << fetch_var_name << " to " << out_name; VLOG(30) << "Fetch variable " << fetch_var_name << " to " << out_name;
} }
}; };
......
...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -64,7 +64,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0); distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
for (auto& ep : endpoint_list) { for (auto& ep : endpoint_list) {
VLOG(3) << "sending nccl id to " << ep; VLOG(30) << "sending nccl id to " << ep;
client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME); client->AsyncSendVar(ep, dev_ctx, *scope, NCCL_ID_VARNAME);
} }
client->Wait(); client->Wait();
...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -72,7 +72,7 @@ class GenNCCLIdOp : public framework::OperatorBase {
client->AsyncSendBatchBarrier(ep); client->AsyncSendBatchBarrier(ep);
} }
client->Wait(); client->Wait();
VLOG(3) << "sending completed..."; VLOG(30) << "sending completed...";
} }
void GetIdByServer(framework::Scope* scope, void GetIdByServer(framework::Scope* scope,
...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase { ...@@ -99,11 +99,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
std::bind(&distributed::RPCServer::StartServer, rpc_service.get())); std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));
rpc_service->SetCond(distributed::kRequestSend); rpc_service->SetCond(distributed::kRequestSend);
VLOG(3) << "start getting nccl id from trainer 0..."; VLOG(30) << "start getting nccl id from trainer 0...";
rpc_service->WaitBarrier(distributed::kRequestSend); rpc_service->WaitBarrier(distributed::kRequestSend);
VLOG(3) << "got nccl id and stop server..."; VLOG(30) << "got nccl id and stop server...";
rpc_service->ShutDown(); rpc_service->ShutDown();
VLOG(3) << "rpc server stopped"; VLOG(30) << "rpc server stopped";
server_thread.join(); server_thread.join();
} }
}; };
......
...@@ -36,7 +36,7 @@ namespace operators { ...@@ -36,7 +36,7 @@ namespace operators {
void RunServer(std::shared_ptr<distributed::RPCServer> service) { void RunServer(std::shared_ptr<distributed::RPCServer> service) {
service->StartServer(); service->StartServer();
VLOG(4) << "RunServer thread end"; VLOG(40) << "RunServer thread end";
} }
static void split(const std::string &str, char sep, static void split(const std::string &str, char sep,
std::vector<std::string> *pieces) { std::vector<std::string> *pieces) {
...@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks( ...@@ -66,8 +66,8 @@ static void ParallelExecuteBlocks(
fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() { fs.push_back(framework::Async([&executor, &prepared, &scope, idx]() {
int run_block = idx; // thread local int run_block = idx; // thread local
try { try {
VLOG(3) << "running server block: " << run_block VLOG(30) << "running server block: " << run_block
<< "pointer: " << prepared[run_block].get(); << "pointer: " << prepared[run_block].get();
executor->RunPreparedContext(prepared[run_block].get(), scope); executor->RunPreparedContext(prepared[run_block].get(), scope);
} catch (const std::exception &e) { } catch (const std::exception &e) {
LOG(FATAL) << "run sub program:" << idx << " error " << e.what(); LOG(FATAL) << "run sub program:" << idx << " error " << e.what();
...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -108,7 +108,7 @@ void ListenAndServOp::RunSyncLoop(
framework::Scope *recv_scope, platform::DeviceContext *dev_ctx, framework::Scope *recv_scope, platform::DeviceContext *dev_ctx,
const std::vector<int> &prefetch_block_id_list, const std::vector<int> &prefetch_block_id_list,
const int checkpoint_point_block_id) const { const int checkpoint_point_block_id) const {
VLOG(2) << "RunSyncLoop"; VLOG(20) << "RunSyncLoop";
size_t num_blocks = program->Size(); size_t num_blocks = program->Size();
auto optimize_blocks = auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks); Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop( ...@@ -167,7 +167,7 @@ void ListenAndServOp::RunSyncLoop(
} }
ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program, ParallelExecuteBlocks(parallel_blkids, executor, optimize_prepared, program,
recv_scope); recv_scope);
VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)"; VLOG(20) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";
ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars()); ResetReceivedVars(recv_scope, dev_ctx, rpc_service_->NeedResetAllVars());
...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -183,11 +183,11 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : sparse_vars_) { for (auto &varname : sparse_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope"; VLOG(20) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::SelectedRows>()) { if (var->IsType<framework::SelectedRows>()) {
VLOG(3) << "reset sparse var: " << varname; VLOG(30) << "reset sparse var: " << varname;
var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear(); var->GetMutable<framework::SelectedRows>()->mutable_rows()->clear();
} else { } else {
PADDLE_THROW("The type of sparse var should be SelectedRows"); PADDLE_THROW("The type of sparse var should be SelectedRows");
...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -197,7 +197,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
for (auto &varname : dense_vars_) { for (auto &varname : dense_vars_) {
auto var = recv_scope->FindVar(varname); auto var = recv_scope->FindVar(varname);
if (var == nullptr) { if (var == nullptr) {
VLOG(2) << "can not find var " << varname << " in received scope"; VLOG(20) << "can not find var " << varname << " in received scope";
continue; continue;
} }
if (var->IsType<framework::LoDTensor>()) { if (var->IsType<framework::LoDTensor>()) {
...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope, ...@@ -216,7 +216,7 @@ void ListenAndServOp::ResetReceivedVars(framework::Scope *recv_scope,
void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
framework::ProgramDesc *program, framework::ProgramDesc *program,
framework::Scope *recv_scope) const { framework::Scope *recv_scope) const {
VLOG(2) << "RunAsyncLoop"; VLOG(20) << "RunAsyncLoop";
auto grad_to_block_id_str = auto grad_to_block_id_str =
Attr<std::vector<std::string>>("grad_to_block_id"); Attr<std::vector<std::string>>("grad_to_block_id");
DoubleFindMap<std::string, int32_t> grad_to_block_id; DoubleFindMap<std::string, int32_t> grad_to_block_id;
...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -225,7 +225,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
const std::string &grad_and_id) { const std::string &grad_and_id) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(grad_and_id, ':', &pieces); split(grad_and_id, ':', &pieces);
VLOG(3) << "after split, key = " << pieces[0] << ", id=" << pieces[1]; VLOG(30) << "after split, key = " << pieces[0] << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0); PADDLE_ENFORCE_EQ(out_map->count(pieces[0]), 0);
...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor, ...@@ -270,7 +270,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
while (true) { while (true) {
if (rpc_service_->IsExit()) { if (rpc_service_->IsExit()) {
VLOG(4) << "get exit!rpc_processor break!"; VLOG(40) << "get exit!rpc_processor break!";
break; break;
} }
...@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -332,9 +332,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
std::string endpoint = Attr<std::string>("endpoint"); std::string endpoint = Attr<std::string>("endpoint");
int checkpoint_block_id = Attr<int>(kCheckpointBlockId); int checkpoint_block_id = Attr<int>(kCheckpointBlockId);
VLOG(4) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in VLOG(40) << "sync_mode:" << sync_mode << ", fan_in:" << fan_in
<< ", end_point:" << endpoint << ", end_point:" << endpoint
<< ", checkpoint_block_id: " << checkpoint_block_id; << ", checkpoint_block_id: " << checkpoint_block_id;
rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in)); rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));
...@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -383,8 +383,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
prefetch_var_name_to_block_id_str) { prefetch_var_name_to_block_id_str) {
std::vector<std::string> pieces; std::vector<std::string> pieces;
split(prefetch_var_name_and_id, ':', &pieces); split(prefetch_var_name_and_id, ':', &pieces);
VLOG(3) << "after split, prefetch_var = " << pieces[0] VLOG(30) << "after split, prefetch_var = " << pieces[0]
<< ", id=" << pieces[1]; << ", id=" << pieces[1];
PADDLE_ENFORCE_EQ(pieces.size(), 2); PADDLE_ENFORCE_EQ(pieces.size(), 2);
int block_id = std::stoi(pieces[1]); int block_id = std::stoi(pieces[1]);
...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -415,7 +415,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
// start the server listening after all member initialized. // start the server listening after all member initialized.
server_thread_.reset(new std::thread(RunServer, rpc_service_)); server_thread_.reset(new std::thread(RunServer, rpc_service_));
VLOG(3) << "wait server thread to become ready..."; VLOG(30) << "wait server thread to become ready...";
rpc_service_->WaitServerReady(); rpc_service_->WaitServerReady();
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
......
...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase { ...@@ -30,9 +30,9 @@ class LoDRankTableOp : public framework::OperatorBase {
auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>(); auto x = scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
auto *out = auto *out =
scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>(); scope.FindVar(Output("Out"))->GetMutable<framework::LoDRankTable>();
VLOG(10) << "Level = " << static_cast<size_t>(Attr<int>("level")); VLOG(100) << "Level = " << static_cast<size_t>(Attr<int>("level"));
out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level"))); out->Reset(x.lod(), static_cast<size_t>(Attr<int>("level")));
VLOG(10) << Input("X") << "'s lod information is " << *out; VLOG(100) << Input("X") << "'s lod information is " << *out;
} }
}; };
......
...@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference { ...@@ -134,13 +134,13 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
auto attr = op_desc.GetAttr("is_sparse"); auto attr = op_desc.GetAttr("is_sparse");
bool is_sparse = boost::get<bool>(attr); bool is_sparse = boost::get<bool>(attr);
if (is_sparse) { if (is_sparse) {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to SelectedRows"; << " is set to SelectedRows";
block->Var(out_var_name) block->Var(out_var_name)
->SetType(framework::proto::VarType::SELECTED_ROWS); ->SetType(framework::proto::VarType::SELECTED_ROWS);
} else { } else {
VLOG(3) << "lookup_table_grad op " << framework::GradVarName("W") VLOG(30) << "lookup_table_grad op " << framework::GradVarName("W")
<< " is set to LoDTensor"; << " is set to LoDTensor";
block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR); block->Var(out_var_name)->SetType(framework::proto::VarType::LOD_TENSOR);
} }
block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType()); block->Var(out_var_name)->SetDataType(block->Var("W")->GetDataType());
......
...@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt, ...@@ -96,8 +96,8 @@ void TestAndBench(const int n, std::function<void(const int, const T*, T*)> tgt,
} }
auto et = GetCurrentUS(); auto et = GetCurrentUS();
VLOG(3) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat VLOG(30) << "Vec size " << n << ": refer takes: " << (et - mt) / repeat
<< " us, tgt takes: " << (mt - st) / repeat; << " us, tgt takes: " << (mt - st) / repeat;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3); EXPECT_NEAR(ytgt_data[i], yref_data[i], 1e-3);
} }
......
...@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) { ...@@ -87,7 +87,7 @@ TEST(JitKernel, vrelu) {
vrelu_intri8(d, x_data, zref_data); vrelu_intri8(d, x_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
auto ttgts = GetCurrentUS(); auto ttgts = GetCurrentUS();
...@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) { ...@@ -95,8 +95,9 @@ TEST(JitKernel, vrelu) {
ker->Compute(x_data, ztgt_data); ker->Compute(x_data, ztgt_data);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) { ...@@ -132,8 +133,9 @@ TEST(JitKernel, vaddbias) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) { ...@@ -183,13 +185,14 @@ TEST(JitKernel, vexp) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) { ...@@ -254,9 +257,10 @@ TEST(JitKernel, vsigmoid) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -320,9 +324,10 @@ TEST(JitKernel, vtanh) { ...@@ -320,9 +324,10 @@ TEST(JitKernel, vtanh) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit exp) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -440,9 +445,10 @@ TEST(JitKernel, lstm) { ...@@ -440,9 +445,10 @@ TEST(JitKernel, lstm) {
ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data); ker->ComputeCtHt(x_data, ct_1_data, ct_tgt_data, ht_tgt_data);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better(jit) takes: " << (tmkle - tmkls) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat; << " us, better(jit) takes: " << (tmkle - tmkls) / repeat
<< " us, tgt takes: " << (ttgte - ttgts) / repeat;
} }
} }
...@@ -524,8 +530,8 @@ TEST(JitKernel, vscal) { ...@@ -524,8 +530,8 @@ TEST(JitKernel, vscal) {
vscal_inp_intri8(d, a, y_data); vscal_inp_intri8(d, a, y_data);
} }
auto si3 = GetCurrentUS(); auto si3 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat
<< " us, inplace: " << (si3 - si2) / repeat; << " us, inplace: " << (si3 - si2) / repeat;
} }
#endif #endif
...@@ -539,15 +545,17 @@ TEST(JitKernel, vscal) { ...@@ -539,15 +545,17 @@ TEST(JitKernel, vscal) {
ker->Compute(a, y_data); ker->Compute(a, y_data);
} }
auto ttgte1 = GetCurrentUS(); auto ttgte1 = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat << ": refer takes: " << (trefe - trefs) / repeat
<< " us, inplace takes: " << (trefe1 - trefs1) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl inplace takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl inplace takes: " << (tmkle - tmkls) / repeat
<< " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat << "tgt takes: " << (ttgte - ttgts) / repeat
<< "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat; << "us, tgt inplace takes: " << (ttgte1 - ttgts1) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -610,7 +618,7 @@ TEST(JitKernel, vmul) { ...@@ -610,7 +618,7 @@ TEST(JitKernel, vmul) {
vmul_intri8(d, x_data, y_data, zref_data); vmul_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -620,13 +628,14 @@ TEST(JitKernel, vmul) { ...@@ -620,13 +628,14 @@ TEST(JitKernel, vmul) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -689,7 +698,7 @@ TEST(JitKernel, vadd) { ...@@ -689,7 +698,7 @@ TEST(JitKernel, vadd) {
vadd_intri8(d, x_data, y_data, zref_data); vadd_intri8(d, x_data, y_data, zref_data);
} }
auto si1 = GetCurrentUS(); auto si1 = GetCurrentUS();
VLOG(3) << "Vec size 8 intr takes: " << (si1 - si0) / repeat; VLOG(30) << "Vec size 8 intr takes: " << (si1 - si0) / repeat;
} }
#endif #endif
...@@ -699,13 +708,14 @@ TEST(JitKernel, vadd) { ...@@ -699,13 +708,14 @@ TEST(JitKernel, vadd) {
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< ": refer takes: " << (trefe - trefs) / repeat
#ifdef PADDLE_WITH_MKLML #ifdef PADDLE_WITH_MKLML
<< " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, " << " us, mkl takes: " << (tmkle - tmkls) / repeat << " us, "
#else #else
<< " us, " << " us, "
#endif #endif
<< "tgt takes: " << (ttgte - ttgts) / repeat; << "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
...@@ -760,9 +770,10 @@ TEST(JitKernel, vaddrelu) { ...@@ -760,9 +770,10 @@ TEST(JitKernel, vaddrelu) {
ker->Compute(x_data, y_data, ztgt_data, d); ker->Compute(x_data, y_data, ztgt_data, d);
} }
auto ttgte = GetCurrentUS(); auto ttgte = GetCurrentUS();
VLOG(3) << "Vec size " << d << ": refer takes: " << (trefe - trefs) / repeat VLOG(30) << "Vec size " << d
<< " us, better takes: " << (tmkle - tmkls) / repeat << " us, " << ": refer takes: " << (trefe - trefs) / repeat
<< "tgt takes: " << (ttgte - ttgts) / repeat; << " us, better takes: " << (tmkle - tmkls) / repeat << " us, "
<< "tgt takes: " << (ttgte - ttgts) / repeat;
for (int i = 0; i < d; ++i) { for (int i = 0; i < d; ++i) {
EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3); EXPECT_NEAR(ztgt_data[i], zref_data[i], 1e-3);
} }
......
...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -270,7 +270,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(3) << "no input! return"; VLOG(30) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> { ...@@ -281,7 +281,7 @@ struct MergeAdd<platform::CPUDeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl; VLOG(30) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -314,7 +314,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
const std::vector<const framework::SelectedRows*>& inputs, const std::vector<const framework::SelectedRows*>& inputs,
framework::SelectedRows* output) { framework::SelectedRows* output) {
if (inputs.size() == 0) { if (inputs.size() == 0) {
VLOG(3) << "no input! return"; VLOG(30) << "no input! return";
return; return;
} }
const framework::SelectedRows* has_value_input = nullptr; const framework::SelectedRows* has_value_input = nullptr;
...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> { ...@@ -325,7 +325,7 @@ struct MergeAdd<platform::CUDADeviceContext, T> {
} }
} }
if (has_value_input == nullptr) { if (has_value_input == nullptr) {
VLOG(3) << "no input has value! just return" << std::endl; VLOG(30) << "no input has value! just return" << std::endl;
return; return;
} }
auto input_width = has_value_input->value().dims()[1]; auto input_width = has_value_input->value().dims()[1];
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册