diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 42095c7bdc3e26bae2b80436af2f487a79aedb45..72dec87847b7be52c6b582eb9bf1859df6adcd2e 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -167,7 +167,7 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) { } { - platform::RecordEvent record_event(Type() + "_op"); + platform::RecordEvent record_event(Type()); RunImpl(scope, place); } @@ -950,7 +950,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, std::vector transfered_inplace_vars; Scope* transfer_scope = nullptr; { - platform::RecordEvent record_event("prepare_data"); + platform::RecordEvent record_event("prepare_data_inner_op"); transfer_scope = PrepareData(scope, *kernel_type_, &transfered_inplace_vars, runtime_ctx); } @@ -963,7 +963,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, } if (!all_kernels_must_compute_runtime_shape_) { - platform::RecordEvent record_event("infer_shape"); + platform::RecordEvent record_event("infer_shape_inner_op"); RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx); this->InferShape(&infer_shape_ctx); } @@ -975,7 +975,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext // not Scope. Imperative mode only pass inputs and get outputs. { - platform::RecordEvent record_event("compute"); + platform::RecordEvent record_event("compute_inner_op"); (*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx, kernel_configs)); } diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index a32b89bc4ce02edf4da0fd90598f0b17808191ee..baa5c2743f9d0796173c7fd0c00fb910ba60d77b 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -372,12 +372,13 @@ void PrintProfiler(const std::vector> &events_table, std::vector> child_table; std::vector table; bool do_next = false; - std::string op_end_str = "_op"; + std::string op_end_str = "inner_op"; for (auto it = child_map.begin(); it != child_map.end(); it++) { if (it->first == event_item.name) { table.push_back(it->second); - do_next = it->second.name.rfind(op_end_str) == - (it->second.name.length() - op_end_str.length()); + if (!do_next) + do_next = !(it->second.name.rfind(op_end_str) == + (it->second.name.length() - op_end_str.length())); } } child_table.push_back(table); @@ -579,6 +580,7 @@ void ParseEvents(const std::vector> &events, std::vector event_items; std::vector main_event_items; std::unordered_map event_idx; + std::multimap sub_child_map; for (size_t j = 0; j < (*analyze_events)[i].size(); j++) { Event analyze_event = (*analyze_events)[i][j]; @@ -599,7 +601,7 @@ void ParseEvents(const std::vector> &events, (cname[fname.length()] == '/' && cname.rfind('/') == fname.length()); if (condition) { - child_map.insert( + sub_child_map.insert( std::pair(fname, event_items[k])); child_index[k] = 1; } @@ -618,9 +620,9 @@ void ParseEvents(const std::vector> &events, item.ave_time = item.total_time / item.calls; item.ratio = item.total_time / total; } - for (auto it = child_map.begin(); it != child_map.end(); it++) { + for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { it->second.ratio = it->second.total_time / total; - it->second.ave_time = it->second.ave_time / it->second.calls; + it->second.ave_time = it->second.total_time / it->second.calls; } // sort @@ -636,6 +638,11 @@ void ParseEvents(const std::vector> &events, << "\', which will be ignored in profiling report."; ++rit; } + + for (auto it = sub_child_map.begin(); it != sub_child_map.end(); it++) { + child_map.insert( + std::pair(it->first, it->second)); + } } // Print report