Unverified · Commit c65c6ae5 · authored by wangchaochaohu · committed by GitHub

add flag to control profile level in python API (#22319)

* add python flag to control profile level test=develop
Parent: fce37bc5
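A minimal usage sketch of the new flag, based on the Python API added below (run_model is a placeholder for the user's own training or inference loop):

    import paddle.fluid.profiler as profiler

    # 'Default' prints per-op-type results; 'OpDetail' adds per-stage detail
    # (data transform, compute); 'AllOpDetail' breaks results down per op name.
    with profiler.profiler('CPU', sorted_key='total',
                           profile_path='/tmp/profile',
                           tracer_option='OpDetail'):
        run_model()  # hypothetical; profile whatever executes here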
paddle/fluid/framework/operator.cc
@@ -167,7 +167,13 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
   }
   {
-    platform::RecordEvent record_event(Type());
+    // TODO(wangchaochaohu): refine the code to use only one RecordEvent.
+    // In order to record both the cost per op type and the cost per op
+    // name, we set up two events.
+    platform::RecordEvent op_type_record_event(Type());
+    auto op_name = platform::OpName(outputs_, Type());
+    platform::RecordEvent op_name_record_event(
+        op_name, platform::RecordRole::kUniqueOp);
     RunImpl(scope, place);
   }
@@ -950,7 +956,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   std::vector<std::string> transfered_inplace_vars;
   Scope* transfer_scope = nullptr;
   {
-    platform::RecordEvent record_event("prepare_data_inner_op");
+    platform::RecordEvent record_event("prepare_data",
+                                       platform::RecordRole::kInnerOp);
     transfer_scope = PrepareData(scope, *kernel_type_, &transfered_inplace_vars,
                                  runtime_ctx);
   }
@@ -963,7 +970,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   }
   if (!all_kernels_must_compute_runtime_shape_) {
-    platform::RecordEvent record_event("infer_shape_inner_op");
+    platform::RecordEvent record_event("infer_shape",
+                                       platform::RecordRole::kInnerOp);
     RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
     this->InferShape(&infer_shape_ctx);
   }
@@ -975,7 +983,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
   // TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
   // not Scope. Imperative mode only pass inputs and get outputs.
   {
-    platform::RecordEvent record_event("compute_inner_op");
+    platform::RecordEvent record_event("compute",
+                                       platform::RecordRole::kInnerOp);
     (*kernel_func_)(ExecutionContext(*this, exec_scope, *dev_ctx, *runtime_ctx,
                                      kernel_configs));
   }
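These two nested RecordEvent scopes, plus the kInnerOp events inside RunImpl, give each op run up to three layers of timing. A rough sketch of the hierarchy under the 'AllOpDetail' option (names are illustrative; the unique per-op name is built by OpName and later shortened by DealWithShowName, both shown further down):

    conv2d                 # op-type event (kOrdinary)
        conv2d0            # per-op-name event (kUniqueOp)
            prepare_data   # inner-op events (kInnerOp)
            infer_shape
            compute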
paddle/fluid/platform/profiler.cc
@@ -42,6 +42,7 @@ static int64_t profiler_lister_id = 0;
 static bool should_send_profile_state = false;
 std::mutex profiler_mu;
+static TracerOption g_tracer_option = TracerOption::kDefault;
 // The profiler state, the initial value is ProfilerState::kDisabled
 static ProfilerState g_state = ProfilerState::kDisabled;
 // The thread local event list only can be accessed by the specific thread
@@ -137,11 +138,15 @@ void PopEvent(const std::string &name) {
   GetEventList().Record(EventType::kPopRange, name, g_thread_id);
 }

-RecordEvent::RecordEvent(const std::string &name)
-    : is_enabled_(false), start_ns_(PosixInNsec()) {
+RecordEvent::RecordEvent(const std::string &name, const RecordRole role)
+    : is_enabled_(false), start_ns_(PosixInNsec()), role_(role) {
   if (g_state == ProfilerState::kDisabled || name.empty()) return;
+  if ((g_tracer_option == TracerOption::kOpDetail &&
+       role_ != RecordRole::kInnerOp) ||
+      (g_tracer_option == TracerOption::kDefault &&
+       role != RecordRole::kOrdinary))
+    return;
   // lock is not needed, the code below is thread-safe
   is_enabled_ = true;
   Event *e = PushEvent(name);
   // Maybe need the same push/pop behavior.
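The early return just added is the entire level switch. A pure-Python model of the predicate (not a Paddle API, just the constructor's decision restated):

    def is_recorded(tracer_option, role):
        # mirrors the early return in RecordEvent's constructor
        if tracer_option == 'kOpDetail' and role != 'kInnerOp':
            return False
        if tracer_option == 'kDefault' and role != 'kOrdinary':
            return False
        return True  # under 'kAllOpDetail' every role is recorded

So kDefault keeps only the per-op-type (kOrdinary) events, kOpDetail keeps only the inner prepare_data/infer_shape/compute (kInnerOp) events, and kAllOpDetail keeps everything.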
@@ -363,30 +368,21 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
               << "Max." << std::setw(data_width) << "Ave."
               << std::setw(data_width) << "Ratio." << std::endl;
   }
-  if (print_depth >= 100) return;
-  int print_depth_next = print_depth;
+  if (events_table.size() <= 0) return;
   for (size_t i = 0; i < events_table.size(); ++i) {
     for (size_t j = 0; j < events_table[i].size(); ++j) {
       auto event_item = events_table[i][j];
       std::vector<std::vector<EventItem>> child_table;
       std::vector<EventItem> table;
-      bool do_next = false;
-      std::string op_end_str = "inner_op";
       for (auto it = child_map.begin(); it != child_map.end(); it++) {
         if (it->first == event_item.name) {
           table.push_back(it->second);
-          if (!do_next)
-            do_next = !(it->second.name.rfind(op_end_str) ==
-                        (it->second.name.length() - op_end_str.length()));
         }
       }
       child_table.push_back(table);
-      if (do_next)
-        print_depth_next = print_depth + 1;
-      else
-        print_depth_next = print_depth + 100;
       auto name_len = event_item.name.length();
       std::string print_name = event_item.name.substr(remove_len, name_len);
       std::string delimiter;
@@ -394,10 +390,7 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
         delimiter = "  " + delimiter;
       }
       print_name = delimiter + print_name;
-      size_t pos = 0;
-      while ((pos = print_name.find(op_end_str)) != std::string::npos) {
-        print_name.erase(pos, op_end_str.length());
-      }
       std::cout << std::setw(name_width) << print_name << std::setw(data_width)
                 << event_item.calls << std::setw(data_width)
                 << event_item.total_time;
@@ -416,14 +409,14 @@ void PrintProfiler(const std::vector<std::vector<EventItem>> &events_table,
                 << std::setw(data_width) << event_item.ave_time
                 << std::setw(data_width) << event_item.ratio << std::endl;
       PrintProfiler(child_table, child_map, sorted_domain, name_width,
-                    data_width, merge_thread, print_depth_next, 0);
+                    data_width, merge_thread, print_depth + 1, 0);
     }
   }
 }

 std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
     EventSortingKey sorted_by, std::string *domain) {
-  auto sorted_domain = *domain;
+  std::string sorted_domain;
   std::function<bool(const EventItem &, const EventItem &)> sorted_func;
   switch (sorted_by) {
     case EventSortingKey::kCalls:
@@ -471,6 +464,7 @@ std::function<bool(const EventItem &, const EventItem &)> SetSortedFunc(
     default:
       sorted_domain = "event first end time";
   }
+  *domain = sorted_domain;
   return sorted_func;
 }
@@ -709,6 +703,45 @@ void ParseMemEvents(const std::vector<std::vector<MemEvent>> &events) {
   PrintMemProfiler(annotation_report, 55, 18);
 }

+void DealWithShowName() {
+  std::unordered_map<std::string, int> prefix_name;
+  std::vector<std::string> op_out_name;
+  for (auto it = g_all_event_lists.begin(); it != g_all_event_lists.end();
+       ++it) {
+    for (auto &block : (*it)->event_blocks) {
+      for (auto &r : block) {
+        auto event_name = r.name();
+        size_t start = event_name.find('%', 0);
+        size_t end = event_name.find('%', start + 1);
+        std::string prefix_str = event_name.substr(0, start);
+        while (start != std::string::npos && end != std::string::npos) {
+          auto search_str = event_name.substr(start, end - start + 1);
+          auto it = find(op_out_name.begin(), op_out_name.end(), search_str);
+          std::string replace_str;
+          bool prefix_find = true;
+          if (prefix_name.find(prefix_str) == prefix_name.end()) {
+            prefix_find = false;
+            prefix_name[prefix_str] = 0;
+          }
+          if (it == op_out_name.end()) {
+            if (prefix_find)
+              prefix_name[prefix_str] = prefix_name[prefix_str] + 1;
+            op_out_name.push_back(search_str);
+          }
+          replace_str = std::to_string(prefix_name[prefix_str]);
+          event_name.replace(start, end - start + 1, replace_str);
+          start = start + 1;
+          start = event_name.find('%', start);
+          end = event_name.find('%', start + 1);
+          prefix_str = event_name.substr(0, start);
+        }
+        r.set_name(event_name);
+      }
+    }
+  }
+}
+
 void DisableProfiler(EventSortingKey sorted_key,
                      const std::string &profile_path) {
   SynchronizeAllDevice();
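DealWithShowName rewrites each '%...%' placeholder that OpName embeds in an event name into a small per-op-type counter, so repeated instances of the same op type get short, distinct display names. A simplified standalone model of the renaming (it handles the single placeholder OpName actually emits; the C++ loop also copes with several per name):

    def shorten(event_names):
        prefix_counter = {}      # op type prefix -> last assigned index
        seen_placeholders = []   # '%...%' strings already assigned an index
        renamed = []
        for name in event_names:
            start = name.find('%')
            end = name.find('%', start + 1)
            if start == -1 or end == -1:  # no placeholder: keep as is
                renamed.append(name)
                continue
            prefix = name[:start]
            placeholder = name[start:end + 1]
            known_prefix = prefix in prefix_counter
            if not known_prefix:
                prefix_counter[prefix] = 0
            if placeholder not in seen_placeholders:
                if known_prefix:
                    prefix_counter[prefix] += 1
                seen_placeholders.append(placeholder)
            renamed.append(prefix + str(prefix_counter[prefix]) + name[end + 1:])
        return renamed

    # e.g. shorten(['fc%fc_0.tmp_1%', 'fc%fc_0.tmp_1%', 'fc%fc_1.tmp_1%'])
    #      -> ['fc0', 'fc0', 'fc1']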
@@ -718,37 +751,17 @@ void DisableProfiler(EventSortingKey sorted_key,
   if (g_state == ProfilerState::kDisabled) return;
   // Mark the profiling stop.
   Mark("_stop_profiler_");
+  DealWithShowName();
   DeviceTracer *tracer = GetDeviceTracer();
   if (tracer->IsEnabled()) {
     tracer->Disable();
-    tracer->GenProfile(profile_path);
     tracer->GenEventKernelCudaElapsedTime();
+    tracer->GenProfile(profile_path);
   }
   std::vector<std::vector<Event>> all_events = GetAllEvents();
-  std::vector<std::string> op_name;
-  for (size_t i = 0; i < (all_events).size(); i++) {
-    for (size_t j = 0; j < (all_events)[i].size(); j++) {
-      std::string event_name = (all_events)[i][j].name();
-      size_t start = event_name.find('%', 0);
-      size_t end = event_name.rfind('%', event_name.length() - 1);
-      if (start == std::string::npos || end == std::string::npos) continue;
-      std::string search_str = event_name.substr(start, end - start + 1);
-      auto it = find(op_name.begin(), op_name.end(), search_str);
-      std::string replace_str;
-      if (it != op_name.end()) {
-        replace_str = std::to_string(std::distance(op_name.begin(), it));
-      } else {
-        replace_str = std::to_string(op_name.size());
-        op_name.push_back(search_str);
-      }
-      event_name.replace(start, end - start + 1, replace_str);
-      (all_events)[i][j].set_name(event_name);
-    }
-  }
   ParseEvents(all_events, true, sorted_key);
   ParseEvents(all_events, false, sorted_key);
   if (VLOG_IS_ON(5)) {
@@ -773,5 +786,30 @@ void SetProfileListener() {
 }

 int64_t ListenerId() { return profiler_lister_id; }

+std::string OpName(const framework::VariableNameMap &name_map,
+                   const std::string &type_name) {
+  if (platform::GetTracerOption() != platform::TracerOption::kAllOpDetail)
+    return "";
+  std::string ret = type_name + "%";
+  for (auto it = name_map.begin(); it != name_map.end(); it++) {
+    auto name_outputs = it->second;
+    if (!name_outputs.empty() &&
+        type_name.length() < name_outputs[0].length()) {
+      ret = ret + name_outputs[0];
+      break;
+    }
+  }
+  ret = ret + "%";
+  return ret;
+}
+
+void SetTracerOption(TracerOption option) {
+  std::lock_guard<std::mutex> l(profiler_mu);
+  g_tracer_option = option;
+}
+
+platform::TracerOption GetTracerOption() { return g_tracer_option; }
+
 }  // namespace platform
 }  // namespace paddle
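OpName returns a non-empty name only under kAllOpDetail, tagging the op type with the first output variable whose name is longer than the type name. A quick model of the string it builds (the dict and names below are hypothetical):

    def op_name(outputs, type_name):
        # model of platform::OpName; assumes the kAllOpDetail guard passed
        ret = type_name + '%'
        for _, var_names in outputs.items():
            if var_names and len(type_name) < len(var_names[0]):
                ret += var_names[0]
                break
        return ret + '%'

    # op_name({'Out': ['fc_0.tmp_1']}, 'fc') -> 'fc%fc_0.tmp_1%'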
paddle/fluid/platform/profiler.h
@@ -23,6 +23,7 @@ limitations under the License. */
 #include <unordered_set>
 #include <utility>
 #include <vector>
+#include "paddle/fluid/framework/type_defs.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/event.h"
 #include "paddle/fluid/platform/place.h"
@@ -39,6 +40,19 @@ enum class ProfilerState {
   kAll,  // Profile both CPU and GPU. (Currently experimental).
 };

+enum class RecordRole {
+  kOrdinary,  // only record the op's time, keyed by op type
+  kInnerOp,   // record the op's detail time, keyed by op type
+  kUniqueOp,  // record the op's detail time, keyed by unique op name
+};
+
+// the flag that controls which level of profiling result is printed
+enum class TracerOption {
+  kDefault,      // print one profiling result per op type
+  kOpDetail,     // print detailed profiling results per op type
+  kAllOpDetail,  // print detailed profiling results per op name
+};
+
 void Mark(const std::string& name);

 void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
@@ -79,7 +93,8 @@ Event* PushEvent(const std::string& name);
 void PopEvent(const std::string& name);

 struct RecordEvent {
-  explicit RecordEvent(const std::string& name);
+  RecordEvent(const std::string& name,
+              const RecordRole r_type = RecordRole::kOrdinary);

   ~RecordEvent();
@@ -90,6 +105,7 @@ struct RecordEvent {
   // Need to distinguish name by op type, block_id, program_id and perhaps
   // different kernel invocations within an op.
   std::string full_name_;
+  RecordRole role_{RecordRole::kOrdinary};
 };

 class RecordRPCEvent {
@@ -181,6 +197,10 @@ bool ShouldSendProfileState();
 void SetProfileListener();
 int64_t ListenerId();
+std::string OpName(const framework::VariableNameMap& name_map,
+                   const std::string& type_name);
+void SetTracerOption(TracerOption option);
+platform::TracerOption GetTracerOption();

 #ifdef PADDLE_WITH_CUDA
 void DummyKernelAndEvent();
 #endif
paddle/fluid/pybind/pybind.cc
@@ -1555,6 +1555,12 @@ All parameter, weight, gradient are variables in Paddle.
 #endif
 #endif

+  py::enum_<platform::TracerOption>(m, "TracerOption", py::arithmetic())
+      .value("kDefault", platform::TracerOption::kDefault)
+      .value("kOpDetail", platform::TracerOption::kOpDetail)
+      .value("kAllOpDetail", platform::TracerOption::kAllOpDetail)
+      .export_values();
+
   py::enum_<platform::ProfilerState>(m, "ProfilerState", py::arithmetic())
       .value("kDisabled", platform::ProfilerState::kDisabled)
       .value("kCPU", platform::ProfilerState::kCPU)
@@ -1571,6 +1577,7 @@ All parameter, weight, gradient are variables in Paddle.
       .value("kAve", platform::EventSortingKey::kAve)
       .export_values();

+  m.def("set_tracer_option", platform::SetTracerOption);
   m.def("enable_profiler", platform::EnableProfiler);
   m.def("disable_profiler", platform::DisableProfiler);
   m.def("is_profiler_enabled", platform::IsProfileEnabled);
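The fluid.profiler wrapper below is the intended entry point, but the bindings can also be driven directly. A minimal sketch using only the symbols registered above (assuming the pre-existing EventSortingKey.kTotal value):

    import paddle.fluid.core as core

    # what fluid.profiler.start_profiler('CPU', 'OpDetail') does internally
    core.set_tracer_option(core.TracerOption.kOpDetail)
    core.enable_profiler(core.ProfilerState.kCPU)
    # ... run the program to be profiled ...
    core.disable_profiler(core.EventSortingKey.kTotal, '/tmp/profile')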
python/paddle/fluid/profiler.py
@@ -126,7 +126,7 @@ def reset_profiler():
     core.reset_profiler()

-def start_profiler(state):
+def start_profiler(state, tracer_option='Default'):
     """
     Enable the profiler. Users can use `fluid.profiler.start_profiler` and
     `fluid.profiler.stop_profiler` to profile, which is equal to the usage
@@ -137,9 +137,15 @@ def start_profiler(state):
             or 'All'. 'CPU' means only profiling CPU; 'GPU' means profiling
             both CPU and GPU; 'All' means profiling both CPU and GPU, and
             generates timeline as well.
+        tracer_option (str, optional) : one of ['Default', 'OpDetail', 'AllOpDetail'];
+            controls the profiling level and prints results at that level.
+            'Default' prints one profiling result per op type; 'OpDetail' prints
+            detailed results per op type, covering stages such as data transform
+            and compute; 'AllOpDetail' prints the same detail, but per op name.

     Raises:
         ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
+        ValueError: If `tracer_option` is not in ['Default', 'OpDetail', 'AllOpDetail'].

     Examples:
@@ -154,6 +160,13 @@ def start_profiler(state):
                     profiler.reset_profiler()
                 # except each iteration
             profiler.stop_profiler('total', '/tmp/profile')
+
+            profiler.start_profiler('GPU', "OpDetail")
+            for iter in range(10):
+                if iter == 2:
+                    profiler.reset_profiler()
+                # except each iteration
+            profiler.stop_profiler('total', '/tmp/profile')
     """
     if core.is_profiler_enabled():
         return
@@ -165,6 +178,18 @@ def start_profiler(state):
         prof_state = core.ProfilerState.kCPU
     else:
         prof_state = core.ProfilerState.kAll
+    if tracer_option not in ['Default', 'OpDetail', 'AllOpDetail']:
+        raise ValueError(
+            "tracer option must be 'Default', 'OpDetail', 'AllOpDetail'.")
+    if tracer_option == "Default":
+        prof_tracer_option = core.TracerOption.kDefault
+    elif tracer_option == "OpDetail":
+        prof_tracer_option = core.TracerOption.kOpDetail
+    else:
+        prof_tracer_option = core.TracerOption.kAllOpDetail
+    core.set_tracer_option(prof_tracer_option)
     core.enable_profiler(prof_state)
@@ -184,8 +209,8 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
             The `max` means sorting by the maximum execution time.
             The `min` means sorting by the minimum execution time.
             The `ave` means sorting by the average execution time.
+        profile_path (str, optional) : If state == 'All', it will generate timeline,
             and write it into `profile_path`. The default profile_path is `/tmp/profile`.
-        profile_path (str, optional) : If state == 'All', it will generate timeline,

     Raises:
         ValueError: If `sorted_key` is not in
@@ -225,7 +250,10 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):

 @signature_safe_contextmanager
-def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
+def profiler(state,
+             sorted_key=None,
+             profile_path='/tmp/profile',
+             tracer_option='Default'):
     """
     The profiler interface. Different from `fluid.profiler.cuda_profiler`,
     this profiler can be used to profile both CPU and GPU program.
@@ -246,6 +274,11 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             The `ave` means sorting by the average execution time.
         profile_path (str, optional) : If state == 'All', it will generate timeline,
             and write it into `profile_path`. The default profile_path is `/tmp/profile`.
+        tracer_option (str, optional) : one of ['Default', 'OpDetail', 'AllOpDetail'];
+            controls the profiling level and prints results at that level.
+            'Default' prints one profiling result per op type; 'OpDetail' prints
+            detailed results per op type, covering stages such as data transform
+            and compute; 'AllOpDetail' prints the same detail, but per op name.

     Raises:
         ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
@@ -268,7 +301,7 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             exe = fluid.Executor(place)
             exe.run(fluid.default_startup_program())

-            with profiler.profiler('CPU', 'total', '/tmp/profile') as prof:
+            with profiler.profiler('CPU', 'total', '/tmp/profile', 'Default') as prof:
                 for i in range(epoc):
                     input = np.random.random(dshape).astype('float32')
                     exe.run(fluid.default_main_program(), feed={'data': input})
@@ -311,6 +344,6 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
             thread0::conv2d                  8       7.93456    0.291385    5.63342    0.99182    0.795243
             thread0::elementwise_add         8       1.96555    0.191884    0.518004   0.245693   0.196998
     """
-    start_profiler(state)
+    start_profiler(state, tracer_option)
     yield
     stop_profiler(sorted_key, profile_path)
python/paddle/fluid/tests/unittests/test_profiler.py
@@ -22,6 +22,7 @@ import paddle.fluid as fluid
 import paddle.fluid.profiler as profiler
 import paddle.fluid.layers as layers
 import paddle.fluid.core as core
+from paddle.fluid import compiler, Program, program_guard
 import paddle.fluid.proto.profiler.profiler_pb2 as profiler_pb2
@@ -30,7 +31,11 @@ class TestProfiler(unittest.TestCase):
     def setUpClass(cls):
         os.environ['CPU_NUM'] = str(4)

-    def net_profiler(self, state, use_parallel_executor=False):
+    def net_profiler(self,
+                     state,
+                     option,
+                     iter_range=None,
+                     use_parallel_executor=False):
         profile_path = os.path.join(tempfile.gettempdir(), "profile")
         open(profile_path, "w").write("")
         startup_program = fluid.Program()
@@ -75,7 +80,7 @@ class TestProfiler(unittest.TestCase):
                                  main_program=main_program)

         pass_acc_calculator = fluid.average.WeightedAverage()
-        with profiler.profiler(state, 'total', profile_path) as prof:
+        with profiler.profiler(state, 'total', profile_path, option) as prof:
             for iter in range(10):
                 if iter == 2:
                     profiler.reset_profiler()
@@ -94,36 +99,37 @@ class TestProfiler(unittest.TestCase):
                 pass_acc_calculator.add(value=acc, weight=b_size)
                 pass_acc = pass_acc_calculator.eval()

         data = open(profile_path, 'rb').read()
-        self.assertGreater(len(data), 0)
-        profile_pb = profiler_pb2.Profile()
-        profile_pb.ParseFromString(data)
-        self.assertGreater(len(profile_pb.events), 0)
-        for event in profile_pb.events:
-            if event.type == profiler_pb2.Event.GPUKernel:
-                if not event.detail_info and not event.name.startswith("MEM"):
-                    raise Exception(
-                        "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
-                        % event.name)
-            elif event.type == profiler_pb2.Event.CPU and (
-                    event.name.startswith("Driver API") or
-                    event.name.startswith("Runtime API")):
-                print("Warning: unregister", event.name)
+        if (len(data) > 0):
+            profile_pb = profiler_pb2.Profile()
+            profile_pb.ParseFromString(data)
+            self.assertGreater(len(profile_pb.events), 0)
+            for event in profile_pb.events:
+                if event.type == profiler_pb2.Event.GPUKernel:
+                    if not event.detail_info and not event.name.startswith(
+                            "MEM"):
+                        raise Exception(
+                            "Kernel %s missing event. Has this kernel been recorded by RecordEvent?"
+                            % event.name)
+                elif event.type == profiler_pb2.Event.CPU and (
+                        event.name.startswith("Driver API") or
+                        event.name.startswith("Runtime API")):
+                    print("Warning: unregister", event.name)

     def test_cpu_profiler(self):
-        self.net_profiler('CPU')
-        self.net_profiler('CPU', use_parallel_executor=True)
+        self.net_profiler('CPU', "Default")
+        self.net_profiler('CPU', "Default", use_parallel_executor=True)

     @unittest.skipIf(not core.is_compiled_with_cuda(),
                      "profiler is enabled only with GPU")
     def test_cuda_profiler(self):
-        self.net_profiler('GPU')
-        self.net_profiler('GPU', use_parallel_executor=True)
+        self.net_profiler('GPU', "OpDetail")
+        self.net_profiler('GPU', "OpDetail", use_parallel_executor=True)

     @unittest.skipIf(not core.is_compiled_with_cuda(),
                      "profiler is enabled only with GPU")
     def test_all_profiler(self):
-        self.net_profiler('All')
-        self.net_profiler('All', use_parallel_executor=True)
+        self.net_profiler('All', "AllOpDetail")
+        self.net_profiler('All', "AllOpDetail", use_parallel_executor=True)

 if __name__ == '__main__':