未验证 提交 7578fcba 编写于 作者: W wangchaochaohu 提交者: GitHub

Profile code refine (#22800)

* add profiler_help.h to refine the code test=develop
上级 b7782ea7
此差异已折叠。
...@@ -33,6 +33,9 @@ limitations under the License. */ ...@@ -33,6 +33,9 @@ limitations under the License. */
namespace paddle { namespace paddle {
namespace platform { namespace platform {
// Integer codes named for switching the profiler on and off.
// NOTE(review): where these codes are consumed is not visible in this
// header - confirm against the callers before changing the values.
constexpr int kEnableProfiler = 1;
constexpr int kDisableProfiler = 2;
enum class ProfilerState { enum class ProfilerState {
kDisabled, // disabled state kDisabled, // disabled state
kCPU, // CPU profiling state kCPU, // CPU profiling state
...@@ -53,12 +56,46 @@ enum class TracerOption { ...@@ -53,12 +56,46 @@ enum class TracerOption {
kAllOpDetail, // print the detail profiling result of different op name kAllOpDetail, // print the detail profiling result of different op name
}; };
void Mark(const std::string& name); // Candidate keys to sort the profiling report
// Key selecting how the profiling report is ordered.
// NOTE(review): the per-enumerator meanings below are inferred from the
// names and from the similarly named EventItem fields - confirm against the
// code that performs the sorting.
enum class EventSortingKey {
kDefault,  // presumably the report's default order - TODO confirm
kCalls,    // presumably order by EventItem::calls
kTotal,    // presumably order by EventItem::total_time
kMin,      // presumably order by EventItem::min_time
kMax,      // presumably order by EventItem::max_time
kAve,      // presumably order by EventItem::ave_time
kCPUTime,  // presumably order by EventItem::cpu_time
kGPUTime   // presumably order by EventItem::gpu_time
};
void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes, struct MemoryProfierReport {
const Place& place); size_t alloc_times{0};
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes, size_t alloc_size{0};
const Place& place); size_t free_times{0};
size_t free_size{0};
};
// The information of each event given in the profiling report.
// NOTE(review): no member has a default initializer (unlike OverHead and
// MemoryProfierReport in this header), so `EventItem item;` leaves every
// numeric member uninitialized until it is assigned explicitly.
struct EventItem {
std::string name;   // event name
int calls;          // presumably how many times the event was recorded
double total_time;  // time unit is not visible here - TODO confirm
double max_time;
double ave_time;    // presumably the average time per call - TODO confirm
double min_time;
double cpu_time;
double gpu_time;
float ratio;        // presumably this event's share of the overall time - TODO confirm
};
// Aggregate holding a total time, compute/framework ratios and memcpy event
// items.
// NOTE(review): field meanings are inferred from their names; how the report
// fills and prints them is not visible in this header - confirm.
struct OverHead {
bool print = false;  // presumably whether this summary should be printed - TODO confirm
double total_time = 0.;
float compute_ratio = 0.0f;
float framework_ratio = 0.0f;
// No initializer here, and EventItem's numeric members have no defaults of
// their own, so they stay uninitialized until assigned.
EventItem memcpy_item;
std::vector<EventItem> sub_memcpy_items;
};
struct MemEvenRecorder { struct MemEvenRecorder {
public: public:
...@@ -89,9 +126,6 @@ struct MemEvenRecorder { ...@@ -89,9 +126,6 @@ struct MemEvenRecorder {
DISABLE_COPY_AND_ASSIGN(MemEvenRecorder); DISABLE_COPY_AND_ASSIGN(MemEvenRecorder);
}; };
// Presumably open and close a profiling event identified by `name`.
// NOTE(review): only the declarations are visible here; the ownership and
// lifetime of the Event* returned by PushEvent must be confirmed against the
// implementation.
Event* PushEvent(const std::string& name);
void PopEvent(const std::string& name);
struct RecordEvent { struct RecordEvent {
RecordEvent(const std::string& name, RecordEvent(const std::string& name,
const RecordRole role = RecordRole::kOrdinary); const RecordRole role = RecordRole::kOrdinary);
...@@ -127,22 +161,6 @@ struct RecordBlock { ...@@ -127,22 +161,6 @@ struct RecordBlock {
uint64_t start_ns_; uint64_t start_ns_;
}; };
// Return the event lists of all threads. If the returned value is called
// event_lists, event_lists[i][j] represents the j-th Event of the i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
// Candidate keys to sort the profiling report.
// NOTE(review): the per-enumerator meanings below are inferred from the
// names only - confirm against the code that performs the sorting.
enum class EventSortingKey {
kDefault,  // presumably the report's default order - TODO confirm
kCalls,    // presumably order by number of calls
kTotal,    // presumably order by total time
kMin,      // presumably order by minimum time
kMax,      // presumably order by maximum time
kAve,      // presumably order by average time
kCPUTime,  // presumably order by CPU time
kGPUTime   // presumably order by GPU time
};
template <typename T> template <typename T>
struct EventList { struct EventList {
constexpr static size_t kMB = 1024 * 1024; constexpr static size_t kMB = 1024 * 1024;
...@@ -178,25 +196,27 @@ struct EventList { ...@@ -178,25 +196,27 @@ struct EventList {
std::forward_list<std::vector<T>> event_blocks; std::forward_list<std::vector<T>> event_blocks;
}; };
// NOTE(review): only declarations are visible in this header; the behavior
// notes below are inferred from names and signatures - confirm against the
// implementation.

// Presumably records a single marker event labelled `name` - TODO confirm.
void Mark(const std::string& name);

// Memory event hooks. Each takes a start/end timestamp pair (the `_ns`
// suffix suggests nanoseconds - TODO confirm), a byte count, the Place
// involved and an annotation string.
void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);

// Presumably open and close a profiling event identified by `name`; the
// ownership of the Event* returned by PushEvent is not visible here.
Event* PushEvent(const std::string& name);
void PopEvent(const std::string& name);

// Return the event lists of all threads. If the returned value is called
// event_lists, event_lists[i][j] represents the j-th Event of the i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
// Enable the profiling function. // Enable the profiling function.
void EnableProfiler(ProfilerState state); void EnableProfiler(ProfilerState state);
// Clear the g_all_event_lists, which is total event lists of all threads. // Clear the g_all_event_lists, which is total event lists of all threads.
void ResetProfiler(); void ResetProfiler();
void DisableProfiler(EventSortingKey sorted_key, void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path); const std::string& profile_path);
// Integer codes named for switching the profiler on and off.
// NOTE(review): where these codes are consumed is not visible in this
// header - confirm against the callers before changing the values.
constexpr int kEnableProfiler = 1;
constexpr int kDisableProfiler = 2;
// Test if the profiler is currently enabled. // Test if the profiler is currently enabled.
bool IsProfileEnabled(); bool IsProfileEnabled();
// Whether the trainer should send profiling state to PS. // Whether the trainer should send profiling state to PS.
bool ShouldSendProfileState(); bool ShouldSendProfileState();
// Mark current process as PS by assigning a listener id.
void SetProfileListener();
// NOTE(review): presumably returns the id assigned by SetProfileListener();
// the value reported before it has been called is not visible here - confirm.
int64_t ListenerId();
std::string OpName(const framework::VariableNameMap& name_map, std::string OpName(const framework::VariableNameMap& name_map,
const std::string& type_name); const std::string& type_name);
void SetTracerOption(TracerOption option); void SetTracerOption(TracerOption option);
...@@ -205,5 +225,9 @@ platform::TracerOption GetTracerOption(); ...@@ -205,5 +225,9 @@ platform::TracerOption GetTracerOption();
void DummyKernelAndEvent(); void DummyKernelAndEvent();
#endif #endif
// Mark current process as PS by assigning a listener id.
void SetProfileListener();
// NOTE(review): presumably returns the id assigned by SetProfileListener();
// the value reported before it has been called is not visible here - confirm.
int64_t ListenerId();
} // namespace platform } // namespace platform
} // namespace paddle } // namespace paddle
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册