未验证 提交 7578fcba 编写于 作者: W wangchaochaohu 提交者: GitHub

Profile code refine (#22800)

* add profiler_help.h to refine the code test=develop
上级 b7782ea7
此差异已折叠。
......@@ -33,6 +33,9 @@ limitations under the License. */
namespace paddle {
namespace platform {
// Integer codes for the two profiler switch states.
// NOTE(review): where these codes are consumed is not visible in this
// header excerpt - confirm against the callers before changing the values.
constexpr int kEnableProfiler = 1;
constexpr int kDisableProfiler = 2;
enum class ProfilerState {
kDisabled, // disabled state
kCPU, // CPU profiling state
......@@ -53,12 +56,46 @@ enum class TracerOption {
kAllOpDetail, // print the detail profiling result of different op name
};
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably records a named marker in the profiling data -
// confirm against the implementation.
void Mark(const std::string& name);
// Keys that may be used to order the entries of the profiling report.
// The numeric values are written out explicitly; they are the same values
// the compiler assigns implicitly (0 for the first enumerator, then +1).
// NOTE(review): the names appear to mirror the fields of EventItem (calls,
// total/min/max/ave time, CPU/GPU time) - confirm in the sorting code.
enum class EventSortingKey {
  kDefault = 0,
  kCalls = 1,
  kTotal = 2,
  kMin = 3,
  kMax = 4,
  kAve = 5,
  kCPUTime = 6,
  kGPUTime = 7
};
// Declarations of the memory-event hooks (four-argument form); the
// definitions are not visible in this header.
// NOTE(review): start_ns / end_ns are presumably timestamps in nanoseconds,
// bytes the size of the memory involved and place the device it lives on -
// confirm against the implementation.
void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place);
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place);
// Four counters describing memory activity; every counter starts at zero.
// NOTE(review): presumably the *_times fields count allocation / free
// operations and the *_size fields accumulate byte totals - confirm where
// the report is filled in. (The "Profier" spelling is part of the public
// type name and is kept as-is.)
struct MemoryProfierReport {
  size_t alloc_times = 0;
  size_t alloc_size = 0;
  size_t free_times = 0;
  size_t free_size = 0;
};
// The information of each event given in the profiling report.
//
// This is a plain aggregate: no member has a default initializer, so a
// default-initialized object (e.g. `EventItem item;`) leaves every numeric
// field with an indeterminate value. Callers must set all fields themselves.
// The member order matters to any code that brace-initializes this struct.
// NOTE(review): the time unit of the *_time fields and the exact meaning of
// `ratio` are not visible in this header - confirm where items are built.
struct EventItem {
// Name of the event.
std::string name;
// Call count for the event.
int calls;
// Total / maximum / average / minimum time of the event.
double total_time;
double max_time;
double ave_time;
double min_time;
// Time attributed to the CPU and to the GPU respectively.
double cpu_time;
double gpu_time;
// NOTE(review): presumably this event's share of the overall time - confirm.
float ratio;
};
// Summary record built from a total time, two ratios and memcpy event items.
// The scalar members start out as false / zero. memcpy_item has no
// initializer here, so its numeric fields keep whatever EventItem's own
// default initialization gives them.
// NOTE(review): `print` presumably controls whether this summary is emitted
// in the report - confirm in the printing code.
struct OverHead {
  bool print{false};
  double total_time{0.};
  float compute_ratio{0.0f};
  float framework_ratio{0.0f};
  EventItem memcpy_item;
  std::vector<EventItem> sub_memcpy_items;
};
struct MemEvenRecorder {
public:
......@@ -89,9 +126,6 @@ struct MemEvenRecorder {
DISABLE_COPY_AND_ASSIGN(MemEvenRecorder);
};
// Declarations only; the definitions are not visible in this header.
// PushEvent returns a pointer to an Event; PopEvent returns nothing.
// NOTE(review): presumably a Push/Pop pair with the same name brackets one
// profiled region, and the returned pointer's ownership stays with the
// profiler - confirm against the implementation.
Event* PushEvent(const std::string& name);
void PopEvent(const std::string& name);
struct RecordEvent {
RecordEvent(const std::string& name,
const RecordRole role = RecordRole::kOrdinary);
......@@ -127,22 +161,6 @@ struct RecordBlock {
uint64_t start_ns_;
};
// Returns the event lists of all threads. If the returned value is named
// event_lists, then event_lists[i][j] is the j-th Event of the i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
// Keys that may be used to order the entries of the profiling report.
// The numeric values are written out explicitly; they are the same values
// the compiler assigns implicitly (0 for the first enumerator, then +1).
// NOTE(review): the names appear to mirror the fields of EventItem (calls,
// total/min/max/ave time, CPU/GPU time) - confirm in the sorting code.
enum class EventSortingKey {
  kDefault = 0,
  kCalls = 1,
  kTotal = 2,
  kMin = 3,
  kMax = 4,
  kAve = 5,
  kCPUTime = 6,
  kGPUTime = 7
};
template <typename T>
struct EventList {
constexpr static size_t kMB = 1024 * 1024;
......@@ -178,25 +196,27 @@ struct EventList {
std::forward_list<std::vector<T>> event_blocks;
};
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably records a named marker in the profiling data -
// confirm against the implementation.
void Mark(const std::string& name);
// Declarations of the memory-event hooks (form taking an annotation
// string); the definitions are not visible in this header.
// NOTE(review): start_ns / end_ns are presumably timestamps in nanoseconds,
// bytes the size of the memory involved, place the device it lives on and
// annotation a free-form label attached to the event - confirm against the
// implementation.
void PushMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);
void PopMemEvent(uint64_t start_ns, uint64_t end_ns, size_t bytes,
const Place& place, const std::string& annotation);
// Declarations only; the definitions are not visible in this header.
// PushEvent returns a pointer to an Event; PopEvent returns nothing.
// NOTE(review): presumably a Push/Pop pair with the same name brackets one
// profiled region, and the returned pointer's ownership stays with the
// profiler - confirm against the implementation.
Event* PushEvent(const std::string& name);
void PopEvent(const std::string& name);
// Returns the event lists of all threads. If the returned value is named
// event_lists, then event_lists[i][j] is the j-th Event of the i-th thread.
std::vector<std::vector<Event>> GetAllEvents();
// Enables profiling. `state` is a ProfilerState value (the enum declared
// earlier in this header).
void EnableProfiler(ProfilerState state);
// Clears g_all_event_lists, which holds the event lists of all threads.
void ResetProfiler();
// Declaration only; the definition is not visible in this header.
// `sorted_key` is one of the EventSortingKey report-sorting keys.
// NOTE(review): presumably stops profiling and writes the report, with
// `profile_path` as the output location - confirm against the
// implementation.
void DisableProfiler(EventSortingKey sorted_key,
const std::string& profile_path);
// Integer codes for the two profiler switch states.
// NOTE(review): where these codes are consumed is not visible in this
// header excerpt - confirm against the callers before changing the values.
constexpr int kEnableProfiler = 1;
constexpr int kDisableProfiler = 2;
// Returns whether the profiler is currently enabled.
bool IsProfileEnabled();
// Returns whether the trainer should send its profiling state to the PS.
// NOTE(review): "PS" presumably means parameter server - confirm.
bool ShouldSendProfileState();
// Marks the current process as a PS by assigning it a listener id.
// NOTE(review): "PS" presumably means parameter server - confirm.
void SetProfileListener();
// NOTE(review): presumably returns the id assigned by SetProfileListener();
// the definition is not visible here - confirm.
int64_t ListenerId();
// Declaration only; the definition is not visible in this header.
// Takes a variable-name map and an operator type name and returns a string.
// NOTE(review): presumably builds a display name for the op from both
// arguments - confirm the exact format in the implementation.
std::string OpName(const framework::VariableNameMap& name_map,
const std::string& type_name);
// Declaration only; takes a TracerOption value (the enum declared earlier
// in this header).
// NOTE(review): presumably stores the option that GetTracerOption() later
// returns - confirm.
void SetTracerOption(TracerOption option);
......@@ -205,5 +225,9 @@ platform::TracerOption GetTracerOption();
// Declaration only. It is immediately followed by an #endif, so it sits
// inside a conditional-compilation region whose opening directive is not
// visible in this excerpt.
// NOTE(review): confirm under which build configuration it is available.
void DummyKernelAndEvent();
#endif
// Marks the current process as a PS by assigning it a listener id.
// NOTE(review): "PS" presumably means parameter server - confirm.
void SetProfileListener();
// NOTE(review): presumably returns the id assigned by SetProfileListener();
// the definition is not visible here - confirm.
int64_t ListenerId();
} // namespace platform
} // namespace paddle
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册