From f0d571da7b9725dc0d45c15ba5fc18fca8b02b37 Mon Sep 17 00:00:00 2001 From: yaopenghui Date: Tue, 27 Aug 2019 12:35:50 +0800 Subject: [PATCH] add cost monitor --- .../custom_trainer/feed/executor/executor.h | 4 ++ .../feed/monitor/auc_monitor.cc | 2 +- .../feed/monitor/cost_monitor.cc | 49 +++++++++++++++++ .../feed/monitor/cost_monitor.h | 53 +++++++++++++++++++ 4 files changed, 107 insertions(+), 1 deletion(-) create mode 100644 paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.cc create mode 100644 paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.h diff --git a/paddle/fluid/train/custom_trainer/feed/executor/executor.h b/paddle/fluid/train/custom_trainer/feed/executor/executor.h index f51f3f11..0c8237b8 100644 --- a/paddle/fluid/train/custom_trainer/feed/executor/executor.h +++ b/paddle/fluid/train/custom_trainer/feed/executor/executor.h @@ -37,6 +37,10 @@ public: virtual bool is_dump_all_model() { return false; } + // Time cost in milliseconds; this base implementation always returns 0. + virtual uint64_t epoch_cost() const { + return 0; + } protected: ::paddle::framework::Scope _scope; }; diff --git a/paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc b/paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc index cb472fe7..9f20e3b5 100644 --- a/paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc +++ b/paddle/fluid/train/custom_trainer/feed/monitor/auc_monitor.cc @@ -44,6 +44,7 @@ void AucMonitor::add_data(int epoch_id, const Executor* executor, SampleInstance bool AucMonitor::need_compute_result(int epoch_id, EpochAccessor* accessor) { CHECK(accessor != nullptr); uint64_t epoch_time = accessor->epoch_timestamp(epoch_id); + CHECK(_compute_interval != 0); if (epoch_time % _compute_interval != 0) { return false; } @@ -68,7 +69,6 @@ void AucMonitor::compute_result() { _auc = area / (fp * tp); _mae = Monitor::_context_ptr->environment->all_reduce_ele(_local_abserr) / (fp + tp); _rmse = sqrt(Monitor::_context_ptr->environment->all_reduce_ele(_local_sqrerr) / (fp + 
tp));
-    _rmse = sqrt(_rmse / (fp + tp));
     _actual_ctr = tp / (fp + tp);
     _predicted_ctr = Monitor::_context_ptr->environment->all_reduce_ele(_local_pred) / (fp + tp);
     _size = fp + tp;
diff --git a/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.cc b/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.cc
new file mode 100644
index 00000000..f4b48f28
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.cc
@@ -0,0 +1,68 @@
+#include "paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.h"
+
+namespace paddle {
+namespace custom_trainer {
+namespace feed {
+
+// Applies the monitor configuration. "compute_interval" is optional and must be
+// 3600 (hour) or 86400 (day); 3600 is used when the key is absent.
+// Returns 0 on success and -1 for an unsupported interval. A non-zero code from
+// Monitor::initialize is passed through, on the assumption that non-zero means
+// failure there as it does here; confirm against monitor.h.
+// NOTE(review): the template arguments of std::shared_ptr and as() were missing
+// from this copy of the patch. as<uint32_t>() follows from the variable it
+// initializes; TrainerContext is an assumption to confirm against monitor.h.
+int CostMonitor::initialize(const YAML::Node& config,
+                            std::shared_ptr<TrainerContext> context_ptr) {
+    const int base_ret = Monitor::initialize(config, context_ptr);
+    if (base_ret != 0) {
+        return base_ret;
+    }
+    _compute_interval = 3600;
+    if (config["compute_interval"]) {
+        const uint32_t interval = config["compute_interval"].as<uint32_t>();
+        // A value is invalid only if it matches neither supported interval; with
+        // `||` the condition would hold for every input, 3600 and 86400 included.
+        if (interval != 3600 && interval != 86400) {
+            LOG(FATAL) << " CostMonitor config compute_interval just support hour: 3600 or day: 86400. ";
+            return -1;
+        }
+        _compute_interval = interval;
+    }
+    return 0;  // the function is declared int; falling off the end is undefined
+}
+
+// Adds executor->epoch_cost() to the running total and counts one more record.
+// epoch_id, instance and num are not read by this monitor.
+void CostMonitor::add_data(int epoch_id,
+                           const Executor* executor,
+                           SampleInstance* instance,
+                           size_t num) {
+    CHECK(executor != nullptr);
+    _total_time_ms += executor->epoch_cost();
+    ++_total_cnt;
+}
+
+// Returns true when the timestamp the accessor reports for epoch_id is an exact
+// multiple of _compute_interval.
+bool CostMonitor::need_compute_result(int epoch_id, EpochAccessor* accessor) {
+    CHECK(accessor != nullptr);
+    const uint64_t epoch_time = accessor->epoch_timestamp(epoch_id);
+    CHECK(_compute_interval != 0);  // guards the modulo below
+    return epoch_time % _compute_interval == 0;
+}
+
+// Renders the stored average as "<monitor name>: Cost Time=<average>".
+std::string CostMonitor::format_result() {
+    char buf[1024];
+    // uint64_t is not `unsigned long` on every platform, so convert to the exact
+    // type that %llu expects instead of relying on %lu.
+    snprintf(buf, sizeof(buf), "%s: Cost Time=%llu",
+             Monitor::_name.c_str(),
+             static_cast<unsigned long long>(_avg_time_ms));
+    return std::string(buf);
+}
+
+}  // namespace feed
+}  // namespace custom_trainer
+}  // namespace paddle
diff --git a/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.h b/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.h
new file mode 100644
index 
00000000..56a0ecfb
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/monitor/cost_monitor.h
@@ -0,0 +1,66 @@
+#pragma once
+#include <string>
+#include <cmath> //std::lround
+#include "paddle/fluid/train/custom_trainer/feed/monitor/monitor.h"
+
+namespace paddle {
+namespace custom_trainer {
+namespace feed {
+
+// Cost-time profile: averages Executor::epoch_cost() (milliseconds) over the
+// records added since the last reset().
+// NOTE(review): the two system #include targets and the std::shared_ptr template
+// argument were missing from this copy of the patch. <cmath> follows from the
+// std::lround remark and <string> from the std::string use below; TrainerContext
+// is an assumption to confirm against monitor.h.
+class CostMonitor : public Monitor {
+public:
+    // _compute_interval starts at 3600, the same default initialize() applies, so
+    // it never holds an indeterminate value.
+    CostMonitor()
+        : _total_time_ms(0), _total_cnt(0), _avg_time_ms(0), _compute_interval(3600) {}
+    virtual ~CostMonitor() {}
+
+    // Reads this monitor's settings from config; defined in cost_monitor.cc.
+    virtual int initialize(const YAML::Node& config,
+        std::shared_ptr<TrainerContext> context_ptr) override;
+
+    // Adds one record; the monitor pulls what it needs from the executor.
+    virtual void add_data(int epoch_id,
+        const Executor* executor,
+        SampleInstance* instance,
+        size_t num);
+
+    // Whether the result should be computed for this epoch.
+    virtual bool need_compute_result(int epoch_id, EpochAccessor* accessor);
+
+    // Computes the current result: total time divided by record count, using
+    // integer division. CHECK-fails when no record has been added.
+    virtual void compute_result() {
+        CHECK(_total_cnt != 0);
+        _avg_time_ms = _total_time_ms / _total_cnt;
+    }
+
+    // Returns the formatted statistics for the current result.
+    virtual std::string format_result();
+
+    // Clears the accumulated totals and the stored average.
+    virtual void reset() {
+        _total_time_ms = 0;
+        _total_cnt = 0;
+        _avg_time_ms = 0;
+    }
+
+    // No _name member is declared here: format_result() reads Monitor::_name, and a
+    // second _name in this class would only hide it.
+
+private:
+    uint64_t _total_time_ms;     // sum of the costs passed in through add_data()
+    uint64_t _total_cnt;         // number of records added
+    uint64_t _avg_time_ms;       // result of the last compute_result()
+    uint32_t _compute_interval;  // period tested by need_compute_result()
+};
+
+}  // namespace feed
+}  // namespace custom_trainer
+}  // namespace paddle
-- 
GitLab