From 6e1383e1dda9e458bafc77423abf8e52ba2bd503 Mon Sep 17 00:00:00 2001 From: Smith-Cruise Date: Tue, 11 Nov 2025 18:49:26 +0000 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=20parquet/orc=20profile=20?= =?UTF-8?q?=E7=BB=9F=E8=AE=A1=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/share/diagnosis/ob_profile_name_def.h | 6 +- src/share/diagnosis/ob_sql_monitor_statname.h | 18 +++-- .../basic/ob_lake_table_reader_profile.cpp | 77 +++++++++++++++---- .../basic/ob_lake_table_reader_profile.h | 74 +++++++++++++----- .../engine/table/ob_orc_table_row_iter.cpp | 41 ++++++---- src/sql/engine/table/ob_orc_table_row_iter.h | 2 +- .../table/ob_parquet_table_row_iter.cpp | 14 ++-- .../engine/table/ob_parquet_table_row_iter.h | 6 +- 8 files changed, 175 insertions(+), 63 deletions(-) diff --git a/src/share/diagnosis/ob_profile_name_def.h b/src/share/diagnosis/ob_profile_name_def.h index 231336f3c4..b10eb2fb03 100644 --- a/src/share/diagnosis/ob_profile_name_def.h +++ b/src/share/diagnosis/ob_profile_name_def.h @@ -154,10 +154,14 @@ OP_PROFILE_NAME_DEF(PHY_END) #ifdef OTHER_PROFILE_NAME_DEF OTHER_PROFILE_NAME_DEF(OTHER_PROFILE_NAME_BEGIN, "other profile name begin") /* 1000 */ OTHER_PROFILE_NAME_DEF(LAKE_TABLE_FILE_READER, "Lake Table File Reader") -OTHER_PROFILE_NAME_DEF(LAKE_TABLE_PREFETCH, "Lake Table Prefetch") +OTHER_PROFILE_NAME_DEF(LAKE_TABLE_PREFETCH, "Lake Table Prefetch") OTHER_PROFILE_NAME_DEF(LAKE_TABLE_MEM_CACHE, "Lake Table Memory Cache") OTHER_PROFILE_NAME_DEF(LAKE_TABLE_DISK_CACHE, "Lake Table Disk Cache") OTHER_PROFILE_NAME_DEF(LAKE_TABLE_STORAGE_IO, "Lake Table Storage IO") +OTHER_PROFILE_NAME_DEF(LAKE_TABLE_EAGER, "Lake Table Eager") +OTHER_PROFILE_NAME_DEF(LAKE_TABLE_NON_EAGER, "Lake Table Non Eager") +OTHER_PROFILE_NAME_DEF(LAKE_TABLE_PARQUET_FILE_READER, "Lake Table Parquet File Reader") +OTHER_PROFILE_NAME_DEF(LAKE_TABLE_ORC_FILE_READER, "Lake Table ORC File Reader") OTHER_PROFILE_NAME_DEF(OTHER_PROFILE_NAME_END, "other profile name end") #endif diff --git a/src/share/diagnosis/ob_sql_monitor_statname.h b/src/share/diagnosis/ob_sql_monitor_statname.h index bffd4beeb1..29f0d17946 100644 --- a/src/share/diagnosis/ob_sql_monitor_statname.h +++ b/src/share/diagnosis/ob_sql_monitor_statname.h @@ -129,29 +129,33 @@ SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_PREBUFFER_COUNT, metric::Unit::INT, "lake ta SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MISS_COUNT, metric::Unit::INT, "lake table miss count", "lake table miss count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_HIT_COUNT, metric::Unit::INT, "lake table hit count", "lake table hit count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_ASYNC_IO_COUNT, metric::Unit::INT, "lake table async io count", "lake table async io count", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_ASYNC_IO_SIZE, metric::Unit::INT, "lake table async io size", "lake table async io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_ASYNC_IO_SIZE, metric::Unit::BYTES, "lake table async io size", "lake table async io size", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_TOTAL_IO_WAIT_TIME, metric::Unit::TIME_NS, "lake table total io wait time", "lake table total io wait time", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MAX_IO_WAIT_TIME, metric::Unit::TIME_NS, "lake table max io wait time", "lake table max io wait time", E_MAX, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_TOTAL_READ_SIZE, metric::Unit::INT, "lake table total read size", "lake table total read size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_TOTAL_READ_SIZE, metric::Unit::BYTES, "lake table total read size", "lake table total read size", M_SUM, metric::Level::CRITICAL) // Lake Table IO SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_READ_COUNT, metric::Unit::INT, "lake table read count", "lake table read count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_SYNC_READ_COUNT, metric::Unit::INT, "lake table sync read count", "lake table sync read count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_ASYNC_READ_COUNT, metric::Unit::INT, "lake table async read count", "lake table async read count", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_READ_IO_SIZE, metric::Unit::INT, "lake table read io size", "lake table read io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_READ_IO_SIZE, metric::Unit::BYTES, "lake table read io size", "lake table read io size", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_HIT_COUNT, metric::Unit::INT, "lake table memory cache hit count", "lake table memory cache hit count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_MISS_COUNT, metric::Unit::INT, "lake table memory cache miss count", "lake table memory cache miss count", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_HIT_IO_SIZE, metric::Unit::INT, "lake table memory cache hit io size", "lake table memory cache hit io size", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_MISS_IO_SIZE, metric::Unit::INT, "lake table memory cache miss io size", "lake table memory cache miss io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_HIT_IO_SIZE, metric::Unit::BYTES, "lake table memory cache hit io size", "lake table memory cache hit io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MEM_CACHE_MISS_IO_SIZE, metric::Unit::BYTES, "lake table memory cache miss io size", "lake table memory cache miss io size", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_HIT_COUNT, metric::Unit::INT, "lake table disk cache hit count", "lake table disk cache hit count", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_MISS_COUNT, metric::Unit::INT, "lake table disk cache miss count", "lake table disk cache miss count", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_HIT_IO_SIZE, metric::Unit::INT, "lake table disk cache hit io size", "lake table disk cache hit io size", M_SUM, metric::Level::CRITICAL) -SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_MISS_IO_SIZE, metric::Unit::INT, "lake table disk cache miss io size", "lake table disk cache miss io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_HIT_IO_SIZE, metric::Unit::BYTES, "lake table disk cache hit io size", "lake table disk cache hit io size", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_DISK_CACHE_MISS_IO_SIZE, metric::Unit::BYTES, "lake table disk cache miss io size", "lake table disk cache miss io size", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_MAX_IO_TIME, metric::Unit::TIME_NS, "lake table max io time", "lake table total io wait time", E_MAX, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_AVG_IO_TIME, metric::Unit::TIME_NS, "lake table avg io time", "lake table avg io time", M_AVG, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_TOTAL_IO_TIME, metric::Unit::TIME_NS, "lake table total io time", "lake table total io time", M_SUM, metric::Level::CRITICAL) SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_STORAGE_IO_COUNT, metric::Unit::INT, "lake table storage io count", "lake table storage io count", M_SUM, metric::Level::CRITICAL) +// Lake Table Reader +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_SELECTED_STRIPE_COUNT, metric::Unit::INT, "lake table selected stripe count", "lake table selected stripe count", M_SUM, metric::Level::CRITICAL) +SQL_MONITOR_STATNAME_DEF(LAKE_TABLE_SKIPPED_STRIPE_COUNT, metric::Unit::INT, "lake table skipped stripe count", "lake table skipped stripe count", M_SUM, metric::Level::CRITICAL) + //end SQL_MONITOR_STATNAME_DEF(MONITOR_STATNAME_END, metric::Unit::INVALID, "monitor end", "monitor stat name end", E_MIN | E_MAX, metric::Level::AD_HOC) #endif diff --git a/src/sql/engine/basic/ob_lake_table_reader_profile.cpp b/src/sql/engine/basic/ob_lake_table_reader_profile.cpp index 87d40b6896..27ddb53d97 100644 --- a/src/sql/engine/basic/ob_lake_table_reader_profile.cpp +++ b/src/sql/engine/basic/ob_lake_table_reader_profile.cpp @@ -11,10 +11,8 @@ */ #define USING_LOG_PREFIX SQL_ENG -#include "share/diagnosis/ob_runtime_profile.h" #include "ob_lake_table_reader_profile.h" - namespace oceanbase { using namespace common; @@ -26,21 +24,40 @@ void ObLakeTableIMetrics::set_label(const common::ObString &label) label_.assign_ptr(label.ptr(), label.length()); } -int ObLakeTableReaderMetrics::update_profile() +int ObLakeTableORCReaderMetrics::update_profile() { int ret = OB_SUCCESS; - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_FILE_READER); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_ORC_FILE_READER); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_FILE_COUNT, selected_file_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_FILE_COUNT, skipped_file_count_); - SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_PAGE_COUNT, selected_page_count_); - SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_PAGE_COUNT, skipped_page_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_STRIPE_COUNT, selected_stripe_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_STRIPE_COUNT, skipped_stripe_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_ROW_GROUP_COUNT, selected_row_group_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_ROW_GROUP_COUNT, skipped_row_group_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_READ_ROW_COUNT, read_rows_count_); return ret; } -void ObLakeTableReaderMetrics::dump_metrics() +void ObLakeTableORCReaderMetrics::dump_metrics() +{ + LOG_INFO("dump metrics", K_(label), KPC(this)); +} + +int ObLakeTableParquetReaderMetrics::update_profile() +{ + int ret = OB_SUCCESS; + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_PARQUET_FILE_READER); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_FILE_COUNT, selected_file_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_FILE_COUNT, skipped_file_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_ROW_GROUP_COUNT, selected_row_group_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_ROW_GROUP_COUNT, skipped_row_group_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SELECTED_PAGE_COUNT, selected_page_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SKIPPED_PAGE_COUNT, skipped_page_count_); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_READ_ROW_COUNT, read_rows_count_); + return ret; +} + +void ObLakeTableParquetReaderMetrics::dump_metrics() { LOG_INFO("dump metrics", K_(label), KPC(this)); } @@ -48,7 +65,22 @@ void ObLakeTableReaderMetrics::dump_metrics() int ObLakeTablePreBufferMetrics::update_profile() { int ret = OB_SUCCESS; - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_PREFETCH); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_PREFETCH); + if (label_.case_compare_equal(PREBUFFER_METRICS_LABEL)) { + ret = update_specific_profile_(ObProfileId::LAKE_TABLE_NON_EAGER); + } else if (label_.case_compare_equal(EAGER_PREBUFFER_METRICS_LABEL)) { + ret = update_specific_profile_(ObProfileId::LAKE_TABLE_EAGER); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid metric label", K_(label), K(ret)); + } + return ret; +} + +int ObLakeTablePreBufferMetrics::update_specific_profile_(ObProfileId eager_intend) +{ + int ret = OB_SUCCESS; + ObProfileSwitcher switcher(eager_intend); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_PREBUFFER_COUNT, prebuffer_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MISS_COUNT, miss_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_HIT_COUNT, hit_count_); @@ -66,33 +98,52 @@ void ObLakeTablePreBufferMetrics::dump_metrics() } int ObLakeTableIOMetrics::update_profile() +{ + int ret = OB_SUCCESS; + if (label_.case_compare_equal(IO_METRICS_LABEL)) { + ret = update_specific_profile_(ObProfileId::LAKE_TABLE_NON_EAGER); + } else if (label_.case_compare_equal(EAGER_IO_METRICS_LABEL)) { + ret = update_specific_profile_(ObProfileId::LAKE_TABLE_EAGER); + } else { + ret = OB_INVALID_ARGUMENT; + LOG_WARN("invalid metric label", K_(label), K(ret)); + } + return ret; +} + +int ObLakeTableIOMetrics::update_specific_profile_(ObProfileId eager_intend) { int ret = OB_SUCCESS; { - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_FILE_READER); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_FILE_READER); + ObProfileSwitcher eager_switcher(eager_intend); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_READ_COUNT, access_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_SYNC_READ_COUNT, sync_read_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_ASYNC_READ_COUNT, async_read_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_READ_IO_SIZE, access_io_size_); } { - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_MEM_CACHE); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_MEM_CACHE); + ObProfileSwitcher eager_switcher(eager_intend); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MEM_CACHE_HIT_COUNT, mem_cache_hit_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MEM_CACHE_MISS_COUNT, mem_cache_miss_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MEM_CACHE_HIT_IO_SIZE, mem_cache_hit_io_size_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MEM_CACHE_MISS_IO_SIZE, mem_cache_miss_io_size_); } { - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_DISK_CACHE); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_DISK_CACHE); + ObProfileSwitcher eager_switcher(eager_intend); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_DISK_CACHE_HIT_COUNT, disk_cache_hit_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_DISK_CACHE_MISS_COUNT, disk_cache_miss_count_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_DISK_CACHE_HIT_IO_SIZE, disk_cache_hit_io_size_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_DISK_CACHE_MISS_IO_SIZE, disk_cache_miss_io_size_); } { - ObProfileSwitcher swicher(ObProfileId::LAKE_TABLE_STORAGE_IO); + ObProfileSwitcher switcher(ObProfileId::LAKE_TABLE_STORAGE_IO); + ObProfileSwitcher eager_switcher(eager_intend); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_MAX_IO_TIME, max_io_time_us_); - SET_METRIC_VAL(ObMetricId::LAKE_TABLE_AVG_IO_TIME, (io_count_ > 0 ? total_io_time_us_ / io_count_ : 0)); + SET_METRIC_VAL(ObMetricId::LAKE_TABLE_AVG_IO_TIME, + (io_count_ > 0 ? total_io_time_us_ / io_count_ : 0)); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_TOTAL_IO_TIME, total_io_time_us_); SET_METRIC_VAL(ObMetricId::LAKE_TABLE_STORAGE_IO_COUNT, io_count_); } diff --git a/src/sql/engine/basic/ob_lake_table_reader_profile.h b/src/sql/engine/basic/ob_lake_table_reader_profile.h index e4473797f4..012fc270a6 100644 --- a/src/sql/engine/basic/ob_lake_table_reader_profile.h +++ b/src/sql/engine/basic/ob_lake_table_reader_profile.h @@ -14,8 +14,9 @@ #define OCEANBASE_BASIC_OB_LAKE_TABLE_READER_PROFILE_H_ #include "lib/container/ob_se_array.h" -#include "lib/statistic_event/ob_stat_event.h" #include "lib/stat/ob_diagnostic_info_guard.h" +#include "lib/statistic_event/ob_stat_event.h" +#include "share/diagnosis/ob_runtime_profile.h" namespace oceanbase { @@ -24,6 +25,12 @@ using namespace lib; namespace sql { +static constexpr const char *READER_METRICS_LABEL = "READER_METRICS"; +static constexpr const char *IO_METRICS_LABEL = "IO_METRICS"; +static constexpr const char *EAGER_IO_METRICS_LABEL = "EAGER_IO_METRICS"; +static constexpr const char *PREBUFFER_METRICS_LABEL = "PREBUFFER_METRICS"; +static constexpr const char *EAGER_PREBUFFER_METRICS_LABEL = "EAGER_PREBUFFER_METRICS"; + struct ObLakeTableIMetrics { ObLakeTableIMetrics() : label_() @@ -36,27 +43,52 @@ struct ObLakeTableIMetrics common::ObString label_; }; -struct ObLakeTableReaderMetrics : public ObLakeTableIMetrics +struct ObLakeTableParquetReaderMetrics : public ObLakeTableIMetrics { public: - ObLakeTableReaderMetrics() : - selected_file_count_(0), skipped_file_count_(0), selected_page_count_(0), - skipped_page_count_(0), selected_row_group_count_(0), skipped_row_group_count_(0), - read_rows_count_() - {} + ObLakeTableParquetReaderMetrics() = default; + ~ObLakeTableParquetReaderMetrics() = default; + virtual int update_profile() override; + virtual void dump_metrics() override; + VIRTUAL_TO_STRING_KV(K_(selected_file_count), + K_(skipped_file_count), + K_(selected_row_group_count), + K_(skipped_row_group_count), + K_(selected_page_count), + K_(skipped_page_count), + K_(read_rows_count)); + + int64_t selected_file_count_ = 0; + int64_t skipped_file_count_ = 0; + int64_t selected_row_group_count_ = 0; + int64_t skipped_row_group_count_ = 0; + int64_t selected_page_count_ = 0; + int64_t skipped_page_count_ = 0; + int64_t read_rows_count_ = 0; +}; + +struct ObLakeTableORCReaderMetrics : public ObLakeTableIMetrics +{ +public: + ObLakeTableORCReaderMetrics() = default; + ~ObLakeTableORCReaderMetrics() = default; virtual int update_profile() override; virtual void dump_metrics() override; - VIRTUAL_TO_STRING_KV(K_(selected_file_count), K_(skipped_file_count), K_(selected_page_count), - K_(skipped_page_count), K_(selected_row_group_count), - K_(skipped_row_group_count), K_(read_rows_count)); - - int64_t selected_file_count_; - int64_t skipped_file_count_; - int64_t selected_page_count_; - int64_t skipped_page_count_; - int64_t selected_row_group_count_; - int64_t skipped_row_group_count_; - int64_t read_rows_count_; + VIRTUAL_TO_STRING_KV(K_(selected_file_count), + K_(skipped_file_count), + K_(selected_stripe_count), + K_(skipped_stripe_count), + K_(selected_row_group_count), + K_(skipped_row_group_count), + K_(read_rows_count)); + + int64_t selected_file_count_ = 0; + int64_t skipped_file_count_ = 0; + int64_t selected_stripe_count_ = 0; + int64_t skipped_stripe_count_ = 0; + int64_t selected_row_group_count_ = 0; + int64_t skipped_row_group_count_ = 0; + int64_t read_rows_count_ = 0; }; struct ObLakeTablePreBufferMetrics : public ObLakeTableIMetrics @@ -81,6 +113,9 @@ public: int64_t total_io_wait_time_us_; // total waiting time for async read IO int64_t max_io_wait_time_us_; // max waiting time for async read IO int64_t total_read_size_; // total size read from the prebuffer + +private: + int update_specific_profile_(ObProfileId id); }; struct ObLakeTableIOMetrics : public ObLakeTableIMetrics @@ -163,6 +198,9 @@ public: int64_t disk_cache_miss_io_size_; // total IO size that miss the disk cache int64_t max_io_time_us_; // maximum waiting time for read IO int64_t total_io_time_us_; // total waiting time for read IO + +private: + int update_specific_profile_(ObProfileId id); }; class ObLakeTableReaderProfile diff --git a/src/sql/engine/table/ob_orc_table_row_iter.cpp b/src/sql/engine/table/ob_orc_table_row_iter.cpp index 0429c40a78..0a8fa6e7c8 100644 --- a/src/sql/engine/table/ob_orc_table_row_iter.cpp +++ b/src/sql/engine/table/ob_orc_table_row_iter.cpp @@ -293,9 +293,9 @@ int ObOrcTableRowIterator::init(const storage::ObTableScanParam *scan_param) scan_param->ext_tbl_filter_pd_level_, scan_param->column_ids_, eval_ctx)); - OZ (reader_profile_.register_metrics(&reader_metrics_, "READER_METRICS")); - OZ (data_access_driver_.register_io_metrics(reader_profile_, "IO_METRICS")); - OZ (file_prebuffer_.register_metrics(reader_profile_, "PREBUFFER_METRICS")); + OZ (reader_profile_.register_metrics(&reader_metrics_, READER_METRICS_LABEL)); + OZ (data_access_driver_.register_io_metrics(reader_profile_, IO_METRICS_LABEL)); + OZ (file_prebuffer_.register_metrics(reader_profile_, PREBUFFER_METRICS_LABEL)); if (OB_SUCC(ret) && OB_ISNULL(bit_vector_cache_)) { void *mem = nullptr; @@ -579,9 +579,19 @@ int ObOrcTableRowIterator::select_row_ranges(const int64_t stripe_idx) build_whole_stripe_range))) { LOG_WARN("fail to select row ranges by pushdown filters", K(ret)); } - } else if (need_pre_buffer_index_ && OB_FAIL(pre_buffer(true /* row index */))) { - // pre buffer row index for lazy seek - LOG_WARN("fail to pre buffer row index", K(ret)); + } else { + // 在没有 pushdown_filter 的情况下,需要更新 selected 指标 + reader_metrics_.selected_stripe_count_++; + const int64_t row_index_stride = reader_->getRowIndexStride(); + if (row_index_stride > 0) { + // if row_index_stripe == 0 means row index is disabled + reader_metrics_.selected_row_group_count_ += (stripe_num_rows + row_index_stride - 1) / row_index_stride; + } + + if (need_pre_buffer_index_ && OB_FAIL(pre_buffer(true /* row index */))) { + // pre buffer row index for lazy seek + LOG_WARN("fail to pre buffer row index", K(ret)); + } } if (OB_SUCC(ret) && build_whole_stripe_range) { SelectedRowRange whole_stripe_range; @@ -708,14 +718,14 @@ int ObOrcTableRowIterator::select_row_ranges_by_pushdown_filter( if (OB_SUCC(ret)) { // update reader metrics if (build_whole_stripe_range) { // no statistic or read orc exception - ++reader_metrics_.selected_row_group_count_; - reader_metrics_.selected_page_count_ += groups_in_stripe; + ++reader_metrics_.selected_stripe_count_; + reader_metrics_.selected_row_group_count_ += groups_in_stripe; } else if (is_stripe_filtered) { - ++reader_metrics_.skipped_row_group_count_; + ++reader_metrics_.skipped_stripe_count_; } else { - ++reader_metrics_.selected_row_group_count_; - reader_metrics_.selected_page_count_ += (groups_in_stripe - groups_filtered); - reader_metrics_.skipped_page_count_ += groups_filtered; + ++reader_metrics_.selected_stripe_count_; + reader_metrics_.selected_row_group_count_ += (groups_in_stripe - groups_filtered); + reader_metrics_.skipped_row_group_count_ += groups_filtered; } } return ret; @@ -1163,12 +1173,17 @@ int ObOrcTableRowIterator::filter_file(const int64_t task_idx) // no column statistics, do nothing } else if (OB_FAIL(filter_by_statistic(PushdownLevel::FILE, orc_col_stat.get(), file_skipped))) { LOG_WARN("fail to apply skipping index filter", K(ret)); - } else if (file_skipped) { + } + } + + if (OB_SUCC(ret)) { + if (file_skipped) { ++reader_metrics_.skipped_file_count_; } else { ++reader_metrics_.selected_file_count_; } } + if (OB_SUCC(ret) && !file_skipped) { // resolve stripe index by task id int64_t start_lineno = scan_param_->scan_tasks_.at(task_idx)->first_lineno_; diff --git a/src/sql/engine/table/ob_orc_table_row_iter.h b/src/sql/engine/table/ob_orc_table_row_iter.h index 82d454829d..a2fb9946c7 100644 --- a/src/sql/engine/table/ob_orc_table_row_iter.h +++ b/src/sql/engine/table/ob_orc_table_row_iter.h @@ -519,7 +519,7 @@ private: ObFilePreBuffer file_prebuffer_; common::ObArenaAllocator temp_allocator_; // used for lob filter pushdown common::ObArrayWrap column_range_slices_; - ObLakeTableReaderMetrics reader_metrics_; + ObLakeTableORCReaderMetrics reader_metrics_; sql::ColumnIndexType column_index_type_; bool is_col_name_case_sensitive_; }; diff --git a/src/sql/engine/table/ob_parquet_table_row_iter.cpp b/src/sql/engine/table/ob_parquet_table_row_iter.cpp index ce3fa330b3..40c70270b2 100644 --- a/src/sql/engine/table/ob_parquet_table_row_iter.cpp +++ b/src/sql/engine/table/ob_parquet_table_row_iter.cpp @@ -73,11 +73,11 @@ int ObParquetTableRowIterator::init(const storage::ObTableScanParam *scan_param) scan_param->ext_tbl_filter_pd_level_, scan_param->column_ids_, eval_ctx)); - OZ (reader_profile_.register_metrics(&reader_metrics_, "READER_METRICS")); - OZ (data_access_driver_.register_io_metrics(reader_profile_, "IO_METRICS")); - OZ (file_prebuffer_.register_metrics(reader_profile_, "PREBUFFER_METRICS")); - OZ (eager_data_access_driver_.register_io_metrics(reader_profile_, "EAGER_IO_METRICS")); - OZ (eager_file_prebuffer_.register_metrics(reader_profile_, "EAGER_PREBUFFER_METRICS")); + OZ(reader_profile_.register_metrics(&reader_metrics_, READER_METRICS_LABEL)); + OZ(data_access_driver_.register_io_metrics(reader_profile_, IO_METRICS_LABEL)); + OZ(file_prebuffer_.register_metrics(reader_profile_, PREBUFFER_METRICS_LABEL)); + OZ(eager_data_access_driver_.register_io_metrics(reader_profile_, EAGER_IO_METRICS_LABEL)); + OZ(eager_file_prebuffer_.register_metrics(reader_profile_, EAGER_PREBUFFER_METRICS_LABEL)); if (OB_SUCC(ret)) { if (!scan_param->ext_enable_late_materialization_ @@ -633,8 +633,8 @@ int ObParquetTableRowIterator::create_file_reader(const ObString& data_path, LOG_WARN("failed to open eager file", K(ret)); } else { file_reader = parquet::ParquetFileReader::Open(cur_file, read_props_); - eager_file_reader_ = parquet::ParquetFileReader::Open(eager_file, read_props_); - if (!file_reader || !eager_file_reader_) { + eager_file_reader = parquet::ParquetFileReader::Open(eager_file, read_props_); + if (!file_reader || !eager_file_reader) { ret = OB_ERR_UNEXPECTED; LOG_WARN("create row reader failed", K(ret)); } else { diff --git a/src/sql/engine/table/ob_parquet_table_row_iter.h b/src/sql/engine/table/ob_parquet_table_row_iter.h index 77927e36a5..6592e33320 100644 --- a/src/sql/engine/table/ob_parquet_table_row_iter.h +++ b/src/sql/engine/table/ob_parquet_table_row_iter.h @@ -57,7 +57,7 @@ struct ReadPages int64_t &cur_eager_id, common::ObIArray &read_row_counts, common::ObIArray> *> &page_skip_ranges, - ObLakeTableReaderMetrics &reader_metrics) + ObLakeTableParquetReaderMetrics &reader_metrics) : cur_col_id_(cur_col_id), cur_eager_id_(cur_eager_id), read_row_counts_(read_row_counts), @@ -90,7 +90,7 @@ struct ReadPages int64_t &cur_eager_id_; common::ObIArray &read_row_counts_; common::ObIArray> *> &page_skip_ranges_; - ObLakeTableReaderMetrics &reader_metrics_; + ObLakeTableParquetReaderMetrics &reader_metrics_; }; @@ -494,7 +494,7 @@ private: common::ObFixedArray> *, ObIAllocator> page_skip_ranges_; ParquetStatInfo stat_; FilterCalcMode mode_; - ObLakeTableReaderMetrics reader_metrics_; + ObLakeTableParquetReaderMetrics reader_metrics_; sql::ColumnIndexType column_index_type_; bool is_col_name_case_sensitive_; }; -- GitLab