Commit bab5f9a6 authored by Hui Xiao, committed by Facebook GitHub Bot

Add new stat rocksdb.table.open.prefetch.tail.read.bytes, rocksdb.table.open.prefetch.tail.{miss|hit} (#11265)

Summary:
**Context/Summary:**
We are adding new stats to measure the size of the prefetched tail and the hit/miss behavior of lookups into this buffer.

The stat collection is done in FilePrefetchBuffer, but for now only for the prefetched tail buffer used during table open, distinguished via a new FilePrefetchBuffer usage enum. This is cleaner than the alternative of implementing it at the upper-level call sites of FilePrefetchBuffer for table open, and it has the benefit of being extensible to other FilePrefetchBuffer usages if needed. See the db_bench results below for the perf regression concern.
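
For illustration only (a minimal sketch, not code from this PR): an application can read the new stats through the public `Statistics` API after opening a DB built with this change. The stat enums and `CreateDBStatistics()` come from RocksDB; the DB path and the rest of the setup below are made up for the example.

```cpp
#include <iostream>

#include "rocksdb/db.h"
#include "rocksdb/options.h"
#include "rocksdb/statistics.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Enable stats collection; the new tickers/histogram are populated during
  // block-based table open when the prefetched tail buffer is used.
  options.statistics = rocksdb::CreateDBStatistics();

  rocksdb::DB* db = nullptr;
  // "/tmp/testdb" is an illustrative path, not one mandated by this PR.
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/testdb", &db);
  if (!s.ok()) {
    std::cerr << s.ToString() << std::endl;
    return 1;
  }

  // Tickers: lookups into the prefetched tail during table open.
  const uint64_t hits = options.statistics->getTickerCount(
      rocksdb::TABLE_OPEN_PREFETCH_TAIL_HIT);
  const uint64_t misses = options.statistics->getTickerCount(
      rocksdb::TABLE_OPEN_PREFETCH_TAIL_MISS);

  // Histogram: bytes read while prefetching the SST tail during table open.
  rocksdb::HistogramData tail_read_bytes;
  options.statistics->histogramData(
      rocksdb::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, &tail_read_bytes);

  std::cout << "tail hit: " << hits << ", tail miss: " << misses
            << ", tail read count: " << tail_read_bytes.count
            << ", tail read sum (bytes): " << tail_read_bytes.sum << std::endl;

  delete db;
  return 0;
}
```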

Pull Request resolved: https://github.com/facebook/rocksdb/pull/11265

Test Plan:
**- Piggyback on existing test**
**- rocksdb.table.open.prefetch.tail.miss is harder to unit test, so I manually set the prefetched tail read size to be small and ran db_bench.**
```
./db_bench -db=/tmp/testdb -statistics=true -benchmarks="fillseq" -key_size=32 -value_size=512 -num=5000 -write_buffer_size=655 -target_file_size_base=655 -disable_auto_compactions=false -compression_type=none -bloom_bits=3  -use_direct_reads=true
```
```
rocksdb.table.open.prefetch.tail.read.bytes P50 : 4096.000000 P95 : 4096.000000 P99 : 4096.000000 P100 : 4096.000000 COUNT : 225 SUM : 921600
rocksdb.table.open.prefetch.tail.miss COUNT : 91
rocksdb.table.open.prefetch.tail.hit COUNT : 1034
```
**- No perf regression observed in db_bench**

SETUP command: create the same db with ~900 files for both the pre-change and post-change binaries.
```
./db_bench -db=/tmp/testdb -benchmarks="fillseq" -key_size=32 -value_size=512 -num=500000 -write_buffer_size=655360  -disable_auto_compactions=true -target_file_size_base=16777216 -compression_type=none
```
TEST command (60 runs or until convergence): as suggested by anand1976 and akankshamahajan15, vary `seek_nexts` and `async_io` in testing.
```
./db_bench -use_existing_db=true -db=/tmp/testdb -statistics=false -cache_size=0 -cache_index_and_filter_blocks=false -benchmarks=seekrandom[-X60] -num=50000 -seek_nexts={10, 500, 1000} -async_io={0|1} -use_direct_reads=true
```
async io = 0, direct io read = true

| | seek_nexts = 10, 30 runs | seek_nexts = 500, 12 runs | seek_nexts = 1000, 6 runs |
| -- | -- | -- | -- |
| pre-change | 4776 (± 28) ops/sec; 24.8 (± 0.1) MB/sec | 288 (± 1) ops/sec; 74.8 (± 0.4) MB/sec | 145 (± 4) ops/sec; 75.6 (± 2.2) MB/sec |
| post-change | 4790 (± 32) ops/sec; 24.9 (± 0.2) MB/sec | 288 (± 3) ops/sec; 74.7 (± 0.8) MB/sec | 143 (± 3) ops/sec; 74.5 (± 1.6) MB/sec |

async io = 1, direct io read = true

| | seek_nexts = 10, 54 runs | seek_nexts = 500, 6 runs | seek_nexts = 1000, 4 runs |
| -- | -- | -- | -- |
| pre-change | 3350 (± 36) ops/sec; 17.4 (± 0.2) MB/sec | 264 (± 0) ops/sec; 68.7 (± 0.2) MB/sec | 138 (± 1) ops/sec; 71.8 (± 1.0) MB/sec |
| post-change | 3358 (± 27) ops/sec; 17.4 (± 0.1) MB/sec | 263 (± 2) ops/sec; 68.3 (± 0.8) MB/sec | 139 (± 1) ops/sec; 72.6 (± 0.6) MB/sec |

Reviewed By: ajkr

Differential Revision: D43781467

Pulled By: hx235

fbshipit-source-id: a706a18472a8edb2b952bac3af40eec803537f2a
Parent 601efe3c
@@ -12,6 +12,7 @@
 ### New Features
 * Add statistics rocksdb.secondary.cache.filter.hits, rocksdb.secondary.cache.index.hits, and rocksdb.secondary.cache.filter.hits
 * Added a new PerfContext counter `internal_merge_point_lookup_count` which tracks the number of Merge operands applied while serving point lookup queries.
+* Add new statistics rocksdb.table.open.prefetch.tail.read.bytes, rocksdb.table.open.prefetch.tail.{miss|hit}
 
 ## 8.0.0 (02/19/2023)
 ### Behavior changes
......
@@ -162,6 +162,9 @@ Status FilePrefetchBuffer::Prefetch(const IOOptions& opts,
   Status s = Read(opts, reader, rate_limiter_priority, read_len, chunk_len,
                   rounddown_offset, curr_);
+  if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && s.ok()) {
+    RecordInHistogram(stats_, TABLE_OPEN_PREFETCH_TAIL_READ_BYTES, read_len);
+  }
   return s;
 }
@@ -609,6 +612,22 @@ bool FilePrefetchBuffer::TryReadFromCache(const IOOptions& opts,
                                           Slice* result, Status* status,
                                           Env::IOPriority rate_limiter_priority,
                                           bool for_compaction /* = false */) {
+  bool ret = TryReadFromCacheUntracked(opts, reader, offset, n, result, status,
+                                       rate_limiter_priority, for_compaction);
+  if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) {
+    if (ret) {
+      RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
+    } else {
+      RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
+    }
+  }
+  return ret;
+}
+
+bool FilePrefetchBuffer::TryReadFromCacheUntracked(
+    const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
+    size_t n, Slice* result, Status* status,
+    Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */) {
   if (track_min_offset_ && offset < min_offset_read_) {
     min_offset_read_ = static_cast<size_t>(offset);
   }
@@ -666,6 +685,22 @@ bool FilePrefetchBuffer::TryReadFromCacheAsync(
     const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
     size_t n, Slice* result, Status* status,
     Env::IOPriority rate_limiter_priority) {
+  bool ret = TryReadFromCacheAsyncUntracked(opts, reader, offset, n, result,
+                                            status, rate_limiter_priority);
+  if (usage_ == FilePrefetchBufferUsage::kTableOpenPrefetchTail && enable_) {
+    if (ret) {
+      RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_HIT);
+    } else {
+      RecordTick(stats_, TABLE_OPEN_PREFETCH_TAIL_MISS);
+    }
+  }
+  return ret;
+}
+
+bool FilePrefetchBuffer::TryReadFromCacheAsyncUntracked(
+    const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
+    size_t n, Slice* result, Status* status,
+    Env::IOPriority rate_limiter_priority) {
   if (track_min_offset_ && offset < min_offset_read_) {
     min_offset_read_ = static_cast<size_t>(offset);
   }
......
@@ -54,6 +54,11 @@ struct BufferInfo {
   uint32_t pos_ = 0;
 };
 
+enum class FilePrefetchBufferUsage {
+  kTableOpenPrefetchTail,
+  kUnknown,
+};
+
 // FilePrefetchBuffer is a smart buffer to store and read data from a file.
 class FilePrefetchBuffer {
  public:
@@ -78,13 +83,13 @@ class FilePrefetchBuffer {
   // and max_readahead_size are passed in.
   // A user can construct a FilePrefetchBuffer without any arguments, but use
   // `Prefetch` to load data into the buffer.
-  FilePrefetchBuffer(size_t readahead_size = 0, size_t max_readahead_size = 0,
-                     bool enable = true, bool track_min_offset = false,
-                     bool implicit_auto_readahead = false,
-                     uint64_t num_file_reads = 0,
-                     uint64_t num_file_reads_for_auto_readahead = 0,
-                     FileSystem* fs = nullptr, SystemClock* clock = nullptr,
-                     Statistics* stats = nullptr)
+  FilePrefetchBuffer(
+      size_t readahead_size = 0, size_t max_readahead_size = 0,
+      bool enable = true, bool track_min_offset = false,
+      bool implicit_auto_readahead = false, uint64_t num_file_reads = 0,
+      uint64_t num_file_reads_for_auto_readahead = 0, FileSystem* fs = nullptr,
+      SystemClock* clock = nullptr, Statistics* stats = nullptr,
+      FilePrefetchBufferUsage usage = FilePrefetchBufferUsage::kUnknown)
       : curr_(0),
         readahead_size_(readahead_size),
         initial_auto_readahead_size_(readahead_size),
@@ -100,7 +105,8 @@ class FilePrefetchBuffer {
         explicit_prefetch_submitted_(false),
         fs_(fs),
         clock_(clock),
-        stats_(stats) {
+        stats_(stats),
+        usage_(usage) {
     assert((num_file_reads_ >= num_file_reads_for_auto_readahead_ + 1) ||
            (num_file_reads_ == 0));
     // If ReadOptions.async_io is enabled, data is asynchronously filled in
@@ -403,6 +409,19 @@ class FilePrefetchBuffer {
                          bool& copy_to_third_buffer, uint64_t& tmp_offset,
                          size_t& tmp_length);
 
+  bool TryReadFromCacheUntracked(const IOOptions& opts,
+                                 RandomAccessFileReader* reader,
+                                 uint64_t offset, size_t n, Slice* result,
+                                 Status* s,
+                                 Env::IOPriority rate_limiter_priority,
+                                 bool for_compaction = false);
+
+  bool TryReadFromCacheAsyncUntracked(const IOOptions& opts,
+                                      RandomAccessFileReader* reader,
+                                      uint64_t offset, size_t n, Slice* result,
+                                      Status* status,
+                                      Env::IOPriority rate_limiter_priority);
+
   std::vector<BufferInfo> bufs_;
   // curr_ represents the index for bufs_ indicating which buffer is being
   // consumed currently.
@@ -442,5 +461,7 @@ class FilePrefetchBuffer {
   FileSystem* fs_;
   SystemClock* clock_;
   Statistics* stats_;
+
+  FilePrefetchBufferUsage usage_;
 };
 }  // namespace ROCKSDB_NAMESPACE
@@ -125,6 +125,7 @@ TEST_P(PrefetchTest, Basic) {
   std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
   Options options;
   SetGenericOptions(env.get(), use_direct_io, options);
+  options.statistics = CreateDBStatistics();
 
   const int kNumKeys = 1100;
   int buff_prefetch_count = 0;
@@ -167,9 +168,25 @@ TEST_P(PrefetchTest, Basic) {
   Slice least(start_key.data(), start_key.size());
   Slice greatest(end_key.data(), end_key.size());
 
+  HistogramData prev_table_open_prefetch_tail_read;
+  options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
+                                    &prev_table_open_prefetch_tail_read);
+  const uint64_t prev_table_open_prefetch_tail_miss =
+      options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS);
+  const uint64_t prev_table_open_prefetch_tail_hit =
+      options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT);
+
   // commenting out the line below causes the example to work correctly
   ASSERT_OK(db_->CompactRange(CompactRangeOptions(), &least, &greatest));
 
+  HistogramData cur_table_open_prefetch_tail_read;
+  options.statistics->histogramData(TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
+                                    &cur_table_open_prefetch_tail_read);
+  const uint64_t cur_table_open_prefetch_tail_miss =
+      options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_MISS);
+  const uint64_t cur_table_open_prefetch_tail_hit =
+      options.statistics->getTickerCount(TABLE_OPEN_PREFETCH_TAIL_HIT);
+
   if (support_prefetch && !use_direct_io) {
     // If underline file system supports prefetch, and directIO is not enabled
     // make sure prefetch() is called and FilePrefetchBuffer is not used.
@@ -182,6 +199,12 @@ TEST_P(PrefetchTest, Basic) {
     // used.
     ASSERT_FALSE(fs->IsPrefetchCalled());
     ASSERT_GT(buff_prefetch_count, 0);
+    ASSERT_GT(cur_table_open_prefetch_tail_read.count,
+              prev_table_open_prefetch_tail_read.count);
+    ASSERT_GT(cur_table_open_prefetch_tail_hit,
+              prev_table_open_prefetch_tail_hit);
+    ASSERT_GE(cur_table_open_prefetch_tail_miss,
+              prev_table_open_prefetch_tail_miss);
     buff_prefetch_count = 0;
   }
......
@@ -420,6 +420,15 @@ enum Tickers : uint32_t {
   SECONDARY_CACHE_INDEX_HITS,
   SECONDARY_CACHE_DATA_HITS,
 
+  // Number of lookup into the prefetched tail (see
+  // `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+  // that can't find its data for table open
+  TABLE_OPEN_PREFETCH_TAIL_MISS,
+  // Number of lookup into the prefetched tail (see
+  // `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+  // that finds its data for table open
+  TABLE_OPEN_PREFETCH_TAIL_HIT,
+
   TICKER_ENUM_MAX
 };
@@ -533,6 +542,10 @@ enum Histograms : uint32_t {
   // Wait time for aborting async read in FilePrefetchBuffer destructor
   ASYNC_PREFETCH_ABORT_MICROS,
 
+  // Number of bytes read for RocksDB's prefetching contents (as opposed to file
+  // system's prefetch) from the end of SST table during block based table open
+  TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
+
   HISTOGRAM_ENUM_MAX
 };
......
@@ -5125,6 +5125,10 @@ class TickerTypeJni {
         return -0x38;
       case ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS:
         return -0x39;
+      case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS:
+        return -0x3A;
+      case ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT:
+        return -0x3B;
      case ROCKSDB_NAMESPACE::Tickers::TICKER_ENUM_MAX:
        // 0x5F was the max value in the initial copy of tickers to Java.
        // Since these values are exposed directly to Java clients, we keep
@@ -5482,6 +5486,10 @@ class TickerTypeJni {
         return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_INDEX_HITS;
       case -0x39:
         return ROCKSDB_NAMESPACE::Tickers::SECONDARY_CACHE_DATA_HITS;
+      case -0x3A:
+        return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_MISS;
+      case -0x3B:
+        return ROCKSDB_NAMESPACE::Tickers::TABLE_OPEN_PREFETCH_TAIL_HIT;
      case 0x5F:
        // 0x5F was the max value in the initial copy of tickers to Java.
        // Since these values are exposed directly to Java clients, we keep
@@ -5609,6 +5617,8 @@ class HistogramTypeJni {
         return 0x37;
       case ASYNC_PREFETCH_ABORT_MICROS:
         return 0x38;
+      case ROCKSDB_NAMESPACE::Histograms::TABLE_OPEN_PREFETCH_TAIL_READ_BYTES:
+        return 0x39;
      case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
        // 0x1F for backwards compatibility on current minor version.
        return 0x1F;
@@ -5725,6 +5735,9 @@ class HistogramTypeJni {
         return ROCKSDB_NAMESPACE::Histograms::NUM_LEVEL_READ_PER_MULTIGET;
       case 0x38:
         return ROCKSDB_NAMESPACE::Histograms::ASYNC_PREFETCH_ABORT_MICROS;
+      case 0x39:
+        return ROCKSDB_NAMESPACE::Histograms::
+            TABLE_OPEN_PREFETCH_TAIL_READ_BYTES;
      case 0x1F:
        // 0x1F for backwards compatibility on current minor version.
        return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;
......
@@ -162,6 +162,13 @@ public enum HistogramType {
   ASYNC_READ_BYTES((byte) 0x33),
 
+  /**
+   * Number of bytes read for RocksDB's prefetching contents
+   * (as opposed to file system's prefetch)
+   * from the end of SST table during block based table open
+   */
+  TABLE_OPEN_PREFETCH_TAIL_READ_BYTES((byte) 0x39),
+
   // 0x1F for backwards compatibility on current minor version.
   HISTOGRAM_ENUM_MAX((byte) 0x1F);
......
@@ -740,6 +740,20 @@ public enum TickerType {
    */
   BLOB_DB_CACHE_BYTES_WRITE((byte) -0x34),
 
+  /**
+   * Number of lookup into the prefetched tail (see
+   * `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+   * that can't find its data for table open
+   */
+  TABLE_OPEN_PREFETCH_TAIL_MISS((byte) -0x3A),
+
+  /**
+   * Number of lookup into the prefetched tail (see
+   * `TABLE_OPEN_PREFETCH_TAIL_READ_BYTES`)
+   * that finds its data for table open
+   */
+  TABLE_OPEN_PREFETCH_TAIL_HIT((byte) -0x3B),
+
   TICKER_ENUM_MAX((byte) 0x5F);
 
   private final byte value;
......
@@ -216,7 +216,10 @@ const std::vector<std::pair<Tickers, std::string>> TickersNameMap = {
     {ASYNC_READ_ERROR_COUNT, "rocksdb.async.read.error.count"},
     {SECONDARY_CACHE_FILTER_HITS, "rocksdb.secondary.cache.filter.hits"},
     {SECONDARY_CACHE_INDEX_HITS, "rocksdb.secondary.cache.index.hits"},
-    {SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"}};
+    {SECONDARY_CACHE_DATA_HITS, "rocksdb.secondary.cache.data.hits"},
+    {TABLE_OPEN_PREFETCH_TAIL_MISS, "rocksdb.table.open.prefetch.tail.miss"},
+    {TABLE_OPEN_PREFETCH_TAIL_HIT, "rocksdb.table.open.prefetch.tail.hit"},
+};
 
 const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
     {DB_GET, "rocksdb.db.get.micros"},
@@ -272,6 +275,8 @@ const std::vector<std::pair<Histograms, std::string>> HistogramsNameMap = {
     {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
     {NUM_LEVEL_READ_PER_MULTIGET, "rocksdb.num.level.read.per.multiget"},
     {ASYNC_PREFETCH_ABORT_MICROS, "rocksdb.async.prefetch.abort.micros"},
+    {TABLE_OPEN_PREFETCH_TAIL_READ_BYTES,
+     "rocksdb.table.open.prefetch.tail.read.bytes"},
 };
 
 std::shared_ptr<Statistics> CreateDBStatistics() {
......
@@ -591,7 +591,7 @@ Status BlockBasedTable::Open(
   if (!ioptions.allow_mmap_reads) {
     s = PrefetchTail(ro, file.get(), file_size, force_direct_prefetch,
                      tail_prefetch_stats, prefetch_all, preload_all,
-                     &prefetch_buffer);
+                     &prefetch_buffer, ioptions.stats);
     // Return error in prefetch path to users.
     if (!s.ok()) {
       return s;
@@ -802,7 +802,7 @@ Status BlockBasedTable::PrefetchTail(
     const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
     bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
     const bool prefetch_all, const bool preload_all,
-    std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer) {
+    std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats) {
   size_t tail_prefetch_size = 0;
   if (tail_prefetch_stats != nullptr) {
     // Multiple threads may get a 0 (no history) when running in parallel,
@@ -842,9 +842,12 @@ Status BlockBasedTable::PrefetchTail(
   }
 
   // Use `FilePrefetchBuffer`
-  prefetch_buffer->reset(
-      new FilePrefetchBuffer(0 /* readahead_size */, 0 /* max_readahead_size */,
-                             true /* enable */, true /* track_min_offset */));
+  prefetch_buffer->reset(new FilePrefetchBuffer(
+      0 /* readahead_size */, 0 /* max_readahead_size */, true /* enable */,
+      true /* track_min_offset */, false /* implicit_auto_readahead */,
+      0 /* num_file_reads */, 0 /* num_file_reads_for_auto_readahead */,
+      nullptr /* fs */, nullptr /* clock */, stats,
+      FilePrefetchBufferUsage::kTableOpenPrefetchTail));
 
   IOOptions opts;
   Status s = file->PrepareIOOptions(ro, opts);
......
@@ -444,7 +444,7 @@ class BlockBasedTable : public TableReader {
       const ReadOptions& ro, RandomAccessFileReader* file, uint64_t file_size,
       bool force_direct_prefetch, TailPrefetchStats* tail_prefetch_stats,
       const bool prefetch_all, const bool preload_all,
-      std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer);
+      std::unique_ptr<FilePrefetchBuffer>* prefetch_buffer, Statistics* stats);
 
   Status ReadMetaIndexBlock(const ReadOptions& ro,
                             FilePrefetchBuffer* prefetch_buffer,
                             std::unique_ptr<Block>* metaindex_block,
......