Commit f6f1533c authored by Lei Jin

make internal stats independent of statistics

Summary:
also make it aware of column families
output from db_bench:

```
** Compaction Stats [default] **
Level Files Size(MB) Score Read(GB)  Rn(GB) Rnp1(GB) Write(GB) Wnew(GB) RW-Amp W-Amp Rd(MB/s) Wr(MB/s)  Rn(cnt) Rnp1(cnt) Wnp1(cnt) Wnew(cnt)  Comp(sec) Comp(cnt) Avg(sec) Stall(sec) Stall(cnt) Avg(ms)
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
  L0    14      956   0.9      0.0     0.0      0.0       2.7      2.7    0.0   0.0      0.0    111.6        0         0         0         0         24        40    0.612      75.20     492387    0.15
  L1    21     2001   2.0      5.7     2.0      3.7       5.3      1.6    5.4   2.6     71.2     65.7       31        43        55        12         82         2   41.242      43.72      41183    1.06
  L2   217    18974   1.9     16.5     2.0     14.4      15.1      0.7   15.6   7.4     70.1     64.3       17       182       185         3        241        16   15.052       0.00          0    0.00
  L3  1641   188245   1.8      9.1     1.1      8.0       8.5      0.5   15.4   7.4     61.3     57.2        9        75        76         1        152         9   16.887       0.00          0    0.00
  L4  4447   449025   0.4     13.4     4.8      8.6       9.1      0.5    4.7   1.9     77.8     52.7       38        79       100        21        176        38    4.639       0.00          0    0.00
 Sum  6340   659201   0.0     44.7    10.0     34.7      40.6      6.0   32.0  15.2     67.7     61.6       95       379       416        37        676       105    6.439     118.91     533570    0.22
 Int     0        0   0.0      1.2     0.4      0.8       1.3      0.5    5.2   2.7     59.1     65.6        3         7         9         2         20        10    2.003       0.00          0    0.00
Stalls(secs): 75.197 level0_slowdown, 0.000 level0_numfiles, 0.000 memtable_compaction, 43.717 leveln_slowdown
Stalls(count): 492387 level0_slowdown, 0 level0_numfiles, 0 memtable_compaction, 41183 leveln_slowdown

** DB Stats **
Uptime(secs): 202.1 total, 13.5 interval
Cumulative writes: 6291456 writes, 6291456 batches, 1.0 writes per batch, 4.90 ingest GB
Cumulative WAL: 6291456 writes, 6291456 syncs, 1.00 writes per sync, 4.90 GB written
Interval writes: 1048576 writes, 1048576 batches, 1.0 writes per batch, 836.0 ingest MB
Interval WAL: 1048576 writes, 1048576 syncs, 1.00 writes per sync, 0.82 MB written
```

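The per-CF and per-DB sections above are exposed through the new property names `rocksdb.cfstats` and `rocksdb.dbstats` (`rocksdb.stats` returns both). A minimal sketch of pulling them on demand from application code — the DB path is a placeholder, and `options.statistics` is intentionally left unset, since after this change the internal counters no longer depend on it:

```
#include <iostream>
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

int main() {
  rocksdb::Options options;
  options.create_if_missing = true;
  // Intentionally no options.statistics: the internal stats below are
  // collected independently of the Statistics object.

  rocksdb::DB* db = nullptr;
  rocksdb::Status s = rocksdb::DB::Open(options, "/tmp/stats_demo", &db);
  if (!s.ok()) {
    std::cerr << s.ToString() << std::endl;
    return 1;
  }

  std::string stats;
  // Per-column-family compaction/stall stats ("Compaction Stats" block above).
  db->GetProperty("rocksdb.cfstats", &stats);
  std::cout << stats << std::endl;

  // DB-wide write/WAL stats ("DB Stats" block above).
  db->GetProperty("rocksdb.dbstats", &stats);
  std::cout << stats << std::endl;

  delete db;
  return 0;
}
```
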
Test Plan: ran it

Reviewers: sdong, yhchiang, igor

Reviewed By: igor

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D19917
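
For context (not part of this change): the periodic dump performed by `DBImpl::MaybeDumpStats()` in the diff below writes the same two sections to the info LOG. A minimal configuration sketch, assuming the pre-existing `Options::stats_dump_period_sec` option and an arbitrary interval:

```
#include <string>

#include "rocksdb/db.h"
#include "rocksdb/options.h"

// Sketch: enable the periodic stats dump handled by MaybeDumpStats().
// options.statistics stays unset; the per-CF/per-DB counters are now
// maintained by InternalStats regardless.
rocksdb::Status OpenWithPeriodicStatsDump(const std::string& path,
                                          rocksdb::DB** db) {
  rocksdb::Options options;
  options.create_if_missing = true;
  options.stats_dump_period_sec = 60;  // dump to the info LOG about once a minute
  return rocksdb::DB::Open(options, path, db);
}
```
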
Parent 50c2dcb7
@@ -222,8 +222,8 @@ ColumnFamilyData::ColumnFamilyData(const std::string& dbname, uint32_t id,
// if dummy_versions is nullptr, then this is a dummy column family.
if (dummy_versions != nullptr) {
internal_stats_.reset(new InternalStats(
options_.num_levels, db_options->env, db_options->statistics.get()));
internal_stats_.reset(
new InternalStats(options_.num_levels, db_options->env));
table_cache_.reset(new TableCache(&options_, storage_options, table_cache));
if (options_.compaction_style == kCompactionStyleUniversal) {
compaction_picker_.reset(
@@ -510,9 +510,23 @@ void DBImpl::MaybeDumpStats() {
// atomically. We could see more than one dump during one dump
// period in rare cases.
last_stats_dump_time_microsec_ = now_micros;
DBPropertyType cf_property_type = GetPropertyType("rocksdb.cfstats");
std::string stats;
GetProperty("rocksdb.stats", &stats);
{
MutexLock l(&mutex_);
for (auto cfd : *versions_->GetColumnFamilySet()) {
cfd->internal_stats()->GetProperty(cf_property_type, "rocksdb.cfstats",
&stats, cfd);
}
DBPropertyType db_property_type = GetPropertyType("rocksdb.dbstats");
default_cf_internal_stats_->GetProperty(
db_property_type, "rocksdb.dbstats", &stats,
default_cf_handle_->cfd());
}
Log(options_.info_log, "------- DUMPING STATS -------");
Log(options_.info_log, "%s", stats.c_str());
PrintStatistics();
}
}
@@ -1157,6 +1171,7 @@ Status DBImpl::Recover(
SequenceNumber max_sequence(0);
default_cf_handle_ = new ColumnFamilyHandleImpl(
versions_->GetColumnFamilySet()->GetDefault(), this, &mutex_);
default_cf_internal_stats_ = default_cf_handle_->cfd()->internal_stats();
single_column_family_mode_ =
versions_->GetColumnFamilySet()->NumberOfColumnFamilies() == 1;
@@ -1419,6 +1434,8 @@ Status DBImpl::WriteLevel0TableForRecovery(ColumnFamilyData* cfd, MemTable* mem,
stats.bytes_written = meta.fd.GetFileSize();
stats.files_out_levelnp1 = 1;
cfd->internal_stats()->AddCompactionStats(level, stats);
cfd->internal_stats()->AddCFStats(
InternalStats::BYTES_FLUSHED, meta.fd.GetFileSize());
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s;
@@ -1509,6 +1526,8 @@ Status DBImpl::WriteLevel0Table(ColumnFamilyData* cfd,
stats.micros = env_->NowMicros() - start_micros;
stats.bytes_written = meta.fd.GetFileSize();
cfd->internal_stats()->AddCompactionStats(level, stats);
cfd->internal_stats()->AddCFStats(
InternalStats::BYTES_FLUSHED, meta.fd.GetFileSize());
RecordTick(options_.statistics.get(), COMPACT_WRITE_BYTES,
meta.fd.GetFileSize());
return s;
@@ -1979,9 +1998,8 @@ void DBImpl::BackgroundCallFlush() {
// case this is an environmental problem and we do not want to
// chew up resources for failed compactions for the duration of
// the problem.
uint64_t error_cnt = default_cf_handle_->cfd()
->internal_stats()
->BumpAndGetBackgroundErrorCount();
uint64_t error_cnt =
default_cf_internal_stats_->BumpAndGetBackgroundErrorCount();
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
mutex_.Unlock();
Log(options_.info_log,
@@ -2047,9 +2065,8 @@ void DBImpl::BackgroundCallCompaction() {
// case this is an environmental problem and we do not want to
// chew up resources for failed compactions for the duration of
// the problem.
uint64_t error_cnt = default_cf_handle_->cfd()
->internal_stats()
->BumpAndGetBackgroundErrorCount();
uint64_t error_cnt =
default_cf_internal_stats_->BumpAndGetBackgroundErrorCount();
bg_cv_.SignalAll(); // In case a waiter can proceed despite the error
mutex_.Unlock();
log_buffer.FlushBufferToLog();
@@ -3146,8 +3163,8 @@ Status DBImpl::DoCompactionWork(CompactionState* compact,
LogFlush(options_.info_log);
mutex_.Lock();
cfd->internal_stats()->AddCompactionStats(compact->compaction->output_level(),
stats);
cfd->internal_stats()->AddCompactionStats(
compact->compaction->output_level(), stats);
// if there were any unused file numbers (mostly in case of
// compaction error), free up the entry from pending_outputs
@@ -3774,9 +3791,13 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
if (!options.disableWAL) {
RecordTick(options_.statistics.get(), WRITE_WITH_WAL, 1);
default_cf_internal_stats_->AddDBStats(
InternalStats::WRITE_WITH_WAL, 1);
}
if (w.done) {
default_cf_internal_stats_->AddDBStats(
InternalStats::WRITE_DONE_BY_OTHER, 1);
mutex_.Unlock();
RecordTick(options_.statistics.get(), WRITE_DONE_BY_OTHER, 1);
return w.status;
@@ -3807,6 +3828,8 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
return Status::TimedOut();
} else {
RecordTick(options_.statistics.get(), WRITE_DONE_BY_SELF, 1);
default_cf_internal_stats_->AddDBStats(
InternalStats::WRITE_DONE_BY_SELF, 1);
}
// Once reaches this point, the current writer "w" will try to do its write
@@ -3892,17 +3915,19 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
WriteBatchInternal::SetSequence(updates, current_sequence);
int my_batch_count = WriteBatchInternal::Count(updates);
last_sequence += my_batch_count;
const uint64_t batch_size = WriteBatchInternal::ByteSize(updates);
// Record statistics
RecordTick(options_.statistics.get(),
NUMBER_KEYS_WRITTEN, my_batch_count);
RecordTick(options_.statistics.get(),
BYTES_WRITTEN,
WriteBatchInternal::ByteSize(updates));
batch_size);
if (options.disableWAL) {
flush_on_destroy_ = true;
}
PERF_TIMER_STOP(write_pre_and_post_process_time);
uint64_t log_size = 0;
if (!options.disableWAL) {
PERF_TIMER_START(write_wal_time);
Slice log_entry = WriteBatchInternal::Contents(updates);
@@ -3910,8 +3935,9 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
total_log_size_ += log_entry.size();
alive_log_files_.back().AddSize(log_entry.size());
log_empty_ = false;
log_size = log_entry.size();
RecordTick(options_.statistics.get(), WAL_FILE_SYNCED, 1);
RecordTick(options_.statistics.get(), WAL_FILE_BYTES, log_entry.size());
RecordTick(options_.statistics.get(), WAL_FILE_BYTES, log_size);
if (status.ok() && options.sync) {
if (options_.use_fsync) {
StopWatch(env_, options_.statistics.get(), WAL_FILE_SYNC_MICROS);
@@ -3942,8 +3968,19 @@ Status DBImpl::Write(const WriteOptions& options, WriteBatch* my_batch) {
last_sequence);
}
PERF_TIMER_START(write_pre_and_post_process_time);
if (updates == &tmp_batch_) tmp_batch_.Clear();
if (updates == &tmp_batch_) {
tmp_batch_.Clear();
}
mutex_.Lock();
// internal stats
default_cf_internal_stats_->AddDBStats(
InternalStats::BYTES_WRITTEN, batch_size);
if (!options.disableWAL) {
default_cf_internal_stats_->AddDBStats(
InternalStats::WAL_FILE_SYNCED, 1);
default_cf_internal_stats_->AddDBStats(
InternalStats::WAL_FILE_BYTES, log_size);
}
if (status.ok()) {
versions_->SetLastSequence(last_sequence);
}
@@ -4126,10 +4163,10 @@ Status DBImpl::MakeRoomForWrite(
delayed = sw.ElapsedMicros();
}
RecordTick(options_.statistics.get(), STALL_L0_SLOWDOWN_MICROS, delayed);
cfd->internal_stats()->RecordWriteStall(InternalStats::LEVEL0_SLOWDOWN,
delayed);
allow_delay = false; // Do not delay a single write more than once
mutex_.Lock();
cfd->internal_stats()->AddCFStats(
InternalStats::LEVEL0_SLOWDOWN, delayed);
delayed_writes_++;
} else if (!force && !cfd->mem()->ShouldFlush()) {
// There is room in current memtable
@@ -4160,7 +4197,7 @@ Status DBImpl::MakeRoomForWrite(
}
RecordTick(options_.statistics.get(),
STALL_MEMTABLE_COMPACTION_MICROS, stall);
cfd->internal_stats()->RecordWriteStall(
cfd->internal_stats()->AddCFStats(
InternalStats::MEMTABLE_COMPACTION, stall);
} else if (cfd->NeedWaitForNumLevel0Files()) {
DelayLoggingAndReset();
@@ -4179,11 +4216,11 @@ Status DBImpl::MakeRoomForWrite(
}
RecordTick(options_.statistics.get(),
STALL_L0_NUM_FILES_MICROS, stall);
cfd->internal_stats()->RecordWriteStall(InternalStats::LEVEL0_NUM_FILES,
stall);
cfd->internal_stats()->AddCFStats(
InternalStats::LEVEL0_NUM_FILES, stall);
} else if (allow_hard_rate_limit_delay && cfd->ExceedsHardRateLimit()) {
// Delay a write when the compaction score for any level is too large.
int max_level = cfd->current()->MaxCompactionScoreLevel();
const int max_level = cfd->current()->MaxCompactionScoreLevel();
score = cfd->current()->MaxCompactionScore();
mutex_.Unlock();
uint64_t delayed;
@@ -4193,7 +4230,6 @@ Status DBImpl::MakeRoomForWrite(
env_->SleepForMicroseconds(1000);
delayed = sw.ElapsedMicros();
}
cfd->internal_stats()->RecordLevelNSlowdown(max_level, delayed);
// Make sure the following value doesn't round to zero.
uint64_t rate_limit = std::max((delayed / 1000), (uint64_t) 1);
rate_limit_delay_millis += rate_limit;
@@ -4205,7 +4241,9 @@ Status DBImpl::MakeRoomForWrite(
allow_hard_rate_limit_delay = false;
}
mutex_.Lock();
cfd->internal_stats()->RecordLevelNSlowdown(max_level, delayed, false);
} else if (allow_soft_rate_limit_delay && cfd->ExceedsSoftRateLimit()) {
const int max_level = cfd->current()->MaxCompactionScoreLevel();
score = cfd->current()->MaxCompactionScore();
// Delay a write when the compaction score for any level is too large.
// TODO: add statistics
@@ -4221,6 +4259,7 @@ Status DBImpl::MakeRoomForWrite(
}
allow_soft_rate_limit_delay = false;
mutex_.Lock();
cfd->internal_stats()->RecordLevelNSlowdown(max_level, slowdown, true);
} else {
unique_ptr<WritableFile> lfile;
log::Writer* new_log = nullptr;
@@ -475,6 +475,7 @@ class DBImpl : public DB {
unique_ptr<log::Writer> log_;
bool log_empty_;
ColumnFamilyHandleImpl* default_cf_handle_;
InternalStats* default_cf_internal_stats_;
unique_ptr<ColumnFamilyMemTablesImpl> column_family_memtables_;
struct LogFileNumberSize {
explicit LogFileNumberSize(uint64_t _number)
This file's diff is collapsed and not shown.
@@ -9,8 +9,6 @@
//
#pragma once
#include "rocksdb/statistics.h"
#include "util/statistics.h"
#include "db/version_set.h"
#include <vector>
@@ -26,7 +24,9 @@ class DBImpl;
enum DBPropertyType {
kNumFilesAtLevel, // Number of files at a specific level
kLevelStats, // Return number of files and total sizes of each level
kStats, // Return general statistics of DB
kCFStats, // Return general statistics of CF
kDBStats, // Return general statistics of DB
kStats, // Return general statistics of both DB and CF
kSsTables, // Return a human readable string of current SST files
kNumImmutableMemTable, // Return number of immutable mem tables
kMemtableFlushPending, // Return 1 if mem table flushing is pending,
@@ -45,38 +45,66 @@ extern DBPropertyType GetPropertyType(const Slice& property);
class InternalStats {
public:
enum WriteStallType {
enum InternalCFStatsType {
LEVEL0_SLOWDOWN,
MEMTABLE_COMPACTION,
LEVEL0_NUM_FILES,
WRITE_STALLS_ENUM_MAX,
BYTES_FLUSHED,
INTERNAL_CF_STATS_ENUM_MAX,
};
InternalStats(int num_levels, Env* env, Statistics* statistics)
: stats_(num_levels),
stall_micros_(WRITE_STALLS_ENUM_MAX, 0),
stall_counts_(WRITE_STALLS_ENUM_MAX, 0),
stall_leveln_slowdown_(num_levels, 0),
stall_leveln_slowdown_count_(num_levels, 0),
enum InternalDBStatsType {
WAL_FILE_BYTES,
WAL_FILE_SYNCED,
BYTES_WRITTEN,
WRITE_DONE_BY_OTHER,
WRITE_DONE_BY_SELF,
WRITE_WITH_WAL,
INTERNAL_DB_STATS_ENUM_MAX,
};
InternalStats(int num_levels, Env* env)
: db_stats_(INTERNAL_DB_STATS_ENUM_MAX),
cf_stats_value_(INTERNAL_CF_STATS_ENUM_MAX),
cf_stats_count_(INTERNAL_CF_STATS_ENUM_MAX),
comp_stats_(num_levels),
stall_leveln_slowdown_hard_(num_levels),
stall_leveln_slowdown_count_hard_(num_levels),
stall_leveln_slowdown_soft_(num_levels),
stall_leveln_slowdown_count_soft_(num_levels),
bg_error_count_(0),
number_levels_(num_levels),
statistics_(statistics),
env_(env),
started_at_(env->NowMicros()) {}
started_at_(env->NowMicros()) {
for (int i = 0; i< INTERNAL_DB_STATS_ENUM_MAX; ++i) {
db_stats_[i] = 0;
}
for (int i = 0; i< INTERNAL_CF_STATS_ENUM_MAX; ++i) {
cf_stats_value_[i] = 0;
cf_stats_count_[i] = 0;
}
for (int i = 0; i < num_levels; ++i) {
stall_leveln_slowdown_hard_[i] = 0;
stall_leveln_slowdown_count_hard_[i] = 0;
stall_leveln_slowdown_soft_[i] = 0;
stall_leveln_slowdown_count_soft_[i] = 0;
}
}
// Per level compaction stats. stats_[level] stores the stats for
// Per level compaction stats. comp_stats_[level] stores the stats for
// compactions that produced data for the specified "level".
struct CompactionStats {
uint64_t micros;
// Bytes read from level N during compaction between levels N and N+1
int64_t bytes_readn;
uint64_t bytes_readn;
// Bytes read from level N+1 during compaction between levels N and N+1
int64_t bytes_readnp1;
uint64_t bytes_readnp1;
// Total bytes written during compaction between levels N and N+1
int64_t bytes_written;
uint64_t bytes_written;
// Files read from level N during compaction between levels N and N+1
int files_in_leveln;
@@ -100,6 +128,16 @@ class InternalStats {
files_out_levelnp1(0),
count(count) {}
explicit CompactionStats(const CompactionStats& c)
: micros(c.micros),
bytes_readn(c.bytes_readn),
bytes_readnp1(c.bytes_readnp1),
bytes_written(c.bytes_written),
files_in_leveln(c.files_in_leveln),
files_in_levelnp1(c.files_in_levelnp1),
files_out_levelnp1(c.files_out_levelnp1),
count(c.count) {}
void Add(const CompactionStats& c) {
this->micros += c.micros;
this->bytes_readn += c.bytes_readn;
@@ -110,20 +148,40 @@ class InternalStats {
this->files_out_levelnp1 += c.files_out_levelnp1;
this->count += c.count;
}
void Subtract(const CompactionStats& c) {
this->micros -= c.micros;
this->bytes_readn -= c.bytes_readn;
this->bytes_readnp1 -= c.bytes_readnp1;
this->bytes_written -= c.bytes_written;
this->files_in_leveln -= c.files_in_leveln;
this->files_in_levelnp1 -= c.files_in_levelnp1;
this->files_out_levelnp1 -= c.files_out_levelnp1;
this->count -= c.count;
}
};
void AddCompactionStats(int level, const CompactionStats& stats) {
stats_[level].Add(stats);
comp_stats_[level].Add(stats);
}
void RecordWriteStall(WriteStallType write_stall_type, uint64_t micros) {
stall_micros_[write_stall_type] += micros;
++stall_counts_[write_stall_type];
void RecordLevelNSlowdown(int level, uint64_t micros, bool soft) {
if (soft) {
stall_leveln_slowdown_soft_[level] += micros;
++stall_leveln_slowdown_count_soft_[level];
} else {
stall_leveln_slowdown_hard_[level] += micros;
++stall_leveln_slowdown_count_hard_[level];
}
}
void AddCFStats(InternalCFStatsType type, uint64_t value) {
cf_stats_value_[type] += value;
++cf_stats_count_[type];
}
void RecordLevelNSlowdown(int level, uint64_t micros) {
stall_leveln_slowdown_[level] += micros;
++stall_leveln_slowdown_count_[level];
void AddDBStats(InternalDBStatsType type, uint64_t value) {
db_stats_[type] += value;
}
uint64_t GetBackgroundErrorCount() const { return bg_error_count_; }
@@ -134,42 +192,58 @@ class InternalStats {
std::string* value, ColumnFamilyData* cfd);
private:
std::vector<CompactionStats> stats_;
void DumpDBStats(std::string* value);
void DumpCFStats(std::string* value, ColumnFamilyData* cfd);
// Per-DB stats
std::vector<uint64_t> db_stats_;
// Per-ColumnFamily stats
std::vector<uint64_t> cf_stats_value_;
std::vector<uint64_t> cf_stats_count_;
// Per-ColumnFamily/level compaction stats
std::vector<CompactionStats> comp_stats_;
// These count the number of microseconds for which MakeRoomForWrite stalls.
std::vector<uint64_t> stall_leveln_slowdown_hard_;
std::vector<uint64_t> stall_leveln_slowdown_count_hard_;
std::vector<uint64_t> stall_leveln_slowdown_soft_;
std::vector<uint64_t> stall_leveln_slowdown_count_soft_;
// Used to compute per-interval statistics
struct StatsSnapshot {
uint64_t compaction_bytes_read_; // Bytes read by compaction
uint64_t compaction_bytes_written_; // Bytes written by compaction
uint64_t ingest_bytes_; // Bytes written by user
uint64_t wal_bytes_; // Bytes written to WAL
uint64_t wal_synced_; // Number of times WAL is synced
uint64_t write_with_wal_; // Number of writes that request WAL
struct CFStatsSnapshot {
// ColumnFamily-level stats
CompactionStats comp_stats;
uint64_t ingest_bytes; // Bytes written to L0
uint64_t stall_us; // Stall time in micro-seconds
uint64_t stall_count; // Stall count
CFStatsSnapshot()
: comp_stats(0),
ingest_bytes(0),
stall_us(0),
stall_count(0) {}
} cf_stats_snapshot_;
struct DBStatsSnapshot {
// DB-level stats
uint64_t ingest_bytes; // Bytes written by user
uint64_t wal_bytes; // Bytes written to WAL
uint64_t wal_synced; // Number of times WAL is synced
uint64_t write_with_wal; // Number of writes that request WAL
// These count the number of writes processed by the calling thread or
// another thread.
uint64_t write_other_;
uint64_t write_self_;
double seconds_up_;
StatsSnapshot()
: compaction_bytes_read_(0),
compaction_bytes_written_(0),
ingest_bytes_(0),
wal_bytes_(0),
wal_synced_(0),
write_with_wal_(0),
write_other_(0),
write_self_(0),
seconds_up_(0) {}
};
// Counters from the previous time per-interval stats were computed
StatsSnapshot last_stats_;
// These count the number of microseconds for which MakeRoomForWrite stalls.
std::vector<uint64_t> stall_micros_;
std::vector<uint64_t> stall_counts_;
std::vector<uint64_t> stall_leveln_slowdown_;
std::vector<uint64_t> stall_leveln_slowdown_count_;
uint64_t write_other;
uint64_t write_self;
double seconds_up;
DBStatsSnapshot()
: ingest_bytes(0),
wal_bytes(0),
wal_synced(0),
write_with_wal(0),
write_other(0),
write_self(0),
seconds_up(0) {}
} db_stats_snapshot_;
// Total number of background errors encountered. Every time a flush task
// or compaction task fails, this counter is incremented. The failure can
@@ -178,10 +252,9 @@ class InternalStats {
// or compaction will cause the counter to increase too.
uint64_t bg_error_count_;
int number_levels_;
Statistics* statistics_;
const int number_levels_;
Env* env_;
uint64_t started_at_;
const uint64_t started_at_;
};
} // namespace rocksdb
@@ -248,6 +248,7 @@ class Version {
friend class UniversalCompactionPicker;
friend class FIFOCompactionPicker;
friend class ForwardIterator;
friend class InternalStats;
class LevelFileNumIterator;
class LevelFileIteratorState;