internal_stats.cc 18.6 KB
Newer Older
I
Igor Canadi 已提交
1 2 3 4 5 6 7 8 9
//  This source code is licensed under the BSD-style license found in the
//  LICENSE file in the root directory of this source tree. An additional grant
//  of patent rights can be found in the PATENTS file in the same directory.
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/internal_stats.h"
L
liuhuahang 已提交
10 11

#ifndef __STDC_FORMAT_MACROS
I
Igor Canadi 已提交
12
#define __STDC_FORMAT_MACROS
L
liuhuahang 已提交
13 14
#endif

I
Igor Canadi 已提交
15
#include <inttypes.h>
I
Igor Canadi 已提交
16
#include <vector>
17
#include "db/column_family.h"
18

19
#include "db/db_impl.h"
20
#include "util/string_util.h"
I
Igor Canadi 已提交
21 22 23

namespace rocksdb {

24
#ifndef ROCKSDB_LITE
L
Lei Jin 已提交
25 26 27 28
namespace {
// Number of bytes in one mebibyte (2^20). Declared as a double so that
// dividing an integer byte count by it yields a fractional result.
const double kMB = 1048576.0;
// Number of bytes in one gibibyte (1024 * kMB).
const double kGB = kMB * 1024;

29
// Writes the banner, column titles and separator rule of the per-level
// compaction statistics table into `buf`.
//
// buf      destination buffer; always NUL-terminated when len > 0.
// len      capacity of `buf` in bytes; longer output is truncated by snprintf.
// cf_name  column family name shown between the brackets of the banner.
//
// The column titles are laid out to match the field widths used by
// PrintLevelStats(), so the two must be kept in sync.
void PrintLevelStatsHeader(char* buf, size_t len, const std::string& cf_name) {
  snprintf(
      buf, len,
      "\n** Compaction Stats [%s] **\n"
      "Level    Files   Size(MB) Score Read(GB)  Rn(GB) Rnp1(GB) "
      "Write(GB) Wnew(GB) Moved(GB) W-Amp Rd(MB/s) Wr(MB/s) "
      "Comp(sec) Comp(cnt) Avg(sec) "
      "Stall(cnt)  KeyIn KeyDrop\n"
      "--------------------------------------------------------------------"
      "-----------------------------------------------------------"
      "--------------------------------------\n",
      cf_name.c_str());
}

void PrintLevelStats(char* buf, size_t len, const std::string& name,
44
    int num_files, int being_compacted, double total_file_size, double score,
45
    double w_amp, uint64_t stalls,
L
Lei Jin 已提交
46
    const InternalStats::CompactionStats& stats) {
47
  uint64_t bytes_read = stats.bytes_readn + stats.bytes_readnp1;
I
Islam AbdelRahman 已提交
48
  int64_t bytes_new = stats.bytes_written - stats.bytes_readnp1;
L
Lei Jin 已提交
49
  double elapsed = (stats.micros + 1) / 1000000.0;
50 51 52
  std::string num_input_records = NumberToHumanString(stats.num_input_records);
  std::string num_dropped_records =
      NumberToHumanString(stats.num_dropped_records);
L
Lei Jin 已提交
53 54

  snprintf(buf, len,
55
           "%4s %6d/%-3d %8.0f %5.1f " /* Level, Files, Size(MB), Score */
S
sdong 已提交
56 57 58 59 60
           "%8.1f "                    /* Read(GB) */
           "%7.1f "                    /* Rn(GB) */
           "%8.1f "                    /* Rnp1(GB) */
           "%9.1f "                    /* Write(GB) */
           "%8.1f "                    /* Wnew(GB) */
61
           "%9.1f "                    /* Moved(GB) */
S
sdong 已提交
62 63 64
           "%5.1f "                    /* W-Amp */
           "%8.1f "                    /* Rd(MB/s) */
           "%8.1f "                    /* Wr(MB/s) */
65
           "%9.0f "                    /* Comp(sec) */
S
sdong 已提交
66 67 68 69
           "%9d "                      /* Comp(cnt) */
           "%8.3f "                    /* Avg(sec) */
           "%10" PRIu64
           " "      /* Stall(cnt) */
70 71
           "%7s "   /* KeyIn */
           "%6s\n", /* KeyDrop */
S
sdong 已提交
72 73 74
           name.c_str(), num_files, being_compacted, total_file_size / kMB,
           score, bytes_read / kGB, stats.bytes_readn / kGB,
           stats.bytes_readnp1 / kGB, stats.bytes_written / kGB,
75 76
           bytes_new / kGB, stats.bytes_moved / kGB,
           w_amp, bytes_read / kMB / elapsed,
77 78
           stats.bytes_written / kMB / elapsed, stats.micros / 1000000.0,
           stats.count,
S
sdong 已提交
79
           stats.count == 0 ? 0 : stats.micros / 1000000.0 / stats.count,
80
           stalls,
81
           num_input_records.c_str(), num_dropped_records.c_str());
L
Lei Jin 已提交
82 83 84
}
}

85 86 87 88
// Maps a textual property name of the form "rocksdb.<name>" to its
// DBPropertyType.
//
// property           full property name as supplied by the caller.
// is_int_property    out: false for the string-valued properties and for
//                    names without the "rocksdb." prefix; true once the
//                    string-valued names have been ruled out (including when
//                    the name turns out to be unknown).
// need_out_of_mutex  out: true only for "estimate-table-readers-mem".
//
// Returns kUnknown when the name is not recognised.
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
                               bool* need_out_of_mutex) {
  assert(is_int_property != nullptr);
  assert(need_out_of_mutex != nullptr);

  struct NamedProperty {
    const char* name;
    DBPropertyType type;
  };
  // Properties whose value is returned as a string (exact-match names).
  static const NamedProperty kStringProperties[] = {
      {"levelstats", kLevelStats},
      {"stats", kStats},
      {"cfstats", kCFStats},
      {"dbstats", kDBStats},
      {"sstables", kSsTables},
  };
  // Properties whose value is returned as an integer.
  static const NamedProperty kIntProperties[] = {
      {"num-immutable-mem-table", kNumImmutableMemTable},
      {"mem-table-flush-pending", kMemtableFlushPending},
      {"compaction-pending", kCompactionPending},
      {"background-errors", kBackgroundErrors},
      {"cur-size-active-mem-table", kCurSizeActiveMemTable},
      {"cur-size-all-mem-tables", kCurSizeAllMemTables},
      {"num-entries-active-mem-table", kNumEntriesInMutableMemtable},
      {"num-entries-imm-mem-tables", kNumEntriesInImmutableMemtable},
      {"estimate-num-keys", kEstimatedNumKeys},
      {"estimate-table-readers-mem", kEstimatedUsageByTableReaders},
      {"is-file-deletions-enabled", kIsFileDeletionEnabled},
      {"num-snapshots", kNumSnapshots},
      {"oldest-snapshot-time", kOldestSnapshotTime},
      {"num-live-versions", kNumLiveVersions},
      {"base-level", kBaseLevel},
  };

  *need_out_of_mutex = false;
  *is_int_property = false;

  const Slice required_prefix("rocksdb.");
  if (!property.starts_with(required_prefix)) {
    return kUnknown;
  }
  Slice suffix = property;
  suffix.remove_prefix(required_prefix.size());

  // The only prefix match: the level number follows the name.
  if (suffix.starts_with("num-files-at-level")) {
    return kNumFilesAtLevel;
  }
  for (const auto& candidate : kStringProperties) {
    if (suffix == Slice(candidate.name)) {
      return candidate.type;
    }
  }

  // Everything from here on is reported as an integer property, even if the
  // name ends up not matching anything.
  *is_int_property = true;
  for (const auto& candidate : kIntProperties) {
    if (suffix == Slice(candidate.name)) {
      if (candidate.type == kEstimatedUsageByTableReaders) {
        *need_out_of_mutex = true;
      }
      return candidate.type;
    }
  }
  return kUnknown;
}

148 149 150 151 152 153
// Evaluates the integer properties that GetPropertyType() flags as
// need_out_of_mutex. Only kEstimatedUsageByTableReaders is handled here.
//
// property_type  property to evaluate.
// version        version to query; may be nullptr, in which case 0 is stored.
// value          out: the property value; must not be nullptr.
//
// Returns true if `property_type` was handled and `*value` was written.
bool InternalStats::GetIntPropertyOutOfMutex(DBPropertyType property_type,
                                             Version* version,
                                             uint64_t* value) const {
  assert(value != nullptr);
  if (property_type == kEstimatedUsageByTableReaders) {
    *value =
        (version != nullptr) ? version->GetMemoryUsageByTableReaders() : 0;
    return true;
  }
  return false;
}
162

163 164 165 166
// Produces the value of a string-valued property.
//
// property_type  type previously resolved from `property`.
// property       full property name; only parsed for kNumFilesAtLevel, where
//                the level number follows the fixed name.
// value          out: receives the text; must not be nullptr. kNumFilesAtLevel
//                and kSsTables overwrite it, the stats properties append.
//
// Returns false for unhandled property types and for a missing, malformed or
// out-of-range level number.
bool InternalStats::GetStringProperty(DBPropertyType property_type,
                                      const Slice& property,
                                      std::string* value) {
  assert(value != nullptr);
  auto* current = cfd_->current();
  const auto* vstorage = current->storage_info();
  Slice in = property;

  switch (property_type) {
    case kNumFilesAtLevel: {
      in.remove_prefix(strlen("rocksdb.num-files-at-level"));
      uint64_t level = 0;
      bool ok = ConsumeDecimalNumber(&in, &level) && in.empty();
      // Compare in the unsigned 64-bit domain. Narrowing `level` to int
      // before the check would let values such as 2^32 wrap to a valid level.
      if (!ok || level >= static_cast<uint64_t>(number_levels_)) {
        return false;
      } else {
        char buf[100];
        snprintf(buf, sizeof(buf), "%d",
                 vstorage->NumLevelFiles(static_cast<int>(level)));
        *value = buf;
        return true;
      }
    }
    case kLevelStats: {
      char buf[1000];
      snprintf(buf, sizeof(buf),
               "Level Files Size(MB)\n"
               "--------------------\n");
      value->append(buf);

      for (int level = 0; level < number_levels_; level++) {
        snprintf(buf, sizeof(buf), "%3d %8d %8.0f\n", level,
                 vstorage->NumLevelFiles(level),
                 vstorage->NumLevelBytes(level) / kMB);
        value->append(buf);
      }
      return true;
    }
    case kStats: {
      // "stats" is the column family stats followed by the DB stats.
      if (!GetStringProperty(kCFStats, "rocksdb.cfstats", value)) {
        return false;
      }
      if (!GetStringProperty(kDBStats, "rocksdb.dbstats", value)) {
        return false;
      }
      return true;
    }
    case kCFStats: {
      DumpCFStats(value);
      return true;
    }
    case kDBStats: {
      DumpDBStats(value);
      return true;
    }
    case kSsTables:
      *value = current->DebugString();
      return true;
    default:
      return false;
  }
}

bool InternalStats::GetIntProperty(DBPropertyType property_type,
227
                                   uint64_t* value, DBImpl* db) const {
228
  db->mutex_.AssertHeld();
S
sdong 已提交
229
  const auto* vstorage = cfd_->current()->storage_info();
230 231

  switch (property_type) {
232
    case kNumImmutableMemTable:
233
      *value = cfd_->imm()->size();
234
      return true;
235
    case kMemtableFlushPending:
236
      // Return number of mem tables that are ready to flush (made immutable)
237
      *value = (cfd_->imm()->IsFlushPending() ? 1 : 0);
238
      return true;
239
    case kCompactionPending:
240 241
      // 1 if the system already determines at least one compacdtion is needed.
      // 0 otherwise,
Y
Yueh-Hsuan Chiang 已提交
242
      *value = (cfd_->compaction_picker()->NeedsCompaction(vstorage) ? 1 : 0);
243
      return true;
244 245
    case kBackgroundErrors:
      // Accumulated number of  errors in background flushes or compactions.
246
      *value = GetBackgroundErrorCount();
247
      return true;
248 249
    case kCurSizeActiveMemTable:
      // Current size of the active memtable
250
      *value = cfd_->mem()->ApproximateMemoryUsage();
251
      return true;
I
Igor Canadi 已提交
252 253 254 255 256
    case kCurSizeAllMemTables:
      // Current size of the active memtable + immutable memtables
      *value = cfd_->mem()->ApproximateMemoryUsage() +
               cfd_->imm()->ApproximateMemoryUsage();
      return true;
257
    case kNumEntriesInMutableMemtable:
I
Igor Canadi 已提交
258
      // Current number of entires in the active memtable
259
      *value = cfd_->mem()->GetNumEntries();
260 261
      return true;
    case kNumEntriesInImmutableMemtable:
I
Igor Canadi 已提交
262
      // Current number of entries in the immutable memtables
263
      *value = cfd_->imm()->current()->GetTotalNumEntries();
264
      return true;
S
sdong 已提交
265 266 267
    case kEstimatedNumKeys:
      // Estimate number of entries in the column family:
      // Use estimated entries in tables + total entries in memtables.
268 269
      *value = cfd_->mem()->GetNumEntries() +
               cfd_->imm()->current()->GetTotalNumEntries() +
S
sdong 已提交
270
               vstorage->GetEstimatedActiveKeys();
S
sdong 已提交
271
      return true;
272 273 274 275 276 277
    case kNumSnapshots:
      *value = db->snapshots().count();
      return true;
    case kOldestSnapshotTime:
      *value = static_cast<uint64_t>(db->snapshots().GetOldestSnapshotTime());
      return true;
278 279 280
    case kNumLiveVersions:
      *value = cfd_->GetNumLiveVersions();
      return true;
I
Igor Canadi 已提交
281
#ifndef ROCKSDB_LITE
282 283 284
    case kIsFileDeletionEnabled:
      *value = db->IsFileDeletionsEnabled();
      return true;
I
Igor Canadi 已提交
285
#endif
286 287 288
    case kBaseLevel:
      *value = vstorage->base_level();
      return true;
289 290 291
    default:
      return false;
  }
I
Igor Canadi 已提交
292 293
}

294 295 296 297 298 299 300 301 302 303 304
void InternalStats::DumpDBStats(std::string* value) {
  char buf[1000];
  // DB-level stats, only available from default column family
  double seconds_up = (env_->NowMicros() - started_at_ + 1) / 1000000.0;
  double interval_seconds_up = seconds_up - db_stats_snapshot_.seconds_up;
  snprintf(buf, sizeof(buf),
           "\n** DB Stats **\nUptime(secs): %.1f total, %.1f interval\n",
           seconds_up, interval_seconds_up);
  value->append(buf);
  // Cumulative
  uint64_t user_bytes_written = db_stats_[InternalStats::BYTES_WRITTEN];
S
sdong 已提交
305
  uint64_t num_keys_written = db_stats_[InternalStats::NUMBER_KEYS_WRITTEN];
306 307 308 309 310
  uint64_t write_other = db_stats_[InternalStats::WRITE_DONE_BY_OTHER];
  uint64_t write_self = db_stats_[InternalStats::WRITE_DONE_BY_SELF];
  uint64_t wal_bytes = db_stats_[InternalStats::WAL_FILE_BYTES];
  uint64_t wal_synced = db_stats_[InternalStats::WAL_FILE_SYNCED];
  uint64_t write_with_wal = db_stats_[InternalStats::WRITE_WITH_WAL];
S
sdong 已提交
311
  uint64_t write_stall_micros = db_stats_[InternalStats::WRITE_STALL_MICROS];
312 313 314
  const int kHumanMicrosLen = 32;
  char human_micros[kHumanMicrosLen];

315
  // Data
S
sdong 已提交
316 317 318 319 320 321 322 323
  // writes: total number of write requests.
  // keys: total number of key updates issued by all the write requests
  // batches: number of group commits issued to the DB. Each group can contain
  //          one or more writes.
  // so writes/keys is the average number of put in multi-put or put
  // writes/batches is the average group commit size.
  //
  // The format is the same for interval stats.
324
  AppendHumanMicros(write_stall_micros, human_micros, kHumanMicrosLen);
325
  snprintf(buf, sizeof(buf),
S
sdong 已提交
326
           "Cumulative writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
S
sdong 已提交
327
           " batches, %.1f writes per batch, %.2f GB user ingest, "
328
           "stall time: %s\n",
S
sdong 已提交
329
           write_other + write_self, num_keys_written, write_self,
330
           (write_other + write_self) / static_cast<double>(write_self + 1),
331
           user_bytes_written / kGB, human_micros);
332 333 334 335 336 337 338 339 340 341 342 343 344
  value->append(buf);
  // WAL
  snprintf(buf, sizeof(buf),
           "Cumulative WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
           "%.2f writes per sync, %.2f GB written\n",
           write_with_wal, wal_synced,
           write_with_wal / static_cast<double>(wal_synced + 1),
           wal_bytes / kGB);
  value->append(buf);

  // Interval
  uint64_t interval_write_other = write_other - db_stats_snapshot_.write_other;
  uint64_t interval_write_self = write_self - db_stats_snapshot_.write_self;
S
sdong 已提交
345 346
  uint64_t interval_num_keys_written =
      num_keys_written - db_stats_snapshot_.num_keys_written;
347 348 349
  AppendHumanMicros(
      write_stall_micros - db_stats_snapshot_.write_stall_micros,
      human_micros, kHumanMicrosLen);
350
  snprintf(buf, sizeof(buf),
S
sdong 已提交
351
           "Interval writes: %" PRIu64 " writes, %" PRIu64 " keys, %" PRIu64
S
sdong 已提交
352
           " batches, %.1f writes per batch, %.1f MB user ingest, "
353
           "stall time: %s\n",
354
           interval_write_other + interval_write_self,
S
sdong 已提交
355
           interval_num_keys_written, interval_write_self,
356 357
           static_cast<double>(interval_write_other + interval_write_self) /
               (interval_write_self + 1),
S
sdong 已提交
358
           (user_bytes_written - db_stats_snapshot_.ingest_bytes) / kMB,
359
           human_micros);
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
  value->append(buf);

  uint64_t interval_write_with_wal =
      write_with_wal - db_stats_snapshot_.write_with_wal;
  uint64_t interval_wal_synced = wal_synced - db_stats_snapshot_.wal_synced;
  uint64_t interval_wal_bytes = wal_bytes - db_stats_snapshot_.wal_bytes;

  snprintf(buf, sizeof(buf),
           "Interval WAL: %" PRIu64 " writes, %" PRIu64 " syncs, "
           "%.2f writes per sync, %.2f MB written\n",
           interval_write_with_wal,
           interval_wal_synced,
           interval_write_with_wal /
              static_cast<double>(interval_wal_synced + 1),
           interval_wal_bytes / kGB);
  value->append(buf);

  db_stats_snapshot_.seconds_up = seconds_up;
  db_stats_snapshot_.ingest_bytes = user_bytes_written;
  db_stats_snapshot_.write_other = write_other;
  db_stats_snapshot_.write_self = write_self;
S
sdong 已提交
381
  db_stats_snapshot_.num_keys_written = num_keys_written;
382 383 384
  db_stats_snapshot_.wal_bytes = wal_bytes;
  db_stats_snapshot_.wal_synced = wal_synced;
  db_stats_snapshot_.write_with_wal = write_with_wal;
S
sdong 已提交
385
  db_stats_snapshot_.write_stall_micros = write_stall_micros;
386 387
}

388
void InternalStats::DumpCFStats(std::string* value) {
S
sdong 已提交
389
  const VersionStorageInfo* vstorage = cfd_->current()->storage_info();
390 391

  int num_levels_to_check =
392 393
      (cfd_->ioptions()->compaction_style != kCompactionStyleUniversal &&
       cfd_->ioptions()->compaction_style != kCompactionStyleFIFO)
394
          ? vstorage->num_levels() - 1
395
          : 1;
S
sdong 已提交
396

397 398 399 400
  // Compaction scores are sorted base on its value. Restore them to the
  // level order
  std::vector<double> compaction_score(number_levels_, 0);
  for (int i = 0; i < num_levels_to_check; ++i) {
401 402
    compaction_score[vstorage->CompactionScoreLevel(i)] =
        vstorage->CompactionScore(i);
403 404 405 406
  }
  // Count # of files being compacted for each level
  std::vector<int> files_being_compacted(number_levels_, 0);
  for (int level = 0; level < num_levels_to_check; ++level) {
407
    for (auto* f : vstorage->LevelFiles(level)) {
408 409 410 411 412 413 414 415
      if (f->being_compacted) {
        ++files_being_compacted[level];
      }
    }
  }

  char buf[1000];
  // Per-ColumnFamily stats
416
  PrintLevelStatsHeader(buf, sizeof(buf), cfd_->GetName());
417 418 419 420 421 422 423 424 425 426
  value->append(buf);

  CompactionStats stats_sum(0);
  int total_files = 0;
  int total_files_being_compacted = 0;
  double total_file_size = 0;
  uint64_t total_slowdown_count_soft = 0;
  uint64_t total_slowdown_count_hard = 0;
  uint64_t total_stall_count = 0;
  for (int level = 0; level < number_levels_; level++) {
S
sdong 已提交
427
    int files = vstorage->NumLevelFiles(level);
428 429 430 431 432 433 434 435 436 437 438
    total_files += files;
    total_files_being_compacted += files_being_compacted[level];
    if (comp_stats_[level].micros > 0 || files > 0) {
      uint64_t stalls = level == 0 ?
        (cf_stats_count_[LEVEL0_SLOWDOWN] +
         cf_stats_count_[LEVEL0_NUM_FILES] +
         cf_stats_count_[MEMTABLE_COMPACTION])
        : (stall_leveln_slowdown_count_soft_[level] +
           stall_leveln_slowdown_count_hard_[level]);

      stats_sum.Add(comp_stats_[level]);
S
sdong 已提交
439
      total_file_size += vstorage->NumLevelBytes(level);
440 441 442 443 444 445
      total_stall_count += stalls;
      total_slowdown_count_soft += stall_leveln_slowdown_count_soft_[level];
      total_slowdown_count_hard += stall_leveln_slowdown_count_hard_[level];
      double w_amp = (comp_stats_[level].bytes_readn == 0) ? 0.0
          : comp_stats_[level].bytes_written /
            static_cast<double>(comp_stats_[level].bytes_readn);
446
      PrintLevelStats(buf, sizeof(buf), "L" + ToString(level), files,
S
sdong 已提交
447 448
                      files_being_compacted[level],
                      vstorage->NumLevelBytes(level), compaction_score[level],
449
                      w_amp, stalls, comp_stats_[level]);
450 451 452 453 454 455 456 457
      value->append(buf);
    }
  }
  uint64_t curr_ingest = cf_stats_value_[BYTES_FLUSHED];
  // Cumulative summary
  double w_amp = stats_sum.bytes_written / static_cast<double>(curr_ingest + 1);
  // Stats summary across levels
  PrintLevelStats(buf, sizeof(buf), "Sum", total_files,
458
      total_files_being_compacted, total_file_size, 0, w_amp,
459
      total_stall_count, stats_sum);
460 461 462 463 464 465 466 467
  value->append(buf);
  // Interval summary
  uint64_t interval_ingest =
      curr_ingest - cf_stats_snapshot_.ingest_bytes + 1;
  CompactionStats interval_stats(stats_sum);
  interval_stats.Subtract(cf_stats_snapshot_.comp_stats);
  w_amp = interval_stats.bytes_written / static_cast<double>(interval_ingest);
  PrintLevelStats(buf, sizeof(buf), "Int", 0, 0, 0, 0,
468 469
      w_amp, total_stall_count - cf_stats_snapshot_.stall_count,
      interval_stats);
470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
  value->append(buf);

  snprintf(buf, sizeof(buf),
           "Flush(GB): accumulative %.3f, interval %.3f\n",
           curr_ingest / kGB, interval_ingest / kGB);
  value->append(buf);

  snprintf(buf, sizeof(buf),
           "Stalls(count): %" PRIu64 " level0_slowdown, "
           "%" PRIu64 " level0_numfiles, %" PRIu64 " memtable_compaction, "
           "%" PRIu64 " leveln_slowdown_soft, "
           "%" PRIu64 " leveln_slowdown_hard\n",
           cf_stats_count_[LEVEL0_SLOWDOWN],
           cf_stats_count_[LEVEL0_NUM_FILES],
           cf_stats_count_[MEMTABLE_COMPACTION],
           total_slowdown_count_soft, total_slowdown_count_hard);
  value->append(buf);

  cf_stats_snapshot_.ingest_bytes = curr_ingest;
  cf_stats_snapshot_.comp_stats = stats_sum;
  cf_stats_snapshot_.stall_count = total_stall_count;
}

493 494 495 496 497 498 499 500 501 502

#else

// ROCKSDB_LITE stub: no properties are recognised, so every name maps to
// kUnknown. Both out-parameters are set to false (when provided) so callers
// never read an indeterminate flag, matching the initial values the full
// implementation assigns.
DBPropertyType GetPropertyType(const Slice& property, bool* is_int_property,
                               bool* need_out_of_mutex) {
  (void)property;  // unused in the LITE build
  if (is_int_property != nullptr) {
    *is_int_property = false;
  }
  if (need_out_of_mutex != nullptr) {
    *need_out_of_mutex = false;
  }
  return kUnknown;
}

#endif  // !ROCKSDB_LITE

I
Igor Canadi 已提交
503
}  // namespace rocksdb