db.h 42.1 KB
Newer Older
1
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 3 4
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree. An additional grant
// of patent rights can be found in the PATENTS file in the same directory.
J
jorlow@chromium.org 已提交
5 6 7 8
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

9 10
#ifndef STORAGE_ROCKSDB_INCLUDE_DB_H_
#define STORAGE_ROCKSDB_INCLUDE_DB_H_
J
jorlow@chromium.org 已提交
11 12 13

#include <stdint.h>
#include <stdio.h>
14
#include <memory>
15
#include <string>
16
#include <unordered_map>
17
#include <vector>
18
#include "rocksdb/iterator.h"
19
#include "rocksdb/listener.h"
20 21
#include "rocksdb/metadata.h"
#include "rocksdb/options.h"
A
agiardullo 已提交
22
#include "rocksdb/snapshot.h"
23
#include "rocksdb/sst_file_writer.h"
Y
Yueh-Hsuan Chiang 已提交
24
#include "rocksdb/thread_status.h"
25 26 27
#include "rocksdb/transaction_log.h"
#include "rocksdb/types.h"
#include "rocksdb/version.h"
J
jorlow@chromium.org 已提交
28

29 30 31 32 33 34
#ifdef _WIN32
// Windows API macro interference
#undef DeleteFile
#endif


35
namespace rocksdb {
J
jorlow@chromium.org 已提交
36

37 38 39 40 41 42 43
struct Options;
struct DBOptions;
struct ColumnFamilyOptions;
struct ReadOptions;
struct WriteOptions;
struct FlushOptions;
struct CompactionOptions;
44
struct CompactRangeOptions;
45
struct TableProperties;
46
struct ExternalSstFileInfo;
47 48 49 50
class WriteBatch;
class Env;
class EventListener;

51 52
using std::unique_ptr;

53
extern const std::string kDefaultColumnFamilyName;
54
struct ColumnFamilyDescriptor {
55
  std::string name;
56
  ColumnFamilyOptions options;
I
Igor Canadi 已提交
57
  ColumnFamilyDescriptor()
58
      : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
59 60 61
  ColumnFamilyDescriptor(const std::string& _name,
                         const ColumnFamilyOptions& _options)
      : name(_name), options(_options) {}
62 63
};

64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
class ColumnFamilyHandle {
 public:
  virtual ~ColumnFamilyHandle() {}
  // Returns the name of the column family associated with the current handle.
  virtual const std::string& GetName() const = 0;
  // Returns the ID of the column family associated with the current handle.
  virtual uint32_t GetID() const = 0;
  // Fills "*desc" with the up-to-date descriptor of the column family
  // associated with this handle. Since it fills "*desc" with the up-to-date
  // information, this call might internally lock and release DB mutex to
  // access the up-to-date CF options.  In addition, all the pointer-typed
  // options cannot be referenced any longer than the original options exist.
  //
  // Note that this function is not supported in RocksDBLite.
  virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
79 80 81
  // Returns the comparator of the column family associated with the
  // current handle.
  virtual const Comparator* GetComparator() const = 0;
82 83
};

84 85
// Library major/minor version numbers, taken from the __ROCKSDB_MAJOR__ and
// __ROCKSDB_MINOR__ macros (presumably defined by rocksdb/version.h, which is
// included above -- confirm).
static const int kMajorVersion = __ROCKSDB_MAJOR__;
static const int kMinorVersion = __ROCKSDB_MINOR__;
86

87
// A range of keys
J
jorlow@chromium.org 已提交
88
struct Range {
89 90
  Slice start;          // Included in the range
  Slice limit;          // Not included in the range
J
jorlow@chromium.org 已提交
91

92
  Range() { }
J
jorlow@chromium.org 已提交
93 94 95
  Range(const Slice& s, const Slice& l) : start(s), limit(l) { }
};

96 97 98 99 100 101
// A collection of table properties objects, where
//  key:   the table's file name.
//  value: the table properties object of the given table.
using TablePropertiesCollection =
    std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;

J
jorlow@chromium.org 已提交
102
// A DB is a persistent ordered map from keys to values.
103 104
// A DB is safe for concurrent access from multiple threads without
// any external synchronization.
J
jorlow@chromium.org 已提交
105 106 107 108 109
class DB {
 public:
  // Open the database with the specified "name".
  // Stores a pointer to a heap-allocated database in *dbptr and returns
  // OK on success.
A
Abhishek Kona 已提交
110
  // Stores nullptr in *dbptr and returns a non-OK status on error.
J
jorlow@chromium.org 已提交
111 112 113 114 115
  // Caller should delete *dbptr when it is no longer needed.
  static Status Open(const Options& options,
                     const std::string& name,
                     DB** dbptr);

H
heyongqiang 已提交
116 117 118 119
  // Open the database for read only. All DB interfaces
  // that modify data, like put/delete, will return error.
  // If the db is opened in read only mode, then no compactions
  // will happen.
120 121 122
  //
  // Not supported in ROCKSDB_LITE, in which case the function will
  // return Status::NotSupported.
H
heyongqiang 已提交
123 124
  static Status OpenForReadOnly(const Options& options,
      const std::string& name, DB** dbptr,
125
      bool error_if_log_file_exist = false);
H
heyongqiang 已提交
126

127 128 129 130 131
  // Open the database for read only with column families. When opening DB with
  // read only, you can specify only a subset of column families in the
  // database that should be opened. However, you always need to specify default
  // column family. The default column family name is 'default' and it's stored
  // in rocksdb::kDefaultColumnFamilyName
132 133 134
  //
  // Not supported in ROCKSDB_LITE, in which case the function will
  // return Status::NotSupported.
135 136 137 138 139 140
  static Status OpenForReadOnly(
      const DBOptions& db_options, const std::string& name,
      const std::vector<ColumnFamilyDescriptor>& column_families,
      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
      bool error_if_log_file_exist = false);

141 142
  // Open DB with column families.
  // db_options specify database specific options
J
Jonah Cohen 已提交
143
  // column_families is the vector of all column families in the database,
144 145 146 147 148 149
  // containing column family name and options. You need to open ALL column
  // families in the database. To get the list of column families, you can use
  // ListColumnFamilies(). Also, you can open only a subset of column families
  // for read-only access.
  // The default column family name is 'default' and it's stored
  // in rocksdb::kDefaultColumnFamilyName.
150 151
  // If everything is OK, handles will on return be the same size
  // as column_families --- handles[i] will be a handle that you
152 153 154
  // will use to operate on column family column_family[i].
  // Before delete DB, you have to close All column families by calling
  // DestroyColumnFamilyHandle() with all the handles.
155 156
  static Status Open(const DBOptions& db_options, const std::string& name,
                     const std::vector<ColumnFamilyDescriptor>& column_families,
157
                     std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
158 159 160 161 162

  // ListColumnFamilies will open the DB specified by argument name
  // and return the list of all column families in that DB
  // through column_families argument. The ordering of
  // column families in column_families is unspecified.
163 164 165
  static Status ListColumnFamilies(const DBOptions& db_options,
                                   const std::string& name,
                                   std::vector<std::string>* column_families);
166

J
jorlow@chromium.org 已提交
167 168 169
  // The constructor does nothing here; the virtual destructor is only
  // declared in this header and defined elsewhere.
  DB() { }
  virtual ~DB();

170 171 172
  // Create a column_family and return the handle of column family
  // through the argument handle.
  virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
                                    const std::string& column_family_name,
                                    ColumnFamilyHandle** handle);
175

176 177 178 179
  // Drop a column family specified by column_family handle. This call
  // only records a drop record in the manifest and prevents the column
  // family from flushing and compacting.
  // NOTE(review): the handle itself is presumably still released separately
  // via DestroyColumnFamilyHandle() -- confirm against the implementation.
  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
180 181 182 183 184
  // Close a column family specified by column_family handle and destroy
  // the column family handle specified to avoid double deletion. This call
  // deletes the column family handle by default. Use this method to
  // close a column family instead of deleting the column family handle
  // directly.
  virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
185

186
  // Set the database entry for "key" to "value".
187
  // If "key" already exists, it will be overwritten.
188
  // Returns OK on success, and a non-OK status on error.
189
  // Note: consider setting options.sync = true.
J
jorlow@chromium.org 已提交
190
  virtual Status Put(const WriteOptions& options,
191
                     ColumnFamilyHandle* column_family, const Slice& key,
J
jorlow@chromium.org 已提交
192
                     const Slice& value) = 0;
193 194
  virtual Status Put(const WriteOptions& options, const Slice& key,
                     const Slice& value) {
195
    return Put(options, DefaultColumnFamily(), key, value);
196
  }
J
jorlow@chromium.org 已提交
197 198 199 200

  // Remove the database entry (if any) for "key".  Returns OK on
  // success, and a non-OK status on error.  It is not an error if "key"
  // did not exist in the database.
201
  // Note: consider setting options.sync = true.
202
  virtual Status Delete(const WriteOptions& options,
203
                        ColumnFamilyHandle* column_family,
204
                        const Slice& key) = 0;
205
  virtual Status Delete(const WriteOptions& options, const Slice& key) {
206
    return Delete(options, DefaultColumnFamily(), key);
207
  }
J
jorlow@chromium.org 已提交
208

A
Andres Noetzli 已提交
209 210 211
  // Remove the database entry for "key". Requires that the key exists
  // and was not overwritten. Returns OK on success, and a non-OK status
  // on error.  It is not an error if "key" did not exist in the database.
  // NOTE(review): "requires that the key exists" and "not an error if the key
  // did not exist" read as contradictory; confirm the intended contract.
  //
  // If a key is overwritten (by calling Put() multiple times), then the result
  // of calling SingleDelete() on this key is undefined.  SingleDelete() only
  // behaves correctly if there has been only one Put() for this key since the
  // previous call to SingleDelete() for this key.
  //
  // This feature is currently an experimental performance optimization
  // for a very specific workload.  It is up to the caller to ensure that
  // SingleDelete is only used for a key that is not deleted using Delete() or
  // written using Merge().  Mixing SingleDelete operations with Deletes and
  // Merges can result in undefined behavior.
  //
  // Note: consider setting options.sync = true.
  virtual Status SingleDelete(const WriteOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice& key) = 0;
  // Convenience overload: forwards to the overload above using the column
  // family returned by DefaultColumnFamily().
  virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
    return SingleDelete(options, DefaultColumnFamily(), key);
  }

232 233 234 235 236
  // Merge the database entry for "key" with "value".  Returns OK on success,
  // and a non-OK status on error. The semantics of this operation is
  // determined by the user provided merge_operator when opening DB.
  // Note: consider setting options.sync = true.
  virtual Status Merge(const WriteOptions& options,
                       ColumnFamilyHandle* column_family, const Slice& key,
                       const Slice& value) = 0;
  // Convenience overload: forwards to the overload above using the column
  // family returned by DefaultColumnFamily().
  virtual Status Merge(const WriteOptions& options, const Slice& key,
                       const Slice& value) {
    return Merge(options, DefaultColumnFamily(), key, value);
  }
243

J
jorlow@chromium.org 已提交
244
  // Apply the specified updates to the database.
245 246
  // If `updates` contains no update, WAL will still be synced if
  // options.sync=true.
J
jorlow@chromium.org 已提交
247
  // Returns OK on success, non-OK on failure.
248
  // Note: consider setting options.sync = true.
J
jorlow@chromium.org 已提交
249 250 251 252 253 254 255 256 257 258
  virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;

  // If the database contains an entry for "key" store the
  // corresponding value in *value and return OK.
  //
  // If there is no entry for "key" leave *value unchanged and return
  // a status for which Status::IsNotFound() returns true.
  //
  // May return some other Status on an error.
  virtual Status Get(const ReadOptions& options,
                     ColumnFamilyHandle* column_family, const Slice& key,
                     std::string* value) = 0;
  // Convenience overload: forwards to the overload above using the column
  // family returned by DefaultColumnFamily().
  virtual Status Get(const ReadOptions& options, const Slice& key,
                     std::string* value) {
    return Get(options, DefaultColumnFamily(), key, value);
  }
J
jorlow@chromium.org 已提交
264

265 266 267 268 269 270 271 272 273 274
  // If keys[i] does not exist in the database, then the i'th returned
  // status will be one for which Status::IsNotFound() is true, and
  // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
  // the i'th returned status will have Status::ok() true, and (*values)[i]
  // will store the value associated with keys[i].
  //
  // (*values) will always be resized to be the same size as (keys).
  // Similarly, the number of returned statuses will be the number of keys.
  // Note: keys will not be "de-duplicated". Duplicate keys will return
  // duplicate values in order.
275 276
  virtual std::vector<Status> MultiGet(
      const ReadOptions& options,
277
      const std::vector<ColumnFamilyHandle*>& column_family,
278
      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
279 280 281
  virtual std::vector<Status> MultiGet(const ReadOptions& options,
                                       const std::vector<Slice>& keys,
                                       std::vector<std::string>* values) {
282 283
    return MultiGet(options, std::vector<ColumnFamilyHandle*>(
                                 keys.size(), DefaultColumnFamily()),
284 285
                    keys, values);
  }
286

287
  // If the key definitely does not exist in the database, then this method
288 289 290 291 292 293
  // returns false, else true. If the caller wants to obtain value when the key
  // is found in memory, a bool for 'value_found' must be passed. 'value_found'
  // will be true on return if value has been set properly.
  // This check is potentially lighter-weight than invoking DB::Get(). One way
  // to make this lighter weight is to avoid doing any IOs.
  // Default implementation here returns true and sets 'value_found' to false
A
Alex Loukissas 已提交
294 295 296 297
  virtual bool KeyMayExist(const ReadOptions& /*options*/,
                           ColumnFamilyHandle* /*column_family*/,
                           const Slice& /*key*/, std::string* /*value*/,
                           bool* value_found = nullptr) {
298 299 300
    if (value_found != nullptr) {
      *value_found = false;
    }
301 302
    return true;
  }
303 304
  virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
                           std::string* value, bool* value_found = nullptr) {
305
    return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
306
  }
307

J
jorlow@chromium.org 已提交
308 309 310 311 312 313
  // Return a heap-allocated iterator over the contents of the database.
  // The result of NewIterator() is initially invalid (caller must
  // call one of the Seek methods on the iterator before using it).
  //
  // Caller should delete the iterator when it is no longer needed.
  // The returned iterator should be deleted before this db is deleted.
314
  virtual Iterator* NewIterator(const ReadOptions& options,
315
                                ColumnFamilyHandle* column_family) = 0;
316
  virtual Iterator* NewIterator(const ReadOptions& options) {
317
    return NewIterator(options, DefaultColumnFamily());
318 319 320 321 322 323
  }
  // Returns iterators from a consistent database state across multiple
  // column families. Iterators are heap allocated and need to be deleted
  // before the db is deleted
  virtual Status NewIterators(
      const ReadOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families,
      std::vector<Iterator*>* iterators) = 0;
J
jorlow@chromium.org 已提交
326 327 328 329 330

  // Return a handle to the current DB state.  Iterators created with
  // this handle will all observe a stable snapshot of the current DB
  // state.  The caller must call ReleaseSnapshot(result) when the
  // snapshot is no longer needed.
331 332 333
  //
  // nullptr will be returned if the DB fails to take a snapshot or does
  // not support snapshot.
J
jorlow@chromium.org 已提交
334 335 336 337 338 339
  virtual const Snapshot* GetSnapshot() = 0;

  // Release a previously acquired snapshot (see GetSnapshot()).  The caller
  // must not use "snapshot" after this call.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;

340
#ifndef ROCKSDB_LITE
341
  // Contains all valid property arguments for GetProperty().
342 343 344
  //
  // NOTE: Property names cannot end in numbers since those are interpreted as
  //       arguments, e.g., see kNumFilesAtLevelPrefix.
345
  struct Properties {
346 347 348
    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
    //      of files at level <N>, where <N> is an ASCII representation of a
    //      level number (e.g., "0").
349
    static const std::string kNumFilesAtLevelPrefix;
350

351 352 353 354 355 356 357
    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
    //      compression ratio of data at level <N>, where <N> is an ASCII
    //      representation of a level number (e.g., "0"). Here, compression
    //      ratio is defined as uncompressed data size / compressed file size.
    //      Returns "-1.0" if no open files at level <N>.
    static const std::string kCompressionRatioAtLevelPrefix;

358 359
    //  "rocksdb.stats" - returns a multi-line string containing the data
    //      described by kCFStats followed by the data described by kDBStats.
360
    static const std::string kStats;
361 362 363

    //  "rocksdb.sstables" - returns a multi-line string summarizing current
    //      SST files.
364
    static const std::string kSSTables;
365 366 367 368 369

    //  "rocksdb.cfstats" - returns a multi-line string with general column
    //      family stats per-level over db's lifetime ("L<n>"), aggregated over
    //      db's lifetime ("Sum"), and aggregated over the interval since the
    //      last retrieval ("Int").
370
    static const std::string kCFStats;
371 372 373 374

    //  "rocksdb.dbstats" - returns a multi-line string with general database
    //      stats, both cumulative (over the db's lifetime) and interval (since
    //      the last retrieval of kDBStats).
375
    static const std::string kDBStats;
376 377 378

    //  "rocksdb.levelstats" - returns multi-line string containing the number
    //      of files per level and total size of each level (MB).
379
    static const std::string kLevelStats;
380 381 382

    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
    //      memtables that have not yet been flushed.
383
    static const std::string kNumImmutableMemTable;
384 385 386

    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
    //      memtables that have already been flushed.
387
    static const std::string kNumImmutableMemTableFlushed;
388 389 390

    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
    //      pending; otherwise, returns 0.
391
    static const std::string kMemTableFlushPending;
392 393 394

    //  "rocksdb.num-running-flushes" - returns the number of currently running
    //      flushes.
395
    static const std::string kNumRunningFlushes;
396 397 398

    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
    //      pending; otherwise, returns 0.
399
    static const std::string kCompactionPending;
400 401 402

    //  "rocksdb.num-running-compactions" - returns the number of currently
    //      running compactions.
403
    static const std::string kNumRunningCompactions;
404 405 406

    //  "rocksdb.background-errors" - returns accumulated number of background
    //      errors.
407
    static const std::string kBackgroundErrors;
408 409 410

    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
    //      memtable (bytes).
411
    static const std::string kCurSizeActiveMemTable;
412 413 414

    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
    //      and unflushed immutable memtables (bytes).
415
    static const std::string kCurSizeAllMemTables;
416 417 418

    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
    //      unflushed immutable, and pinned immutable memtables (bytes).
419
    static const std::string kSizeAllMemTables;
420 421 422

    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
    //      in the active memtable.
423
    static const std::string kNumEntriesActiveMemTable;
424 425 426

    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
    //      in the unflushed immutable memtables.
427
    static const std::string kNumEntriesImmMemTables;
428 429 430

    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
    //      entries in the active memtable.
431
    static const std::string kNumDeletesActiveMemTable;
432 433 434

    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
    //      entries in the unflushed immutable memtables.
435
    static const std::string kNumDeletesImmMemTables;
436 437 438

    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
    //      the active and unflushed immutable memtables.
439
    static const std::string kEstimateNumKeys;
440 441 442 443

    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
    //      reading SST tables, excluding memory used in block cache (e.g.,
    //      filter and index blocks).
444
    static const std::string kEstimateTableReadersMem;
445 446 447

    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
    //      files is enabled; otherwise, returns a non-zero number.
448
    static const std::string kIsFileDeletionsEnabled;
449 450 451

    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
    //      database.
452
    static const std::string kNumSnapshots;
453 454 455

    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
    //      timestamp of oldest unreleased snapshot.
456
    static const std::string kOldestSnapshotTime;
457 458 459 460 461

    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
    //      is an internal data structure. See version_set.h for details. More
    //      live versions often mean more SST files are held from being deleted,
    //      by iterators or unfinished compactions.
462
    static const std::string kNumLiveVersions;
463

464 465 466 467 468
    //  "rocksdb.current-super-version-number" - returns number of current LSM
    //      version. It is a uint64_t integer number, incremented after there
    //      is any change to the LSM tree. The number is not preserved after
    //      restarting the DB. After DB restart, it will start from 0 again.
    static const std::string kCurrentSuperVersionNumber;
469

470 471
    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
    //      live data in bytes.
A
Andres Notzli 已提交
472
    static const std::string kEstimateLiveDataSize;
473 474 475 476

    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
    //      files.
    //  WARNING: may slow down online queries if there are too many files.
477
    static const std::string kTotalSstFilesSize;
478 479 480 481 482 483 484 485 486

    //  "rocksdb.base-level" - returns number of level to which L0 data will be
    //      compacted.
    static const std::string kBaseLevel;

    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
    //      number of bytes compaction needs to rewrite to get all levels down
    //      to under target size. Not valid for other compactions than level-
    //      based.
487
    static const std::string kEstimatePendingCompactionBytes;
488 489 490

    //  "rocksdb.aggregated-table-properties" - returns a string representation
    //      of the aggregated table properties of the target column family.
491
    static const std::string kAggregatedTableProperties;
492 493 494 495

    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
    //      one but only returns the aggregated table properties of the
    //      specified level "N" at the target column family.
496
    static const std::string kAggregatedTablePropertiesAtLevel;
497 498 499
  };
#endif /* ROCKSDB_LITE */

500 501 502 503
  // DB implementations can export properties about their state via this method.
  // If "property" is a valid property understood by this DB implementation (see
  // Properties struct above for valid options), fills "*value" with its current
  // value and returns true.  Otherwise, returns false.
504
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
505
                           const Slice& property, std::string* value) = 0;
506
  virtual bool GetProperty(const Slice& property, std::string* value) {
507
    return GetProperty(DefaultColumnFamily(), property, value);
508
  }
J
jorlow@chromium.org 已提交
509

510
  // Similar to GetProperty(), but only works for a subset of properties whose
511 512 513 514 515 516 517 518
  // return value is an integer. Return the value by integer. Supported
  // properties:
  //  "rocksdb.num-immutable-mem-table"
  //  "rocksdb.mem-table-flush-pending"
  //  "rocksdb.compaction-pending"
  //  "rocksdb.background-errors"
  //  "rocksdb.cur-size-active-mem-table"
  //  "rocksdb.cur-size-all-mem-tables"
519
  //  "rocksdb.size-all-mem-tables"
520 521
  //  "rocksdb.num-entries-active-mem-table"
  //  "rocksdb.num-entries-imm-mem-tables"
522 523
  //  "rocksdb.num-deletes-active-mem-table"
  //  "rocksdb.num-deletes-imm-mem-tables"
524 525 526 527 528
  //  "rocksdb.estimate-num-keys"
  //  "rocksdb.estimate-table-readers-mem"
  //  "rocksdb.is-file-deletions-enabled"
  //  "rocksdb.num-snapshots"
  //  "rocksdb.oldest-snapshot-time"
529
  //  "rocksdb.num-live-versions"
530
  //  "rocksdb.current-super-version-number"
A
Andres Notzli 已提交
531
  //  "rocksdb.estimate-live-data-size"
532
  //  "rocksdb.total-sst-files-size"
533 534
  //  "rocksdb.base-level"
  //  "rocksdb.estimate-pending-compaction-bytes"
535 536
  //  "rocksdb.num-running-compactions"
  //  "rocksdb.num-running-flushes"
537 538 539 540 541 542
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                              const Slice& property, uint64_t* value) = 0;
  virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
    return GetIntProperty(DefaultColumnFamily(), property, value);
  }

543 544 545 546 547
  // Same as GetIntProperty(), but this one returns the aggregated int
  // property from all column families. The result is written to "*value".
  // NOTE(review): the bool return presumably signals success as in
  // GetIntProperty() -- confirm against the implementation.
  virtual bool GetAggregatedIntProperty(const Slice& property,
                                        uint64_t* value) = 0;

J
jorlow@chromium.org 已提交
548 549 550 551 552 553 554
  // For each i in [0,n-1], store in "sizes[i]", the approximate
  // file system space used by keys in "[range[i].start .. range[i].limit)".
  //
  // Note that the returned sizes measure file system space usage, so
  // if the user data compresses by a factor of ten, the returned
  // sizes will be one-tenth the size of the corresponding user data size.
  //
555 556 557
  // If include_memtable is set to true, then the result will also
  // include those recently written data in the mem-tables if
  // the mem-table type supports it.
558
  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
559 560 561 562 563 564
                                   const Range* range, int n, uint64_t* sizes,
                                   bool include_memtable = false) = 0;
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
                                   bool include_memtable = false) {
    GetApproximateSizes(DefaultColumnFamily(), range, n, sizes,
                        include_memtable);
565
  }
J
jorlow@chromium.org 已提交
566

G
Gabor Cselle 已提交
567
  // Compact the underlying storage for the key range [*begin,*end].
  // The actual compaction interval might be superset of [*begin, *end].
  // In particular, deleted and overwritten versions are discarded,
  // and the data is rearranged to reduce the cost of operations
  // needed to access the data.  This operation should typically only
  // be invoked by users who understand the underlying implementation.
  //
  // begin==nullptr is treated as a key before all keys in the database.
  // end==nullptr is treated as a key after all keys in the database.
  // Therefore the following call will compact the entire database:
  //    db->CompactRange(options, nullptr, nullptr);
  // Note that after the entire database is compacted, all data are pushed
  // down to the last level containing any data. If the total data size after
  // compaction is reduced, that level might not be appropriate for hosting all
  // the files. In this case, client could set options.change_level to true, to
  // move the files back to the minimum level capable of holding the data set
  // or a given level (specified by non-negative options.target_level).
  virtual Status CompactRange(const CompactRangeOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice* begin, const Slice* end) = 0;
  // Convenience overload: forwards to the overload above using the column
  // family returned by DefaultColumnFamily().
  virtual Status CompactRange(const CompactRangeOptions& options,
                              const Slice* begin, const Slice* end) {
    return CompactRange(options, DefaultColumnFamily(), begin, end);
  }
591

592
#if defined(__GNUC__) || defined(__clang__)
593
  __attribute__((__deprecated__))
594 595 596
#elif _WIN32
  __declspec(deprecated)
#endif
597 598 599 600
  virtual Status
  CompactRange(ColumnFamilyHandle* column_family, const Slice* begin,
               const Slice* end, bool change_level = false,
               int target_level = -1, uint32_t target_path_id = 0) {
601 602 603 604 605 606
    CompactRangeOptions options;
    options.change_level = change_level;
    options.target_level = target_level;
    options.target_path_id = target_path_id;
    return CompactRange(options, column_family, begin, end);
  }
607
#if defined(__GNUC__) || defined(__clang__)
608
  __attribute__((__deprecated__))
609 610 611
#elif _WIN32
  __declspec(deprecated)
#endif
612 613 614
  virtual Status
  CompactRange(const Slice* begin, const Slice* end, bool change_level = false,
               int target_level = -1, uint32_t target_path_id = 0) {
615 616 617 618 619 620 621
    CompactRangeOptions options;
    options.change_level = change_level;
    options.target_level = target_level;
    options.target_path_id = target_path_id;
    return CompactRange(options, DefaultColumnFamily(), begin, end);
  }

S
sdong 已提交
622 623
  // Hook for changing options of a column family from string name/value
  // pairs. This base-class implementation ignores both arguments and reports
  // the operation as not supported.
  virtual Status SetOptions(
      ColumnFamilyHandle* /*column_family*/,
      const std::unordered_map<std::string, std::string>& /*new_options*/) {
    return Status::NotSupported("Not implemented");
  }
627
  // Convenience overload: applies new_options to the default column family by
  // forwarding to the column-family-aware SetOptions().
  virtual Status SetOptions(
      const std::unordered_map<std::string, std::string>& new_options) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return SetOptions(default_cf, new_options);
  }
J
jorlow@chromium.org 已提交
631

632 633 634
  // Pure virtual. new_options maps option names to their new values, both as
  // strings; the outcome is reported as a Status.
  // NOTE(review): presumably targets DB-wide options (as opposed to
  // SetOptions(), which takes a column family) -- the accepted names and
  // validation rules are not visible from this header; confirm against the
  // implementation.
  virtual Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& new_options) = 0;

A
Andres Notzli 已提交
635 636 637 638
  // CompactFiles() inputs a list of files specified by file numbers and
  // compacts them to the specified level. Note that the behavior is different
  // from CompactRange() in that CompactFiles() performs the compaction job
  // using the CURRENT thread.
639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
  //
  // @see GetDataBaseMetaData
  // @see GetColumnFamilyMetaData
  // Pure virtual form operating on an explicit column family.
  // input_file_names lists the files to compact and output_level is the level
  // they are compacted to (see the description above). output_path_id
  // defaults to -1.
  // NOTE(review): the meaning of a negative output_path_id is not documented
  // in this header -- confirm against the implementation.
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& input_file_names,
      const int output_level, const int output_path_id = -1) = 0;

  // Convenience overload: runs CompactFiles() against the default column
  // family, passing every other argument through unchanged.
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      const std::vector<std::string>& input_file_names,
      const int output_level, const int output_path_id = -1) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return CompactFiles(compact_options, default_cf, input_file_names,
                        output_level, output_path_id);
  }
655 656 657 658 659 660 661

  // This function will wait until all currently running background processes
  // finish. After it returns, no background process will be run until
  // ContinueBackgroundWork is called
  // Pure virtual; the outcome is reported as a Status.
  virtual Status PauseBackgroundWork() = 0;
  // Pure virtual; the outcome is reported as a Status.
  // NOTE(review): presumably the counterpart of PauseBackgroundWork() that
  // lets background work run again -- confirm against the implementation.
  virtual Status ContinueBackgroundWork() = 0;

662
  // This function will enable automatic compactions for the given column
663 664 665 666 667 668 669 670
  // families if they were previously disabled. The function will first set the
  // disable_auto_compactions option for each column family to 'false', after
  // which it will schedule a flush/compaction.
  //
  // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
  // does NOT schedule a flush/compaction afterwards, and only changes the
  // parameter itself within the column family option.
  //
671 672 673
  // Pure virtual. column_family_handles lists the column families for which
  // automatic compaction is to be enabled; the outcome is reported as a
  // Status.
  virtual Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;

674
  // Number of levels used for this DB.
675
  // Pure virtual variant that takes an explicit column family.
  virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
676
  virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
677 678 679

  // Maximum level to which a new compacted memtable is pushed if it
  // does not create overlap.
680
  // Pure virtual variant that takes an explicit column family.
  virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
681
  virtual int MaxMemCompactionLevel() {
682
    return MaxMemCompactionLevel(DefaultColumnFamily());
683
  }
684 685

  // Number of files in level-0 that would stop writes.
686
  // Pure virtual variant that takes an explicit column family.
  virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
687
  virtual int Level0StopWriteTrigger() {
688
    return Level0StopWriteTrigger(DefaultColumnFamily());
689
  }
690

I
Igor Canadi 已提交
691 692 693 694
  // Get DB name -- the exact same name that was provided as an argument to
  // DB::Open()
  // Pure virtual, const. Returns the name by const reference, not by value.
  virtual const std::string& GetName() const = 0;

695 696 697
  // Get Env object from the DB
  // Pure virtual, const. Returns a raw Env pointer.
  // NOTE(review): ownership of the returned pointer is not stated here --
  // presumably the caller must not delete it; confirm.
  virtual Env* GetEnv() const = 0;

698 699 700 701
  // Get DB Options that we use.  During the process of opening the
  // column family, the options provided when calling DB::Open() or
  // DB::CreateColumnFamily() will have been "sanitized" and transformed
  // in an implementation-defined manner.
702 703
  // Pure virtual, const. Returns the Options for column_family by value.
  virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
  // Options of the default column family; forwards to the
  // column-family-aware overload.
  virtual Options GetOptions() const {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return GetOptions(default_cf);
  }
I
Igor Canadi 已提交
706

707
  // Pure virtual, const. Returns the DBOptions by value.
  virtual DBOptions GetDBOptions() const = 0;
708

H
heyongqiang 已提交
709
  // Flush all mem-table data.
710
  // Pure virtual: flush for an explicit column family, configured by options.
  // The outcome is reported as a Status.
  virtual Status Flush(
      const FlushOptions& options, ColumnFamilyHandle* column_family) = 0;
712
  // Convenience overload: flushes the default column family by forwarding to
  // the column-family-aware Flush().
  virtual Status Flush(const FlushOptions& options) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return Flush(options, default_cf);
  }
H
heyongqiang 已提交
715

716 717 718 719 720 721
  // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
  // same as Write() with sync=true: in the latter case the changes won't be
  // visible until the sync is done.
  // Currently only works if allow_mmap_writes = false in Options.
  // Pure virtual; takes no arguments and reports the outcome as a Status.
  virtual Status SyncWAL() = 0;

I
Igor Canadi 已提交
722 723 724 725 726
  // The sequence number of the most recent transaction.
  // Pure virtual, const. Returns the sequence number by value.
  virtual SequenceNumber GetLatestSequenceNumber() const = 0;

#ifndef ROCKSDB_LITE

727 728 729 730 731
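  // Prevent file deletions. Compactions will continue to occur,
  // but no obsolete files will be deleted. Calling this multiple
  // times has the same effect as calling it once as far as preventing
  // deletions is concerned; note, however, that EnableFileDeletions() with
  // force == false must be called at least as many times as this method
  // before deletions are enabled again.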
  // Pure virtual; takes no arguments and reports the outcome as a Status.
  virtual Status DisableFileDeletions() = 0;

732
  // Allow compactions to delete obsolete files.
733 734 735 736 737 738 739 740 741
  // If force == true, the call to EnableFileDeletions() will guarantee that
  // file deletions are enabled after the call, even if DisableFileDeletions()
  // was called multiple times before.
  // If force == false, EnableFileDeletions will only enable file deletion
  // after it's been called at least as many times as DisableFileDeletions(),
  // enabling the two methods to be called by two threads concurrently without
  // synchronization -- i.e., file deletions will be enabled only after both
  // threads call EnableFileDeletions()
  // Pure virtual. force defaults to true, so a call without arguments uses
  // the "force" behavior described above.
  virtual Status EnableFileDeletions(bool force = true) = 0;
742

743 744
  // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup

745
  // Retrieve the list of all files in the database. The files are
746 747 748 749
  // relative to the dbname and are not absolute paths. The valid size of the
  // manifest file is returned in manifest_file_size. The manifest file is an
  // ever growing file, but only the portion specified by manifest_file_size is
  // valid for this snapshot.
750 751 752
  // Setting flush_memtable to true does Flush before recording the live files.
  // Setting flush_memtable to false is useful when we don't want to wait for
  // flush which may have to wait for compaction to complete taking an
753 754 755 756 757 758
  // indeterminate time.
  //
  // In case you have multiple column families, even if flush_memtable is true,
  // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
  // for new data that arrived to already-flushed column families while other
  // column families were flushing
759
  // Pure virtual. The first (unnamed) parameter receives the list of live
  // files; *manifest_file_size receives the valid size of the manifest file.
  // flush_memtable defaults to true.
  virtual Status GetLiveFiles(
      std::vector<std::string>&, uint64_t* manifest_file_size,
      bool flush_memtable = true) = 0;
762

763 764 765
  // Retrieve the sorted list of all wal files with earliest file first
  // Pure virtual. files is an output parameter passed by non-const reference;
  // the outcome is reported as a Status.
  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;

766 767 768
  // Sets iter to an iterator that is positioned at a write-batch containing
  // seq_number. If the sequence number is non existent, it returns an iterator
  // at the first available seq_no after the requested seq_no
769
  // Returns Status::OK if iterator is valid
770 771 772 773
  // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
  // use this api, else the WAL files will get
  // cleared aggressively and the iterator might keep getting invalid before
  // an update is read.
774 775 776 777
  // Pure virtual. *iter receives the iterator; read_options defaults to a
  // default-constructed TransactionLogIterator::ReadOptions.
  virtual Status GetUpdatesSince(
      SequenceNumber seq_number, unique_ptr<TransactionLogIterator>* iter,
      const TransactionLogIterator::ReadOptions&
          read_options = TransactionLogIterator::ReadOptions()) = 0;
778

D
Dmitri Smirnov 已提交
779 780
// Windows API macro interference
#undef DeleteFile
781 782 783 784
  // Delete the file name from the db directory and update the internal state to
  // reflect that. Supports deletion of sst and log files only. 'name' must be
  // path relative to the db directory. eg. 000001.sst, /archive/000003.log
  // Pure virtual. name is taken by value; the outcome is reported as a
  // Status. The #undef above keeps the Windows DeleteFile macro from renaming
  // this method.
  virtual Status DeleteFile(std::string name) = 0;
785 786 787

  // Returns a list of all table files with their level, start key
  // and end key
A
Alex Loukissas 已提交
788
  virtual void GetLiveFilesMetaData(
S
sdong 已提交
789
      std::vector<LiveFileMetaData>* /*metadata*/) {}
790

791 792 793 794 795 796
  // Obtains the meta data of the specified column family of the DB and
  // stores it in *metadata.
  // NOTE: this method returns void, so a missing column family cannot be
  // reported through a Status; the default implementation here is a no-op.
  //
  // The overload below that takes no column_family argument returns the
  // metadata of the default column family.
S
sdong 已提交
797 798
  // Base-class implementation does nothing: both arguments are ignored.
  virtual void GetColumnFamilyMetaData(
      ColumnFamilyHandle* /*column_family*/,
      ColumnFamilyMetaData* /*metadata*/) {
  }
799 800

  // Get the metadata of the default column family.
801
  void GetColumnFamilyMetaData(
802 803 804
      ColumnFamilyMetaData* metadata) {
    GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
  }
805

806 807 808 809
  // Batch load table files whose paths are stored in "file_path_list" into
  // "column_family". A vector of ExternalSstFileInfo can be used
  // instead of "file_path_list" to do a blind batch add that won't
  // need to read the file. move_file can be set to true to
810 811 812
  // move the files instead of copying them, skip_snapshot_check can be set to
  // true to ignore the snapshot, make sure that you know that when you use it,
  // snapshots see the data that is added in the new files.
813 814
  //
  // Current Requirements:
815
  // (1) The key ranges of the files don't overlap with each other
816
  // (2) The key range of any file in list doesn't overlap with
817
  //     existing keys or tombstones in DB.
818
  // (3) No snapshots are held (check skip_snapshot_check to skip this check).
819
  //
820
  // Notes: We will try to ingest the files to the lowest possible level
821
  //        even if the file compression doesn't match the level compression
822
  // Pure virtual: batch-adds the files named in file_path_list to
  // column_family. move_file and skip_snapshot_check both default to false.
  virtual Status AddFile(
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& file_path_list, bool move_file = false,
      bool skip_snapshot_check = false) = 0;
825
  // Convenience overload: adds the files to the default column family by
  // forwarding to the column-family-aware AddFile().
  virtual Status AddFile(const std::vector<std::string>& file_path_list,
                         bool move_file = false,
                         bool skip_snapshot_check = false) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return AddFile(default_cf, file_path_list, move_file, skip_snapshot_check);
  }
#if defined(__GNUC__) || defined(__clang__)
  __attribute__((__deprecated__))
#elif defined(_WIN32)
  __declspec(deprecated)
#endif
  // Deprecated single-file form: wraps file_path in a one-element vector and
  // forwards to the vector-based AddFile() for the given column family.
  virtual Status
  AddFile(ColumnFamilyHandle* column_family, const std::string& file_path,
          bool move_file = false, bool skip_snapshot_check = false) {
    return AddFile(column_family, std::vector<std::string>(1, file_path),
                   move_file, skip_snapshot_check);
  }
#if defined(__GNUC__) || defined(__clang__)
  __attribute__((__deprecated__))
#elif defined(_WIN32)
  __declspec(deprecated)
#endif
  // Deprecated single-file form: wraps file_path in a one-element vector and
  // forwards to the vector-based AddFile() for the default column family.
  virtual Status
  AddFile(const std::string& file_path, bool move_file = false,
          bool skip_snapshot_check = false) {
    return AddFile(DefaultColumnFamily(),
                   std::vector<std::string>(1, file_path), move_file,
                   skip_snapshot_check);
  }

  // Load table file with information "file_info" into "column_family"
  // Pure virtual: batch-adds the files described by file_info_list to
  // column_family. move_file and skip_snapshot_check both default to false.
  virtual Status AddFile(
      ColumnFamilyHandle* column_family,
      const std::vector<ExternalSstFileInfo>& file_info_list,
      bool move_file = false, bool skip_snapshot_check = false) = 0;
855
  // Convenience overload: adds the described files to the default column
  // family by forwarding to the column-family-aware AddFile().
  virtual Status AddFile(const std::vector<ExternalSstFileInfo>& file_info_list,
                         bool move_file = false,
                         bool skip_snapshot_check = false) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return AddFile(default_cf, file_info_list, move_file, skip_snapshot_check);
  }
#if defined(__GNUC__) || defined(__clang__)
  __attribute__((__deprecated__))
#elif defined(_WIN32)
  __declspec(deprecated)
#endif
  // Deprecated single-file form: copies *file_info into a one-element vector
  // and forwards to the vector-based AddFile() for the given column family.
  // A null file_info is rejected with Status::InvalidArgument instead of
  // being dereferenced.
  virtual Status
  AddFile(ColumnFamilyHandle* column_family,
          const ExternalSstFileInfo* file_info, bool move_file = false,
          bool skip_snapshot_check = false) {
    if (file_info == nullptr) {
      return Status::InvalidArgument("file_info must not be null");
    }
    return AddFile(column_family,
                   std::vector<ExternalSstFileInfo>(1, *file_info), move_file,
                   skip_snapshot_check);
  }
#if defined(__GNUC__) || defined(__clang__)
  __attribute__((__deprecated__))
#elif defined(_WIN32)
  __declspec(deprecated)
#endif
  // Deprecated single-file form: copies *file_info into a one-element vector
  // and forwards to the vector-based AddFile() for the default column family.
  // A null file_info is rejected with Status::InvalidArgument instead of
  // being dereferenced.
  virtual Status
  AddFile(const ExternalSstFileInfo* file_info, bool move_file = false,
          bool skip_snapshot_check = false) {
    if (file_info == nullptr) {
      return Status::InvalidArgument("file_info must not be null");
    }
    return AddFile(DefaultColumnFamily(),
                   std::vector<ExternalSstFileInfo>(1, *file_info), move_file,
                   skip_snapshot_check);
  }

I
Igor Canadi 已提交
881 882
#endif  // ROCKSDB_LITE

883 884 885
  // Sets the globally unique ID created at database creation time by invoking
  // Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
  // be set properly
886
  // Pure virtual, const. identity is an output parameter passed by non-const
  // reference.
  virtual Status GetDbIdentity(std::string& identity) const = 0;
887

888 889 890
  // Returns default column family handle
  // Pure virtual, const. The convenience overloads in this class that do not
  // take a column family pass this handle to their column-family-aware
  // counterparts.
  virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;

I
Igor Canadi 已提交
891
#ifndef ROCKSDB_LITE
I
Igor Canadi 已提交
892 893
  // Pure virtual. props is an output pointer for the collected table
  // properties of column_family; the outcome is reported as a Status.
  virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
                                          TablePropertiesCollection* props) = 0;
894
  // Convenience overload: collects table properties for the default column
  // family by forwarding to the column-family-aware overload.
  virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return GetPropertiesOfAllTables(default_cf, props);
  }
897
  // Pure virtual. props is an output pointer; the outcome is reported as a
  // Status.
  // NOTE(review): presumably range points to the first of n Range entries --
  // confirm against the implementation.
  virtual Status GetPropertiesOfTablesInRange(
      ColumnFamilyHandle* column_family,
      const Range* range,
      std::size_t n,
      TablePropertiesCollection* props) = 0;
I
Igor Canadi 已提交
900
#endif  // ROCKSDB_LITE
901

A
agiardullo 已提交
902 903 904
  // Needed for StackableDB
  // Base implementation simply returns this object.
  virtual DB* GetRootDB() {
    return this;
  }

J
jorlow@chromium.org 已提交
905 906 907 908 909 910 911 912 913 914
 private:
  // No copying allowed
  DB(const DB&);
  void operator=(const DB&);
};

// Destroy the contents of the specified database.
// Be very careful using this method.
// Declaration only. name identifies the database to destroy and options is
// passed by const reference; the outcome is reported as a Status.
Status DestroyDB(const std::string& name, const Options& options);

I
Igor Canadi 已提交
915
#ifndef ROCKSDB_LITE
J
jorlow@chromium.org 已提交
916 917 918 919
// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935
//
// With this API, we will warn and skip data associated with column families not
// specified in column_families.
//
// @param column_families Descriptors for known column families
// Declaration only: overload taking DBOptions plus the known column family
// descriptors. The outcome is reported as a Status.
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families);

// @param unknown_cf_opts Options for column families encountered during the
//                        repair that were not specified in column_families.
// Declaration only: like the overload above, with an extra unknown_cf_opts
// argument. The outcome is reported as a Status.
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families,
                const ColumnFamilyOptions& unknown_cf_opts);

// @param options These options will be used for the database and for ALL column
//                families encountered during the repair
J
jorlow@chromium.org 已提交
936
// Declaration only: overload taking a single Options object. The outcome is
// reported as a Status.
Status RepairDB(const std::string& dbname, const Options& options);
937

I
Igor Canadi 已提交
938
#endif
J
jorlow@chromium.org 已提交
939

940 941
}  // namespace rocksdb

942
#endif  // STORAGE_ROCKSDB_INCLUDE_DB_H_