db.h 57.9 KB
Newer Older
1
// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
S
Siying Dong 已提交
2 3 4
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
J
jorlow@chromium.org 已提交
5 6 7 8
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

9
#pragma once
J
jorlow@chromium.org 已提交
10 11 12

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#include <functional>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_map>
#include <vector>
L
leipeng 已提交
19 20
//#include <terark/thread/fiber_future.hpp>
//#include <boost/fiber/future.hpp>
21
#include "rocksdb/iterator.h"
22
#include "rocksdb/listener.h"
23 24
#include "rocksdb/metadata.h"
#include "rocksdb/options.h"
A
agiardullo 已提交
25
#include "rocksdb/snapshot.h"
26
#include "rocksdb/sst_file_writer.h"
Y
Yueh-Hsuan Chiang 已提交
27
#include "rocksdb/thread_status.h"
28 29 30
#include "rocksdb/transaction_log.h"
#include "rocksdb/types.h"
#include "rocksdb/version.h"
J
jorlow@chromium.org 已提交
31

32 33 34 35 36
#ifdef _WIN32
// Windows API macro interference
#undef DeleteFile
#endif

37 38 39 40 41
// ROCKSDB_DEPRECATED_FUNC is placed in front of a declaration to mark it as
// deprecated. It expands to the compiler-specific deprecation attribute on
// GCC/Clang and on Windows toolchains.
#if defined(__GNUC__) || defined(__clang__)
#define ROCKSDB_DEPRECATED_FUNC __attribute__((__deprecated__))
#elif defined(_WIN32)
#define ROCKSDB_DEPRECATED_FUNC __declspec(deprecated)
#else
// Unknown toolchain: expand to nothing so annotated declarations still compile.
#define ROCKSDB_DEPRECATED_FUNC
#endif
42

L
leipeng 已提交
43
namespace boost {
Z
ZhaoMing 已提交
44
namespace fibers {
赵明 已提交
45 46
template <typename>
class future;  // forward declaration
L
leipeng 已提交
47
}
赵明 已提交
48
}  // namespace boost
L
leipeng 已提交
49

50
namespace rocksdb {
J
jorlow@chromium.org 已提交
51

L
leipeng 已提交
52 53
using boost::fibers::future;

54 55 56 57 58 59 60
struct Options;
struct DBOptions;
struct ColumnFamilyOptions;
struct ReadOptions;
struct WriteOptions;
struct FlushOptions;
struct CompactionOptions;
61
struct CompactRangeOptions;
62
struct TableProperties;
63
struct ExternalSstFileInfo;
64 65 66
class WriteBatch;
class Env;
class EventListener;
67
class TraceWriter;
68

69 70
using std::unique_ptr;

71
extern const std::string kDefaultColumnFamilyName;
72
struct ColumnFamilyDescriptor {
73
  std::string name;
74
  ColumnFamilyOptions options;
I
Igor Canadi 已提交
75
  ColumnFamilyDescriptor()
76
      : name(kDefaultColumnFamilyName), options(ColumnFamilyOptions()) {}
77 78 79
  ColumnFamilyDescriptor(const std::string& _name,
                         const ColumnFamilyOptions& _options)
      : name(_name), options(_options) {}
80 81
};

82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
class ColumnFamilyHandle {
 public:
  virtual ~ColumnFamilyHandle() {}
  // Returns the name of the column family associated with the current handle.
  virtual const std::string& GetName() const = 0;
  // Returns the ID of the column family associated with the current handle.
  virtual uint32_t GetID() const = 0;
  // Fills "*desc" with the up-to-date descriptor of the column family
  // associated with this handle. Since it fills "*desc" with the up-to-date
  // information, this call might internally lock and release DB mutex to
  // access the up-to-date CF options.  In addition, all the pointer-typed
  // options cannot be referenced any longer than the original options exist.
  //
  // Note that this function is not supported in RocksDBLite.
  virtual Status GetDescriptor(ColumnFamilyDescriptor* desc) = 0;
97 98 99
  // Returns the comparator of the column family associated with the
  // current handle.
  virtual const Comparator* GetComparator() const = 0;
100 101
};

102 103
// Library version numbers, taken from the __ROCKSDB_MAJOR__ and
// __ROCKSDB_MINOR__ macros (presumably provided by rocksdb/version.h --
// confirm).
static const int kMajorVersion = __ROCKSDB_MAJOR__;
static const int kMinorVersion = __ROCKSDB_MINOR__;
104

105
// A range of keys
赵明 已提交
106
template <class TValue>
奏之章 已提交
107 108 109
struct RangeBase {
  TValue start;
  TValue limit;
奏之章 已提交
110
  bool include_start;
奏之章 已提交
111 112
  bool include_limit;

奏之章 已提交
113
  RangeBase() : include_start(true), include_limit(true) {}
奏之章 已提交
114 115 116 117
  RangeBase(const Slice& _start, const Slice& _limit,
            bool _include_start = true, bool _include_limit = true)
      : start(_start.data(), _start.size()),
        limit(_limit.data(), _limit.size()),
奏之章 已提交
118
        include_start(_include_start),
奏之章 已提交
119
        include_limit(_include_limit) {}
奏之章 已提交
120
};
奏之章 已提交
121 122 123 124
// RangeBase whose endpoints are stored as Slice values.
using Range = RangeBase<Slice>;
// RangeBase whose endpoints are stored as std::string values.
using RangeStorage = RangeBase<std::string>;

// A range of keys, support infinite bound
奏之章 已提交
125 126 127 128 129
struct RangePtr {
  const Slice* start;
  const Slice* limit;
  bool include_start;
  bool include_limit;
奏之章 已提交
130

奏之章 已提交
131 132 133 134 135
  RangePtr()
      : start(nullptr),
        limit(nullptr),
        include_start(true),
        include_limit(true) {}
赵明 已提交
136 137
  RangePtr(const Slice* _start, const Slice* _limit, bool _include_start = true,
           bool _include_limit = true)
奏之章 已提交
138 139 140 141
      : start(_start),
        limit(_limit),
        include_start(_include_start),
        include_limit(_include_limit) {}
奏之章 已提交
142 143
};

144 145 146 147 148 149
// A collection of table properties objects:
//  key: the table's file name.
//  value: the (shared, const) table properties object of that table.
using TablePropertiesCollection =
    std::unordered_map<std::string, std::shared_ptr<const TableProperties>>;

J
jorlow@chromium.org 已提交
150
// A DB is a persistent ordered map from keys to values.
151 152
// A DB is safe for concurrent access from multiple threads without
// any external synchronization.
J
jorlow@chromium.org 已提交
153 154 155 156 157
class DB {
 public:
  // Open the database with the specified "name".
  // Stores a pointer to a heap-allocated database in *dbptr and returns
  // OK on success.
A
Abhishek Kona 已提交
158
  // Stores nullptr in *dbptr and returns a non-OK status on error.
J
jorlow@chromium.org 已提交
159
  // Caller should delete *dbptr when it is no longer needed.
赵明 已提交
160
  static Status Open(const Options& options, const std::string& name,
J
jorlow@chromium.org 已提交
161 162
                     DB** dbptr);

H
heyongqiang 已提交
163 164 165 166
  // Open the database for read only. All DB interfaces
  // that modify data, like put/delete, will return error.
  // If the db is opened in read only mode, then no compactions
  // will happen.
167 168 169
  //
  // Not supported in ROCKSDB_LITE, in which case the function will
  // return Status::NotSupported.
赵明 已提交
170 171 172
  static Status OpenForReadOnly(const Options& options, const std::string& name,
                                DB** dbptr,
                                bool error_if_log_file_exist = false);
H
heyongqiang 已提交
173

174 175 176 177 178
  // Open the database for read only with column families. When opening DB with
  // read only, you can specify only a subset of column families in the
  // database that should be opened. However, you always need to specify default
  // column family. The default column family name is 'default' and it's stored
  // in rocksdb::kDefaultColumnFamilyName
179 180 181
  //
  // Not supported in ROCKSDB_LITE, in which case the function will
  // return Status::NotSupported.
182 183 184 185 186 187
  static Status OpenForReadOnly(
      const DBOptions& db_options, const std::string& name,
      const std::vector<ColumnFamilyDescriptor>& column_families,
      std::vector<ColumnFamilyHandle*>* handles, DB** dbptr,
      bool error_if_log_file_exist = false);

188 189
  // Open DB with column families.
  // db_options specify database specific options
J
Jonah Cohen 已提交
190
  // column_families is the vector of all column families in the database,
191 192 193 194 195 196
  // containing column family name and options. You need to open ALL column
  // families in the database. To get the list of column families, you can use
  // ListColumnFamilies(). Also, you can open only a subset of column families
  // for read-only access.
  // The default column family name is 'default' and it's stored
  // in rocksdb::kDefaultColumnFamilyName.
197 198
  // If everything is OK, handles will on return be the same size
  // as column_families --- handles[i] will be a handle that you
199 200 201
  // will use to operate on column family column_families[i].
  // Before deleting the DB, you have to close all column families by calling
  // DestroyColumnFamilyHandle() with all the handles.
202 203
  static Status Open(const DBOptions& db_options, const std::string& name,
                     const std::vector<ColumnFamilyDescriptor>& column_families,
204
                     std::vector<ColumnFamilyHandle*>* handles, DB** dbptr);
205

206 207
  // Default implementation: always returns Status::NotSupported().
  // NOTE(review): presumably overridden by implementations that can resume
  // after an error -- confirm against the concrete DB classes.
  virtual Status Resume() { return Status::NotSupported(); }

208
  // Close the DB by releasing resources, closing files etc. This should be
209
  // called before calling the destructor so that the caller can get back a
210
  // status in case there are any errors. This will not fsync the WAL files.
S
Siying Dong 已提交
211 212
  // If syncing is required, the caller must first call SyncWAL(), or Write()
  // using an empty write batch with WriteOptions.sync=true.
213 214 215 216
  // Regardless of the return status, the DB must be freed. If the return
  // status is NotSupported(), then the DB implementation does cleanup in the
  // destructor.
  // This default implementation always returns Status::NotSupported().
  virtual Status Close() { return Status::NotSupported(); }
217

218 219 220 221
  // ListColumnFamilies will open the DB specified by argument name
  // and return the list of all column families in that DB
  // through column_families argument. The ordering of
  // column families in column_families is unspecified.
222 223 224
  static Status ListColumnFamilies(const DBOptions& db_options,
                                   const std::string& name,
                                   std::vector<std::string>* column_families);
225

赵明 已提交
226
  // Default constructor; the body is empty in this base class.
  DB() {}
J
jorlow@chromium.org 已提交
227 228
  virtual ~DB();

229 230 231
  // Create a column_family and return the handle of column family
  // through the argument handle.
  virtual Status CreateColumnFamily(const ColumnFamilyOptions& options,
232
                                    const std::string& column_family_name,
233
                                    ColumnFamilyHandle** handle);
234

Y
Yi Wu 已提交
235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
  // Bulk create column families with the same column family options.
  // Return the handles of the column families through the argument handles.
  // In case of error, the request may succeed partially, and handles will
  // contain column family handles that it managed to create, and have size
  // equal to the number of created column families.
  virtual Status CreateColumnFamilies(
      const ColumnFamilyOptions& options,
      const std::vector<std::string>& column_family_names,
      std::vector<ColumnFamilyHandle*>* handles);

  // Bulk create column families.
  // Return the handles of the column families through the argument handles.
  // In case of error, the request may succeed partially, and handles will
  // contain column family handles that it managed to create, and have size
  // equal to the number of created column families.
  virtual Status CreateColumnFamilies(
      const std::vector<ColumnFamilyDescriptor>& column_families,
      std::vector<ColumnFamilyHandle*>* handles);

254 255 256 257
  // Drop a column family specified by column_family handle. This call
  // only records a drop record in the manifest and prevents the column
  // family from flushing and compacting.
  virtual Status DropColumnFamily(ColumnFamilyHandle* column_family);
Y
Yi Wu 已提交
258 259 260 261 262 263 264 265

  // Bulk drop column families. This call only records drop records in the
  // manifest and prevents the column families from flushing and compacting.
  // In case of error, the request may succeed partially. User may call
  // ListColumnFamilies to check the result.
  virtual Status DropColumnFamilies(
      const std::vector<ColumnFamilyHandle*>& column_families);

266 267 268 269 270
  // Close a column family specified by column_family handle and destroy
  // the column family handle specified to avoid double deletion. This call
  // deletes the column family handle by default. Use this method to
  // close column family instead of deleting column family handle directly
  virtual Status DestroyColumnFamilyHandle(ColumnFamilyHandle* column_family);
271

272
  // Set the database entry for "key" to "value".
273
  // If "key" already exists, it will be overwritten.
274
  // Returns OK on success, and a non-OK status on error.
275
  // Note: consider setting options.sync = true.
J
jorlow@chromium.org 已提交
276
  virtual Status Put(const WriteOptions& options,
277
                     ColumnFamilyHandle* column_family, const Slice& key,
J
jorlow@chromium.org 已提交
278
                     const Slice& value) = 0;
279 280
  // Convenience overload: same as the column-family Put(), applied to the
  // handle returned by DefaultColumnFamily().
  virtual Status Put(const WriteOptions& options, const Slice& key,
                     const Slice& value) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return Put(options, default_cf, key, value);
  }
J
jorlow@chromium.org 已提交
283 284 285 286

  // Remove the database entry (if any) for "key".  Returns OK on
  // success, and a non-OK status on error.  It is not an error if "key"
  // did not exist in the database.
287
  // Note: consider setting options.sync = true.
288
  virtual Status Delete(const WriteOptions& options,
289
                        ColumnFamilyHandle* column_family,
290
                        const Slice& key) = 0;
291
  // Convenience overload: same as the column-family Delete(), applied to the
  // handle returned by DefaultColumnFamily().
  virtual Status Delete(const WriteOptions& options, const Slice& key) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return Delete(options, default_cf, key);
  }
J
jorlow@chromium.org 已提交
294

A
Andres Noetzli 已提交
295 296 297
  // Remove the database entry for "key". Requires that the key exists
  // and was not overwritten. Returns OK on success, and a non-OK status
  // on error.  It is not an error if "key" did not exist in the database.
A
agiardullo 已提交
298 299 300 301 302 303 304 305 306 307 308 309
  //
  // If a key is overwritten (by calling Put() multiple times), then the result
  // of calling SingleDelete() on this key is undefined.  SingleDelete() only
  // behaves correctly if there has been only one Put() for this key since the
  // previous call to SingleDelete() for this key.
  //
  // This feature is currently an experimental performance optimization
  // for a very specific workload.  It is up to the caller to ensure that
  // SingleDelete is only used for a key that is not deleted using Delete() or
  // written using Merge().  Mixing SingleDelete operations with Deletes and
  // Merges can result in undefined behavior.
  //
A
Andres Noetzli 已提交
310 311 312 313 314 315 316 317
  // Note: consider setting options.sync = true.
  virtual Status SingleDelete(const WriteOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice& key) = 0;
  // Convenience overload: same as the column-family SingleDelete(), applied
  // to the handle returned by DefaultColumnFamily().
  virtual Status SingleDelete(const WriteOptions& options, const Slice& key) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return SingleDelete(options, default_cf, key);
  }

A
Andrew Kryczka 已提交
318 319 320 321 322
  // Removes the database entries in the range ["begin_key", "end_key"), i.e.,
  // including "begin_key" and excluding "end_key". Returns OK on success, and
  // a non-OK status on error. It is not an error if no keys exist in the range
  // ["begin_key", "end_key").
  //
323 324 325 326 327 328
  // This feature is now usable in production, with the following caveats:
  // 1) Accumulating many range tombstones in the memtable will degrade read
  // performance; this can be avoided by manually flushing occasionally.
  // 2) Limiting the maximum number of open files in the presence of range
  // tombstones can degrade read performance. To avoid this problem, set
  // max_open_files to -1 whenever possible.
A
Andrew Kryczka 已提交
329 330 331 332
  virtual Status DeleteRange(const WriteOptions& options,
                             ColumnFamilyHandle* column_family,
                             const Slice& begin_key, const Slice& end_key);

333 334 335 336 337
  // Merge the database entry for "key" with "value".  Returns OK on success,
  // and a non-OK status on error. The semantics of this operation is
  // determined by the user provided merge_operator when opening DB.
  // Note: consider setting options.sync = true.
  virtual Status Merge(const WriteOptions& options,
338 339
                       ColumnFamilyHandle* column_family, const Slice& key,
                       const Slice& value) = 0;
340 341
  // Convenience overload: same as the column-family Merge(), applied to the
  // handle returned by DefaultColumnFamily().
  virtual Status Merge(const WriteOptions& options, const Slice& key,
                       const Slice& value) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return Merge(options, default_cf, key, value);
  }
344

J
jorlow@chromium.org 已提交
345
  // Apply the specified updates to the database.
346 347
  // If `updates` contains no update, WAL will still be synced if
  // options.sync=true.
J
jorlow@chromium.org 已提交
348
  // Returns OK on success, non-OK on failure.
349
  // Note: consider setting options.sync = true.
J
jorlow@chromium.org 已提交
350 351 352 353 354 355 356 357 358
  virtual Status Write(const WriteOptions& options, WriteBatch* updates) = 0;

  // If the database contains an entry for "key" store the
  // corresponding value in *value and return OK.
  //
  // If there is no entry for "key" leave *value unchanged and return
  // a status for which Status::IsNotFound() returns true.
  //
  // May return some other Status on an error.
359 360 361
  // std::string flavour of Get(): wraps "*value" in a LazyBuffer, delegates
  // to the LazyBuffer overload and, only if that lookup succeeded, dumps the
  // buffer into "*value". A non-OK lookup status is returned unchanged;
  // otherwise the status of the dump is returned.
  // "value" must not be null (checked by assert).
  virtual inline Status Get(const ReadOptions& options,
                            ColumnFamilyHandle* column_family, const Slice& key,
                            std::string* value) {
    assert(value != nullptr);
    LazyBuffer buffer(value);
    Status status = Get(options, column_family, key, &buffer);
    if (!status.ok()) {
      return status;
    }
    return std::move(buffer).dump(value);
  }
J
jorlow@chromium.org 已提交
370
  virtual Status Get(const ReadOptions& options,
371
                     ColumnFamilyHandle* column_family, const Slice& key,
Z
ZhaoMing 已提交
372
                     LazyBuffer* value) = 0;
赵明 已提交
373 374
  // Convenience overload: same as the column-family Get() taking a
  // std::string*, applied to the handle returned by DefaultColumnFamily().
  virtual Status Get(const ReadOptions& options, const Slice& key,
                     std::string* value) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return Get(options, default_cf, key, value);
  }
J
jorlow@chromium.org 已提交
377

L
leipeng 已提交
378
  static void CallOnMainStack(const std::function<void()>&);
L
leipeng 已提交
379 380 381
  static void SubmitAsyncTask(std::function<void()>);
  static void SubmitAsyncTask(std::function<void()>, size_t concurrency);
  static bool TrySubmitAsyncTask(const std::function<void()>&);
Z
ZhaoMing 已提交
382 383 384
  static bool TrySubmitAsyncTask(const std::function<void()>&,
                                 size_t concurrency);

赵明 已提交
385 386
  typedef std::function<void(Status&&, std::string&& key, std::string* value)>
      GetAsyncCallback;
Z
ZhaoMing 已提交
387 388 389 390 391 392 393

  void GetAsync(const ReadOptions&, ColumnFamilyHandle*, std::string key,
                std::string* value, GetAsyncCallback);
  void GetAsync(const ReadOptions&, std::string key, std::string* value,
                GetAsyncCallback);
  void GetAsync(const ReadOptions&, ColumnFamilyHandle*, std::string key,
                GetAsyncCallback);
394
  void GetAsync(const ReadOptions&, std::string key, GetAsyncCallback);
Z
ZhaoMing 已提交
395

L
leipeng 已提交
396 397
  static int WaitAsync(int timeout_us);
  static int WaitAsync();
Z
ZhaoMing 已提交
398

赵明 已提交
399 400 401
  future<std::tuple<Status, std::string, std::string*>> GetFuture(
      const ReadOptions&, ColumnFamilyHandle*, std::string key,
      std::string* value);
Z
ZhaoMing 已提交
402

赵明 已提交
403 404
  future<std::tuple<Status, std::string, std::string*>> GetFuture(
      const ReadOptions&, std::string key, std::string* value);
Z
ZhaoMing 已提交
405

赵明 已提交
406 407
  future<std::tuple<Status, std::string, std::string>> GetFuture(
      const ReadOptions&, ColumnFamilyHandle*, std::string key);
Z
ZhaoMing 已提交
408

赵明 已提交
409 410
  future<std::tuple<Status, std::string, std::string>> GetFuture(
      const ReadOptions&, std::string key);
L
leipeng 已提交
411

412 413 414 415 416 417 418 419 420 421
  // If keys[i] does not exist in the database, then the i'th returned
  // status will be one for which Status::IsNotFound() is true, and
  // (*values)[i] will be set to some arbitrary value (often ""). Otherwise,
  // the i'th returned status will have Status::ok() true, and (*values)[i]
  // will store the value associated with keys[i].
  //
  // (*values) will always be resized to be the same size as (keys).
  // Similarly, the number of returned statuses will be the number of keys.
  // Note: keys will not be "de-duplicated". Duplicate keys will return
  // duplicate values in order.
422 423
  virtual std::vector<Status> MultiGet(
      const ReadOptions& options,
424
      const std::vector<ColumnFamilyHandle*>& column_family,
425
      const std::vector<Slice>& keys, std::vector<std::string>* values) = 0;
426 427 428
  virtual std::vector<Status> MultiGet(const ReadOptions& options,
                                       const std::vector<Slice>& keys,
                                       std::vector<std::string>* values) {
赵明 已提交
429 430 431 432
    return MultiGet(
        options,
        std::vector<ColumnFamilyHandle*>(keys.size(), DefaultColumnFamily()),
        keys, values);
433
  }
434

435
  // If the key definitely does not exist in the database, then this method
436 437 438 439 440 441
  // returns false, else true. If the caller wants to obtain value when the key
  // is found in memory, a bool for 'value_found' must be passed. 'value_found'
  // will be true on return if value has been set properly.
  // This check is potentially lighter-weight than invoking DB::Get(). One way
  // to make this lighter weight is to avoid doing any IOs.
  // Default implementation here returns true and sets 'value_found' to false
A
Alex Loukissas 已提交
442 443 444 445
  // Base implementation: never rules a key out. It always returns true and,
  // when "value_found" is supplied, sets it to false; all other arguments
  // are ignored.
  virtual bool KeyMayExist(const ReadOptions& /*options*/,
                           ColumnFamilyHandle* /*column_family*/,
                           const Slice& /*key*/, std::string* /*value*/,
                           bool* value_found = nullptr) {
    if (value_found) *value_found = false;
    return true;
  }
451 452
  virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
                           std::string* value, bool* value_found = nullptr) {
453
    return KeyMayExist(options, DefaultColumnFamily(), key, value, value_found);
454
  }
455

J
jorlow@chromium.org 已提交
456 457 458 459 460 461
  // Return a heap-allocated iterator over the contents of the database.
  // The result of NewIterator() is initially invalid (caller must
  // call one of the Seek methods on the iterator before using it).
  //
  // Caller should delete the iterator when it is no longer needed.
  // The returned iterator should be deleted before this db is deleted.
462
  virtual Iterator* NewIterator(const ReadOptions& options,
463
                                ColumnFamilyHandle* column_family) = 0;
464
  // Convenience overload: same as the column-family NewIterator(), applied to
  // the handle returned by DefaultColumnFamily().
  virtual Iterator* NewIterator(const ReadOptions& options) {
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return NewIterator(options, default_cf);
  }
  // Returns iterators from a consistent database state across multiple
  // column families. Iterators are heap allocated and need to be deleted
  // before the db is deleted
  virtual Status NewIterators(
      const ReadOptions& options,
I
Igor Canadi 已提交
472
      const std::vector<ColumnFamilyHandle*>& column_families,
473
      std::vector<Iterator*>* iterators) = 0;
J
jorlow@chromium.org 已提交
474 475 476 477 478

  // Return a handle to the current DB state.  Iterators created with
  // this handle will all observe a stable snapshot of the current DB
  // state.  The caller must call ReleaseSnapshot(result) when the
  // snapshot is no longer needed.
479 480 481
  //
  // nullptr will be returned if the DB fails to take a snapshot or does
  // not support snapshot.
J
jorlow@chromium.org 已提交
482 483 484 485 486 487
  virtual const Snapshot* GetSnapshot() = 0;

  // Release a previously acquired snapshot.  The caller must not
  // use "snapshot" after this call.
  virtual void ReleaseSnapshot(const Snapshot* snapshot) = 0;

488
#ifndef ROCKSDB_LITE
489
  // Contains all valid property arguments for GetProperty().
490 491 492
  //
  // NOTE: Property names cannot end in numbers since those are interpreted as
  //       arguments, e.g., see kNumFilesAtLevelPrefix.
493
  struct Properties {
494 495 496
    //  "rocksdb.num-files-at-level<N>" - returns string containing the number
    //      of files at level <N>, where <N> is an ASCII representation of a
    //      level number (e.g., "0").
497
    static const std::string kNumFilesAtLevelPrefix;
498

499 500 501 502 503 504 505
    //  "rocksdb.compression-ratio-at-level<N>" - returns string containing the
    //      compression ratio of data at level <N>, where <N> is an ASCII
    //      representation of a level number (e.g., "0"). Here, compression
    //      ratio is defined as uncompressed data size / compressed file size.
    //      Returns "-1.0" if no open files at level <N>.
    static const std::string kCompressionRatioAtLevelPrefix;

506 507
    //  "rocksdb.stats" - returns a multi-line string containing the data
    //      described by kCFStats followed by the data described by kDBStats.
508
    static const std::string kStats;
509 510 511

    //  "rocksdb.sstables" - returns a multi-line string summarizing current
    //      SST files.
512
    static const std::string kSSTables;
513

514 515 516 517 518 519 520 521 522
    //  "rocksdb.cfstats" - Both of "rocksdb.cfstats-no-file-histogram" and
    //      "rocksdb.cf-file-histogram" together. See below for description
    //      of the two.
    static const std::string kCFStats;

    //  "rocksdb.cfstats-no-file-histogram" - returns a multi-line string with
    //      general column family stats per-level over db's lifetime ("L<n>"),
    //      aggregated over db's lifetime ("Sum"), and aggregated over the
    //      interval since the last retrieval ("Int").
523 524 525
    //  It could also be used to return the stats in the format of the map.
    //  In this case there will be a pair of string to array of double for
    //  each level as well as for "Sum". "Int" stats will not be affected
Y
yizhu.sun 已提交
526
    //  when this form of stats is retrieved.
527 528 529 530 531
    static const std::string kCFStatsNoFileHistogram;

    //  "rocksdb.cf-file-histogram" - print out how many file reads to every
    //      level, as well as the histogram of latency of single requests.
    static const std::string kCFFileHistogram;
532 533 534 535

    //  "rocksdb.dbstats" - returns a multi-line string with general database
    //      stats, both cumulative (over the db's lifetime) and interval (since
    //      the last retrieval of kDBStats).
536
    static const std::string kDBStats;
537 538 539

    //  "rocksdb.levelstats" - returns multi-line string containing the number
    //      of files per level and total size of each level (MB).
540
    static const std::string kLevelStats;
541 542 543

    //  "rocksdb.num-immutable-mem-table" - returns number of immutable
    //      memtables that have not yet been flushed.
544
    static const std::string kNumImmutableMemTable;
545 546 547

    //  "rocksdb.num-immutable-mem-table-flushed" - returns number of immutable
    //      memtables that have already been flushed.
548
    static const std::string kNumImmutableMemTableFlushed;
549 550 551

    //  "rocksdb.mem-table-flush-pending" - returns 1 if a memtable flush is
    //      pending; otherwise, returns 0.
552
    static const std::string kMemTableFlushPending;
553 554 555

    //  "rocksdb.num-running-flushes" - returns the number of currently running
    //      flushes.
556
    static const std::string kNumRunningFlushes;
557 558 559

    //  "rocksdb.compaction-pending" - returns 1 if at least one compaction is
    //      pending; otherwise, returns 0.
560
    static const std::string kCompactionPending;
561 562 563

    //  "rocksdb.num-running-compactions" - returns the number of currently
    //      running compactions.
564
    static const std::string kNumRunningCompactions;
565 566 567

    //  "rocksdb.background-errors" - returns accumulated number of background
    //      errors.
568
    static const std::string kBackgroundErrors;
569 570 571

    //  "rocksdb.cur-size-active-mem-table" - returns approximate size of active
    //      memtable (bytes).
572
    static const std::string kCurSizeActiveMemTable;
573 574 575

    //  "rocksdb.cur-size-all-mem-tables" - returns approximate size of active
    //      and unflushed immutable memtables (bytes).
576
    static const std::string kCurSizeAllMemTables;
577 578 579

    //  "rocksdb.size-all-mem-tables" - returns approximate size of active,
    //      unflushed immutable, and pinned immutable memtables (bytes).
580
    static const std::string kSizeAllMemTables;
581 582 583

    //  "rocksdb.num-entries-active-mem-table" - returns total number of entries
    //      in the active memtable.
584
    static const std::string kNumEntriesActiveMemTable;
585 586 587

    //  "rocksdb.num-entries-imm-mem-tables" - returns total number of entries
    //      in the unflushed immutable memtables.
588
    static const std::string kNumEntriesImmMemTables;
589 590 591

    //  "rocksdb.num-deletes-active-mem-table" - returns total number of delete
    //      entries in the active memtable.
592
    static const std::string kNumDeletesActiveMemTable;
593 594 595

    //  "rocksdb.num-deletes-imm-mem-tables" - returns total number of delete
    //      entries in the unflushed immutable memtables.
596
    static const std::string kNumDeletesImmMemTables;
597 598

    //  "rocksdb.estimate-num-keys" - returns estimated number of total keys in
O
oranagra 已提交
599
    //      the active and unflushed immutable memtables and storage.
600
    static const std::string kEstimateNumKeys;
601 602 603 604

    //  "rocksdb.estimate-table-readers-mem" - returns estimated memory used for
    //      reading SST tables, excluding memory used in block cache (e.g.,
    //      filter and index blocks).
605
    static const std::string kEstimateTableReadersMem;
606 607 608

    //  "rocksdb.is-file-deletions-enabled" - returns 0 if deletion of obsolete
    //      files is enabled; otherwise, returns a non-zero number.
609
    static const std::string kIsFileDeletionsEnabled;
610 611 612

    //  "rocksdb.num-snapshots" - returns number of unreleased snapshots of the
    //      database.
613
    static const std::string kNumSnapshots;
614 615 616

    //  "rocksdb.oldest-snapshot-time" - returns number representing unix
    //      timestamp of oldest unreleased snapshot.
617
    static const std::string kOldestSnapshotTime;
618 619 620 621 622

    //  "rocksdb.num-live-versions" - returns number of live versions. `Version`
    //      is an internal data structure. See version_set.h for details. More
    //      live versions often mean more SST files are held from being deleted,
    //      by iterators or unfinished compactions.
623
    static const std::string kNumLiveVersions;
624

Y
yizhu.sun 已提交
625
    //  "rocksdb.current-super-version-number" - returns number of current LSM
626 627 628 629
    //  version. It is a uint64_t integer number, incremented after there is
    //  any change to the LSM tree. The number is not preserved after restarting
    //  the DB. After DB restart, it will start from 0 again.
    static const std::string kCurrentSuperVersionNumber;
630

631 632
    //  "rocksdb.estimate-live-data-size" - returns an estimate of the amount of
    //      live data in bytes.
A
Andres Notzli 已提交
633
    static const std::string kEstimateLiveDataSize;
634

Y
yizhu.sun 已提交
635
    //  "rocksdb.min-log-number-to-keep" - return the minimum log number of the
636 637 638
    //      log files that should be kept.
    static const std::string kMinLogNumberToKeep;

639 640 641 642 643
    //  "rocksdb.min-obsolete-sst-number-to-keep" - return the minimum file
    //      number for an obsolete SST to be kept. The max value of `uint64_t`
    //      will be returned if all obsolete files can be deleted.
    static const std::string kMinObsoleteSstNumberToKeep;

644 645 646
    //  "rocksdb.total-sst-files-size" - returns total size (bytes) of all SST
    //      files.
    //  WARNING: may slow down online queries if there are too many files.
647
    static const std::string kTotalSstFilesSize;
648

649 650 651 652
    //  "rocksdb.live-sst-files-size" - returns total size (bytes) of all SST
    //      files belonging to the latest LSM tree.
    static const std::string kLiveSstFilesSize;

653 654 655 656 657 658 659 660
    //  "rocksdb.base-level" - returns number of level to which L0 data will be
    //      compacted.
    static const std::string kBaseLevel;

    //  "rocksdb.estimate-pending-compaction-bytes" - returns estimated total
    //      number of bytes compaction needs to rewrite to get all levels down
    //      to under target size. Not valid for other compactions than level-
    //      based.
661
    static const std::string kEstimatePendingCompactionBytes;
662 663 664

    //  "rocksdb.aggregated-table-properties" - returns a string representation
    //      of the aggregated table properties of the target column family.
665
    static const std::string kAggregatedTableProperties;
666 667 668 669

    //  "rocksdb.aggregated-table-properties-at-level<N>", same as the previous
    //      one but only returns the aggregated table properties of the
    //      specified level "N" at the target column family.
670
    static const std::string kAggregatedTablePropertiesAtLevel;
671 672 673 674 675 676 677

    //  "rocksdb.actual-delayed-write-rate" - returns the current actual delayed
    //      write rate. 0 means no delay.
    static const std::string kActualDelayedWriteRate;

    //  "rocksdb.is-write-stopped" - Return 1 if write has been stopped.
    static const std::string kIsWriteStopped;
Y
Yi Wu 已提交
678 679 680 681 682 683

    //  "rocksdb.estimate-oldest-key-time" - returns an estimation of
    //      oldest key timestamp in the DB. Currently only available for
    //      FIFO compaction with
    //      compaction_options_fifo.allow_compaction = false.
    static const std::string kEstimateOldestKeyTime;
Y
Yi Wu 已提交
684 685 686 687 688 689 690 691 692 693 694

    //  "rocksdb.block-cache-capacity" - returns block cache capacity.
    static const std::string kBlockCacheCapacity;

    //  "rocksdb.block-cache-usage" - returns the memory size for the entries
    //      residing in block cache.
    static const std::string kBlockCacheUsage;

    // "rocksdb.block-cache-pinned-usage" - returns the memory size for the
    //      entries being pinned.
    static const std::string kBlockCachePinnedUsage;
695 696 697 698

    // "rocksdb.options-statistics" - returns multi-line string
    //      of options.statistics
    static const std::string kOptionsStatistics;
699 700 701
  };
#endif /* ROCKSDB_LITE */

702 703 704 705
  // DB implementations can export properties about their state via this method.
  // If "property" is a valid property understood by this DB implementation (see
  // Properties struct above for valid options), fills "*value" with its current
  // value and returns true.  Otherwise, returns false.
706
  virtual bool GetProperty(ColumnFamilyHandle* column_family,
707
                           const Slice& property, std::string* value) = 0;
708
  virtual bool GetProperty(const Slice& property, std::string* value) {
709
    return GetProperty(DefaultColumnFamily(), property, value);
710
  }
711 712
  virtual bool GetMapProperty(ColumnFamilyHandle* column_family,
                              const Slice& property,
713
                              std::map<std::string, std::string>* value) = 0;
714
  virtual bool GetMapProperty(const Slice& property,
715
                              std::map<std::string, std::string>* value) {
716 717
    return GetMapProperty(DefaultColumnFamily(), property, value);
  }
J
jorlow@chromium.org 已提交
718

719
  // Similar to GetProperty(), but only works for a subset of properties whose
720 721 722 723 724 725 726 727
  // return value is an integer. Return the value by integer. Supported
  // properties:
  //  "rocksdb.num-immutable-mem-table"
  //  "rocksdb.mem-table-flush-pending"
  //  "rocksdb.compaction-pending"
  //  "rocksdb.background-errors"
  //  "rocksdb.cur-size-active-mem-table"
  //  "rocksdb.cur-size-all-mem-tables"
728
  //  "rocksdb.size-all-mem-tables"
729 730
  //  "rocksdb.num-entries-active-mem-table"
  //  "rocksdb.num-entries-imm-mem-tables"
731 732
  //  "rocksdb.num-deletes-active-mem-table"
  //  "rocksdb.num-deletes-imm-mem-tables"
733 734 735 736 737
  //  "rocksdb.estimate-num-keys"
  //  "rocksdb.estimate-table-readers-mem"
  //  "rocksdb.is-file-deletions-enabled"
  //  "rocksdb.num-snapshots"
  //  "rocksdb.oldest-snapshot-time"
738
  //  "rocksdb.num-live-versions"
739
  //  "rocksdb.current-super-version-number"
A
Andres Notzli 已提交
740
  //  "rocksdb.estimate-live-data-size"
741
  //  "rocksdb.min-log-number-to-keep"
742
  //  "rocksdb.min-obsolete-sst-number-to-keep"
743
  //  "rocksdb.total-sst-files-size"
744
  //  "rocksdb.live-sst-files-size"
745 746
  //  "rocksdb.base-level"
  //  "rocksdb.estimate-pending-compaction-bytes"
747 748
  //  "rocksdb.num-running-compactions"
  //  "rocksdb.num-running-flushes"
749 750
  //  "rocksdb.actual-delayed-write-rate"
  //  "rocksdb.is-write-stopped"
Y
Yi Wu 已提交
751
  //  "rocksdb.estimate-oldest-key-time"
Y
Yi Wu 已提交
752 753 754
  //  "rocksdb.block-cache-capacity"
  //  "rocksdb.block-cache-usage"
  //  "rocksdb.block-cache-pinned-usage"
755 756 757 758 759 760
  virtual bool GetIntProperty(ColumnFamilyHandle* column_family,
                              const Slice& property, uint64_t* value) = 0;
  virtual bool GetIntProperty(const Slice& property, uint64_t* value) {
    return GetIntProperty(DefaultColumnFamily(), property, value);
  }

S
Siying Dong 已提交
761 762 763 764 765 766 767
  // Reset internal stats for DB and all column families.
  // Note this doesn't reset options.statistics as it is not owned by
  // DB.
  virtual Status ResetStats() {
    // Base-class default: always reports NotSupported.
    Status unsupported = Status::NotSupported("Not implemented");
    return unsupported;
  }

768 769 770 771 772
  // Same as GetIntProperty(), but this one returns the aggregated int
  // property from all column families.
  virtual bool GetAggregatedIntProperty(const Slice& property,
                                        uint64_t* value) = 0;

773
  // Flags for DB::GetApproximateSizes that specify whether memtable
774 775 776 777 778 779 780
  // stats should be included, or file stats approximation or both
  enum SizeApproximationFlags : uint8_t {
    NONE = 0x0,               // no bits set
    INCLUDE_MEMTABLES = 0x1,  // bit 0: include memtable stats
    INCLUDE_FILES = 0x2       // bit 1: include file stats approximation
  };

J
jorlow@chromium.org 已提交
781 782 783 784 785 786 787
  // For each i in [0,n-1], store in "sizes[i]", the approximate
  // file system space used by keys in "[range[i].start .. range[i].limit)".
  //
  // Note that the returned sizes measure file system space usage, so
  // if the user data compresses by a factor of ten, the returned
  // sizes will be one-tenth the size of the corresponding user data size.
  //
788 789 790 791
  // include_flags defines whether the returned size should include
  // the recently written data in the mem-tables (if
  // the mem-table type supports it), data serialized to disk, or both.
  // include_flags should be of type DB::SizeApproximationFlags
792
  virtual void GetApproximateSizes(ColumnFamilyHandle* column_family,
793
                                   const Range* range, int n, uint64_t* sizes,
赵明 已提交
794
                                   uint8_t include_flags = INCLUDE_FILES) = 0;
795
  virtual void GetApproximateSizes(const Range* range, int n, uint64_t* sizes,
赵明 已提交
796 797
                                   uint8_t include_flags = INCLUDE_FILES) {
    GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
798 799
  }

800 801 802 803 804 805 806 807 808 809 810 811
  // The method is similar to GetApproximateSizes, except it
  // returns approximate number of records in memtables.
  virtual void GetApproximateMemTableStats(ColumnFamilyHandle* column_family,
                                           const Range& range,
                                           uint64_t* const count,
                                           uint64_t* const size) = 0;
  virtual void GetApproximateMemTableStats(const Range& range,
                                           uint64_t* const count,
                                           uint64_t* const size) {
    GetApproximateMemTableStats(DefaultColumnFamily(), range, count, size);
  }

812 813
  // Deprecated versions of GetApproximateSizes
  // Legacy bool-flag overload for the default column family. File sizes are
  // always requested; include_memtable additionally requests memtable data.
  ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
      const Range* range, int n, uint64_t* sizes, bool include_memtable) {
    uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
    if (include_memtable) {
      include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
    }
    GetApproximateSizes(DefaultColumnFamily(), range, n, sizes, include_flags);
  }
  // Legacy bool-flag overload for an explicit column family. File sizes are
  // always requested; include_memtable additionally requests memtable data.
  ROCKSDB_DEPRECATED_FUNC virtual void GetApproximateSizes(
      ColumnFamilyHandle* column_family, const Range* range, int n,
      uint64_t* sizes, bool include_memtable) {
    uint8_t include_flags = SizeApproximationFlags::INCLUDE_FILES;
    if (include_memtable) {
      include_flags |= SizeApproximationFlags::INCLUDE_MEMTABLES;
    }
    GetApproximateSizes(column_family, range, n, sizes, include_flags);
  }
J
jorlow@chromium.org 已提交
830

G
Gabor Cselle 已提交
831
  // Compact the underlying storage for the key range [*begin,*end].
832
  // The actual compaction interval might be a superset of [*begin, *end].
G
Gabor Cselle 已提交
833 834 835 836 837
  // In particular, deleted and overwritten versions are discarded,
  // and the data is rearranged to reduce the cost of operations
  // needed to access the data.  This operation should typically only
  // be invoked by users who understand the underlying implementation.
  //
A
Abhishek Kona 已提交
838 839
  // begin==nullptr is treated as a key before all keys in the database.
  // end==nullptr is treated as a key after all keys in the database.
G
Gabor Cselle 已提交
840
  // Therefore the following call will compact the entire database:
841
  //    db->CompactRange(options, nullptr, nullptr);
842
  // Note that after the entire database is compacted, all data are pushed
843 844 845 846 847 848 849 850 851 852 853
  // down to the last level containing any data. If the total data size after
  // compaction is reduced, that level might not be appropriate for hosting all
  // the files. In this case, client could set options.change_level to true, to
  // move the files back to the minimum level capable of holding the data set
  // or a given level (specified by non-negative options.target_level).
  // Column-family-specific version; must be provided by the DB implementation.
  virtual Status CompactRange(const CompactRangeOptions& options,
                              ColumnFamilyHandle* column_family,
                              const Slice* begin, const Slice* end) = 0;
  // Convenience overload: compacts the range in the default column family.
  virtual Status CompactRange(const CompactRangeOptions& options,
                              const Slice* begin, const Slice* end) {
    return CompactRange(options, DefaultColumnFamily(), begin, end);
  }
855

856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
  // Legacy overload: packs the loose arguments into a CompactRangeOptions and
  // forwards to the options-based CompactRange() for the given column family.
  ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
      ColumnFamilyHandle* column_family, const Slice* begin, const Slice* end,
      bool change_level = false, int target_level = -1,
      uint32_t target_path_id = 0) {
    CompactRangeOptions legacy_opts;
    legacy_opts.target_path_id = target_path_id;
    legacy_opts.target_level = target_level;
    legacy_opts.change_level = change_level;
    return CompactRange(legacy_opts, column_family, begin, end);
  }

  // Legacy overload for the default column family: packs the loose arguments
  // into a CompactRangeOptions and forwards to the options-based version.
  ROCKSDB_DEPRECATED_FUNC virtual Status CompactRange(
      const Slice* begin, const Slice* end, bool change_level = false,
      int target_level = -1, uint32_t target_path_id = 0) {
    CompactRangeOptions legacy_opts;
    legacy_opts.target_path_id = target_path_id;
    legacy_opts.target_level = target_level;
    legacy_opts.change_level = change_level;
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return CompactRange(legacy_opts, default_cf, begin, end);
  }

S
sdong 已提交
877 878
  // Takes a map of option name -> new value for one column family.
  // The base-class implementation ignores both arguments and returns
  // Status::NotSupported.
  // NOTE(review): which options can be changed this way is decided by the
  // overriding implementation -- confirm there.
  virtual Status SetOptions(
      ColumnFamilyHandle* /*column_family*/,
      const std::unordered_map<std::string, std::string>& /*new_options*/) {
    return Status::NotSupported("Not implemented");
  }
  // Convenience overload: forwards new_options to the default column family.
  virtual Status SetOptions(
      const std::unordered_map<std::string, std::string>& new_options) {
    return SetOptions(DefaultColumnFamily(), new_options);
  }
J
jorlow@chromium.org 已提交
886

887 888 889
  virtual Status SetDBOptions(
      const std::unordered_map<std::string, std::string>& new_options) = 0;

A
Andres Notzli 已提交
890 891 892 893
  // CompactFiles() inputs a list of files specified by file numbers and
  // compacts them to the specified level. Note that the behavior is different
  // from CompactRange() in that CompactFiles() performs the compaction job
  // using the CURRENT thread.
894 895 896 897 898 899
  //
  // @see GetDataBaseMetaData
  // @see GetColumnFamilyMetaData
  // Column-family-specific version; must be provided by the DB implementation.
  // output_path_id defaults to -1 and output_file_names defaults to nullptr.
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& input_file_names, const int output_level,
      const int output_path_id = -1,
      std::vector<std::string>* const output_file_names = nullptr) = 0;

  // Convenience overload: forwards every argument unchanged to the
  // column-family-specific version, using the default column family.
  virtual Status CompactFiles(
      const CompactionOptions& compact_options,
      const std::vector<std::string>& input_file_names, const int output_level,
      const int output_path_id = -1,
      std::vector<std::string>* const output_file_names = nullptr) {
    return CompactFiles(compact_options, DefaultColumnFamily(),
                        input_file_names, output_level, output_path_id,
                        output_file_names);
  }
913 914 915

  // This function will wait until all currently running background processes
  // finish. After it returns, no background process will be run until
S
Sagar Vemuri 已提交
916
  // ContinueBackgroundWork is called
917 918 919
  virtual Status PauseBackgroundWork() = 0;
  virtual Status ContinueBackgroundWork() = 0;

920
  // This function will enable automatic compactions for the given column
921 922 923 924 925 926 927 928
  // families if they were previously disabled. The function will first set the
  // disable_auto_compactions option for each column family to 'false', after
  // which it will schedule a flush/compaction.
  //
  // NOTE: Setting disable_auto_compactions to 'false' through SetOptions() API
  // does NOT schedule a flush/compaction afterwards, and only changes the
  // parameter itself within the column family option.
  //
929 930 931
  virtual Status EnableAutoCompaction(
      const std::vector<ColumnFamilyHandle*>& column_family_handles) = 0;

932
  // Number of levels used for this DB.
933
  virtual int NumberLevels(ColumnFamilyHandle* column_family) = 0;
934
  virtual int NumberLevels() { return NumberLevels(DefaultColumnFamily()); }
935 936 937

  // Maximum level to which a new compacted memtable is pushed if it
  // does not create overlap.
938
  virtual int MaxMemCompactionLevel(ColumnFamilyHandle* column_family) = 0;
939
  virtual int MaxMemCompactionLevel() {
940
    return MaxMemCompactionLevel(DefaultColumnFamily());
941
  }
942 943

  // Number of files in level-0 that would stop writes.
944
  virtual int Level0StopWriteTrigger(ColumnFamilyHandle* column_family) = 0;
945
  virtual int Level0StopWriteTrigger() {
946
    return Level0StopWriteTrigger(DefaultColumnFamily());
947
  }
948

I
Igor Canadi 已提交
949 950 951 952
  // Get DB name -- the exact same name that was provided as an argument to
  // DB::Open()
  virtual const std::string& GetName() const = 0;

953 954 955
  // Get Env object from the DB
  virtual Env* GetEnv() const = 0;

956 957 958 959
  // Get DB Options that we use.  During the process of opening the
  // column family, the options provided when calling DB::Open() or
  // DB::CreateColumnFamily() will have been "sanitized" and transformed
  // in an implementation-defined manner.
960 961
  // Column-family-specific version; must be provided by the DB implementation.
  virtual Options GetOptions(ColumnFamilyHandle* column_family) const = 0;
  // Convenience overload: returns the options of the default column family.
  virtual Options GetOptions() const {
    return GetOptions(DefaultColumnFamily());
  }
I
Igor Canadi 已提交
964

965
  virtual DBOptions GetDBOptions() const = 0;
966

H
heyongqiang 已提交
967
  // Flush all mem-table data.
Y
Yanqin Jin 已提交
968 969
  // Flush a single column family, even when atomic flush is enabled. To flush
  // multiple column families, use Flush(options, column_families).
970
  // Single-column-family flush; must be provided by the DB implementation.
  virtual Status Flush(const FlushOptions& options,
                       ColumnFamilyHandle* column_family) = 0;
  // Convenience overload: flushes the default column family.
  virtual Status Flush(const FlushOptions& options) {
    return Flush(options, DefaultColumnFamily());
  }
Y
Yanqin Jin 已提交
975 976 977 978 979 980 981 982 983
  // Flushes multiple column families.
  // If atomic flush is not enabled, Flush(options, column_families) is
  // equivalent to calling Flush(options, column_family) multiple times.
  // If atomic flush is enabled, Flush(options, column_families) will flush all
  // column families specified in 'column_families' up to the latest sequence
  // number at the time when flush is requested.
  virtual Status Flush(
      const FlushOptions& options,
      const std::vector<ColumnFamilyHandle*>& column_families) = 0;
H
heyongqiang 已提交
984

985 986
  // Flush the WAL memory buffer to the file. If sync is true, it calls SyncWAL
  // afterwards.
A
Andrew Kryczka 已提交
987
  // The base-class implementation ignores `sync` and returns
  // Status::NotSupported.
  virtual Status FlushWAL(bool /*sync*/) {
    return Status::NotSupported("FlushWAL not implemented");
  }
L
liuyangming 已提交
990 991 992 993 994 995 996 997 998
  // Lock the WAL. Also flushes the WAL after locking.
  virtual Status LockWAL() {
    // Base-class default: always reports NotSupported.
    Status unsupported = Status::NotSupported("LockWAL not implemented");
    return unsupported;
  }

  // Unlock the WAL.
  virtual Status UnlockWAL() {
    // Base-class default: always reports NotSupported.
    Status unsupported = Status::NotSupported("UnlockWAL not implemented");
    return unsupported;
  }
999 1000 1001 1002 1003 1004
  // Sync the wal. Note that Write() followed by SyncWAL() is not exactly the
  // same as Write() with sync=true: in the latter case the changes won't be
  // visible until the sync is done.
  // Currently only works if allow_mmap_writes = false in Options.
  virtual Status SyncWAL() = 0;

I
Igor Canadi 已提交
1005 1006 1007
  // The sequence number of the most recent transaction.
  virtual SequenceNumber GetLatestSequenceNumber() const = 0;

1008 1009 1010 1011 1012 1013 1014 1015
  // Instructs DB to preserve deletes with sequence numbers >= passed seqnum.
  // Has no effect if DBOptions.preserve_deletes is set to false.
  // This function assumes that user calls this function with monotonically
  // increasing seqnums (otherwise we can't guarantee that a particular delete
  // hasn't been already processed); returns true if the value was successfully
  // updated, false if user attempted to call it with seqnum <= current value.
  virtual bool SetPreserveDeletesSequenceNumber(SequenceNumber seqnum) = 0;

I
Igor Canadi 已提交
1016 1017
#ifndef ROCKSDB_LITE

1018 1019 1020 1021 1022
  // Prevent file deletions. Compactions will continue to occur,
  // but no obsolete files will be deleted. Calling this multiple
  // times has the same effect as calling it once.
  virtual Status DisableFileDeletions() = 0;

1023
  // Allow compactions to delete obsolete files.
1024 1025 1026 1027 1028 1029 1030 1031 1032
  // If force == true, the call to EnableFileDeletions() will guarantee that
  // file deletions are enabled after the call, even if DisableFileDeletions()
  // was called multiple times before.
  // If force == false, EnableFileDeletions will only enable file deletion
  // after it's been called at least as many times as DisableFileDeletions(),
  // enabling the two methods to be called by two threads concurrently without
  // synchronization -- i.e., file deletions will be enabled only after both
  // threads call EnableFileDeletions()
  virtual Status EnableFileDeletions(bool force = true) = 0;
1033

1034 1035
  // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup

1036
  // Retrieve the list of all files in the database. The files are
1037 1038 1039 1040 1041 1042 1043 1044
  // relative to the dbname and are not absolute paths. Despite being relative
  // paths, the file names begin with "/". The valid size of the manifest file
  // is returned in manifest_file_size. The manifest file is an ever growing
  // file, but only the portion specified by manifest_file_size is valid for
  // this snapshot. Setting flush_memtable to true does Flush before recording
  // the live files. Setting flush_memtable to false is useful when we don't
  // want to wait for flush which may have to wait for compaction to complete
  // taking an indeterminate time.
1045 1046 1047 1048 1049
  //
  // In case you have multiple column families, even if flush_memtable is true,
  // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
  // for new data that arrived to already-flushed column families while other
  // column families were flushing
1050
  virtual Status GetLiveFiles(std::vector<std::string>&,
1051 1052
                              uint64_t* manifest_file_size,
                              bool flush_memtable = true) = 0;
1053

1054 1055 1056
  // Retrieve the sorted list of all wal files with earliest file first
  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;

1057
  // Note: this API is not yet consistent with WritePrepared transactions.
1058 1059 1060
  // Sets iter to an iterator that is positioned at a write-batch containing
  // seq_number. If the sequence number is non existent, it returns an iterator
  // at the first available seq_no after the requested seq_no
1061
  // Returns Status::OK if iterator is valid
1062 1063 1064 1065
  // Must set WAL_ttl_seconds or WAL_size_limit_MB to large values to
  // use this api, else the WAL files will get
  // cleared aggressively and the iterator might keep getting invalid before
  // an update is read.
1066
  virtual Status GetUpdatesSince(
1067 1068 1069
      SequenceNumber seq_number, std::unique_ptr<TransactionLogIterator>* iter,
      const TransactionLogIterator::ReadOptions& read_options =
          TransactionLogIterator::ReadOptions()) = 0;
1070

赵明 已提交
1071
  // The base-class implementation ignores guard_seqno and does nothing.
  // NOTE(review): the meaning of the guard sequence number is not documented
  // in this header -- confirm against the overriding implementation.
  virtual void SetGuardSeqno(SequenceNumber /*guard_seqno*/) {}
G
guokuankuan 已提交
1072
  
D
Dmitri Smirnov 已提交
1073 1074
// Windows API macro interference
#undef DeleteFile
1075 1076 1077 1078
  // Delete the file name from the db directory and update the internal state to
  // reflect that. Supports deletion of sst and log files only. 'name' must be
  // path relative to the db directory. eg. 000001.sst, /archive/000003.log
  virtual Status DeleteFile(std::string name) = 0;
1079 1080 1081

  // Returns a list of all table files with their level, start key
  // and end key
A
Alex Loukissas 已提交
1082
  virtual void GetLiveFilesMetaData(
S
sdong 已提交
1083
      std::vector<LiveFileMetaData>* /*metadata*/) {}
1084

1085
  // Obtains the meta data of the specified column family of the DB.
S
sdong 已提交
1086 1087
  // The base-class implementation is a no-op: both arguments are ignored and
  // *metadata is left untouched.
  virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
                                       ColumnFamilyMetaData* /*metadata*/) {}
1088 1089

  // Get the metadata of the default column family.
赵明 已提交
1090
  // Non-virtual shorthand: forwards to the column-family-specific overload
  // with the default column family.
  void GetColumnFamilyMetaData(ColumnFamilyMetaData* metadata) {
    GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
  }
1093

1094
  // IngestExternalFile() will load a list of external SST files (1) into the DB
1095 1096 1097 1098 1099 1100 1101
  // Two primary modes are supported:
  // - Duplicate keys in the new files will overwrite existing keys (default)
  // - Duplicate keys will be skipped (set ingest_behind=true)
  // In the first mode we will try to find the lowest possible level that
  // the file can fit in, and ingest the file into this level (2). A file that
  // has a key range that overlaps with the memtable key range will require us
  // to Flush the memtable first before ingesting the file.
1102
  // In the second mode we will always ingest in the bottom most level (see
1103
  // docs to IngestExternalFileOptions::ingest_behind).
1104
  //
1105 1106
  // (1) External SST files can be created using SstFileWriter
  // (2) We will try to ingest the files to the lowest possible level
Y
yizhu.sun 已提交
1107
  //     even if the file compression doesn't match the level compression
1108 1109 1110
  // (3) If IngestExternalFileOptions->ingest_behind is set to true,
  //     we always ingest at the bottommost level, which should be reserved
  //     for this purpose (see DBOptions::allow_ingest_behind flag).
1111 1112 1113 1114 1115 1116 1117 1118 1119
  // Column-family-specific version; must be provided by the DB implementation.
  virtual Status IngestExternalFile(
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& external_files,
      const IngestExternalFileOptions& options) = 0;

  // Convenience overload: ingests the files into the default column family.
  virtual Status IngestExternalFile(
      const std::vector<std::string>& external_files,
      const IngestExternalFileOptions& options) {
    return IngestExternalFile(DefaultColumnFamily(), external_files, options);
  }
1121

A
Aaron G 已提交
1122 1123
  virtual Status VerifyChecksum() = 0;

1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
  // AddFile() is deprecated, please use IngestExternalFile()
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      ColumnFamilyHandle* column_family,
      const std::vector<std::string>& file_path_list, bool move_file = false,
      bool skip_snapshot_check = false) {
    // Translate the legacy flags into IngestExternalFileOptions. Global seqno
    // assignment and blocking flushes are explicitly switched off.
    IngestExternalFileOptions ingest_opts;
    ingest_opts.allow_blocking_flush = false;
    ingest_opts.allow_global_seqno = false;
    ingest_opts.snapshot_consistency = !skip_snapshot_check;
    ingest_opts.move_files = move_file;
    return IngestExternalFile(column_family, file_path_list, ingest_opts);
  }

  // Deprecated: same as the list-of-paths overload, applied to the default
  // column family.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      const std::vector<std::string>& file_path_list, bool move_file = false,
      bool skip_snapshot_check = false) {
    // Translate the legacy flags into IngestExternalFileOptions. Global seqno
    // assignment and blocking flushes are explicitly switched off.
    IngestExternalFileOptions ingest_opts;
    ingest_opts.allow_blocking_flush = false;
    ingest_opts.allow_global_seqno = false;
    ingest_opts.snapshot_consistency = !skip_snapshot_check;
    ingest_opts.move_files = move_file;
    ColumnFamilyHandle* const default_cf = DefaultColumnFamily();
    return IngestExternalFile(default_cf, file_path_list, ingest_opts);
  }

  // AddFile() is deprecated, please use IngestExternalFile()
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      ColumnFamilyHandle* column_family, const std::string& file_path,
      bool move_file = false, bool skip_snapshot_check = false) {
    // Translate the legacy flags into IngestExternalFileOptions. Global seqno
    // assignment and blocking flushes are explicitly switched off.
    IngestExternalFileOptions ingest_opts;
    ingest_opts.allow_blocking_flush = false;
    ingest_opts.allow_global_seqno = false;
    ingest_opts.snapshot_consistency = !skip_snapshot_check;
    ingest_opts.move_files = move_file;
    // Wrap the single path in a one-element list for IngestExternalFile().
    const std::vector<std::string> single_file(1, file_path);
    return IngestExternalFile(column_family, single_file, ingest_opts);
  }

  // Deprecated: single-path overload applied to the default column family.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      const std::string& file_path, bool move_file = false,
      bool skip_snapshot_check = false) {
    // Translate the legacy flags into IngestExternalFileOptions. Global seqno
    // assignment and blocking flushes are explicitly switched off.
    IngestExternalFileOptions ingest_opts;
    ingest_opts.allow_blocking_flush = false;
    ingest_opts.allow_global_seqno = false;
    ingest_opts.snapshot_consistency = !skip_snapshot_check;
    ingest_opts.move_files = move_file;
    // Wrap the single path in a one-element list for IngestExternalFile().
    const std::vector<std::string> single_file(1, file_path);
    return IngestExternalFile(DefaultColumnFamily(), single_file, ingest_opts);
  }

  // Load table file with information "file_info" into "column_family"
  // Deprecated: extracts file_path from every entry of file_info_list and
  // ingests those files into column_family via IngestExternalFile(). Global
  // seqno assignment and blocking flushes are explicitly switched off.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      ColumnFamilyHandle* column_family,
      const std::vector<ExternalSstFileInfo>& file_info_list,
      bool move_file = false, bool skip_snapshot_check = false) {
    std::vector<std::string> external_files;
    // The final size is known up front, so allocate once.
    external_files.reserve(file_info_list.size());
    for (const ExternalSstFileInfo& file_info : file_info_list) {
      external_files.push_back(file_info.file_path);
    }
    IngestExternalFileOptions ifo;
    ifo.move_files = move_file;
    ifo.snapshot_consistency = !skip_snapshot_check;
    ifo.allow_global_seqno = false;
    ifo.allow_blocking_flush = false;
    return IngestExternalFile(column_family, external_files, ifo);
  }

  // Deprecated: extracts file_path from every entry of file_info_list and
  // ingests those files into the default column family via
  // IngestExternalFile(). Global seqno assignment and blocking flushes are
  // explicitly switched off.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      const std::vector<ExternalSstFileInfo>& file_info_list,
      bool move_file = false, bool skip_snapshot_check = false) {
    std::vector<std::string> external_files;
    // The final size is known up front, so allocate once.
    external_files.reserve(file_info_list.size());
    for (const ExternalSstFileInfo& file_info : file_info_list) {
      external_files.push_back(file_info.file_path);
    }
    IngestExternalFileOptions ifo;
    ifo.move_files = move_file;
    ifo.snapshot_consistency = !skip_snapshot_check;
    ifo.allow_global_seqno = false;
    ifo.allow_blocking_flush = false;
    return IngestExternalFile(DefaultColumnFamily(), external_files, ifo);
  }

  // Deprecated: ingests the single file named by file_info->file_path into
  // column_family via IngestExternalFile(). Global seqno assignment and
  // blocking flushes are explicitly switched off.
  // Returns Status::InvalidArgument if file_info is null.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      ColumnFamilyHandle* column_family, const ExternalSstFileInfo* file_info,
      bool move_file = false, bool skip_snapshot_check = false) {
    // Guard the dereference below instead of crashing on a null pointer.
    if (file_info == nullptr) {
      return Status::InvalidArgument("file_info must not be null");
    }
    IngestExternalFileOptions ifo;
    ifo.move_files = move_file;
    ifo.snapshot_consistency = !skip_snapshot_check;
    ifo.allow_global_seqno = false;
    ifo.allow_blocking_flush = false;
    return IngestExternalFile(column_family, {file_info->file_path}, ifo);
  }

  // Deprecated: ingests the single file named by file_info->file_path into
  // the default column family via IngestExternalFile(). Global seqno
  // assignment and blocking flushes are explicitly switched off.
  // Returns Status::InvalidArgument if file_info is null.
  ROCKSDB_DEPRECATED_FUNC virtual Status AddFile(
      const ExternalSstFileInfo* file_info, bool move_file = false,
      bool skip_snapshot_check = false) {
    // Guard the dereference below instead of crashing on a null pointer.
    if (file_info == nullptr) {
      return Status::InvalidArgument("file_info must not be null");
    }
    IngestExternalFileOptions ifo;
    ifo.move_files = move_file;
    ifo.snapshot_consistency = !skip_snapshot_check;
    ifo.allow_global_seqno = false;
    ifo.allow_blocking_flush = false;
    return IngestExternalFile(DefaultColumnFamily(), {file_info->file_path},
                              ifo);
  }

I
Igor Canadi 已提交
1226 1227
#endif  // ROCKSDB_LITE

1228 1229 1230
  // Sets the globally unique ID created at database creation time by invoking
  // Env::GenerateUniqueId(), in identity. Returns Status::OK if identity could
  // be set properly
1231
  virtual Status GetDbIdentity(std::string& identity) const = 0;
1232

1233 1234 1235
  // Returns default column family handle
  virtual ColumnFamilyHandle* DefaultColumnFamily() const = 0;

I
Igor Canadi 已提交
1236
#ifndef ROCKSDB_LITE
I
Igor Canadi 已提交
1237 1238
  // Column-family-specific version; must be provided by the DB implementation.
  // NOTE(review): presumably fills *props with the properties of every table
  // file of the column family -- confirm against the implementation.
  virtual Status GetPropertiesOfAllTables(ColumnFamilyHandle* column_family,
                                          TablePropertiesCollection* props) = 0;
  // Convenience overload: runs the same query on the default column family.
  virtual Status GetPropertiesOfAllTables(TablePropertiesCollection* props) {
    return GetPropertiesOfAllTables(DefaultColumnFamily(), props);
  }
1242
  virtual Status GetPropertiesOfTablesInRange(
1243
      ColumnFamilyHandle* column_family, const Range* range, std::size_t n,
1244
      TablePropertiesCollection* props) = 0;
S
Siying Dong 已提交
1245

A
Andrew Kryczka 已提交
1246 1247 1248
  // The base-class implementation ignores all arguments and returns
  // Status::NotSupported.
  virtual Status SuggestCompactRange(ColumnFamilyHandle* /*column_family*/,
                                     const Slice* /*begin*/,
                                     const Slice* /*end*/) {
    return Status::NotSupported("SuggestCompactRange() is not implemented.");
  }

A
Andrew Kryczka 已提交
1252 1253
  // The base-class implementation ignores all arguments and returns
  // Status::NotSupported.
  virtual Status PromoteL0(ColumnFamilyHandle* /*column_family*/,
                           int /*target_level*/) {
    return Status::NotSupported("PromoteL0() is not implemented.");
  }

1257 1258 1259 1260 1261 1262 1263 1264 1265
  // Trace DB operations. Use EndTrace() to stop tracing.
  virtual Status StartTrace(const TraceOptions& /*options*/,
                            std::unique_ptr<TraceWriter>&& /*trace_writer*/) {
    // Base-class default: both arguments are ignored.
    Status unsupported =
        Status::NotSupported("StartTrace() is not implemented.");
    return unsupported;
  }

  virtual Status EndTrace() {
    // Base-class default: always reports NotSupported.
    Status unsupported = Status::NotSupported("EndTrace() is not implemented.");
    return unsupported;
  }
I
Igor Canadi 已提交
1266
#endif  // ROCKSDB_LITE
1267

A
agiardullo 已提交
1268 1269 1270
  // Needed for StackableDB
  virtual DB* GetRootDB() {
    // The base implementation hands back this very object.
    DB* const root = this;
    return root;
  }

J
jorlow@chromium.org 已提交
1271 1272 1273 1274 1275 1276 1277 1278
 private:
  // No copying allowed
  DB(const DB&);
  void operator=(const DB&);
};

// Destroy the contents of the specified database.
// Be very careful using this method.
1279 1280
Status DestroyDB(const std::string& name, const Options& options,
                 const std::vector<ColumnFamilyDescriptor>& column_families =
赵明 已提交
1281
                     std::vector<ColumnFamilyDescriptor>());
J
jorlow@chromium.org 已提交
1282

I
Igor Canadi 已提交
1283
#ifndef ROCKSDB_LITE
J
jorlow@chromium.org 已提交
1284 1285 1286 1287
// If a DB cannot be opened, you may attempt to call this method to
// resurrect as much of the contents of the database as possible.
// Some data may be lost, so be careful when calling this function
// on a database that contains important information.
1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303
//
// With this API, we will warn and skip data associated with column families not
// specified in column_families.
//
// @param column_families Descriptors for known column families
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families);

// @param unknown_cf_opts Options for column families encountered during the
//                        repair that were not specified in column_families.
Status RepairDB(const std::string& dbname, const DBOptions& db_options,
                const std::vector<ColumnFamilyDescriptor>& column_families,
                const ColumnFamilyOptions& unknown_cf_opts);

// @param options These options will be used for the database and for ALL column
//                families encountered during the repair
J
jorlow@chromium.org 已提交
1304
Status RepairDB(const std::string& dbname, const Options& options);
1305

I
Igor Canadi 已提交
1306
#endif
J
jorlow@chromium.org 已提交
1307

1308
}  // namespace rocksdb