// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
//
// An Env is an interface used by the leveldb implementation to access
// operating system functionality like the filesystem etc.  Callers
// may wish to provide a custom Env object when opening a database to
// get fine-grained control; e.g., to rate limit file system operations.
//
// All Env implementations are safe for concurrent access from
// multiple threads without any external synchronization.

13 14
#ifndef STORAGE_ROCKSDB_INCLUDE_ENV_H_
#define STORAGE_ROCKSDB_INCLUDE_ENV_H_
J
jorlow@chromium.org 已提交
15 16 17

#include <cstdarg>
#include <string>
18
#include <memory>
J
jorlow@chromium.org 已提交
19 20
#include <vector>
#include <stdint.h>
21
#include "rocksdb/status.h"
J
jorlow@chromium.org 已提交
22 23 24 25

namespace leveldb {

// Forward declarations, so that the interfaces below can name these types
// before (or without) seeing their full definitions.
class FileLock;
class Logger;
class RandomAccessFile;
class SequentialFile;
class Slice;
class WritableFile;
class Options;

// Smart-pointer names that the declarations below use unqualified.
using std::unique_ptr;
using std::shared_ptr;

H
Haobo Xu 已提交
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57

// Options while opening a file to read/write
struct EnvOptions {
  // Construct with default options.
  EnvOptions();

  // Construct from an Options object.
  // NOTE: this constructor is intentionally left non-explicit so that
  // existing callers relying on the implicit Options -> EnvOptions
  // conversion keep compiling.
  EnvOptions(const Options& options);

  // If true, then allow caching of data in environment buffers.
  bool use_os_buffer;

  // If true, then use mmap to read data.
  bool use_mmap_reads;

  // If true, then use mmap to write data.
  bool use_mmap_writes;

  // If true, set the FD_CLOEXEC on open fd.
  bool set_fd_cloexec;

  // Allows OS to incrementally sync files to disk while they are being
  // written, in the background. Issue one request for every bytes_per_sync
  // written. 0 turns it off.
  // Default: 0
  uint64_t bytes_per_sync;
};

J
jorlow@chromium.org 已提交
66 67 68 69 70 71 72 73 74 75 76 77 78 79
class Env {
 public:
  Env() { }
  virtual ~Env();

  // Return a default environment suitable for the current operating
  // system.  Sophisticated users may wish to provide their own Env
  // implementation instead of relying on this default environment.
  //
  // The result of Default() belongs to leveldb and must never be deleted.
  static Env* Default();

  // Create a brand new sequentially-readable file with the specified name.
  // On success, stores a pointer to the new file in *result and returns OK.
  // On failure stores nullptr in *result and returns non-OK.  If the file does
  // not exist, returns a non-OK status.
  //
  // The returned file will only be accessed by one thread at a time.
  virtual Status NewSequentialFile(const std::string& fname,
                                   unique_ptr<SequentialFile>* result,
                                   const EnvOptions& options)
                                   = 0;

  // Create a brand new random access read-only file with the
  // specified name.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores nullptr in *result and
  // returns non-OK.  If the file does not exist, returns a non-OK
  // status.
  //
  // The returned file may be concurrently accessed by multiple threads.
  virtual Status NewRandomAccessFile(const std::string& fname,
                                     unique_ptr<RandomAccessFile>* result,
                                     const EnvOptions& options)
                                     = 0;

  // Create an object that writes to a new file with the specified
  // name.  Deletes any existing file with the same name and creates a
  // new file.  On success, stores a pointer to the new file in
  // *result and returns OK.  On failure stores nullptr in *result and
  // returns non-OK.
  //
  // The returned file will only be accessed by one thread at a time.
  virtual Status NewWritableFile(const std::string& fname,
                                 unique_ptr<WritableFile>* result,
                                 const EnvOptions& options) = 0;

  // Returns true iff the named file exists.
  virtual bool FileExists(const std::string& fname) = 0;

  // Store in *result the names of the children of the specified directory.
  // The names are relative to "dir".
  // Original contents of *result are dropped.
  virtual Status GetChildren(const std::string& dir,
                             std::vector<std::string>* result) = 0;

  // Delete the named file.
  virtual Status DeleteFile(const std::string& fname) = 0;

  // Create the specified directory. Returns error if directory exists.
  virtual Status CreateDir(const std::string& dirname) = 0;

  // Creates the directory if it is missing. Returns OK if the directory
  // already exists or was created successfully.
  virtual Status CreateDirIfMissing(const std::string& dirname) = 0;

  // Delete the specified directory.
  virtual Status DeleteDir(const std::string& dirname) = 0;

  // Store the size of fname in *file_size.
  virtual Status GetFileSize(const std::string& fname, uint64_t* file_size) = 0;

  // Store the last modification time of fname in *file_mtime.
  virtual Status GetFileModificationTime(const std::string& fname,
                                         uint64_t* file_mtime) = 0;

  // Rename file src to target.
  virtual Status RenameFile(const std::string& src,
                            const std::string& target) = 0;

  // Lock the specified file.  Used to prevent concurrent access to
  // the same db by multiple processes.  On failure, stores nullptr in
  // *lock and returns non-OK.
  //
  // On success, stores a pointer to the object that represents the
  // acquired lock in *lock and returns OK.  The caller should call
  // UnlockFile(*lock) to release the lock.  If the process exits,
  // the lock will be automatically released.
  //
  // If somebody else already holds the lock, finishes immediately
  // with a failure.  I.e., this call does not wait for existing locks
  // to go away.
  //
  // May create the named file if it does not already exist.
  virtual Status LockFile(const std::string& fname, FileLock** lock) = 0;

  // Release the lock acquired by a previous successful call to LockFile.
  // REQUIRES: lock was returned by a successful LockFile() call
  // REQUIRES: lock has not already been unlocked.
  virtual Status UnlockFile(FileLock* lock) = 0;

  // Arrange to run "(*function)(arg)" once in a background thread.
  //
  // "function" may run in an unspecified thread.  Multiple functions
  // added to the same Env may run concurrently in different threads.
  // I.e., the caller may not assume that background work items are
  // serialized.
  virtual void Schedule(
      void (*function)(void* arg),
      void* arg) = 0;

  // Start a new thread, invoking "function(arg)" within the new thread.
  // When "function(arg)" returns, the thread will be destroyed.
  virtual void StartThread(void (*function)(void* arg), void* arg) = 0;

  // *path is set to a temporary directory that can be used for testing. It may
  // or may not have just been created. The directory may or may not differ
  // between runs of the same process, but subsequent calls will return the
  // same directory.
  virtual Status GetTestDirectory(std::string* path) = 0;

  // Create and return a log file for storing informational messages.
  virtual Status NewLogger(const std::string& fname,
                           shared_ptr<Logger>* result) = 0;

  // Returns the number of micro-seconds since some fixed point in time. Only
  // useful for computing deltas of time.
  virtual uint64_t NowMicros() = 0;

  // Returns the number of nano-seconds since some fixed point in time. Only
  // useful for computing deltas of time in one run.
  // Default implementation simply relies on NowMicros, so its resolution is
  // one microsecond unless a subclass overrides it.
  virtual uint64_t NowNanos() {
    return NowMicros() * 1000;
  }

  // Sleep/delay the thread for the prescribed number of micro-seconds.
  virtual void SleepForMicroseconds(int micros) = 0;

  // Get the current host name.
  virtual Status GetHostName(char* name, uint64_t len) = 0;

  // Get the number of seconds since the Epoch, 1970-01-01 00:00:00 (UTC).
  virtual Status GetCurrentTime(int64_t* unix_time) = 0;

  // Get full directory name for this db.
  virtual Status GetAbsolutePath(const std::string& db_path,
      std::string* output_path) = 0;

  // The number of background worker threads for this environment.
  // default: 1
  virtual void SetBackgroundThreads(int number) = 0;

  // Converts seconds-since-Jan-01-1970 to a printable string
  virtual std::string TimeToString(uint64_t time) = 0;

 private:
  // No copying allowed. Deleted (rather than declared and left undefined) so
  // that any attempted copy is rejected at compile time.
  Env(const Env&) = delete;
  void operator=(const Env&) = delete;
};

// A file abstraction for reading sequentially through a file
class SequentialFile {
 public:
  SequentialFile() { }
  virtual ~SequentialFile();

  // Read up to "n" bytes from the file.  "scratch[0..n-1]" may be
  // written by this routine.  Sets "*result" to the data that was
  // read (including if fewer than "n" bytes were successfully read).
G
Gabor Cselle 已提交
235 236
  // May set "*result" to point at data in "scratch[0..n-1]", so
  // "scratch[0..n-1]" must be live when "*result" is used.
J
jorlow@chromium.org 已提交
237 238 239 240
  // If an error was encountered, returns a non-OK status.
  //
  // REQUIRES: External synchronization
  virtual Status Read(size_t n, Slice* result, char* scratch) = 0;
241 242 243 244 245 246 247 248 249

  // Skip "n" bytes from the file. This is guaranteed to be no
  // slower that reading the same data, but may be faster.
  //
  // If end of file is reached, skipping will stop at the end of the
  // file, and Skip will return OK.
  //
  // REQUIRES: External synchronization
  virtual Status Skip(uint64_t n) = 0;
J
jorlow@chromium.org 已提交
250 251 252 253 254 255 256 257 258 259 260
};

// A file abstraction for randomly reading the contents of a file.
class RandomAccessFile {
 public:
  RandomAccessFile() { }
  virtual ~RandomAccessFile();

  // Read up to "n" bytes from the file starting at "offset".
  // "scratch[0..n-1]" may be written by this routine.  Sets "*result"
  // to the data that was read (including if fewer than "n" bytes were
G
Gabor Cselle 已提交
261 262 263 264
  // successfully read).  May set "*result" to point at data in
  // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
  // "*result" is used.  If an error was encountered, returns a non-OK
  // status.
J
jorlow@chromium.org 已提交
265 266 267 268
  //
  // Safe for concurrent use by multiple threads.
  virtual Status Read(uint64_t offset, size_t n, Slice* result,
                      char* scratch) const = 0;
269 270 271 272 273

  // Tries to get an unique ID for this file that will be the same each time
  // the file is opened (and will stay the same while the file is open).
  // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
  // ID can be created this function returns the length of the ID and places it
274
  // in "id"; otherwise, this function returns 0, in which case "id"
275 276 277 278 279 280 281 282 283 284 285 286 287 288
  // may not have been modified.
  //
  // This function guarantees, for IDs from a given environment, two unique ids
  // cannot be made equal to eachother by adding arbitrary bytes to one of
  // them. That is, no unique ID is the prefix of another.
  //
  // This function guarantees that the returned ID will not be interpretable as
  // a single varint.
  //
  // Note: these IDs are only valid for the duration of the process.
  virtual size_t GetUniqueId(char* id, size_t max_size) const {
    return 0; // Default implementation to prevent issues with backwards
              // compatibility.
  };
289 290 291 292 293 294


  enum AccessPattern { NORMAL, RANDOM, SEQUENTIAL, WILLNEED, DONTNEED };

  virtual void Hint(AccessPattern pattern) {}

J
jorlow@chromium.org 已提交
295 296 297 298 299 300 301
};

// A file abstraction for sequential writing.  The implementation
// must provide buffering since callers may append small fragments
// at a time to the file.
class WritableFile {
 public:
  // Starts with preallocation disabled (block size 0) and no block
  // preallocated yet.
  WritableFile() : last_preallocated_block_(0), preallocation_block_size_(0) {
  }
  virtual ~WritableFile();

  virtual Status Append(const Slice& data) = 0;
  virtual Status Close() = 0;
  virtual Status Flush() = 0;
  virtual Status Sync() = 0; // sync data

  /*
   * Sync data and/or metadata as well.
   * By default, sync only data (this simply forwards to Sync()).
   * Override this method for environments where we need to sync
   * metadata as well.
   */
  virtual Status Fsync() {
    return Sync();
  }

  /*
   * Get the size of valid data in the file.
   * The default implementation always returns 0.
   */
  virtual uint64_t GetFileSize() {
    return 0;
  }

  /*
   * Get and set the default pre-allocation block size for writes to
   * this file.  If non-zero, then Allocate will be used to extend the
   * underlying storage of a file (generally via fallocate) if the Env
   * instance supports it.
   */
  void SetPreallocationBlockSize(size_t size) {
    preallocation_block_size_ = size;
  }

  // Stores the configured preallocation block size in *block_size and the
  // index of the last preallocated block in *last_allocated_block.
  virtual void GetPreallocationStatus(size_t* block_size,
                                      size_t* last_allocated_block) {
    *last_allocated_block = last_preallocated_block_;
    *block_size = preallocation_block_size_;
  }

 protected:
  // PrepareWrite performs any necessary preparation for a write
  // before the write actually occurs.  This allows for pre-allocation
  // of space on devices where it can result in less file
  // fragmentation and/or less waste from over-zealous filesystem
  // pre-allocation.
  //
  // "offset" and "len" describe the byte range about to be written.
  // Does nothing when the preallocation block size is 0.
  void PrepareWrite(size_t offset, size_t len) {
    if (preallocation_block_size_ == 0) {
      return;
    }
    // If this write would cross one or more preallocation blocks,
    // determine what the last preallocation block necessary to
    // cover this write would be and Allocate to that point.
    const auto block_size = preallocation_block_size_;
    // Index one past the last block touched by [offset, offset + len),
    // i.e. the end offset rounded up to a whole number of blocks.
    size_t new_last_preallocated_block =
      (offset + len + block_size - 1) / block_size;
    if (new_last_preallocated_block > last_preallocated_block_) {
      size_t num_spanned_blocks =
        new_last_preallocated_block - last_preallocated_block_;
      // The Status returned by Allocate is discarded here; PrepareWrite
      // has no way to report it to the caller.
      Allocate(block_size * last_preallocated_block_,
               block_size * num_spanned_blocks);
      last_preallocated_block_ = new_last_preallocated_block;
    }
  }

  /*
   * Pre-allocate space for a file.
   * The default implementation does nothing and returns OK.
   */
  virtual Status Allocate(off_t offset, off_t len) {
    return Status::OK();
  }

  // Sync a file range with disk.
  // offset is the starting byte of the file range to be synchronized.
  // nbytes specifies the length of the range to be synchronized.
  // This asks the OS to initiate flushing the cached data to disk,
  // without waiting for completion.
  // Default implementation does nothing.
  virtual Status RangeSync(off64_t offset, off64_t nbytes) {
    return Status::OK();
  }

 private:
  // Number of preallocation blocks already requested through Allocate.
  size_t last_preallocated_block_;
  // Size in bytes of one preallocation block; 0 disables preallocation.
  size_t preallocation_block_size_;
  // No copying allowed. Deleted (rather than declared and left undefined) so
  // that any attempted copy is rejected at compile time.
  WritableFile(const WritableFile&) = delete;
  void operator=(const WritableFile&) = delete;
};

394 395 396
// An interface for writing log messages.
class Logger {
 public:
  // Sentinel returned by GetLogFileSize() when the logger cannot report a
  // size.
  enum { DO_NOT_SUPPORT_GET_LOG_FILE_SIZE = -1 };
  Logger() { }
  virtual ~Logger();

  // Write an entry to the log file with the specified format.
  virtual void Logv(const char* format, va_list ap) = 0;

  // Returns the size of the log file. The default implementation returns
  // DO_NOT_SUPPORT_GET_LOG_FILE_SIZE converted to size_t; since the sentinel
  // is -1 this is the largest value representable in size_t.
  virtual size_t GetLogFileSize() const {
    return static_cast<size_t>(DO_NOT_SUPPORT_GET_LOG_FILE_SIZE);
  }

 private:
  // No copying allowed. Deleted (rather than declared and left undefined) so
  // that any attempted copy is rejected at compile time.
  Logger(const Logger&) = delete;
  void operator=(const Logger&) = delete;
};


J
jorlow@chromium.org 已提交
414 415 416 417 418 419 420 421 422 423 424
// Identifies a locked file.
class FileLock {
 public:
  FileLock() { }
  virtual ~FileLock();
 private:
  // No copying allowed. Deleted (rather than declared and left undefined) so
  // that any attempted copy is rejected at compile time.
  FileLock(const FileLock&) = delete;
  void operator=(const FileLock&) = delete;
};

A
Abhishek Kona 已提交
425
// Log the specified data to *info_log if info_log is non-nullptr.
426 427 428 429 430 431
extern void Log(const shared_ptr<Logger>& info_log, const char* format, ...)
#   if defined(__GNUC__) || defined(__clang__)
    __attribute__((__format__ (__printf__, 2, 3)))
#   endif
    ;

432
extern void Log(Logger* info_log, const char* format, ...)
J
jorlow@chromium.org 已提交
433
#   if defined(__GNUC__) || defined(__clang__)
434
    __attribute__((__format__ (__printf__, 2, 3)))
J
jorlow@chromium.org 已提交
435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
#   endif
    ;

// Convenience helper: writes "data" to the file named "fname".
Status WriteStringToFile(Env* env,
                         const Slice& data,
                         const std::string& fname);

// Convenience helper: reads the contents of the file named "fname"
// into *data.
Status ReadFileToString(Env* env,
                        const std::string& fname,
                        std::string* data);

// An implementation of Env that forwards all calls to another Env.
// May be useful to clients who wish to override just part of the
// functionality of another Env.
class EnvWrapper : public Env {
 public:
H
Hans Wennborg 已提交
451 452
  // Initialize an EnvWrapper that delegates all calls to *t
  explicit EnvWrapper(Env* t) : target_(t) { }
J
jorlow@chromium.org 已提交
453 454 455 456 457 458
  virtual ~EnvWrapper();

  // Return the target to which this Env forwards all calls
  Env* target() const { return target_; }

  // The following text is boilerplate that forwards all methods to target()
459
  Status NewSequentialFile(const std::string& f,
460 461 462
                           unique_ptr<SequentialFile>* r,
                           const EnvOptions& options) {
    return target_->NewSequentialFile(f, r, options);
J
jorlow@chromium.org 已提交
463
  }
464
  Status NewRandomAccessFile(const std::string& f,
465 466 467
                             unique_ptr<RandomAccessFile>* r,
                             const EnvOptions& options) {
    return target_->NewRandomAccessFile(f, r, options);
J
jorlow@chromium.org 已提交
468
  }
469 470 471
  Status NewWritableFile(const std::string& f, unique_ptr<WritableFile>* r,
                         const EnvOptions& options) {
    return target_->NewWritableFile(f, r, options);
J
jorlow@chromium.org 已提交
472 473 474 475 476 477 478
  }
  bool FileExists(const std::string& f) { return target_->FileExists(f); }
  Status GetChildren(const std::string& dir, std::vector<std::string>* r) {
    return target_->GetChildren(dir, r);
  }
  Status DeleteFile(const std::string& f) { return target_->DeleteFile(f); }
  Status CreateDir(const std::string& d) { return target_->CreateDir(d); }
479 480 481
  Status CreateDirIfMissing(const std::string& d) {
    return target_->CreateDirIfMissing(d);
  }
J
jorlow@chromium.org 已提交
482 483 484 485
  Status DeleteDir(const std::string& d) { return target_->DeleteDir(d); }
  Status GetFileSize(const std::string& f, uint64_t* s) {
    return target_->GetFileSize(f, s);
  }
486 487 488 489 490 491

  Status GetFileModificationTime(const std::string& fname,
                                 uint64_t* file_mtime) {
    return target_->GetFileModificationTime(fname, file_mtime);
  }

J
jorlow@chromium.org 已提交
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507
  Status RenameFile(const std::string& s, const std::string& t) {
    return target_->RenameFile(s, t);
  }
  Status LockFile(const std::string& f, FileLock** l) {
    return target_->LockFile(f, l);
  }
  Status UnlockFile(FileLock* l) { return target_->UnlockFile(l); }
  void Schedule(void (*f)(void*), void* a) {
    return target_->Schedule(f, a);
  }
  void StartThread(void (*f)(void*), void* a) {
    return target_->StartThread(f, a);
  }
  virtual Status GetTestDirectory(std::string* path) {
    return target_->GetTestDirectory(path);
  }
508 509
  virtual Status NewLogger(const std::string& fname,
                           shared_ptr<Logger>* result) {
510
    return target_->NewLogger(fname, result);
J
jorlow@chromium.org 已提交
511 512 513 514 515 516 517
  }
  uint64_t NowMicros() {
    return target_->NowMicros();
  }
  void SleepForMicroseconds(int micros) {
    target_->SleepForMicroseconds(micros);
  }
518
  Status GetHostName(char* name, uint64_t len) {
519 520 521 522 523 524 525 526 527
    return target_->GetHostName(name, len);
  }
  Status GetCurrentTime(int64_t* unix_time) {
    return target_->GetCurrentTime(unix_time);
  }
  Status GetAbsolutePath(const std::string& db_path,
      std::string* output_path) {
    return target_->GetAbsolutePath(db_path, output_path);
  }
528 529 530
  void SetBackgroundThreads(int num) {
    return target_->SetBackgroundThreads(num);
  }
531 532 533
  std::string TimeToString(uint64_t time) {
    return target_->TimeToString(time);
  }
534

J
jorlow@chromium.org 已提交
535 536 537 538
 private:
  Env* target_;
};

H
Hans Wennborg 已提交
539
}  // namespace leveldb
J
jorlow@chromium.org 已提交
540

541
#endif  // STORAGE_ROCKSDB_INCLUDE_ENV_H_