Commit 1ca05843 authored by Dhruba Borthakur

This is the mega-patch for multi-threaded compaction,
published in https://reviews.facebook.net/D5997.

Summary:
This patch allows compaction to occur in multiple background threads
concurrently.

If a manual compaction is issued, the system falls back to a
single-compaction-thread model. This is done to ensure correctness
and simplicity of the code. When the manual compaction is finished,
the system resumes its concurrent-compaction mode automatically.

Updates to the manifest are done via a group-commit approach.

Test Plan: run db_bench
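
For illustration, a minimal sketch of how a caller would enable the new concurrency knobs. The option names and Env::SetBackgroundThreads come from this patch; the specific values and the database path are made up:

#include "leveldb/db.h"
#include "leveldb/options.h"

int main() {
  leveldb::Options options;
  options.create_if_missing = true;
  // Illustrative values, not recommendations.
  options.max_write_buffer_number = 4;     // up to 4 memtables in memory
  options.max_background_compactions = 3;  // up to 3 parallel compactions
  // The env needs at least as many background threads as there can be
  // concurrent compactions (db_bench does the same in its main()).
  options.env->SetBackgroundThreads(options.max_background_compactions);
  leveldb::DB* db;
  leveldb::Status s = leveldb::DB::Open(options, "/tmp/testdb", &db);
  if (s.ok()) delete db;
  return s.ok() ? 0 : 1;
}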
Parent cd93e828
......@@ -87,6 +87,16 @@ static bool FLAGS_histogram = false;
// (initialized to default value by "main")
static int FLAGS_write_buffer_size = 0;
// The number of in-memory memtables.
// Each memtable is of size FLAGS_write_buffer_size.
// This is initialized to the default value of 2 in the "main" function.
static int FLAGS_max_write_buffer_number = 0;
// The maximum number of background compactions that can
// occur in parallel.
// This is initialized to the default value of 1 in the "main" function.
static int FLAGS_max_background_compactions = 0;
// Number of bytes to use as a cache of uncompressed data.
// Negative means use default settings.
static long FLAGS_cache_size = -1;
......@@ -159,6 +169,9 @@ static int FLAGS_level0_stop_writes_trigger = 12;
// Number of files in level-0 that will slow down writes.
static int FLAGS_level0_slowdown_writes_trigger = 8;
// Number of files in level-0 when compactions start
static int FLAGS_level0_file_num_compaction_trigger = 4;
// Ratio of reads to writes (expressed as a percentage)
// for the ReadRandomWriteRandom workload. The default
// setting is 9 gets for every 1 put.
......@@ -326,7 +339,8 @@ class Stats {
} else {
double now = FLAGS_env->NowMicros();
fprintf(stderr,
"... thread %d: %ld ops in %.6f seconds and %.2f ops/sec\n",
"%s thread %d: %ld ops in %.6f seconds and %.2f ops/sec\n",
FLAGS_env->TimeToString((uint64_t) now/1000000).c_str(),
id_,
done_ - last_report_done_,
(now - last_report_finish_) / 1000000.0,
......@@ -873,6 +887,8 @@ class Benchmark {
options.create_if_missing = !FLAGS_use_existing_db;
options.block_cache = cache_;
options.write_buffer_size = FLAGS_write_buffer_size;
options.max_write_buffer_number = FLAGS_max_write_buffer_number;
options.max_background_compactions = FLAGS_max_background_compactions;
options.block_size = FLAGS_block_size;
options.filter_policy = filter_policy_;
options.max_open_files = FLAGS_open_files;
......@@ -887,6 +903,8 @@ class Benchmark {
options.max_bytes_for_level_multiplier =
FLAGS_max_bytes_for_level_multiplier;
options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger;
options.level0_file_num_compaction_trigger =
FLAGS_level0_file_num_compaction_trigger;
options.level0_slowdown_writes_trigger =
FLAGS_level0_slowdown_writes_trigger;
options.compression = FLAGS_compression_type;
......@@ -1172,7 +1190,10 @@ class Benchmark {
int main(int argc, char** argv) {
FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
FLAGS_max_write_buffer_number = leveldb::Options().max_write_buffer_number;
FLAGS_open_files = leveldb::Options().max_open_files;
FLAGS_max_background_compactions =
leveldb::Options().max_background_compactions;
// Compression test code above refers to FLAGS_block_size
FLAGS_block_size = leveldb::Options().block_size;
std::string default_db_path;
......@@ -1203,6 +1224,10 @@ int main(int argc, char** argv) {
FLAGS_value_size = n;
} else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
FLAGS_write_buffer_size = n;
} else if (sscanf(argv[i], "--max_write_buffer_number=%d%c", &n, &junk) == 1) {
FLAGS_max_write_buffer_number = n;
} else if (sscanf(argv[i], "--max_background_compactions=%d%c", &n, &junk) == 1) {
FLAGS_max_background_compactions = n;
} else if (sscanf(argv[i], "--cache_size=%ld%c", &l, &junk) == 1) {
FLAGS_cache_size = l;
} else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
......@@ -1281,6 +1306,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i],"--level0_slowdown_writes_trigger=%d%c",
&n, &junk) == 1) {
FLAGS_level0_slowdown_writes_trigger = n;
} else if (sscanf(argv[i],"--level0_file_num_compaction_trigger=%d%c",
&n, &junk) == 1) {
FLAGS_level0_file_num_compaction_trigger = n;
} else if (strncmp(argv[i], "--compression_type=", 19) == 0) {
const char* ctype = argv[i] + 19;
if (!strcasecmp(ctype, "none"))
......@@ -1309,6 +1337,10 @@ int main(int argc, char** argv) {
}
}
// The number of background threads should be at least as large as the
// maximum number of concurrent compactions.
FLAGS_env->SetBackgroundThreads(FLAGS_max_background_compactions);
// Choose a location for the test database if none given with --db=<path>
if (FLAGS_db == NULL) {
leveldb::Env::Default()->GetTestDirectory(&default_db_path);
......
This diff is collapsed.
......@@ -14,6 +14,7 @@
#include "leveldb/env.h"
#include "port/port.h"
#include "util/stats_logger.h"
#include "memtablelist.h"
#ifdef USE_SCRIBE
#include "scribe/scribe_logger.h"
......@@ -99,13 +100,20 @@ class DBImpl : public DB {
// Compact the in-memory write buffer to disk. Switches to a new
// log-file/memtable and writes a new descriptor iff successful.
Status CompactMemTable();
Status CompactMemTable(bool* madeProgress = NULL);
Status RecoverLogFile(uint64_t log_number,
VersionEdit* edit,
SequenceNumber* max_sequence);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base);
// The following two methods are used to flush a memtable to
// storage. The first one is used at database recovery time (when the
// database is opened) and is heavyweight because it holds the mutex
// for the entire period. The second method, WriteLevel0Table, supports
// concurrent flushing of memtables to storage.
Status WriteLevel0TableForRecovery(MemTable* mem, VersionEdit* edit);
Status WriteLevel0Table(MemTable* mem, VersionEdit* edit,
uint64_t* filenumber);
Status MakeRoomForWrite(bool force /* compact even if there is room? */);
WriteBatch* BuildBatchGroup(Writer** last_writer);
......@@ -123,13 +131,16 @@ class DBImpl : public DB {
void MaybeScheduleCompaction();
static void BGWork(void* db);
void BackgroundCall();
Status BackgroundCompaction();
Status BackgroundCompaction(bool* madeProgress);
void CleanupCompaction(CompactionState* compact);
Status DoCompactionWork(CompactionState* compact);
Status OpenCompactionOutputFile(CompactionState* compact);
Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input);
Status InstallCompactionResults(CompactionState* compact);
void AllocateCompactionOutputFileNumbers(CompactionState* compact);
void ReleaseCompactionUnusedFileNumbers(CompactionState* compact);
// Constant after construction
Env* const env_;
......@@ -151,8 +162,7 @@ class DBImpl : public DB {
port::AtomicPointer shutting_down_;
port::CondVar bg_cv_; // Signalled when background work finishes
MemTable* mem_;
MemTable* imm_; // Memtable being compacted
port::AtomicPointer has_imm_; // So bg thread can detect non-NULL imm_
MemTableList imm_; // Memtables that are not changing
WritableFile* logfile_;
uint64_t logfile_number_;
log::Writer* log_;
......@@ -169,8 +179,8 @@ class DBImpl : public DB {
// part of ongoing compactions.
std::set<uint64_t> pending_outputs_;
// Has a background compaction been scheduled or is running?
bool bg_compaction_scheduled_;
// count of how many background compactions have been scheduled or are running
int bg_compaction_scheduled_;
// Has a background stats log thread scheduled?
bool bg_logstats_scheduled_;
......@@ -179,6 +189,7 @@ class DBImpl : public DB {
struct ManualCompaction {
int level;
bool done;
bool in_progress; // compaction request being processed?
const InternalKey* begin; // NULL means beginning of key range
const InternalKey* end; // NULL means end of key range
InternalKey tmp_storage; // Used to keep track of compaction progress
......@@ -220,6 +231,9 @@ class DBImpl : public DB {
static const int KEEP_LOG_FILE_NUM = 1000;
std::string db_absolute_path_;
// count of the number of consecutive delayed writes
int delayed_writes_;
// No copying allowed
DBImpl(const DBImpl&);
void operator=(const DBImpl&);
......@@ -227,6 +241,9 @@ class DBImpl : public DB {
const Comparator* user_comparator() const {
return internal_comparator_.user_comparator();
}
// dump delayed_writes_ to the log file and reset the counter.
void DelayLoggingAndReset();
};
// Sanitize db options. The caller should delete result.info_log if
......
......@@ -18,10 +18,14 @@ static Slice GetLengthPrefixedSlice(const char* data) {
return Slice(p, len);
}
MemTable::MemTable(const InternalKeyComparator& cmp)
MemTable::MemTable(const InternalKeyComparator& cmp, int numlevel)
: comparator_(cmp),
refs_(0),
table_(comparator_, &arena_) {
table_(comparator_, &arena_),
flush_in_progress_(false),
flush_completed_(false),
file_number_(0),
edit_(numlevel) {
}
MemTable::~MemTable() {
......@@ -101,7 +105,7 @@ void MemTable::Add(SequenceNumber s, ValueType type,
p += 8;
p = EncodeVarint32(p, val_size);
memcpy(p, value.data(), val_size);
assert((p + val_size) - buf == encoded_len);
assert((p + val_size) - buf == (unsigned)encoded_len);
table_.Insert(buf);
}
......
......@@ -9,6 +9,7 @@
#include "leveldb/db.h"
#include "db/dbformat.h"
#include "db/skiplist.h"
#include "db/version_set.h"
#include "util/arena.h"
namespace leveldb {
......@@ -21,7 +22,8 @@ class MemTable {
public:
// MemTables are reference counted. The initial reference count
// is zero and the caller must call Ref() at least once.
explicit MemTable(const InternalKeyComparator& comparator);
explicit MemTable(const InternalKeyComparator& comparator,
int numlevel = 7);
// Increase reference count.
void Ref() { ++refs_; }
......@@ -63,6 +65,9 @@ class MemTable {
// Else, return false.
bool Get(const LookupKey& key, std::string* value, Status* s);
// Returns the edits area that is needed for flushing the memtable
VersionEdit* GetEdits() { return &edit_; }
private:
~MemTable(); // Private since only Unref() should be used to delete it
......@@ -73,6 +78,7 @@ class MemTable {
};
friend class MemTableIterator;
friend class MemTableBackwardIterator;
friend class MemTableList;
typedef SkipList<const char*, KeyComparator> Table;
......@@ -81,6 +87,15 @@ class MemTable {
Arena arena_;
Table table_;
// These are used to manage memtable flushes to storage
bool flush_in_progress_; // started the flush
bool flush_completed_; // finished the flush
uint64_t file_number_; // filled up after flush is complete
// The updates to be applied to the transaction log when this
// memtable is flushed to storage.
VersionEdit edit_;
// No copying allowed
MemTable(const MemTable&);
void operator=(const MemTable&);
......
// Copyright (c) 2012 Facebook.
#include <string>
#include "leveldb/db.h"
#include "db/memtable.h"
#include "db/memtablelist.h"
#include "leveldb/env.h"
#include "leveldb/iterator.h"
#include "util/coding.h"
namespace leveldb {
class InternalKeyComparator;
class Mutex;
class MemTableListIterator;
class VersionSet;
using std::list;
// Increase reference count on all underlying memtables
void MemTableList::RefAll() {
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end() ; ++it) {
(*it)->Ref();
}
}
// Drop reference count on all underlying memtables
void MemTableList::UnrefAll() {
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end() ; ++it) {
(*it)->Unref();
}
}
// Returns the total number of memtables in the list
int MemTableList::size() {
assert(num_flush_not_started_ <= size_);
return size_;
}
// Returns true if there is at least one memtable on which flush has
// not yet started.
bool MemTableList::IsFlushPending() {
if (num_flush_not_started_ > 0) {
assert(imm_flush_needed.NoBarrier_Load() != NULL);
return true;
}
return false;
}
// Returns the earliest memtable that needs to be flushed.
// Returns NULL if no such memtable exists.
MemTable* MemTableList::PickMemtableToFlush() {
for (list<MemTable*>::reverse_iterator it = memlist_.rbegin();
it != memlist_.rend(); it++) {
MemTable* m = *it;
if (!m->flush_in_progress_) {
assert(!m->flush_completed_);
num_flush_not_started_--;
if (num_flush_not_started_ == 0) {
imm_flush_needed.Release_Store(NULL);
}
m->flush_in_progress_ = true; // flushing will start very soon
return m;
}
}
return NULL;
}
// Record a successful flush in the manifest file
Status MemTableList::InstallMemtableFlushResults(MemTable* m,
VersionSet* vset, Status flushStatus,
port::Mutex* mu, Logger* info_log,
uint64_t file_number,
std::set<uint64_t>& pending_outputs) {
mu->AssertHeld();
assert(m->flush_in_progress_);
assert(m->file_number_ == 0);
// If the flush was not successful, then just reset state.
// Maybe a subsequent attempt to flush will be successful.
if (!flushStatus.ok()) {
m->flush_in_progress_ = false;
m->flush_completed_ = false;
m->edit_.Clear();
num_flush_not_started_++;
imm_flush_needed.Release_Store((void *)1);
pending_outputs.erase(file_number);
return flushStatus;
}
// flush was successful
m->flush_completed_ = true;
m->file_number_ = file_number;
// if some other thread is already committing, then return
Status s;
if (commit_in_progress_) {
return s;
}
// Only a single thread can be executing this piece of code
commit_in_progress_ = true;
// scan all memtables from the earliest, and commit those
// (in that order) that have finished flushing.
while (!memlist_.empty()) {
m = memlist_.back(); // get the last element
if (!m->flush_completed_) {
break;
}
Log(info_log,
"Level-0 commit table #%llu: started",
(unsigned long long)m->file_number_);
// this can release and reacquire the mutex.
s = vset->LogAndApply(&m->edit_, mu);
if (s.ok()) { // commit new state
Log(info_log, "Level-0 commit table #%llu: done",
(unsigned long long)m->file_number_);
memlist_.remove(m);
assert(m->file_number_ > 0);
// pending_outputs can be cleared only after the newly created file
// has been written to a committed version so that other concurrently
// executing compaction threads do not mistakenly assume that this
// file is not live.
pending_outputs.erase(m->file_number_);
m->Unref();
size_--;
} else {
// Commit failed. Set up state so that we can flush again.
Log(info_log, "Level-0 commit table #%llu: failed",
(unsigned long long)m->file_number_);
m->flush_completed_ = false;
m->flush_in_progress_ = false;
m->edit_.Clear();
num_flush_not_started_++;
pending_outputs.erase(m->file_number_);
m->file_number_ = 0;
imm_flush_needed.Release_Store((void *)1);
s = Status::IOError("Unable to commit flushed memtable");
break;
}
}
commit_in_progress_ = false;
return s;
}
// New memtables are inserted at the front of the list.
void MemTableList::Add(MemTable* m) {
assert(size_ >= num_flush_not_started_);
size_++;
memlist_.push_front(m);
num_flush_not_started_++;
if (num_flush_not_started_ == 1) {
imm_flush_needed.Release_Store((void *)1);
}
}
// Returns an estimate of the number of bytes of data in use.
size_t MemTableList::ApproximateMemoryUsage() {
size_t size = 0;
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end(); ++it) {
size += (*it)->ApproximateMemoryUsage();
}
return size;
}
// Search all the memtables starting from the most recent one.
// Return the most recent value found, if any.
bool MemTableList::Get(const LookupKey& key, std::string* value, Status* s) {
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end(); ++it) {
if ((*it)->Get(key, value, s)) {
return true;
}
}
return false;
}
void MemTableList::GetMemTables(std::vector<MemTable*>* output) {
for (list<MemTable*>::iterator it = memlist_.begin();
it != memlist_.end(); ++it) {
output->push_back(*it);
}
}
} // namespace leveldb
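
With a list of immutable memtables instead of a single imm_, the read path probes the active memtable first and then the list, newest first. A hedged sketch of that lookup order (the free-standing helper and its name are assumptions; the real code lives in DBImpl::Get and also falls through to the SSTables):

// Sketch only: probe the active memtable, then the immutable list.
// MemTableList::Get already iterates from the most recent memtable.
bool LookupInMemory(MemTable* mem, MemTableList* imm,
                    const LookupKey& lkey, std::string* value, Status* s) {
  if (mem->Get(lkey, value, s)) {
    return true;   // hit in the active memtable
  }
  if (imm->Get(lkey, value, s)) {
    return true;   // hit in one of the immutable memtables
  }
  return false;    // caller falls through to the SSTables
}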
// Copyright (c) 2012 Facebook.
#ifndef STORAGE_LEVELDB_DB_MEMTABLELIST_H_
#define STORAGE_LEVELDB_DB_MEMTABLELIST_H_
#include <string>
#include <list>
#include "leveldb/db.h"
#include "db/dbformat.h"
#include "db/skiplist.h"
#include "util/arena.h"
#include "memtable.h"
namespace leveldb {
class InternalKeyComparator;
class Mutex;
class MemTableListIterator;
//
// This class stores references to all the immutable memtables.
// The memtables are flushed to L0 as soon as possible and in
// any order. If there is more than one immutable memtable, their
// flushes can occur concurrently. However, they are 'committed'
// to the manifest in FIFO order to maintain correctness and
// recoverability from a crash.
//
class MemTableList {
public:
// A list of memtables.
MemTableList() : size_(0), num_flush_not_started_(0),
commit_in_progress_(false) {
imm_flush_needed.Release_Store(NULL);
}
~MemTableList() {};
// So that background threads can detect a non-NULL pointer and
// determine whether there is anything more to start flushing.
port::AtomicPointer imm_flush_needed;
// Increase reference count on all underlying memtables
void RefAll();
// Drop reference count on all underlying memtables
void UnrefAll();
// Returns the total number of memtables in the list
int size();
// Returns true if there is at least one memtable on which flush has
// not yet started.
bool IsFlushPending();
// Returns the earliest memtable that needs to be flushed.
// Returns NULL if no such memtable exists.
MemTable* PickMemtableToFlush();
// Commit a successful flush in the manifest file
Status InstallMemtableFlushResults(MemTable* m,
VersionSet* vset, Status flushStatus,
port::Mutex* mu, Logger* info_log,
uint64_t file_number,
std::set<uint64_t>& pending_outputs);
// New memtables are inserted at the front of the list.
void Add(MemTable* m);
// Returns an estimate of the number of bytes of data in use.
size_t ApproximateMemoryUsage();
// Search all the memtables starting from the most recent one.
// Return the most recent value found, if any.
bool Get(const LookupKey& key, std::string* value, Status* s);
// Returns the list of underlying memtables.
void GetMemTables(std::vector<MemTable*>* list);
// Copying allowed
// MemTableList(const MemTableList&);
// void operator=(const MemTableList&);
private:
std::list<MemTable*> memlist_;
int size_;
// the number of elements that still need flushing
int num_flush_not_started_;
// committing in progress
bool commit_in_progress_;
};
} // namespace leveldb
#endif // STORAGE_LEVELDB_DB_MEMTABLELIST_H_
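
Putting the API above together, one flush pass by a background thread would look roughly like the following sketch. WriteMemTableToLevel0 is a stand-in for the patch's WriteLevel0Table, and the mutex and pending_outputs handling is simplified:

// Hedged sketch of one flush pass over the immutable memtable list.
// Assumes the caller holds the DB mutex, as the DBImpl methods do.
Status FlushOnePass(MemTableList* imm, VersionSet* vset, port::Mutex* mu,
                    Logger* info_log, std::set<uint64_t>* pending_outputs) {
  if (!imm->IsFlushPending()) {
    return Status::OK();                     // nothing to flush
  }
  MemTable* m = imm->PickMemtableToFlush();  // marks flush_in_progress_
  uint64_t file_number = vset->NewFileNumber();
  pending_outputs->insert(file_number);      // keep the new file live
  // Build the level-0 table; stand-in for the real WriteLevel0Table.
  Status s = WriteMemTableToLevel0(m, file_number);
  // Commits in FIFO order: may return without committing if an older
  // memtable is still flushing, or batch-commit several finished ones.
  return imm->InstallMemtableFlushResults(m, vset, s, mu, info_log,
                                          file_number, *pending_outputs);
}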
......@@ -199,7 +199,7 @@ class Repairer {
std::string scratch;
Slice record;
WriteBatch batch;
MemTable* mem = new MemTable(icmp_);
MemTable* mem = new MemTable(icmp_, options_.num_levels);
mem->Ref();
int counter = 0;
while (reader.ReadRecord(&record, &scratch)) {
......
......@@ -23,6 +23,10 @@
// more lists.
//
// ... prev vs. next pointer ordering ...
//
#ifndef STORAGE_LEVELDB_DB_SKIPLIST_H_
#define STORAGE_LEVELDB_DB_SKIPLIST_H_
#include <assert.h>
#include <stdlib.h>
......@@ -390,3 +394,5 @@ bool SkipList<Key,Comparator>::Contains(const Key& key) const {
}
} // namespace leveldb
#endif // STORAGE_LEVELDB_DB_SKIPLIST_H_
......@@ -21,8 +21,10 @@ struct FileMetaData {
uint64_t file_size; // File size in bytes
InternalKey smallest; // Smallest internal key served by table
InternalKey largest; // Largest internal key served by table
bool being_compacted; // Is this file undergoing compaction?
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0) { }
FileMetaData() : refs(0), allowed_seeks(1 << 30), file_size(0),
being_compacted(false) { }
};
class VersionEdit {
......
This diff is collapsed.
......@@ -18,6 +18,7 @@
#include <map>
#include <set>
#include <vector>
#include <deque>
#include "db/dbformat.h"
#include "db/version_edit.h"
#include "port/port.h"
......@@ -106,6 +107,11 @@ class Version {
// Return a human readable string that describes this version's contents.
std::string DebugString() const;
// Returns the version number of this version
uint64_t GetVersionNumber() {
return version_number_;
}
private:
friend class Compaction;
friend class VersionSet;
......@@ -128,13 +134,19 @@ class Version {
// Level that should be compacted next and its compaction score.
// Score < 1 means compaction is not strictly needed. These fields
// are initialized by Finalize().
double compaction_score_;
int compaction_level_;
// The most critical level to be compacted is listed first
// These are used to pick the best compaction level
std::vector<double> compaction_score_;
std::vector<int> compaction_level_;
// The offset in the manifest file where this version is stored.
uint64_t offset_manifest_file_;
explicit Version(VersionSet* vset);
// A version number that uniquely represents this version. This is
// used for debugging and logging purposes only.
uint64_t version_number_;
explicit Version(VersionSet* vset, uint64_t version_number = 0);
~Version();
......@@ -229,10 +241,20 @@ class VersionSet {
// The caller should delete the iterator when no longer needed.
Iterator* MakeInputIterator(Compaction* c);
// Returns true iff some level needs a compaction because it has
// exceeded its target size.
bool NeedsSizeCompaction() const {
for (int i = 0; i < NumberLevels()-1; i++) {
if (current_->compaction_score_[i] >= 1) {
return true;
}
}
return false;
}
// Returns true iff some level needs a compaction.
bool NeedsCompaction() const {
Version* v = current_;
return (v->compaction_score_ >= 1) || (v->file_to_compact_ != NULL);
return ((current_->file_to_compact_ != NULL) ||
NeedsSizeCompaction());
}
// Add all files listed in any live version to *live.
......@@ -263,8 +285,22 @@ class VersionSet {
// Return the size of the current manifest file
const uint64_t ManifestFileSize() { return current_->offset_manifest_file_; }
// For the specified level, pick a compaction.
// Returns NULL if there is no compaction to be done.
Compaction* PickCompactionBySize(int level);
// Free up the files that participated in a compaction
void ReleaseCompactionFiles(Compaction* c);
// verify that the files that we started with for a compaction
// still exist in the current version and in the same original level.
// This ensures that a concurrent compaction did not erroneously
// pick the same files to compact.
bool VerifyCompactionFileConsistency(Compaction* c);
private:
class Builder;
struct ManifestWriter;
friend class Compaction;
friend class Version;
......@@ -322,9 +358,34 @@ class VersionSet {
// Per-level max bytes
uint64_t* level_max_bytes_;
// record all the ongoing compactions for all levels
std::vector<std::set<Compaction*> > compactions_in_progress_;
// A lock that serializes writes to the manifest
port::Mutex manifest_lock_;
// Generates an increasing version number for every new version
uint64_t current_version_number_;
// Queue of writers to the manifest file
std::deque<ManifestWriter*> manifest_writers_;
// No copying allowed
VersionSet(const VersionSet&);
void operator=(const VersionSet&);
// Return the total amount of data that is undergoing
// compactions at this level
uint64_t SizeBeingCompacted(int level);
// Returns true if any one of the parent files is being compacted
bool ParentFilesInCompaction(FileMetaData* f, int level);
// Returns true if any one of the specified files is being compacted
bool FilesInCompaction(std::vector<FileMetaData*>& files);
void LogAndApplyHelper(Builder* b, Version* v,
VersionEdit* edit, port::Mutex* mu);
};
// A Compaction encapsulates information about a compaction.
......@@ -403,6 +464,9 @@ class Compaction {
// higher level than the ones involved in this compaction (i.e. for
// all L >= level_ + 2).
size_t* level_ptrs_;
// mark (or clear) all files that are being compacted
void MarkFilesBeingCompacted(bool);
};
} // namespace leveldb
......
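
The manifest_writers_ deque above is what implements the group commit mentioned in the summary. A simplified, self-contained sketch of the pattern (ManifestWriter's exact fields and the WriteEditsToManifest helper are assumptions; the real logic is folded into LogAndApply):

// Group-commit sketch: each thread queues its edit; only the queue head
// writes the manifest, folding in all queued edits, then marks the
// writers it committed as done and wakes them.
struct ManifestWriter {
  Status status;
  bool done;
  VersionEdit* edit;
  port::CondVar cv;
  ManifestWriter(port::Mutex* mu, VersionEdit* e)
      : done(false), edit(e), cv(mu) {}
};

Status GroupCommit(std::deque<ManifestWriter*>* writers,
                   VersionEdit* edit, port::Mutex* mu) {
  // Caller holds *mu throughout; CondVar::Wait releases it while asleep.
  ManifestWriter w(mu, edit);
  writers->push_back(&w);
  while (!w.done && &w != writers->front()) {
    w.cv.Wait();                  // an earlier head may commit us
  }
  if (w.done) {
    return w.status;              // already committed on our behalf
  }
  // We are the head: write every queued edit in a single manifest write.
  Status s = WriteEditsToManifest(*writers);   // assumed helper
  while (!writers->empty()) {
    ManifestWriter* ready = writers->front();
    writers->pop_front();
    ready->status = s;
    ready->done = true;
    if (ready != &w) {
      ready->cv.Signal();         // wake a writer we committed
    }
  }
  return s;
}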
......@@ -139,6 +139,10 @@ class HdfsEnv : public Env {
posixEnv->SetBackgroundThreads(number);
}
virtual std::string TimeToString(uint64_t number) {
return posixEnv->TimeToString(number);
}
static uint64_t gettid() {
assert(sizeof(pthread_t) <= sizeof(uint64_t));
return (uint64_t)pthread_self();
......@@ -273,6 +277,8 @@ class HdfsEnv : public Env {
std::string* outputpath) {return notsup;}
virtual void SetBackgroundThreads(int number) {}
virtual std::string TimeToString(uint64_t number) { return "";}
};
}
......
......@@ -159,6 +159,9 @@ class Env {
// default: 1
virtual void SetBackgroundThreads(int number) = 0;
// Converts seconds-since-Jan-01-1970 to a printable string
virtual std::string TimeToString(uint64_t time) = 0;
private:
// No copying allowed
Env(const Env&);
......@@ -358,6 +361,9 @@ class EnvWrapper : public Env {
void SetBackgroundThreads(int num) {
return target_->SetBackgroundThreads(num);
}
std::string TimeToString(uint64_t time) {
return target_->TimeToString(time);
}
private:
Env* target_;
......
......@@ -78,7 +78,8 @@ struct Options {
// on disk) before converting to a sorted on-disk file.
//
// Larger values increase performance, especially during bulk loads.
// Up to two write buffers may be held in memory at the same time,
// Up to max_write_buffer_number write buffers may be held in memory
// at the same time,
// so you may wish to adjust this parameter to control memory usage.
// Also, a larger write buffer will result in a longer recovery time
// the next time the database is opened.
......@@ -86,6 +87,12 @@ struct Options {
// Default: 4MB
size_t write_buffer_size;
// The maximum number of write buffers that are built up in memory.
// The default is 2, so that when 1 write buffer is being flushed to
// storage, new writes can continue to the other write buffer.
// Default: 2
int max_write_buffer_number;
// Number of open files that can be used by the DB. You may need to
// increase this if your database has a large working set (budget
// one open file per 2MB of working set).
......@@ -244,6 +251,10 @@ struct Options {
// value is 0 which means that obsolete files get removed after
// every compaction run.
uint64_t delete_obsolete_files_period_micros;
// Maximum number of concurrent background compactions.
// Default: 1
int max_background_compactions;
// Create an Options object with default values for all fields.
Options();
......
......@@ -46,6 +46,16 @@ static bool FLAGS_verbose = false;
// (initialized to default value by "main")
static int FLAGS_write_buffer_size = 0;
// The number of in-memory memtables.
// Each memtable is of size FLAGS_write_buffer_size.
// This is initialized to the default value of 2 in the "main" function.
static int FLAGS_max_write_buffer_number = 0;
// The maximum number of background compactions that can
// occur in parallel.
// This is initialized to the default value of 1 in the "main" function.
static int FLAGS_max_background_compactions = 0;
// Number of bytes to use as a cache of uncompressed data.
static long FLAGS_cache_size = 2 * KB * KB * KB;
......@@ -104,6 +114,11 @@ static int FLAGS_readwritepercent = 10;
// Option to disable compaction triggered by read.
static int FLAGS_disable_seek_compaction = false;
// Option to delete obsolete files periodically
// Default: 0 which means that obsolete files are
// deleted after every compaction run.
static uint64_t FLAGS_delete_obsolete_files_period_micros = 0;
// Algorithm to use to compress the database
static enum leveldb::CompressionType FLAGS_compression_type =
leveldb::kSnappyCompression;
......@@ -626,6 +641,8 @@ class StressTest {
Options options;
options.block_cache = cache_;
options.write_buffer_size = FLAGS_write_buffer_size;
options.max_write_buffer_number = FLAGS_max_write_buffer_number;
options.max_background_compactions = FLAGS_max_background_compactions;
options.block_size = FLAGS_block_size;
options.filter_policy = filter_policy_;
options.max_open_files = FLAGS_open_files;
......@@ -644,6 +661,8 @@ class StressTest {
options.compression = FLAGS_compression_type;
options.create_if_missing = true;
options.disable_seek_compaction = FLAGS_disable_seek_compaction;
options.delete_obsolete_files_period_micros =
FLAGS_delete_obsolete_files_period_micros;
Status s = DB::Open(options, FLAGS_db, &db_);
if (!s.ok()) {
fprintf(stderr, "open error: %s\n", s.ToString().c_str());
......@@ -670,7 +689,10 @@ class StressTest {
int main(int argc, char** argv) {
FLAGS_write_buffer_size = leveldb::Options().write_buffer_size;
FLAGS_max_write_buffer_number = leveldb::Options().max_write_buffer_number;
FLAGS_open_files = leveldb::Options().max_open_files;
FLAGS_max_background_compactions =
leveldb::Options().max_background_compactions;
// Compression test code above refers to FLAGS_block_size
FLAGS_block_size = leveldb::Options().block_size;
std::string default_db_path;
......@@ -706,6 +728,10 @@ int main(int argc, char** argv) {
FLAGS_value_size_mult = n;
} else if (sscanf(argv[i], "--write_buffer_size=%d%c", &n, &junk) == 1) {
FLAGS_write_buffer_size = n;
} else if (sscanf(argv[i], "--max_write_buffer_number=%d%c", &n, &junk) == 1) {
FLAGS_max_write_buffer_number = n;
} else if (sscanf(argv[i], "--max_background_compactions=%d%c", &n, &junk) == 1) {
FLAGS_max_background_compactions = n;
} else if (sscanf(argv[i], "--cache_size=%ld%c", &l, &junk) == 1) {
FLAGS_cache_size = l;
} else if (sscanf(argv[i], "--block_size=%d%c", &n, &junk) == 1) {
......@@ -784,6 +810,9 @@ int main(int argc, char** argv) {
} else if (sscanf(argv[i], "--disable_seek_compaction=%d%c", &n, &junk) == 1
&& (n == 0 || n == 1)) {
FLAGS_disable_seek_compaction = n;
} else if (sscanf(argv[i], "--delete_obsolete_files_period_micros=%ld%c",
&l, &junk) == 1) {
FLAGS_delete_obsolete_files_period_micros = l;
} else {
fprintf(stderr, "Invalid flag '%s'\n", argv[i]);
exit(1);
......
......@@ -749,6 +749,26 @@ class PosixEnv : public Env {
}
}
virtual std::string TimeToString(uint64_t secondsSince1970) {
const time_t seconds = (time_t)secondsSince1970;
struct tm t;
int maxsize = 64;
std::string dummy;
dummy.reserve(maxsize);
dummy.resize(maxsize);
char* p = &dummy[0];
localtime_r(&seconds, &t);
snprintf(p, maxsize,
"%04d/%02d/%02d-%02d:%02d:%02d ",
t.tm_year + 1900,
t.tm_mon + 1,
t.tm_mday,
t.tm_hour,
t.tm_min,
t.tm_sec);
return dummy;
}
private:
void PthreadCall(const char* label, int result) {
if (result != 0) {
......
......@@ -19,6 +19,7 @@ Options::Options()
env(Env::Default()),
info_log(NULL),
write_buffer_size(4<<20),
max_write_buffer_number(2),
max_open_files(1000),
block_cache(NULL),
block_size(4096),
......@@ -42,32 +43,34 @@ Options::Options()
db_stats_log_interval(1800),
db_log_dir(""),
disable_seek_compaction(false),
delete_obsolete_files_period_micros(0) {
delete_obsolete_files_period_micros(0),
max_background_compactions(1) {
}
void
Options::Dump(
Logger * log) const
{
Log(log," Options.comparator: %s", comparator->Name());
Log(log," Options.create_if_missing: %d", create_if_missing);
Log(log," Options.error_if_exists: %d", error_if_exists);
Log(log," Options.paranoid_checks: %d", paranoid_checks);
Log(log," Options.env: %p", env);
Log(log," Options.info_log: %p", info_log);
Log(log," Options.write_buffer_size: %zd", write_buffer_size);
Log(log," Options.max_open_files: %d", max_open_files);
Log(log," Options.block_cache: %p", block_cache);
Log(log," Options.block_cache_size: %zd", block_cache->GetCapacity());
Log(log," Options.block_size: %zd", block_size);
Log(log,"Options.block_restart_interval: %d", block_restart_interval);
Log(log," Options.compression: %d", compression);
Log(log," Options.filter_policy: %s",
Log(log," Options.comparator: %s", comparator->Name());
Log(log," Options.create_if_missing: %d", create_if_missing);
Log(log," Options.error_if_exists: %d", error_if_exists);
Log(log," Options.paranoid_checks: %d", paranoid_checks);
Log(log," Options.env: %p", env);
Log(log," Options.info_log: %p", info_log);
Log(log," Options.write_buffer_size: %zd", write_buffer_size);
Log(log," Options.max_write_buffer_number: %d", max_write_buffer_number);
Log(log," Options.max_open_files: %d", max_open_files);
Log(log," Options.block_cache: %p", block_cache);
Log(log," Options.block_cache_size: %zd", block_cache->GetCapacity());
Log(log," Options.block_size: %zd", block_size);
Log(log," Options.block_restart_interval: %d", block_restart_interval);
Log(log," Options.compression: %d", compression);
Log(log," Options.filter_policy: %s",
filter_policy == NULL ? "NULL" : filter_policy->Name());
Log(log," Options.num_levels: %d", num_levels);
Log(log," Options.disableDataSync: %d", disableDataSync);
Log(log," Options.use_fsync: %d", use_fsync);
Log(log," Options.db_stats_log_interval: %d",
Log(log," Options.num_levels: %d", num_levels);
Log(log," Options.disableDataSync: %d", disableDataSync);
Log(log," Options.use_fsync: %d", use_fsync);
Log(log," Options.db_stats_log_interval: %d",
db_stats_log_interval);
Log(log," Options.level0_file_num_compaction_trigger: %d",
level0_file_num_compaction_trigger);
......@@ -89,10 +92,12 @@ Options::Dump(
expanded_compaction_factor);
Log(log," Options.max_grandparent_overlap_factor: %d",
max_grandparent_overlap_factor);
Log(log," Options.db_log_dir: %s",
Log(log," Options.db_log_dir: %s",
db_log_dir.c_str());
Log(log," Options.disable_seek_compaction: %d",
Log(log," Options.disable_seek_compaction: %d",
disable_seek_compaction);
Log(log," Options.max_background_compactions: %d",
max_background_compactions);
} // Options::Dump
......