提交 9013f13b 编写于 作者: S Sanjay Ghemawat

use mmap on 64-bit machines to speed-up reads; small build fixes

上级 583f1499
...@@ -124,65 +124,65 @@ db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) ...@@ -124,65 +124,65 @@ db_bench: db/db_bench.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(CXX) $(LDFLAGS) db/db_bench.o $(LIBOBJECTS) $(TESTUTIL) -o $@
db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) db_bench_sqlite3: doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) -lsqlite3 doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(CXX) -lsqlite3 doc/bench/db_bench_sqlite3.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(LDFLAGS)
db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) db_bench_tree_db: doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL)
$(CXX) $(LDFLAGS) -lkyotocabinet doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@ $(CXX) $(LDFLAGS) -lkyotocabinet doc/bench/db_bench_tree_db.o $(LIBOBJECTS) $(TESTUTIL) -o $@
arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) arena_test: util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) util/arena_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) c_test: db/c_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) cache_test: util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) util/cache_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) coding_test: util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) util/coding_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) corruption_test: db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/corruption_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) crc32c_test: util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) util/crc32c_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) db_test: db/db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/db_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) dbformat_test: db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/dbformat_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) env_test: util/env_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) util/env_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) filename_test: db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/filename_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) log_test: db/log_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/log_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) table_test: table/table_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) table/table_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) skiplist_test: db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/skiplist_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) version_edit_test: db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/version_edit_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) version_set_test: db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/version_set_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) write_batch_test: db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(CXX) $(LDFLAGS) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(CXX) db/write_batch_test.o $(LIBOBJECTS) $(TESTHARNESS) -o $@ $(LDFLAGS)
$(MEMENVLIBRARY) : helpers/memenv/memenv.o $(MEMENVLIBRARY) : helpers/memenv/memenv.o
rm -f $@ rm -f $@
$(AR) -rs $@ helpers/memenv/memenv.o $(AR) -rs $@ helpers/memenv/memenv.o
memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) memenv_test : helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS)
$(CXX) $(LDFLAGS) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(CXX) helpers/memenv/memenv_test.o $(MEMENVLIBRARY) $(LIBRARY) $(TESTHARNESS) -o $@ $(LDFLAGS)
ifeq ($(PLATFORM), IOS) ifeq ($(PLATFORM), IOS)
# For iOS, create universal object files to be used on both the simulator and # For iOS, create universal object files to be used on both the simulator and
......
...@@ -19,9 +19,10 @@ inline uint32_t Block::NumRestarts() const { ...@@ -19,9 +19,10 @@ inline uint32_t Block::NumRestarts() const {
return DecodeFixed32(data_ + size_ - sizeof(uint32_t)); return DecodeFixed32(data_ + size_ - sizeof(uint32_t));
} }
Block::Block(const char* data, size_t size) Block::Block(const char* data, size_t size, bool take_ownership)
: data_(data), : data_(data),
size_(size) { size_(size),
owned_(take_ownership) {
if (size_ < sizeof(uint32_t)) { if (size_ < sizeof(uint32_t)) {
size_ = 0; // Error marker size_ = 0; // Error marker
} else { } else {
...@@ -35,7 +36,9 @@ Block::Block(const char* data, size_t size) ...@@ -35,7 +36,9 @@ Block::Block(const char* data, size_t size)
} }
Block::~Block() { Block::~Block() {
delete[] data_; if (owned_) {
delete[] data_;
}
} }
// Helper routine: decode the next block entry starting at "p", // Helper routine: decode the next block entry starting at "p",
......
...@@ -16,8 +16,9 @@ class Comparator; ...@@ -16,8 +16,9 @@ class Comparator;
class Block { class Block {
public: public:
// Initialize the block with the specified contents. // Initialize the block with the specified contents.
// Takes ownership of data[] and will delete[] it when done. // Takes ownership of data[] and will delete[] it when done iff
Block(const char* data, size_t size); // "take_ownership" is true.
Block(const char* data, size_t size, bool take_ownership);
~Block(); ~Block();
...@@ -30,6 +31,7 @@ class Block { ...@@ -30,6 +31,7 @@ class Block {
const char* data_; const char* data_;
size_t size_; size_t size_;
uint32_t restart_offset_; // Offset in data_ of restart array uint32_t restart_offset_; // Offset in data_ of restart array
bool owned_; // Block owns data_[]
// No copying allowed // No copying allowed
Block(const Block&); Block(const Block&);
......
...@@ -66,8 +66,10 @@ Status Footer::DecodeFrom(Slice* input) { ...@@ -66,8 +66,10 @@ Status Footer::DecodeFrom(Slice* input) {
Status ReadBlock(RandomAccessFile* file, Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options, const ReadOptions& options,
const BlockHandle& handle, const BlockHandle& handle,
Block** block) { Block** block,
bool* may_cache) {
*block = NULL; *block = NULL;
*may_cache = false;
// Read the block contents as well as the type/crc footer. // Read the block contents as well as the type/crc footer.
// See table_builder.cc for the code that built this structure. // See table_builder.cc for the code that built this structure.
...@@ -100,8 +102,14 @@ Status ReadBlock(RandomAccessFile* file, ...@@ -100,8 +102,14 @@ Status ReadBlock(RandomAccessFile* file,
case kNoCompression: case kNoCompression:
if (data != buf) { if (data != buf) {
// File implementation gave us pointer to some other data. // File implementation gave us pointer to some other data.
// Copy into buf[]. // Use it directly under the assumption that it will be live
memcpy(buf, data, n + kBlockTrailerSize); // while the file is open.
delete[] buf;
*block = new Block(data, n, false /* do not take ownership */);
*may_cache = false; // Do not double-cache
} else {
*block = new Block(buf, n, true /* take ownership */);
*may_cache = true;
} }
// Ok // Ok
...@@ -119,8 +127,8 @@ Status ReadBlock(RandomAccessFile* file, ...@@ -119,8 +127,8 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("corrupted compressed block contents"); return Status::Corruption("corrupted compressed block contents");
} }
delete[] buf; delete[] buf;
buf = ubuf; *block = new Block(ubuf, ulength, true /* take ownership */);
n = ulength; *may_cache = true;
break; break;
} }
default: default:
...@@ -128,7 +136,6 @@ Status ReadBlock(RandomAccessFile* file, ...@@ -128,7 +136,6 @@ Status ReadBlock(RandomAccessFile* file,
return Status::Corruption("bad block type"); return Status::Corruption("bad block type");
} }
*block = new Block(buf, n); // Block takes ownership of buf[]
return Status::OK(); return Status::OK();
} }
......
...@@ -86,10 +86,13 @@ static const size_t kBlockTrailerSize = 5; ...@@ -86,10 +86,13 @@ static const size_t kBlockTrailerSize = 5;
// Read the block identified by "handle" from "file". On success, // Read the block identified by "handle" from "file". On success,
// store a pointer to the heap-allocated result in *block and return // store a pointer to the heap-allocated result in *block and return
// OK. On failure store NULL in *block and return non-OK. // OK. On failure store NULL in *block and return non-OK.
// On success, stores true in *may_cache if the result may be
// cached, false if it must not be cached.
extern Status ReadBlock(RandomAccessFile* file, extern Status ReadBlock(RandomAccessFile* file,
const ReadOptions& options, const ReadOptions& options,
const BlockHandle& handle, const BlockHandle& handle,
Block** block); Block** block,
bool* may_cache);
// Implementation details follow. Clients should ignore, // Implementation details follow. Clients should ignore,
......
...@@ -49,7 +49,9 @@ Status Table::Open(const Options& options, ...@@ -49,7 +49,9 @@ Status Table::Open(const Options& options,
// Read the index block // Read the index block
Block* index_block = NULL; Block* index_block = NULL;
if (s.ok()) { if (s.ok()) {
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block); bool may_cache; // Ignored result
s = ReadBlock(file, ReadOptions(), footer.index_handle(), &index_block,
&may_cache);
} }
if (s.ok()) { if (s.ok()) {
...@@ -105,6 +107,7 @@ Iterator* Table::BlockReader(void* arg, ...@@ -105,6 +107,7 @@ Iterator* Table::BlockReader(void* arg,
// can add more features in the future. // can add more features in the future.
if (s.ok()) { if (s.ok()) {
bool may_cache;
if (block_cache != NULL) { if (block_cache != NULL) {
char cache_key_buffer[16]; char cache_key_buffer[16];
EncodeFixed64(cache_key_buffer, table->rep_->cache_id); EncodeFixed64(cache_key_buffer, table->rep_->cache_id);
...@@ -114,14 +117,14 @@ Iterator* Table::BlockReader(void* arg, ...@@ -114,14 +117,14 @@ Iterator* Table::BlockReader(void* arg,
if (cache_handle != NULL) { if (cache_handle != NULL) {
block = reinterpret_cast<Block*>(block_cache->Value(cache_handle)); block = reinterpret_cast<Block*>(block_cache->Value(cache_handle));
} else { } else {
s = ReadBlock(table->rep_->file, options, handle, &block); s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
if (s.ok() && options.fill_cache) { if (s.ok() && may_cache && options.fill_cache) {
cache_handle = block_cache->Insert( cache_handle = block_cache->Insert(
key, block, block->size(), &DeleteCachedBlock); key, block, block->size(), &DeleteCachedBlock);
} }
} }
} else { } else {
s = ReadBlock(table->rep_->file, options, handle, &block); s = ReadBlock(table->rep_->file, options, handle, &block, &may_cache);
} }
} }
......
...@@ -205,7 +205,7 @@ class BlockConstructor: public Constructor { ...@@ -205,7 +205,7 @@ class BlockConstructor: public Constructor {
block_size_ = block_data.size(); block_size_ = block_data.size();
char* block_data_copy = new char[block_size_]; char* block_data_copy = new char[block_size_];
memcpy(block_data_copy, block_data.data(), block_size_); memcpy(block_data_copy, block_data.data(), block_size_);
block_ = new Block(block_data_copy, block_size_); block_ = new Block(block_data_copy, block_size_, true /* take ownership */);
return Status::OK(); return Status::OK();
} }
virtual size_t NumBytes() const { return block_size_; } virtual size_t NumBytes() const { return block_size_; }
......
...@@ -66,6 +66,7 @@ class PosixSequentialFile: public SequentialFile { ...@@ -66,6 +66,7 @@ class PosixSequentialFile: public SequentialFile {
} }
}; };
// pread() based random-access
class PosixRandomAccessFile: public RandomAccessFile { class PosixRandomAccessFile: public RandomAccessFile {
private: private:
std::string filename_; std::string filename_;
...@@ -89,6 +90,32 @@ class PosixRandomAccessFile: public RandomAccessFile { ...@@ -89,6 +90,32 @@ class PosixRandomAccessFile: public RandomAccessFile {
} }
}; };
// mmap() based random-access.
//
// Serves reads directly out of a memory mapping of the file: Read() does
// not copy any bytes, it returns a Slice that points into the mapping.
class PosixMmapReadableFile: public RandomAccessFile {
 private:
  std::string filename_;   // Used when building error statuses
  void* mmapped_region_;   // Start of the mapping; unmapped in the destructor
  size_t length_;          // Size of the mapping in bytes

 public:
  // base[0,length-1] contains the mmapped contents of the file.
  // The region is unmapped when this object is destroyed.
  PosixMmapReadableFile(const std::string& fname, void* base, size_t length)
      : filename_(fname), mmapped_region_(base), length_(length) { }

  virtual ~PosixMmapReadableFile() { munmap(mmapped_region_, length_); }

  // Stores in *result a Slice covering bytes [offset, offset+n) of the
  // mapping. "scratch" is not used. The Slice refers to the mapped memory
  // itself, so it stays valid only until this object is destroyed.
  // If the requested range does not lie entirely inside the mapping,
  // stores an empty Slice in *result and returns an EINVAL IOError.
  virtual Status Read(uint64_t offset, size_t n, Slice* result,
                      char* scratch) const {
    Status s;
    // Check the range without forming "offset + n": that sum can wrap
    // around for a huge offset and make an out-of-range request look
    // valid. Once offset <= length_ holds, length_ - offset cannot
    // underflow.
    if (offset > length_ || n > length_ - offset) {
      *result = Slice();
      s = IOError(filename_, EINVAL);
    } else {
      *result = Slice(reinterpret_cast<char*>(mmapped_region_) + offset, n);
    }
    return s;
  }
};
// We preallocate up to an extra megabyte and use memcpy to append new // We preallocate up to an extra megabyte and use memcpy to append new
// data to the file. This is safe since we either properly close the // data to the file. This is safe since we either properly close the
// file before reading from it, or for log files, the reading code // file before reading from it, or for log files, the reading code
...@@ -297,13 +324,28 @@ class PosixEnv : public Env { ...@@ -297,13 +324,28 @@ class PosixEnv : public Env {
virtual Status NewRandomAccessFile(const std::string& fname, virtual Status NewRandomAccessFile(const std::string& fname,
RandomAccessFile** result) { RandomAccessFile** result) {
*result = NULL;
Status s;
int fd = open(fname.c_str(), O_RDONLY); int fd = open(fname.c_str(), O_RDONLY);
if (fd < 0) { if (fd < 0) {
*result = NULL; s = IOError(fname, errno);
return IOError(fname, errno); } else if (sizeof(void*) >= 8) {
// Use mmap when virtual address-space is plentiful.
uint64_t size;
s = GetFileSize(fname, &size);
if (s.ok()) {
void* base = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
if (base != MAP_FAILED) {
*result = new PosixMmapReadableFile(fname, base, size);
} else {
s = IOError(fname, errno);
}
}
close(fd);
} else {
*result = new PosixRandomAccessFile(fname, fd);
} }
*result = new PosixRandomAccessFile(fname, fd); return s;
return Status::OK();
} }
virtual Status NewWritableFile(const std::string& fname, virtual Status NewWritableFile(const std::string& fname,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册