Merge branch 'master' of https://github.com/facebook/rocksdb

5a1b41ce · Ankit Gupta · e87973cd · 1e560459 · 5a1b41ce · 5a1b41ce
40 changed files
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@ build_config.mk
 *.*jnilib*
 *.d-e
 *.o-*
+*.swp

 ldb
 manifest_dump

--- a/.travis.yml
+++ b/.travis.yml
+language: cpp
+compiler: gcc
+before_install:
+# As of this writing (10 May 2014) the Travis build environment is Ubuntu 12.04,
+# which needs the following ugly dependency incantations to build RocksDB:
+ - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
+ - sudo apt-get update -qq
+ - sudo apt-get install -y -qq gcc-4.8 g++-4.8 zlib1g-dev libbz2-dev libsnappy-dev libjemalloc-dev
+ - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 50
+ - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50
+ - wget https://gflags.googlecode.com/files/libgflags0_2.0-1_amd64.deb
+ - sudo dpkg -i libgflags0_2.0-1_amd64.deb
+ - wget https://gflags.googlecode.com/files/libgflags-dev_2.0-1_amd64.deb
+ - sudo dpkg -i libgflags-dev_2.0-1_amd64.deb
+# Lousy hack to disable use and testing of fallocate, which doesn't behave quite
+# as EnvPosixTest::AllocateTest expects within the Travis OpenVZ environment.
+ - sed -i "s/fallocate(/HACK_NO_fallocate(/" build_tools/build_detect_platform
+script: make check -j8
+notifications:
+    email: false
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -5,6 +5,9 @@
 ### Public API changes
 * Replaced ColumnFamilyOptions::table_properties_collectors with ColumnFamilyOptions::table_properties_collector_factories

+### New Features
+* Hash index for block-based table will be materialized and reconstructed more efficiently. Previously, the hash index was constructed by scanning the whole table during every table open.
+
 ## 3.0.0 (05/05/2014)

 ### Public API changes

--- a/README.md
+++ b/README.md
 ## RocksDB: A Persistent Key-Value Store for Flash and RAM Storage

+[![Build Status](https://travis-ci.org/facebook/rocksdb.svg?branch=master)](https://travis-ci.org/facebook/rocksdb)
+
 RocksDB is developed and maintained by Facebook Database Engineering Team.
 It is built on earlier work on LevelDB by Sanjay Ghemawat (sanjay@google.com)
 and Jeff Dean (jeff@google.com)

--- a/db/compaction.cc
+++ b/db/compaction.cc
@@ -8,7 +8,13 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.

 #include "db/compaction.h"
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+#include <vector>
+
 #include "db/column_family.h"
+#include "util/logging.h"

 namespace rocksdb {

@@ -191,71 +197,51 @@ void Compaction::ResetNextCompactionIndex() {
  input_version_->ResetNextCompactionIndex(level_);
 }

-/*
-for sizes >=10TB, print "XXTB"
-for sizes >=10GB, print "XXGB"
-etc.
-*/
-static void FileSizeSummary(unsigned long long sz, char* output, int len) {
-  const unsigned long long ull10 = 10;
-  if (sz >= ull10<<40) {
-    snprintf(output, len, "%lluTB", sz>>40);
-  } else if (sz >= ull10<<30) {
-    snprintf(output, len, "%lluGB", sz>>30);
-  } else if (sz >= ull10<<20) {
-    snprintf(output, len, "%lluMB", sz>>20);
-  } else if (sz >= ull10<<10) {
-    snprintf(output, len, "%lluKB", sz>>10);
-  } else {
-    snprintf(output, len, "%lluB", sz);
-  }
-}
-
-static int InputSummary(std::vector<FileMetaData*>& files, char* output,
-                         int len) {
+namespace {
+int InputSummary(const std::vector<FileMetaData*>& files, char* output,
+                 int len) {
  *output = '\0';
  int write = 0;
  for (unsigned int i = 0; i < files.size(); i++) {
    int sz = len - write;
    int ret;
    char sztxt[16];
-    FileSizeSummary((unsigned long long)files.at(i)->file_size, sztxt, 16);
-    ret = snprintf(output + write, sz, "%lu(%s) ",
-                   (unsigned long)files.at(i)->number,
+    AppendHumanBytes(files.at(i)->file_size, sztxt, 16);
+    ret = snprintf(output + write, sz, "%" PRIu64 "(%s) ", files.at(i)->number,
                   sztxt);
-    if (ret < 0 || ret >= sz)
-      break;
+    if (ret < 0 || ret >= sz) break;
    write += ret;
  }
-  return write;
+  // if files.size() is non-zero, overwrite the last space
+  return write - !!files.size();
 }
+}  // namespace

 void Compaction::Summary(char* output, int len) {
-  int write = snprintf(output, len,
-      "Base version %lu Base level %d, seek compaction:%d, inputs: [",
-      (unsigned long)input_version_->GetVersionNumber(),
-      level_,
-      seek_compaction_);
+  int write =
+      snprintf(output, len, "Base version %" PRIu64
+                            " Base level %d, seek compaction:%d, inputs: [",
+               input_version_->GetVersionNumber(), level_, seek_compaction_);
  if (write < 0 || write >= len) {
    return;
  }

-  write += InputSummary(inputs_[0], output+write, len-write);
+  write += InputSummary(inputs_[0], output + write, len - write);
  if (write < 0 || write >= len) {
    return;
  }

-  write += snprintf(output+write, len-write, "],[");
+  write += snprintf(output + write, len - write, "], [");
  if (write < 0 || write >= len) {
    return;
  }

-  write += InputSummary(inputs_[1], output+write, len-write);
+  write += InputSummary(inputs_[1], output + write, len - write);
  if (write < 0 || write >= len) {
    return;
  }

-  snprintf(output+write, len-write, "]");
+  snprintf(output + write, len - write, "]");
 }

 }  // namespace rocksdb
--- a/db/db_bench.cc
+++ b/db/db_bench.cc
@@ -159,14 +159,7 @@ DEFINE_int32(duration, 0, "Time in seconds for the random-ops tests to run."
 DEFINE_int32(value_size, 100, "Size of each value");


-// the maximum size of key in bytes
-static const int kMaxKeySize = 128;
 static bool ValidateKeySize(const char* flagname, int32_t value) {
-  if (value > kMaxKeySize) {
-    fprintf(stderr, "Invalid value for --%s: %d, must be < %d\n",
-            flagname, value, kMaxKeySize);
-    return false;
-  }
  return true;
 }


--- a/db/db_test.cc
+++ b/db/db_test.cc
@@ -2365,9 +2365,9 @@ TEST(DBTest, NumImmutableMemTable) {
    ASSERT_EQ(num, "0");
    ASSERT_TRUE(dbfull()->GetProperty(
        handles_[1], "rocksdb.cur-size-active-mem-table", &num));
-    // "208" is the size of the metadata of an empty skiplist, this would
+    // "200" is the size of the metadata of an empty skiplist, this would
    // break if we change the default skiplist implementation
-    ASSERT_EQ(num, "208");
+    ASSERT_EQ(num, "200");
    SetPerfLevel(kDisable);
  } while (ChangeCompactOptions());
 }

--- a/db/version_set.cc
+++ b/db/version_set.cc
@@ -7,9 +7,9 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.

-#define __STDC_FORMAT_MACROS
 #include "db/version_set.h"

+#define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 #include <algorithm>
 #include <map>
@@ -1151,6 +1151,10 @@ const char* Version::LevelSummary(LevelSummaryStorage* scratch) const {
    if (ret < 0 || ret >= sz) break;
    len += ret;
  }
+  if (len > 0) {
+    // overwrite the last space
+    --len;
+  }
  snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]");
  return scratch->buffer;
 }
@@ -1160,16 +1164,20 @@ const char* Version::LevelFileSummary(FileSummaryStorage* scratch,
  int len = snprintf(scratch->buffer, sizeof(scratch->buffer), "files_size[");
  for (const auto& f : files_[level]) {
    int sz = sizeof(scratch->buffer) - len;
+    char sztxt[16];
+    AppendHumanBytes(f->file_size, sztxt, 16);
    int ret = snprintf(scratch->buffer + len, sz,
-                       "#%lu(seq=%lu,sz=%lu,%lu) ",
-                       (unsigned long)f->number,
-                       (unsigned long)f->smallest_seqno,
-                       (unsigned long)f->file_size,
-                       (unsigned long)f->being_compacted);
+                       "#%" PRIu64 "(seq=%" PRIu64 ",sz=%s,%d) ", f->number,
+                       f->smallest_seqno, sztxt,
+                       static_cast<int>(f->being_compacted));
    if (ret < 0 || ret >= sz)
      break;
    len += ret;
  }
+  // overwrite the last space (only if files_[level].size() is non-zero)
+  if (files_[level].size() && len > 0) {
+    --len;
+  }
  snprintf(scratch->buffer + len, sizeof(scratch->buffer) - len, "]");
  return scratch->buffer;
 }

--- a/hdfs/env_hdfs.h
+++ b/hdfs/env_hdfs.h
@@ -18,9 +18,6 @@

 namespace rocksdb {

-static const std::string kProto = "hdfs://";
-static const std::string pathsep = "/";
-
 // Thrown during execution when there is an issue with the supplied
 // arguments.
 class HdfsUsageException : public std::exception { };
@@ -58,20 +55,23 @@ class HdfsEnv : public Env {
  }

  virtual Status NewSequentialFile(const std::string& fname,
-                                   SequentialFile** result);
+                                   std::unique_ptr<SequentialFile>* result,
+                                   const EnvOptions& options);

  virtual Status NewRandomAccessFile(const std::string& fname,
-                                     RandomAccessFile** result);
+                                     std::unique_ptr<RandomAccessFile>* result,
+                                     const EnvOptions& options);

  virtual Status NewWritableFile(const std::string& fname,
-                                 WritableFile** result);
+                                 std::unique_ptr<WritableFile>* result,
+                                 const EnvOptions& options);

  virtual Status NewRandomRWFile(const std::string& fname,
-                                 unique_ptr<RandomRWFile>* result,
+                                 std::unique_ptr<RandomRWFile>* result,
                                 const EnvOptions& options);

  virtual Status NewDirectory(const std::string& name,
-                              unique_ptr<Directory>* result);
+                              std::unique_ptr<Directory>* result);

  virtual bool FileExists(const std::string& fname);

@@ -97,7 +97,8 @@ class HdfsEnv : public Env {

  virtual Status UnlockFile(FileLock* lock);

-  virtual Status NewLogger(const std::string& fname, Logger** result);
+  virtual Status NewLogger(const std::string& fname,
+                           std::shared_ptr<Logger>* result);

  virtual void Schedule(void (*function)(void* arg), void* arg,
                        Priority pri = LOW) {
@@ -161,6 +162,9 @@ class HdfsEnv : public Env {
                        // object here so that we can use posix timers,
                        // posix threads, etc.

+  static const std::string kProto;
+  static const std::string pathsep;
+
  /**
   * If the URI is specified of the form hdfs://server:port/path,
   * then connect to the specified cluster

--- a/include/rocksdb/status.h
+++ b/include/rocksdb/status.h
@@ -97,7 +97,6 @@ class Status {
  // Returns the string "OK" for success.
  std::string ToString() const;

- private:
  enum Code {
    kOk = 0,
    kNotFound = 1,
@@ -110,6 +109,10 @@ class Status {
    kShutdownInProgress = 8
  };

+  Code code() const {
+    return code_;
+  }
+ private:
  // A nullptr state_ (which is always the case for OK) means the message
  // is empty.
  // of the following form:
@@ -118,9 +121,6 @@ class Status {
  Code code_;
  const char* state_;

-  Code code() const {
-    return code_;
-  }
  explicit Status(Code code) : code_(code), state_(nullptr) { }
  Status(Code code, const Slice& msg, const Slice& msg2);
  static const char* CopyState(const char* s);

--- a/table/block_based_table_builder.cc
+++ b/table/block_based_table_builder.cc
@@ -15,6 +15,8 @@

 #include <map>
 #include <memory>
+#include <string>
+#include <unordered_map>

 #include "db/dbformat.h"

@@ -41,6 +43,8 @@

 namespace rocksdb {

+extern const std::string kHashIndexPrefixesBlock;
+extern const std::string kHashIndexPrefixesMetadataBlock;
 namespace {

 typedef BlockBasedTableOptions::IndexType IndexType;
@@ -57,6 +61,14 @@ typedef BlockBasedTableOptions::IndexType IndexType;
 // design that just works.
 class IndexBuilder {
 public:
+  // Index builder will construct a set of blocks which contain:
+  //  1. One primary index block.
+  //  2. (Optional) a set of metablocks that contains the metadata of the
+  //     primary index.
+  struct IndexBlocks {
+    Slice index_block_contents;
+    std::unordered_map<std::string, Slice> meta_blocks;
+  };
  explicit IndexBuilder(const Comparator* comparator)
      : comparator_(comparator) {}

@@ -72,15 +84,19 @@ class IndexBuilder {
  //                           the last one in the table
  //
  // REQUIRES: Finish() has not yet been called.
-  virtual void AddEntry(std::string* last_key_in_current_block,
-                        const Slice* first_key_in_next_block,
-                        const BlockHandle& block_handle) = 0;
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) = 0;
+
+  // This method will be called whenever a key is added. The subclasses may
+  // override OnKeyAdded() if they need to collect additional information.
+  virtual void OnKeyAdded(const Slice& key) {}

  // Inform the index builder that all entries have been written. Block builder
  // may therefore perform any operation required for block finalization.
  //
  // REQUIRES: Finish() has not yet been called.
-  virtual Slice Finish() = 0;
+  virtual Status Finish(IndexBlocks* index_blocks) = 0;

  // Get the estimated size for index block.
  virtual size_t EstimatedSize() const = 0;
@@ -103,9 +119,9 @@ class ShortenedIndexBuilder : public IndexBuilder {
      : IndexBuilder(comparator),
        index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}

-  virtual void AddEntry(std::string* last_key_in_current_block,
-                        const Slice* first_key_in_next_block,
-                        const BlockHandle& block_handle) override {
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) override {
    if (first_key_in_next_block != nullptr) {
      comparator_->FindShortestSeparator(last_key_in_current_block,
                                         *first_key_in_next_block);
@@ -118,7 +134,10 @@ class ShortenedIndexBuilder : public IndexBuilder {
    index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
  }

-  virtual Slice Finish() override { return index_block_builder_.Finish(); }
+  virtual Status Finish(IndexBlocks* index_blocks) {
+    index_blocks->index_block_contents = index_block_builder_.Finish();
+    return Status::OK();
+  }

  virtual size_t EstimatedSize() const {
    return index_block_builder_.CurrentSizeEstimate();
@@ -128,38 +147,125 @@ class ShortenedIndexBuilder : public IndexBuilder {
  BlockBuilder index_block_builder_;
 };

-// FullKeyIndexBuilder is also based on BlockBuilder. It works pretty much like
-// ShortenedIndexBuilder, but preserves the full key instead the substitude key.
-class FullKeyIndexBuilder : public IndexBuilder {
+// HashIndexBuilder contains a binary-searchable primary index and the
+// metadata for secondary hash index construction.
+// The metadata for hash index consists of two parts:
+//  - a metablock that compactly contains a sequence of prefixes. All prefixes
+//    are stored consecutively without any metadata (like prefix sizes) being
+//    stored; that metadata is kept in the other metablock.
+//  - a metablock that contains the metadata of the prefixes, including prefix
+//    size, restart index and number of blocks it spans. The format looks like:
+//
+// +------------------+-------------------------+----------------------+ <=prefix 1
+// | length: varint32 | restart index: varint32 | num-blocks: varint32 |
+// +------------------+-------------------------+----------------------+ <=prefix 2
+// | length: varint32 | restart index: varint32 | num-blocks: varint32 |
+// +------------------+-------------------------+----------------------+
+// |                                                                   |
+// | ....                                                              |
+// |                                                                   |
+// +------------------+-------------------------+----------------------+ <=prefix n
+// | length: varint32 | restart index: varint32 | num-blocks: varint32 |
+// +------------------+-------------------------+----------------------+
+//
+// The reason for separating these two metablocks is to enable efficient reuse
+// of the first metablock during hash index construction without unnecessary
+// data copies or small heap allocations for prefixes.
+class HashIndexBuilder : public IndexBuilder {
 public:
-  explicit FullKeyIndexBuilder(const Comparator* comparator)
+  explicit HashIndexBuilder(const Comparator* comparator,
+                            const SliceTransform* hash_key_extractor)
      : IndexBuilder(comparator),
-        index_block_builder_(1 /* block_restart_interval == 1 */, comparator) {}
+        primary_index_builder(comparator),
+        hash_key_extractor_(hash_key_extractor) {}
+
+  virtual void AddIndexEntry(std::string* last_key_in_current_block,
+                             const Slice* first_key_in_next_block,
+                             const BlockHandle& block_handle) override {
+    ++current_restart_index_;
+    primary_index_builder.AddIndexEntry(last_key_in_current_block,
+                                        first_key_in_next_block, block_handle);
+  }

-  virtual void AddEntry(std::string* last_key_in_current_block,
-                        const Slice* first_key_in_next_block,
-                        const BlockHandle& block_handle) override {
-    std::string handle_encoding;
-    block_handle.EncodeTo(&handle_encoding);
-    index_block_builder_.Add(*last_key_in_current_block, handle_encoding);
+  virtual void OnKeyAdded(const Slice& key) override {
+    auto key_prefix = hash_key_extractor_->Transform(key);
+    bool is_first_entry = pending_block_num_ == 0;
+
+    // Keys may share the prefix
+    if (is_first_entry || pending_entry_prefix_ != key_prefix) {
+      if (!is_first_entry) {
+        FlushPendingPrefix();
+      }
+
+      // need a hard copy, otherwise the underlying data changes all the time.
+      // TODO(kailiu) ToString() is expensive. We may speed this up if we can
+      // avoid the data copy.
+      pending_entry_prefix_ = key_prefix.ToString();
+      pending_block_num_ = 1;
+      pending_entry_index_ = current_restart_index_;
+    } else {
+      // block number increments when keys sharing the prefix reside in
+      // different data blocks.
+      auto last_restart_index = pending_entry_index_ + pending_block_num_ - 1;
+      assert(last_restart_index <= current_restart_index_);
+      if (last_restart_index != current_restart_index_) {
+        ++pending_block_num_;
+      }
+    }
  }

-  virtual Slice Finish() override { return index_block_builder_.Finish(); }
+  virtual Status Finish(IndexBlocks* index_blocks) {
+    FlushPendingPrefix();
+    primary_index_builder.Finish(index_blocks);
+    index_blocks->meta_blocks.insert(
+        {kHashIndexPrefixesBlock.c_str(), prefix_block_});
+    index_blocks->meta_blocks.insert(
+        {kHashIndexPrefixesMetadataBlock.c_str(), prefix_meta_block_});
+    return Status::OK();
+  }

  virtual size_t EstimatedSize() const {
-    return index_block_builder_.CurrentSizeEstimate();
+    return primary_index_builder.EstimatedSize() + prefix_block_.size() +
+           prefix_meta_block_.size();
  }

 private:
-  BlockBuilder index_block_builder_;
+  void FlushPendingPrefix() {
+    prefix_block_.append(pending_entry_prefix_.data(),
+                         pending_entry_prefix_.size());
+    PutVarint32(&prefix_meta_block_, pending_entry_prefix_.size());
+    PutVarint32(&prefix_meta_block_, pending_entry_index_);
+    PutVarint32(&prefix_meta_block_, pending_block_num_);
+  }
+
+  ShortenedIndexBuilder primary_index_builder;
+  const SliceTransform* hash_key_extractor_;
+
+  // stores a sequence of prefixes
+  std::string prefix_block_;
+  // stores the metadata of prefixes
+  std::string prefix_meta_block_;
+
+  // The following 3 variables keep the unflushed prefix and its metadata.
+  // The details of block_num and entry_index can be found in
+  // "block_hash_index.{h,cc}"
+  uint32_t pending_block_num_ = 0;
+  uint32_t pending_entry_index_ = 0;
+  std::string pending_entry_prefix_;
+
+  uint64_t current_restart_index_ = 0;
 };

 // Create an index builder based on its type.
-IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator) {
+IndexBuilder* CreateIndexBuilder(IndexType type, const Comparator* comparator,
+                                 const SliceTransform* prefix_extractor) {
  switch (type) {
    case BlockBasedTableOptions::kBinarySearch: {
      return new ShortenedIndexBuilder(comparator);
    }
+    case BlockBasedTableOptions::kHashSearch: {
+      return new HashIndexBuilder(comparator, prefix_extractor);
+    }
    default: {
      assert(!"Do not recognize the index type ");
      return nullptr;
@@ -249,7 +355,7 @@ extern const uint64_t kLegacyBlockBasedTableMagicNumber = 0xdb4775248b80fb57ull;
 class BlockBasedTableBuilder::BlockBasedTablePropertiesCollector
    : public TablePropertiesCollector {
 public:
-  BlockBasedTablePropertiesCollector(
+  explicit BlockBasedTablePropertiesCollector(
      BlockBasedTableOptions::IndexType index_type)
      : index_type_(index_type) {}

@@ -288,6 +394,8 @@ struct BlockBasedTableBuilder::Rep {
  uint64_t offset = 0;
  Status status;
  BlockBuilder data_block;
+
+  InternalKeySliceTransform internal_prefix_transform;
  std::unique_ptr<IndexBuilder> index_builder;

  std::string last_key;
@@ -316,8 +424,9 @@ struct BlockBasedTableBuilder::Rep {
        internal_comparator(icomparator),
        file(f),
        data_block(options, &internal_comparator),
-        index_builder(
-            CreateIndexBuilder(index_block_type, &internal_comparator)),
+        internal_prefix_transform(options.prefix_extractor.get()),
+        index_builder(CreateIndexBuilder(index_block_type, &internal_comparator,
+                                         &this->internal_prefix_transform)),
        compression_type(compression_type),
        checksum_type(checksum_type),
        filter_block(opt.filter_policy == nullptr
@@ -335,16 +444,13 @@ struct BlockBasedTableBuilder::Rep {
  }
 };

-// TODO(sdong): Currently only write out binary search index. In
-// BlockBasedTableReader, Hash index will be built using binary search index.
 BlockBasedTableBuilder::BlockBasedTableBuilder(
    const Options& options, const BlockBasedTableOptions& table_options,
    const InternalKeyComparator& internal_comparator, WritableFile* file,
    CompressionType compression_type)
    : rep_(new Rep(options, internal_comparator, file,
                   table_options.flush_block_policy_factory.get(),
-                   compression_type,
-                   BlockBasedTableOptions::IndexType::kBinarySearch,
+                   compression_type, table_options.index_type,
                   table_options.checksum)) {
  if (rep_->filter_block != nullptr) {
    rep_->filter_block->StartBlock(0);
@@ -370,7 +476,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
  if (r->props.num_entries > 0) {
    assert(r->internal_comparator.Compare(key, Slice(r->last_key)) > 0);
  }
-
+  r->index_builder->OnKeyAdded(key);
  auto should_flush = r->flush_block_policy->Update(key, value);
  if (should_flush) {
    assert(!r->data_block.empty());
@@ -385,7 +491,7 @@ void BlockBasedTableBuilder::Add(const Slice& key, const Slice& value) {
    // entries in the first block and < all entries in subsequent
    // blocks.
    if (ok()) {
-      r->index_builder->AddEntry(&r->last_key, &key, r->pending_handle);
+      r->index_builder->AddIndexEntry(&r->last_key, &key, r->pending_handle);
    }
  }

@@ -561,24 +667,36 @@ Status BlockBasedTableBuilder::Finish() {
  // block, we will finish writing all index entries here and flush them
  // to storage after metaindex block is written.
  if (ok() && !empty_data_block) {
-    r->index_builder->AddEntry(&r->last_key, nullptr /* no next data block */,
-                               r->pending_handle);
+    r->index_builder->AddIndexEntry(
+        &r->last_key, nullptr /* no next data block */, r->pending_handle);
+  }
+
+  IndexBuilder::IndexBlocks index_blocks;
+  auto s = r->index_builder->Finish(&index_blocks);
+  if (!s.ok()) {
+    return s;
  }

  // Write meta blocks and metaindex block with the following order.
  //    1. [meta block: filter]
-  //    2. [meta block: properties]
-  //    3. [metaindex block]
-  if (ok()) {
-    MetaIndexBuilder meta_index_builer;
+  //    2. [other meta blocks]
+  //    3. [meta block: properties]
+  //    4. [metaindex block]
+  // write meta blocks
+  MetaIndexBuilder meta_index_builder;
+  for (const auto& item : index_blocks.meta_blocks) {
+    BlockHandle block_handle;
+    WriteBlock(item.second, &block_handle);
+    meta_index_builder.Add(item.first, block_handle);
+  }

-    // Write filter block.
+  if (ok()) {
    if (r->filter_block != nullptr) {
      // Add mapping from "<filter_block_prefix>.Name" to location
      // of filter data.
      std::string key = BlockBasedTable::kFilterBlockPrefix;
      key.append(r->options.filter_policy->Name());
-      meta_index_builer.Add(key, filter_block_handle);
+      meta_index_builder.Add(key, filter_block_handle);
    }

    // Write properties block.
@@ -605,20 +723,16 @@ Status BlockBasedTableBuilder::Finish() {
          &properties_block_handle
      );

-      meta_index_builer.Add(kPropertiesBlock,
-                            properties_block_handle);
+      meta_index_builder.Add(kPropertiesBlock, properties_block_handle);
    }  // end of properties block writing
-
-    WriteRawBlock(
-        meta_index_builer.Finish(),
-        kNoCompression,
-        &metaindex_block_handle
-    );
-  }  // meta blocks and metaindex block.
+  }    // meta blocks

  // Write index block
  if (ok()) {
-    WriteBlock(r->index_builder->Finish(), &index_block_handle);
+    // flush the meta index block
+    WriteRawBlock(meta_index_builder.Finish(), kNoCompression,
+                  &metaindex_block_handle);
+    WriteBlock(index_blocks.index_block_contents, &index_block_handle);
  }

  // Write footer
@@ -685,7 +799,6 @@ uint64_t BlockBasedTableBuilder::FileSize() const {
  return rep_->offset;
 }

-const std::string BlockBasedTable::kFilterBlockPrefix =
-    "filter.";
+const std::string BlockBasedTable::kFilterBlockPrefix = "filter.";

 }  // namespace rocksdb
--- a/table/block_based_table_factory.cc
+++ b/table/block_based_table_factory.cc
@@ -56,5 +56,8 @@ TableFactory* NewBlockBasedTableFactory(

 const std::string BlockBasedTablePropertyNames::kIndexType =
    "rocksdb.block.based.table.index.type";
+const std::string kHashIndexPrefixesBlock = "rocksdb.hashindex.prefixes";
+const std::string kHashIndexPrefixesMetadataBlock =
+    "rocksdb.hashindex.metadata";

 }  // namespace rocksdb
--- a/table/block_based_table_factory.h
+++ b/table/block_based_table_factory.h
@@ -8,9 +8,11 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.

 #pragma once
-#include <memory>
 #include <stdint.h>

+#include <memory>
+#include <string>
+
 #include "rocksdb/flush_block_policy.h"
 #include "rocksdb/options.h"
 #include "rocksdb/table.h"
@@ -45,4 +47,7 @@ class BlockBasedTableFactory : public TableFactory {
  BlockBasedTableOptions table_options_;
 };

+extern const std::string kHashIndexPrefixesBlock;
+extern const std::string kHashIndexPrefixesMetadataBlock;
+
 }  // namespace rocksdb
--- a/table/block_based_table_reader.cc
+++ b/table/block_based_table_reader.cc
@@ -38,6 +38,8 @@
 namespace rocksdb {

 extern const uint64_t kBlockBasedTableMagicNumber;
+extern const std::string kHashIndexPrefixesBlock;
+extern const std::string kHashIndexPrefixesMetadataBlock;
 using std::unique_ptr;

 typedef BlockBasedTable::IndexReader IndexReader;
@@ -186,19 +188,13 @@ class BinarySearchIndexReader : public IndexReader {

 // Index that leverages an internal hash table to quicken the lookup for a given
 // key.
-// @param data_iter_gen, equavalent to BlockBasedTable::NewIterator(). But that
-// functions requires index to be initalized. To avoid this problem external
-// caller will pass a function that can create the iterator over the entries
-// without the table to be fully initialized.
 class HashIndexReader : public IndexReader {
 public:
-  static Status Create(RandomAccessFile* file, const Footer& footer,
-                       const BlockHandle& index_handle, Env* env,
+  static Status Create(const SliceTransform* hash_key_extractor,
+                       const Footer& footer, RandomAccessFile* file, Env* env,
                       const Comparator* comparator,
-                       std::function<Iterator*(Iterator*)> data_iter_gen,
-                       const SliceTransform* prefix_extractor,
-                       IndexReader** index_reader) {
-    assert(prefix_extractor);
+                       const BlockHandle& index_handle,
+                       Iterator* meta_index_iter, IndexReader** index_reader) {
    Block* index_block = nullptr;
    auto s = ReadBlockFromFile(file, footer, ReadOptions(), index_handle,
                               &index_block, env);
@@ -207,14 +203,57 @@ class HashIndexReader : public IndexReader {
      return s;
    }

-    *index_reader = new HashIndexReader(comparator, index_block);
-    std::unique_ptr<Iterator> index_iter(index_block->NewIterator(nullptr));
-    std::unique_ptr<Iterator> data_iter(
-        data_iter_gen(index_block->NewIterator(nullptr)));
-    auto hash_index = CreateBlockHashIndex(index_iter.get(), data_iter.get(),
-                                           index_block->NumRestarts(),
-                                           comparator, prefix_extractor);
-    index_block->SetBlockHashIndex(hash_index);
+    // Get prefixes block
+    BlockHandle prefixes_handle;
+    s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesBlock,
+                      &prefixes_handle);
+    if (!s.ok()) {
+      return s;
+    }
+
+    // Get index metadata block
+    BlockHandle prefixes_meta_handle;
+    s = FindMetaBlock(meta_index_iter, kHashIndexPrefixesMetadataBlock,
+                      &prefixes_meta_handle);
+    if (!s.ok()) {
+      return s;
+    }
+
+    // Read contents for the blocks
+    BlockContents prefixes_contents;
+    s = ReadBlockContents(file, footer, ReadOptions(), prefixes_handle,
+                          &prefixes_contents, env, true /* do decompression */);
+    if (!s.ok()) {
+      return s;
+    }
+    BlockContents prefixes_meta_contents;
+    s = ReadBlockContents(file, footer, ReadOptions(), prefixes_meta_handle,
+                          &prefixes_meta_contents, env,
+                          true /* do decompression */);
+    if (!s.ok()) {
+      if (prefixes_contents.heap_allocated) {
+        delete[] prefixes_contents.data.data();
+      }
+      return s;
+    }
+
+    auto new_index_reader =
+        new HashIndexReader(comparator, index_block, prefixes_contents);
+    BlockHashIndex* hash_index = nullptr;
+    s = CreateBlockHashIndex(hash_key_extractor, prefixes_contents.data,
+                             prefixes_meta_contents.data, &hash_index);
+    if (!s.ok()) {
+      return s;
+    }
+
+    new_index_reader->index_block_->SetBlockHashIndex(hash_index);
+
+    *index_reader = new_index_reader;
+
+    // release resources
+    if (prefixes_meta_contents.heap_allocated) {
+      delete[] prefixes_meta_contents.data.data();
+    }
    return s;
  }

@@ -225,11 +264,22 @@ class HashIndexReader : public IndexReader {
  virtual size_t size() const override { return index_block_->size(); }

 private:
-  HashIndexReader(const Comparator* comparator, Block* index_block)
-      : IndexReader(comparator), index_block_(index_block) {
+  // Builds a reader around `index_block`, which is stored in the
+  // std::unique_ptr member index_block_ and therefore owned by this reader.
+  // `prefixes_contents` is copied into prefixes_contents_; if it is
+  // heap-allocated its buffer is released by the destructor.
+  HashIndexReader(const Comparator* comparator, Block* index_block,
+                  const BlockContents& prefixes_contents)
+      : IndexReader(comparator),
+        index_block_(index_block),
+        prefixes_contents_(prefixes_contents) {
    assert(index_block_ != nullptr);
  }
+
+  // Releases the prefixes buffer, but only when the stored BlockContents
+  // says it lives on the heap.
+  ~HashIndexReader() {
+    if (!prefixes_contents_.heap_allocated) {
+      return;
+    }
+    delete[] prefixes_contents_.data.data();
+  }
+
  std::unique_ptr<Block> index_block_;
+  BlockContents prefixes_contents_;
 };


@@ -408,7 +458,7 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
    // and with a same life-time as this table object.
    IndexReader* index_reader = nullptr;
    // TODO: we never really verify check sum for index block
-    s = new_table->CreateIndexReader(&index_reader);
+    s = new_table->CreateIndexReader(&index_reader, meta_iter.get());

    if (s.ok()) {
      rep->index_reader.reset(index_reader);
@@ -417,10 +467,9 @@ Status BlockBasedTable::Open(const Options& options, const EnvOptions& soptions,
      if (rep->options.filter_policy) {
        std::string key = kFilterBlockPrefix;
        key.append(rep->options.filter_policy->Name());
-        meta_iter->Seek(key);
-
-        if (meta_iter->Valid() && meta_iter->key() == Slice(key)) {
-          rep->filter.reset(ReadFilter(meta_iter->value(), rep));
+        BlockHandle handle;
+        if (FindMetaBlock(meta_iter.get(), key, &handle).ok()) {
+          rep->filter.reset(ReadFilter(handle, rep));
        }
      }
    } else {
@@ -617,16 +666,9 @@ Status BlockBasedTable::PutDataBlockToCache(
  return s;
 }

-FilterBlockReader* BlockBasedTable::ReadFilter (
-    const Slice& filter_handle_value,
-    BlockBasedTable::Rep* rep,
-    size_t* filter_size) {
-  Slice v = filter_handle_value;
-  BlockHandle filter_handle;
-  if (!filter_handle.DecodeFrom(&v).ok()) {
-    return nullptr;
-  }
-
+FilterBlockReader* BlockBasedTable::ReadFilter(const BlockHandle& filter_handle,
+                                               BlockBasedTable::Rep* rep,
+                                               size_t* filter_size) {
  // TODO: We might want to unify with ReadBlockFromFile() if we start
  // requiring checksum verification in Table::Open.
  ReadOptions opt;
@@ -687,10 +729,9 @@ BlockBasedTable::CachableEntry<FilterBlockReader> BlockBasedTable::GetFilter(
    if (s.ok()) {
      std::string filter_block_key = kFilterBlockPrefix;
      filter_block_key.append(rep_->options.filter_policy->Name());
-      iter->Seek(filter_block_key);
-
-      if (iter->Valid() && iter->key() == Slice(filter_block_key)) {
-        filter = ReadFilter(iter->value(), rep_, &filter_size);
+      BlockHandle handle;
+      if (FindMetaBlock(iter.get(), filter_block_key, &handle).ok()) {
+        filter = ReadFilter(handle, rep_, &filter_size);
        assert(filter);
        assert(filter_size > 0);

@@ -1032,7 +1073,8 @@ bool BlockBasedTable::TEST_KeyInCache(const ReadOptions& options,
 //  3. options
 //  4. internal_comparator
 //  5. index_type
-Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
+Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader,
+                                          Iterator* preloaded_meta_index_iter) {
  // Some old version of block-based tables don't have index type present in
  // table properties. If that's the case we can safely use the kBinarySearch.
  auto index_type_on_file = BlockBasedTableOptions::kBinarySearch;
@@ -1045,41 +1087,45 @@ Status BlockBasedTable::CreateIndexReader(IndexReader** index_reader) {
    }
  }

-  // TODO(sdong): Currently binary index is the only index type we support in
-  // files. Hash index is built on top of binary index too.
-  if (index_type_on_file != BlockBasedTableOptions::kBinarySearch) {
-    return Status::NotSupported("File Contains not supported index type: ",
-                                std::to_string(index_type_on_file));
-  }
-
  auto file = rep_->file.get();
  auto env = rep_->options.env;
  auto comparator = &rep_->internal_comparator;
  const Footer& footer = rep_->footer;

-  switch (rep_->index_type) {
+  switch (index_type_on_file) {
    case BlockBasedTableOptions::kBinarySearch: {
      return BinarySearchIndexReader::Create(
          file, footer, footer.index_handle(), env, comparator, index_reader);
    }
    case BlockBasedTableOptions::kHashSearch: {
+      std::unique_ptr<Block> meta_guard;
+      std::unique_ptr<Iterator> meta_iter_guard;
+      auto meta_index_iter = preloaded_meta_index_iter;
+      if (meta_index_iter == nullptr) {
+        auto s = ReadMetaBlock(rep_, &meta_guard, &meta_iter_guard);
+        if (!s.ok()) {
+          return Status::Corruption("Unable to read the metaindex block");
+        }
+        meta_index_iter = meta_iter_guard.get();
+      }
+
      // We need to wrap data with internal_prefix_transform to make sure it can
      // handle prefix correctly.
+      if (rep_->options.prefix_extractor == nullptr) {
+        return Status::InvalidArgument(
+            "BlockBasedTableOptions::kHashSearch requires "
+            "options.prefix_extractor to be set.");
+      }
+
      rep_->internal_prefix_transform.reset(
          new InternalKeySliceTransform(rep_->options.prefix_extractor.get()));
      return HashIndexReader::Create(
-          file, footer, footer.index_handle(), env, comparator,
-          [&](Iterator* index_iter) {
-            return NewTwoLevelIterator(new BlockEntryIteratorState(this,
-                ReadOptions(), nullptr), index_iter);
-          },
-          rep_->internal_prefix_transform.get(), index_reader);
+          rep_->internal_prefix_transform.get(), footer, file, env, comparator,
+          footer.index_handle(), meta_index_iter, index_reader);
    }
    default: {
      std::string error_message =
          "Unrecognized index type: " + std::to_string(rep_->index_type);
-      // equivalent to assert(false), but more informative.
-      assert(!error_message.c_str());
      return Status::InvalidArgument(error_message.c_str());
    }
  }

--- a/table/block_based_table_reader.h
+++ b/table/block_based_table_reader.h
@@ -160,8 +160,13 @@ class BlockBasedTable : public TableReader {
  friend class BlockBasedTableBuilder;

  void ReadMeta(const Footer& footer);
-  void ReadFilter(const Slice& filter_handle_value);
-  Status CreateIndexReader(IndexReader** index_reader);
+
+  // Create an index reader based on the index type stored in the table.
+  // Optionally, the caller can pass a preloaded meta_index_iter for index
+  // types that need to access extra meta blocks during construction. This
+  // parameter avoids re-reading the meta index block if the caller has
+  // already created an iterator over it.
+  Status CreateIndexReader(IndexReader** index_reader,
+                           Iterator* preloaded_meta_index_iter = nullptr);

  // Read the meta block from sst.
  static Status ReadMetaBlock(
@@ -170,10 +175,8 @@ class BlockBasedTable : public TableReader {
      std::unique_ptr<Iterator>* iter);

  // Create the filter from the filter block.
-  static FilterBlockReader* ReadFilter(
-      const Slice& filter_handle_value,
-      Rep* rep,
-      size_t* filter_size = nullptr);
+  static FilterBlockReader* ReadFilter(const BlockHandle& filter_handle,
+                                       Rep* rep, size_t* filter_size = nullptr);

  static void SetupCacheKeyPrefix(Rep* rep);


--- a/table/block_hash_index.cc
+++ b/table/block_hash_index.cc
@@ -3,21 +3,62 @@
 // LICENSE file in the root directory of this source tree. An additional grant
 // of patent rights can be found in the PATENTS file in the same directory.

+#include "table/block_hash_index.h"
+
 #include <algorithm>

-#include "table/block_hash_index.h"
 #include "rocksdb/comparator.h"
 #include "rocksdb/iterator.h"
 #include "rocksdb/slice_transform.h"
+#include "util/coding.h"

 namespace rocksdb {

-BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter,
-                                     const uint32_t num_restarts,
-                                     const Comparator* comparator,
-                                     const SliceTransform* hash_key_extractor) {
+// Builds a BlockHashIndex from the serialized prefix blocks.
+//
+// `prefixes` is the concatenation of all prefixes. `prefix_meta` is a
+// sequence of (prefix_size, entry_index, num_blocks) varint32 triples, one
+// triple per prefix, in the same order as the prefixes appear.
+//
+// The index is created with own_prefixes == false, so it stores Slices that
+// point into `prefixes`; that memory has to outlive the returned index.
+//
+// On success *hash_index points to a newly allocated index that the caller
+// must delete. On failure a Corruption status is returned and *hash_index
+// is set to nullptr.
+Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor,
+                            const Slice& prefixes, const Slice& prefix_meta,
+                            BlockHashIndex** hash_index) {
+  uint64_t pos = 0;
+  auto meta_pos = prefix_meta;
+  Status s;
+  *hash_index = new BlockHashIndex(
+      hash_key_extractor,
+      false /* external module manages memory space for prefixes */);
+
+  while (!meta_pos.empty()) {
+    uint32_t prefix_size = 0;
+    uint32_t entry_index = 0;
+    uint32_t num_blocks = 0;
+    if (!GetVarint32(&meta_pos, &prefix_size) ||
+        !GetVarint32(&meta_pos, &entry_index) ||
+        !GetVarint32(&meta_pos, &num_blocks)) {
+      s = Status::Corruption(
+          "Corrupted prefix meta block: unable to read from it.");
+      break;
+    }
+    // `pos` never exceeds prefixes.size(), so the subtraction cannot wrap.
+    // Reject sizes that would make the prefix Slice reach past the buffer
+    // before Add() hashes its bytes.
+    if (prefix_size > prefixes.size() - pos) {
+      s = Status::Corruption(
+          "Corrupted prefix meta block: prefix exceeds the prefixes block.");
+      break;
+    }
+    Slice prefix(prefixes.data() + pos, prefix_size);
+    (*hash_index)->Add(prefix, entry_index, num_blocks);
+
+    pos += prefix_size;
+  }
+
+  // Every byte of `prefixes` must be accounted for by the metadata.
+  if (s.ok() && pos != prefixes.size()) {
+    s = Status::Corruption("Corrupted prefix meta block");
+  }
+
+  if (!s.ok()) {
+    delete *hash_index;
+    // Do not hand a dangling pointer back to the caller.
+    *hash_index = nullptr;
+  }
+
+  return s;
+}
+
+BlockHashIndex* CreateBlockHashIndexOnTheFly(
+    Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts,
+    const Comparator* comparator, const SliceTransform* hash_key_extractor) {
  assert(hash_key_extractor);
-  auto hash_index = new BlockHashIndex(hash_key_extractor);
+  auto hash_index = new BlockHashIndex(
+      hash_key_extractor,
+      true /* hash_index will copy prefix when Add() is called */);
  uint64_t current_restart_index = 0;

  std::string pending_entry_prefix;
@@ -88,12 +129,16 @@ BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter,

+// Inserts a mapping from `prefix` to RestartIndex(restart_index, num_blocks)
+// into restart_indices_. When kOwnPrefixes is set the prefix bytes are first
+// copied into arena_ and the copy is used as the key; otherwise the key
+// refers to the caller's memory. Returns the `second` member of the
+// unordered_map insert result, i.e. whether a new entry was inserted.
 bool BlockHashIndex::Add(const Slice& prefix, uint32_t restart_index,
                         uint32_t num_blocks) {
-  auto prefix_ptr = arena_.Allocate(prefix.size());
-  std::copy(prefix.data() /* begin */, prefix.data() + prefix.size() /* end */,
-            prefix_ptr /* destination */);
-  auto result =
-      restart_indices_.insert({Slice(prefix_ptr, prefix.size()),
-                               RestartIndex(restart_index, num_blocks)});
+  auto prefix_to_insert = prefix;
+  if (kOwnPrefixes) {
+    auto prefix_ptr = arena_.Allocate(prefix.size());
+    std::copy(prefix.data() /* begin */,
+              prefix.data() + prefix.size() /* end */,
+              prefix_ptr /* destination */);
+    prefix_to_insert = Slice(prefix_ptr, prefix.size());
+  }
+  auto result = restart_indices_.insert(
+      {prefix_to_insert, RestartIndex(restart_index, num_blocks)});
   return result.second;
  }


--- a/table/block_hash_index.h
+++ b/table/block_hash_index.h
@@ -7,6 +7,7 @@
 #include <string>
 #include <unordered_map>

+#include "rocksdb/status.h"
 #include "util/arena.h"
 #include "util/murmurhash.h"

@@ -35,8 +36,12 @@ class BlockHashIndex {
    uint32_t num_blocks = 1;
  };

-  explicit BlockHashIndex(const SliceTransform* hash_key_extractor)
-      : hash_key_extractor_(hash_key_extractor) {}
+  // @params own_prefixes indicates whether this index copies, and therefore
+  // owns, the memory of each prefix passed to Add(). When false, the index
+  // only references the caller's memory for those prefixes.
+  explicit BlockHashIndex(const SliceTransform* hash_key_extractor,
+                          bool own_prefixes)
+      : hash_key_extractor_(hash_key_extractor), kOwnPrefixes(own_prefixes) {}

  // Maps a key to its restart first_index.
  // Returns nullptr if the restart first_index is found
@@ -52,9 +57,18 @@ class BlockHashIndex {
 private:
  const SliceTransform* hash_key_extractor_;
  std::unordered_map<Slice, RestartIndex, murmur_hash> restart_indices_;
+
  Arena arena_;
+  bool kOwnPrefixes;
 };

+// Create hash index by reading from the metadata blocks.
+// @params prefixes: a sequence of prefixes.
+// @params prefix_meta: contains the "metadata" of the prefixes.
+Status CreateBlockHashIndex(const SliceTransform* hash_key_extractor,
+                            const Slice& prefixes, const Slice& prefix_meta,
+                            BlockHashIndex** hash_index);
+
 // Create hash index by scanning the entries in index as well as the whole
 // dataset.
 // @params index_iter: an iterator with the pointer to the first entry in a
@@ -64,9 +78,8 @@ class BlockHashIndex {
 // @params num_restarts: used for correctness verification.
 // @params hash_key_extractor: extract the hashable part of a given key.
 // On error, nullptr will be returned.
-BlockHashIndex* CreateBlockHashIndex(Iterator* index_iter, Iterator* data_iter,
-                                     const uint32_t num_restarts,
-                                     const Comparator* comparator,
-                                     const SliceTransform* hash_key_extractor);
+BlockHashIndex* CreateBlockHashIndexOnTheFly(
+    Iterator* index_iter, Iterator* data_iter, const uint32_t num_restarts,
+    const Comparator* comparator, const SliceTransform* hash_key_extractor);

 }  // namespace rocksdb
--- a/table/block_hash_index_test.cc
+++ b/table/block_hash_index_test.cc
@@ -81,9 +81,9 @@ TEST(BlockTest, BasicTest) {
  MapIterator index_iter(index_entries);

  auto prefix_extractor = NewFixedPrefixTransform(prefix_size);
-  std::unique_ptr<BlockHashIndex> block_hash_index(
-      CreateBlockHashIndex(&index_iter, &data_iter, index_entries.size(),
-                           BytewiseComparator(), prefix_extractor));
+  std::unique_ptr<BlockHashIndex> block_hash_index(CreateBlockHashIndexOnTheFly(
+      &index_iter, &data_iter, index_entries.size(), BytewiseComparator(),
+      prefix_extractor));

  std::map<std::string, BlockHashIndex::RestartIndex> expected = {
      {"01xx", BlockHashIndex::RestartIndex(0, 1)},

--- a/table/block_test.cc
+++ b/table/block_test.cc
@@ -163,9 +163,9 @@ void CheckBlockContents(BlockContents contents, const int max_key,
  {
    auto iter1 = reader1.NewIterator(nullptr);
    auto iter2 = reader1.NewIterator(nullptr);
-    reader1.SetBlockHashIndex(CreateBlockHashIndex(iter1, iter2, keys.size(),
-                                                   BytewiseComparator(),
-                                                   prefix_extractor.get()));
+    reader1.SetBlockHashIndex(CreateBlockHashIndexOnTheFly(
+        iter1, iter2, keys.size(), BytewiseComparator(),
+        prefix_extractor.get()));

    delete iter1;
    delete iter2;

--- a/table/meta_blocks.cc
+++ b/table/meta_blocks.cc
@@ -254,11 +254,23 @@ Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
                       properties);
  } else {
    s = Status::Corruption("Unable to read the property block.");
-    Log(WARN_LEVEL, info_log,
-        "Cannot find Properties block from file.");
+    Log(WARN_LEVEL, info_log, "Cannot find Properties block from file.");
  }

  return s;
 }

+// Seeks `meta_index_iter` to `meta_block_name` and decodes the value stored
+// under that key into *block_handle. Returns Corruption when the iterator
+// reports an error, is not valid after the seek, or lands on a different key;
+// otherwise returns the result of decoding the handle.
+Status FindMetaBlock(Iterator* meta_index_iter,
+                     const std::string& meta_block_name,
+                     BlockHandle* block_handle) {
+  meta_index_iter->Seek(meta_block_name);
+
+  const bool found = meta_index_iter->status().ok() &&
+                     meta_index_iter->Valid() &&
+                     meta_index_iter->key() == meta_block_name;
+  if (!found) {
+    return Status::Corruption("Cannot find the meta block", meta_block_name);
+  }
+
+  Slice encoded_handle = meta_index_iter->value();
+  return block_handle->DecodeFrom(&encoded_handle);
+}
+
 }  // namespace rocksdb
--- a/table/meta_blocks.h
+++ b/table/meta_blocks.h
@@ -123,4 +123,9 @@ Status ReadTableProperties(RandomAccessFile* file, uint64_t file_size,
 // set to true.
 extern Status SeekToPropertiesBlock(Iterator* meta_iter, bool* is_found);

+// Find the meta block from the meta index block.
+Status FindMetaBlock(Iterator* meta_index_iter,
+                     const std::string& meta_block_name,
+                     BlockHandle* block_handle);
+
 }  // namespace rocksdb
--- a/third-party/rapidjson/document.h
+++ b/third-party/rapidjson/document.h
--- a/third-party/rapidjson/filestream.h
+++ b/third-party/rapidjson/filestream.h
+#ifndef RAPIDJSON_FILESTREAM_H_
+#define RAPIDJSON_FILESTREAM_H_
+
+#include <cstdio>
+
+namespace rapidjson {
+
+//! Thin adapter exposing a C FILE* as a character stream for reading or writing.
+/*!
+	The wrapper performs no validity checks on the stream beyond asserting
+	that the FILE pointer is non-null when reading.
+	\implements Stream
+*/
+class FileStream {
+public:
+	typedef char Ch;	//!< Character type. Only char is supported.
+
+	//! Wraps \c fp and immediately reads the first character ahead.
+	FileStream(FILE* fp) : fp_(fp), count_(0) {
+		Read();
+	}
+
+	//! Returns the current character without consuming it.
+	char Peek() const {
+		return current_;
+	}
+
+	//! Returns the current character and advances to the next one.
+	char Take() {
+		const char taken = current_;
+		Read();
+		return taken;
+	}
+
+	//! Number of characters successfully read from the file so far.
+	size_t Tell() const {
+		return count_;
+	}
+
+	//! Writes one character to the file.
+	void Put(char c) {
+		fputc(c, fp_);
+	}
+
+	// Not implemented
+	char* PutBegin() {
+		return 0;
+	}
+	size_t PutEnd(char*) {
+		return 0;
+	}
+
+private:
+	//! Fetches the next character into current_; stores '\0' at end of file.
+	void Read() {
+		RAPIDJSON_ASSERT(fp_ != 0);
+		const int next = fgetc(fp_);
+		if (next == EOF) {
+			current_ = '\0';
+			return;
+		}
+		current_ = static_cast<char>(next);
+		++count_;
+	}
+
+	FILE* fp_;
+	char current_;
+	size_t count_;
+};
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_FILESTREAM_H_
--- a/third-party/rapidjson/internal/pow10.h
+++ b/third-party/rapidjson/internal/pow10.h
+#ifndef RAPIDJSON_POW10_
+#define RAPIDJSON_POW10_
+
+namespace rapidjson {
+namespace internal {
+
+//! Computes integer powers of 10 in double (10.0^n).
+/*! This function uses a lookup table for fast and accurate results.
+	\param n positive/negative exponent. Must be <= 308 (asserted). Exponents
+	         below -308 are accepted and yield 0.0.
+	\return 10.0^n
+*/
+inline double Pow10(int n) {
+	static const double e[] = { // 1e-308...1e308: 617 * 8 bytes = 4936 bytes
+		1e-308,1e-307,1e-306,1e-305,1e-304,1e-303,1e-302,1e-301,1e-300,
+		1e-299,1e-298,1e-297,1e-296,1e-295,1e-294,1e-293,1e-292,1e-291,1e-290,1e-289,1e-288,1e-287,1e-286,1e-285,1e-284,1e-283,1e-282,1e-281,1e-280,
+		1e-279,1e-278,1e-277,1e-276,1e-275,1e-274,1e-273,1e-272,1e-271,1e-270,1e-269,1e-268,1e-267,1e-266,1e-265,1e-264,1e-263,1e-262,1e-261,1e-260,
+		1e-259,1e-258,1e-257,1e-256,1e-255,1e-254,1e-253,1e-252,1e-251,1e-250,1e-249,1e-248,1e-247,1e-246,1e-245,1e-244,1e-243,1e-242,1e-241,1e-240,
+		1e-239,1e-238,1e-237,1e-236,1e-235,1e-234,1e-233,1e-232,1e-231,1e-230,1e-229,1e-228,1e-227,1e-226,1e-225,1e-224,1e-223,1e-222,1e-221,1e-220,
+		1e-219,1e-218,1e-217,1e-216,1e-215,1e-214,1e-213,1e-212,1e-211,1e-210,1e-209,1e-208,1e-207,1e-206,1e-205,1e-204,1e-203,1e-202,1e-201,1e-200,
+		1e-199,1e-198,1e-197,1e-196,1e-195,1e-194,1e-193,1e-192,1e-191,1e-190,1e-189,1e-188,1e-187,1e-186,1e-185,1e-184,1e-183,1e-182,1e-181,1e-180,
+		1e-179,1e-178,1e-177,1e-176,1e-175,1e-174,1e-173,1e-172,1e-171,1e-170,1e-169,1e-168,1e-167,1e-166,1e-165,1e-164,1e-163,1e-162,1e-161,1e-160,
+		1e-159,1e-158,1e-157,1e-156,1e-155,1e-154,1e-153,1e-152,1e-151,1e-150,1e-149,1e-148,1e-147,1e-146,1e-145,1e-144,1e-143,1e-142,1e-141,1e-140,
+		1e-139,1e-138,1e-137,1e-136,1e-135,1e-134,1e-133,1e-132,1e-131,1e-130,1e-129,1e-128,1e-127,1e-126,1e-125,1e-124,1e-123,1e-122,1e-121,1e-120,
+		1e-119,1e-118,1e-117,1e-116,1e-115,1e-114,1e-113,1e-112,1e-111,1e-110,1e-109,1e-108,1e-107,1e-106,1e-105,1e-104,1e-103,1e-102,1e-101,1e-100,
+		1e-99, 1e-98, 1e-97, 1e-96, 1e-95, 1e-94, 1e-93, 1e-92, 1e-91, 1e-90, 1e-89, 1e-88, 1e-87, 1e-86, 1e-85, 1e-84, 1e-83, 1e-82, 1e-81, 1e-80, 
+		1e-79, 1e-78, 1e-77, 1e-76, 1e-75, 1e-74, 1e-73, 1e-72, 1e-71, 1e-70, 1e-69, 1e-68, 1e-67, 1e-66, 1e-65, 1e-64, 1e-63, 1e-62, 1e-61, 1e-60, 
+		1e-59, 1e-58, 1e-57, 1e-56, 1e-55, 1e-54, 1e-53, 1e-52, 1e-51, 1e-50, 1e-49, 1e-48, 1e-47, 1e-46, 1e-45, 1e-44, 1e-43, 1e-42, 1e-41, 1e-40, 
+		1e-39, 1e-38, 1e-37, 1e-36, 1e-35, 1e-34, 1e-33, 1e-32, 1e-31, 1e-30, 1e-29, 1e-28, 1e-27, 1e-26, 1e-25, 1e-24, 1e-23, 1e-22, 1e-21, 1e-20, 
+		1e-19, 1e-18, 1e-17, 1e-16, 1e-15, 1e-14, 1e-13, 1e-12, 1e-11, 1e-10, 1e-9,  1e-8,  1e-7,  1e-6,  1e-5,  1e-4,  1e-3,  1e-2,  1e-1,  1e+0,  
+		1e+1,  1e+2,  1e+3,  1e+4,  1e+5,  1e+6,  1e+7,  1e+8,  1e+9,  1e+10, 1e+11, 1e+12, 1e+13, 1e+14, 1e+15, 1e+16, 1e+17, 1e+18, 1e+19, 1e+20, 
+		1e+21, 1e+22, 1e+23, 1e+24, 1e+25, 1e+26, 1e+27, 1e+28, 1e+29, 1e+30, 1e+31, 1e+32, 1e+33, 1e+34, 1e+35, 1e+36, 1e+37, 1e+38, 1e+39, 1e+40,
+		1e+41, 1e+42, 1e+43, 1e+44, 1e+45, 1e+46, 1e+47, 1e+48, 1e+49, 1e+50, 1e+51, 1e+52, 1e+53, 1e+54, 1e+55, 1e+56, 1e+57, 1e+58, 1e+59, 1e+60,
+		1e+61, 1e+62, 1e+63, 1e+64, 1e+65, 1e+66, 1e+67, 1e+68, 1e+69, 1e+70, 1e+71, 1e+72, 1e+73, 1e+74, 1e+75, 1e+76, 1e+77, 1e+78, 1e+79, 1e+80,
+		1e+81, 1e+82, 1e+83, 1e+84, 1e+85, 1e+86, 1e+87, 1e+88, 1e+89, 1e+90, 1e+91, 1e+92, 1e+93, 1e+94, 1e+95, 1e+96, 1e+97, 1e+98, 1e+99, 1e+100,
+		1e+101,1e+102,1e+103,1e+104,1e+105,1e+106,1e+107,1e+108,1e+109,1e+110,1e+111,1e+112,1e+113,1e+114,1e+115,1e+116,1e+117,1e+118,1e+119,1e+120,
+		1e+121,1e+122,1e+123,1e+124,1e+125,1e+126,1e+127,1e+128,1e+129,1e+130,1e+131,1e+132,1e+133,1e+134,1e+135,1e+136,1e+137,1e+138,1e+139,1e+140,
+		1e+141,1e+142,1e+143,1e+144,1e+145,1e+146,1e+147,1e+148,1e+149,1e+150,1e+151,1e+152,1e+153,1e+154,1e+155,1e+156,1e+157,1e+158,1e+159,1e+160,
+		1e+161,1e+162,1e+163,1e+164,1e+165,1e+166,1e+167,1e+168,1e+169,1e+170,1e+171,1e+172,1e+173,1e+174,1e+175,1e+176,1e+177,1e+178,1e+179,1e+180,
+		1e+181,1e+182,1e+183,1e+184,1e+185,1e+186,1e+187,1e+188,1e+189,1e+190,1e+191,1e+192,1e+193,1e+194,1e+195,1e+196,1e+197,1e+198,1e+199,1e+200,
+		1e+201,1e+202,1e+203,1e+204,1e+205,1e+206,1e+207,1e+208,1e+209,1e+210,1e+211,1e+212,1e+213,1e+214,1e+215,1e+216,1e+217,1e+218,1e+219,1e+220,
+		1e+221,1e+222,1e+223,1e+224,1e+225,1e+226,1e+227,1e+228,1e+229,1e+230,1e+231,1e+232,1e+233,1e+234,1e+235,1e+236,1e+237,1e+238,1e+239,1e+240,
+		1e+241,1e+242,1e+243,1e+244,1e+245,1e+246,1e+247,1e+248,1e+249,1e+250,1e+251,1e+252,1e+253,1e+254,1e+255,1e+256,1e+257,1e+258,1e+259,1e+260,
+		1e+261,1e+262,1e+263,1e+264,1e+265,1e+266,1e+267,1e+268,1e+269,1e+270,1e+271,1e+272,1e+273,1e+274,1e+275,1e+276,1e+277,1e+278,1e+279,1e+280,
+		1e+281,1e+282,1e+283,1e+284,1e+285,1e+286,1e+287,1e+288,1e+289,1e+290,1e+291,1e+292,1e+293,1e+294,1e+295,1e+296,1e+297,1e+298,1e+299,1e+300,
+		1e+301,1e+302,1e+303,1e+304,1e+305,1e+306,1e+307,1e+308
+	};
+	RAPIDJSON_ASSERT(n <= 308);
+	// The table starts at 1e-308, so entry [n + 308] holds 10^n; anything
+	// below the table's range is reported as 0.0 instead of indexing it.
+	return n < -308 ? 0.0 : e[n + 308];
+}
+
+} // namespace internal
+} // namespace rapidjson
+
+#endif // RAPIDJSON_POW10_
--- a/third-party/rapidjson/internal/stack.h
+++ b/third-party/rapidjson/internal/stack.h
+#ifndef RAPIDJSON_INTERNAL_STACK_H_
+#define RAPIDJSON_INTERNAL_STACK_H_
+
+namespace rapidjson {
+namespace internal {
+
+///////////////////////////////////////////////////////////////////////////////
+// Stack
+
+//! Untyped, growable byte stack on which values of arbitrary types are stored.
+/*! \tparam Allocator Allocator used to obtain, resize and free the buffer.
+*/
+template <typename Allocator>
+class Stack {
+public:
+	//! Allocates a buffer of \c stack_capacity bytes.
+	/*! \param allocator Allocator to use; when null, the stack creates and owns one.
+		\param stack_capacity Initial buffer size in bytes; must be greater than zero.
+	*/
+	Stack(Allocator* allocator, size_t stack_capacity)
+		: allocator_(allocator),
+		  own_allocator_(0),
+		  stack_(0),
+		  stack_top_(0),
+		  stack_end_(0),
+		  stack_capacity_(stack_capacity) {
+		RAPIDJSON_ASSERT(stack_capacity_ > 0);
+		if (!allocator_) {
+			allocator_ = new Allocator();
+			own_allocator_ = allocator_;
+		}
+		stack_ = (char*)allocator_->Malloc(stack_capacity_);
+		stack_top_ = stack_;
+		stack_end_ = stack_ + stack_capacity_;
+	}
+
+	~Stack() {
+		Allocator::Free(stack_);
+		delete own_allocator_; // Null unless the stack created the allocator itself
+	}
+
+	//! Discards all content; the buffer and its capacity are kept.
+	void Clear() {
+		stack_top_ = stack_;
+	}
+
+	//! Reserves room for \c count objects of type T on top and returns the first slot.
+	template<typename T>
+	T* Push(size_t count = 1) {
+		const size_t bytes = sizeof(T) * count;
+		if (stack_top_ + bytes >= stack_end_)
+			Expand(bytes);
+		T* slot = (T*)stack_top_;
+		stack_top_ += bytes;
+		return slot;
+	}
+
+	//! Removes \c count objects of type T from the top and returns the new top position.
+	template<typename T>
+	T* Pop(size_t count) {
+		const size_t bytes = count * sizeof(T);
+		RAPIDJSON_ASSERT(GetSize() >= bytes);
+		stack_top_ -= bytes;
+		return (T*)stack_top_;
+	}
+
+	//! Returns the topmost object of type T without removing it.
+	template<typename T>
+	T* Top() {
+		RAPIDJSON_ASSERT(GetSize() >= sizeof(T));
+		char* const last = stack_top_ - sizeof(T);
+		return (T*)last;
+	}
+
+	//! Returns the start of the buffer viewed as T.
+	template<typename T>
+	T* Bottom() {
+		return (T*)stack_;
+	}
+
+	Allocator& GetAllocator() {
+		return *allocator_;
+	}
+
+	//! Number of bytes currently in use.
+	size_t GetSize() const {
+		return stack_top_ - stack_;
+	}
+
+	//! Current buffer size in bytes.
+	size_t GetCapacity() const {
+		return stack_capacity_;
+	}
+
+private:
+	//! Reallocates the buffer so that \c bytes more bytes fit on top of the content.
+	/*! The capacity is doubled, or set to exactly the required size when doubling
+		is not enough.
+	*/
+	void Expand(size_t bytes) {
+		const size_t used = GetSize();
+		const size_t required = used + bytes;
+		size_t grown = stack_capacity_ * 2;
+		if (grown < required)
+			grown = required;
+		stack_ = (char*)allocator_->Realloc(stack_, stack_capacity_, grown);
+		stack_capacity_ = grown;
+		stack_top_ = stack_ + used;
+		stack_end_ = stack_ + stack_capacity_;
+	}
+
+	Allocator* allocator_;
+	Allocator* own_allocator_;
+	char *stack_;
+	char *stack_top_;
+	char *stack_end_;
+	size_t stack_capacity_;
+};
+
+} // namespace internal
+} // namespace rapidjson
+
+#endif // RAPIDJSON_INTERNAL_STACK_H_
--- a/third-party/rapidjson/internal/strfunc.h
+++ b/third-party/rapidjson/internal/strfunc.h
+#ifndef RAPIDJSON_INTERNAL_STRFUNC_H_
+#define RAPIDJSON_INTERNAL_STRFUNC_H_
+
+namespace rapidjson {
+namespace internal {
+
+//! strlen() replacement that works for any character type.
+/*!	\tparam Ch Character type (e.g. char, wchar_t, short)
+	\param s Null-terminated input string.
+	\return Number of characters before the terminator.
+	\note Same semantics as strlen(): the result counts code units, not Unicode codepoints.
+*/
+template <typename Ch>
+inline SizeType StrLen(const Ch* s) {
+	const Ch* end = s;
+	for (; *end != '\0'; ++end) {
+		// advance to the terminator
+	}
+	return SizeType(end - s);
+}
+
+} // namespace internal
+} // namespace rapidjson
+
+#endif // RAPIDJSON_INTERNAL_STRFUNC_H_
--- a/third-party/rapidjson/license.txt
+++ b/third-party/rapidjson/license.txt
+Copyright (C) 2011 Milo Yip
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
\ No newline at end of file
--- a/third-party/rapidjson/prettywriter.h
+++ b/third-party/rapidjson/prettywriter.h
+#ifndef RAPIDJSON_PRETTYWRITER_H_
+#define RAPIDJSON_PRETTYWRITER_H_
+
+#include "writer.h"
+
+namespace rapidjson {
+
+//! Writer with indentation and spacing.
+/*!
+	\tparam Stream Type of ouptut stream.
+	\tparam Encoding Encoding of both source strings and output.
+	\tparam Allocator Type of allocator for allocating memory of stack.
+*/
+template<typename Stream, typename Encoding = UTF8<>, typename Allocator = MemoryPoolAllocator<> >
+class PrettyWriter : public Writer<Stream, Encoding, Allocator> {
+public:
+	typedef Writer<Stream, Encoding, Allocator> Base;
+	typedef typename Base::Ch Ch;
+
+	//! Constructor
+	/*! \param stream Output stream.
+		\param allocator User supplied allocator. If it is null, it will create a private one.
+		\param levelDepth Initial capacity of 
+	*/
+	PrettyWriter(Stream& stream, Allocator* allocator = 0, size_t levelDepth = Base::kDefaultLevelDepth) : 
+		Base(stream, allocator, levelDepth), indentChar_(' '), indentCharCount_(4) {}
+
+	//! Set custom indentation.
+	/*! \param indentChar		Character for indentation. Must be whitespace character (' ', '\t', '\n', '\r').
+		\param indentCharCount	Number of indent characters for each indentation level.
+		\note The default indentation is 4 spaces.
+	*/
+	PrettyWriter& SetIndent(Ch indentChar, unsigned indentCharCount) {
+		RAPIDJSON_ASSERT(indentChar == ' ' || indentChar == '\t' || indentChar == '\n' || indentChar == '\r');
+		indentChar_ = indentChar;
+		indentCharCount_ = indentCharCount;
+		return *this;
+	}
+
+	//@name Implementation of Handler.
+	//@{
+
+	PrettyWriter& Null()				{ PrettyPrefix(kNullType);   Base::WriteNull();			return *this; }
+	PrettyWriter& Bool(bool b)			{ PrettyPrefix(b ? kTrueType : kFalseType); Base::WriteBool(b); return *this; }
+	PrettyWriter& Int(int i)			{ PrettyPrefix(kNumberType); Base::WriteInt(i);			return *this; }
+	PrettyWriter& Uint(unsigned u)		{ PrettyPrefix(kNumberType); Base::WriteUint(u);		return *this; }
+	PrettyWriter& Int64(int64_t i64)	{ PrettyPrefix(kNumberType); Base::WriteInt64(i64);		return *this; }
+	PrettyWriter& Uint64(uint64_t u64)	{ PrettyPrefix(kNumberType); Base::WriteUint64(u64);	return *this; }
+	PrettyWriter& Double(double d)		{ PrettyPrefix(kNumberType); Base::WriteDouble(d);		return *this; }
+
+	PrettyWriter& String(const Ch* str, SizeType length, bool copy = false) {
+		(void)copy;
+		PrettyPrefix(kStringType);
+		Base::WriteString(str, length);
+		return *this;
+	}
+
+	PrettyWriter& StartObject() {
+		PrettyPrefix(kObjectType);
+		new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(false);
+		Base::WriteStartObject();
+		return *this;
+	}
+
+	PrettyWriter& EndObject(SizeType memberCount = 0) {
+		(void)memberCount;
+		RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
+		RAPIDJSON_ASSERT(!Base::level_stack_.template Top<typename Base::Level>()->inArray);
+		bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
+
+		if (!empty) {
+			Base::stream_.Put('\n');
+			WriteIndent();
+		}
+		Base::WriteEndObject();
+		return *this;
+	}
+
+	PrettyWriter& StartArray() {
+		PrettyPrefix(kArrayType);
+		new (Base::level_stack_.template Push<typename Base::Level>()) typename Base::Level(true);
+		Base::WriteStartArray();
+		return *this;
+	}
+
+	PrettyWriter& EndArray(SizeType memberCount = 0) {
+		(void)memberCount;
+		RAPIDJSON_ASSERT(Base::level_stack_.GetSize() >= sizeof(typename Base::Level));
+		RAPIDJSON_ASSERT(Base::level_stack_.template Top<typename Base::Level>()->inArray);
+		bool empty = Base::level_stack_.template Pop<typename Base::Level>(1)->valueCount == 0;
+
+		if (!empty) {
+			Base::stream_.Put('\n');
+			WriteIndent();
+		}
+		Base::WriteEndArray();
+		return *this;
+	}
+
+	//@}
+
+	//! Simpler but slower overload.
+	PrettyWriter& String(const Ch* str) { return String(str, internal::StrLen(str)); }
+
+protected:
+	//! Emits the separator, line break and indentation that precede the next value, then counts the value.
+	/*! - At the root nothing is written; only object or array types are accepted there.
+		- Array elements and object member names start on a new, indented line
+		  (preceded by ',' unless they are the first entry of the level).
+		- Object member values follow their name after ": ".
+		\param type Type of the value about to be written; only used for assertions.
+	*/
+	void PrettyPrefix(Type type) {
+		(void)type;
+		if (Base::level_stack_.GetSize() == 0) {	// this value is at root
+			RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType);
+			return;
+		}
+
+		typename Base::Level* level = Base::level_stack_.template Top<typename Base::Level>();
+		const bool firstInLevel = (level->valueCount == 0);
+		const bool expectsName = !level->inArray && (level->valueCount % 2 == 0);
+
+		if (level->inArray || expectsName) {
+			// Array element or object member name: goes on its own indented line.
+			if (!firstInLevel)
+				Base::stream_.Put(',');
+			Base::stream_.Put('\n');
+			WriteIndent();
+		}
+		else {
+			// Object member value: stays on the line of its name.
+			Base::stream_.Put(':');
+			Base::stream_.Put(' ');
+		}
+
+		if (expectsName)
+			RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+		level->valueCount++;
+	}
+
+	//! Writes indentChar_ to the stream (number of stacked levels * indentCharCount_) times.
+	void WriteIndent()  {
+		const size_t depth = Base::level_stack_.GetSize() / sizeof(typename Base::Level);
+		PutN(Base::stream_, indentChar_, depth * indentCharCount_);
+	}
+
+	Ch indentChar_;
+	unsigned indentCharCount_;
+};
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_PRETTYWRITER_H_
--- a/third-party/rapidjson/rapidjson.h
+++ b/third-party/rapidjson/rapidjson.h
+#ifndef RAPIDJSON_RAPIDJSON_H_
+#define RAPIDJSON_RAPIDJSON_H_
+
+// Copyright (c) 2011-2012 Milo Yip (miloyip@gmail.com)
+// Version 0.11
+
+#include <cstdlib>	// malloc(), realloc(), free()
+#include <cstring>	// memcpy()
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NO_INT64DEFINE
+
+// Defines the int64_t and uint64_t types in the global namespace.
+// Users who have their own definitions can define RAPIDJSON_NO_INT64DEFINE to disable this.
+#ifndef RAPIDJSON_NO_INT64DEFINE
+#ifdef _MSC_VER
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#else
+#include <inttypes.h>
+#endif
+#endif // RAPIDJSON_NO_INT64DEFINE
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ENDIAN
+#define RAPIDJSON_LITTLEENDIAN	0	//!< Little endian machine
+#define RAPIDJSON_BIGENDIAN		1	//!< Big endian machine
+
+//! Endianness of the machine.
+/*!	GCC provided macro for detecting endianness of the target machine. But other
+	compilers may not have this. User can define RAPIDJSON_ENDIAN to either
+	RAPIDJSON_LITTLEENDIAN or RAPIDJSON_BIGENDIAN.
+*/
+#ifndef RAPIDJSON_ENDIAN
+#ifdef __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN
+#else
+#define RAPIDJSON_ENDIAN RAPIDJSON_BIGENDIAN
+#endif // __BYTE_ORDER__
+#else
+#define RAPIDJSON_ENDIAN RAPIDJSON_LITTLEENDIAN	// Assumes little endian otherwise.
+#endif
+#endif // RAPIDJSON_ENDIAN
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_SSE2/RAPIDJSON_SSE42/RAPIDJSON_SIMD
+
+// Enable SSE2 optimization.
+//#define RAPIDJSON_SSE2
+
+// Enable SSE4.2 optimization.
+//#define RAPIDJSON_SSE42
+
+#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42)
+#define RAPIDJSON_SIMD
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_NO_SIZETYPEDEFINE
+
+#ifndef RAPIDJSON_NO_SIZETYPEDEFINE
+namespace rapidjson {
+//! Use 32-bit array/string indices even for 64-bit platform, instead of using size_t.
+/*! User may override the SizeType by defining RAPIDJSON_NO_SIZETYPEDEFINE.
+*/
+typedef unsigned SizeType;
+} // namespace rapidjson
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// RAPIDJSON_ASSERT
+
+//! Assertion.
+/*! By default, rapidjson uses C assert() for assertion.
+	User can override it by defining RAPIDJSON_ASSERT(x) macro.
+*/
+#ifndef RAPIDJSON_ASSERT
+#include <cassert>
+#define RAPIDJSON_ASSERT(x) assert(x)
+#endif // RAPIDJSON_ASSERT
+
+///////////////////////////////////////////////////////////////////////////////
+// Helpers
+
+#define RAPIDJSON_MULTILINEMACRO_BEGIN do {  
+#define RAPIDJSON_MULTILINEMACRO_END \
+} while((void)0, 0)
+
+namespace rapidjson {
+
+///////////////////////////////////////////////////////////////////////////////
+// Allocator
+
+/*! \class rapidjson::Allocator
+	\brief Concept for allocating, resizing and freeing memory block.
+	
+	Note that Malloc() and Realloc() are non-static but Free() is static.
+	
+	So if an allocator needs to support Free(), it needs to put its pointer in
+	the header of the memory block.
+
+\code
+concept Allocator {
+	static const bool kNeedFree;	//!< Whether this allocator needs to call Free().
+
+	// Allocate a memory block.
+	// \param size of the memory block in bytes.
+	// \returns pointer to the memory block.
+	void* Malloc(size_t size);
+
+	// Resize a memory block.
+	// \param originalPtr The pointer to current memory block. Null pointer is permitted.
+	// \param originalSize The current size in bytes. (Design issue: since some allocator may not book-keep this, explicitly pass to it can save memory.)
+	// \param newSize the new size in bytes.
+	void* Realloc(void* originalPtr, size_t originalSize, size_t newSize);
+
+	// Free a memory block.
+	// \param pointer to the memory block. Null pointer is permitted.
+	static void Free(void *ptr);
+};
+\endcode
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+// CrtAllocator
+
+//! Allocator backed directly by the C runtime heap.
+/*! Thin wrapper that forwards to malloc(), realloc() and free().
+	\implements Allocator
+*/
+class CrtAllocator {
+public:
+	static const bool kNeedFree = true;	//!< Blocks from this allocator have to be released with Free().
+
+	//! Allocates \c size bytes with malloc().
+	void* Malloc(size_t size) {
+		return malloc(size);
+	}
+
+	//! Resizes a block with realloc(); \c originalSize is ignored.
+	void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
+		(void)originalSize;
+		return realloc(originalPtr, newSize);
+	}
+
+	//! Releases a block with free().
+	static void Free(void *ptr) {
+		free(ptr);
+	}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// MemoryPoolAllocator
+
+//! Default memory allocator used by the parser and DOM.
+/*! This allocator hands out memory blocks carved from pre-allocated memory chunks.
+
+    It does not free individual memory blocks. Realloc() either grows the most recent
+    block in place or allocates new memory and copies the old content.
+
+    The memory chunks are allocated by BaseAllocator, which is CrtAllocator by default.
+
+    User may also supply a buffer as the first chunk.
+
+    If the user-buffer is full then additional chunks are allocated by BaseAllocator.
+
+    The user-buffer is not deallocated by this allocator.
+
+    The allocator owns its chunks, so it can be neither copied nor assigned.
+
+    \tparam BaseAllocator the allocator type for allocating memory chunks. Default is CrtAllocator.
+	\implements Allocator
+*/
+template <typename BaseAllocator = CrtAllocator>
+class MemoryPoolAllocator {
+public:
+	static const bool kNeedFree = false;	//!< Tell users that no need to call Free() with this allocator. (concept Allocator)
+
+	//! Constructor with chunkSize.
+	/*! Allocates the first chunk immediately.
+		\param chunkSize The size of memory chunk. The default is kDefaultChunkCapacity.
+		\param baseAllocator The allocator for allocating memory chunks. If null, one is created and owned by this object.
+	*/
+	MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
+		chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+	{
+		AddChunk(chunk_capacity_);	// also creates the base allocator when none was supplied
+	}
+
+	//! Constructor with user-supplied buffer.
+	/*! The user buffer will be used firstly. When it is full, memory pool allocates new chunk with chunk size.
+
+		The user buffer will not be deallocated when this allocator is destructed.
+
+		\param buffer User supplied buffer. Its start is used as a ChunkHeader.
+			NOTE(review): presumably it must be suitably aligned for ChunkHeader - confirm with callers.
+		\param size Size of the buffer in bytes. It must be larger than sizeof(ChunkHeader).
+		\param chunkSize The size of memory chunk. The default is kDefaultChunkCapacity.
+		\param baseAllocator The allocator for allocating memory chunks. If null, one is created on demand
+			the first time an additional chunk is needed.
+	*/
+	MemoryPoolAllocator(char *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
+		chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
+	{
+		RAPIDJSON_ASSERT(buffer != 0);
+		RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
+		chunkHead_ = (ChunkHeader*)buffer;
+		chunkHead_->capacity = size - sizeof(ChunkHeader);
+		chunkHead_->size = 0;
+		chunkHead_->next = 0;
+	}
+
+	//! Destructor.
+	/*! This deallocates all memory chunks, excluding the user-supplied buffer.
+		The user-supplied buffer is not accessed here.
+	*/
+	~MemoryPoolAllocator() {
+		FreeChunks();
+		delete ownBaseAllocator_;
+	}
+
+	//! Deallocates all memory chunks, excluding the user-supplied buffer.
+	/*! Every block handed out before becomes invalid. A user-supplied buffer is
+		marked empty so that it serves allocations again.
+	*/
+	void Clear() {
+		FreeChunks();
+		if (chunkHead_ != 0)	// only the user-supplied buffer can be left at this point
+			chunkHead_->size = 0;
+	}
+
+	//! Computes the total capacity of allocated memory chunks.
+	/*! \return total capacity in bytes.
+	*/
+	size_t Capacity() {
+		size_t capacity = 0;
+		for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+			capacity += c->capacity;
+		return capacity;
+	}
+
+	//! Computes the memory blocks allocated.
+	/*! \return total used bytes.
+	*/
+	size_t Size() {
+		size_t size = 0;
+		for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
+			size += c->size;
+		return size;
+	}
+
+	//! Allocates a memory block. (concept Allocator)
+	/*! \param size Requested size in bytes; it is rounded up to a multiple of 4.
+		\return pointer to the block inside the head chunk.
+	*/
+	void* Malloc(size_t size) {
+		size = Align(size);
+
+		// chunkHead_ is null after Clear() on a pool without user buffer.
+		if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity)
+			AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size);
+
+		char *buffer = (char *)(chunkHead_ + 1) + chunkHead_->size;
+		RAPIDJSON_ASSERT(((uintptr_t)buffer & 3) == 0);	// returned buffer is aligned to 4
+		chunkHead_->size += size;
+
+		return buffer;
+	}
+
+	//! Resizes a memory block (concept Allocator)
+	/*! \param originalPtr The current block. Null pointer is permitted and behaves like Malloc().
+		\param originalSize The current size in bytes.
+		\param newSize The new size in bytes.
+		\return originalPtr if the block could be kept or grown in place, otherwise a new block
+			holding a copy of the first originalSize bytes. The original block is never freed.
+	*/
+	void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
+		if (originalPtr == 0)
+			return Malloc(newSize);
+
+		// Do not shrink if new size is smaller than original
+		if (originalSize >= newSize)
+			return originalPtr;
+
+		// Simply expand it if it is the last allocation and there is sufficient space.
+		// Blocks occupy their size rounded up to 4, so the comparison uses the rounded size.
+		if (chunkHead_ != 0) {
+			const size_t occupied = Align(originalSize);
+			if (occupied <= chunkHead_->size &&
+				originalPtr == (char *)(chunkHead_ + 1) + chunkHead_->size - occupied) {
+				const size_t increment = Align(newSize) - occupied;
+				if (chunkHead_->size + increment <= chunkHead_->capacity) {
+					chunkHead_->size += increment;
+					RAPIDJSON_ASSERT(((uintptr_t)originalPtr & 3) == 0);	// returned buffer is aligned to 4
+					return originalPtr;
+				}
+			}
+		}
+
+		// Realloc process: allocate and copy memory, do not free original buffer.
+		void* newBuffer = Malloc(newSize);
+		RAPIDJSON_ASSERT(newBuffer != 0);	// Do not handle out-of-memory explicitly.
+		return memcpy(newBuffer, originalPtr, originalSize);
+	}
+
+	//! Frees a memory block (concept Allocator)
+	static void Free(void *) {} // Do nothing
+
+private:
+	// Prohibit copying: a copy would free the same chunks a second time.
+	MemoryPoolAllocator(const MemoryPoolAllocator&);
+	MemoryPoolAllocator& operator=(const MemoryPoolAllocator&);
+
+	//! Rounds a size up to the next multiple of 4.
+	static size_t Align(size_t size) { return (size + 3) & ~static_cast<size_t>(3); }
+
+	//! Releases every chunk obtained from the base allocator; stops at the user-supplied buffer.
+	void FreeChunks() {
+		while(chunkHead_ != 0 && chunkHead_ != (ChunkHeader *)userBuffer_) {
+			ChunkHeader* next = chunkHead_->next;
+			baseAllocator_->Free(chunkHead_);
+			chunkHead_ = next;
+		}
+	}
+
+	//! Creates a new chunk and makes it the head of the chunk list.
+	/*! \param capacity Capacity of the chunk in bytes.
+	*/
+	void AddChunk(size_t capacity) {
+		// The user-buffer constructor does not create a base allocator, so do it on first use.
+		if (!baseAllocator_)
+			ownBaseAllocator_ = baseAllocator_ = new BaseAllocator();
+		ChunkHeader* chunk = (ChunkHeader*)baseAllocator_->Malloc(sizeof(ChunkHeader) + capacity);
+		RAPIDJSON_ASSERT(chunk != 0);	// Do not handle out-of-memory explicitly.
+		chunk->capacity = capacity;
+		chunk->size = 0;
+		chunk->next = chunkHead_;
+		chunkHead_ =  chunk;
+	}
+
+	static const int kDefaultChunkCapacity = 64 * 1024; //!< Default chunk capacity.
+
+	//! Chunk header for prepending to each chunk.
+	/*! Chunks are stored as a singly linked list.
+	*/
+	struct ChunkHeader {
+		size_t capacity;	//!< Capacity of the chunk in bytes (excluding the header itself).
+		size_t size;		//!< Current size of allocated memory in bytes.
+		ChunkHeader *next;	//!< Next chunk in the linked list.
+	};
+
+	ChunkHeader *chunkHead_;	//!< Head of the chunk linked-list. Only the head chunk serves allocation.
+	size_t chunk_capacity_;		//!< The minimum capacity of chunk when they are allocated.
+	char *userBuffer_;			//!< User supplied buffer.
+	BaseAllocator* baseAllocator_;	//!< base allocator for allocating memory chunks.
+	BaseAllocator* ownBaseAllocator_;	//!< base allocator created by this object.
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Encoding
+
+/*! \class rapidjson::Encoding
+	\brief Concept for encoding of Unicode characters.
+
+\code
+concept Encoding {
+	typename Ch;	//! Type of character.
+
+	//! \brief Encode a Unicode codepoint to a buffer.
+	//! \param buffer pointer to destination buffer to store the result. It should have sufficient size of encoding one character.
+	//! \param codepoint An unicode codepoint, ranging from 0x0 to 0x10FFFF inclusively.
+	//! \returns the pointer to the next character after the encoded data.
+	static Ch* Encode(Ch *buffer, unsigned codepoint);
+};
+\endcode
+*/
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF8
+
+//! UTF-8 encoding.
+/*! http://en.wikipedia.org/wiki/UTF-8
+	\tparam CharType Type for storing 8-bit UTF-8 data. Default is char.
+	\implements Encoding
+*/
+template<typename CharType = char>
+struct UTF8 {
+	typedef CharType Ch;
+
+	//! Writes \c codepoint as a UTF-8 sequence of one to four units.
+	/*! The length is selected by range: up to 0x7F one unit, up to 0x7FF two,
+		up to 0xFFFF three, otherwise four (asserting codepoint <= 0x10FFFF).
+		\return pointer just past the last unit written.
+	*/
+	static Ch* Encode(Ch *buffer, unsigned codepoint) {
+		Ch* out = buffer;
+		if (codepoint <= 0x7F) {
+			*out++ = static_cast<Ch>(codepoint & 0xFF);
+		}
+		else if (codepoint <= 0x7FF) {
+			*out++ = static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF));
+			*out++ = static_cast<Ch>(0x80 | (codepoint & 0x3F));
+		}
+		else if (codepoint <= 0xFFFF) {
+			*out++ = static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF));
+			*out++ = static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F));
+			*out++ = static_cast<Ch>(0x80 | (codepoint & 0x3F));
+		}
+		else {
+			RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+			*out++ = static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF));
+			*out++ = static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F));
+			*out++ = static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F));
+			*out++ = static_cast<Ch>(0x80 | (codepoint & 0x3F));
+		}
+		return out;
+	}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF16
+
+//! UTF-16 encoding.
+/*! http://en.wikipedia.org/wiki/UTF-16
+	\tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
+	\implements Encoding
+*/
+template<typename CharType = wchar_t>
+struct UTF16 {
+	typedef CharType Ch;
+
+	//! Writes \c codepoint as one unit, or as a surrogate pair when it is above 0xFFFF.
+	/*! \return pointer just past the last unit written.
+	*/
+	static Ch* Encode(Ch* buffer, unsigned codepoint) {
+		if (codepoint > 0xFFFF) {
+			RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+			const unsigned offset = codepoint - 0x10000;
+			*buffer++ = static_cast<Ch>((offset >> 10) + 0xD800);	// high surrogate
+			*buffer++ = static_cast<Ch>((offset & 0x3FF) + 0xDC00);	// low surrogate
+			return buffer;
+		}
+
+		RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
+		*buffer++ = static_cast<Ch>(codepoint);
+		return buffer;
+	}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// UTF32
+
+//! UTF-32 encoding.
+/*! http://en.wikipedia.org/wiki/UTF-32
+	\tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
+	\implements Encoding
+*/
+template<typename CharType = unsigned>
+struct UTF32 {
+	typedef CharType Ch;
+
+	//! Stores \c codepoint as a single unit (asserting codepoint <= 0x10FFFF) and returns the next position.
+	static Ch *Encode(Ch* buffer, unsigned codepoint) {
+		RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
+		*buffer = codepoint;
+		return buffer + 1;
+	}
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//  Stream
+
+/*! \class rapidjson::Stream
+	\brief Concept for reading and writing characters.
+
+	For read-only stream, no need to implement PutBegin(), Put() and PutEnd().
+
+	For write-only stream, only need to implement Put().
+
+\code
+concept Stream {
+	typename Ch;	//!< Character type of the stream.
+
+	//! Read the current character from stream without moving the read cursor.
+	Ch Peek() const;
+
+	//! Read the current character from stream and moving the read cursor to next character.
+	Ch Take();
+
+	//! Get the current read cursor.
+	//! \return Number of characters read from start.
+	size_t Tell();
+
+	//! Begin writing operation at the current read pointer.
+	//! \return The begin writer pointer.
+	Ch* PutBegin();
+
+	//! Write a character.
+	void Put(Ch c);
+
+	//! End the writing operation.
+	//! \param begin The begin write pointer returned by PutBegin().
+	//! \return Number of characters written.
+	size_t PutEnd(Ch* begin);
+}
+\endcode
+*/
+
+//! Writes the character \c c to \c stream \c n times by calling stream.Put(c).
+template<typename Stream, typename Ch>
+inline void PutN(Stream& stream, Ch c, size_t n) {
+	for (; n != 0; --n)
+		stream.Put(c);
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// StringStream
+
+//! Read-only string stream.
+/*! Reads characters from a caller-provided string; the write operations assert.
+	\implements Stream
+*/
+template <typename Encoding>
+struct GenericStringStream {
+	typedef typename Encoding::Ch Ch;
+
+	GenericStringStream(const Ch *src) : src_(src), head_(src) {}
+
+	//! Returns the current character without advancing.
+	Ch Peek() const {
+		return *src_;
+	}
+
+	//! Returns the current character and advances by one.
+	Ch Take() {
+		const Ch current = *src_;
+		++src_;
+		return current;
+	}
+
+	//! Returns the number of characters consumed so far.
+	size_t Tell() const {
+		return src_ - head_;
+	}
+
+	// Writing is not supported by this stream.
+	Ch* PutBegin() {
+		RAPIDJSON_ASSERT(false);
+		return 0;
+	}
+	void Put(Ch) {
+		RAPIDJSON_ASSERT(false);
+	}
+	size_t PutEnd(Ch*) {
+		RAPIDJSON_ASSERT(false);
+		return 0;
+	}
+
+	const Ch* src_;		//!< Current read position.
+	const Ch* head_;	//!< Original head of the string.
+};
+
+typedef GenericStringStream<UTF8<> > StringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// InsituStringStream
+
+//! A read-write string stream.
+/*! This string stream is particularly designed for in-situ parsing:
+	output is written into the same buffer that is being read.
+	\implements Stream
+*/
+template <typename Encoding>
+struct GenericInsituStringStream {
+	typedef typename Encoding::Ch Ch;
+
+	GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
+
+	// Read
+
+	//! Returns the current character without advancing.
+	Ch Peek() {
+		return *src_;
+	}
+
+	//! Returns the current character and advances the read position by one.
+	Ch Take() {
+		const Ch current = *src_;
+		++src_;
+		return current;
+	}
+
+	//! Returns the number of characters consumed so far.
+	size_t Tell() {
+		return src_ - head_;
+	}
+
+	// Write
+
+	//! Starts writing at the current read position and returns that position.
+	Ch* PutBegin() {
+		dst_ = src_;
+		return dst_;
+	}
+
+	//! Stores one character at the write position; PutBegin() must have been called before.
+	void Put(Ch c) {
+		RAPIDJSON_ASSERT(dst_ != 0);
+		*dst_ = c;
+		++dst_;
+	}
+
+	//! Returns the number of characters written since \c begin.
+	size_t PutEnd(Ch* begin) {
+		return dst_ - begin;
+	}
+
+	Ch* src_;	//!< Current read position.
+	Ch* dst_;	//!< Current write position; null until PutBegin() is called.
+	Ch* head_;	//!< Original head of the string.
+};
+
+typedef GenericInsituStringStream<UTF8<> > InsituStringStream;
+
+///////////////////////////////////////////////////////////////////////////////
+// Type
+
+//! Type of JSON value. Each enumerator has an explicitly assigned value (0 to 6); false and true are distinct types.
+enum Type {
+	kNullType = 0,		//!< null
+	kFalseType = 1,		//!< false
+	kTrueType = 2,		//!< true
+	kObjectType = 3,	//!< object
+	kArrayType = 4,		//!< array 
+	kStringType = 5,	//!< string
+	kNumberType = 6,	//!< number
+};
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_RAPIDJSON_H_
--- a/third-party/rapidjson/reader.h
+++ b/third-party/rapidjson/reader.h
--- a/third-party/rapidjson/stringbuffer.h
+++ b/third-party/rapidjson/stringbuffer.h
+#ifndef RAPIDJSON_STRINGBUFFER_H_
+#define RAPIDJSON_STRINGBUFFER_H_
+
+#include "rapidjson.h"
+#include "internal/stack.h"
+
+namespace rapidjson {
+
+//! Represents an in-memory output stream.
+/*!
+	\tparam Encoding Encoding of the stream.
+	\tparam Allocator type for allocating memory buffer.
+	\implements Stream
+*/
+template <typename Encoding, typename Allocator = CrtAllocator>
+struct GenericStringBuffer {
+	typedef typename Encoding::Ch Ch;
+
+	//! Constructor.
+	/*! \param allocator Forwarded to the internal stack.
+		\param capacity Forwarded to the internal stack. The default is kDefaultCapacity.
+	*/
+	GenericStringBuffer(Allocator* allocator = 0, size_t capacity = kDefaultCapacity) : stack_(allocator, capacity) {}
+
+	//! Appends one character.
+	void Put(Ch c) { *stack_.template Push<Ch>() = c; }
+
+	//! Forwards to stack_.Clear().
+	void Clear() { stack_.Clear(); }
+
+	//! Returns the buffered text, null-terminated.
+	/*! The element type is Ch, i.e. the character type of Encoding. For the default
+		UTF8<> encoding this is const char*, as before; other encodings get a
+		pointer of their own character type instead of a mismatched const char*.
+	*/
+	const Ch* GetString() const {
+		// Push and pop a null terminator. This is safe.
+		*stack_.template Push<Ch>() = '\0';
+		stack_.template Pop<Ch>(1);
+
+		return stack_.template Bottom<Ch>();
+	}
+
+	//! Returns stack_.GetSize().
+	/*! NOTE(review): this looks like a size in bytes rather than a character count - confirm against internal::Stack. */
+	size_t Size() const { return stack_.GetSize(); }
+
+	static const size_t kDefaultCapacity = 256;
+	mutable internal::Stack<Allocator> stack_;	//!< mutable so that GetString() const can append the terminator temporarily.
+};
+
+typedef GenericStringBuffer<UTF8<> > StringBuffer;
+
+//! Specialization of PutN() for StringBuffer: reserves \c n characters at once and fills them with memset().
+template<>
+inline void PutN(GenericStringBuffer<UTF8<> >& stream, char c, size_t n) {
+	char* dest = stream.stack_.Push<char>(n);
+	memset(dest, c, n * sizeof(c));
+}
+
+} // namespace rapidjson
+
+#endif // RAPIDJSON_STRINGBUFFER_H_
--- a/third-party/rapidjson/writer.h
+++ b/third-party/rapidjson/writer.h
+#ifndef RAPIDJSON_WRITER_H_
+#define RAPIDJSON_WRITER_H_
+
+#include "rapidjson.h"
+#include "internal/stack.h"
+#include "internal/strfunc.h"
+#include <cstdio>	// snprintf() or _sprintf_s()
+#include <new>		// placement new
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4127) // conditional expression is constant
+#endif
+
+namespace rapidjson {
+
+//! JSON writer
+/*! Writer implements the concept Handler.
+	It generates JSON text by events to an output stream.
+
+	User may programmatically call the functions of a writer to generate JSON text.
+
+	On the other side, a writer can also be passed to objects that generate events,
+
+	for example Reader::Parse() and Document::Accept().
+
+	\tparam Stream Type of output stream.
+	\tparam Encoding Encoding of both source strings and output.
+	\tparam Allocator Allocator type used by the internal level stack.
+	\implements Handler
+*/
+template<typename Stream, typename Encoding = UTF8<>, typename Allocator = MemoryPoolAllocator<> >
+class Writer {
+public:
+	typedef typename Encoding::Ch Ch;
+
+	//! Constructor.
+	/*! \param stream Output stream; held by reference.
+		\param allocator Forwarded to the level stack.
+		\param levelDepth Initial number of nesting levels the level stack is sized for.
+	*/
+	Writer(Stream& stream, Allocator* allocator = 0, size_t levelDepth = kDefaultLevelDepth) : 
+		stream_(stream), level_stack_(allocator, levelDepth * sizeof(Level)) {}
+
+	//@name Implementation of Handler
+	//@{
+	Writer& Null()					{ Prefix(kNullType);   WriteNull();			return *this; }
+	Writer& Bool(bool b)			{ Prefix(b ? kTrueType : kFalseType); WriteBool(b); return *this; }
+	Writer& Int(int i)				{ Prefix(kNumberType); WriteInt(i);			return *this; }
+	Writer& Uint(unsigned u)		{ Prefix(kNumberType); WriteUint(u);		return *this; }
+	Writer& Int64(int64_t i64)		{ Prefix(kNumberType); WriteInt64(i64);		return *this; }
+	Writer& Uint64(uint64_t u64)	{ Prefix(kNumberType); WriteUint64(u64);	return *this; }
+	Writer& Double(double d)		{ Prefix(kNumberType); WriteDouble(d);		return *this; }
+
+	//! Writes a string value (or an object member name) of the given length.
+	/*! \param copy Unused by the writer. */
+	Writer& String(const Ch* str, SizeType length, bool copy = false) {
+		(void)copy;
+		Prefix(kStringType);
+		WriteString(str, length);
+		return *this;
+	}
+
+	//! Begins an object and pushes a new nesting level.
+	Writer& StartObject() {
+		Prefix(kObjectType);
+		new (level_stack_.template Push<Level>()) Level(false);
+		WriteStartObject();
+		return *this;
+	}
+
+	//! Ends the current object; asserts that the innermost level is an object.
+	/*! \param memberCount Unused. */
+	Writer& EndObject(SizeType memberCount = 0) {
+		(void)memberCount;
+		RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
+		RAPIDJSON_ASSERT(!level_stack_.template Top<Level>()->inArray);
+		level_stack_.template Pop<Level>(1);
+		WriteEndObject();
+		return *this;
+	}
+
+	//! Begins an array and pushes a new nesting level.
+	Writer& StartArray() {
+		Prefix(kArrayType);
+		new (level_stack_.template Push<Level>()) Level(true);
+		WriteStartArray();
+		return *this;
+	}
+
+	//! Ends the current array; asserts that the innermost level is an array.
+	/*! \param elementCount Unused. */
+	Writer& EndArray(SizeType elementCount = 0) {
+		(void)elementCount;
+		RAPIDJSON_ASSERT(level_stack_.GetSize() >= sizeof(Level));
+		RAPIDJSON_ASSERT(level_stack_.template Top<Level>()->inArray);
+		level_stack_.template Pop<Level>(1);
+		WriteEndArray();
+		return *this;
+	}
+	//@}
+
+	//! Simpler but slower overload.
+	Writer& String(const Ch* str) { return String(str, internal::StrLen(str)); }
+
+protected:
+	//! Information for each nested level
+	struct Level {
+		Level(bool inArray_) : inArray(inArray_), valueCount(0) {}
+		bool inArray;		//!< true if in array, otherwise in object
+		size_t valueCount;	//!< number of values in this level
+	};
+
+	static const size_t kDefaultLevelDepth = 32;
+
+	void WriteNull()  {
+		stream_.Put('n'); stream_.Put('u'); stream_.Put('l'); stream_.Put('l');
+	}
+
+	void WriteBool(bool b)  {
+		if (b) {
+			stream_.Put('t'); stream_.Put('r'); stream_.Put('u'); stream_.Put('e');
+		}
+		else {
+			stream_.Put('f'); stream_.Put('a'); stream_.Put('l'); stream_.Put('s'); stream_.Put('e');
+		}
+	}
+
+	//! Writes a signed integer in decimal.
+	void WriteInt(int i) {
+		unsigned magnitude = static_cast<unsigned>(i);
+		if (i < 0) {
+			stream_.Put('-');
+			// Negate in unsigned arithmetic: -i on the signed value is undefined for the minimum int.
+			magnitude = 0u - magnitude;
+		}
+		WriteUint(magnitude);
+	}
+
+	//! Writes an unsigned integer in decimal.
+	void WriteUint(unsigned u) {
+		char buffer[10];	// 10 digits are enough for a 32-bit unsigned
+		char *p = buffer;
+		do {	// produce digits least significant first
+			*p++ = (u % 10) + '0';
+			u /= 10;
+		} while (u > 0);
+
+		do {	// emit them in reverse
+			--p;
+			stream_.Put(*p);
+		} while (p != buffer);
+	}
+
+	//! Writes a signed 64-bit integer in decimal.
+	void WriteInt64(int64_t i64) {
+		uint64_t magnitude = static_cast<uint64_t>(i64);
+		if (i64 < 0) {
+			stream_.Put('-');
+			// Negate in unsigned arithmetic: -i64 on the signed value is undefined for the minimum int64_t.
+			magnitude = static_cast<uint64_t>(0) - magnitude;
+		}
+		WriteUint64(magnitude);
+	}
+
+	//! Writes an unsigned 64-bit integer in decimal.
+	void WriteUint64(uint64_t u64) {
+		char buffer[20];	// 20 digits are enough for a 64-bit unsigned
+		char *p = buffer;
+		do {	// produce digits least significant first
+			*p++ = char(u64 % 10) + '0';
+			u64 /= 10;
+		} while (u64 > 0);
+
+		do {	// emit them in reverse
+			--p;
+			stream_.Put(*p);
+		} while (p != buffer);
+	}
+
+	//! Writes a double formatted with "%g".
+	/*! NOTE(review): "%g" also prints non-finite values (inf/nan), which are not valid JSON - confirm callers never pass them.
+		\todo Optimization with custom double-to-string converter.
+	*/
+	void WriteDouble(double d) {
+		char buffer[100];
+#ifdef _MSC_VER
+		int ret = sprintf_s(buffer, sizeof(buffer), "%g", d);
+#else
+		int ret = snprintf(buffer, sizeof(buffer), "%g", d);
+#endif
+		RAPIDJSON_ASSERT(ret >= 1);
+		for (int i = 0; i < ret; i++)
+			stream_.Put(buffer[i]);
+	}
+
+	//! Writes a quoted string, escaping control characters, '"' and '\\'.
+	/*! The escape table maps a character to the letter following the backslash;
+		'u' means the character is written as a \\u00XX sequence, 0 means no escaping.
+	*/
+	void WriteString(const Ch* str, SizeType length)  {
+		static const char hexDigits[] = "0123456789ABCDEF";
+		static const char escape[256] = {
+#define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+			//0    1    2    3    4    5    6    7    8    9    A    B    C    D    E    F
+			'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', 'n', 'u', 'f', 'r', 'u', 'u', // 00
+			'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', // 10
+			  0,   0, '"',   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0, // 20
+			Z16, Z16,																		// 30~4F
+			  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,'\\',   0,   0,   0, // 50
+			Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16								// 60~FF
+#undef Z16
+		};
+
+		stream_.Put('\"');
+		for (const Ch* p = str; p != str + length; ++p) {
+			if ((sizeof(Ch) == 1 || *p < 256) && escape[(unsigned char)*p])  {
+				stream_.Put('\\');
+				stream_.Put(escape[(unsigned char)*p]);
+				if (escape[(unsigned char)*p] == 'u') {
+					stream_.Put('0');
+					stream_.Put('0');
+					stream_.Put(hexDigits[(*p) >> 4]);
+					stream_.Put(hexDigits[(*p) & 0xF]);
+				}
+			}
+			else
+				stream_.Put(*p);
+		}
+		stream_.Put('\"');
+	}
+
+	void WriteStartObject()	{ stream_.Put('{'); }
+	void WriteEndObject()	{ stream_.Put('}'); }
+	void WriteStartArray()	{ stream_.Put('['); }
+	void WriteEndArray()	{ stream_.Put(']'); }
+
+	//! Writes the separator required before the next value and counts the value in the current level.
+	/*! In an array every value after the first is preceded by ','. In an object, names and values
+		alternate: ':' precedes a value and ',' precedes every name after the first.
+		At the root nothing is written; only an object or array is accepted there.
+		\param type Type of the value about to be written; only used for assertions.
+	*/
+	void Prefix(Type type) {
+		(void)type;
+		if (level_stack_.GetSize() != 0) { // this value is not at root
+			Level* level = level_stack_.template Top<Level>();
+			if (level->valueCount > 0) {
+				if (level->inArray) 
+					stream_.Put(','); // add comma if it is not the first element in array
+				else  // in object
+					stream_.Put((level->valueCount % 2 == 0) ? ',' : ':');
+			}
+			if (!level->inArray && level->valueCount % 2 == 0)
+				RAPIDJSON_ASSERT(type == kStringType);  // if it's in object, then even number should be a name
+			level->valueCount++;
+		}
+		else
+			RAPIDJSON_ASSERT(type == kObjectType || type == kArrayType);
+	}
+
+	Stream& stream_;							//!< Output stream.
+	internal::Stack<Allocator> level_stack_;	//!< One Level per currently open object/array.
+
+private:
+	// Prohibit assignment for VC C4512 warning
+	Writer& operator=(const Writer& w);
+};
+
+} // namespace rapidjson
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+#endif // RAPIDJSON_WRITER_H_
--- a/tools/db_sanity_test.cc
+++ b/tools/db_sanity_test.cc
@@ -4,6 +4,7 @@
 //  of patent rights can be found in the PATENTS file in the same directory.

 #include <cstdio>
+#include <cstdlib>
 #include <vector>
 #include <memory>


--- a/util/arena.cc
+++ b/util/arena.cc
@@ -14,6 +14,7 @@

 namespace rocksdb {

+const size_t Arena::kInlineSize;
 const size_t Arena::kMinBlockSize = 4096;
 const size_t Arena::kMaxBlockSize = 2 << 30;
 static const int kAlignUnit = sizeof(void*);
@@ -34,6 +35,10 @@ size_t OptimizeBlockSize(size_t block_size) {
 Arena::Arena(size_t block_size) : kBlockSize(OptimizeBlockSize(block_size)) {
  assert(kBlockSize >= kMinBlockSize && kBlockSize <= kMaxBlockSize &&
         kBlockSize % kAlignUnit == 0);
+  alloc_bytes_remaining_ = sizeof(inline_block_);
+  blocks_memory_ += alloc_bytes_remaining_;
+  aligned_alloc_ptr_ = inline_block_;
+  unaligned_alloc_ptr_ = inline_block_ + alloc_bytes_remaining_;
 }

 Arena::~Arena() {
@@ -71,17 +76,17 @@ char* Arena::AllocateFallback(size_t bytes, bool aligned) {
  }
 }

-char* Arena::AllocateAligned(size_t bytes, size_t huge_page_tlb_size,
+char* Arena::AllocateAligned(size_t bytes, size_t huage_page_size,
                             Logger* logger) {
  assert((kAlignUnit & (kAlignUnit - 1)) ==
         0);  // Pointer size should be a power of 2

 #ifdef MAP_HUGETLB
-  if (huge_page_tlb_size > 0 && bytes > 0) {
+  if (huage_page_size > 0 && bytes > 0) {
    // Allocate from a huge page TBL table.
    assert(logger != nullptr);  // logger need to be passed in.
    size_t reserved_size =
-        ((bytes - 1U) / huge_page_tlb_size + 1U) * huge_page_tlb_size;
+        ((bytes - 1U) / huage_page_size + 1U) * huage_page_size;
    assert(reserved_size >= bytes);
    void* addr = mmap(nullptr, reserved_size, (PROT_READ | PROT_WRITE),
                      (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB), 0, 0);

--- a/util/arena.h
+++ b/util/arena.h
@@ -13,6 +13,7 @@

 #pragma once
 #include <cstddef>
+#include <cerrno>
 #include <vector>
 #include <assert.h>
 #include <stdint.h>
@@ -28,6 +29,7 @@ class Arena {
  Arena(const Arena&) = delete;
  void operator=(const Arena&) = delete;

+  static const size_t kInlineSize = 2048;
  static const size_t kMinBlockSize;
  static const size_t kMaxBlockSize;

@@ -36,18 +38,19 @@ class Arena {

  char* Allocate(size_t bytes);

-  // huge_page_tlb_size: if >0, allocate bytes from huge page TLB and the size
-  // of the huge page TLB. Bytes will be rounded up to multiple and 2MB and
-  // allocate huge pages through mmap anonymous option with huge page on.
-  // The extra  space allocated will be wasted. To enable it, need to reserve
-  // huge pages for it to be allocated, like:
+  // huge_page_size: if >0, will try to allocate from huge page TLB.
+  // The argument will be the size of the page size for huge page TLB. Bytes
+  // will be rounded up to multiple of the page size to allocate through mmap
+  // anonymous option with huge page on. The extra  space allocated will be
+  // wasted. If allocation fails, will fall back to normal case. To enable it,
+  // need to reserve huge pages for it to be allocated, like:
  //     sysctl -w vm.nr_hugepages=20
  // See linux doc Documentation/vm/hugetlbpage.txt for details.
  // huge page allocation can fail. In this case it will fail back to
  // normal cases. The messages will be logged to logger. So when calling with
  // huge_page_tlb_size > 0, we highly recommend a logger is passed in.
  // Otherwise, the error message will be printed out to stderr directly.
-  char* AllocateAligned(size_t bytes, size_t huge_page_tlb_size = 0,
+  char* AllocateAligned(size_t bytes, size_t huge_page_size = 0,
                        Logger* logger = nullptr);

  // Returns an estimate of the total memory usage of data allocated
@@ -69,6 +72,7 @@ class Arena {
  size_t BlockSize() const { return kBlockSize; }

 private:
+  char inline_block_[kInlineSize];
  // Number of bytes allocated in one block
  const size_t kBlockSize;
  // Array of new[] allocated memory blocks

--- a/util/arena_test.cc
+++ b/util/arena_test.cc
@@ -31,9 +31,11 @@ TEST(ArenaTest, MemoryAllocatedBytes) {
  for (int i = 0; i < N; i++) {
    arena.Allocate(req_sz);
  }
-  expected_memory_allocated = req_sz * N;
+  expected_memory_allocated = req_sz * N + Arena::kInlineSize;
  ASSERT_EQ(arena.MemoryAllocatedBytes(), expected_memory_allocated);

+  arena.Allocate(Arena::kInlineSize - 1);
+
  // requested size < quarter of a block:
  //   allocate a block with the default size, then try to use unused part
  //   of the block. So one new block will be allocated for the first
@@ -64,12 +66,19 @@ TEST(ArenaTest, ApproximateMemoryUsageTest) {
  Arena arena(kBlockSize);
  ASSERT_EQ(kZero, arena.ApproximateMemoryUsage());

+  // allocate inline bytes
+  arena.AllocateAligned(8);
+  arena.AllocateAligned(Arena::kInlineSize / 2 - 16);
+  arena.AllocateAligned(Arena::kInlineSize / 2);
+  ASSERT_EQ(arena.ApproximateMemoryUsage(), Arena::kInlineSize - 8);
+  ASSERT_EQ(arena.MemoryAllocatedBytes(), Arena::kInlineSize);
+
  auto num_blocks = kBlockSize / kEntrySize;

  // first allocation
  arena.AllocateAligned(kEntrySize);
  auto mem_usage = arena.MemoryAllocatedBytes();
-  ASSERT_EQ(mem_usage, kBlockSize);
+  ASSERT_EQ(mem_usage, kBlockSize + Arena::kInlineSize);
  auto usage = arena.ApproximateMemoryUsage();
  ASSERT_LT(usage, mem_usage);
  for (size_t i = 1; i < num_blocks; ++i) {

--- a/util/env_hdfs.cc
+++ b/util/env_hdfs.cc
@@ -18,6 +18,9 @@
 #include "hdfs/hdfs.h"
 #include "hdfs/env_hdfs.h"

+#define HDFS_EXISTS 0
+#define HDFS_DOESNT_EXIST 1
+
 //
 // This file defines an HDFS environment for rocksdb. It uses the libhdfs
 // api to access HDFS. All HDFS files created by one instance of rocksdb
@@ -39,7 +42,8 @@ static Logger* mylog = nullptr;

 // Used for reading a file from HDFS. It implements both sequential-read
 // access methods as well as random read access methods.
-class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAccessFile {
+class HdfsReadableFile : virtual public SequentialFile,
+                         virtual public RandomAccessFile {
 private:
  hdfsFS fileSys_;
  std::string filename_;
@@ -73,17 +77,34 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce
    Status s;
    Log(mylog, "[hdfs] HdfsReadableFile reading %s %ld\n",
        filename_.c_str(), n);
-    size_t bytes_read = hdfsRead(fileSys_, hfile_, scratch, (tSize)n);
-    Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str());
-    *result = Slice(scratch, bytes_read);
-    if (bytes_read < n) {
-      if (feof()) {
-        // We leave status as ok if we hit the end of the file
-      } else {
-        // A partial read with an error: return a non-ok status
-        s = IOError(filename_, errno);
+
+    char* buffer = scratch;
+    size_t total_bytes_read = 0;
+    tSize bytes_read = 0;
+    tSize remaining_bytes = (tSize)n;
+
+    // Keep calling hdfsRead until n bytes are read or we hit an error or EOF
+    while (remaining_bytes > 0) {
+      bytes_read = hdfsRead(fileSys_, hfile_, buffer, remaining_bytes);
+      if (bytes_read <= 0) {
+        break;
      }
+      assert(bytes_read <= remaining_bytes);
+
+      total_bytes_read += bytes_read;
+      remaining_bytes -= bytes_read;
+      buffer += bytes_read;
+    }
+    assert(total_bytes_read <= n);
+
+    Log(mylog, "[hdfs] HdfsReadableFile read %s\n", filename_.c_str());
+
+    if (bytes_read < 0) {
+      s = IOError(filename_, errno);
+    } else {
+      *result = Slice(scratch, total_bytes_read);
    }
+
    return s;
  }

@@ -139,8 +160,7 @@ class HdfsReadableFile: virtual public SequentialFile, virtual public RandomAcce
      size = pFileInfo->mSize;
      hdfsFreeFileInfo(pFileInfo, 1);
    } else {
-      throw rocksdb::HdfsFatalException("fileSize on unknown file " +
-                                            filename_);
+      throw HdfsFatalException("fileSize on unknown file " + filename_);
    }
    return size;
  }
@@ -236,9 +256,8 @@ class HdfsLogger : public Logger {
  uint64_t (*gettid_)();  // Return the thread id for the current thread

 public:
-  HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)(),
-             const InfoLogLevel log_level = InfoLogLevel::ERROR)
-      : Logger(log_level), file_(f), gettid_(gettid) {
+  HdfsLogger(HdfsWritableFile* f, uint64_t (*gettid)())
+      : file_(f), gettid_(gettid) {
    Log(mylog, "[hdfs] HdfsLogger opened %s\n",
            file_->getName().c_str());
  }
@@ -324,40 +343,52 @@ class HdfsLogger : public Logger {

 // Finally, the hdfs environment

+const std::string HdfsEnv::kProto = "hdfs://";
+const std::string HdfsEnv::pathsep = "/";
+
 // open a file for sequential reading
 Status HdfsEnv::NewSequentialFile(const std::string& fname,
-                                 SequentialFile** result) {
+                                  unique_ptr<SequentialFile>* result,
+                                  const EnvOptions& options) {
+  result->reset();
  HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
-  if (f == nullptr) {
+  if (f == nullptr || !f->isValid()) {
+    delete f;
    *result = nullptr;
    return IOError(fname, errno);
  }
-  *result = dynamic_cast<SequentialFile*>(f);
+  result->reset(dynamic_cast<SequentialFile*>(f));
  return Status::OK();
 }

 // open a file for random reading
 Status HdfsEnv::NewRandomAccessFile(const std::string& fname,
-                                   RandomAccessFile** result) {
+                                    unique_ptr<RandomAccessFile>* result,
+                                    const EnvOptions& options) {
+  result->reset();
  HdfsReadableFile* f = new HdfsReadableFile(fileSys_, fname);
-  if (f == nullptr) {
+  if (f == nullptr || !f->isValid()) {
+    delete f;
    *result = nullptr;
    return IOError(fname, errno);
  }
-  *result = dynamic_cast<RandomAccessFile*>(f);
+  result->reset(dynamic_cast<RandomAccessFile*>(f));
  return Status::OK();
 }

 // create a new file for writing
 Status HdfsEnv::NewWritableFile(const std::string& fname,
-                               WritableFile** result) {
+                                unique_ptr<WritableFile>* result,
+                                const EnvOptions& options) {
+  result->reset();
  Status s;
  HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname);
  if (f == nullptr || !f->isValid()) {
+    delete f;
    *result = nullptr;
    return IOError(fname, errno);
  }
-  *result = dynamic_cast<WritableFile*>(f);
+  result->reset(dynamic_cast<WritableFile*>(f));
  return Status::OK();
 }

@@ -367,24 +398,30 @@ Status HdfsEnv::NewRandomRWFile(const std::string& fname,
  return Status::NotSupported("NewRandomRWFile not supported on HdfsEnv");
 }

-virtual Status NewDirectory(const std::string& name,
-                            unique_ptr<Directory>* result) {
-  return Status::NotSupported("NewDirectory not yet supported on HdfsEnv");
+Status HdfsEnv::NewDirectory(const std::string& name,
+                             unique_ptr<Directory>* result) {
+  return Status::NotSupported("NewDirectory not supported on HdfsEnv");
 }

 bool HdfsEnv::FileExists(const std::string& fname) {
  int value = hdfsExists(fileSys_, fname.c_str());
-  if (value == 0) {
+  switch (value) {
+    case HDFS_EXISTS:
    return true;
+    case HDFS_DOESNT_EXIST:
+      return false;
+    default:  // anything else should be an error
+      Log(mylog, "FileExists hdfsExists call failed");
+      throw HdfsFatalException("hdfsExists call failed with error " +
+                               std::to_string(value) + ".\n");
  }
-  return false;
 }

 Status HdfsEnv::GetChildren(const std::string& path,
                            std::vector<std::string>* result) {
  int value = hdfsExists(fileSys_, path.c_str());
  switch (value) {
-  case 0: {
+    case HDFS_EXISTS: {  // directory exists
    int numEntries = 0;
    hdfsFileInfo* pHdfsFileInfo = 0;
    pHdfsFileInfo = hdfsListDirectory(fileSys_, path.c_str(), &numEntries);
@@ -402,15 +439,17 @@ Status HdfsEnv::GetChildren(const std::string& path,
    } else {
      // numEntries < 0 indicates error
      Log(mylog, "hdfsListDirectory call failed with error ");
-      throw HdfsFatalException("hdfsListDirectory call failed negative error.\n");
+      throw HdfsFatalException(
+          "hdfsListDirectory call failed negative error.\n");
    }
    break;
  }
-  case 1:           // directory does not exist, exit
+  case HDFS_DOESNT_EXIST:  // directory does not exist, exit
    break;
  default:          // anything else should be an error
-    Log(mylog, "hdfsListDirectory call failed with error ");
-    throw HdfsFatalException("hdfsListDirectory call failed with error.\n");
+    Log(mylog, "GetChildren hdfsExists call failed");
+    throw HdfsFatalException("hdfsExists call failed with error " +
+                             std::to_string(value) + ".\n");
  }
  return Status::OK();
 }
@@ -432,10 +471,15 @@ Status HdfsEnv::CreateDir(const std::string& name) {
 Status HdfsEnv::CreateDirIfMissing(const std::string& name) {
  const int value = hdfsExists(fileSys_, name.c_str());
  //  Not atomic. state might change b/w hdfsExists and CreateDir.
-  if (value == 0) {
+  switch (value) {
+    case HDFS_EXISTS:
    return Status::OK();
-  } else {
+    case HDFS_DOESNT_EXIST:
    return CreateDir(name);
+    default:  // anything else should be an error
+      Log(mylog, "CreateDirIfMissing hdfsExists call failed");
+      throw HdfsFatalException("hdfsExists call failed with error " +
+                               std::to_string(value) + ".\n");
  }
 };

@@ -492,11 +536,12 @@ Status HdfsEnv::NewLogger(const std::string& fname,
                          shared_ptr<Logger>* result) {
  HdfsWritableFile* f = new HdfsWritableFile(fileSys_, fname);
  if (f == nullptr || !f->isValid()) {
+    delete f;
    *result = nullptr;
    return IOError(fname, errno);
  }
  HdfsLogger* h = new HdfsLogger(f, &HdfsEnv::gettid);
-  *result = h;
+  result->reset(h);
  if (mylog == nullptr) {
    // mylog = h; // uncomment this for detailed logging
  }

--- a/util/log_buffer.h
+++ b/util/log_buffer.h
@@ -8,6 +8,7 @@
 #include "rocksdb/env.h"
 #include "util/arena.h"
 #include "util/autovector.h"
+#include <ctime>

 namespace rocksdb {


--- a/util/logging.cc
+++ b/util/logging.cc
@@ -9,6 +9,8 @@

 #include "util/logging.h"

+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdio.h>
@@ -18,6 +20,26 @@

 namespace rocksdb {

+
+// For sizes >= 10TB, print "XXTB";
+// for sizes >= 10GB, print "XXGB";
+// and so on for MB and KB; sizes below 10KB are printed in bytes ("XXB").
+// Writes the summary to output (capacity len); returns snprintf()'s result.
+int AppendHumanBytes(uint64_t bytes, char* output, int len) {
+  const uint64_t ull10 = 10;
+  if (bytes >= ull10 << 40) {
+    return snprintf(output, len, "%" PRIu64 "TB", bytes >> 40);
+  } else if (bytes >= ull10 << 30) {
+    return snprintf(output, len, "%" PRIu64 "GB", bytes >> 30);
+  } else if (bytes >= ull10 << 20) {
+    return snprintf(output, len, "%" PRIu64 "MB", bytes >> 20);
+  } else if (bytes >= ull10 << 10) {
+    return snprintf(output, len, "%" PRIu64 "KB", bytes >> 10);
+  } else {
+    return snprintf(output, len, "%" PRIu64 "B", bytes);
+  }
+}
+
 void AppendNumberTo(std::string* str, uint64_t num) {
  char buf[30];
  snprintf(buf, sizeof(buf), "%llu", (unsigned long long) num);

--- a/util/logging.h
+++ b/util/logging.h
@@ -21,6 +21,9 @@ namespace rocksdb {
 class Slice;
 class WritableFile;

+// Write a human-readable form of "bytes" (e.g. "12MB") to output[0..len)
+int AppendHumanBytes(uint64_t bytes, char* output, int len);
+
 // Append a human-readable printout of "num" to *str
 extern void AppendNumberTo(std::string* str, uint64_t num);