Commit 606a1267 authored by R Radheshyam Balasundaram

Changing the implementation of CuckooTableBuilder so that it no longer takes file_size, key_length, value_length.

Summary:
 - Maintain a list of key-value pairs as vectors during the Add operation (see the sketch after this list).
 - Start building the hash table only when Finish() is called.
 - This approach takes more time and space, but avoids requiring file_size, key length, and value length up front.
 - Rewrote cuckoo_table_builder_test.
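
As a minimal sketch (editorial, not the actual RocksDB code; all names are illustrative), the buffer-then-build pattern this diff adopts looks like:

```cpp
#include <string>
#include <utility>
#include <vector>

// Add() only buffers pairs; sizing and hashing decisions are postponed to
// Finish(), so the caller no longer supplies file_size / key and value
// lengths up front.
class DeferredBuilder {
 public:
  void Add(std::string key, std::string value) {
    kvs_.emplace_back(std::move(key), std::move(value));  // no hashing yet
  }
  void Finish() {
    // Only now is the entry count known, so the hash table can be sized
    // from kvs_.size() and a load factor instead of a caller-given size.
    num_buckets_ = static_cast<size_t>(kvs_.size() / 0.9);
  }
 private:
  std::vector<std::pair<std::string, std::string>> kvs_;
  size_t num_buckets_ = 0;
};
```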

I did not know about IterKey while writing this diff. I shall change the places where IterKey could be used instead of std::string tomorrow. Please review the rest of the logic.

Test Plan:
cuckoo_table_reader_test --enable_perf
cuckoo_table_builder_test
valgrind_check
asan_check

Reviewers: sdong, igor, yhchiang, ljin

Reviewed By: ljin

Subscribers: leveldb

Differential Revision: https://reviews.facebook.net/D20907
Parent 2124c85c
......@@ -8,6 +8,7 @@
#include <assert.h>
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
......@@ -36,25 +37,18 @@ const std::string CuckooTablePropertyNames::kIsLastLevel =
extern const uint64_t kCuckooTableMagicNumber = 0x926789d0c5f17873ull;
CuckooTableBuilder::CuckooTableBuilder(
WritableFile* file, uint32_t fixed_key_length,
uint32_t fixed_value_length, double hash_table_ratio,
uint64_t file_size, uint32_t max_num_hash_table,
uint32_t max_search_depth, bool is_last_level,
uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
WritableFile* file, double hash_table_ratio,
uint32_t max_num_hash_table, uint32_t max_search_depth,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: num_hash_table_(2),
file_(file),
value_length_(fixed_value_length),
// 8 is the difference between sizes of user key and InternalKey.
bucket_size_(fixed_key_length +
fixed_value_length - (is_last_level ? 8 : 0)),
hash_table_ratio_(hash_table_ratio),
max_num_buckets_(file_size / bucket_size_),
max_num_hash_table_(max_num_hash_table),
max_search_depth_(max_search_depth),
is_last_level_file_(is_last_level),
buckets_(max_num_buckets_),
make_space_for_key_call_id_(0),
GetSliceHash(GetSliceHashPtr) {
is_last_level_file_(false),
has_seen_first_key_(false),
get_slice_hash_(get_slice_hash),
closed_(false) {
properties_.num_entries = 0;
// Data is in a huge block.
properties_.num_data_blocks = 1;
......@@ -62,105 +56,120 @@ CuckooTableBuilder::CuckooTableBuilder(
properties_.filter_size = 0;
}
CuckooTableBuilder::~CuckooTableBuilder() {
}
void CuckooTableBuilder::Add(const Slice& key, const Slice& value) {
if (NumEntries() == max_num_buckets_) {
status_ = Status::Corruption("Hash Table is full.");
if (properties_.num_entries >= kMaxVectorIdx - 1) {
status_ = Status::NotSupported("Number of keys in a file must be < 2^32-1");
return;
}
uint64_t bucket_id;
bool bucket_found = false;
autovector<uint64_t> hash_vals;
ParsedInternalKey ikey;
if (!ParseInternalKey(key, &ikey)) {
status_ = Status::Corruption("Unable to parse key into internal key.");
return;
}
Slice user_key = ikey.user_key;
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t hash_val = GetSliceHash(user_key, hash_cnt, max_num_buckets_);
if (buckets_[hash_val].is_empty) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (user_key.compare(
is_last_level_file_ ? Slice(buckets_[hash_val].key)
: ExtractUserKey(Slice(buckets_[hash_val].key))) == 0) {
status_ = Status::Corruption("Same key is being inserted again.");
return;
}
hash_vals.push_back(hash_val);
}
}
while (!bucket_found && !MakeSpaceForKey(key, &bucket_id, hash_vals)) {
// Rehash by increasing the number of hash tables.
if (num_hash_table_ >= max_num_hash_table_) {
status_ = Status::Corruption("Too many collisions. Unable to hash.");
return;
}
// We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions.
uint64_t hash_val = GetSliceHash(user_key,
num_hash_table_, max_num_buckets_);
++num_hash_table_;
if (buckets_[hash_val].is_empty) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
// Determine if we can ignore the sequence number and value type from
// internal keys by looking at the sequence number of the first key. We
// assume that if the first key has a zero sequence number, then all the
// remaining keys will have zero sequence numbers as well.
if (!has_seen_first_key_) {
is_last_level_file_ = ikey.sequence == 0;
has_seen_first_key_ = true;
}
// If even one sequence number is non-zero, the file is not a last-level file.
assert(!is_last_level_file_ || ikey.sequence == 0);
if (is_last_level_file_) {
buckets_[bucket_id].key.assign(user_key.data(), user_key.size());
kvs_.emplace_back(std::make_pair(
ikey.user_key.ToString(), value.ToString()));
} else {
buckets_[bucket_id].key.assign(key.data(), key.size());
kvs_.emplace_back(std::make_pair(key.ToString(), value.ToString()));
}
buckets_[bucket_id].value.assign(value.data(), value.size());
buckets_[bucket_id].is_empty = false;
properties_.num_entries++;
// We assume that the keys are inserted in sorted order. To identify an
// unused key, which will be used to fill empty buckets in the table, we
// try to find gaps between successive inserted keys. This is done by
// maintaining the previous key and comparing it with the next key.
if (unused_user_key_.empty()) {
if (prev_key_.empty()) {
prev_key_ = user_key.ToString();
return;
}
std::string new_user_key = prev_key_;
// We assume that the keys are inserted in sorted order, as determined by the
// byte-wise comparator. To identify an unused key, which will be used to
// fill empty buckets in the table, we try to find gaps between successive
// inserted keys (i.e., the latest key and the previous one in kvs_).
if (unused_user_key_.empty() && kvs_.size() > 1) {
std::string prev_key = is_last_level_file_ ? kvs_[kvs_.size()-2].first
: ExtractUserKey(kvs_[kvs_.size()-2].first).ToString();
std::string new_user_key = prev_key;
new_user_key.back()++;
// We ignore carry-overs and check that it is larger than the previous key.
if ((new_user_key > prev_key_) &&
(new_user_key < user_key.ToString())) {
if (Slice(new_user_key).compare(Slice(prev_key)) > 0 &&
Slice(new_user_key).compare(ikey.user_key) < 0) {
unused_user_key_ = new_user_key;
} else {
prev_key_ = user_key.ToString();
}
}
}
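
The gap search at the end of Add() can be read in isolation. A hedged sketch, with FindUnusedKey as a hypothetical free-standing helper (the builder performs this inline on kvs_):

```cpp
#include <string>

// Bump the last byte of the previous user key and keep the result only if
// it falls strictly between the previous and the current key.
std::string FindUnusedKey(const std::string& prev, const std::string& curr) {
  if (prev.empty()) {
    return "";
  }
  std::string candidate = prev;
  candidate.back()++;  // ignore carry-overs, exactly as the builder does
  if (candidate > prev && candidate < curr) {
    return candidate;  // e.g. prev = "abc", curr = "abe" yields "abd"
  }
  return "";  // adjacent keys such as "abc", "abd" leave no gap
}
```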
Status CuckooTableBuilder::status() const { return status_; }
Status CuckooTableBuilder::MakeHashTable(std::vector<CuckooBucket>* buckets) {
uint64_t num_buckets = kvs_.size() / hash_table_ratio_;
buckets->resize(num_buckets);
uint64_t make_space_for_key_call_id = 0;
for (uint32_t vector_idx = 0; vector_idx < kvs_.size(); vector_idx++) {
uint64_t bucket_id;
bool bucket_found = false;
autovector<uint64_t> hash_vals;
Slice user_key = is_last_level_file_ ? kvs_[vector_idx].first :
ExtractUserKey(kvs_[vector_idx].first);
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t hash_val = get_slice_hash_(user_key, hash_cnt, num_buckets);
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_id = hash_val;
bucket_found = true;
break;
} else {
if (user_key.compare(is_last_level_file_
? Slice(kvs_[(*buckets)[hash_val].vector_idx].first)
: ExtractUserKey(
kvs_[(*buckets)[hash_val].vector_idx].first)) == 0) {
return Status::NotSupported("Same key is being inserted again.");
}
hash_vals.push_back(hash_val);
}
}
while (!bucket_found && !MakeSpaceForKey(hash_vals,
++make_space_for_key_call_id, buckets, &bucket_id)) {
// Rehash by increasing the number of hash tables.
if (num_hash_table_ >= max_num_hash_table_) {
return Status::NotSupported("Too many collisions. Unable to hash.");
}
// We don't really need to rehash the entire table because old hashes are
// still valid and we only increased the number of hash functions.
uint64_t hash_val = get_slice_hash_(user_key,
num_hash_table_, num_buckets);
++num_hash_table_;
if ((*buckets)[hash_val].vector_idx == kMaxVectorIdx) {
bucket_found = true;
bucket_id = hash_val;
break;
} else {
hash_vals.push_back(hash_val);
}
}
(*buckets)[bucket_id].vector_idx = vector_idx;
}
return Status::OK();
}
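
Stripped of bookkeeping, the per-key flow in MakeHashTable() is: probe each hash function, take the first empty bucket, otherwise fall back to the displacement search. A simplified sketch (kEmpty and hash are stand-ins, not the real identifiers):

```cpp
#include <cstdint>
#include <functional>
#include <vector>

constexpr uint32_t kEmpty = UINT32_MAX;  // stand-in for kMaxVectorIdx

// Returns true and sets *bucket_id if some hash function maps the key to an
// empty bucket; false means every candidate is occupied and the caller must
// displace existing entries (the MakeSpaceForKey step).
bool TryDirectInsert(
    const std::vector<uint32_t>& table, uint32_t num_hash_functions,
    const std::function<uint64_t(uint32_t, uint64_t)>& hash,  // (fn, mod)
    uint64_t* bucket_id) {
  for (uint32_t h = 0; h < num_hash_functions; ++h) {
    uint64_t slot = hash(h, table.size());
    if (table[slot] == kEmpty) {
      *bucket_id = slot;  // first empty candidate wins
      return true;
    }
  }
  return false;
}
```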
Status CuckooTableBuilder::Finish() {
assert(!closed_);
closed_ = true;
if (unused_user_key_.empty()) {
if (prev_key_.empty()) {
return Status::Corruption("Unable to find unused key");
}
// Try to find the key next to prev_key_ by handling carryovers.
std::string new_user_key = prev_key_;
std::vector<CuckooBucket> buckets;
Status s = MakeHashTable(&buckets);
if (!s.ok()) {
return s;
}
if (unused_user_key_.empty() && !kvs_.empty()) {
// Try to find the key next to the last key by handling carry-overs.
std::string last_key =
is_last_level_file_ ? kvs_[kvs_.size()-1].first
: ExtractUserKey(kvs_[kvs_.size()-1].first).ToString();
std::string new_user_key = last_key;
int curr_pos = new_user_key.size() - 1;
while (curr_pos >= 0) {
++new_user_key[curr_pos];
if (new_user_key > prev_key_) {
if (new_user_key > last_key) {
unused_user_key_ = new_user_key;
break;
}
......@@ -171,29 +180,32 @@ Status CuckooTableBuilder::Finish() {
}
}
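
The carry-handling loop above (its tail is elided by the hunk) amounts to this hypothetical helper, assuming byte-wise string comparison:

```cpp
#include <string>

// NextKey is an illustrative name; the builder runs this loop inline. Bump
// bytes from the right; a wrapped byte keeps its wrapped value while the
// carry propagates left. Returns "" when no strictly larger key exists.
std::string NextKey(const std::string& last_key) {
  std::string next = last_key;
  for (int pos = static_cast<int>(next.size()) - 1; pos >= 0; --pos) {
    ++next[pos];
    if (next > last_key) {
      return next;  // any key greater than the last (sorted) key is unused
    }
  }
  return "";  // every byte wrapped around
}
```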
std::string unused_bucket;
if (is_last_level_file_) {
unused_bucket = unused_user_key_;
} else {
ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
AppendInternalKey(&unused_bucket, ikey);
if (!kvs_.empty()) {
if (is_last_level_file_) {
unused_bucket = unused_user_key_;
} else {
ParsedInternalKey ikey(unused_user_key_, 0, kTypeValue);
AppendInternalKey(&unused_bucket, ikey);
}
}
properties_.fixed_key_len = unused_bucket.size();
uint32_t value_length = kvs_.empty() ? 0 : kvs_[0].second.size();
uint32_t bucket_size = value_length + properties_.fixed_key_len;
properties_.user_collected_properties[
CuckooTablePropertyNames::kValueLength].assign(
reinterpret_cast<const char*>(&value_length_), sizeof(value_length_));
reinterpret_cast<const char*>(&value_length), sizeof(value_length));
unused_bucket.resize(bucket_size_, 'a');
unused_bucket.resize(bucket_size, 'a');
// Write the table.
uint32_t num_added = 0;
for (auto& bucket : buckets_) {
Status s;
if (bucket.is_empty) {
for (auto& bucket : buckets) {
if (bucket.vector_idx == kMaxVectorIdx) {
s = file_->Append(Slice(unused_bucket));
} else {
++num_added;
s = file_->Append(Slice(bucket.key));
s = file_->Append(kvs_[bucket.vector_idx].first);
if (s.ok()) {
s = file_->Append(Slice(bucket.value));
s = file_->Append(kvs_[bucket.vector_idx].second);
}
}
if (!s.ok()) {
......@@ -202,17 +214,17 @@ Status CuckooTableBuilder::Finish() {
}
assert(num_added == NumEntries());
uint64_t offset = buckets_.size() * bucket_size_;
uint64_t offset = buckets.size() * bucket_size;
unused_bucket.resize(properties_.fixed_key_len);
properties_.user_collected_properties[
CuckooTablePropertyNames::kEmptyKey] = unused_bucket;
properties_.user_collected_properties[
CuckooTablePropertyNames::kNumHashTable].assign(
reinterpret_cast<char*>(&num_hash_table_), sizeof(num_hash_table_));
uint64_t num_buckets = buckets.size();
properties_.user_collected_properties[
CuckooTablePropertyNames::kMaxNumBuckets].assign(
reinterpret_cast<const char*>(&max_num_buckets_),
sizeof(max_num_buckets_));
reinterpret_cast<const char*>(&num_buckets), sizeof(num_buckets));
properties_.user_collected_properties[
CuckooTablePropertyNames::kIsLastLevel].assign(
reinterpret_cast<const char*>(&is_last_level_file_),
......@@ -228,7 +240,7 @@ Status CuckooTableBuilder::Finish() {
BlockHandle property_block_handle;
property_block_handle.set_offset(offset);
property_block_handle.set_size(property_block.size());
Status s = file_->Append(property_block);
s = file_->Append(property_block);
offset += property_block.size();
if (!s.ok()) {
return s;
......@@ -266,12 +278,14 @@ uint64_t CuckooTableBuilder::NumEntries() const {
uint64_t CuckooTableBuilder::FileSize() const {
if (closed_) {
return file_->GetFileSize();
} else {
// This is not the actual size of the file as we need to account for
// hash table ratio. This returns the size of filled buckets in the table
// scaled up by a factor of 1/hash table ratio.
return (properties_.num_entries * bucket_size_) / hash_table_ratio_;
} else if (properties_.num_entries == 0) {
return 0;
}
// This is not the actual size of the file as we need to account for
// hash table ratio. This returns the size of filled buckets in the table
// scaled up by a factor of 1/hash_table_ratio.
return ((kvs_[0].first.size() + kvs_[0].second.size()) *
properties_.num_entries) / hash_table_ratio_;
}
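
To make this estimate concrete (numbers are illustrative): with 1,000 buffered entries of 24-byte internal keys and 76-byte values and hash_table_ratio = 0.9, FileSize() returns (24 + 76) * 1000 / 0.9, roughly 111,111 bytes — the bytes of filled buckets scaled up by 1/ratio to account for the empty buckets the final table will contain.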
// This method is invoked when there is no place to insert the target key.
......@@ -284,8 +298,11 @@ uint64_t CuckooTableBuilder::FileSize() const {
// move all elements along the path from first level to this empty bucket, to
// make space for target key which is inserted at first level (*bucket_id).
// If the tree depth exceeds the max depth, we return false, indicating failure.
bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
uint64_t *bucket_id, autovector<uint64_t> hash_vals) {
bool CuckooTableBuilder::MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t make_space_for_key_call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id) {
struct CuckooNode {
uint64_t bucket_id;
uint32_t depth;
......@@ -302,13 +319,11 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
// unique id for this invocation of the method. We store this number into
// the nodes that we explore in the current method call.
// It is unlikely for the increment operation to overflow because the maximum
// number of times this will be called is <= max_num_hash_table_ +
// max_num_buckets_.
++make_space_for_key_call_id_;
// no. of times this will be called is <= max_num_hash_table_ + kvs_.size().
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t bucket_id = hash_vals[hash_cnt];
buckets_[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
(*buckets)[bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(bucket_id, 0, 0));
}
bool null_found = false;
......@@ -318,21 +333,21 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
if (curr_node.depth >= max_search_depth_) {
break;
}
CuckooBucket& curr_bucket = buckets_[curr_node.bucket_id];
CuckooBucket& curr_bucket = (*buckets)[curr_node.bucket_id];
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_table_; ++hash_cnt) {
uint64_t child_bucket_id = GetSliceHash(
is_last_level_file_ ? curr_bucket.key
: ExtractUserKey(Slice(curr_bucket.key)),
hash_cnt, max_num_buckets_);
if (buckets_[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id_) {
uint64_t child_bucket_id = get_slice_hash_(
is_last_level_file_ ? kvs_[curr_bucket.vector_idx].first
: ExtractUserKey(Slice(kvs_[curr_bucket.vector_idx].first)),
hash_cnt, buckets->size());
if ((*buckets)[child_bucket_id].make_space_for_key_call_id ==
make_space_for_key_call_id) {
continue;
}
buckets_[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id_;
(*buckets)[child_bucket_id].make_space_for_key_call_id =
make_space_for_key_call_id;
tree.push_back(CuckooNode(child_bucket_id, curr_node.depth + 1,
curr_pos));
if (buckets_[child_bucket_id].is_empty) {
if ((*buckets)[child_bucket_id].vector_idx == kMaxVectorIdx) {
null_found = true;
break;
}
......@@ -349,8 +364,8 @@ bool CuckooTableBuilder::MakeSpaceForKey(const Slice& key,
uint32_t bucket_to_replace_pos = tree.size()-1;
while (bucket_to_replace_pos >= num_hash_table_) {
CuckooNode& curr_node = tree[bucket_to_replace_pos];
buckets_[curr_node.bucket_id] =
buckets_[tree[curr_node.parent_pos].bucket_id];
(*buckets)[curr_node.bucket_id] =
(*buckets)[tree[curr_node.parent_pos].bucket_id];
bucket_to_replace_pos = curr_node.parent_pos;
}
*bucket_id = tree[bucket_to_replace_pos].bucket_id;
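
As an illustrative-only reduction of the loop above (shapes simplified; the real tree nodes live inside MakeSpaceForKey() and buckets holds vector_idx values):

```cpp
#include <cstdint>
#include <vector>

struct Node {
  uint64_t bucket_id;
  uint32_t parent_pos;
};

// Walk from the empty leaf back toward a root candidate, copying each
// parent bucket into its child; afterwards the root's bucket is free to
// receive the new key.
void ShiftAlongPath(const std::vector<Node>& tree,
                    std::vector<uint32_t>* buckets,
                    uint32_t leaf_pos, uint32_t num_roots) {
  uint32_t pos = leaf_pos;
  while (pos >= num_roots) {  // the first num_roots nodes are roots
    const Node& n = tree[pos];
    (*buckets)[n.bucket_id] = (*buckets)[tree[n.parent_pos].bucket_id];
    pos = n.parent_pos;
  }
}
```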
......
......@@ -6,7 +6,9 @@
#pragma once
#ifndef ROCKSDB_LITE
#include <stdint.h>
#include <limits>
#include <string>
#include <utility>
#include <vector>
#include "rocksdb/status.h"
#include "table/table_builder.h"
......@@ -19,14 +21,12 @@ namespace rocksdb {
class CuckooTableBuilder: public TableBuilder {
public:
CuckooTableBuilder(
WritableFile* file, uint32_t fixed_key_length,
uint32_t fixed_value_length, double hash_table_ratio,
uint64_t file_size, uint32_t max_num_hash_table,
uint32_t max_search_depth, bool is_last_level,
uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
WritableFile* file, double hash_table_ratio, uint32_t max_num_hash_table,
uint32_t max_search_depth,
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
// REQUIRES: Either Finish() or Abandon() has been called.
~CuckooTableBuilder();
~CuckooTableBuilder() {}
// Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator.
......@@ -34,7 +34,7 @@ class CuckooTableBuilder: public TableBuilder {
void Add(const Slice& key, const Slice& value) override;
// Return non-ok iff some error has been detected.
Status status() const override;
Status status() const override { return status_; }
// Finish building the table. Stops using the file passed to the
// constructor after this function returns.
......@@ -57,35 +57,37 @@ class CuckooTableBuilder: public TableBuilder {
private:
struct CuckooBucket {
CuckooBucket(): is_empty(true), make_space_for_key_call_id(0) {}
std::string key;
std::string value;
bool is_empty;
uint64_t make_space_for_key_call_id;
CuckooBucket()
: vector_idx(kMaxVectorIdx), make_space_for_key_call_id(0) {}
uint32_t vector_idx;
// This number will not exceed kvs_.size() + max_num_hash_table_.
// We assume number of items is <= 2^32.
uint32_t make_space_for_key_call_id;
};
static const uint32_t kMaxVectorIdx = std::numeric_limits<int32_t>::max();
bool MakeSpaceForKey(const Slice& key, uint64_t* bucket_id,
autovector<uint64_t> hash_vals);
bool MakeSpaceForKey(
const autovector<uint64_t>& hash_vals,
const uint64_t call_id,
std::vector<CuckooBucket>* buckets,
uint64_t* bucket_id);
Status MakeHashTable(std::vector<CuckooBucket>* buckets);
uint32_t num_hash_table_;
WritableFile* file_;
const uint32_t value_length_;
const uint32_t bucket_size_;
const double hash_table_ratio_;
const uint64_t max_num_buckets_;
const uint32_t max_num_hash_table_;
const uint32_t max_search_depth_;
const bool is_last_level_file_;
bool is_last_level_file_;
Status status_;
std::vector<CuckooBucket> buckets_;
std::vector<std::pair<std::string, std::string>> kvs_;
TableProperties properties_;
uint64_t make_space_for_key_call_id_;
uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
bool has_seen_first_key_;
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);
std::string unused_user_key_ = "";
std::string prev_key_;
bool closed_ = false; // Either Finish() or Abandon() has been called.
bool closed_; // Either Finish() or Abandon() has been called.
// No copying allowed
CuckooTableBuilder(const CuckooTableBuilder&) = delete;
......
This diff is collapsed.
......@@ -28,9 +28,9 @@ CuckooTableReader::CuckooTableReader(
const Options& options,
std::unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
uint64_t (*GetSliceHashPtr)(const Slice&, uint32_t, uint64_t))
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t))
: file_(std::move(file)),
GetSliceHash(GetSliceHashPtr) {
get_slice_hash_(get_slice_hash) {
if (!options.allow_mmap_reads) {
status_ = Status::InvalidArgument("File is not mmaped");
}
......@@ -90,7 +90,7 @@ Status CuckooTableReader::Get(
return Status::Corruption("Unable to parse key into internal key.");
}
for (uint32_t hash_cnt = 0; hash_cnt < num_hash_fun_; ++hash_cnt) {
uint64_t hash_val = GetSliceHash(ikey.user_key, hash_cnt, num_buckets_);
uint64_t hash_val = get_slice_hash_(ikey.user_key, hash_cnt, num_buckets_);
assert(hash_val < num_buckets_);
uint64_t offset = hash_val * bucket_length_;
const char* bucket = &file_data_.data()[offset];
......
......@@ -29,7 +29,7 @@ class CuckooTableReader: public TableReader {
const Options& options,
std::unique_ptr<RandomAccessFile>&& file,
uint64_t file_size,
uint64_t (*GetSliceHash)(const Slice&, uint32_t, uint64_t));
uint64_t (*get_slice_hash)(const Slice&, uint32_t, uint64_t));
~CuckooTableReader() {}
std::shared_ptr<const TableProperties> GetTableProperties() const override {
......@@ -67,7 +67,7 @@ class CuckooTableReader: public TableReader {
uint32_t value_length_;
uint32_t bucket_length_;
uint64_t num_buckets_;
uint64_t (*GetSliceHash)(const Slice& s, uint32_t index,
uint64_t (*get_slice_hash_)(const Slice& s, uint32_t index,
uint64_t max_num_buckets);
};
......
......@@ -101,12 +101,11 @@ class CuckooReaderTest {
return std::string(reinterpret_cast<char*>(&i), sizeof(i));
}
void CreateCuckooFile(bool is_last_level) {
void CreateCuckooFileAndCheckReader() {
unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder(
writable_file.get(), keys[0].size(), values[0].size(), 0.9,
10000, kNumHashFunc, 100, is_last_level, GetSliceHash);
writable_file.get(), 0.9, kNumHashFunc, 100, GetSliceHash);
ASSERT_OK(builder.status());
for (uint32_t key_idx = 0; key_idx < num_items; ++key_idx) {
builder.Add(Slice(keys[key_idx]), Slice(values[key_idx]));
......@@ -117,9 +116,8 @@ class CuckooReaderTest {
ASSERT_EQ(num_items, builder.NumEntries());
file_size = builder.FileSize();
ASSERT_OK(writable_file->Close());
}
void CheckReader() {
// Check reader now.
unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
CuckooTableReader reader(
......@@ -135,6 +133,14 @@ class CuckooReaderTest {
ASSERT_EQ(1, v.call_count);
}
}
void UpdateKeys(bool with_zero_seqno) {
for (uint32_t i = 0; i < num_items; i++) {
ParsedInternalKey ikey(user_keys[i],
with_zero_seqno ? 0 : i + 1000, kTypeValue);
keys[i].clear();
AppendInternalKey(&keys[i], ikey);
}
}
void CheckIterator() {
unique_ptr<RandomAccessFile> read_file;
......@@ -216,23 +222,22 @@ TEST(CuckooReaderTest, WhenKeyExists) {
AppendInternalKey(&keys[i], ikey);
values[i] = "value" + NumToStr(i);
// Give disjoint hash values.
AddHashLookups(user_keys[i], i * kNumHashFunc, kNumHashFunc);
AddHashLookups(user_keys[i], i, kNumHashFunc);
}
CreateCuckooFile(false);
CheckReader();
CreateCuckooFileAndCheckReader();
// Last level file.
CreateCuckooFile(true);
CheckReader();
UpdateKeys(true);
CreateCuckooFileAndCheckReader();
// Test with collision. Make all hash values collide.
hash_map.clear();
for (uint32_t i = 0; i < num_items; i++) {
AddHashLookups(user_keys[i], 0, kNumHashFunc);
}
CreateCuckooFile(false);
CheckReader();
UpdateKeys(false);
CreateCuckooFileAndCheckReader();
// Last level file.
CreateCuckooFile(true);
CheckReader();
UpdateKeys(true);
CreateCuckooFileAndCheckReader();
}
TEST(CuckooReaderTest, CheckIterator) {
......@@ -244,18 +249,19 @@ TEST(CuckooReaderTest, CheckIterator) {
AppendInternalKey(&keys[i], ikey);
values[i] = "value" + NumToStr(i);
// Give disjoint hash values, in reverse order.
AddHashLookups(user_keys[i], (num_items-i-1)*kNumHashFunc, kNumHashFunc);
AddHashLookups(user_keys[i], num_items-i-1, kNumHashFunc);
}
CreateCuckooFile(false);
CreateCuckooFileAndCheckReader();
CheckIterator();
// Last level file.
CreateCuckooFile(true);
UpdateKeys(true);
CreateCuckooFileAndCheckReader();
CheckIterator();
}
TEST(CuckooReaderTest, WhenKeyNotFound) {
// Add keys with colliding hash values.
SetUp(kNumHashFunc / 2);
SetUp(kNumHashFunc);
fname = test::TmpDir() + "/CuckooReader_WhenKeyNotFound";
for (uint64_t i = 0; i < num_items; i++) {
user_keys[i] = "key" + NumToStr(i);
......@@ -265,8 +271,7 @@ TEST(CuckooReaderTest, WhenKeyNotFound) {
// Make all hash values collide.
AddHashLookups(user_keys[i], 0, kNumHashFunc);
}
CreateCuckooFile(false);
CheckReader();
CreateCuckooFileAndCheckReader();
unique_ptr<RandomAccessFile> read_file;
ASSERT_OK(env->NewRandomAccessFile(fname, &read_file, env_options));
CuckooTableReader reader(
......@@ -351,20 +356,17 @@ void BM_CuckooRead(uint64_t num, uint32_t key_length,
}
std::string fname = FLAGS_file_dir + "/cuckoo_read_benchmark";
uint64_t predicted_file_size =
num * (key_length + value_length) / hash_ratio + 1024;
unique_ptr<WritableFile> writable_file;
ASSERT_OK(env->NewWritableFile(fname, &writable_file, env_options));
CuckooTableBuilder builder(
writable_file.get(), key_length + 8, value_length, hash_ratio,
predicted_file_size, kMaxNumHashTable, 1000, true, GetSliceMurmurHash);
writable_file.get(), hash_ratio,
kMaxNumHashTable, 1000, GetSliceMurmurHash);
ASSERT_OK(builder.status());
for (uint64_t key_idx = 0; key_idx < num; ++key_idx) {
// Value is just a part of key.
std::string new_key(reinterpret_cast<char*>(&key_idx), sizeof(key_idx));
new_key = std::string(key_length - new_key.size(), 'k') + new_key;
ParsedInternalKey ikey(new_key, num, kTypeValue);
ParsedInternalKey ikey(new_key, 0, kTypeValue);
std::string full_key;
AppendInternalKey(&full_key, ikey);
builder.Add(Slice(full_key), Slice(&full_key[0], value_length));
......