提交 1fb2e274 编写于 作者: S Siying Dong 提交者: Facebook Github Bot

Remove some components (#4101)

Summary:
Remove some components that we have never heard of anyone using.
Pull Request resolved: https://github.com/facebook/rocksdb/pull/4101

Differential Revision: D8825431

Pulled By: siying

fbshipit-source-id: 97a12ad3cad4ab12c82741a5ba49669aaa854180
上级 d56ac22b
......@@ -643,18 +643,10 @@ set(SOURCES
utilities/cassandra/format.cc
utilities/cassandra/merge_operator.cc
utilities/checkpoint/checkpoint_impl.cc
utilities/col_buf_decoder.cc
utilities/col_buf_encoder.cc
utilities/column_aware_encoding_util.cc
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc
utilities/date_tiered/date_tiered_db_impl.cc
utilities/debug.cc
utilities/document/document_db.cc
utilities/document/json_document.cc
utilities/document/json_document_builder.cc
utilities/env_mirror.cc
utilities/env_timed.cc
utilities/geodb/geodb_impl.cc
utilities/leveldb_options/leveldb_options.cc
utilities/lua/rocks_lua_compaction_filter.cc
utilities/memory/memory_util.cc
......@@ -671,9 +663,7 @@ set(SOURCES
utilities/persistent_cache/block_cache_tier_metadata.cc
utilities/persistent_cache/persistent_cache_tier.cc
utilities/persistent_cache/volatile_tier_impl.cc
utilities/redis/redis_lists.cc
utilities/simulator_cache/sim_cache.cc
utilities/spatialdb/spatial_db.cc
utilities/table_properties_collectors/compact_on_deletion_collector.cc
utilities/trace/file_trace_reader_writer.cc
utilities/transactions/optimistic_transaction_db_impl.cc
......@@ -975,11 +965,6 @@ if(WITH_TESTS)
utilities/cassandra/cassandra_row_merge_test.cc
utilities/cassandra/cassandra_serialize_test.cc
utilities/checkpoint/checkpoint_test.cc
utilities/column_aware_encoding_test.cc
utilities/date_tiered/date_tiered_test.cc
utilities/document/document_db_test.cc
utilities/document/json_document_test.cc
utilities/geodb/geodb_test.cc
utilities/lua/rocks_lua_test.cc
utilities/memory/memory_test.cc
utilities/merge_operators/string_append/stringappend_test.cc
......@@ -988,8 +973,6 @@ if(WITH_TESTS)
utilities/options/options_util_test.cc
utilities/persistent_cache/hash_table_test.cc
utilities/persistent_cache/persistent_cache_test.cc
utilities/redis/redis_lists_test.cc
utilities/spatialdb/spatial_db_test.cc
utilities/simulator_cache/sim_cache_test.cc
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc
utilities/transactions/optimistic_transaction_test.cc
......@@ -1009,7 +992,6 @@ if(WITH_TESTS)
db/range_del_aggregator_bench.cc
tools/db_bench.cc
table/table_reader_bench.cc
utilities/column_aware_encoding_exp.cc
utilities/persistent_cache/hash_table_bench.cc)
add_library(testharness OBJECT util/testharness.cc)
foreach(sourcefile ${BENCHMARKS})
......
......@@ -8,6 +8,7 @@
### Public API Change
* Transaction::GetForUpdate is extended with a do_validate parameter with default value of true. If false it skips validating the snapshot before doing the read. Similarly ::Merge, ::Put, ::Delete, and ::SingleDelete are extended with assume_tracked with default value of false. If true it indicates that call is assumed to be after a ::GetForUpdate.
* `TableProperties::num_entries` and `TableProperties::num_deletions` now also account for number of range tombstones.
* Remove geodb, spatial_db, document_db, json_document, date_tiered_db, and redis_lists.
### Bug Fixes
* Fix a deadlock caused by compaction and file ingestion waiting for each other in the event of write stalls.
......
......@@ -403,7 +403,7 @@ BENCHTOOLOBJECTS = $(BENCH_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
ANALYZETOOLOBJECTS = $(ANALYZER_LIB_SOURCES:.cc=.o)
EXPOBJECTS = $(EXP_LIB_SOURCES:.cc=.o) $(LIBOBJECTS) $(TESTUTIL)
EXPOBJECTS = $(LIBOBJECTS) $(TESTUTIL)
TESTS = \
db_basic_test \
......@@ -482,7 +482,6 @@ TESTS = \
merger_test \
util_merge_operators_test \
options_file_test \
redis_test \
reduce_levels_test \
plain_table_db_test \
comparator_db_test \
......@@ -496,12 +495,8 @@ TESTS = \
cassandra_row_merge_test \
cassandra_serialize_test \
ttl_test \
date_tiered_test \
backupable_db_test \
document_db_test \
json_document_test \
sim_cache_test \
spatial_db_test \
version_edit_test \
version_set_test \
compaction_picker_test \
......@@ -513,7 +508,6 @@ TESTS = \
deletefile_test \
obsolete_files_test \
table_test \
geodb_test \
delete_scheduler_test \
options_test \
options_settable_test \
......@@ -530,7 +524,6 @@ TESTS = \
compaction_job_test \
thread_list_test \
sst_dump_test \
column_aware_encoding_test \
compact_files_test \
optimistic_transaction_test \
write_callback_test \
......@@ -604,7 +597,7 @@ TEST_LIBS = \
librocksdb_env_basic_test.a
# TODO: add back forward_iterator_bench, after making it build in all environments.
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench column_aware_encoding_exp persistent_cache_bench range_del_aggregator_bench
BENCHMARKS = db_bench table_reader_bench cache_bench memtablerep_bench persistent_cache_bench range_del_aggregator_bench
# if user didn't config LIBNAME, set the default
ifeq ($(LIBNAME),)
......@@ -1153,9 +1146,6 @@ cassandra_row_merge_test: utilities/cassandra/cassandra_row_merge_test.o utiliti
cassandra_serialize_test: utilities/cassandra/cassandra_serialize_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
redis_test: utilities/redis/redis_lists_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
hash_table_test: utilities/persistent_cache/hash_table_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
......@@ -1294,18 +1284,9 @@ backupable_db_test: utilities/backupable/backupable_db_test.o $(LIBOBJECTS) $(TE
checkpoint_test: utilities/checkpoint/checkpoint_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
sim_cache_test: utilities/simulator_cache/sim_cache_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
env_mirror_test: utilities/env_mirror_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
......@@ -1323,9 +1304,6 @@ object_registry_test: utilities/object_registry_test.o $(LIBOBJECTS) $(TESTHARNE
ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
date_tiered_test: utilities/date_tiered/date_tiered_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
write_batch_with_index_test: utilities/write_batch_with_index/write_batch_with_index_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
......@@ -1452,9 +1430,6 @@ deletefile_test: db/deletefile_test.o $(LIBOBJECTS) $(TESTHARNESS)
obsolete_files_test: db/obsolete_files_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
geodb_test: utilities/geodb/geodb_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
rocksdb_dump: tools/dump/rocksdb_dump.o $(LIBOBJECTS)
$(AM_LINK)
......@@ -1503,9 +1478,6 @@ timer_queue_test: util/timer_queue_test.o $(LIBOBJECTS) $(TESTHARNESS)
sst_dump_test: tools/sst_dump_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
column_aware_encoding_test: utilities/column_aware_encoding_test.o $(TESTHARNESS) $(EXPOBJECTS)
$(AM_LINK)
optimistic_transaction_test: utilities/transactions/optimistic_transaction_test.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
......@@ -1545,9 +1517,6 @@ sst_dump: tools/sst_dump.o $(LIBOBJECTS)
blob_dump: tools/blob_dump.o $(LIBOBJECTS)
$(AM_LINK)
column_aware_encoding_exp: utilities/column_aware_encoding_exp.o $(EXPOBJECTS)
$(AM_LINK)
repair_test: db/repair_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)
......@@ -1973,7 +1942,7 @@ endif
# Source files dependencies detection
# ---------------------------------------------------------------------------
all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(EXP_LIB_SOURCES) $(ANALYZER_LIB_SOURCES)
all_sources = $(LIB_SOURCES) $(MAIN_SOURCES) $(MOCK_LIB_SOURCES) $(TOOL_LIB_SOURCES) $(BENCH_LIB_SOURCES) $(TEST_LIB_SOURCES) $(ANALYZER_LIB_SOURCES)
DEPFILES = $(all_sources:.cc=.cc.d)
# Add proper dependency support so changing a .h file forces a .cc file to
......
......@@ -255,14 +255,9 @@ cpp_library(
"utilities/checkpoint/checkpoint_impl.cc",
"utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc",
"utilities/convenience/info_log_finder.cc",
"utilities/date_tiered/date_tiered_db_impl.cc",
"utilities/debug.cc",
"utilities/document/document_db.cc",
"utilities/document/json_document.cc",
"utilities/document/json_document_builder.cc",
"utilities/env_mirror.cc",
"utilities/env_timed.cc",
"utilities/geodb/geodb_impl.cc",
"utilities/leveldb_options/leveldb_options.cc",
"utilities/lua/rocks_lua_compaction_filter.cc",
"utilities/memory/memory_util.cc",
......@@ -279,9 +274,7 @@ cpp_library(
"utilities/persistent_cache/block_cache_tier_metadata.cc",
"utilities/persistent_cache/persistent_cache_tier.cc",
"utilities/persistent_cache/volatile_tier_impl.cc",
"utilities/redis/redis_lists.cc",
"utilities/simulator_cache/sim_cache.cc",
"utilities/spatialdb/spatial_db.cc",
"utilities/table_properties_collectors/compact_on_deletion_collector.cc",
"utilities/trace/file_trace_reader_writer.cc",
"utilities/transactions/optimistic_transaction.cc",
......@@ -319,9 +312,6 @@ cpp_library(
"util/testharness.cc",
"util/testutil.cc",
"utilities/cassandra/test_utils.cc",
"utilities/col_buf_decoder.cc",
"utilities/col_buf_encoder.cc",
"utilities/column_aware_encoding_util.cc",
],
auto_headers = AutoHeaders.RECURSIVE_GLOB,
arch_preprocessor_flags = ROCKSDB_ARCH_PREPROCESSOR_FLAGS,
......@@ -444,11 +434,6 @@ ROCKS_TESTS = [
"util/coding_test.cc",
"serial",
],
[
"column_aware_encoding_test",
"utilities/column_aware_encoding_test.cc",
"serial",
],
[
"column_family_test",
"db/column_family_test.cc",
......@@ -519,11 +504,6 @@ ROCKS_TESTS = [
"table/data_block_hash_index_test.cc",
"serial",
],
[
"date_tiered_test",
"utilities/date_tiered/date_tiered_test.cc",
"serial",
],
[
"db_basic_test",
"db/db_basic_test.cc",
......@@ -684,11 +664,6 @@ ROCKS_TESTS = [
"db/deletefile_test.cc",
"serial",
],
[
"document_db_test",
"utilities/document/document_db_test.cc",
"serial",
],
[
"dynamic_bloom_test",
"util/dynamic_bloom_test.cc",
......@@ -764,11 +739,6 @@ ROCKS_TESTS = [
"table/full_filter_block_test.cc",
"serial",
],
[
"geodb_test",
"utilities/geodb/geodb_test.cc",
"serial",
],
[
"hash_table_test",
"utilities/persistent_cache/hash_table_test.cc",
......@@ -799,11 +769,6 @@ ROCKS_TESTS = [
"monitoring/iostats_context_test.cc",
"serial",
],
[
"json_document_test",
"utilities/document/json_document_test.cc",
"serial",
],
[
"ldb_cmd_test",
"tools/ldb_cmd_test.cc",
......@@ -969,11 +934,6 @@ ROCKS_TESTS = [
"util/slice_transform_test.cc",
"serial",
],
[
"spatial_db_test",
"utilities/spatialdb/spatial_db_test.cc",
"serial",
],
[
"sst_dump_test",
"tools/sst_dump_test.cc",
......
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <map>
#include <string>
#include <vector>
#include "rocksdb/db.h"
namespace rocksdb {
// Date tiered database is a wrapper of DB that implements
// a simplified DateTieredCompactionStrategy by using multiple column families
// as time windows.
//
// DateTieredDB provides an interface similar to DB, but it assumes that user
// provides keys with last 8 bytes encoded as timestamp in seconds. DateTieredDB
// is assigned with a TTL to declare when data should be deleted.
//
// DateTieredDB hides column families layer from standard RocksDB instance. It
// uses multiple column families to manage time series data, each containing a
// specific range of time. Column families are named by its maximum possible
// timestamp. A column family is created automatically when data newer than the
// latest timestamp of all existing column families is inserted. The time range
// of a column family is configurable by `column_family_interval`. By doing
// this, we guarantee that compaction will only happen in a column family.
//
// DateTieredDB is assigned with a TTL. When all data in a column family are
// expired (CF_Timestamp <= CUR_Timestamp - TTL), we directly drop the whole
// column family.
//
// TODO(jhli): This is only a simplified version of DTCS. In a complete DTCS,
// time windows can be merged over time, so that older time windows will have
// larger time range. Also, compaction are executed only for adjacent SST files
// to guarantee there is no time overlap between SST files.
class DateTieredDB {
 public:
  // Opens the DateTieredDB named `dbname`. As with DB::Open(), the created
  // database object is stored in `dbptr`.
  //
  // `ttl` specifies the length of time that keys should exist in the
  // database; `column_family_interval` specifies the time range of a column
  // family interval. The database is opened read only when `read_only` is
  // true.
  //
  // TODO(jhli): Should use an option object that includes ttl and
  // column_family_interval.
  static Status Open(const Options& options, const std::string& dbname,
                     DateTieredDB** dbptr, int64_t ttl,
                     int64_t column_family_interval, bool read_only = false);

  explicit DateTieredDB() = default;
  virtual ~DateTieredDB() = default;

  // Counterpart of DB::Put(). The target column family is chosen from the
  // timestamp stored in the last 8 bytes of the user key. A key that is
  // already obsolete is not inserted.
  //
  // The trailing 8 bytes of every key are assumed to be a timestamp: a 64-bit
  // signed integer holding the number of seconds since
  // 1970-01-01 00:00:00 (UTC) (same as Env::GetCurrentTime()), encoded in big
  // endian.
  virtual Status Put(const WriteOptions& options, const Slice& key,
                     const Slice& val) = 0;

  // Counterpart of DB::Get(). The column family is chosen from the timestamp
  // in the key; an already obsolete key is not found.
  virtual Status Get(const ReadOptions& options, const Slice& key,
                     std::string* value) = 0;

  // Counterpart of DB::Delete(). The column family is chosen from the
  // timestamp in the key; an already obsolete key yields a NotFound status.
  virtual Status Delete(const WriteOptions& options, const Slice& key) = 0;

  // Counterpart of DB::KeyMayExist(). The column family is chosen from the
  // timestamp in the key; returns false when the key is already obsolete.
  virtual bool KeyMayExist(const ReadOptions& options, const Slice& key,
                           std::string* value,
                           bool* value_found = nullptr) = 0;

  // Counterpart of DB::Merge(). The column family is chosen from the
  // timestamp in the key.
  virtual Status Merge(const WriteOptions& options, const Slice& key,
                       const Slice& value) = 0;

  // Creates an iterator that hides the low level details by internally
  // merging the results from all active time series column families. Column
  // families are not deleted until all of their data is obsolete, so this
  // iterator can possibly access obsolete key value pairs.
  virtual Iterator* NewIterator(const ReadOptions& opts) = 0;

  // Explicitly drops the column families in which all keys are obsolete. The
  // same process is also done implicitly by Put().
  virtual Status DropObsoleteColumnFamilies() = 0;

  static const uint64_t kTSLength = sizeof(int64_t);  // size of timestamp
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "rocksdb/utilities/stackable_db.h"
#include "rocksdb/utilities/json_document.h"
#include "rocksdb/db.h"
namespace rocksdb {
// IMPORTANT: DocumentDB is a work in progress. It is unstable and we might
// change the API without warning. Talk to RocksDB team before using this in
// production ;)
// DocumentDB is a layer on top of RocksDB that provides a very simple JSON API.
// When creating a DB, you specify a list of indexes you want to keep on your
// data. You can insert a JSON document to the DB, which is automatically
// indexed. Every document added to the DB needs to have an "_id" field, which
// is automatically indexed and is a unique primary key. All other indexes are
// non-unique.
// NOTE: field names in the JSON are NOT allowed to start with '$' or
// contain '.'. We don't currently enforce that rule, but DocumentDB will
// behave badly if it is violated.
// Cursor is what you get as a result of executing query. To get all
// results from a query, call Next() on a Cursor while Valid() returns true
class Cursor {
 public:
  Cursor() = default;
  virtual ~Cursor() = default;

  // No copying allowed. The copy operations are deleted (instead of being
  // declared private and left undefined) so that an accidental copy is
  // rejected by the compiler rather than surfacing as a link error.
  Cursor(const Cursor&) = delete;
  Cursor& operator=(const Cursor&) = delete;

  // True while the cursor is positioned on a result; callers are expected to
  // keep calling Next() for as long as this returns true.
  virtual bool Valid() const = 0;

  // Advances the cursor to the next result.
  virtual void Next() = 0;

  // Returns the document at the current position.
  // Lifecycle of the returned JSONDocument is until the next Next() call
  virtual const JSONDocument& document() const = 0;

  // Returns the status associated with this cursor.
  virtual Status status() const = 0;
};
// Options consumed by DocumentDB::Open().
struct DocumentDBOptions {
  int background_threads = 4;
  uint64_t memtable_size = uint64_t{128} << 20;  // 128 MB
  uint64_t cache_size = uint64_t{1} << 30;       // 1 GB
};
// TODO(icanadi) Add `JSONDocument* info` parameter to all calls that can be
// used by the caller to get more information about the call execution (number
// of dropped records, number of updated records, etc.)
// Abstract document-store interface derived from StackableDB. Apart from the
// constructor and the static Open() factory, every operation declared here is
// pure virtual.
class DocumentDB : public StackableDB {
public:
// Describes one index: a JSON `description` plus the index `name`.
struct IndexDescriptor {
// Currently, you can only define an index on a single field. To specify an
// index on a field X, set index description to JSON "{X: 1}"
// Currently the value needs to be 1, which means ascending.
// In the future, we plan to also support indexes on multiple keys, where
// you could mix ascending sorting (1) with descending sorting indexes (-1)
// NOTE(review): ownership of the pointed-to document is not visible in this
// header -- confirm who is responsible for freeing it.
JSONDocument* description;
// Name of the index (presumably the name used by DropIndex() and by the
// `$index` query field -- confirm).
std::string name;
};
// Open DocumentDB with specified indexes. The list of indexes has to be
// complete, i.e. include all indexes present in the DB, except the primary
// key index.
// Otherwise, Open() will return an error
// The opened database is returned through `db`; `read_only` defaults to false.
static Status Open(const DocumentDBOptions& options, const std::string& name,
const std::vector<IndexDescriptor>& indexes,
DocumentDB** db, bool read_only = false);
// Forwards `db` to the StackableDB base class.
explicit DocumentDB(DB* db) : StackableDB(db) {}
// Create a new index. It will stop all writes for the duration of the call.
// All current documents in the DB are scanned and corresponding index entries
// are created
virtual Status CreateIndex(const WriteOptions& write_options,
const IndexDescriptor& index) = 0;
// Drop an index. Client is responsible to make sure that index is not being
// used by currently executing queries
virtual Status DropIndex(const std::string& name) = 0;
// Insert a document to the DB. The document needs to have a primary key "_id"
// which can either be a string or an integer. Otherwise the write will fail
// with InvalidArgument.
virtual Status Insert(const WriteOptions& options,
const JSONDocument& document) = 0;
// Deletes all documents matching a filter atomically
virtual Status Remove(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& query) = 0;
// Does this sequence of operations:
// 1. Find all documents matching a filter
// 2. For all documents, atomically:
// 2.1. apply the update operators
// 2.2. update the secondary indexes
//
// Currently only $set update operator is supported.
// Syntax is: {$set: {key1: value1, key2: value2, etc...}}
// This operator will change a document's key1 field to value1, key2 to
// value2, etc. New values will be set even if a document didn't have an entry
// for the specified key.
//
// You can not change a primary key of a document.
//
// Update example: Update({id: {$gt: 5}, $index: id}, {$set: {enabled: true}})
virtual Status Update(const ReadOptions& read_options,
const WriteOptions& write_options,
const JSONDocument& filter,
const JSONDocument& updates) = 0;
// query has to be an array in which every element is an operator. Currently
// only $filter operator is supported. Syntax of $filter operator is:
// {$filter: {key1: condition1, key2: condition2, etc.}} where conditions can
// be either:
// 1) a single value in which case the condition is equality condition, or
// 2) a defined operator, like {$gt: 4}, which will match all documents that
// have key greater than 4.
//
// Supported operators are:
// 1) $gt -- greater than
// 2) $gte -- greater than or equal
// 3) $lt -- less than
// 4) $lte -- less than or equal
// If you want the filter to use an index, you need to specify it like this:
// {$filter: {...(conditions)..., $index: index_name}}
//
// Example query:
// * [{$filter: {name: John, age: {$gte: 18}, $index: age}}]
// will return all Johns whose age is greater or equal to 18 and it will use
// index "age" to satisfy the query.
//
// NOTE(review): the result is a raw Cursor*; presumably the caller owns and
// deletes it -- confirm against the implementation.
virtual Cursor* Query(const ReadOptions& read_options,
const JSONDocument& query) = 0;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE
#pragma once
#include <limits>
#include <string>
#include <vector>
#include "rocksdb/status.h"
#include "rocksdb/utilities/stackable_db.h"
namespace rocksdb {
//
// Configurable options needed for setting up a Geo database
//
struct GeoDBOptions {
  // Stores `_info_log` in `info_log`; with no argument the logger is nullptr.
  explicit GeoDBOptions(Logger* _info_log = nullptr) : info_log(_info_log) {}

  // Backup info and error messages will be written to info_log
  // if non-nullptr.
  // Default: nullptr
  Logger* info_log;
};
//
// A position in the earth's geoid
//
class GeoPosition {
 public:
  // Builds a position from latitude `la` and longitude `lo`; both default
  // to 0.
  explicit GeoPosition(double la = 0, double lo = 0)
      : latitude(la), longitude(lo) {}

  double latitude;
  double longitude;
};
//
// Description of an object on the Geoid. It is located by a GPS location,
// and is identified by the id. The value associated with this object is
// an opaque string 'value'. Different objects identified by unique id's
// can have the same gps-location associated with them.
//
class GeoObject {
 public:
  // Creates an object with a default-constructed position and empty
  // id/value strings.
  GeoObject() {}

  // Creates an object located at `pos`, identified by `i` and carrying the
  // opaque string `val`.
  GeoObject(const GeoPosition& pos, const std::string& i,
            const std::string& val)
      : position(pos), id(i), value(val) {}

  GeoPosition position;
  std::string id;
  std::string value;
};
// Abstract iterator over GeoObjects; GeoDB::SearchRadial() returns one.
class GeoIterator {
 public:
  GeoIterator() = default;
  virtual ~GeoIterator() = default;

  // Moves the iterator to the next object.
  virtual void Next() = 0;

  // Reports whether the iterator is currently positioned on an object.
  virtual bool Valid() const = 0;

  // Returns the object at the current position.
  virtual const GeoObject& geo_object() = 0;

  // Returns the status associated with this iterator.
  virtual Status status() const = 0;
};
//
// Stack your DB with GeoDB to be able to get geo-spatial support
//
class GeoDB : public StackableDB {
 public:
  // GeoDBOptions have to be the same as the ones used in a previous
  // incarnation of the DB
  //
  // GeoDB owns the pointer `DB* db` now. You should not delete it or
  // use it after the invocation of GeoDB
  //
  // The options argument is not used by this base-class constructor, which is
  // why its name is commented out.
  GeoDB(DB* db, const GeoDBOptions& /*options*/) : StackableDB(db) {}
  virtual ~GeoDB() {}

  // Insert a new object into the location database. The object is
  // uniquely identified by the id. If an object with the same id already
  // exists in the db, then the old one is overwritten by the new
  // object being inserted here.
  virtual Status Insert(const GeoObject& object) = 0;

  // Retrieve the value of the object located at the specified GPS
  // location and is identified by the 'id'.
  virtual Status GetByPosition(const GeoPosition& pos, const Slice& id,
                               std::string* value) = 0;

  // Retrieve the value of the object identified by the 'id'. This method
  // could potentially be slower than GetByPosition
  virtual Status GetById(const Slice& id, GeoObject* object) = 0;

  // Delete the specified object
  virtual Status Remove(const Slice& id) = 0;

  // Returns an iterator for the items within a circular radius from the
  // specified gps location. If 'number_of_values' is specified,
  // then the iterator is capped to that number of objects.
  // The radius is specified in 'meters'.
  //
  // The default cap is the largest int. It is spelled with
  // std::numeric_limits (from <limits>) rather than INT_MAX so this header
  // does not depend on <climits> arriving through another include; the extra
  // parentheses keep a function-like `max` macro from being expanded.
  virtual GeoIterator* SearchRadial(
      const GeoPosition& pos, double radius,
      int number_of_values = (std::numeric_limits<int>::max)()) = 0;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <deque>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "rocksdb/slice.h"
// We use JSONDocument for DocumentDB API
// Implementation inspired by folly::dynamic, rapidjson and fbson
// Forward declarations of the fbson types referenced below, so that the
// fbson headers do not have to be included here.
namespace fbson {
class FbsonValue;
class ObjectVal;
class FbsonOutStream;
template <typename T>
class FbsonWriterT;
using FbsonWriter = FbsonWriterT<FbsonOutStream>;
}  // namespace fbson
namespace rocksdb {
// NOTE: none of this is thread-safe
// Value type representing a JSON value (see `Type` for the possible kinds).
// Typed accessors carry a REQUIRES precondition on the stored kind.
class JSONDocument {
public:
// Parses the NUL-terminated text `json`.
// return nullptr on parse failure
// NOTE(review): the result is a raw pointer; presumably the caller owns it --
// confirm against the implementation.
static JSONDocument* ParseJSON(const char* json);
// Kind of value a document holds, as reported by type().
enum Type {
kNull,
kArray,
kBool,
kDouble,
kInt64,
kObject,
kString,
};
// The constructors below are intentionally non-explicit so that plain C++
// values convert to JSONDocument implicitly.
/* implicit */ JSONDocument(); // null
/* implicit */ JSONDocument(bool b);
/* implicit */ JSONDocument(double d);
/* implicit */ JSONDocument(int8_t i);
/* implicit */ JSONDocument(int16_t i);
/* implicit */ JSONDocument(int32_t i);
/* implicit */ JSONDocument(int64_t i);
/* implicit */ JSONDocument(const std::string& s);
/* implicit */ JSONDocument(const char* s);
// constructs JSONDocument of specific type with default value
explicit JSONDocument(Type _type);
JSONDocument(const JSONDocument& json_document);
JSONDocument(JSONDocument&& json_document);
// Returns the kind of value held by this document.
Type type() const;
// REQUIRES: IsObject()
bool Contains(const std::string& key) const;
// REQUIRES: IsObject()
// Returns non-owner object
JSONDocument operator[](const std::string& key) const;
// REQUIRES: IsArray() == true || IsObject() == true
size_t Count() const;
// REQUIRES: IsArray()
// Returns non-owner object
JSONDocument operator[](size_t i) const;
// Assignment takes its argument by value, so it accepts both lvalues and
// rvalues.
JSONDocument& operator=(JSONDocument jsonDocument);
// Kind predicates, one per enumerator of Type.
bool IsNull() const;
bool IsArray() const;
bool IsBool() const;
bool IsDouble() const;
bool IsInt64() const;
bool IsObject() const;
bool IsString() const;
// REQUIRES: IsBool() == true
bool GetBool() const;
// REQUIRES: IsDouble() == true
double GetDouble() const;
// REQUIRES: IsInt64() == true
int64_t GetInt64() const;
// REQUIRES: IsString() == true
std::string GetString() const;
bool operator==(const JSONDocument& rhs) const;
bool operator!=(const JSONDocument& rhs) const;
// NOTE(review): presumably returns an owning copy, in contrast to the
// "non-owner" objects returned by operator[] -- confirm.
JSONDocument Copy() const;
// NOTE(review): presumably tells owning documents apart from the non-owner
// objects returned by operator[] -- confirm.
bool IsOwner() const;
std::string DebugString() const;
private:
// Declared early so that Items() below can name it as its return type; the
// definition follows further down.
class ItemsIteratorGenerator;
public:
// REQUIRES: IsObject()
// The returned generator exposes begin()/end() over the object's items.
ItemsIteratorGenerator Items() const;
// appends serialized object to dst
void Serialize(std::string* dst) const;
// returns nullptr if Slice doesn't represent valid serialized JSONDocument
static JSONDocument* Deserialize(const Slice& src);
private:
friend class JSONDocumentBuilder;
// NOTE(review): `makeCopy` presumably selects between copying `val` and
// merely referencing it -- confirm against the implementation.
JSONDocument(fbson::FbsonValue* val, bool makeCopy);
void InitFromValue(const fbson::FbsonValue* val);
// iteration on objects
// Dereferencing yields a (key, JSONDocument) pair by value. The iterator
// state lives in a separately defined Impl held through a unique_ptr; only a
// move constructor is declared.
class const_item_iterator {
private:
class Impl;
public:
typedef std::pair<std::string, JSONDocument> value_type;
explicit const_item_iterator(Impl* impl);
const_item_iterator(const_item_iterator&&);
const_item_iterator& operator++();
bool operator!=(const const_item_iterator& other);
value_type operator*();
~const_item_iterator();
private:
friend class ItemsIteratorGenerator;
std::unique_ptr<Impl> it_;
};
// Range adaptor over an fbson::ObjectVal. It stores a reference to the
// object, so the object has to outlive the generator.
class ItemsIteratorGenerator {
public:
explicit ItemsIteratorGenerator(const fbson::ObjectVal& object);
const_item_iterator begin() const;
const_item_iterator end() const;
private:
const fbson::ObjectVal& object_;
};
// NOTE(review): presumably the buffer backing `value_` when this document
// owns its data -- confirm.
std::unique_ptr<char[]> data_;
mutable fbson::FbsonValue* value_;
// Our serialization format's first byte specifies the encoding version. That
// way, we can easily change our format while providing backwards
// compatibility. This constant specifies the current version of the
// serialization format
static const char kSerializationFormatVersion;
};
// Builds a JSONDocument piece by piece through an fbson writer.
class JSONDocumentBuilder {
 public:
  JSONDocumentBuilder();
  explicit JSONDocumentBuilder(fbson::FbsonOutStream* out);
  ~JSONDocumentBuilder();

  void Reset();

  // Array and object delimiters.
  bool WriteStartArray();
  bool WriteEndArray();
  bool WriteStartObject();
  bool WriteEndObject();

  // Content writers.
  bool WriteKeyValue(const std::string& key, const JSONDocument& value);
  bool WriteJSONDocument(const JSONDocument& value);

  // Returns the document assembled so far.
  JSONDocument GetJSONDocument();

 private:
  std::unique_ptr<fbson::FbsonWriter> writer_;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "rocksdb/db.h"
#include "rocksdb/slice.h"
#include "rocksdb/utilities/stackable_db.h"
namespace rocksdb {
namespace spatial {
// NOTE: SpatialDB is experimental and we might change its API without warning.
// Please talk to us before developing against SpatialDB API.
//
// SpatialDB is a support for spatial indexes built on top of RocksDB.
// When creating a new SpatialDB, clients specify a list of spatial indexes to
// build on their data. Each spatial index is defined by the area and
// granularity. If you're storing map data, different spatial index
// granularities can be used for different zoom levels.
//
// Each element inserted into SpatialDB has:
// * a bounding box, which determines how the element will be indexed
// * string blob, which will usually be WKB representation of the polygon
// (http://en.wikipedia.org/wiki/Well-known_text)
// * feature set, which is a map of key-value pairs, where value can be null,
// int, double, bool, string
// * a list of indexes to insert the element in
//
// Each query is executed on a single spatial index. Query guarantees that it
// will return all elements intersecting the specified bounding box, but it
// might also return some extra non-intersecting elements.
// Variant is a class that can be many things: null, bool, int, double or string
// It is used to store different value types in FeatureSet (see below)
struct Variant {
  // Don't change the values here, they are persisted on disk
  enum Type {
    kNull = 0x0,
    kBool = 0x1,
    kInt = 0x2,
    kDouble = 0x3,
    kString = 0x4,
  };

  Variant() : type_(kNull) {}
  /* implicit */ Variant(bool b) : type_(kBool) { data_.b = b; }
  /* implicit */ Variant(uint64_t i) : type_(kInt) { data_.i = i; }
  /* implicit */ Variant(double d) : type_(kDouble) { data_.d = d; }
  /* implicit */ Variant(const std::string& s) : type_(kString) {
    // Construct the string in place inside the union's raw byte buffer.
    new (data_.s) std::string(s);
  }

  Variant(const Variant& v) : type_(v.type_) { Init(v, data_); }
  Variant& operator=(const Variant& v);

  // Starts out as null and then takes over `rhs` via move assignment.
  Variant(Variant&& rhs) : type_(kNull) { *this = std::move(rhs); }
  Variant& operator=(Variant&& v);

  ~Variant() { Destroy(type_, data_); }

  Type type() const { return type_; }

  // The getters read the matching union member without checking type().
  bool get_bool() const { return data_.b; }
  uint64_t get_int() const { return data_.i; }
  double get_double() const { return data_.d; }
  const std::string& get_string() const { return *GetStringPtr(data_); }

  bool operator==(const Variant& other) const;
  bool operator!=(const Variant& other) const { return !operator==(other); }

 private:
  union Data {
    bool b;
    uint64_t i;
    double d;
    // The MS compiler in use is not C++11 compliant, so a std::string cannot
    // be a union member and raw storage of the same size is used instead.
    // Even with a compliant compiler the manual construction/destruction
    // would still be needed.
    char s[sizeof(std::string)];
  };

  Type type_;
  Data data_;

  // Avoid type_punned aliasing problem: go through a void* on the way from
  // the byte buffer to the std::string that lives in it.
  static std::string* GetStringPtr(Data& d) {
    void* raw = d.s;
    return static_cast<std::string*>(raw);
  }
  static const std::string* GetStringPtr(const Data& d) {
    const void* raw = d.s;
    return static_cast<const std::string*>(raw);
  }

  static void Init(const Variant&, Data&);

  // Runs the std::string destructor when `t` is kString; every other type
  // needs no cleanup.
  static void Destroy(Type t, Data& d) {
    if (t != kString) {
      return;
    }
    using std::string;
    GetStringPtr(d)->~string();
  }
};
// FeatureSet is a map of key-value pairs. One feature set is associated with
// each element in SpatialDB. It can be used to add rich data about the element.
class FeatureSet {
 private:
  typedef std::unordered_map<std::string, Variant> map;

 public:
  // Read-only forward iterator over the (key, value) pairs of a FeatureSet.
  // It wraps the underlying map's const_iterator and exposes only
  // pre-increment, comparison and dereference.
  class iterator {
   public:
    /* implicit */ iterator(const map::const_iterator itr) : itr_(itr) {}
    iterator& operator++() {
      ++itr_;
      return *this;
    }
    // Comparison and dereference do not modify the iterator, so they are
    // const-qualified and usable on const iterator objects as well.
    bool operator!=(const iterator& other) const { return itr_ != other.itr_; }
    bool operator==(const iterator& other) const { return itr_ == other.itr_; }
    // Returns a copy of the current (key, value) pair.
    map::value_type operator*() const { return *itr_; }

   private:
    map::const_iterator itr_;
  };

  FeatureSet() = default;

  // Associates value with key. Defined out of line.
  FeatureSet* Set(const std::string& key, const Variant& value);
  bool Contains(const std::string& key) const;
  // REQUIRES: Contains(key)
  const Variant& Get(const std::string& key) const;
  iterator Find(const std::string& key) const;

  // Iteration over all stored pairs, in the underlying map's order.
  iterator begin() const { return map_.begin(); }
  iterator end() const { return map_.end(); }

  void Clear();
  // Number of key-value pairs currently stored.
  size_t Size() const { return map_.size(); }

  void Serialize(std::string* output) const;
  // REQUIRED: empty FeatureSet
  bool Deserialize(const Slice& input);

  std::string DebugString() const;

 private:
  map map_;
};
// BoundingBox is a helper structure for defining rectangles representing
// bounding boxes of spatial elements.
template <typename T>
struct BoundingBox {
  // Corner coordinates of the rectangle.
  T min_x, min_y, max_x, max_y;

  // Leaves the coordinates default-initialized.
  BoundingBox() = default;

  BoundingBox(T _min_x, T _min_y, T _max_x, T _max_y)
      : min_x(_min_x), min_y(_min_y), max_x(_max_x), max_y(_max_y) {}

  // Returns true unless one rectangle lies strictly beyond the other on
  // either axis. Rectangles that merely touch along an edge or corner count
  // as intersecting.
  bool Intersects(const BoundingBox<T>& a) const {
    if (min_x > a.max_x) {
      return false;  // this box starts to the right of a
    }
    if (min_y > a.max_y) {
      return false;  // this box starts above a
    }
    if (a.min_x > max_x) {
      return false;  // a starts to the right of this box
    }
    if (a.min_y > max_y) {
      return false;  // a starts above this box
    }
    return true;
  }
};
// Options accepted by SpatialDB::Create() and SpatialDB::Open().
struct SpatialDBOptions {
  // NOTE(review): presumably a cache capacity in bytes -- confirm against the
  // implementation.
  uint64_t cache_size = uint64_t{1} << 30;  // 1GB
  // NOTE(review): presumably the number of background threads -- confirm.
  int num_threads = 16;
  // See the Compact() comment on SpatialDB for how bulk loading interacts
  // with compaction.
  bool bulk_load = true;
};
// Cursor is used to return data from the query to the client. To get all the
// data from the query, just call Next() while Valid() is true
// Abstract, non-copyable iterator over the results of a query. Concrete
// implementations supply all of the pure virtual members below.
class Cursor {
 public:
  Cursor() = default;
  virtual ~Cursor() = default;

  // No copying allowed. Deleting the operations turns any attempted copy into
  // a compile-time error at the call site.
  Cursor(const Cursor&) = delete;
  void operator=(const Cursor&) = delete;

  // True while the cursor is positioned on a result.
  virtual bool Valid() const = 0;
  // Advances to the next result.
  // REQUIRES: Valid()
  virtual void Next() = 0;

  // Lifetime of the underlying storage until the next call to Next()
  // REQUIRES: Valid()
  virtual const Slice blob() = 0;
  // Lifetime of the underlying storage until the next call to Next()
  // REQUIRES: Valid()
  virtual const FeatureSet& feature_set() = 0;

  virtual Status status() const = 0;
};
// SpatialIndexOptions defines a spatial index that will be built on the data
struct SpatialIndexOptions {
  // Spatial indexes are referenced by names
  std::string name;
  // An area that is indexed. If the element is not intersecting with spatial
  // index's bbox, it will not be inserted into the index
  BoundingBox<double> bbox;
  // tile_bits control the granularity of the spatial index. Each dimension of
  // the bbox will be split into (1 << tile_bits) tiles, so there will be a
  // total of (1 << tile_bits)^2 tiles. It is recommended to configure a size of
  // each tile to be approximately the size of the query on that spatial index
  uint32_t tile_bits;

  // Value-initializes bbox (all coordinates zero) and sets tile_bits to 0 so
  // that a default-constructed object never exposes indeterminate values.
  SpatialIndexOptions() : bbox(), tile_bits(0) {}

  SpatialIndexOptions(const std::string& _name,
                      const BoundingBox<double>& _bbox, uint32_t _tile_bits)
      : name(_name), bbox(_bbox), tile_bits(_tile_bits) {}
};
// Abstract interface for a database with spatial indexes. It derives from
// StackableDB and passes the wrapped DB* to it; Insert(), Compact() and
// Query() are pure virtual and must be supplied by a concrete subclass.
class SpatialDB : public StackableDB {
public:
// Creates the SpatialDB with specified list of indexes.
// REQUIRED: db doesn't exist
static Status Create(const SpatialDBOptions& options, const std::string& name,
const std::vector<SpatialIndexOptions>& spatial_indexes);
// Open the existing SpatialDB. The resulting db object will be returned
// through db parameter.
// REQUIRED: db was created using SpatialDB::Create
// NOTE(review): ownership of *db is not visible here -- confirm whether the
// caller must delete it.
static Status Open(const SpatialDBOptions& options, const std::string& name,
SpatialDB** db, bool read_only = false);
// Wraps db by forwarding it to the StackableDB base class.
explicit SpatialDB(DB* db) : StackableDB(db) {}
// Insert the element into the DB. Element will be inserted into specified
// spatial_indexes, based on specified bbox.
// REQUIRES: spatial_indexes.size() > 0
virtual Status Insert(const WriteOptions& write_options,
const BoundingBox<double>& bbox, const Slice& blob,
const FeatureSet& feature_set,
const std::vector<std::string>& spatial_indexes) = 0;
// Calling Compact() after inserting a bunch of elements should speed up
// reading. This is especially useful if you use SpatialDBOptions::bulk_load
// Num threads determines how many threads we'll use for compactions. Setting
// this to bigger number will use more IO and CPU, but finish faster
virtual Status Compact(int num_threads = 1) = 0;
// Query the specified spatial_index. Query will return all elements that
// intersect bbox, but it may also return some extra elements.
// NOTE(review): the result is a raw Cursor*; presumably the caller owns it --
// confirm against the implementation.
virtual Cursor* Query(const ReadOptions& read_options,
const BoundingBox<double>& bbox,
const std::string& spatial_index) = 0;
};
} // namespace spatial
} // namespace rocksdb
#endif // ROCKSDB_LITE
......@@ -177,14 +177,9 @@ LIB_SOURCES = \
utilities/checkpoint/checkpoint_impl.cc \
utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc \
utilities/convenience/info_log_finder.cc \
utilities/date_tiered/date_tiered_db_impl.cc \
utilities/debug.cc \
utilities/document/document_db.cc \
utilities/document/json_document.cc \
utilities/document/json_document_builder.cc \
utilities/env_mirror.cc \
utilities/env_timed.cc \
utilities/geodb/geodb_impl.cc \
utilities/leveldb_options/leveldb_options.cc \
utilities/lua/rocks_lua_compaction_filter.cc \
utilities/memory/memory_util.cc \
......@@ -201,9 +196,7 @@ LIB_SOURCES = \
utilities/persistent_cache/block_cache_tier_metadata.cc \
utilities/persistent_cache/persistent_cache_tier.cc \
utilities/persistent_cache/volatile_tier_impl.cc \
utilities/redis/redis_lists.cc \
utilities/simulator_cache/sim_cache.cc \
utilities/spatialdb/spatial_db.cc \
utilities/table_properties_collectors/compact_on_deletion_collector.cc \
utilities/trace/file_trace_reader_writer.cc \
utilities/transactions/optimistic_transaction.cc \
......@@ -249,11 +242,6 @@ MOCK_LIB_SOURCES = \
BENCH_LIB_SOURCES = \
tools/db_bench_tool.cc \
EXP_LIB_SOURCES = \
utilities/col_buf_decoder.cc \
utilities/col_buf_encoder.cc \
utilities/column_aware_encoding_util.cc
TEST_LIB_SOURCES = \
db/db_test_util.cc \
util/testharness.cc \
......@@ -330,7 +318,6 @@ MAIN_SOURCES = \
db/persistent_cache_test.cc \
db/plain_table_db_test.cc \
db/prefix_test.cc \
db/redis_test.cc \
db/repair_test.cc \
db/range_del_aggregator_test.cc \
db/range_del_aggregator_bench.cc \
......@@ -397,21 +384,13 @@ MAIN_SOURCES = \
utilities/cassandra/cassandra_row_merge_test.cc \
utilities/cassandra/cassandra_serialize_test.cc \
utilities/checkpoint/checkpoint_test.cc \
utilities/column_aware_encoding_exp.cc \
utilities/column_aware_encoding_test.cc \
utilities/date_tiered/date_tiered_test.cc \
utilities/document/document_db_test.cc \
utilities/document/json_document_test.cc \
utilities/geodb/geodb_test.cc \
utilities/lua/rocks_lua_test.cc \
utilities/memory/memory_test.cc \
utilities/merge_operators/string_append/stringappend_test.cc \
utilities/object_registry_test.cc \
utilities/option_change_migration/option_change_migration_test.cc \
utilities/options/options_util_test.cc \
utilities/redis/redis_lists_test.cc \
utilities/simulator_cache/sim_cache_test.cc \
utilities/spatialdb/spatial_db_test.cc \
utilities/table_properties_collectors/compact_on_deletion_collector_test.cc \
utilities/transactions/optimistic_transaction_test.cc \
utilities/transactions/transaction_test.cc \
......
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "utilities/col_buf_decoder.h"
#include <cstring>
#include <string>
#include "port/port.h"
namespace rocksdb {
// The destructor is declared pure virtual in the class, so it still needs
// this out-of-line (empty) definition.
ColBufDecoder::~ColBufDecoder() {}
namespace {
// Returns val, passed through EndianTransform(val, size) when the requested
// byte order (big_endian) differs from the host byte order; otherwise val is
// returned unchanged.
inline uint64_t EncodeFixed64WithEndian(uint64_t val, bool big_endian,
                                        size_t size) {
  const bool host_is_little = port::kLittleEndian;
  // Big-endian requested on a little-endian host, or little-endian requested
  // on a big-endian host: in both cases the two flags are equal.
  const bool needs_transform = (big_endian == host_is_little);
  if (!needs_transform) {
    return val;
  }
  return EndianTransform(val, size);
}
} // namespace
// Factory: builds the decoder that matches col_declaration.col_type.
// Recognized type names are "FixedLength", "VariableLength", "VariableChunk"
// and "LongFixedLength"; any other name yields nullptr. The decoder is
// allocated with new and returned as a raw pointer.
ColBufDecoder* ColBufDecoder::NewColBufDecoder(
    const ColDeclaration& col_declaration) {
  const std::string& kind = col_declaration.col_type;

  if (kind == "FixedLength") {
    return new FixedLengthColBufDecoder(
        col_declaration.size, col_declaration.col_compression_type,
        col_declaration.nullable, col_declaration.big_endian);
  }
  if (kind == "VariableLength") {
    return new VariableLengthColBufDecoder();
  }
  if (kind == "VariableChunk") {
    return new VariableChunkColBufDecoder(col_declaration.col_compression_type);
  }
  if (kind == "LongFixedLength") {
    return new LongFixedLengthColBufDecoder(col_declaration.size,
                                            col_declaration.nullable);
  }

  // Unrecognized column type
  return nullptr;
}
namespace {
// Reads one varint64 starting at *src_ptr into *val_ptr and advances *src_ptr
// past it. At most 10 bytes are offered to the parser; a parse failure is
// only caught by the assert.
void ReadVarint64(const char** src_ptr, uint64_t* val_ptr) {
  const char* const begin = *src_ptr;
  const char* const next = GetVarint64Ptr(begin, begin + 10, val_ptr);
  assert(next != nullptr);
  *src_ptr = next;
}
} // namespace
// Resets the decoding state and, for the dictionary-based compression types
// (kColDict, kColRleDict), loads the dictionary stored at the front of src:
// a varint entry count followed by that many varint keys. Returns the number
// of bytes of src that were consumed (0 when there is no dictionary).
size_t FixedLengthColBufDecoder::Init(const char* src) {
  const char* const start = src;

  // Forget everything left over from a previous block.
  remain_runs_ = 0;
  last_val_ = 0;
  dict_vec_.clear();

  const bool has_dictionary = (col_compression_type_ == kColDict ||
                               col_compression_type_ == kColRleDict);
  if (has_dictionary) {
    uint64_t entry_count;
    ReadVarint64(&src, &entry_count);
    for (uint64_t n = 0; n < entry_count; ++n) {
      uint64_t raw_key;
      ReadVarint64(&src, &raw_key);
      // Keys are stored in dict_vec_ already converted to the column's byte
      // order.
      dict_vec_.push_back(EncodeFixed64WithEndian(raw_key, big_endian_, size_));
    }
  }
  return src - start;
}
// Decodes one column value from src. On success size_ bytes are written at
// *dest and *dest is advanced by size_. Returns the number of bytes of src
// that were consumed. For a nullable column whose flag byte is zero, nothing
// is written to *dest and 1 is returned.
size_t FixedLengthColBufDecoder::Decode(const char* src, char** dest) {
uint64_t read_val = 0;
const char* orig_src = src;
// Upper bound handed to GetVarint64Ptr for every varint read below.
const char* src_limit = src + 20;
if (nullable_) {
// Nullable columns carry a one-byte flag in front of every value.
bool not_null;
not_null = *src;
src += 1;
if (!not_null) {
return 1;
}
}
if (IsRunLength(col_compression_type_)) {
// Run-length types: a (value, run length) pair is read only when the
// previous run is exhausted; otherwise no further input is consumed.
if (remain_runs_ == 0) {
const char* q;
run_val_ = 0;
if (col_compression_type_ == kColRle) {
// Plain RLE stores the run value as size_ raw bytes.
memcpy(&run_val_, src, size_);
src += size_;
} else {
// The other run-length types store the run value as a varint.
q = GetVarint64Ptr(src, src_limit, &run_val_);
assert(q != nullptr);
src = q;
}
// The run length always follows as a varint.
q = GetVarint64Ptr(src, src_limit, &remain_runs_);
assert(q != nullptr);
src = q;
// Delta and dictionary values are converted later, after being resolved.
if (col_compression_type_ != kColRleDeltaVarint &&
col_compression_type_ != kColRleDict) {
run_val_ = EncodeFixed64WithEndian(run_val_, big_endian_, size_);
}
}
read_val = run_val_;
} else {
if (col_compression_type_ == kColNoCompression) {
// Uncompressed: size_ raw bytes.
memcpy(&read_val, src, size_);
src += size_;
} else {
// Assume a column does not exceed 8 bytes here
const char* q = GetVarint64Ptr(src, src_limit, &read_val);
assert(q != nullptr);
src = q;
}
if (col_compression_type_ != kColDeltaVarint &&
col_compression_type_ != kColDict) {
read_val = EncodeFixed64WithEndian(read_val, big_endian_, size_);
}
}
uint64_t write_val = read_val;
if (col_compression_type_ == kColDeltaVarint ||
col_compression_type_ == kColRleDeltaVarint) {
// does not support 64 bit
// The low bit selects the mask (all ones or zero) that is XORed with the
// remaining bits to recover the signed delta.
uint64_t mask = (write_val & 1) ? (~uint64_t(0)) : 0;
int64_t delta = (write_val >> 1) ^ mask;
write_val = last_val_ + delta;
// last_val_ keeps the value before the byte-order conversion so that the
// next delta is applied to it.
uint64_t tmp = write_val;
write_val = EncodeFixed64WithEndian(write_val, big_endian_, size_);
last_val_ = tmp;
} else if (col_compression_type_ == kColRleDict ||
col_compression_type_ == kColDict) {
// Dictionary types: the decoded number is an index into dict_vec_, which
// Init() filled.
uint64_t dict_val = read_val;
assert(dict_val < dict_vec_.size());
write_val = dict_vec_[static_cast<size_t>(dict_val)];
}
// dest->append(reinterpret_cast<char*>(&write_val), size_);
memcpy(*dest, reinterpret_cast<char*>(&write_val), size_);
*dest += size_;
if (IsRunLength(col_compression_type_)) {
// One element of the current run has been produced.
--remain_runs_;
}
return src - orig_src;
}
// Decodes one value of a long fixed-length column. Copies size_ bytes from
// src to *dest and advances *dest by size_. Returns the number of bytes of
// src that were consumed.
//
// Nullable columns carry a one-byte flag in front of each value; when the
// flag is zero the value is null, nothing is written to *dest and 1 is
// returned. Non-nullable columns have no flag byte (see
// LongFixedLengthColBufEncoder::Append), so exactly size_ bytes are consumed
// in that case rather than size_ + 1.
size_t LongFixedLengthColBufDecoder::Decode(const char* src, char** dest) {
  const char* const orig_src = src;
  if (nullable_) {
    const bool not_null = *src;
    src += 1;
    if (!not_null) {
      return 1;
    }
  }
  memcpy(*dest, src, size_);
  *dest += size_;
  src += size_;
  // Report what was actually read: the flag byte (if any) plus the payload.
  return src - orig_src;
}
// Decodes one variable-length value. The input is a one-byte length k
// followed by k payload bytes; the same k + 1 bytes are written at *dest and
// *dest is advanced past them. Returns the number of bytes of src consumed
// (k + 1).
size_t VariableLengthColBufDecoder::Decode(const char* src, char** dest) {
  const uint8_t len = static_cast<uint8_t>(*src);
  // Write the length prefix into the output buffer. The destination must be
  // *dest (the buffer), not dest (the address of the caller's pointer
  // variable), otherwise the pointer itself would be corrupted.
  memcpy(*dest, &len, 1);
  *dest += 1;
  src += 1;
  // Followed by the payload.
  memcpy(*dest, src, len);
  *dest += len;
  return len + 1;
}
// Clears the dictionary and, when the column uses kColDict, reloads it from
// the front of src: a varint entry count followed by that many varint keys.
// Returns the number of bytes of src consumed (0 without a dictionary).
size_t VariableChunkColBufDecoder::Init(const char* src) {
  const char* const start = src;
  dict_vec_.clear();

  if (col_compression_type_ != kColDict) {
    return src - start;
  }

  uint64_t entry_count;
  ReadVarint64(&src, &entry_count);
  for (uint64_t n = 0; n < entry_count; ++n) {
    uint64_t key;
    ReadVarint64(&src, &key);
    dict_vec_.push_back(key);
  }
  return src - start;
}
size_t VariableChunkColBufDecoder::Decode(const char* src, char** dest) {
const char* orig_src = src;
uint64_t size = 0;
ReadVarint64(&src, &size);
int64_t full_chunks = size / 8;
uint64_t chunk_buf;
size_t chunk_size = 8;
for (int64_t i = 0; i < full_chunks + 1; ++i) {
chunk_buf = 0;
if (i == full_chunks) {
chunk_size = size % 8;
}
if (col_compression_type_ == kColDict) {
uint64_t dict_val;
ReadVarint64(&src, &dict_val);
assert(dict_val < dict_vec_.size());
chunk_buf = dict_vec_[static_cast<size_t>(dict_val)];
} else {
memcpy(&chunk_buf, src, chunk_size);
src += chunk_size;
}
memcpy(*dest, reinterpret_cast<char*>(&chunk_buf), 8);
*dest += 8;
uint8_t mask = ((0xFF - 8) + chunk_size) & 0xFF;
memcpy(*dest, reinterpret_cast<char*>(&mask), 1);
*dest += 1;
}
return src - orig_src;
}
} // namespace rocksdb
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "util/coding.h"
#include "utilities/col_buf_encoder.h"
namespace rocksdb {
struct ColDeclaration;
// ColBufDecoder is a class to decode column buffers. It can be populated from a
// ColDeclaration. Before decoding starts, the Init() method should be called.
// Each call to the Decode() method then decodes one column value.
class ColBufDecoder {
 public:
  // Pure virtual; the (empty) definition lives in the .cc file.
  virtual ~ColBufDecoder() = 0;

  // Hook for reading per-column header data from src. The default
  // implementation consumes nothing and returns 0.
  virtual size_t Init(const char* /*src*/) { return 0; }

  // Decodes one value from src into *dest and returns a byte count to the
  // caller.
  virtual size_t Decode(const char* src, char** dest) = 0;

  // Factory keyed on col_declaration.col_type; returns nullptr for an
  // unrecognized type name.
  static ColBufDecoder* NewColBufDecoder(const ColDeclaration& col_declaration);

 protected:
  std::string buffer_;

  // True for the compression types that use run-length encoding.
  static inline bool IsRunLength(ColCompressionType type) {
    switch (type) {
      case kColRle:
      case kColRleVarint:
      case kColRleDeltaVarint:
      case kColRleDict:
        return true;
      default:
        return false;
    }
  }
};
class FixedLengthColBufDecoder : public ColBufDecoder {
public:
explicit FixedLengthColBufDecoder(
size_t size, ColCompressionType col_compression_type = kColNoCompression,
bool nullable = false, bool big_endian = false)
: size_(size),
col_compression_type_(col_compression_type),
nullable_(nullable),
big_endian_(big_endian),
remain_runs_(0),
run_val_(0),
last_val_(0) {}
size_t Init(const char* src) override;
size_t Decode(const char* src, char** dest) override;
~FixedLengthColBufDecoder() {}
private:
size_t size_;
ColCompressionType col_compression_type_;
bool nullable_;
bool big_endian_;
// for decoding
std::vector<uint64_t> dict_vec_;
uint64_t remain_runs_;
uint64_t run_val_;
uint64_t last_val_;
};
class LongFixedLengthColBufDecoder : public ColBufDecoder {
public:
LongFixedLengthColBufDecoder(size_t size, bool nullable)
: size_(size), nullable_(nullable) {}
size_t Decode(const char* src, char** dest) override;
~LongFixedLengthColBufDecoder() {}
private:
size_t size_;
bool nullable_;
};
class VariableLengthColBufDecoder : public ColBufDecoder {
public:
size_t Decode(const char* src, char** dest) override;
~VariableLengthColBufDecoder() {}
};
// Decoder for variable-length columns stored in the chunked format, with
// optional dictionary compression (kColDict).
class VariableChunkColBufDecoder : public VariableLengthColBufDecoder {
 public:
  // Loads the dictionary header (if any); returns the bytes consumed.
  size_t Init(const char* src) override;
  // Decodes one chunked value; returns the bytes consumed.
  size_t Decode(const char* src, char** dest) override;

  explicit VariableChunkColBufDecoder(ColCompressionType col_compression_type)
      : col_compression_type_(col_compression_type) {}
  VariableChunkColBufDecoder() : col_compression_type_(kColNoCompression) {}

 private:
  ColCompressionType col_compression_type_;
  // Dictionary entries, indexed by dictionary id. Decoding only ever looks
  // values up by id, so no value-to-id map is kept here.
  std::vector<uint64_t> dict_vec_;
};
struct KVPairColBufDecoders {
std::vector<std::unique_ptr<ColBufDecoder>> key_col_bufs;
std::vector<std::unique_ptr<ColBufDecoder>> value_col_bufs;
std::unique_ptr<ColBufDecoder> value_checksum_buf;
explicit KVPairColBufDecoders(const KVPairColDeclarations& kvp_cd) {
for (auto kcd : *kvp_cd.key_col_declarations) {
key_col_bufs.emplace_back(
std::move(ColBufDecoder::NewColBufDecoder(kcd)));
}
for (auto vcd : *kvp_cd.value_col_declarations) {
value_col_bufs.emplace_back(
std::move(ColBufDecoder::NewColBufDecoder(vcd)));
}
value_checksum_buf.reset(
ColBufDecoder::NewColBufDecoder(*kvp_cd.value_checksum_declaration));
}
};
} // namespace rocksdb
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "utilities/col_buf_encoder.h"
#include <cstring>
#include <string>
#include "port/port.h"
namespace rocksdb {
// The destructor is declared pure virtual in the class, so it still needs
// this out-of-line (empty) definition.
ColBufEncoder::~ColBufEncoder() {}
namespace {
// Returns val unchanged when the column's byte order (big_endian) matches the
// host byte order; otherwise returns EndianTransform(val, size).
inline uint64_t DecodeFixed64WithEndian(uint64_t val, bool big_endian,
                                        size_t size) {
  const bool host_is_little = port::kLittleEndian;
  // The orders differ exactly when both flags are true or both are false.
  if (big_endian != host_is_little) {
    return val;
  }
  return EndianTransform(val, size);
}
} // namespace
// Returns a reference to the internal encoded buffer (no copy is made).
const std::string &ColBufEncoder::GetData() { return buffer_; }
// Factory: builds the encoder that matches col_declaration.col_type.
// Recognized type names are "FixedLength", "VariableLength", "VariableChunk"
// and "LongFixedLength"; any other name yields nullptr. The encoder is
// allocated with new and returned as a raw pointer.
ColBufEncoder *ColBufEncoder::NewColBufEncoder(
    const ColDeclaration &col_declaration) {
  const std::string &kind = col_declaration.col_type;

  if (kind == "FixedLength") {
    return new FixedLengthColBufEncoder(
        col_declaration.size, col_declaration.col_compression_type,
        col_declaration.nullable, col_declaration.big_endian);
  }
  if (kind == "VariableLength") {
    return new VariableLengthColBufEncoder();
  }
  if (kind == "VariableChunk") {
    return new VariableChunkColBufEncoder(col_declaration.col_compression_type);
  }
  if (kind == "LongFixedLength") {
    return new LongFixedLengthColBufEncoder(col_declaration.size,
                                            col_declaration.nullable);
  }

  // Unrecognized column type
  return nullptr;
}
// Encodes one fixed-length value (size_ bytes at buf) and appends the result
// to buffer_ according to col_compression_type_. Returns size_, or 0 when a
// nullable column is given a null value (buf == nullptr).
// For run-length types a run is only written out when the value changes; the
// final run is written by Finish().
size_t FixedLengthColBufEncoder::Append(const char *buf) {
if (nullable_) {
// Nullable columns get a one-byte flag per value: 0 for null, 1 otherwise.
if (buf == nullptr) {
buffer_.append(1, 0);
return 0;
} else {
buffer_.append(1, 1);
}
}
// Load the size_ input bytes into a zero-initialized 64-bit integer and
// convert it from the column's byte order.
uint64_t read_val = 0;
memcpy(&read_val, buf, size_);
read_val = DecodeFixed64WithEndian(read_val, big_endian_, size_);
// Determine write value
uint64_t write_val = read_val;
if (col_compression_type_ == kColDeltaVarint ||
col_compression_type_ == kColRleDeltaVarint) {
// Delta types store the difference to the previous value.
int64_t delta = read_val - last_val_;
// Encode signed delta value
// The sign ends up in the lowest bit so that small negative deltas also
// produce small unsigned numbers.
delta = (static_cast<uint64_t>(delta) << 1) ^ (delta >> 63);
write_val = delta;
last_val_ = read_val;
} else if (col_compression_type_ == kColDict ||
col_compression_type_ == kColRleDict) {
// Dictionary types store the id of the value instead of the value.
auto iter = dictionary_.find(read_val);
uint64_t dict_val;
if (iter == dictionary_.end()) {
// Add new entry to dictionary
// Ids are handed out in insertion order, starting at 0.
dict_val = dictionary_.size();
dictionary_.insert(std::make_pair(read_val, dict_val));
dict_vec_.push_back(read_val);
} else {
dict_val = iter->second;
}
write_val = dict_val;
}
// Write into buffer
if (IsRunLength(col_compression_type_)) {
// run_length_ is initialized to -1 by the constructor, which marks "no
// value seen yet".
if (run_length_ == -1) {
// First element
run_val_ = write_val;
run_length_ = 1;
} else if (write_val != run_val_) {
// End of run
// Write run value
if (col_compression_type_ == kColRle) {
// Plain RLE writes the run value as size_ raw bytes.
buffer_.append(reinterpret_cast<char *>(&run_val_), size_);
} else {
PutVarint64(&buffer_, run_val_);
}
// Write run length
PutVarint64(&buffer_, run_length_);
// Start a new run with the current value.
run_val_ = write_val;
run_length_ = 1;
} else {
run_length_++;
}
} else { // non run-length encodings
if (col_compression_type_ == kColNoCompression) {
buffer_.append(reinterpret_cast<char *>(&write_val), size_);
} else {
PutVarint64(&buffer_, write_val);
}
}
return size_;
}
// Completes the encoded buffer. For dictionary types a header (varint entry
// count followed by every dictionary key as a varint) is placed in front of
// the data. For run-length types the run that is still pending is written at
// the end: the run value (size_ raw bytes for kColRle, a varint otherwise)
// followed by the varint run length.
void FixedLengthColBufEncoder::Finish() {
  const bool uses_dictionary = (col_compression_type_ == kColDict ||
                                col_compression_type_ == kColRleDict);
  if (uses_dictionary) {
    std::string dict_header;
    PutVarint64(&dict_header, dict_vec_.size());
    // Put dictionary in the header
    for (size_t k = 0; k < dict_vec_.size(); ++k) {
      PutVarint64(&dict_header, dict_vec_[k]);
    }
    buffer_.insert(0, dict_header);
  }

  if (!IsRunLength(col_compression_type_)) {
    return;
  }

  // Finish last run value
  if (col_compression_type_ == kColRle) {
    buffer_.append(reinterpret_cast<char *>(&run_val_), size_);
  } else {
    PutVarint64(&buffer_, run_val_);
  }
  PutVarint64(&buffer_, run_length_);
}
// Appends one value to buffer_. Nullable columns first get a one-byte flag
// (0 for a null value, 1 otherwise); a null value contributes only that flag
// and 0 is returned. Otherwise size_ bytes are copied from buf and size_ is
// returned.
size_t LongFixedLengthColBufEncoder::Append(const char *buf) {
  if (nullable_) {
    const bool is_null = (buf == nullptr);
    buffer_.append(1, static_cast<char>(is_null ? 0 : 1));
    if (is_null) {
      return 0;
    }
  }
  buffer_.append(buf, size_);
  return size_;
}
// Nothing to finalize: Append() already wrote every value in full.
void LongFixedLengthColBufEncoder::Finish() {}
// Appends one variable-length value to buffer_. The input is a one-byte
// length k followed by k payload bytes; all k + 1 bytes are copied verbatim.
// Returns k + 1.
size_t VariableLengthColBufEncoder::Append(const char *buf) {
  const uint8_t payload_len = *buf;
  const size_t total = static_cast<size_t>(payload_len) + 1;
  // Length prefix and payload are contiguous, so copy them in one go.
  buffer_.append(buf, total);
  return total;
}
// Nothing to finalize: Append() already wrote every value in full.
void VariableLengthColBufEncoder::Finish() {}
size_t VariableChunkColBufEncoder::Append(const char *buf) {
const char *orig_buf = buf;
uint8_t mark = 0xFF;
size_t length = 0;
std::string tmp_buffer;
while (mark == 0xFF) {
uint64_t val;
memcpy(&val, buf, 8);
buf += 8;
mark = *buf;
buf += 1;
int8_t chunk_size = 8 - (0xFF - mark);
if (col_compression_type_ == kColDict) {
auto iter = dictionary_.find(val);
uint64_t dict_val;
if (iter == dictionary_.end()) {
dict_val = dictionary_.size();
dictionary_.insert(std::make_pair(val, dict_val));
dict_vec_.push_back(val);
} else {
dict_val = iter->second;
}
PutVarint64(&tmp_buffer, dict_val);
} else {
tmp_buffer.append(reinterpret_cast<char *>(&val), chunk_size);
}
length += chunk_size;
}
PutVarint64(&buffer_, length);
buffer_.append(tmp_buffer);
return buf - orig_buf;
}
// For kColDict, places the dictionary header (varint entry count followed by
// every key as a varint) in front of the encoded data. Other compression
// types need no finalization.
void VariableChunkColBufEncoder::Finish() {
  if (col_compression_type_ != kColDict) {
    return;
  }
  std::string dict_header;
  PutVarint64(&dict_header, dict_vec_.size());
  for (size_t k = 0; k < dict_vec_.size(); ++k) {
    PutVarint64(&dict_header, dict_vec_[k]);
  }
  buffer_.insert(0, dict_header);
}
} // namespace rocksdb
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#include <cstdio>
#include <cstring>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "util/coding.h"
namespace rocksdb {
// Per-column encoding schemes. The numeric values are spelled out to make the
// mapping explicit.
enum ColCompressionType {
  kColNoCompression = 0,   // raw bytes
  kColRle = 1,             // run-length, run value stored as raw bytes
  kColVarint = 2,          // varint
  kColRleVarint = 3,       // run-length, run value stored as varint
  kColDeltaVarint = 4,     // delta to the previous value, as varint
  kColRleDeltaVarint = 5,  // run-length over deltas
  kColDict = 6,            // dictionary id, as varint
  kColRleDict = 7          // run-length over dictionary ids
};
struct ColDeclaration;
// ColBufEncoder is a class to encode column buffers. It can be populated from a
// ColDeclaration. Each time it takes a column value into Append() method to
// encode the column and store it into an internal buffer. After all rows for
// this column are consumed, a Finish() should be called to add header and
// remaining data.
class ColBufEncoder {
 public:
  // Read a column, encode data and append into internal buffer.
  virtual size_t Append(const char *buf) = 0;

  // Pure virtual; the (empty) definition lives in the .cc file.
  virtual ~ColBufEncoder() = 0;

  // Get the internal column buffer. Should only be called after Finish().
  const std::string &GetData();

  // Finish encoding. Add header and remaining data.
  virtual void Finish() = 0;

  // Populate a ColBufEncoder from ColDeclaration. Returns nullptr for an
  // unrecognized column type.
  static ColBufEncoder *NewColBufEncoder(const ColDeclaration &col_declaration);

 protected:
  // Encoded output accumulated by Append() and completed by Finish().
  std::string buffer_;

  // True for the compression types that use run-length encoding.
  static inline bool IsRunLength(ColCompressionType type) {
    switch (type) {
      case kColRle:
      case kColRleVarint:
      case kColRleDeltaVarint:
      case kColRleDict:
        return true;
      default:
        return false;
    }
  }
};
// Encoder for fixed length column buffer. In fixed length column buffer, the
// size of the column should not exceed 8 bytes.
// The following encodings are supported:
// Varint: Variable length integer. See util/coding.h for more details
// Rle (Run length encoding): encode a sequence of contiguous value as
// [run_value][run_length]. Can be combined with Varint
// Delta: Encode value to its delta with its adjacent entry. Use varint to
// possibly reduce stored bytes. Can be combined with Rle.
// Dictionary: Use a dictionary to record all possible values in the block and
// encode them with an ID started from 0. IDs are encoded as varint. A column
// with dictionary encoding will have a header to store all actual values,
// ordered by their dictionary value, and the data will be replaced by
// dictionary value. Can be combined with Rle.
class FixedLengthColBufEncoder : public ColBufEncoder {
 public:
  explicit FixedLengthColBufEncoder(
      size_t size, ColCompressionType col_compression_type = kColNoCompression,
      bool nullable = false, bool big_endian = false)
      : size_(size),
        col_compression_type_(col_compression_type),
        nullable_(nullable),
        big_endian_(big_endian),
        last_val_(0),
        run_length_(-1),
        run_val_(0) {}

  // Encodes one value and returns size_ (0 for a null value).
  size_t Append(const char *buf) override;
  // Adds the dictionary header and writes out the pending run, if any.
  void Finish() override;
  ~FixedLengthColBufEncoder() {}

 private:
  // Width of each input value in bytes.
  size_t size_;
  ColCompressionType col_compression_type_;
  // If set as true, the input value can be null (represented as nullptr). When
  // nullable is true, use one more byte before actual value to indicate if the
  // current value is null.
  bool nullable_;
  // If set as true, input value will be treated as big endian encoded.
  bool big_endian_;

  // for encoding
  // Previous value, used as the base for delta encoding.
  uint64_t last_val_;
  // Length of the run currently being accumulated; -1 until the first value
  // arrives. Kept 64 bits wide so that long runs of identical values cannot
  // overflow the counter before it is written out as a varint.
  int64_t run_length_;
  // Value of the run currently being accumulated.
  uint64_t run_val_;
  // Map to store dictionary for dictionary encoding
  std::unordered_map<uint64_t, uint64_t> dictionary_;
  // Vector of dictionary keys.
  std::vector<uint64_t> dict_vec_;
};
// Long fixed length column buffer is a variant of fixed length buffer to hold
// fixed length buffer with more than 8 bytes. We do not support any special
// encoding schemes in LongFixedLengthColBufEncoder.
class LongFixedLengthColBufEncoder : public ColBufEncoder {
public:
LongFixedLengthColBufEncoder(size_t size, bool nullable)
: size_(size), nullable_(nullable) {}
size_t Append(const char *buf) override;
void Finish() override;
~LongFixedLengthColBufEncoder() {}
private:
size_t size_;
bool nullable_;
};
// Variable length column buffer holds a format of variable length column. In
// this format, a column is composed of one byte length k, followed by data with
// k bytes long data.
class VariableLengthColBufEncoder : public ColBufEncoder {
public:
size_t Append(const char *buf) override;
void Finish() override;
~VariableLengthColBufEncoder() {}
};
// Variable chunk column buffer holds another format of variable length column.
// In this format, a column contains multiple chunks of data, each of which is
// composed of 8 bytes long data, and one byte as a mask to indicate whether we
// have more data to come. If more data is coming, the mask is set as 0xFF. If
// the chunk is the last chunk and has only k valid bytes, the mask is set as
// 0xFF - (8 - k).
class VariableChunkColBufEncoder : public VariableLengthColBufEncoder {
 public:
  // Encodes one chunked value; returns the number of input bytes consumed.
  size_t Append(const char *buf) override;
  // Prepends the dictionary header when kColDict is used.
  void Finish() override;

  explicit VariableChunkColBufEncoder(ColCompressionType col_compression_type)
      : col_compression_type_(col_compression_type) {}
  VariableChunkColBufEncoder() : col_compression_type_(kColNoCompression) {}

 private:
  ColCompressionType col_compression_type_;
  // Map to store dictionary for dictionary encoding (chunk value -> id).
  std::unordered_map<uint64_t, uint64_t> dictionary_;
  // Vector of dictionary keys, in id order.
  std::vector<uint64_t> dict_vec_;
};
// ColDeclaration declares a column's type, algorithm of column-aware encoding,
// and other column data like endian and nullability.
struct ColDeclaration {
  // _col_type is taken by value and moved into place, so an rvalue argument
  // is not copied a second time.
  explicit ColDeclaration(
      std::string _col_type,
      ColCompressionType _col_compression_type = kColNoCompression,
      size_t _size = 0, bool _nullable = false, bool _big_endian = false)
      : col_type(std::move(_col_type)),
        col_compression_type(_col_compression_type),
        size(_size),
        nullable(_nullable),
        big_endian(_big_endian) {}

  // Column kind; the encoder/decoder factories recognize "FixedLength",
  // "VariableLength", "VariableChunk" and "LongFixedLength".
  std::string col_type;
  // Encoding scheme applied to the column.
  ColCompressionType col_compression_type;
  // Width of a value in bytes (used by the fixed-length column kinds).
  size_t size;
  // Whether a value may be null.
  bool nullable;
  // Whether values are stored big endian.
  bool big_endian;
};
// KVPairColDeclarations is a class to hold column declaration of columns in
// key and value.
// Bundles pointers to the declarations of the key columns, the value columns
// and the value checksum column. Only the pointers are stored; nothing is
// copied or freed by this struct.
struct KVPairColDeclarations {
  std::vector<ColDeclaration> *key_col_declarations;
  std::vector<ColDeclaration> *value_col_declarations;
  ColDeclaration *value_checksum_declaration;

  KVPairColDeclarations(std::vector<ColDeclaration> *_key_col_declarations,
                        std::vector<ColDeclaration> *_value_col_declarations,
                        ColDeclaration *_value_checksum_declaration)
      : key_col_declarations(_key_col_declarations),
        value_col_declarations(_value_col_declarations),
        value_checksum_declaration(_value_checksum_declaration) {}
};
// Similar to KVPairDeclarations, KVPairColBufEncoders is used to hold column
// buffer encoders of all columns in key and value.
struct KVPairColBufEncoders {
std::vector<std::unique_ptr<ColBufEncoder>> key_col_bufs;
std::vector<std::unique_ptr<ColBufEncoder>> value_col_bufs;
std::unique_ptr<ColBufEncoder> value_checksum_buf;
explicit KVPairColBufEncoders(const KVPairColDeclarations &kvp_cd) {
for (auto kcd : *kvp_cd.key_col_declarations) {
key_col_bufs.emplace_back(
std::move(ColBufEncoder::NewColBufEncoder(kcd)));
}
for (auto vcd : *kvp_cd.value_col_declarations) {
value_col_bufs.emplace_back(
std::move(ColBufEncoder::NewColBufEncoder(vcd)));
}
value_checksum_buf.reset(
ColBufEncoder::NewColBufEncoder(*kvp_cd.value_checksum_declaration));
}
// Helper function to call Finish()
void Finish() {
for (auto &col_buf : key_col_bufs) {
col_buf->Finish();
}
for (auto &col_buf : value_col_bufs) {
col_buf->Finish();
}
value_checksum_buf->Finish();
}
};
} // namespace rocksdb
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <cstdio>
#include <cstdlib>
#ifndef ROCKSDB_LITE
#ifdef GFLAGS
#include <inttypes.h>
#include <vector>
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "table/block_based_table_builder.h"
#include "table/block_based_table_reader.h"
#include "table/format.h"
#include "tools/sst_dump_tool_imp.h"
#include "util/compression.h"
#include "util/gflags_compat.h"
#include "util/stop_watch.h"
#include "utilities/col_buf_encoder.h"
#include "utilities/column_aware_encoding_util.h"
using GFLAGS_NAMESPACE::ParseCommandLineFlags;
// Command-line flags of the column-aware encoding experiment tool.

// Destination for the encoded blocks; nothing is written when empty.
DEFINE_string(encoded_file, "", "file to store encoded data blocks");
// A non-empty value also switches decoding on.
DEFINE_string(decoded_file, "",
              "file to store decoded data blocks after encoding");
DEFINE_string(format, "col", "Output Format. Can be 'row' or 'col'");
// TODO(jhli): option `col` should be removed and replaced by general
// column specifications.
// Only the value "primary" selects the primary-index column declarations;
// every other value, including the default, selects the secondary ones.
DEFINE_string(index_type, "col", "Index type. Can be 'primary' or 'secondary'");
// When set, the tool only dumps the data columns and then returns.
DEFINE_string(dump_file, "",
              "Dump data blocks separated by columns in human-readable format");
DEFINE_bool(decode, false, "Decode blocks after they are encoded");
DEFINE_bool(stat, false,
            "Print column distribution statistics. Cannot decode in this mode");
DEFINE_string(compression_type, "kNoCompression",
              "The compression algorithm used to compress data blocks");
namespace rocksdb {
class ColumnAwareEncodingExp {
public:
static void Run(const std::string& sst_file) {
bool decode = FLAGS_decode;
if (FLAGS_decoded_file.size() > 0) {
decode = true;
}
if (FLAGS_stat) {
decode = false;
}
ColumnAwareEncodingReader reader(sst_file);
std::vector<ColDeclaration>* key_col_declarations;
std::vector<ColDeclaration>* value_col_declarations;
ColDeclaration* value_checksum_declaration;
if (FLAGS_index_type == "primary") {
ColumnAwareEncodingReader::GetColDeclarationsPrimary(
&key_col_declarations, &value_col_declarations,
&value_checksum_declaration);
} else {
ColumnAwareEncodingReader::GetColDeclarationsSecondary(
&key_col_declarations, &value_col_declarations,
&value_checksum_declaration);
}
KVPairColDeclarations kvp_cd(key_col_declarations, value_col_declarations,
value_checksum_declaration);
if (!FLAGS_dump_file.empty()) {
std::vector<KVPairBlock> kv_pair_blocks;
reader.GetKVPairsFromDataBlocks(&kv_pair_blocks);
reader.DumpDataColumns(FLAGS_dump_file, kvp_cd, kv_pair_blocks);
return;
}
std::unordered_map<std::string, CompressionType> compressions = {
{"kNoCompression", CompressionType::kNoCompression},
{"kZlibCompression", CompressionType::kZlibCompression},
{"kZSTD", CompressionType::kZSTD}};
// Find Compression
CompressionType compression_type = compressions[FLAGS_compression_type];
EnvOptions env_options;
if (CompressionTypeSupported(compression_type)) {
fprintf(stdout, "[%s]\n", FLAGS_compression_type.c_str());
std::unique_ptr<WritableFile> encoded_out_file;
std::unique_ptr<Env> env(NewMemEnv(Env::Default()));
if (!FLAGS_encoded_file.empty()) {
env->NewWritableFile(FLAGS_encoded_file, &encoded_out_file,
env_options);
}
std::vector<KVPairBlock> kv_pair_blocks;
reader.GetKVPairsFromDataBlocks(&kv_pair_blocks);
std::vector<std::string> encoded_blocks;
StopWatchNano sw(env.get(), true);
if (FLAGS_format == "col") {
reader.EncodeBlocks(kvp_cd, encoded_out_file.get(), compression_type,
kv_pair_blocks, &encoded_blocks, FLAGS_stat);
} else { // row format
reader.EncodeBlocksToRowFormat(encoded_out_file.get(), compression_type,
kv_pair_blocks, &encoded_blocks);
}
if (encoded_out_file != nullptr) {
uint64_t size = 0;
env->GetFileSize(FLAGS_encoded_file, &size);
fprintf(stdout, "File size: %" PRIu64 "\n", size);
}
uint64_t encode_time = sw.ElapsedNanosSafe(false /* reset */);
fprintf(stdout, "Encode time: %" PRIu64 "\n", encode_time);
if (decode) {
std::unique_ptr<WritableFile> decoded_out_file;
if (!FLAGS_decoded_file.empty()) {
env->NewWritableFile(FLAGS_decoded_file, &decoded_out_file,
env_options);
}
sw.Start();
if (FLAGS_format == "col") {
reader.DecodeBlocks(kvp_cd, decoded_out_file.get(), &encoded_blocks);
} else {
reader.DecodeBlocksFromRowFormat(decoded_out_file.get(),
&encoded_blocks);
}
uint64_t decode_time = sw.ElapsedNanosSafe(true /* reset */);
fprintf(stdout, "Decode time: %" PRIu64 "\n", decode_time);
}
} else {
fprintf(stdout, "Unsupported compression type: %s.\n",
FLAGS_compression_type.c_str());
}
delete key_col_declarations;
delete value_col_declarations;
delete value_checksum_declaration;
}
};
} // namespace rocksdb
// Entry point: parses the gflags options, validates --format and
// --index_type, and runs the experiment on the SST file named by the first
// non-flag argument. Exits with status 1 on any usage error.
int main(int argc, char** argv) {
  const int arg_idx = ParseCommandLineFlags(&argc, &argv, true);
  if (arg_idx >= argc) {
    // Usage errors go to stderr, like the other diagnostics below.
    fprintf(stderr, "SST filename required.\n");
    exit(1);
  }
  const std::string sst_file(argv[arg_idx]);
  if (FLAGS_format != "row" && FLAGS_format != "col") {
    fprintf(stderr, "Format must be 'row' or 'col'\n");
    exit(1);
  }
  if (FLAGS_index_type != "primary" && FLAGS_index_type != "secondary") {
    fprintf(stderr, "Index type must be 'primary' or 'secondary'\n");
    exit(1);
  }
  rocksdb::ColumnAwareEncodingExp::Run(sst_file);
  return 0;
}
#else
// Fallback entry point used when the tool is built without gflags: reports
// the missing dependency on stderr and fails.
int main() {
  fputs("Please install gflags to run rocksdb tools\n", stderr);
  const int kFailure = 1;
  return kFailure;
}
#endif // GFLAGS
#else
// Fallback entry point for ROCKSDB_LITE builds: the experiment is not
// available there, so report that on stderr and fail.
int main(int /*argc*/, char** /*argv*/) {
  fputs("Not supported in lite mode.\n", stderr);
  const int kFailure = 1;
  return kFailure;
}
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE
#include <vector>
#include "util/testharness.h"
#include "util/testutil.h"
#include "utilities/col_buf_decoder.h"
#include "utilities/col_buf_encoder.h"
namespace rocksdb {
class ColumnAwareEncodingTest : public testing::Test {
public:
ColumnAwareEncodingTest() {}
~ColumnAwareEncodingTest() {}
};
// Value-parameterized fixture: the parameter is the fixed column width in
// bytes that each test encodes and decodes.
class ColumnAwareEncodingTestWithSize
    : public ColumnAwareEncodingTest,
      public testing::WithParamInterface<size_t> {
 public:
  ColumnAwareEncodingTestWithSize() = default;
  ~ColumnAwareEncodingTestWithSize() override = default;

  // Column widths (in bytes) the parameterized tests are run with.
  static std::vector<size_t> GetValues() {
    std::vector<size_t> widths;
    widths.push_back(4);
    widths.push_back(8);
    return widths;
  }
};
// Instantiates the ColumnAwareEncodingTestWithSize tests once for every
// column width returned by GetValues().
INSTANTIATE_TEST_CASE_P(
ColumnAwareEncodingTestWithSize, ColumnAwareEncodingTestWithSize,
::testing::ValuesIn(ColumnAwareEncodingTestWithSize::GetValues()));
// Round-trips four identical fixed-length values through the
// kColNoCompression encoder/decoder pair and checks the encoded length, the
// encoded bytes, the decoded length and the decoded values.
TEST_P(ColumnAwareEncodingTestWithSize, NoCompressionEncodeDecode) {
  const size_t col_size = GetParam();
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColNoCompression, false, true));
  std::string str_buf;
  const uint64_t base_val = 0x0102030405060708;
  uint64_t val = 0;
  memcpy(&val, &base_val, col_size);
  const int row_count = 4;
  for (int i = 0; i < row_count; ++i) {
    str_buf.append(reinterpret_cast<char*>(&val), col_size);
  }
  // Feed the rows one by one, advancing by the size Append() reports as
  // consumed (same pattern as the RLE test).
  const char* str_buf_ptr = str_buf.c_str();
  for (int i = 0; i < row_count; ++i) {
    str_buf_ptr += col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  // Check correctness of encoded string length
  ASSERT_EQ(row_count * col_size, encoded_data.size());

  const char* encoded_data_ptr = encoded_data.c_str();
  // Initialized so the value is well-defined even for an unexpected width.
  uint64_t expected_encoded_val = 0;
  if (col_size == 8) {
    expected_encoded_val =
        port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708;
  } else if (col_size == 4) {
    expected_encoded_val =
        port::kLittleEndian ? 0x08070605 : 0x0102030400000000;
  }
  uint64_t encoded_val = 0;
  for (int i = 0; i < row_count; ++i) {
    memcpy(&encoded_val, encoded_data_ptr, col_size);
    // Check correctness of encoded value
    ASSERT_EQ(expected_encoded_val, encoded_val);
    encoded_data_ptr += col_size;
  }

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColNoCompression, false, true));
  encoded_data_ptr = encoded_data.c_str();
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  // Owned buffer: a failing ASSERT_* returns early and must not leak it.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  char* decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }
  // Check correctness of decoded string length
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    // Check correctness of decoded value
    ASSERT_EQ(val, decoded_val);
    decoded_data += col_size;
  }
}
// Round-trips four identical fixed-length values through the kColRle
// encoder/decoder pair. The test expects the encoded form to be one value
// plus one extra byte, and the decoded output to reproduce all four rows.
TEST_P(ColumnAwareEncodingTestWithSize, RleEncodeDecode) {
  const size_t col_size = GetParam();
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColRle, false, true));
  std::string str_buf;
  const uint64_t base_val = 0x0102030405060708;
  uint64_t val = 0;
  memcpy(&val, &base_val, col_size);
  const int row_count = 4;
  for (int i = 0; i < row_count; ++i) {
    str_buf.append(reinterpret_cast<char*>(&val), col_size);
  }
  const char* str_buf_ptr = str_buf.c_str();
  for (int i = 0; i < row_count; ++i) {
    str_buf_ptr += col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  // Check correctness of encoded string length
  ASSERT_EQ(col_size + 1, encoded_data.size());

  const char* encoded_data_ptr = encoded_data.c_str();
  uint64_t encoded_val = 0;
  memcpy(&encoded_val, encoded_data_ptr, col_size);
  // Initialized so the value is well-defined even for an unexpected width.
  uint64_t expected_encoded_val = 0;
  if (col_size == 8) {
    expected_encoded_val =
        port::kLittleEndian ? 0x0807060504030201 : 0x0102030405060708;
  } else if (col_size == 4) {
    expected_encoded_val =
        port::kLittleEndian ? 0x08070605 : 0x0102030400000000;
  }
  // Check correctness of encoded value
  ASSERT_EQ(expected_encoded_val, encoded_val);

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColRle, false, true));
  // Owned buffer: a failing ASSERT_* returns early and must not leak it.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  char* decoded_data = decoded_data_base;
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }
  // Check correctness of decoded string length
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;
  for (int i = 0; i < row_count; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    // Check correctness of decoded value
    ASSERT_EQ(val, decoded_val);
    decoded_data += col_size;
  }
}
// Round-trips two alternating fixed-length values (val1, val2, val1, val2)
// through the kColDeltaVarint encoder/decoder pair and checks the encoded
// length as well as the decoded length and values.
TEST_P(ColumnAwareEncodingTestWithSize, DeltaEncodeDecode) {
  const size_t col_size = GetParam();
  const int row_count = 4;
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new FixedLengthColBufEncoder(col_size, kColDeltaVarint, false, true));
  std::string str_buf;
  const uint64_t base_val1 =
      port::kLittleEndian ? 0x0102030405060708 : 0x0807060504030201;
  const uint64_t base_val2 =
      port::kLittleEndian ? 0x0202030405060708 : 0x0807060504030202;
  uint64_t val1 = 0, val2 = 0;
  memcpy(&val1, &base_val1, col_size);
  memcpy(&val2, &base_val2, col_size);
  const char* str_buf_ptr;
  for (int i = 0; i < row_count / 2; ++i) {
    str_buf = std::string(reinterpret_cast<char*>(&val1), col_size);
    str_buf_ptr = str_buf.c_str();
    col_buf_encoder->Append(str_buf_ptr);
    str_buf = std::string(reinterpret_cast<char*>(&val2), col_size);
    str_buf_ptr = str_buf.c_str();
    col_buf_encoder->Append(str_buf_ptr);
  }
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();

  // Expected varint length of the first (undiffed) value.
  size_t varint_len = 0;
  if (col_size == 8) {
    varint_len = 9;
  } else if (col_size == 4) {
    varint_len = port::kLittleEndian ? 5 : 9;
  }
  // Check encoded string length: first value is original one (val - 0), the
  // coming three are encoded as 1, -1, 1, so they should take 1 byte in varint.
  ASSERT_EQ(varint_len + 3 * 1, encoded_data.size());

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new FixedLengthColBufDecoder(col_size, kColDeltaVarint, false, true));
  // Owned buffer: a failing ASSERT_* returns early and must not leak it.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  char* decoded_data = decoded_data_base;
  const char* encoded_data_ptr = encoded_data.c_str();
  encoded_data_ptr += col_buf_decoder->Init(encoded_data_ptr);
  for (int i = 0; i < row_count; ++i) {
    encoded_data_ptr +=
        col_buf_decoder->Decode(encoded_data_ptr, &decoded_data);
  }
  // Check correctness of decoded string length
  ASSERT_EQ(row_count * col_size,
            static_cast<size_t>(decoded_data - decoded_data_base));
  decoded_data = decoded_data_base;
  // Check correctness of decoded data
  for (int i = 0; i < row_count / 2; ++i) {
    uint64_t decoded_val = 0;
    memcpy(&decoded_val, decoded_data, col_size);
    ASSERT_EQ(val1, decoded_val);
    decoded_data += col_size;
    memcpy(&decoded_val, decoded_data, col_size);
    ASSERT_EQ(val2, decoded_val);
    decoded_data += col_size;
  }
}
// Encodes one 18-byte input with the kColDict variable-chunk encoder,
// decodes it again and checks that the decoded bytes equal the input.
TEST_F(ColumnAwareEncodingTest, ChunkBufEncodeDecode) {
  std::unique_ptr<ColBufEncoder> col_buf_encoder(
      new VariableChunkColBufEncoder(kColDict));
  // Explicit length: the literal contains embedded NUL bytes.
  const std::string buf("12345678\377\1\0\0\0\0\0\0\0\376", 18);
  col_buf_encoder->Append(buf.c_str());
  col_buf_encoder->Finish();
  const std::string& encoded_data = col_buf_encoder->GetData();
  const char* str_ptr = encoded_data.c_str();

  std::unique_ptr<ColBufDecoder> col_buf_decoder(
      new VariableChunkColBufDecoder(kColDict));
  str_ptr += col_buf_decoder->Init(str_ptr);
  // Owned buffer: a failing ASSERT_* returns early and must not leak it.
  std::unique_ptr<char[]> decoded_buf(new char[100]);
  char* const decoded_data_base = decoded_buf.get();
  char* decoded_data = decoded_data_base;
  col_buf_decoder->Decode(str_ptr, &decoded_data);
  for (size_t i = 0; i < buf.size(); ++i) {
    ASSERT_EQ(buf[i], decoded_data_base[i]);
  }
}
} // namespace rocksdb
// Test binary entry point: hands the command line to GoogleTest and returns
// the result of running every registered test.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int test_result = RUN_ALL_TESTS();
  return test_result;
}
#else
#include <cstdio>
// ROCKSDB_LITE stand-in for the test binary: prints a skip notice on stderr
// and exits successfully.
int main() {
  fputs(
      "SKIPPED as column aware encoding experiment is not enabled in "
      "ROCKSDB_LITE\n",
      stderr);
  return 0;
}
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
#ifndef ROCKSDB_LITE
#include "utilities/column_aware_encoding_util.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "include/rocksdb/comparator.h"
#include "include/rocksdb/slice.h"
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#include "table/block_based_table_builder.h"
#include "table/block_based_table_factory.h"
#include "table/format.h"
#include "table/table_reader.h"
#include "util/cast_util.h"
#include "util/coding.h"
#include "utilities/col_buf_decoder.h"
#include "utilities/col_buf_encoder.h"
#include "port/port.h"
namespace rocksdb {
// Remembers the SST path, derives the immutable/mutable CF options from the
// default-constructed options_, sets up a bytewise internal comparator and
// then initializes the table reader for the file.
ColumnAwareEncodingReader::ColumnAwareEncodingReader(
    const std::string& file_path)
    : file_name_(file_path),
      ioptions_(options_),
      moptions_(options_),
      internal_comparator_(BytewiseComparator()) {
  // file_name_ is a copy of file_path, so either spelling names the same file.
  InitTableReader(file_path);
}
void ColumnAwareEncodingReader::InitTableReader(const std::string& file_path) {
std::unique_ptr<RandomAccessFile> file;
uint64_t file_size;
options_.env->NewRandomAccessFile(file_path, &file, soptions_);
options_.env->GetFileSize(file_path, &file_size);
file_.reset(new RandomAccessFileReader(std::move(file), file_path));
options_.comparator = &internal_comparator_;
options_.table_factory = std::make_shared<BlockBasedTableFactory>();
std::unique_ptr<TableReader> table_reader;
options_.table_factory->NewTableReader(
TableReaderOptions(ioptions_, moptions_.prefix_extractor.get(), soptions_,
internal_comparator_),
std::move(file_), file_size, &table_reader, /*enable_prefetch=*/false);
table_reader_.reset(static_cast_with_check<BlockBasedTable, TableReader>(
table_reader.release()));
}
void ColumnAwareEncodingReader::GetKVPairsFromDataBlocks(
std::vector<KVPairBlock>* kv_pair_blocks) {
table_reader_->GetKVPairsFromDataBlocks(kv_pair_blocks);
}
void ColumnAwareEncodingReader::DecodeBlocks(
const KVPairColDeclarations& kvp_col_declarations, WritableFile* out_file,
const std::vector<std::string>* blocks) {
char* decoded_content_base = new char[16384];
Options options;
ImmutableCFOptions ioptions(options);
for (auto& block : *blocks) {
KVPairColBufDecoders kvp_col_bufs(kvp_col_declarations);
auto& key_col_bufs = kvp_col_bufs.key_col_bufs;
auto& value_col_bufs = kvp_col_bufs.value_col_bufs;
auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf;
auto& slice_final_with_bit = block;
uint32_t format_version = 2;
BlockContents contents;
const char* content_ptr;
CompressionType type =
(CompressionType)slice_final_with_bit[slice_final_with_bit.size() - 1];
if (type != kNoCompression) {
UncompressionContext uncompression_ctx(type);
UncompressBlockContents(uncompression_ctx, slice_final_with_bit.c_str(),
slice_final_with_bit.size() - 1, &contents,
format_version, ioptions);
content_ptr = contents.data.data();
} else {
content_ptr = slice_final_with_bit.data();
}
size_t num_kv_pairs;
const char* header_content_ptr = content_ptr;
num_kv_pairs = static_cast<size_t>(DecodeFixed64(header_content_ptr));
header_content_ptr += sizeof(size_t);
size_t num_key_columns = key_col_bufs.size();
size_t num_value_columns = value_col_bufs.size();
std::vector<const char*> key_content_ptr(num_key_columns);
std::vector<const char*> value_content_ptr(num_value_columns);
const char* checksum_content_ptr;
size_t num_columns = num_key_columns + num_value_columns;
const char* col_content_ptr =
header_content_ptr + sizeof(size_t) * num_columns;
// Read headers
for (size_t i = 0; i < num_key_columns; ++i) {
key_content_ptr[i] = col_content_ptr;
key_content_ptr[i] += key_col_bufs[i]->Init(key_content_ptr[i]);
size_t offset;
offset = static_cast<size_t>(DecodeFixed64(header_content_ptr));
header_content_ptr += sizeof(size_t);
col_content_ptr += offset;
}
for (size_t i = 0; i < num_value_columns; ++i) {
value_content_ptr[i] = col_content_ptr;
value_content_ptr[i] += value_col_bufs[i]->Init(value_content_ptr[i]);
size_t offset;
offset = static_cast<size_t>(DecodeFixed64(header_content_ptr));
header_content_ptr += sizeof(size_t);
col_content_ptr += offset;
}
checksum_content_ptr = col_content_ptr;
checksum_content_ptr += value_checksum_buf->Init(checksum_content_ptr);
// Decode block
char* decoded_content = decoded_content_base;
for (size_t j = 0; j < num_kv_pairs; ++j) {
for (size_t i = 0; i < num_key_columns; ++i) {
key_content_ptr[i] +=
key_col_bufs[i]->Decode(key_content_ptr[i], &decoded_content);
}
for (size_t i = 0; i < num_value_columns; ++i) {
value_content_ptr[i] +=
value_col_bufs[i]->Decode(value_content_ptr[i], &decoded_content);
}
checksum_content_ptr +=
value_checksum_buf->Decode(checksum_content_ptr, &decoded_content);
}
size_t offset = decoded_content - decoded_content_base;
Slice output_content(decoded_content, offset);
if (out_file != nullptr) {
out_file->Append(output_content);
}
}
delete[] decoded_content_base;
}
// Decodes row-format blocks (as produced by EncodeBlocksToRowFormat) and,
// when `out_file` is non-null, appends each decoded payload to it.
//
// Each entry of `blocks` is the (possibly compressed) payload followed by
// one trailing byte holding its CompressionType. That marker byte is never
// part of the output, whether or not the payload was compressed.
void ColumnAwareEncodingReader::DecodeBlocksFromRowFormat(
    WritableFile* out_file, const std::vector<std::string>* blocks) {
  const uint32_t format_version = 2;
  Options options;
  ImmutableCFOptions ioptions(options);
  for (const auto& block : *blocks) {
    if (block.empty()) {
      continue;  // not even the trailing compression-type byte is present
    }
    const size_t payload_size = block.size() - 1;
    const CompressionType type = static_cast<CompressionType>(block.back());
    std::string decoded_content;
    if (type != kNoCompression) {
      BlockContents contents;
      UncompressionContext uncompression_ctx(type);
      const Status s =
          UncompressBlockContents(uncompression_ctx, block.data(),
                                  payload_size, &contents, format_version,
                                  ioptions);
      if (!s.ok()) {
        fprintf(stderr, "Failed to uncompress block: %s\n",
                s.ToString().c_str());
        continue;
      }
      decoded_content.assign(contents.data.data(), contents.data.size());
    } else {
      // Stored uncompressed: copy the payload without the marker byte so
      // both branches emit exactly the original block content.
      decoded_content.assign(block.data(), payload_size);
    }
    if (out_file != nullptr) {
      out_file->Append(decoded_content);
    }
  }
}
// Writes a human-readable dump of `kv_pair_blocks` to `filename`: one
// header line per block, then one line per key/value pair with every column
// printed in hex, laid out as "<key columns> | <value columns>|<checksum>".
// If the file cannot be opened, reports that on stderr and returns.
void ColumnAwareEncodingReader::DumpDataColumns(
    const std::string& filename,
    const KVPairColDeclarations& kvp_col_declarations,
    const std::vector<KVPairBlock>& kv_pair_blocks) {
  FILE* fp = fopen(filename.c_str(), "w");
  if (fp == nullptr) {
    fprintf(stderr, "Cannot open dump file: %s\n", filename.c_str());
    return;
  }
  // The encoders are used only to learn how many bytes each column consumes.
  KVPairColBufEncoders kvp_col_bufs(kvp_col_declarations);
  auto& key_col_bufs = kvp_col_bufs.key_col_bufs;
  auto& value_col_bufs = kvp_col_bufs.value_col_bufs;
  auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf;

  size_t block_id = 1;
  for (const auto& kv_pairs : kv_pair_blocks) {
    fprintf(fp,
            "---------------- Block: %-4" ROCKSDB_PRIszt
            " ----------------\n",
            block_id);
    for (const auto& kv_pair : kv_pairs) {
      const auto& key = kv_pair.first;
      const auto& value = kv_pair.second;

      const char* key_ptr = key.data();
      for (auto& buf : key_col_bufs) {
        const size_t col_size = buf->Append(key_ptr);
        fprintf(fp, "%s ", Slice(key_ptr, col_size).ToString(true).c_str());
        key_ptr += col_size;
      }
      fprintf(fp, "|");

      size_t value_offset = 0;
      const char* value_ptr = value.data();
      for (auto& buf : value_col_bufs) {
        const size_t col_size = buf->Append(value_ptr);
        fprintf(fp, " %s", Slice(value_ptr, col_size).ToString(true).c_str());
        value_ptr += col_size;
        value_offset += col_size;
      }

      // Bytes left over after the declared value columns are treated as the
      // checksum column; otherwise the checksum is recorded as absent.
      if (value_offset < value.size()) {
        const size_t col_size = value_checksum_buf->Append(value_ptr);
        fprintf(fp, "|%s", Slice(value_ptr, col_size).ToString(true).c_str());
      } else {
        value_checksum_buf->Append(nullptr);
      }
      fprintf(fp, "\n");
    }
    block_id++;
  }
  fclose(fp);
}
namespace {
void CompressDataBlock(const std::string& output_content, Slice* slice_final,
CompressionType* type, std::string* compressed_output) {
CompressionContext compression_ctx(*type);
uint32_t format_version = 2; // hard-coded version
*slice_final = CompressBlock(output_content, compression_ctx, type,
format_version, compressed_output);
}
} // namespace
// Encodes every block of key/value pairs in a row layout: for each pair the
// key bytes not shared with the previous key are emitted, followed by the
// value. Prefix sharing restarts once 16 pairs have been emitted since the
// last restart. Each block is then compressed, appended to `out_file` when
// it is non-null, and stored in `blocks` with one trailing byte holding the
// compression type that was actually used.
void ColumnAwareEncodingReader::EncodeBlocksToRowFormat(
    WritableFile* out_file, CompressionType compression_type,
    const std::vector<KVPairBlock>& kv_pair_blocks,
    std::vector<std::string>* blocks) {
  const size_t kRestartInterval = 16;
  std::string row_bytes;
  for (const auto& kv_block : kv_pair_blocks) {
    row_bytes.clear();
    std::string prev_key;
    size_t since_restart = 0;
    for (const auto& entry : kv_block) {
      const auto& key = entry.first;
      const auto& value = entry.second;

      // Length of the prefix shared with the previous key (0 at a restart).
      size_t shared = 0;
      if (since_restart < kRestartInterval) {
        const size_t limit = std::min(prev_key.size(), key.size());
        while (shared < limit && prev_key[shared] == key[shared]) {
          ++shared;
        }
      } else {
        since_restart = 0;
      }

      const size_t non_shared = key.size() - shared;
      const char* suffix = key.data() + shared;
      row_bytes.append(suffix, non_shared);
      row_bytes.append(value);

      // prev_key becomes the current key: kept prefix plus the new suffix.
      prev_key.resize(shared);
      prev_key.append(suffix, non_shared);
      ++since_restart;
    }

    Slice compressed;
    CompressionType actual_type = compression_type;
    std::string compressed_storage;
    CompressDataBlock(row_bytes, &compressed, &actual_type,
                      &compressed_storage);
    if (out_file != nullptr) {
      out_file->Append(compressed);
    }

    // Add a bit in the end for decoding
    std::string block_with_type(compressed.data(), compressed.size());
    block_with_type.append(reinterpret_cast<char*>(&actual_type), 1);
    blocks->push_back(std::move(block_with_type));
  }
}
// Encodes every block of key/value pairs column by column.
//
// Normal mode (`print_column_stat` false): each block is serialized as
//   fixed64 num_kv_pairs, one fixed64 size per key and value column,
//   the column data in that order, then the value-checksum column,
// compressed, appended to `out_file` when it is non-null, and stored in
// `blocks` with one trailing byte holding the compression type actually used.
//
// Stat mode (`print_column_stat` true): every column is compressed on its
// own, appended to `out_file` when it is non-null, and the per-column
// compressed sizes are printed to stdout at the end; `blocks` is untouched.
//
// Always returns Status::OK().
Status ColumnAwareEncodingReader::EncodeBlocks(
    const KVPairColDeclarations& kvp_col_declarations, WritableFile* out_file,
    CompressionType compression_type,
    const std::vector<KVPairBlock>& kv_pair_blocks,
    std::vector<std::string>* blocks, bool print_column_stat) {
  std::vector<size_t> key_col_sizes(
      kvp_col_declarations.key_col_declarations->size(), 0);
  std::vector<size_t> value_col_sizes(
      kvp_col_declarations.value_col_declarations->size(), 0);
  size_t value_checksum_size = 0;

  // Compresses one column, writes it when an output file was supplied, and
  // returns the compressed size.
  const auto compress_and_write =
      [&](const std::string& column_data) -> size_t {
    Slice slice_final;
    auto type = compression_type;
    std::string compressed_output;
    CompressDataBlock(column_data, &slice_final, &type, &compressed_output);
    if (out_file != nullptr) {
      out_file->Append(slice_final);
    }
    return slice_final.size();
  };

  for (const auto& kv_pairs : kv_pair_blocks) {
    KVPairColBufEncoders kvp_col_bufs(kvp_col_declarations);
    auto& key_col_bufs = kvp_col_bufs.key_col_bufs;
    auto& value_col_bufs = kvp_col_bufs.value_col_bufs;
    auto& value_checksum_buf = kvp_col_bufs.value_checksum_buf;

    size_t num_kv_pairs = 0;
    for (const auto& kv_pair : kv_pairs) {
      const auto& key = kv_pair.first;
      const auto& value = kv_pair.second;
      num_kv_pairs++;

      const char* key_ptr = key.data();
      for (auto& buf : key_col_bufs) {
        key_ptr += buf->Append(key_ptr);
      }

      size_t value_offset = 0;
      const char* value_ptr = value.data();
      for (auto& buf : value_col_bufs) {
        const size_t col_size = buf->Append(value_ptr);
        value_ptr += col_size;
        value_offset += col_size;
      }

      // Bytes left over after the declared value columns are treated as the
      // checksum column; otherwise the checksum is recorded as absent.
      if (value_offset < value.size()) {
        value_checksum_buf->Append(value_ptr);
      } else {
        value_checksum_buf->Append(nullptr);
      }
    }
    kvp_col_bufs.Finish();

    if (print_column_stat) {
      // Get stats: compress each column separately and accumulate sizes.
      for (size_t i = 0; i < key_col_bufs.size(); ++i) {
        key_col_sizes[i] += compress_and_write(key_col_bufs[i]->GetData());
      }
      for (size_t i = 0; i < value_col_bufs.size(); ++i) {
        value_col_sizes[i] += compress_and_write(value_col_bufs[i]->GetData());
      }
      value_checksum_size += compress_and_write(value_checksum_buf->GetData());
      continue;
    }

    // Compress and write a block
    std::string output_content;
    // Write column sizes
    PutFixed64(&output_content, num_kv_pairs);
    for (auto& buf : key_col_bufs) {
      PutFixed64(&output_content, buf->GetData().size());
    }
    for (auto& buf : value_col_bufs) {
      PutFixed64(&output_content, buf->GetData().size());
    }
    // Write data
    for (auto& buf : key_col_bufs) {
      output_content.append(buf->GetData());
    }
    for (auto& buf : value_col_bufs) {
      output_content.append(buf->GetData());
    }
    output_content.append(value_checksum_buf->GetData());

    Slice slice_final;
    auto type = compression_type;
    std::string compressed_output;
    CompressDataBlock(output_content, &slice_final, &type, &compressed_output);
    if (out_file != nullptr) {
      out_file->Append(slice_final);
    }
    // Add a bit in the end for decoding. Copy exactly the slice, then append
    // the marker, rather than reading one byte past the slice.
    std::string slice_final_with_bit(slice_final.data(), slice_final.size());
    slice_final_with_bit.push_back(static_cast<char>(type));
    blocks->push_back(std::move(slice_final_with_bit));
  }

  if (print_column_stat) {
    size_t total_size = value_checksum_size;
    for (size_t i = 0; i < key_col_sizes.size(); ++i) {
      total_size += key_col_sizes[i];
    }
    for (size_t i = 0; i < value_col_sizes.size(); ++i) {
      total_size += value_col_sizes[i];
    }
    // Report 0% instead of dividing by zero when nothing was encoded.
    const auto percentage = [total_size](size_t part) -> double {
      return total_size == 0 ? 0.0 : 100.0 * part / total_size;
    };
    for (size_t i = 0; i < key_col_sizes.size(); ++i) {
      printf("Key col %" ROCKSDB_PRIszt " size: %" ROCKSDB_PRIszt
             " percentage %lf%%\n",
             i, key_col_sizes[i], percentage(key_col_sizes[i]));
    }
    for (size_t i = 0; i < value_col_sizes.size(); ++i) {
      printf("Value col %" ROCKSDB_PRIszt " size: %" ROCKSDB_PRIszt
             " percentage %lf%%\n",
             i, value_col_sizes[i], percentage(value_col_sizes[i]));
    }
    printf("Value checksum size: %" ROCKSDB_PRIszt " percentage %lf%%\n",
           value_checksum_size, percentage(value_checksum_size));
  }
  return Status::OK();
}
// Allocates the hard-coded column layout used for the "primary" index type:
// five key columns, six value columns and one nullable checksum column.
// All three out-parameters receive objects created with `new`; the caller
// is responsible for deleting them.
void ColumnAwareEncodingReader::GetColDeclarationsPrimary(
    std::vector<ColDeclaration>** key_col_declarations,
    std::vector<ColDeclaration>** value_col_declarations,
    ColDeclaration** value_checksum_declaration) {
  auto* key_cols = new std::vector<ColDeclaration>();
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint, 4,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleDeltaVarint,
                         8, false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint, 8,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint, 8,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint, 8);
  *key_col_declarations = key_cols;

  auto* value_cols = new std::vector<ColDeclaration>();
  value_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint,
                           4);
  value_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint,
                           4);
  value_cols->emplace_back("FixedLength", ColCompressionType::kColRle, 1);
  value_cols->emplace_back("VariableLength");
  value_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint,
                           4);
  value_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint,
                           8);
  *value_col_declarations = value_cols;

  *value_checksum_declaration = new ColDeclaration(
      "LongFixedLength", ColCompressionType::kColNoCompression, 9,
      true /* nullable */);
}
// Allocates the hard-coded column layout used for the "secondary" index
// type: nine key columns, no value columns and one nullable checksum
// column. All three out-parameters receive objects created with `new`; the
// caller is responsible for deleting them.
void ColumnAwareEncodingReader::GetColDeclarationsSecondary(
    std::vector<ColDeclaration>** key_col_declarations,
    std::vector<ColDeclaration>** value_col_declarations,
    ColDeclaration** value_checksum_declaration) {
  auto* key_cols = new std::vector<ColDeclaration>();
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint, 4,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint, 8,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleDeltaVarint,
                         8, false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRle, 1);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint, 4,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColDeltaVarint, 8,
                         false, true);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint, 8,
                         false, true);
  key_cols->emplace_back("VariableChunk",
                         ColCompressionType::kColNoCompression);
  key_cols->emplace_back("FixedLength", ColCompressionType::kColRleVarint, 8);
  *key_col_declarations = key_cols;

  // The secondary layout declares no value columns.
  *value_col_declarations = new std::vector<ColDeclaration>();

  *value_checksum_declaration = new ColDeclaration(
      "LongFixedLength", ColCompressionType::kColNoCompression, 9,
      true /* nullable */);
}
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <string>
#include <vector>
#include "db/dbformat.h"
#include "include/rocksdb/env.h"
#include "include/rocksdb/listener.h"
#include "include/rocksdb/options.h"
#include "include/rocksdb/status.h"
#include "options/cf_options.h"
#include "table/block_based_table_reader.h"
namespace rocksdb {
struct ColDeclaration;
struct KVPairColDeclarations;
// Reads the data blocks of one SST file and re-encodes / decodes them in a
// row layout or a column-aware layout, for experimentation.
class ColumnAwareEncodingReader {
public:
// Stores `file_name` and initializes the table reader for that file.
explicit ColumnAwareEncodingReader(const std::string& file_name);
// Fills `kv_pair_blocks` by delegating to the underlying table reader.
void GetKVPairsFromDataBlocks(std::vector<KVPairBlock>* kv_pair_blocks);
// Encodes each block row by row with key-prefix sharing, compresses it,
// appends it to `out_file` when that is non-null, and pushes the compressed
// bytes plus one trailing compression-type byte onto `blocks`.
void EncodeBlocksToRowFormat(WritableFile* out_file,
CompressionType compression_type,
const std::vector<KVPairBlock>& kv_pair_blocks,
std::vector<std::string>* blocks);
// Reverses the compression applied by EncodeBlocksToRowFormat for every
// entry of `blocks` and appends the result to `out_file` when it is non-null.
void DecodeBlocksFromRowFormat(WritableFile* out_file,
const std::vector<std::string>* blocks);
// Writes a human-readable, per-column hex dump of `kv_pair_blocks` to the
// file named `filename`.
void DumpDataColumns(const std::string& filename,
const KVPairColDeclarations& kvp_col_declarations,
const std::vector<KVPairBlock>& kv_pair_blocks);
// Encodes each block column by column. With `print_column_stat` set, each
// column is compressed separately and per-column sizes are printed instead
// of producing entries in `blocks`. The visible implementation always
// returns Status::OK().
Status EncodeBlocks(const KVPairColDeclarations& kvp_col_declarations,
WritableFile* out_file, CompressionType compression_type,
const std::vector<KVPairBlock>& kv_pair_blocks,
std::vector<std::string>* blocks, bool print_column_stat);
// Decodes entries produced by EncodeBlocks back into row order and appends
// them to `out_file` when it is non-null.
void DecodeBlocks(const KVPairColDeclarations& kvp_col_declarations,
WritableFile* out_file,
const std::vector<std::string>* blocks);
// Allocates (with `new`) the hard-coded column layout for the "primary"
// index type; the caller deletes the three returned objects.
static void GetColDeclarationsPrimary(
std::vector<ColDeclaration>** key_col_declarations,
std::vector<ColDeclaration>** value_col_declarations,
ColDeclaration** value_checksum_declaration);
// Allocates (with `new`) the hard-coded column layout for the "secondary"
// index type; the caller deletes the three returned objects.
static void GetColDeclarationsSecondary(
std::vector<ColDeclaration>** key_col_declarations,
std::vector<ColDeclaration>** value_col_declarations,
ColDeclaration** value_checksum_declaration);
private:
// Init the TableReader for the sst file
void InitTableReader(const std::string& file_path);
// NOTE: the constructor initializes ioptions_ and moptions_ from options_,
// so the declaration order of the members below matters.
std::string file_name_;
EnvOptions soptions_;
Options options_;
// NOTE(review): not referenced by the implementation visible in this file.
Status init_result_;
std::unique_ptr<BlockBasedTable> table_reader_;
std::unique_ptr<RandomAccessFileReader> file_;
const ImmutableCFOptions ioptions_;
const MutableCFOptions moptions_;
InternalKeyComparator internal_comparator_;
// NOTE(review): not referenced by the implementation visible in this file.
std::unique_ptr<TableProperties> table_properties_;
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#include "utilities/date_tiered/date_tiered_db_impl.h"
#include <limits>
#include "db/db_impl.h"
#include "db/db_iter.h"
#include "db/write_batch_internal.h"
#include "monitoring/instrumented_mutex.h"
#include "options/options_helper.h"
#include "rocksdb/convenience.h"
#include "rocksdb/env.h"
#include "rocksdb/iterator.h"
#include "rocksdb/utilities/date_tiered_db.h"
#include "table/merging_iterator.h"
#include "util/coding.h"
#include "util/filename.h"
#include "util/string_util.h"
namespace rocksdb {
// Wraps an already opened `db`. Every column family whose name parses as an
// unsigned integer is registered in handle_map_ under that number (used as
// its timestamp), and the largest such number becomes latest_timebound_.
// Handles of all other column families (e.g. default) are destroyed here.
DateTieredDBImpl::DateTieredDBImpl(
    DB* db, Options options,
    const std::vector<ColumnFamilyDescriptor>& descriptors,
    const std::vector<ColumnFamilyHandle*>& handles, int64_t ttl,
    int64_t column_family_interval)
    : db_(db),
      cf_options_(ColumnFamilyOptions(options)),
      ioptions_(ImmutableCFOptions(options)),
      moptions_(MutableCFOptions(options)),
      icomp_(cf_options_.comparator),
      ttl_(ttl),
      column_family_interval_(column_family_interval),
      mutex_(options.statistics.get(), db->GetEnv(), DB_MUTEX_WAIT_MICROS,
             options.use_adaptive_mutex) {
  latest_timebound_ = std::numeric_limits<int64_t>::min();
  size_t idx = 0;
  for (ColumnFamilyHandle* handle : handles) {
    // descriptors[idx] is the descriptor that belongs to handles[idx].
    const auto& cf_name = descriptors[idx++].name;
    int64_t cf_timestamp = 0;
    try {
      cf_timestamp = ParseUint64(cf_name);
    } catch (const std::invalid_argument&) {
      // Bypass unrelated column family, e.g. default
      db_->DestroyColumnFamilyHandle(handle);
      continue;
    }
    latest_timebound_ = (cf_timestamp > latest_timebound_)
                            ? cf_timestamp
                            : latest_timebound_;
    handle_map_.insert(std::make_pair(cf_timestamp, handle));
  }
}
// Releases every tracked column family handle, then deletes the wrapped DB.
DateTieredDBImpl::~DateTieredDBImpl() {
  for (auto it = handle_map_.begin(); it != handle_map_.end(); ++it) {
    db_->DestroyColumnFamilyHandle(it->second);
  }
  delete db_;
  db_ = nullptr;
}
// Opens (or creates) the database at `dbname` and wraps it in a
// DateTieredDBImpl stored in `*dbptr`.
//
// If the existing column families can be listed, all of them are opened
// (read-only when `read_only` is set). Otherwise the database is opened with
// only its default column family. `*dbptr` is written only on success.
Status DateTieredDB::Open(const Options& options, const std::string& dbname,
                          DateTieredDB** dbptr, int64_t ttl,
                          int64_t column_family_interval, bool read_only) {
  DBOptions db_options(options);
  ColumnFamilyOptions cf_options(options);
  std::vector<ColumnFamilyDescriptor> descriptors;
  std::vector<ColumnFamilyHandle*> handles;
  DB* db;

  std::vector<std::string> cf_names;
  Status s = DB::ListColumnFamilies(db_options, dbname, &cf_names);
  if (s.ok()) {
    for (const auto& cf_name : cf_names) {
      descriptors.emplace_back(ColumnFamilyDescriptor(cf_name, cf_options));
    }
    s = read_only ? DB::OpenForReadOnly(db_options, dbname, descriptors,
                                        &handles, &db)
                  : DB::Open(db_options, dbname, descriptors, &handles, &db);
  } else {
    // Listing failed: fall back to a plain open with the default family.
    s = DB::Open(options, dbname, &db);
  }

  if (!s.ok()) {
    return s;
  }
  *dbptr = new DateTieredDBImpl(db, options, descriptors, handles, ttl,
                                column_family_interval);
  return s;
}
// Returns true when a key stamped `keytime` has outlived `ttl` according to
// the clock of `env`. A non-positive TTL, or a failure to read the current
// time, makes the key count as fresh.
bool DateTieredDBImpl::IsStale(int64_t keytime, int64_t ttl, Env* env) {
  if (ttl > 0) {
    int64_t now;
    if (env->GetCurrentTime(&now).ok()) {
      return keytime + ttl <= now;
    }
  }
  return false;
}
// Drops every column family whose upper time bound is at least `ttl_` in the
// past, i.e. whose data has entirely expired.
//
// A non-positive TTL means data never expires (see IsStale), so nothing is
// dropped in that case. Returns the first error from reading the clock or
// from DropColumnFamily; column families already dropped stay dropped.
// TODO(jhli): Can be made a background job
Status DateTieredDBImpl::DropObsoleteColumnFamilies() {
  if (ttl_ <= 0) {
    // Consistent with IsStale(): no TTL, nothing can be obsolete.
    return Status::OK();
  }
  int64_t curtime;
  Status s = db_->GetEnv()->GetCurrentTime(&curtime);
  if (!s.ok()) {
    return s;
  }
  const int64_t expiry_cutoff = curtime - ttl_;
  {
    InstrumentedMutexLock l(&mutex_);
    // handle_map_ is ordered by time bound, so stop at the first live entry.
    auto iter = handle_map_.begin();
    while (iter != handle_map_.end() && iter->first <= expiry_cutoff) {
      s = db_->DropColumnFamily(iter->second);
      if (!s.ok()) {
        return s;
      }
      // Release the handle the same way the constructor/destructor do.
      db_->DestroyColumnFamilyHandle(iter->second);
      iter = handle_map_.erase(iter);
    }
  }
  return Status::OK();
}
// Decodes the timestamp stored in the trailing 8 bytes of `key` into
// `*result`. Returns Corruption when the key is shorter than kTSLength.
Status DateTieredDBImpl::GetTimestamp(const Slice& key, int64_t* result) {
  if (key.size() < kTSLength) {
    return Status::Corruption("Bad timestamp in key");
  }
  const int kBytes = 8;
  const char* tail = key.data() + key.size() - kBytes;
  int64_t decoded = 0;
  if (!port::kLittleEndian) {
    memcpy(&decoded, tail, sizeof(decoded));
  } else {
    // The first byte is the most significant one; rebuild the value by hand.
    for (int i = 0; i < kBytes; ++i) {
      const uint64_t byte_value = static_cast<unsigned char>(tail[i]);
      decoded |= (byte_value << ((kBytes - i - 1) << 3));
    }
  }
  *result = decoded;
  return Status::OK();
}
// Creates the next time-window column family and registers it in
// handle_map_. The caller must hold mutex_.
//
// The new family is named after its upper time bound: `now + interval` when
// no family exists yet, otherwise the first multiple of the interval past
// latest_timebound_ that lies beyond the current time. On success
// `*column_family` receives the new handle; on failure no state is changed.
Status DateTieredDBImpl::CreateColumnFamily(
    ColumnFamilyHandle** column_family) {
  mutex_.AssertHeld();
  if (column_family_interval_ <= 0) {
    // Guards the division below and rejects meaningless window sizes.
    return Status::InvalidArgument("column_family_interval must be positive");
  }
  int64_t curtime;
  Status s = db_->GetEnv()->GetCurrentTime(&curtime);
  if (!s.ok()) {
    return s;
  }
  int64_t new_timebound;
  if (handle_map_.empty()) {
    new_timebound = curtime + column_family_interval_;
  } else {
    new_timebound =
        latest_timebound_ +
        ((curtime - latest_timebound_) / column_family_interval_ + 1) *
            column_family_interval_;
  }
  const std::string cf_name = ToString(new_timebound);
  s = db_->CreateColumnFamily(cf_options_, cf_name, column_family);
  if (s.ok()) {
    // Only advance the bound once the family really exists, so a failed
    // attempt does not leave a gap in the time windows.
    latest_timebound_ = new_timebound;
    handle_map_.insert(std::make_pair(new_timebound, *column_family));
  }
  return s;
}
// Looks up, under mutex_, the column family with the smallest time bound
// strictly greater than `keytime` and stores its handle in `*column_family`.
// When no such family exists, either creates a new one (create_if_missing)
// or returns NotFound with `*column_family` left as nullptr.
Status DateTieredDBImpl::FindColumnFamily(int64_t keytime,
                                          ColumnFamilyHandle** column_family,
                                          bool create_if_missing) {
  *column_family = nullptr;
  InstrumentedMutexLock guard(&mutex_);
  const auto window = handle_map_.upper_bound(keytime);
  if (window != handle_map_.end()) {
    *column_family = window->second;
    return Status::OK();
  }
  if (create_if_missing) {
    return CreateColumnFamily(column_family);
  }
  return Status::NotFound();
}
// Stores `key` -> `val` in the column family covering the key's timestamp,
// creating that family when needed. Keys that are already past their TTL are
// rejected with InvalidArgument.
Status DateTieredDBImpl::Put(const WriteOptions& options, const Slice& key,
                             const Slice& val) {
  int64_t key_time = 0;
  Status status = GetTimestamp(key, &key_time);
  if (!status.ok()) {
    return status;
  }

  // Opportunistic cleanup; its result does not affect this write.
  DropObsoleteColumnFamilies();

  if (IsStale(key_time, ttl_, db_->GetEnv())) {
    return Status::InvalidArgument();
  }

  ColumnFamilyHandle* target_cf;
  status = FindColumnFamily(key_time, &target_cf, true /*create_if_missing*/);
  if (!status.ok()) {
    return status;
  }

  WriteBatch single_put;
  single_put.Put(target_cf, key, val);
  return Write(options, &single_put);
}
// Reads the value for `key` from the column family covering the key's
// timestamp. Expired keys and keys without a matching column family yield
// NotFound.
Status DateTieredDBImpl::Get(const ReadOptions& options, const Slice& key,
                             std::string* value) {
  int64_t key_time = 0;
  Status status = GetTimestamp(key, &key_time);
  if (!status.ok()) {
    return status;
  }
  if (IsStale(key_time, ttl_, db_->GetEnv())) {
    return Status::NotFound();
  }

  ColumnFamilyHandle* source_cf;
  status = FindColumnFamily(key_time, &source_cf, false /*create_if_missing*/);
  if (!status.ok()) {
    return status;
  }
  if (source_cf == nullptr) {
    return Status::NotFound();
  }
  return db_->Get(options, source_cf, key, value);
}
bool DateTieredDBImpl::KeyMayExist(const ReadOptions& options, const Slice& key,
std::string* value, bool* value_found) {
int64_t timestamp = 0;
Status s;
s = GetTimestamp(key, &timestamp);
if (!s.ok()) {
// Cannot get current time
return false;
}
// Decide column family to get from
ColumnFamilyHandle* column_family;
s = FindColumnFamily(timestamp, &column_family, false /*create_if_missing*/);
if (!s.ok() || column_family == nullptr) {
// Cannot find column family
return false;
}
if (IsStale(timestamp, ttl_, db_->GetEnv())) {
return false;
}
return db_->KeyMayExist(options, column_family, key, value, value_found);
}
// Deletes `key` from the column family covering the key's timestamp.
// Expired keys and keys without a matching column family yield NotFound.
Status DateTieredDBImpl::Delete(const WriteOptions& options, const Slice& key) {
  int64_t key_time = 0;
  Status status = GetTimestamp(key, &key_time);
  if (!status.ok()) {
    return status;
  }

  // Opportunistic cleanup; its result does not affect this delete.
  DropObsoleteColumnFamilies();

  if (IsStale(key_time, ttl_, db_->GetEnv())) {
    return Status::NotFound();
  }

  ColumnFamilyHandle* target_cf;
  status = FindColumnFamily(key_time, &target_cf, false /*create_if_missing*/);
  if (!status.ok()) {
    return status;
  }
  if (target_cf == nullptr) {
    return Status::NotFound();
  }
  // Remove the key from its time window.
  return db_->Delete(options, target_cf, key);
}
// Applies a merge operand for `key` in the column family covering the key's
// timestamp, creating that family when needed. Unlike Put, no staleness
// check is performed here.
Status DateTieredDBImpl::Merge(const WriteOptions& options, const Slice& key,
                               const Slice& value) {
  int64_t key_time = 0;
  Status status = GetTimestamp(key, &key_time);
  if (!status.ok()) {
    // The key does not carry a usable timestamp.
    return status;
  }

  ColumnFamilyHandle* target_cf;
  status = FindColumnFamily(key_time, &target_cf, true /*create_if_missing*/);
  if (!status.ok()) {
    return status;
  }

  WriteBatch single_merge;
  single_merge.Merge(target_cf, key, value);
  return Write(options, &single_merge);
}
// Replays `updates` into a fresh batch keyed by column family id and writes
// that batch to the wrapped DB.
//
// Returns the error from replaying the batch (e.g. an operation the local
// handler does not implement) without writing anything; otherwise returns
// the status of the underlying DB write.
Status DateTieredDBImpl::Write(const WriteOptions& opts, WriteBatch* updates) {
  // Copies each supported operation of the source batch into updates_ttl.
  class Handler : public WriteBatch::Handler {
   public:
    explicit Handler() {}
    WriteBatch updates_ttl;
    Status batch_rewrite_status;
    virtual Status PutCF(uint32_t column_family_id, const Slice& key,
                         const Slice& value) override {
      WriteBatchInternal::Put(&updates_ttl, column_family_id, key, value);
      return Status::OK();
    }
    virtual Status MergeCF(uint32_t column_family_id, const Slice& key,
                           const Slice& value) override {
      WriteBatchInternal::Merge(&updates_ttl, column_family_id, key, value);
      return Status::OK();
    }
    virtual Status DeleteCF(uint32_t column_family_id,
                            const Slice& key) override {
      WriteBatchInternal::Delete(&updates_ttl, column_family_id, key);
      return Status::OK();
    }
    virtual void LogData(const Slice& blob) override {
      updates_ttl.PutLogData(blob);
    }
  };
  Handler handler;
  // Do not commit a partially rewritten batch if the replay itself failed.
  const Status iterate_status = updates->Iterate(&handler);
  if (!iterate_status.ok()) {
    return iterate_status;
  }
  if (!handler.batch_rewrite_status.ok()) {
    return handler.batch_rewrite_status;
  }
  return db_->Write(opts, &(handler.updates_ttl));
}
// Returns an iterator over all tracked time-series column families, built by
// merging one internal iterator per family underneath a single DB iterator.
// An empty iterator is returned when no family is tracked.
Iterator* DateTieredDBImpl::NewIterator(const ReadOptions& opts) {
  if (handle_map_.empty()) {
    return NewEmptyIterator();
  }

  DBImpl* impl = reinterpret_cast<DBImpl*>(db_);

  auto merged_view = NewArenaWrappedDbIterator(
      impl->GetEnv(), opts, ioptions_, moptions_, kMaxSequenceNumber,
      cf_options_.max_sequential_skip_in_iterations, 0,
      nullptr /*read_callback*/);

  // All child iterators are allocated from the wrapper's arena.
  auto arena = merged_view->GetArena();
  MergeIteratorBuilder builder(&icomp_, arena);
  for (auto it = handle_map_.begin(); it != handle_map_.end(); ++it) {
    builder.AddIterator(impl->NewInternalIterator(
        arena, merged_view->GetRangeDelAggregator(), kMaxSequenceNumber,
        it->second));
  }

  merged_view->SetIterUnderDBIter(builder.Finish());
  return merged_view;
}
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#pragma once
#ifndef ROCKSDB_LITE
#include <map>
#include <string>
#include <vector>
#include "monitoring/instrumented_mutex.h"
#include "options/cf_options.h"
#include "rocksdb/db.h"
#include "rocksdb/utilities/date_tiered_db.h"
namespace rocksdb {
// Implementation of DateTieredDB.
//
// Wraps a DB and keeps a map from an upper time bound to a column family
// handle. Each operation extracts the timestamp carried by the user key and
// routes the request to the column family found for that timestamp.
//
// NOTE: the member declaration order below is significant: icomp_ is
// initialized from cf_options_ in the constructor's initializer list.
class DateTieredDBImpl : public DateTieredDB {
public:
// Takes ownership of `db` (it is deleted in the destructor) and of the
// handles it keeps in handle_map_.
DateTieredDBImpl(DB* db, Options options,
const std::vector<ColumnFamilyDescriptor>& descriptors,
const std::vector<ColumnFamilyHandle*>& handles, int64_t ttl,
int64_t column_family_interval);
virtual ~DateTieredDBImpl();
// Key/value operations; every key is expected to carry a timestamp that
// GetTimestamp() can extract.
Status Put(const WriteOptions& options, const Slice& key,
const Slice& val) override;
Status Get(const ReadOptions& options, const Slice& key,
std::string* value) override;
Status Delete(const WriteOptions& options, const Slice& key) override;
bool KeyMayExist(const ReadOptions& options, const Slice& key,
std::string* value, bool* value_found = nullptr) override;
Status Merge(const WriteOptions& options, const Slice& key,
const Slice& value) override;
// Returns an iterator spanning all tracked column families.
Iterator* NewIterator(const ReadOptions& opts) override;
// Drops column families whose time bound is older than the TTL.
Status DropObsoleteColumnFamilies() override;
// Extract timestamp from key.
static Status GetTimestamp(const Slice& key, int64_t* result);
private:
// Base database object
DB* db_;
const ColumnFamilyOptions cf_options_;
const ImmutableCFOptions ioptions_;
const MutableCFOptions moptions_;
const InternalKeyComparator icomp_;
// Storing all column family handles for time series data.
// NOTE(review): not referenced by the implementation shown alongside this
// header; possibly unused -- confirm before relying on it.
std::vector<ColumnFamilyHandle*> handles_;
// Manages a mapping from a column family's maximum timestamp to its handle.
std::map<int64_t, ColumnFamilyHandle*> handle_map_;
// A time-to-live value to indicate when the data should be removed.
int64_t ttl_;
// A variable to indicate the time range of a column family.
int64_t column_family_interval_;
// Indicate largest maximum timestamp of a column family.
int64_t latest_timebound_;
// Mutex to protect handle_map_ operations.
InstrumentedMutex mutex_;
// Internal method to execute Put and Merge in batch.
Status Write(const WriteOptions& opts, WriteBatch* updates);
// Creates the next time-window column family; asserts that mutex_ is held.
Status CreateColumnFamily(ColumnFamilyHandle** column_family);
// Finds (and optionally creates) the column family for `keytime`.
Status FindColumnFamily(int64_t keytime, ColumnFamilyHandle** column_family,
bool create_if_missing);
// Returns true if `keytime` is at least `ttl` older than env's current time.
static bool IsStale(int64_t keytime, int64_t ttl, Env* env);
};
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#ifndef ROCKSDB_LITE
#ifndef OS_WIN
#include <unistd.h>
#endif
#include <map>
#include <memory>
#include "rocksdb/compaction_filter.h"
#include "rocksdb/utilities/date_tiered_db.h"
#include "port/port.h"
#include "util/logging.h"
#include "util/string_util.h"
#include "util/testharness.h"
namespace rocksdb {
namespace {
// Ordered key -> value map used to generate and verify test data.
using KVMap = std::map<std::string, std::string>;
}  // namespace
// Env wrapper with a manually driven clock: the time is captured once from
// the base Env at construction and afterwards only moves when Sleep() is
// called.
class SpecialTimeEnv : public EnvWrapper {
 public:
  explicit SpecialTimeEnv(Env* base) : EnvWrapper(base) {
    base->GetCurrentTime(&fake_now_);
  }

  // Advances the fake clock by `sleep_time` without blocking.
  void Sleep(int64_t sleep_time) { fake_now_ += sleep_time; }

  // Reports the fake clock value; always succeeds.
  virtual Status GetCurrentTime(int64_t* current_time) override {
    *current_time = fake_now_;
    return Status::OK();
  }

 private:
  int64_t fake_now_ = 0;
};
// Test fixture for DateTieredDB. Owns a fake-clock Env so tests can advance
// time deterministically, and destroys the database before and after each
// test.
class DateTieredTest : public testing::Test {
 public:
  DateTieredTest() {
    env_.reset(new SpecialTimeEnv(Env::Default()));
    dbname_ = test::PerThreadDBPath("date_tiered");
    options_.create_if_missing = true;
    options_.env = env_.get();
    date_tiered_db_.reset(nullptr);
    DestroyDB(dbname_, Options());
  }

  ~DateTieredTest() {
    // Close first: the DB must not outlive env_, which it points to.
    CloseDateTieredDB();
    DestroyDB(dbname_, Options());
  }

  // Opens the database with the given TTL and column family interval.
  // Requires that no database is currently open in this fixture.
  void OpenDateTieredDB(int64_t ttl, int64_t column_family_interval,
                        bool read_only = false) {
    ASSERT_TRUE(date_tiered_db_.get() == nullptr);
    DateTieredDB* date_tiered_db = nullptr;
    ASSERT_OK(DateTieredDB::Open(options_, dbname_, &date_tiered_db, ttl,
                                 column_family_interval, read_only));
    date_tiered_db_.reset(date_tiered_db);
  }

  void CloseDateTieredDB() { date_tiered_db_.reset(nullptr); }

  // Appends the fake clock's current time to `key` as 8 bytes, mirroring the
  // layout DateTieredDBImpl::GetTimestamp decodes.
  Status AppendTimestamp(std::string* key) {
    char ts[8];
    int bytes_to_fill = 8;
    int64_t timestamp_value = 0;
    Status s = env_->GetCurrentTime(&timestamp_value);
    if (!s.ok()) {
      return s;
    }
    if (port::kLittleEndian) {
      // Most significant byte first.
      for (int i = 0; i < bytes_to_fill; ++i) {
        ts[i] = (timestamp_value >> ((bytes_to_fill - i - 1) << 3)) & 0xFF;
      }
    } else {
      memcpy(ts, static_cast<void*>(&timestamp_value), bytes_to_fill);
    }
    key->append(ts, 8);
    return Status::OK();
  }

  // Fills `kvmap` with `num_entries` pairs "key<NNN><ts>" -> "value<NNN>",
  // where <NNN> is the zero-padded entry index and <ts> the current
  // timestamp. Indices are padded to the number of digits of `num_entries`
  // so that keys are fixed-width and sort in index order for any size.
  void MakeKVMap(int64_t num_entries, KVMap* kvmap) {
    kvmap->clear();
    int digits = 1;
    for (int64_t dummy = num_entries; dummy /= 10; ++digits) {
    }
    for (int64_t i = 0; i < num_entries; i++) {
      std::string key = "key";
      std::string value = "value";
      // Count the digits of this index directly instead of tracking them
      // incrementally, which went wrong past 100 entries.
      int digits_in_i = 1;
      for (int64_t rest = i; rest /= 10; ++digits_in_i) {
      }
      for (int j = digits_in_i; j < digits; j++) {
        key.append("0");
        value.append("0");
      }
      AppendNumberTo(&key, i);
      AppendNumberTo(&value, i);
      ASSERT_OK(AppendTimestamp(&key));
      (*kvmap)[key] = value;
    }
    // check all insertions done
    ASSERT_EQ(num_entries, static_cast<int64_t>(kvmap->size()));
  }

  // Number of column families currently present on disk (0 if listing
  // fails, since the result vector then stays empty).
  size_t GetColumnFamilyCount() {
    DBOptions db_options(options_);
    std::vector<std::string> cf;
    DB::ListColumnFamilies(db_options, dbname_, &cf);
    return cf.size();
  }

  // Advances the fake clock; does not block.
  void Sleep(int64_t sleep_time) { env_->Sleep(sleep_time); }

  static const int64_t kSampleSize_ = 100;
  std::string dbname_;
  std::unique_ptr<DateTieredDB> date_tiered_db_;
  std::unique_ptr<SpecialTimeEnv> env_;
  KVMap kvmap_;

 private:
  Options options_;
  KVMap::iterator kv_it_;
  const std::string kNewValue_ = "new_value";
  std::unique_ptr<CompactionFilter> test_comp_filter_;
};
// Values written at T=0 stay readable through Get until the TTL has elapsed;
// afterwards Get reports NotFound.
TEST_F(DateTieredTest, KeyLifeCycle) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=2, interval=2 and write the sample data.
  OpenDateTieredDB(2, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap inserted;
  MakeKVMap(kSampleSize_, &inserted);
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }

  // T=1: everything is still within its TTL.
  Sleep(1);
  for (const auto& entry : inserted) {
    std::string fetched;
    ASSERT_OK(date_tiered_db_->Get(read_opts, entry.first, &fetched));
    ASSERT_EQ(fetched, entry.second);
  }

  // T=2: the TTL has elapsed, so nothing may be returned.
  Sleep(1);
  for (const auto& entry : inserted) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }

  CloseDateTieredDB();
}
// Keys deleted while still within their TTL can no longer be read.
TEST_F(DateTieredTest, DeleteTest) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=2, interval=2 and write the sample data.
  OpenDateTieredDB(2, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap inserted;
  MakeKVMap(kSampleSize_, &inserted);
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }

  // T=1: the keys are not obsolete yet, so Delete must succeed.
  Sleep(1);
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Delete(write_opts, entry.first));
  }

  // Deleted keys are gone.
  for (const auto& entry : inserted) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }
}
// KeyMayExist reports keys that are still within their TTL and hands back
// their values.
TEST_F(DateTieredTest, KeyMayExistTest) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=2, interval=2 and write the sample data.
  OpenDateTieredDB(2, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap inserted;
  MakeKVMap(kSampleSize_, &inserted);
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }

  // T=1: everything is still within its TTL.
  Sleep(1);
  for (const auto& entry : inserted) {
    std::string fetched;
    ASSERT_TRUE(
        date_tiered_db_->KeyMayExist(read_opts, entry.first, &fetched));
    ASSERT_EQ(fetched, entry.second);
  }
}
// Closing and reopening the database (even with different settings) must not
// lose data or change when it expires under the new TTL.
TEST_F(DateTieredTest, MultiOpen) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=4, interval=4 and write the sample data.
  OpenDateTieredDB(4, 4);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap inserted;
  MakeKVMap(kSampleSize_, &inserted);
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }
  CloseDateTieredDB();

  // T=1: reopen with ttl=2, interval=2; the data is still readable.
  Sleep(1);
  OpenDateTieredDB(2, 2);
  for (const auto& entry : inserted) {
    std::string fetched;
    ASSERT_OK(date_tiered_db_->Get(read_opts, entry.first, &fetched));
    ASSERT_EQ(fetched, entry.second);
  }

  // T=2: the (new) TTL has elapsed, so nothing may be returned.
  Sleep(1);
  for (const auto& entry : inserted) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }

  CloseDateTieredDB();
}
// A Put whose key timestamp is already past the TTL must be rejected and
// must not leave any data behind.
TEST_F(DateTieredTest, InsertObsoleteDate) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=2, interval=2 and stamp the keys with the current
  // time.
  OpenDateTieredDB(2, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap stale_entries;
  MakeKVMap(kSampleSize_, &stale_entries);

  // T=2: by now every generated key is obsolete, so Put must fail.
  Sleep(2);
  for (const auto& entry : stale_entries) {
    const Status status =
        date_tiered_db_->Put(write_opts, entry.first, entry.second);
    ASSERT_TRUE(status.IsInvalidArgument());
  }

  // Nothing was written.
  for (const auto& entry : stale_entries) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }

  CloseDateTieredDB();
}
// A new time-series column family appears for each time window that receives
// writes, and an explicit DropObsoleteColumnFamilies removes windows whose
// data has fully expired.
TEST_F(DateTieredTest, ColumnFamilyCounts) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=4, interval=2; only the default family exists.
  OpenDateTieredDB(4, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);
  ASSERT_EQ(1, GetColumnFamilyCount());

  // The first batch of writes creates one time-series family.
  KVMap first_batch;
  MakeKVMap(kSampleSize_, &first_batch);
  for (const auto& entry : first_batch) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }
  ASSERT_EQ(2, GetColumnFamilyCount());

  // T=2: a second batch lands in a new time window, hence a new family.
  Sleep(2);
  KVMap second_batch;
  MakeKVMap(kSampleSize_, &second_batch);
  for (const auto& entry : second_batch) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }
  ASSERT_EQ(3, GetColumnFamilyCount());

  // T=6: the first batch is past its TTL and must not be readable.
  Sleep(4);
  for (const auto& entry : first_batch) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }

  // Dropping obsolete families removes the first time window from disk.
  date_tiered_db_->DropObsoleteColumnFamilies();
  ASSERT_EQ(2, GetColumnFamilyCount());

  CloseDateTieredDB();
}
// An iterator sees all live data in key order; once the data has expired and
// its column family has been dropped, a new iterator is empty.
TEST_F(DateTieredTest, IteratorLifeCycle) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=2, interval=2 and write the sample data.
  OpenDateTieredDB(2, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);

  KVMap inserted;
  MakeKVMap(kSampleSize_, &inserted);
  Iterator* scan;
  for (const auto& entry : inserted) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }

  Sleep(1);
  ASSERT_EQ(2, GetColumnFamilyCount());

  // T=1: the iterator yields every value, in the map's key order.
  scan = date_tiered_db_->NewIterator(read_opts);
  scan->SeekToFirst();
  for (const auto& entry : inserted) {
    ASSERT_TRUE(scan->Valid());
    ASSERT_EQ(0, scan->value().compare(entry.second));
    scan->Next();
  }
  delete scan;

  // T=5: everything has expired.
  Sleep(4);
  for (const auto& entry : inserted) {
    std::string fetched;
    const Status status =
        date_tiered_db_->Get(read_opts, entry.first, &fetched);
    ASSERT_TRUE(status.IsNotFound());
  }

  // After dropping obsolete families only the default one remains.
  date_tiered_db_->DropObsoleteColumnFamilies();
  ASSERT_EQ(1, GetColumnFamilyCount());

  // With no time-series family left the iterator has nothing to return.
  scan = date_tiered_db_->NewIterator(read_opts);
  scan->Seek(inserted.begin()->first);
  ASSERT_FALSE(scan->Valid());
  delete scan;

  CloseDateTieredDB();
}
// An iterator merges data that lives in different time-window column
// families into a single key-ordered stream.
TEST_F(DateTieredTest, IteratorMerge) {
  WriteOptions write_opts;
  ReadOptions read_opts;

  // T=0: open with ttl=4, interval=2.
  OpenDateTieredDB(4, 2);
  ASSERT_TRUE(date_tiered_db_.get() != nullptr);
  Iterator* merged;

  // First batch goes into the first time window.
  KVMap first_batch;
  MakeKVMap(kSampleSize_, &first_batch);
  for (const auto& entry : first_batch) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }
  ASSERT_EQ(2, GetColumnFamilyCount());

  // T=2: second batch goes into a second time window.
  Sleep(2);
  KVMap second_batch;
  MakeKVMap(kSampleSize_, &second_batch);
  for (const auto& entry : second_batch) {
    ASSERT_OK(date_tiered_db_->Put(write_opts, entry.first, entry.second));
  }
  ASSERT_EQ(3, GetColumnFamilyCount());

  // Keys of both batches share their prefix and differ only in the trailing
  // timestamp, so the merged stream alternates between the two batches.
  merged = date_tiered_db_->NewIterator(read_opts);
  merged->SeekToFirst();
  KVMap::iterator first_it = first_batch.begin();
  KVMap::iterator second_it = second_batch.begin();
  while (first_it != first_batch.end() && second_it != second_batch.end()) {
    ASSERT_TRUE(merged->Valid());
    ASSERT_EQ(0, merged->value().compare(first_it->second));
    merged->Next();
    ASSERT_TRUE(merged->Valid());
    ASSERT_EQ(0, merged->value().compare(second_it->second));
    merged->Next();
    first_it++;
    second_it++;
  }
  delete merged;

  CloseDateTieredDB();
}
} // namespace rocksdb
// Entry point of the black-box DateTieredDB test binary: runs every
// registered test and returns the overall result as the exit code.
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_code = RUN_ALL_TESTS();
  return exit_code;
}
#else
#include <stdio.h>
// ROCKSDB_LITE build: DateTieredDB is unavailable, so report the skip on
// stderr and exit successfully.
int main(int /*argc*/, char** /*argv*/) {
  fputs("SKIPPED as DateTieredDB is not supported in ROCKSDB_LITE\n", stderr);
  return 0;
}
#endif // !ROCKSDB_LITE
此差异已折叠。
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#include <algorithm>
#include "rocksdb/utilities/json_document.h"
#include "rocksdb/utilities/document_db.h"
#include "util/testharness.h"
#include "util/testutil.h"
namespace rocksdb {
// Test fixture for DocumentDB. Destroys the database directory before and
// after each test and owns the DocumentDB instance opened by the test.
class DocumentDBTest : public testing::Test {
 public:
  DocumentDBTest() {
    dbname_ = test::PerThreadDBPath("document_db_test");
    DestroyDB(dbname_, Options());
  }

  ~DocumentDBTest() {
    // Safe even if the test never opened a database: db_ starts as nullptr.
    delete db_;
    DestroyDB(dbname_, Options());
  }

  // Drains `cursor` and checks that the set of "_id" values it produced
  // equals `expected`, ignoring order.
  void AssertCursorIDs(Cursor* cursor, std::vector<int64_t> expected) {
    std::vector<int64_t> got;
    while (cursor->Valid()) {
      ASSERT_TRUE(cursor->document().Contains("_id"));
      got.push_back(cursor->document()["_id"].GetInt64());
      cursor->Next();
    }
    std::sort(expected.begin(), expected.end());
    std::sort(got.begin(), got.end());
    ASSERT_TRUE(got == expected);
  }

  // converts ' to ", so that we don't have to escape " all over the place
  std::string ConvertQuotes(const std::string& input) {
    std::string output(input);
    std::replace(output.begin(), output.end(), '\'', '\"');
    return output;
  }

  // Creates every index in `indexes` on the currently open database.
  void CreateIndexes(std::vector<DocumentDB::IndexDescriptor> indexes) {
    for (const auto& index : indexes) {
      ASSERT_OK(db_->CreateIndex(WriteOptions(), index));
    }
  }

  // Parses `doc` (single quotes allowed in place of double quotes). The
  // caller owns the returned document.
  JSONDocument* Parse(const std::string& doc) {
    return JSONDocument::ParseJSON(ConvertQuotes(doc).c_str());
  }

  std::string dbname_;
  // Initialized so the destructor never deletes an indeterminate pointer.
  DocumentDB* db_ = nullptr;
};
// Covers the basic flow: create an index, reopen the database with that
// index, insert documents, reject a duplicate primary key, run filtered
// queries with and without the index, and remove documents.
TEST_F(DocumentDBTest, SimpleQueryTest) {
  DocumentDBOptions db_opts;
  DocumentDB::IndexDescriptor name_index;
  name_index.description = Parse("{\"name\": 1}");
  name_index.name = "name_index";

  // First open without indexes, then create the index and close.
  ASSERT_OK(DocumentDB::Open(db_opts, dbname_, {}, &db_));
  CreateIndexes({name_index});
  delete db_;
  db_ = nullptr;

  // now there is index present
  ASSERT_OK(DocumentDB::Open(db_opts, dbname_, {name_index}, &db_));
  assert(db_ != nullptr);
  delete name_index.description;

  std::vector<std::string> docs = {
      "{\"_id\': 1, \"name\": \"One\"}", "{\"_id\": 2, \"name\": \"Two\"}",
      "{\"_id\": 3, \"name\": \"Three\"}", "{\"_id\": 4, \"name\": \"Four\"}"};
  for (const auto& text : docs) {
    std::unique_ptr<JSONDocument> parsed(Parse(text));
    ASSERT_TRUE(parsed.get() != nullptr);
    ASSERT_OK(db_->Insert(WriteOptions(), *parsed));
  }

  // inserting a document with existing primary key should return failure
  {
    std::unique_ptr<JSONDocument> duplicate(Parse(docs[0]));
    ASSERT_TRUE(duplicate.get() != nullptr);
    const Status status = db_->Insert(WriteOptions(), *duplicate);
    ASSERT_TRUE(status.IsInvalidArgument());
  }

  // find equal to "Two"
  {
    std::unique_ptr<JSONDocument> filter(
        Parse("[{'$filter': {'name': 'Two', '$index': 'name_index'}}]"));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *filter));
    AssertCursorIDs(results.get(), {2});
  }

  // find less than "Three"
  {
    std::unique_ptr<JSONDocument> filter(Parse(
        "[{'$filter': {'name': {'$lt': 'Three'}, '$index': "
        "'name_index'}}]"));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *filter));
    AssertCursorIDs(results.get(), {1, 4});
  }

  // find less than "Three" without index
  {
    std::unique_ptr<JSONDocument> filter(
        Parse("[{'$filter': {'name': {'$lt': 'Three'} }}]"));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *filter));
    AssertCursorIDs(results.get(), {1, 4});
  }

  // remove less or equal to "Three"
  {
    std::unique_ptr<JSONDocument> filter(
        Parse("{'name': {'$lte': 'Three'}, '$index': 'name_index'}"));
    ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *filter));
  }

  // find all -- only "Two" left, everything else should be deleted
  {
    std::unique_ptr<JSONDocument> filter(Parse("[]"));
    std::unique_ptr<Cursor> results(db_->Query(ReadOptions(), *filter));
    AssertCursorIDs(results.get(), {2});
  }
}
// Exercises range queries over several indexes, removal, updates and index
// dropping on a 13-document data set. The steps are order-dependent: the
// removal and the updates below change what the later queries must return.
TEST_F(DocumentDBTest, ComplexQueryTest) {
DocumentDBOptions options;
// Three single-field indexes; only two of them exist before the inserts.
DocumentDB::IndexDescriptor priority_index;
priority_index.description = Parse("{'priority': 1}");
priority_index.name = "priority";
DocumentDB::IndexDescriptor job_name_index;
job_name_index.description = Parse("{'job_name': 1}");
job_name_index.name = "job_name";
DocumentDB::IndexDescriptor progress_index;
progress_index.description = Parse("{'progress': 1}");
progress_index.name = "progress";
ASSERT_OK(DocumentDB::Open(options, dbname_, {}, &db_));
CreateIndexes({priority_index, progress_index});
// Parse() hands ownership of the description to the caller; release it.
delete priority_index.description;
delete progress_index.description;
std::vector<std::string> json_objects = {
"{'_id': 1, 'job_name': 'play', 'priority': 10, 'progress': 14.2}",
"{'_id': 2, 'job_name': 'white', 'priority': 2, 'progress': 45.1}",
"{'_id': 3, 'job_name': 'straw', 'priority': 5, 'progress': 83.2}",
"{'_id': 4, 'job_name': 'temporary', 'priority': 3, 'progress': 14.9}",
"{'_id': 5, 'job_name': 'white', 'priority': 4, 'progress': 44.2}",
"{'_id': 6, 'job_name': 'tea', 'priority': 1, 'progress': 12.4}",
"{'_id': 7, 'job_name': 'delete', 'priority': 2, 'progress': 77.54}",
"{'_id': 8, 'job_name': 'rock', 'priority': 3, 'progress': 93.24}",
"{'_id': 9, 'job_name': 'steady', 'priority': 3, 'progress': 9.1}",
"{'_id': 10, 'job_name': 'white', 'priority': 1, 'progress': 61.4}",
"{'_id': 11, 'job_name': 'who', 'priority': 4, 'progress': 39.41}",
"{'_id': 12, 'job_name': 'who', 'priority': -1, 'progress': 39.42}",
"{'_id': 13, 'job_name': 'who', 'priority': -2, 'progress': 39.42}", };
// add index on the fly!
CreateIndexes({job_name_index});
delete job_name_index.description;
for (auto& json : json_objects) {
std::unique_ptr<JSONDocument> document(Parse(json));
ASSERT_TRUE(document != nullptr);
ASSERT_OK(db_->Insert(WriteOptions(), *document));
}
// 2 < priority < 4 AND progress > 10.0, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
"10.0}, '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 8});
}
// -1 <= priority <= 1, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$lte': 1, '$gte': -1},"
" '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {6, 10, 12});
}
// 2 < priority < 4 AND progress > 10.0, index progress
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$lt': 4, '$gt': 2}, 'progress': {'$gt': "
"10.0}, '$index': 'progress'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 8});
}
// job_name == 'white' AND priority >= 2, index job_name
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'job_name': 'white', 'priority': {'$gte': "
"2}, '$index': 'job_name'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2, 5});
}
// 35.0 <= progress < 65.5, index progress
// (the query also carries a redundant, weaker '$gt': 5.0 bound)
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'progress': {'$gt': 5.0, '$gte': 35.0, '$lt': 65.5}, "
"'$index': 'progress'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {2, 5, 10, 11, 12, 13});
}
// 2 < priority <= 4, index priority
// (the query also carries a redundant, weaker '$lt': 8 bound)
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$gt': 2, '$lt': 8, '$lte': 4}, "
"'$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 5, 8, 9, 11});
}
// Delete all whose progress is bigger than 50%
// (removes ids 3, 7, 8 and 10 from the data set above)
{
std::unique_ptr<JSONDocument> query(
Parse("{'progress': {'$gt': 50.0}, '$index': 'progress'}"));
ASSERT_OK(db_->Remove(ReadOptions(), WriteOptions(), *query));
}
// 2 < priority < 6, index priority
{
std::unique_ptr<JSONDocument> query(Parse(
"[{'$filter': {'priority': {'$gt': 2, '$lt': 6}, "
"'$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
AssertCursorIDs(cursor.get(), {4, 5, 9, 11});
}
// update set priority to 10 where job_name is 'white'
{
std::unique_ptr<JSONDocument> query(Parse("{'job_name': 'white'}"));
std::unique_ptr<JSONDocument> update(Parse("{'$set': {'priority': 10}}"));
ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update));
}
// update twice: set priority to 15 where job_name is 'white'
{
std::unique_ptr<JSONDocument> query(Parse("{'job_name': 'white'}"));
std::unique_ptr<JSONDocument> update(Parse("{'$set': {'priority': 10},"
"'$set': {'priority': 15}}"));
ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update));
}
// update twice: set priority to 15 and
// progress to 40 where job_name is 'white'
{
std::unique_ptr<JSONDocument> query(Parse("{'job_name': 'white'}"));
std::unique_ptr<JSONDocument> update(
Parse("{'$set': {'priority': 10, 'progress': 35},"
"'$set': {'priority': 15, 'progress': 40}}"));
ASSERT_OK(db_->Update(ReadOptions(), WriteOptions(), *query, *update));
}
// priority < 0
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'priority': {'$lt': 0}, '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
ASSERT_OK(cursor->status());
AssertCursorIDs(cursor.get(), {12, 13});
}
// -2 < priority < 0
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'priority': {'$gt': -2, '$lt': 0},"
" '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
ASSERT_OK(cursor->status());
AssertCursorIDs(cursor.get(), {12});
}
// -2 <= priority < 0
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'priority': {'$gte': -2, '$lt': 0},"
" '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
ASSERT_OK(cursor->status());
AssertCursorIDs(cursor.get(), {12, 13});
}
// 4 < priority
// (ids 2 and 5 qualify only because of the 'white' updates above)
{
std::unique_ptr<JSONDocument> query(
Parse("[{'$filter': {'priority': {'$gt': 4}, '$index': 'priority'}}]"));
std::unique_ptr<Cursor> cursor(db_->Query(ReadOptions(), *query));
ASSERT_OK(cursor->status());
AssertCursorIDs(cursor.get(), {1, 2, 5});
}
// Dropping an unknown index must fail; dropping an existing one succeeds.
Status s = db_->DropIndex("doesnt-exist");
ASSERT_TRUE(!s.ok());
ASSERT_OK(db_->DropIndex("priority"));
}
} // namespace rocksdb
// Test entry point: hands the command line to Google Test, runs the tests and
// returns the status produced by RUN_ALL_TESTS().
int main(int argc, char** argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}
#else
#include <stdio.h>
// Stub entry point for the other branch of the preprocessor conditional:
// reports on stderr that the test was skipped and exits successfully.
int main(int /*argc*/, char** /*argv*/) {
  fputs("SKIPPED as DocumentDB is not supported in ROCKSDB_LITE\n", stderr);
  return 0;
}
#endif // !ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#include "rocksdb/utilities/json_document.h"
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include <functional>
#include <limits>
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "third-party/fbson/FbsonDocument.h"
#include "third-party/fbson/FbsonJsonParser.h"
#include "third-party/fbson/FbsonUtil.h"
#include "util/coding.h"
using std::placeholders::_1;
namespace {
// Returns the number of key/value pairs in `objectVal`, obtained by walking
// the object from begin() to end() and counting the steps.
size_t ObjectNumElem(const fbson::ObjectVal& objectVal) {
  size_t count = 0;
  for (auto it = objectVal.begin(); it != objectVal.end(); ++it) {
    ++count;
  }
  return count;
}
template <typename Func>
void InitJSONDocument(std::unique_ptr<char[]>* data,
fbson::FbsonValue** value,
Func f) {
// TODO(stash): maybe add function to FbsonDocument to avoid creating array?
fbson::FbsonWriter writer;
bool res __attribute__((__unused__)) = writer.writeStartArray();
assert(res);
uint32_t bytesWritten __attribute__((__unused__));
bytesWritten = f(writer);
assert(bytesWritten != 0);
res = writer.writeEndArray();
assert(res);
char* buf = new char[writer.getOutput()->getSize()];
memcpy(buf, writer.getOutput()->getBuffer(), writer.getOutput()->getSize());
*value = ((fbson::FbsonDocument *)buf)->getValue();
assert((*value)->isArray());
assert(((fbson::ArrayVal*)*value)->numElem() == 1);
*value = ((fbson::ArrayVal*)*value)->get(0);
data->reset(buf);
}
void InitString(std::unique_ptr<char[]>* data,
fbson::FbsonValue** value,
const std::string& s) {
InitJSONDocument(data, value, std::bind(
[](fbson::FbsonWriter& writer, const std::string& str) -> uint32_t {
bool res __attribute__((__unused__)) = writer.writeStartString();
assert(res);
auto bytesWritten = writer.writeString(str.c_str(),
static_cast<uint32_t>(str.length()));
res = writer.writeEndString();
assert(res);
// If the string is empty, then bytesWritten == 0, and assert in
// InitJsonDocument will fail.
return bytesWritten + static_cast<uint32_t>(str.empty());
},
_1, s));
}
// True when `value` is one of the fbson integer kinds (int8/16/32/64).
bool IsNumeric(fbson::FbsonValue* value) {
  if (value->isInt8() || value->isInt16()) {
    return true;
  }
  return value->isInt32() || value->isInt64();
}
// Reads an fbson integer of any width and returns it as int64_t. Asserts (and
// returns 0 when asserts are compiled out) if `value` is not an integer kind.
int64_t GetInt64ValFromFbsonNumericType(fbson::FbsonValue* value) {
  int64_t result = 0;
  switch (value->type()) {
    case fbson::FbsonType::T_Int8:
      result = reinterpret_cast<fbson::Int8Val*>(value)->val();
      break;
    case fbson::FbsonType::T_Int16:
      result = reinterpret_cast<fbson::Int16Val*>(value)->val();
      break;
    case fbson::FbsonType::T_Int32:
      result = reinterpret_cast<fbson::Int32Val*>(value)->val();
      break;
    case fbson::FbsonType::T_Int64:
      result = reinterpret_cast<fbson::Int64Val*>(value)->val();
      break;
    default:
      assert(false);
  }
  return result;
}
// Two values can be compared when they have the same fbson type, or when both
// are integers (possibly of different widths).
bool IsComparable(fbson::FbsonValue* left, fbson::FbsonValue* right) {
  return left->type() == right->type() ||
         (IsNumeric(left) && IsNumeric(right));
}
void CreateArray(std::unique_ptr<char[]>* data, fbson::FbsonValue** value) {
fbson::FbsonWriter writer;
bool res __attribute__((__unused__)) = writer.writeStartArray();
assert(res);
res = writer.writeEndArray();
assert(res);
data->reset(new char[writer.getOutput()->getSize()]);
memcpy(data->get(),
writer.getOutput()->getBuffer(),
writer.getOutput()->getSize());
*value = reinterpret_cast<fbson::FbsonDocument*>(data->get())->getValue();
}
void CreateObject(std::unique_ptr<char[]>* data, fbson::FbsonValue** value) {
fbson::FbsonWriter writer;
bool res __attribute__((__unused__)) = writer.writeStartObject();
assert(res);
res = writer.writeEndObject();
assert(res);
data->reset(new char[writer.getOutput()->getSize()]);
memcpy(data->get(),
writer.getOutput()->getBuffer(),
writer.getOutput()->getSize());
*value = reinterpret_cast<fbson::FbsonDocument*>(data->get())->getValue();
}
} // namespace
namespace rocksdb {
// TODO(stash): find smth easier
// Default constructor: creates a document holding a null value.
JSONDocument::JSONDocument() {
  auto write_null = [](fbson::FbsonWriter& writer) {
    return writer.writeNull();
  };
  InitJSONDocument(&data_, &value_, write_null);
}
// Creates a document holding the boolean `b`.
JSONDocument::JSONDocument(bool b) {
  auto write_bool = [b](fbson::FbsonWriter& writer) {
    return writer.writeBool(b);
  };
  InitJSONDocument(&data_, &value_, write_bool);
}
// Creates a document holding the double `d`.
JSONDocument::JSONDocument(double d) {
  auto write_double = [d](fbson::FbsonWriter& writer) {
    return writer.writeDouble(d);
  };
  InitJSONDocument(&data_, &value_, write_double);
}
// Creates a document holding the 8-bit integer `i`.
JSONDocument::JSONDocument(int8_t i) {
  auto write_int8 = [i](fbson::FbsonWriter& writer) {
    return writer.writeInt8(i);
  };
  InitJSONDocument(&data_, &value_, write_int8);
}
// Creates a document holding the 16-bit integer `i`.
JSONDocument::JSONDocument(int16_t i) {
  auto write_int16 = [i](fbson::FbsonWriter& writer) {
    return writer.writeInt16(i);
  };
  InitJSONDocument(&data_, &value_, write_int16);
}
// Creates a document holding the 32-bit integer `i`.
JSONDocument::JSONDocument(int32_t i) {
  auto write_int32 = [i](fbson::FbsonWriter& writer) {
    return writer.writeInt32(i);
  };
  InitJSONDocument(&data_, &value_, write_int32);
}
// Creates a document holding the 64-bit integer `i`.
JSONDocument::JSONDocument(int64_t i) {
  auto write_int64 = [i](fbson::FbsonWriter& writer) {
    return writer.writeInt64(i);
  };
  InitJSONDocument(&data_, &value_, write_int64);
}
// Creates a document holding a copy of the string `s`.
JSONDocument::JSONDocument(const std::string& s) {
  std::unique_ptr<char[]>* const storage = &data_;
  fbson::FbsonValue** const root = &value_;
  InitString(storage, root, s);
}
// Creates a document holding the NUL-terminated string `s`; performs the same
// initialization as the std::string constructor.
JSONDocument::JSONDocument(const char* s) {
  InitString(&data_, &value_, std::string(s));
}
// Makes this document the owner of a private copy of `val`: allocates a buffer
// of val->numPackedBytes() bytes, copies the packed value into it, and points
// value_ at the copy.
void JSONDocument::InitFromValue(const fbson::FbsonValue* val) {
  const auto packed_size = val->numPackedBytes();
  data_.reset(new char[packed_size]);
  char* const copy = data_.get();
  memcpy(copy, val, packed_size);
  value_ = reinterpret_cast<fbson::FbsonValue*>(copy);
}
// Private constructor. With makeCopy == false the document merely refers to
// `val` and owns nothing; with makeCopy == true it owns a private copy.
JSONDocument::JSONDocument(fbson::FbsonValue* val, bool makeCopy) {
  if (!makeCopy) {
    value_ = val;
    return;
  }
  InitFromValue(val);
}
// Creates a document of the requested kind holding that kind's "empty" value:
// null, false, 0.0, 0, "", an empty array or an empty object. Asserts on an
// unknown kind.
JSONDocument::JSONDocument(Type _type) {
  // TODO(icanadi) make all of this better by using templates
  switch (_type) {
    case kNull: {
      auto write_null = [](fbson::FbsonWriter& writer) {
        return writer.writeNull();
      };
      InitJSONDocument(&data_, &value_, write_null);
      break;
    }
    case kBool: {
      auto write_false = [](fbson::FbsonWriter& writer) {
        return writer.writeBool(false);
      };
      InitJSONDocument(&data_, &value_, write_false);
      break;
    }
    case kDouble: {
      auto write_zero = [](fbson::FbsonWriter& writer) {
        return writer.writeDouble(0.);
      };
      InitJSONDocument(&data_, &value_, write_zero);
      break;
    }
    case kInt64: {
      auto write_zero = [](fbson::FbsonWriter& writer) {
        return writer.writeInt64(0);
      };
      InitJSONDocument(&data_, &value_, write_zero);
      break;
    }
    case kString:
      InitString(&data_, &value_, "");
      break;
    case kArray:
      CreateArray(&data_, &value_);
      break;
    case kObject:
      CreateObject(&data_, &value_);
      break;
    default:
      assert(false);
  }
}
// Copy constructor. A non-owning source is copied as another non-owning
// reference to the same value; an owning source is deep-copied.
JSONDocument::JSONDocument(const JSONDocument& jsonDocument) {
  if (!jsonDocument.IsOwner()) {
    value_ = jsonDocument.value_;
    return;
  }
  InitFromValue(jsonDocument.value_);
}
// Move constructor: takes over the source's buffer (leaving the source without
// one) and refers to the same value the source referred to.
JSONDocument::JSONDocument(JSONDocument&& jsonDocument) {
  data_.reset(jsonDocument.data_.release());
  value_ = jsonDocument.value_;
}
// Assignment from a by-value argument: the argument's buffer is swapped into
// this object, and the previous buffer is released when the argument is
// destroyed on return.
JSONDocument& JSONDocument::operator=(JSONDocument jsonDocument) {
  data_.swap(jsonDocument.data_);
  value_ = jsonDocument.value_;
  return *this;
}
// Maps the underlying fbson type to the public JSONDocument::Type. All integer
// widths map to kInt64 and both boolean tags map to kBool. Binary and unknown
// tags assert; with asserts compiled out they yield kNull.
JSONDocument::Type JSONDocument::type() const {
  Type result = kNull;
  switch (value_->type()) {
    case fbson::FbsonType::T_Null:
      result = kNull;
      break;
    case fbson::FbsonType::T_True:
    case fbson::FbsonType::T_False:
      result = kBool;
      break;
    case fbson::FbsonType::T_Int8:
    case fbson::FbsonType::T_Int16:
    case fbson::FbsonType::T_Int32:
    case fbson::FbsonType::T_Int64:
      result = kInt64;
      break;
    case fbson::FbsonType::T_Double:
      result = kDouble;
      break;
    case fbson::FbsonType::T_String:
      result = kString;
      break;
    case fbson::FbsonType::T_Object:
      result = kObject;
      break;
    case fbson::FbsonType::T_Array:
      result = kArray;
      break;
    case fbson::FbsonType::T_Binary:
    default:
      assert(false);
  }
  return result;
}
bool JSONDocument::Contains(const std::string& key) const {
assert(IsObject());
auto objectVal = reinterpret_cast<fbson::ObjectVal*>(value_);
return objectVal->find(key.c_str()) != nullptr;
}
// Returns a non-owning document that refers to the member named `key`. The
// document must be an object and the key must exist (both asserted).
JSONDocument JSONDocument::operator[](const std::string& key) const {
  assert(IsObject());
  fbson::ObjectVal* object = reinterpret_cast<fbson::ObjectVal*>(value_);
  auto member = object->find(key.c_str());
  assert(member != nullptr);
  // No need to save paths in const objects
  return JSONDocument(member, false);
}
size_t JSONDocument::Count() const {
assert(IsObject() || IsArray());
if (IsObject()) {
// TODO(stash): add to fbson?
const fbson::ObjectVal& objectVal =
*reinterpret_cast<fbson::ObjectVal*>(value_);
return ObjectNumElem(objectVal);
} else if (IsArray()) {
auto arrayVal = reinterpret_cast<fbson::ArrayVal*>(value_);
return arrayVal->numElem();
}
assert(false);
return 0;
}
// Returns a non-owning document that refers to the i-th array element. The
// document must be an array (asserted).
JSONDocument JSONDocument::operator[](size_t i) const {
  assert(IsArray());
  const int index = static_cast<int>(i);
  auto element = reinterpret_cast<fbson::ArrayVal*>(value_)->get(index);
  return JSONDocument(element, false);
}
// True when the underlying value is null.
bool JSONDocument::IsNull() const {
  const bool is_null = value_->isNull();
  return is_null;
}
// True when the underlying value is an array.
bool JSONDocument::IsArray() const {
  const bool is_array = value_->isArray();
  return is_array;
}
// True when the underlying value is either the true or the false value.
bool JSONDocument::IsBool() const {
  if (value_->isTrue()) {
    return true;
  }
  return value_->isFalse();
}
// True when the underlying value is a double.
bool JSONDocument::IsDouble() const {
  const bool is_double = value_->isDouble();
  return is_double;
}
bool JSONDocument::IsInt64() const {
return value_->isInt8() || value_->isInt16() ||
value_->isInt32() || value_->isInt64();
}
// True when the underlying value is an object.
bool JSONDocument::IsObject() const {
  const bool is_object = value_->isObject();
  return is_object;
}
// True when the underlying value is a string.
bool JSONDocument::IsString() const {
  const bool is_string = value_->isString();
  return is_string;
}
// Returns the boolean held by this document, which must be a bool (asserted).
bool JSONDocument::GetBool() const {
  assert(IsBool());
  const bool is_true = value_->isTrue();
  return is_true;
}
double JSONDocument::GetDouble() const {
assert(IsDouble());
return ((fbson::DoubleVal*)value_)->val();
}
// Returns the integer held by this document widened to int64_t. The document
// must hold an integer of some width (asserted).
int64_t JSONDocument::GetInt64() const {
  assert(IsInt64());
  const int64_t widened = GetInt64ValFromFbsonNumericType(value_);
  return widened;
}
std::string JSONDocument::GetString() const {
assert(IsString());
fbson::StringVal* stringVal = (fbson::StringVal*)value_;
return std::string(stringVal->getBlob(), stringVal->getBlobLen());
}
namespace {
// Equality for two integer values, each of which may be int8, int16, int32 or
// int64: both sides are widened to int64_t before comparing.
bool CompareNumeric(fbson::FbsonValue* left, fbson::FbsonValue* right) {
  assert(IsNumeric(left) && IsNumeric(right));
  const int64_t lhs = GetInt64ValFromFbsonNumericType(left);
  const int64_t rhs = GetInt64ValFromFbsonNumericType(right);
  return lhs == rhs;
}
// Equality for scalar values: integers are compared numerically, anything
// else is equal only when both packed representations have the same length
// and identical bytes.
bool CompareSimpleTypes(fbson::FbsonValue* left, fbson::FbsonValue* right) {
  if (IsNumeric(left)) {
    return CompareNumeric(left, right);
  }
  const auto left_size = left->numPackedBytes();
  return left_size == right->numPackedBytes() &&
         memcmp(left, right, left_size) == 0;
}
// Deep equality of two fbson values.
//
// Values that are not comparable (different types, unless both are integers)
// are unequal. Integers compare by numeric value, strings and doubles by
// their packed bytes, arrays element by element in order, and objects member
// by member regardless of member order. Unsupported types assert and, with
// asserts compiled out, compare unequal.
bool CompareFbsonValue(fbson::FbsonValue* left, fbson::FbsonValue* right) {
  if (!IsComparable(left, right)) {
    return false;
  }
  switch (left->type()) {
    case fbson::FbsonType::T_True:
    case fbson::FbsonType::T_False:
    case fbson::FbsonType::T_Null:
      // These are not integer types, so IsComparable() passing means both
      // sides carry the same type tag; there is no payload to compare.
      return true;
    case fbson::FbsonType::T_Int8:
    case fbson::FbsonType::T_Int16:
    case fbson::FbsonType::T_Int32:
    case fbson::FbsonType::T_Int64:
      return CompareNumeric(left, right);
    case fbson::FbsonType::T_String:
    case fbson::FbsonType::T_Double:
      return CompareSimpleTypes(left, right);
    case fbson::FbsonType::T_Object:
      {
        auto leftObject = reinterpret_cast<fbson::ObjectVal*>(left);
        auto rightObject = reinterpret_cast<fbson::ObjectVal*>(right);
        if (ObjectNumElem(*leftObject) != ObjectNumElem(*rightObject)) {
          return false;
        }
        for (auto && keyValue : *leftObject) {
          // find() takes a NUL-terminated key, so build one from the
          // pointer/length pair.
          std::string str(keyValue.getKeyStr(), keyValue.klen());
          // Look the key up once and reuse the result for both the presence
          // check and the recursive comparison.
          auto rightValue = rightObject->find(str.c_str());
          if (rightValue == nullptr ||
              !CompareFbsonValue(keyValue.value(), rightValue)) {
            return false;
          }
        }
        return true;
      }
    case fbson::FbsonType::T_Array:
      {
        auto leftArr = reinterpret_cast<fbson::ArrayVal*>(left);
        auto rightArr = reinterpret_cast<fbson::ArrayVal*>(right);
        if (leftArr->numElem() != rightArr->numElem()) {
          return false;
        }
        const int numElem = static_cast<int>(leftArr->numElem());
        for (int i = 0; i < numElem; ++i) {
          if (!CompareFbsonValue(leftArr->get(i), rightArr->get(i))) {
            return false;
          }
        }
        return true;
      }
    default:
      assert(false);
  }
  return false;
}
} // namespace
// Deep equality of the two documents' underlying values.
bool JSONDocument::operator==(const JSONDocument& rhs) const {
  const bool equal = CompareFbsonValue(value_, rhs.value_);
  return equal;
}
// Negation of operator==.
bool JSONDocument::operator!=(const JSONDocument& rhs) const {
  if (*this == rhs) {
    return false;
  }
  return true;
}
// Returns a document that owns its own copy of this document's value.
JSONDocument JSONDocument::Copy() const {
  JSONDocument owned_copy(value_, true);
  return owned_copy;
}
// True when this document holds its own buffer (data_ is non-null).
bool JSONDocument::IsOwner() const {
  return static_cast<bool>(data_);
}
std::string JSONDocument::DebugString() const {
fbson::FbsonToJson fbsonToJson;
return fbsonToJson.json(value_);
}
// Returns a generator whose begin()/end() iterate over this object's members.
// The document must be an object (asserted).
JSONDocument::ItemsIteratorGenerator JSONDocument::Items() const {
  assert(IsObject());
  fbson::ObjectVal* object = reinterpret_cast<fbson::ObjectVal*>(value_);
  return ItemsIteratorGenerator(*object);
}
// TODO(icanadi) (perf) allocate objects with arena
// Parses `json` and returns a newly allocated document that owns a copy of
// the parsed value. Returns nullptr when the parser rejects the input or when
// no fbson value can be created from the parser's output.
JSONDocument* JSONDocument::ParseJSON(const char* json) {
  fbson::FbsonJsonParser parser;
  const bool parsed = parser.parse(json);
  if (!parsed) {
    return nullptr;
  }
  auto* output = parser.getWriter().getOutput();
  auto root = fbson::FbsonDocument::createValue(
      output->getBuffer(), static_cast<uint32_t>(output->getSize()));
  if (root == nullptr) {
    return nullptr;
  }
  return new JSONDocument(root, true);
}
// Appends the serialized form of this document to `*dst`: the serialization
// format version byte, the FBSON_VER byte, then the packed value bytes.
void JSONDocument::Serialize(std::string* dst) const {
  // The first byte is reserved for a header. Currently the header is only a
  // version number, which helps provide backwards compatibility; more
  // information might be stored here if necessary.
  const char header[] = {kSerializationFormatVersion,
                         static_cast<char>(FBSON_VER)};
  dst->append(header, sizeof(header));
  const char* packed = reinterpret_cast<char*>(value_);
  dst->append(packed, value_->numPackedBytes());
}
// Version byte that Serialize() writes as the first byte of its output.
const char JSONDocument::kSerializationFormatVersion = 2;
// Reconstructs a document from bytes produced by Serialize().
//
// The first byte of `src` is the serialization header; the remainder is
// handed to fbson::FbsonDocument::createValue(). Returns a newly allocated
// document owning a copy of the value, or nullptr when `src` is empty, when
// the header is version 1 (asserted as unexpected here), or when no fbson
// value can be created from the payload.
JSONDocument* JSONDocument::Deserialize(const Slice& src) {
  if (src.size() == 0) {
    return nullptr;
  }
  Slice input(src);
  const char header = input[0];
  if (header == 1) {
    assert(false);
    // With asserts compiled out, refuse the payload instead of handing it to
    // the FBSON parser.
    return nullptr;
  }
  input.remove_prefix(1);
  auto value = fbson::FbsonDocument::createValue(
      input.data(), static_cast<uint32_t>(input.size()));
  if (value == nullptr) {
    return nullptr;
  }
  return new JSONDocument(value, true);
}
// Implementation object behind JSONDocument::const_item_iterator: a thin
// wrapper around an fbson object iterator that forwards each operation to it.
class JSONDocument::const_item_iterator::Impl {
 public:
  using It = fbson::ObjectVal::const_iterator;

  explicit Impl(It it) : it_(it) {}

  // Key bytes of the current member; use together with klen().
  const char* getKeyStr() const { return it_->getKeyStr(); }

  // Key length of the current member.
  uint8_t klen() const { return it_->klen(); }

  // Value of the current member.
  fbson::FbsonValue* value() const { return it_->value(); }

  // Advances to the next member and returns the wrapped iterator.
  It& operator++() {
    ++it_;
    return it_;
  }

  bool operator!=(const Impl& other) { return it_ != other.it_; }

 private:
  It it_;
};
// Wraps `impl` in an iterator by initializing it_ from it.
// NOTE(review): it_ presumably takes ownership of `impl` (callers pass a
// freshly new'ed Impl) — confirm against the header.
JSONDocument::const_item_iterator::const_item_iterator(Impl* impl)
: it_(impl) {}
// Move constructor: move-initializes it_ from the source iterator's it_.
JSONDocument::const_item_iterator::const_item_iterator(const_item_iterator&& a)
: it_(std::move(a.it_)) {}
// Pre-increment: advances the underlying Impl and returns this iterator.
JSONDocument::const_item_iterator&
JSONDocument::const_item_iterator::operator++() {
  it_->operator++();
  return *this;
}
// Two iterators differ when their underlying Impl objects compare unequal.
bool JSONDocument::const_item_iterator::operator!=(
    const const_item_iterator& other) {
  Impl& mine = *it_;
  const Impl& theirs = *other.it_;
  return mine != theirs;
}
// Defined in this file, where Impl is a complete type.
JSONDocument::const_item_iterator::~const_item_iterator() = default;
// Dereference: returns the current member as a value_type built from a copy
// of the key (key pointer plus key length) and a non-owning document that
// refers to the member's value.
JSONDocument::const_item_iterator::value_type
JSONDocument::const_item_iterator::operator*() {
  const Impl& current = *it_;
  std::string key(current.getKeyStr(), current.klen());
  return value_type(std::move(key), JSONDocument(current.value(), false));
}
// Creates a generator over the members of `object`, which is stored in the
// object_ member and used by begin() and end().
// NOTE(review): whether object_ is a reference or a copy is declared in the
// header — confirm before relying on the lifetime of `object`.
JSONDocument::ItemsIteratorGenerator::ItemsIteratorGenerator(
const fbson::ObjectVal& object)
: object_(object) {}
// Returns an iterator positioned at the object's first member.
JSONDocument::const_item_iterator
JSONDocument::ItemsIteratorGenerator::begin() const {
  const_item_iterator::Impl* first =
      new const_item_iterator::Impl(object_.begin());
  return const_item_iterator(first);
}
// Returns the past-the-end iterator for the object's members.
JSONDocument::const_item_iterator
JSONDocument::ItemsIteratorGenerator::end() const {
  const_item_iterator::Impl* last =
      new const_item_iterator::Impl(object_.end());
  return const_item_iterator(last);
}
} // namespace rocksdb
#endif // ROCKSDB_LITE
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#ifndef ROCKSDB_LITE
#include <assert.h>
#include <limits>
#include <stdint.h>
#include "rocksdb/utilities/json_document.h"
#include "third-party/fbson/FbsonWriter.h"
namespace rocksdb {
// Creates a builder backed by a newly allocated, default-constructed
// fbson::FbsonWriter.
JSONDocumentBuilder::JSONDocumentBuilder()
: writer_(new fbson::FbsonWriter()) {
}
// Creates a builder whose newly allocated fbson::FbsonWriter is constructed
// from the caller-supplied output stream `*out`; `out` must not be null.
JSONDocumentBuilder::JSONDocumentBuilder(fbson::FbsonOutStream* out)
: writer_(new fbson::FbsonWriter(*out)) {
}
void JSONDocumentBuilder::Reset() {
writer_->reset();
}
// Opens an array in the underlying writer and returns the writer's result.
bool JSONDocumentBuilder::WriteStartArray() {
  const bool started = writer_->writeStartArray();
  return started;
}
// Closes the current array in the underlying writer and returns its result.
bool JSONDocumentBuilder::WriteEndArray() {
  const bool ended = writer_->writeEndArray();
  return ended;
}
// Opens an object in the underlying writer and returns the writer's result.
bool JSONDocumentBuilder::WriteStartObject() {
  const bool started = writer_->writeStartObject();
  return started;
}
// Closes the current object in the underlying writer and returns its result.
bool JSONDocumentBuilder::WriteEndObject() {
  const bool ended = writer_->writeEndObject();
  return ended;
}
// Writes one object member: the key followed by the serialized `value`.
//
// The key length is passed to the writer as a uint8_t, so keys longer than
// 255 bytes cannot be represented. Such a key asserts and, with asserts
// compiled out, makes the call fail rather than being written with a
// truncated length.
//
// Returns false when the key is too long, when the writer reports that no
// key bytes were written, or when writing the value fails.
bool JSONDocumentBuilder::WriteKeyValue(const std::string& key,
                                        const JSONDocument& value) {
  const bool keyFits = key.size() <= std::numeric_limits<uint8_t>::max();
  assert(keyFits);
  if (!keyFits) {
    return false;
  }
  size_t bytesWritten = writer_->writeKey(key.c_str(),
                                          static_cast<uint8_t>(key.size()));
  if (bytesWritten == 0) {
    return false;
  }
  return WriteJSONDocument(value);
}
// Serializes `value` (recursively for arrays and objects) into the underlying
// writer. Returns true on success and false as soon as any write fails.
// Unknown document kinds assert and, with asserts compiled out, return false.
bool JSONDocumentBuilder::WriteJSONDocument(const JSONDocument& value) {
  switch (value.type()) {
    case JSONDocument::kNull:
      return writer_->writeNull() != 0;
    case JSONDocument::kInt64:
      return writer_->writeInt64(value.GetInt64()) != 0;
    case JSONDocument::kDouble:
      return writer_->writeDouble(value.GetDouble()) != 0;
    case JSONDocument::kBool:
      return writer_->writeBool(value.GetBool()) != 0;
    case JSONDocument::kString:
      {
        if (!writer_->writeStartString()) {
          return false;
        }
        const std::string str = value.GetString();
        const auto bytesWritten = writer_->writeString(
            str.c_str(), static_cast<uint32_t>(str.size()));
        // An empty string legitimately writes zero bytes, so a zero count is
        // a failure only when there was something to write.
        if (bytesWritten == 0 && !str.empty()) {
          return false;
        }
        return writer_->writeEndString();
      }
    case JSONDocument::kArray:
      {
        if (!WriteStartArray()) {
          return false;
        }
        const size_t count = value.Count();
        for (size_t i = 0; i < count; ++i) {
          if (!WriteJSONDocument(value[i])) {
            return false;
          }
        }
        return WriteEndArray();
      }
    case JSONDocument::kObject:
      {
        if (!WriteStartObject()) {
          return false;
        }
        for (const auto& keyValue : value.Items()) {
          // Propagate member failures instead of emitting a partial object.
          if (!WriteKeyValue(keyValue.first, keyValue.second)) {
            return false;
          }
        }
        return WriteEndObject();
      }
    default:
      assert(false);
  }
  return false;
}
// Creates an fbson value from everything written so far and returns a
// document that owns a copy of it.
JSONDocument JSONDocumentBuilder::GetJSONDocument() {
  auto* output = writer_->getOutput();
  const uint32_t size = static_cast<uint32_t>(output->getSize());
  fbson::FbsonValue* root =
      fbson::FbsonDocument::createValue(output->getBuffer(), size);
  return JSONDocument(root, true);
}
// Nothing to do beyond destroying the members.
JSONDocumentBuilder::~JSONDocumentBuilder() = default;
} // namespace rocksdb
#endif // ROCKSDB_LITE
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
This folder defines a REDIS-style interface for Rocksdb.
Right now it is written as a simple tag-on in the rocksdb::RedisLists class.
It implements Redis Lists, and supports only the "non-blocking operations".
Internally, the set of lists is stored in a RocksDB database that maps keys to
values. Each "value" is the list itself, storing a sequence of "elements".
Each element is stored as a 32-bit integer followed by a sequence of bytes.
The 32-bit integer gives the length of the element (that is, the number
of bytes that follow), and exactly that many bytes follow it.
NOTE: This README file may be old. See the actual redis_lists.cc file for
definitive details on the implementation. There should be a header at the top
of that file, explaining a bit of the implementation details.
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册