From 629633041756e4ee17299684bb21941ffe0bf9a3 Mon Sep 17 00:00:00 2001 From: Igor Canadi Date: Wed, 23 Jul 2014 14:22:58 -0400 Subject: [PATCH] SpatialDB Summary: This diff is adding spatial index support to RocksDB. When creating the DB user specifies a list of spatial indexes. Spatial indexes can cover different areas and have different resolution (i.e. number of tiles). This is useful for supporting different zoom levels. Each element inserted into SpatialDB has: * a bounding box, which determines how will the element be indexed * string blob, which will usually be WKB representation of the polygon (http://en.wikipedia.org/wiki/Well-known_text) * feature set, which is a map of key-value pairs, where value can be int, double, bool, null or a string. FeatureSet will be a set of tags associated with geo elements (for example, 'road': 'highway' and similar) * a list of indexes to insert the element in. For example, small river element will be inserted in index for high zoom level, while country border will be inserted in all indexes (including the index for low zoom level). Each query is executed on single spatial index. Query guarantees that it will return all elements intersecting the specified bounding box, but it might also return some extra non-intersecting elements. Test Plan: Added bunch of unit tests in spatial_db_test Reviewers: dhruba, yinwang Reviewed By: yinwang Subscribers: leveldb Differential Revision: https://reviews.facebook.net/D20361 --- Makefile | 4 + include/rocksdb/utilities/spatial_db.h | 231 +++++++++ utilities/spatialdb/spatial_db.cc | 640 +++++++++++++++++++++++++ utilities/spatialdb/spatial_db_test.cc | 265 ++++++++++ 4 files changed, 1140 insertions(+) create mode 100644 include/rocksdb/utilities/spatial_db.h create mode 100644 utilities/spatialdb/spatial_db.cc create mode 100644 utilities/spatialdb/spatial_db_test.cc diff --git a/Makefile b/Makefile index 1dd4299a8..ad0f5d310 100644 --- a/Makefile +++ b/Makefile @@ -107,6 +107,7 @@ TESTS = \ backupable_db_test \ document_db_test \ json_document_test \ + spatial_db_test \ version_edit_test \ version_set_test \ file_indexer_test \ @@ -359,6 +360,9 @@ document_db_test: utilities/document/document_db_test.o $(LIBOBJECTS) $(TESTHARN json_document_test: utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) utilities/document/json_document_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) +spatial_db_test: utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS) + $(CXX) utilities/spatialdb/spatial_db_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) + ttl_test: utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(CXX) utilities/ttl/ttl_test.o $(LIBOBJECTS) $(TESTHARNESS) $(EXEC_LDFLAGS) -o $@ $(LDFLAGS) $(COVERAGEFLAGS) diff --git a/include/rocksdb/utilities/spatial_db.h b/include/rocksdb/utilities/spatial_db.h new file mode 100644 index 000000000..d5f994132 --- /dev/null +++ b/include/rocksdb/utilities/spatial_db.h @@ -0,0 +1,231 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#pragma once +#ifndef ROCKSDB_LITE + +#include +#include + +#include "rocksdb/db.h" +#include "rocksdb/slice.h" +#include "rocksdb/utilities/stackable_db.h" + +namespace rocksdb { +namespace spatial { + +// NOTE: SpatialDB is experimental and we might change its API without warning. +// Please talk to us before developing against SpatialDB API. +// +// SpatialDB is a support for spatial indexes built on top of RocksDB. +// When creating a new SpatialDB, clients specifies a list of spatial indexes to +// build on their data. Each spatial index is defined by the area and +// granularity. If you're storing map data, different spatial index +// granularities can be used for different zoom levels. +// +// Each element inserted into SpatialDB has: +// * a bounding box, which determines how will the element be indexed +// * string blob, which will usually be WKB representation of the polygon +// (http://en.wikipedia.org/wiki/Well-known_text) +// * feature set, which is a map of key-value pairs, where value can be null, +// int, double, bool, string +// * a list of indexes to insert the element in +// +// Each query is executed on a single spatial index. Query guarantees that it +// will return all elements intersecting the specified bounding box, but it +// might also return some extra non-intersecting elements. + +// Variant is a class that can be many things: null, bool, int, double or string +// It is used to store different value types in FeatureSet (see below) +struct Variant { + // Don't change the values here, they are persisted on disk + enum Type { + kNull = 0x0, + kBool = 0x1, + kInt = 0x2, + kDouble = 0x3, + kString = 0x4, + }; + + Variant() : type_(kNull) {} + /* implicit */ Variant(bool b) : type_(kBool) { data_.b = b; } + /* implicit */ Variant(uint64_t i) : type_(kInt) { data_.i = i; } + /* implicit */ Variant(double d) : type_(kDouble) { data_.d = d; } + /* implicit */ Variant(const std::string& s) : type_(kString) { + new (&data_.s) std::string(s); + } + + Variant(const Variant& v); + + ~Variant() { + if (type_ == kString) { + using std::string; + (&data_.s)->~string(); + } + } + + Type type() const { return type_; } + bool get_bool() const { return data_.b; } + uint64_t get_int() const { return data_.i; } + double get_double() const { return data_.d; } + const std::string& get_string() const { return data_.s; } + + bool operator==(const Variant& other); + bool operator!=(const Variant& other); + + private: + Type type_; + union Data { + Data() {} + ~Data() {} + bool b; + uint64_t i; + double d; + std::string s; + } data_; +}; + +// FeatureSet is a map of key-value pairs. One feature set is associated with +// each element in SpatialDB. It can be used to add rich data about the element. +class FeatureSet { + private: + typedef std::unordered_map map; + + public: + class iterator { + public: + /* implicit */ iterator(const map::const_iterator itr) : itr_(itr) {} + iterator& operator++() { + ++itr_; + return *this; + } + bool operator!=(const iterator& other) { return itr_ != other.itr_; } + bool operator==(const iterator& other) { return itr_ == other.itr_; } + map::value_type operator*() { return *itr_; } + + private: + map::const_iterator itr_; + }; + FeatureSet() = default; + + FeatureSet* Set(const std::string& key, const Variant& value); + bool Contains(const std::string& key) const; + // REQUIRES: Contains(key) + const Variant& Get(const std::string& key) const; + iterator Find(const std::string& key) const; + + iterator begin() const { return map_.begin(); } + iterator end() const { return map_.end(); } + + void Clear(); + + void Serialize(std::string* output) const; + // REQUIRED: empty FeatureSet + bool Deserialize(const Slice& input); + + private: + map map_; +}; + +// BoundingBox is a helper structure for defining rectangles representing +// bounding boxes of spatial elements. +template +struct BoundingBox { + T min_x, min_y, max_x, max_y; + BoundingBox() = default; + BoundingBox(T _min_x, T _min_y, T _max_x, T _max_y) + : min_x(_min_x), min_y(_min_y), max_x(_max_x), max_y(_max_y) {} + + bool Intersects(const BoundingBox& a) const { + return !(min_x > a.max_x || min_y > a.max_y || a.min_x > max_x || + a.min_y > max_y); + } +}; + +struct SpatialDBOptions { + uint64_t cache_size = 1 * 1024 * 1024 * 1024LL; // 1GB + int num_threads = 16; + bool bulk_load = true; +}; + +// Cursor is used to return data from the query to the client. To get all the +// data from the query, just call Next() while Valid() is true +class Cursor { + public: + Cursor() = default; + virtual ~Cursor() {} + + virtual bool Valid() const = 0; + // REQUIRES: Valid() + virtual void Next() = 0; + + // Lifetime of the underlying storage until the next call to Next() + // REQUIRES: Valid() + virtual const Slice blob() = 0; + // Lifetime of the underlying storage until the next call to Next() + // REQUIRES: Valid() + virtual const FeatureSet& feature_set() = 0; + + virtual Status status() const = 0; + + private: + // No copying allowed + Cursor(const Cursor&); + void operator=(const Cursor&); +}; + +// SpatialIndexOptions defines a spatial index that will be built on the data +struct SpatialIndexOptions { + // Spatial indexes are referenced by names + std::string name; + // An area that is indexed. If the element is not intersecting with spatial + // index's bbox, it will not be inserted into the index + BoundingBox bbox; + // tile_bits control the granularity of the spatial index. Each dimension of + // the bbox will be split into (1 << tile_bits) tiles, so there will be a + // total of (1 << tile_bits)^2 tiles. It is recommended to configure a size of + // each tile to be approximately the size of the query on that spatial index + uint32_t tile_bits; + SpatialIndexOptions() {} + SpatialIndexOptions(const std::string& _name, + const BoundingBox& _bbox, uint32_t _tile_bits) + : name(_name), bbox(_bbox), tile_bits(_tile_bits) {} +}; + +class SpatialDB : public StackableDB { + public: + // Open the SpatialDB. List of spatial_indexes need to include all indexes + // that already exist in the DB (if the DB already exists). It can include new + // indexes, which will be created and initialized as empty (data will not be + // re-indexed). The resulting db object will be returned through db parameter. + // TODO(icanadi) read_only = true doesn't yet work because of #4743185 + static Status Open(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes, + SpatialDB** db, bool read_only = false); + + explicit SpatialDB(DB* db) : StackableDB(db) {} + + // Insert the element into the DB. Element will be inserted into specified + // spatial_indexes, based on specified bbox. + // REQUIRES: spatial_indexes.size() > 0 + virtual Status Insert(const WriteOptions& write_options, + const BoundingBox& bbox, const Slice& blob, + const FeatureSet& feature_set, + const std::vector& spatial_indexes) = 0; + + // Calling Compact() after inserting a bunch of elements should speed up + // reading. This is especially useful if you use SpatialDBOptions::bulk_load + virtual Status Compact() = 0; + + // Query the specified spatial_index. Query will return all elements that + // intersect bbox, but it may also return some extra elements. + virtual Cursor* Query(const ReadOptions& read_options, + const BoundingBox& bbox, + const std::string& spatial_index) = 0; +}; + +} // namespace spatial +} // namespace rocksdb +#endif // ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db.cc b/utilities/spatialdb/spatial_db.cc new file mode 100644 index 000000000..4f75d48c9 --- /dev/null +++ b/utilities/spatialdb/spatial_db.cc @@ -0,0 +1,640 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#ifndef ROCKSDB_LITE + +#include +#include +#include +#include + +#include "rocksdb/cache.h" +#include "rocksdb/db.h" +#include "rocksdb/utilities/stackable_db.h" +#include "rocksdb/utilities/spatial_db.h" +#include "util/coding.h" + +namespace rocksdb { +namespace spatial { + +Variant::Variant(const Variant& v) : type_(v.type_) { + switch (v.type_) { + case kNull: + break; + case kBool: + data_.b = v.data_.b; + break; + case kInt: + data_.i = v.data_.i; + break; + case kDouble: + data_.d = v.data_.d; + break; + case kString: + new (&data_.s) std::string(v.data_.s); + break; + default: + assert(false); + } +} + +bool Variant::operator==(const Variant& rhs) { + if (type_ != rhs.type_) { + return false; + } + + switch (type_) { + case kNull: + return true; + case kBool: + return data_.b == rhs.data_.b; + case kInt: + return data_.i == rhs.data_.i; + case kDouble: + return data_.d == rhs.data_.d; + case kString: + return data_.s == rhs.data_.s; + default: + assert(false); + } +} + +bool Variant::operator!=(const Variant& rhs) { return !(*this == rhs); } + +FeatureSet* FeatureSet::Set(const std::string& key, const Variant& value) { + map_.insert({key, value}); + return this; +} + +bool FeatureSet::Contains(const std::string& key) const { + return map_.find(key) != map_.end(); +} + +const Variant& FeatureSet::Get(const std::string& key) const { + auto itr = map_.find(key); + assert(itr != map_.end()); + return itr->second; +} + +FeatureSet::iterator FeatureSet::Find(const std::string& key) const { + return iterator(map_.find(key)); +} + +void FeatureSet::Clear() { map_.clear(); } + +void FeatureSet::Serialize(std::string* output) const { + for (const auto& iter : map_) { + PutLengthPrefixedSlice(output, iter.first); + output->push_back(static_cast(iter.second.type())); + switch (iter.second.type()) { + case Variant::kNull: + break; + case Variant::kBool: + output->push_back(static_cast(iter.second.get_bool())); + break; + case Variant::kInt: + PutVarint64(output, iter.second.get_int()); + break; + case Variant::kDouble: { + double d = iter.second.get_double(); + output->append(reinterpret_cast(&d), sizeof(double)); + break; + } + case Variant::kString: + PutLengthPrefixedSlice(output, iter.second.get_string()); + break; + default: + assert(false); + } + } +} + +bool FeatureSet::Deserialize(const Slice& input) { + assert(map_.empty()); + Slice s(input); + while (s.size()) { + Slice key; + if (!GetLengthPrefixedSlice(&s, &key) || s.size() == 0) { + return false; + } + char type = s[0]; + s.remove_prefix(1); + switch (type) { + case Variant::kNull: { + map_.insert({key.ToString(), Variant()}); + break; + } + case Variant::kBool: { + if (s.size() == 0) { + return false; + } + map_.insert({key.ToString(), Variant(static_cast(s[0]))}); + s.remove_prefix(1); + break; + } + case Variant::kInt: { + uint64_t v; + if (!GetVarint64(&s, &v)) { + return false; + } + map_.insert({key.ToString(), Variant(v)}); + break; + } + case Variant::kDouble: { + if (s.size() < sizeof(double)) { + return false; + } + double d; + memcpy(&d, s.data(), sizeof(double)); + map_.insert({key.ToString(), Variant(d)}); + s.remove_prefix(sizeof(double)); + break; + } + case Variant::kString: { + Slice str; + if (!GetLengthPrefixedSlice(&s, &str)) { + return false; + } + map_.insert({key.ToString(), str.ToString()}); + break; + } + default: + return false; + } + } + return true; +} + +namespace { +// indexing idea from http://msdn.microsoft.com/en-us/library/bb259689.aspx +inline uint64_t GetTileFromCoord(double x, double start, double end, + uint32_t tile_bits) { + if (x < start) { + return 0; + } + uint64_t tiles = static_cast(1) << tile_bits; + uint64_t r = ((x - start) / (end - start)) * tiles; + return std::min(r, tiles - 1); +} +inline uint64_t GetQuadKeyFromTile(uint64_t tile_x, uint64_t tile_y, + uint32_t tile_bits) { + uint64_t quad_key = 0; + for (uint32_t i = 0; i < tile_bits; ++i) { + uint32_t mask = (1LL << i); + quad_key |= (tile_x & mask) << i; + quad_key |= (tile_y & mask) << (i + 1); + } + return quad_key; +} +inline BoundingBox GetTileBoundingBox( + const SpatialIndexOptions& spatial_index, BoundingBox bbox) { + return BoundingBox( + GetTileFromCoord(bbox.min_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.min_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_x, spatial_index.bbox.min_x, + spatial_index.bbox.max_x, spatial_index.tile_bits), + GetTileFromCoord(bbox.max_y, spatial_index.bbox.min_y, + spatial_index.bbox.max_y, spatial_index.tile_bits)); +} + +// big endian can be compared using memcpy +inline void PutFixed64BigEndian(std::string* dst, uint64_t value) { + char buf[sizeof(value)]; + buf[0] = (value >> 56) & 0xff; + buf[1] = (value >> 48) & 0xff; + buf[2] = (value >> 40) & 0xff; + buf[3] = (value >> 32) & 0xff; + buf[4] = (value >> 24) & 0xff; + buf[5] = (value >> 16) & 0xff; + buf[6] = (value >> 8) & 0xff; + buf[7] = value & 0xff; + dst->append(buf, sizeof(buf)); +} +// big endian can be compared using memcpy +inline bool GetFixed64BigEndian(const Slice& input, uint64_t* value) { + if (input.size() < sizeof(uint64_t)) { + return false; + } + auto ptr = input.data(); + *value = (static_cast(static_cast(ptr[0])) << 56) | + (static_cast(static_cast(ptr[1])) << 48) | + (static_cast(static_cast(ptr[2])) << 40) | + (static_cast(static_cast(ptr[3])) << 32) | + (static_cast(static_cast(ptr[4])) << 24) | + (static_cast(static_cast(ptr[5])) << 16) | + (static_cast(static_cast(ptr[6])) << 8) | + static_cast(static_cast(ptr[7])); + return true; +} + +} // namespace + +class SpatialIndexCursor : public Cursor { + public: + SpatialIndexCursor(Iterator* spatial_iterator, Iterator* data_iterator, + const BoundingBox& tile_bbox, uint32_t tile_bits) + : spatial_iterator_(spatial_iterator), + data_iterator_(data_iterator), + tile_bbox_(tile_bbox), + tile_bits_(tile_bits), + valid_(true) { + current_x_ = tile_bbox.min_x; + current_y_ = tile_bbox.min_y; + UpdateQuadKey(); + ReSeek(); + if (valid_) { + // this is the first ID returned, so I don't care about return value of + // Dedup + Dedup(); + } + if (valid_) { + ExtractData(); + } + } + + virtual bool Valid() const override { return valid_; } + + virtual void Next() override { + assert(valid_); + + // this do-while loop deals only with deduplication + do { + spatial_iterator_->Next(); + if (ExtractID()) { + // OK, found what we needed + continue; + } + + // move to the next tile + Increment(); + + if (ExtractID()) { + // no need to reseek, found what we needed + continue; + } + // reseek, find next good tile + ReSeek(); + } while (valid_ && !Dedup() && valid_); + + if (valid_) { + ExtractData(); + } + } + + virtual const Slice blob() override { return current_blob_; } + virtual const FeatureSet& feature_set() override { + return current_feature_set_; + } + + virtual Status status() const override { + if (!status_.ok()) { + return status_; + } + if (!spatial_iterator_->status().ok()) { + return spatial_iterator_->status(); + } + return data_iterator_->status(); + } + + private: + // returns true if OK, false if already returned (duplicate) + bool Dedup() { + assert(valid_); + uint64_t id; + bool ok = GetFixed64BigEndian(current_id_, &id); + if (!ok) { + valid_ = false; + status_ = Status::Corruption("Spatial index corruption"); + return false; + } + if (returned_ids_.find(id) != returned_ids_.end()) { + return false; + } + returned_ids_.insert(id); + return true; + } + void ReSeek() { + while (valid_) { + spatial_iterator_->Seek(current_quad_key_); + if (ExtractID()) { + // found what we're looking for! + break; + } + Increment(); + } + } + + void Increment() { + ++current_x_; + if (current_x_ > tile_bbox_.max_x) { + current_x_ = tile_bbox_.min_x; + ++current_y_; + } + if (current_y_ > tile_bbox_.max_y) { + valid_ = false; + } else { + UpdateQuadKey(); + } + } + void UpdateQuadKey() { + current_quad_key_.clear(); + PutFixed64BigEndian(¤t_quad_key_, + GetQuadKeyFromTile(current_x_, current_y_, tile_bits_)); + } + // * returns true if spatial iterator is on the current quad key and all is + // well. Caller will call Next() to get new data + // * returns false if spatial iterator is not on current, or invalid or status + // bad. Caller will need to reseek to get new data + bool ExtractID() { + if (!spatial_iterator_->Valid()) { + // caller needs to reseek + return false; + } + if (spatial_iterator_->key().size() != 2 * sizeof(uint64_t)) { + status_ = Status::Corruption("Invalid spatial index key"); + valid_ = false; + return false; + } + Slice quad_key(spatial_iterator_->key().data(), sizeof(uint64_t)); + if (quad_key != current_quad_key_) { + // caller needs to reseek + return false; + } + // if we come to here, we have found the quad key + current_id_ = Slice(spatial_iterator_->key().data() + sizeof(uint64_t), + sizeof(uint64_t)); + return true; + } + // doesn't return anything, but sets valid_ and status_ on corruption + void ExtractData() { + assert(valid_); + data_iterator_->Seek(current_id_); + + if (!data_iterator_->Valid() || data_iterator_->key() != current_id_) { + status_ = Status::Corruption("Inconsistency in data column family"); + valid_ = false; + return; + } + + Slice data = data_iterator_->value(); + current_feature_set_.Clear(); + if (!GetLengthPrefixedSlice(&data, ¤t_blob_) || + !current_feature_set_.Deserialize(data)) { + status_ = Status::Corruption("Data column family corruption"); + valid_ = false; + return; + } + } + + unique_ptr spatial_iterator_; + unique_ptr data_iterator_; + BoundingBox tile_bbox_; + uint32_t tile_bits_; + uint64_t current_x_; + uint64_t current_y_; + std::string current_quad_key_; + Slice current_id_; + bool valid_; + Status status_; + + FeatureSet current_feature_set_; + Slice current_blob_; + + // used for deduplicating results + std::set returned_ids_; +}; + +class ErrorCursor : public Cursor { + public: + explicit ErrorCursor(Status s) : s_(s) { assert(!s.ok()); } + virtual Status status() const override { return s_; } + virtual bool Valid() const override { return false; } + virtual void Next() override { assert(false); } + + virtual const Slice blob() override { + assert(false); + return Slice(); + } + virtual const FeatureSet& feature_set() override { + assert(false); + // compiler complains otherwise + return trash_; + } + + private: + Status s_; + FeatureSet trash_; +}; + +// Column families are used to store element's data and spatial indexes. We use +// [default] column family to store the element data. This is the format of +// [default] column family: +// * id (fixed 64 big endian) -> blob (length prefixed slice) feature_set +// (serialized) +// We have one additional column family for each spatial index. The name of the +// column family is [spatial$]. The format is: +// * quad_key (fixed 64 bit big endian) id (fixed 64 bit big endian) -> "" +class SpatialDBImpl : public SpatialDB { + public: + // * db -- base DB that needs to be forwarded to StackableDB + // * data_column_family -- column family used to store the data + // * spatial_indexes -- a list of spatial indexes together with column + // families that correspond to those spatial indexes + // * next_id -- next ID in auto-incrementing ID. This is usually + // `max_id_currenty_in_db + 1` + SpatialDBImpl(DB* db, ColumnFamilyHandle* data_column_family, + const std::vector< + std::pair> + spatial_indexes, + uint64_t next_id) + : SpatialDB(db), + data_column_family_(data_column_family), + next_id_(next_id) { + for (const auto& index : spatial_indexes) { + name_to_index_.insert( + {index.first.name, IndexColumnFamily(index.first, index.second)}); + } + } + + ~SpatialDBImpl() { + for (auto& iter : name_to_index_) { + delete iter.second.column_family; + } + delete data_column_family_; + } + + virtual Status Insert( + const WriteOptions& write_options, const BoundingBox& bbox, + const Slice& blob, const FeatureSet& feature_set, + const std::vector& spatial_indexes) override { + WriteBatch batch; + + if (spatial_indexes.size() == 0) { + return Status::InvalidArgument("Spatial indexes can't be empty"); + } + + uint64_t id = next_id_.fetch_add(1); + + for (const auto& si : spatial_indexes) { + auto itr = name_to_index_.find(si); + if (itr == name_to_index_.end()) { + return Status::InvalidArgument("Can't find index " + si); + } + const auto& spatial_index = itr->second.index; + if (!spatial_index.bbox.Intersects(bbox)) { + continue; + } + BoundingBox tile_bbox = GetTileBoundingBox(spatial_index, bbox); + + for (uint64_t x = tile_bbox.min_x; x <= tile_bbox.max_x; ++x) { + for (uint64_t y = tile_bbox.min_y; y <= tile_bbox.max_y; ++y) { + // see above for format + std::string key; + PutFixed64BigEndian( + &key, GetQuadKeyFromTile(x, y, spatial_index.tile_bits)); + PutFixed64BigEndian(&key, id); + batch.Put(itr->second.column_family, key, Slice()); + } + } + } + + // see above for format + std::string data_key; + PutFixed64BigEndian(&data_key, id); + std::string data_value; + PutLengthPrefixedSlice(&data_value, blob); + feature_set.Serialize(&data_value); + batch.Put(data_column_family_, data_key, data_value); + + return Write(write_options, &batch); + } + + virtual Status Compact() override { + Status s, t; + for (auto& iter : name_to_index_) { + t = CompactRange(iter.second.column_family, nullptr, nullptr); + if (!t.ok()) { + s = t; + } + } + t = CompactRange(data_column_family_, nullptr, nullptr); + if (!t.ok()) { + s = t; + } + return s; + } + + virtual Cursor* Query(const ReadOptions& read_options, + const BoundingBox& bbox, + const std::string& spatial_index) override { + auto itr = name_to_index_.find(spatial_index); + if (itr == name_to_index_.end()) { + return new ErrorCursor(Status::InvalidArgument( + "Spatial index " + spatial_index + " not found")); + } + + std::vector iterators; + Status s = NewIterators(read_options, + {data_column_family_, itr->second.column_family}, + &iterators); + if (!s.ok()) { + return new ErrorCursor(s); + } + + const auto& si = itr->second.index; + return new SpatialIndexCursor(iterators[1], iterators[0], + GetTileBoundingBox(si, bbox), si.tile_bits); + } + + private: + ColumnFamilyHandle* data_column_family_; + struct IndexColumnFamily { + SpatialIndexOptions index; + ColumnFamilyHandle* column_family; + IndexColumnFamily(const SpatialIndexOptions& _index, + ColumnFamilyHandle* _cf) + : index(_index), column_family(_cf) {} + }; + // constant after construction! + std::unordered_map name_to_index_; + + std::atomic next_id_; +}; + +namespace { +Options GetRocksDBOptionsFromOptions(const SpatialDBOptions& options) { + Options rocksdb_options; + rocksdb_options.OptimizeLevelStyleCompaction(); + rocksdb_options.IncreaseParallelism(options.num_threads); + rocksdb_options.block_cache = NewLRUCache(options.cache_size); + if (options.bulk_load) { + rocksdb_options.PrepareForBulkLoad(); + } + return rocksdb_options; +} +} // namespace + +Status SpatialDB::Open(const SpatialDBOptions& options, const std::string& name, + const std::vector& spatial_indexes, + SpatialDB** db, bool read_only) { + Options rocksdb_options = GetRocksDBOptionsFromOptions(options); + rocksdb_options.create_if_missing = true; + rocksdb_options.create_missing_column_families = true; + + std::vector column_families; + column_families.push_back(ColumnFamilyDescriptor( + kDefaultColumnFamilyName, ColumnFamilyOptions(rocksdb_options))); + + for (const auto& index : spatial_indexes) { + column_families.emplace_back("spatial$" + index.name, + ColumnFamilyOptions(rocksdb_options)); + } + std::vector handles; + DB* base_db; + Status s; + if (read_only) { + s = DB::OpenForReadOnly(DBOptions(rocksdb_options), name, column_families, + &handles, &base_db); + } else { + s = DB::Open(DBOptions(rocksdb_options), name, column_families, &handles, + &base_db); + } + if (!s.ok()) { + return s; + } + + std::vector> + index_cf; + assert(handles.size() == spatial_indexes.size() + 1); + for (size_t i = 0; i < spatial_indexes.size(); ++i) { + index_cf.emplace_back(spatial_indexes[i], handles[i + 1]); + } + uint64_t next_id; + { + // find next_id + Iterator* iter = base_db->NewIterator(ReadOptions(), handles[0]); + iter->SeekToLast(); + if (iter->Valid()) { + uint64_t last_id; + bool ok = GetFixed64BigEndian(iter->key(), &last_id); + if (!ok) { + return Status::Corruption("Invalid key in data column family"); + } + next_id = last_id + 1; + } else { + next_id = 1; + } + delete iter; + } + + *db = new SpatialDBImpl(base_db, handles[0], index_cf, next_id); + return Status::OK(); +} + +} // namespace spatial +} // namespace rocksdb +#endif // ROCKSDB_LITE diff --git a/utilities/spatialdb/spatial_db_test.cc b/utilities/spatialdb/spatial_db_test.cc new file mode 100644 index 000000000..b4d5c23cc --- /dev/null +++ b/utilities/spatialdb/spatial_db_test.cc @@ -0,0 +1,265 @@ +// Copyright (c) 2013, Facebook, Inc. All rights reserved. +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. An additional grant +// of patent rights can be found in the PATENTS file in the same directory. + +#include +#include +#include + +#include "rocksdb/utilities/spatial_db.h" +#include "util/testharness.h" +#include "util/testutil.h" +#include "util/random.h" + +namespace rocksdb { +namespace spatial { + +class SpatialDBTest { + public: + SpatialDBTest() { + dbname_ = test::TmpDir() + "/spatial_db_test"; + DestroyDB(dbname_, Options()); + } + + void AssertCursorResults(BoundingBox bbox, const std::string& index, + const std::vector& blobs) { + Cursor* c = db_->Query(ReadOptions(), bbox, index); + ASSERT_OK(c->status()); + std::multiset b; + for (auto x : blobs) { + b.insert(x); + } + + while (c->Valid()) { + auto itr = b.find(c->blob().ToString()); + ASSERT_TRUE(itr != b.end()); + b.erase(itr); + c->Next(); + } + ASSERT_EQ(b.size(), 0U); + ASSERT_OK(c->status()); + delete c; + } + + std::string dbname_; + SpatialDB* db_; +}; + +TEST(SpatialDBTest, FeatureSetSerializeTest) { + FeatureSet fs; + + fs.Set("a", std::string("b")); + fs.Set("x", static_cast(3)); + fs.Set("y", false); + fs.Set("n", Variant()); // null + fs.Set("m", 3.25); + + ASSERT_TRUE(fs.Find("w") == fs.end()); + ASSERT_TRUE(fs.Find("x") != fs.end()); + ASSERT_TRUE((*fs.Find("x")).second == Variant(static_cast(3))); + ASSERT_TRUE((*fs.Find("y")).second != Variant(true)); + std::set keys({"a", "x", "y", "n", "m"}); + for (const auto& x : fs) { + ASSERT_TRUE(keys.find(x.first) != keys.end()); + keys.erase(x.first); + } + ASSERT_EQ(keys.size(), 0U); + + std::string serialized; + fs.Serialize(&serialized); + + FeatureSet deserialized; + ASSERT_TRUE(deserialized.Deserialize(serialized)); + + ASSERT_TRUE(deserialized.Contains("a")); + ASSERT_EQ(deserialized.Get("a").type(), Variant::kString); + ASSERT_EQ(deserialized.Get("a").get_string(), "b"); + ASSERT_TRUE(deserialized.Contains("x")); + ASSERT_EQ(deserialized.Get("x").type(), Variant::kInt); + ASSERT_EQ(deserialized.Get("x").get_int(), static_cast(3)); + ASSERT_TRUE(deserialized.Contains("y")); + ASSERT_EQ(deserialized.Get("y").type(), Variant::kBool); + ASSERT_EQ(deserialized.Get("y").get_bool(), false); + ASSERT_TRUE(deserialized.Contains("n")); + ASSERT_EQ(deserialized.Get("n").type(), Variant::kNull); + ASSERT_TRUE(deserialized.Contains("m")); + ASSERT_EQ(deserialized.Get("m").type(), Variant::kDouble); + ASSERT_EQ(deserialized.Get("m").get_double(), 3.25); + + // corrupted serialization + serialized = serialized.substr(0, serialized.size() - 3); + deserialized.Clear(); + ASSERT_TRUE(!deserialized.Deserialize(serialized)); +} + +TEST(SpatialDBTest, TestNextID) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), + "one", FeatureSet(), {"simple"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(10, 10, 15, 15), + "two", FeatureSet(), {"simple"})); + delete db_; + + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(55, 55, 65, 65), + "three", FeatureSet(), {"simple"})); + + delete db_; + + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + AssertCursorResults(BoundingBox(0, 0, 100, 100), "simple", + {"one", "two", "three"}); + + delete db_; +} + +TEST(SpatialDBTest, FeatureSetTest) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("simple", BoundingBox(0, 0, 100, 100), 2)}, + &db_)); + + FeatureSet fs; + fs.Set("a", std::string("b")); + fs.Set("c", std::string("d")); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(5, 5, 10, 10), + "one", fs, {"simple"})); + + Cursor* c = + db_->Query(ReadOptions(), BoundingBox(5, 5, 10, 10), "simple"); + + ASSERT_TRUE(c->Valid()); + ASSERT_EQ(c->blob().compare("one"), 0); + FeatureSet returned = c->feature_set(); + ASSERT_TRUE(returned.Contains("a")); + ASSERT_TRUE(!returned.Contains("b")); + ASSERT_TRUE(returned.Contains("c")); + ASSERT_EQ(returned.Get("a").type(), Variant::kString); + ASSERT_EQ(returned.Get("a").get_string(), "b"); + ASSERT_EQ(returned.Get("c").type(), Variant::kString); + ASSERT_EQ(returned.Get("c").get_string(), "d"); + + c->Next(); + ASSERT_TRUE(!c->Valid()); + + delete c; + delete db_; +} + +TEST(SpatialDBTest, SimpleTest) { + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("index", BoundingBox(0, 0, 128, 128), 3)}, + &db_)); + + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(33, 17, 63, 79), + "one", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(65, 65, 111, 111), + "two", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 49, 127, 63), + "three", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(20, 100, 21, 101), + "four", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(81, 33, 127, 63), + "five", FeatureSet(), {"index"})); + ASSERT_OK(db_->Insert(WriteOptions(), BoundingBox(1, 65, 47, 95), + "six", FeatureSet(), {"index"})); + + AssertCursorResults(BoundingBox(33, 17, 47, 31), "index", {"one"}); + AssertCursorResults(BoundingBox(17, 33, 79, 63), "index", + {"one", "three"}); + AssertCursorResults(BoundingBox(17, 81, 63, 111), "index", + {"four", "six"}); + AssertCursorResults(BoundingBox(85, 86, 85, 86), "index", {"two"}); + AssertCursorResults(BoundingBox(33, 1, 127, 111), "index", + {"one", "two", "three", "five", "six"}); + // even though the bounding box doesn't intersect, we got "four" back because + // it's in the same tile + AssertCursorResults(BoundingBox(18, 98, 19, 99), "index", {"four"}); + AssertCursorResults(BoundingBox(130, 130, 131, 131), "index", {}); + AssertCursorResults(BoundingBox(81, 17, 127, 31), "index", {}); + AssertCursorResults(BoundingBox(90, 50, 91, 51), "index", + {"three", "five"}); + + delete db_; +} + +namespace { +std::string RandomStr(Random* rnd) { + std::string r; + for (int k = 0; k < 10; ++k) { + r.push_back(rnd->Uniform(26) + 'a'); + } + return r; +} + +BoundingBox RandomBoundingBox(int limit, Random* rnd, int max_size) { + BoundingBox r; + r.min_x = rnd->Uniform(limit - 1); + r.min_y = rnd->Uniform(limit - 1); + r.max_x = r.min_x + rnd->Uniform(std::min(limit - 1 - r.min_x, max_size)) + 1; + r.max_y = r.min_y + rnd->Uniform(std::min(limit - 1 - r.min_y, max_size)) + 1; + return r; +} + +BoundingBox ScaleBB(BoundingBox b, double step) { + return BoundingBox(b.min_x * step + 1, b.min_y * step + 1, + (b.max_x + 1) * step - 1, + (b.max_y + 1) * step - 1); +} + +} // namespace + +TEST(SpatialDBTest, RandomizedTest) { + Random rnd(301); + std::vector>> elements; + + BoundingBox spatial_index_bounds(0, 0, (1LL << 32), (1LL << 32)); + ASSERT_OK(SpatialDB::Open( + SpatialDBOptions(), dbname_, + {SpatialIndexOptions("index", spatial_index_bounds, 7)}, &db_)); + double step = (1LL << 32) / (1 << 7); + + for (int i = 0; i < 1000; ++i) { + std::string blob = RandomStr(&rnd); + BoundingBox bbox = RandomBoundingBox(128, &rnd, 10); + ASSERT_OK(db_->Insert(WriteOptions(), ScaleBB(bbox, step), blob, + FeatureSet(), {"index"})); + elements.push_back(make_pair(blob, bbox)); + } + + db_->Compact(); + + for (int i = 0; i < 1000; ++i) { + BoundingBox int_bbox = RandomBoundingBox(128, &rnd, 10); + BoundingBox double_bbox = ScaleBB(int_bbox, step); + std::vector blobs; + for (auto e : elements) { + if (e.second.Intersects(int_bbox)) { + blobs.push_back(e.first); + } + } + AssertCursorResults(double_bbox, "index", blobs); + } + + delete db_; +} + +} // namespace spatial +} // namespace rocksdb + +int main(int argc, char** argv) { return rocksdb::test::RunAllTests(); } -- GitLab