diff --git a/db/memtable.cc b/db/memtable.cc index c0166bb400476678b3a5b40af8b6955fc379d274..51b54d6368be3f1265dc48cfa283eb356658f435 100644 --- a/db/memtable.cc +++ b/db/memtable.cc @@ -428,7 +428,7 @@ FragmentedRangeTombstoneIterator* MemTable::NewRangeTombstoneIterator( comparator_.comparator); auto* fragmented_iter = new FragmentedRangeTombstoneIterator( - fragmented_tombstone_list, read_seq, comparator_.comparator); + fragmented_tombstone_list, comparator_.comparator, read_seq); return fragmented_iter; } diff --git a/db/range_del_aggregator_bench.cc b/db/range_del_aggregator_bench.cc index 01974702b70e2acf4cdff774c869ef0e63ce0277..9fdcefc39202bb0e0368f7bcf1e68cff81427043 100644 --- a/db/range_del_aggregator_bench.cc +++ b/db/range_del_aggregator_bench.cc @@ -220,8 +220,8 @@ int main(int argc, char** argv) { std::unique_ptr fragmented_range_del_iter( new rocksdb::FragmentedRangeTombstoneIterator( - fragmented_range_tombstone_lists.back().get(), - rocksdb::kMaxSequenceNumber, icmp)); + fragmented_range_tombstone_lists.back().get(), icmp, + rocksdb::kMaxSequenceNumber)); if (FLAGS_use_v2_aggregator) { rocksdb::StopWatchNano stop_watch_add_tombstones( diff --git a/db/range_del_aggregator_v2_test.cc b/db/range_del_aggregator_v2_test.cc index 576d3339edc05c4fc056925b7628ae68be239098..79cb548b1f7862f107408d77f78eeceede69316b 100644 --- a/db/range_del_aggregator_v2_test.cc +++ b/db/range_del_aggregator_v2_test.cc @@ -173,8 +173,8 @@ TEST_F(RangeDelAggregatorV2Test, EmptyTruncatedIter) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -192,8 +192,8 @@ TEST_F(RangeDelAggregatorV2Test, UntruncatedIter) { 
FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -226,8 +226,8 @@ TEST_F(RangeDelAggregatorV2Test, UntruncatedIterWithSnapshot) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, 9 /* snapshot */, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + 9 /* snapshot */)); TruncatedRangeDelIterator iter(std::move(input_iter), &bytewise_icmp, nullptr, nullptr); @@ -259,8 +259,8 @@ TEST_F(RangeDelAggregatorV2Test, TruncatedIter) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); InternalKey smallest("d", 7, kTypeValue); InternalKey largest("m", 9, kTypeValue); @@ -294,8 +294,8 @@ TEST_F(RangeDelAggregatorV2Test, SingleIterInAggregator) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(&fragment_list, kMaxSequenceNumber, - bytewise_icmp)); + new FragmentedRangeTombstoneIterator(&fragment_list, bytewise_icmp, + kMaxSequenceNumber)); RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber); range_del_agg.AddTombstones(std::move(input_iter)); @@ -321,8 +321,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregator) { RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, kMaxSequenceNumber); for (const auto& 
fragment_list : fragment_lists) { std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator( - fragment_list.get(), kMaxSequenceNumber, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + kMaxSequenceNumber)); range_del_agg.AddTombstones(std::move(input_iter)); } @@ -353,8 +353,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleItersInAggregatorWithUpperBound) { RangeDelAggregatorV2 range_del_agg(&bytewise_icmp, 19); for (const auto& fragment_list : fragment_lists) { std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), - 19 /* snapshot */, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter)); } @@ -392,8 +392,8 @@ TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregator) { const auto& fragment_list = fragment_lists[i]; const auto& bounds = iter_bounds[i]; std::unique_ptr input_iter( - new FragmentedRangeTombstoneIterator(fragment_list.get(), - 19 /* snapshot */, bytewise_icmp)); + new FragmentedRangeTombstoneIterator(fragment_list.get(), bytewise_icmp, + 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &bounds.first, &bounds.second); } @@ -432,7 +432,7 @@ TEST_F(RangeDelAggregatorV2Test, MultipleTruncatedItersInAggregatorSameLevel) { auto add_iter_to_agg = [&](size_t i) { std::unique_ptr input_iter( new FragmentedRangeTombstoneIterator(fragment_lists[i].get(), - 19 /* snapshot */, bytewise_icmp)); + bytewise_icmp, 19 /* snapshot */)); range_del_agg.AddTombstones(std::move(input_iter), &iter_bounds[i].first, &iter_bounds[i].second); }; diff --git a/db/range_tombstone_fragmenter.cc b/db/range_tombstone_fragmenter.cc index 4137f25cf03909b298e0e0dfab67675745dd27bf..1748c54306d11cc485ccd50b799164511246cf49 100644 --- a/db/range_tombstone_fragmenter.cc +++ b/db/range_tombstone_fragmenter.cc @@ -20,7 +20,8 @@ namespace rocksdb { 
FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp) { + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots) { if (unfragmented_tombstones == nullptr) { return; } @@ -43,7 +44,8 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( } } if (is_sorted) { - FragmentTombstones(std::move(unfragmented_tombstones), icmp); + FragmentTombstones(std::move(unfragmented_tombstones), icmp, for_compaction, + snapshots); return; } @@ -61,12 +63,13 @@ FragmentedRangeTombstoneList::FragmentedRangeTombstoneList( // VectorIterator implicitly sorts by key during construction. auto iter = std::unique_ptr( new VectorIterator(std::move(keys), std::move(values), &icmp)); - FragmentTombstones(std::move(iter), icmp); + FragmentTombstones(std::move(iter), icmp, for_compaction, snapshots); } void FragmentedRangeTombstoneList::FragmentTombstones( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp) { + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots) { Slice cur_start_key(nullptr, 0); auto cmp = ParsedInternalKeyComparator(&icmp); @@ -117,10 +120,38 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } std::sort(seqnums_to_flush.begin(), seqnums_to_flush.end(), std::greater()); + size_t start_idx = tombstone_seqs_.size(); size_t end_idx = start_idx + seqnums_to_flush.size(); - tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), - seqnums_to_flush.end()); + + if (for_compaction) { + // Drop all tombstone seqnums that are not preserved by a snapshot. + SequenceNumber next_snapshot = kMaxSequenceNumber; + for (auto seq : seqnums_to_flush) { + if (seq <= next_snapshot) { + // This seqnum is visible by a lower snapshot. 
+ tombstone_seqs_.push_back(seq); + seq_set_.insert(seq); + auto upper_bound_it = + std::lower_bound(snapshots.begin(), snapshots.end(), seq); + if (upper_bound_it == snapshots.begin()) { + // This seqnum is the topmost one visible by the earliest + // snapshot. None of the seqnums below it will be visible, so we + // can skip them. + break; + } + next_snapshot = *std::prev(upper_bound_it); + } + } + end_idx = tombstone_seqs_.size(); + } else { + // The fragmentation is being done for reads, so preserve all seqnums. + tombstone_seqs_.insert(tombstone_seqs_.end(), seqnums_to_flush.begin(), + seqnums_to_flush.end()); + seq_set_.insert(seqnums_to_flush.begin(), seqnums_to_flush.end()); + } + + assert(start_idx < end_idx); tombstones_.emplace_back(cur_start_key, cur_end_key, start_idx, end_idx); cur_start_key = cur_end_key; @@ -178,33 +209,41 @@ void FragmentedRangeTombstoneList::FragmentTombstones( } } +bool FragmentedRangeTombstoneList::ContainsRange(SequenceNumber lower, + SequenceNumber upper) const { + auto seq_it = seq_set_.lower_bound(lower); + return seq_it != seq_set_.end() && *seq_it <= upper; +} + FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, - const InternalKeyComparator& icmp) + const FragmentedRangeTombstoneList* tombstones, + const InternalKeyComparator& icmp, SequenceNumber _upper_bound, + SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_(tombstones), - snapshot_(snapshot) { + upper_bound_(_upper_bound), + lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); - pos_ = tombstones_->end(); - pinned_pos_ = tombstones_->end(); + Invalidate(); } FragmentedRangeTombstoneIterator::FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - SequenceNumber snapshot, const InternalKeyComparator& icmp) + const 
InternalKeyComparator& icmp, SequenceNumber _upper_bound, + SequenceNumber _lower_bound) : tombstone_start_cmp_(icmp.user_comparator()), tombstone_end_cmp_(icmp.user_comparator()), + icmp_(&icmp), ucmp_(icmp.user_comparator()), tombstones_ref_(tombstones), tombstones_(tombstones_ref_.get()), - snapshot_(snapshot) { + upper_bound_(_upper_bound), + lower_bound_(_lower_bound) { assert(tombstones_ != nullptr); - pos_ = tombstones_->end(); - seq_pos_ = tombstones_->seq_end(); - pinned_pos_ = tombstones_->end(); - pinned_seq_pos_ = tombstones_->seq_end(); + Invalidate(); } void FragmentedRangeTombstoneIterator::SeekToFirst() { @@ -220,7 +259,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopFirst() { pos_ = tombstones_->begin(); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanForwardToVisibleTombstone(); } @@ -237,7 +276,7 @@ void FragmentedRangeTombstoneIterator::SeekToTopLast() { pos_ = std::prev(tombstones_->end()); seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } @@ -270,7 +309,7 @@ void FragmentedRangeTombstoneIterator::SeekToCoveringTombstone( } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( @@ -289,25 +328,28 @@ void FragmentedRangeTombstoneIterator::SeekForPrevToCoveringTombstone( --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } void FragmentedRangeTombstoneIterator::ScanForwardToVisibleTombstone() { while (pos_ != tombstones_->end() 
&& - seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { ++pos_; if (pos_ == tombstones_->end()) { + Invalidate(); return; } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } } void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { while (pos_ != tombstones_->end() && - seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx)) { + (seq_pos_ == tombstones_->seq_iter(pos_->seq_end_idx) || + *seq_pos_ < lower_bound_)) { if (pos_ == tombstones_->begin()) { Invalidate(); return; @@ -315,7 +357,7 @@ void FragmentedRangeTombstoneIterator::ScanBackwardToVisibleTombstone() { --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); } } @@ -333,14 +375,13 @@ void FragmentedRangeTombstoneIterator::TopNext() { } seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanForwardToVisibleTombstone(); } void FragmentedRangeTombstoneIterator::Prev() { if (seq_pos_ == tombstones_->seq_begin()) { - pos_ = tombstones_->end(); - seq_pos_ = tombstones_->seq_end(); + Invalidate(); return; } --seq_pos_; @@ -358,7 +399,7 @@ void FragmentedRangeTombstoneIterator::TopPrev() { --pos_; seq_pos_ = std::lower_bound(tombstones_->seq_iter(pos_->seq_start_idx), tombstones_->seq_iter(pos_->seq_end_idx), - snapshot_, std::greater()); + upper_bound_, std::greater()); ScanBackwardToVisibleTombstone(); } @@ -372,4 +413,27 @@ SequenceNumber FragmentedRangeTombstoneIterator::MaxCoveringTombstoneSeqnum( return ValidPos() && ucmp_->Compare(start_key(), user_key) <= 0 ? 
seq() : 0; } +std::map> +FragmentedRangeTombstoneIterator::SplitBySnapshot( + const std::vector& snapshots) { + std::map> + splits; + SequenceNumber lower = 0; + SequenceNumber upper; + for (size_t i = 0; i <= snapshots.size(); i++) { + if (i >= snapshots.size()) { + upper = kMaxSequenceNumber; + } else { + upper = snapshots[i]; + } + if (tombstones_->ContainsRange(lower, upper)) { + splits.emplace(upper, std::unique_ptr( + new FragmentedRangeTombstoneIterator( + tombstones_, *icmp_, upper, lower))); + } + lower = upper + 1; + } + return splits; +} + } // namespace rocksdb diff --git a/db/range_tombstone_fragmenter.h b/db/range_tombstone_fragmenter.h index 2ad346af171b922443588500ba0becc4dbd42d2a..306a0347b6f64470d02cde4b45b57d5bc91755d9 100644 --- a/db/range_tombstone_fragmenter.h +++ b/db/range_tombstone_fragmenter.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -38,7 +39,8 @@ struct FragmentedRangeTombstoneList { }; FragmentedRangeTombstoneList( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, bool for_compaction = false, + const std::vector& snapshots = {}); std::vector::const_iterator begin() const { return tombstones_.begin(); @@ -60,7 +62,11 @@ struct FragmentedRangeTombstoneList { return tombstone_seqs_.end(); } - bool empty() const { return tombstones_.size() == 0; } + bool empty() const { return tombstones_.empty(); } + + // Returns true if the stored tombstones contain one with a sequence + // number in [lower, upper]. + bool ContainsRange(SequenceNumber lower, SequenceNumber upper) const; private: // Given an ordered range tombstone iterator unfragmented_tombstones, @@ -68,10 +74,12 @@ struct FragmentedRangeTombstoneList { // tombstones_ and tombstone_seqs_. 
void FragmentTombstones( std::unique_ptr unfragmented_tombstones, - const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, bool for_compaction, + const std::vector& snapshots); std::vector tombstones_; std::vector tombstone_seqs_; + std::set seq_set_; std::list pinned_slices_; PinnedIteratorsManager pinned_iters_mgr_; }; @@ -88,11 +96,13 @@ struct FragmentedRangeTombstoneList { class FragmentedRangeTombstoneIterator : public InternalIterator { public: FragmentedRangeTombstoneIterator( - const FragmentedRangeTombstoneList* tombstones, SequenceNumber snapshot, - const InternalKeyComparator& icmp); + const FragmentedRangeTombstoneList* tombstones, + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + SequenceNumber lower_bound = 0); FragmentedRangeTombstoneIterator( const std::shared_ptr& tombstones, - SequenceNumber snapshot, const InternalKeyComparator& icmp); + const InternalKeyComparator& icmp, SequenceNumber upper_bound, + SequenceNumber lower_bound = 0); void SeekToFirst() override; void SeekToLast() override; @@ -136,10 +146,6 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { seq_pos_ = tombstones_->seq_end(); } - // TODO: implement properly - RangeTombstone tombstone() const { - return RangeTombstone(start_key(), end_key(), seq()); - } Slice start_key() const { return pos_->start_key; } Slice end_key() const { return pos_->end_key; } SequenceNumber seq() const { return *seq_pos_; } @@ -151,12 +157,24 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { return ParsedInternalKey(pos_->end_key, kMaxSequenceNumber, kTypeRangeDeletion); } - ParsedInternalKey internal_key() const { - return ParsedInternalKey(pos_->start_key, *seq_pos_, kTypeRangeDeletion); - } SequenceNumber MaxCoveringTombstoneSeqnum(const Slice& user_key); + // Splits the iterator into up to n+1 iterators (where n is the number of + // snapshots), each providing a view over a "stripe" of sequence numbers. 
The + // iterators are keyed by the upper bound of their ranges (the provided + // snapshots + kMaxSequenceNumber). + // + // NOTE: the iterators in the returned map are no longer valid if their + // parent iterator is deleted, since they do not modify the refcount of the + // underlying tombstone list. Therefore, this map should be deleted before + // the parent iterator. + std::map> + SplitBySnapshot(const std::vector& snapshots); + + SequenceNumber upper_bound() const { return upper_bound_; } + SequenceNumber lower_bound() const { return lower_bound_; } + private: using RangeTombstoneStack = FragmentedRangeTombstoneList::RangeTombstoneStack; @@ -217,10 +235,12 @@ class FragmentedRangeTombstoneIterator : public InternalIterator { const RangeTombstoneStackStartComparator tombstone_start_cmp_; const RangeTombstoneStackEndComparator tombstone_end_cmp_; + const InternalKeyComparator* icmp_; const Comparator* ucmp_; std::shared_ptr tombstones_ref_; const FragmentedRangeTombstoneList* tombstones_; - SequenceNumber snapshot_; + SequenceNumber upper_bound_; + SequenceNumber lower_bound_; std::vector::const_iterator pos_; std::vector::const_iterator seq_pos_; mutable std::vector::const_iterator pinned_pos_; diff --git a/db/range_tombstone_fragmenter_test.cc b/db/range_tombstone_fragmenter_test.cc index fc6eddc29d7ef26d54f62573afff2a26949ba6d7..ddd3f774176a6f34ee98944a65107fea26ac4dea 100644 --- a/db/range_tombstone_fragmenter_test.cc +++ b/db/range_tombstone_fragmenter_test.cc @@ -29,15 +29,26 @@ std::unique_ptr MakeRangeDelIter( new test::VectorIterator(keys, values)); } +void CheckIterPosition(const RangeTombstone& tombstone, + const FragmentedRangeTombstoneIterator* iter) { + // Test InternalIterator interface. + EXPECT_EQ(tombstone.start_key_, ExtractUserKey(iter->key())); + EXPECT_EQ(tombstone.end_key_, iter->value()); + EXPECT_EQ(tombstone.seq_, iter->seq()); + + // Test FragmentedRangeTombstoneIterator interface. 
+ EXPECT_EQ(tombstone.start_key_, iter->start_key()); + EXPECT_EQ(tombstone.end_key_, iter->end_key()); + EXPECT_EQ(tombstone.seq_, GetInternalKeySeqno(iter->key())); +} + void VerifyFragmentedRangeDels( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToFirst(); - for (size_t i = 0; i < expected_tombstones.size() && iter->Valid(); - i++, iter->Next()) { - EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_); - EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_); - EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_); + for (size_t i = 0; i < expected_tombstones.size(); i++, iter->Next()) { + ASSERT_TRUE(iter->Valid()); + CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } @@ -46,11 +57,9 @@ void VerifyVisibleTombstones( FragmentedRangeTombstoneIterator* iter, const std::vector& expected_tombstones) { iter->SeekToTopFirst(); - for (size_t i = 0; i < expected_tombstones.size() && iter->Valid(); - i++, iter->TopNext()) { - EXPECT_EQ(iter->start_key(), expected_tombstones[i].start_key_); - EXPECT_EQ(iter->value(), expected_tombstones[i].end_key_); - EXPECT_EQ(iter->seq(), expected_tombstones[i].seq_); + for (size_t i = 0; i < expected_tombstones.size(); i++, iter->TopNext()) { + ASSERT_TRUE(iter->Valid()); + CheckIterPosition(expected_tombstones[i], iter); } EXPECT_FALSE(iter->Valid()); } @@ -69,9 +78,7 @@ void VerifySeek(FragmentedRangeTombstoneIterator* iter, ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(testcase.expected_position.start_key_, iter->start_key()); - EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); - EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); + CheckIterPosition(testcase.expected_position, iter); } } } @@ -84,9 +91,7 @@ void VerifySeekForPrev(FragmentedRangeTombstoneIterator* iter, ASSERT_FALSE(iter->Valid()); } else { ASSERT_TRUE(iter->Valid()); - EXPECT_EQ(testcase.expected_position.start_key_, 
iter->start_key()); - EXPECT_EQ(testcase.expected_position.end_key_, iter->value()); - EXPECT_EQ(testcase.expected_position.seq_, iter->seq()); + CheckIterPosition(testcase.expected_position, iter); } } } @@ -112,8 +117,10 @@ TEST_F(RangeTombstoneFragmenterTest, NonOverlappingTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "b", 10}, {"c", "d", 5}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"", 0}, {"a", 10}, {"b", 0}, {"c", 5}}); @@ -124,8 +131,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlappingTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 15}, {"c", "e", 10}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, @@ -138,8 +147,10 @@ TEST_F(RangeTombstoneFragmenterTest, ContiguousTombstones) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels( &iter, {{"a", "c", 10}, {"c", "e", 20}, {"c", "e", 5}, {"e", "g", 15}}); VerifyMaxCoveringTombstoneSeqnum(&iter, @@ -152,8 +163,10 @@ 
TEST_F(RangeTombstoneFragmenterTest, RepeatedStartAndEndKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}}); VerifyMaxCoveringTombstoneSeqnum(&iter, {{"a", 10}, {"b", 10}, {"c", 0}}); @@ -165,8 +178,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyDifferentEndKeys) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"a", "c", 7}, {"a", "c", 3}, @@ -186,8 +201,10 @@ TEST_F(RangeTombstoneFragmenterTest, RepeatedStartKeyMixedEndKeys) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 30}, {"a", "c", 20}, {"a", "c", 10}, @@ -211,16 +228,16 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 9 /* snapshot */, 
- bytewise_icmp); - FragmentedRangeTombstoneIterator iter3(&fragment_list, 7 /* snapshot */, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter4(&fragment_list, 5 /* snapshot */, - bytewise_icmp); - FragmentedRangeTombstoneIterator iter5(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 9 /* upper_bound */); + FragmentedRangeTombstoneIterator iter3(&fragment_list, bytewise_icmp, + 7 /* upper_bound */); + FragmentedRangeTombstoneIterator iter4(&fragment_list, bytewise_icmp, + 5 /* upper_bound */); + FragmentedRangeTombstoneIterator iter5(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); for (auto* iter : {&iter1, &iter2, &iter3, &iter4, &iter5}) { VerifyFragmentedRangeDels(iter, {{"a", "c", 10}, {"c", "e", 10}, @@ -234,6 +251,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { {"l", "n", 4}}); } + ASSERT_EQ(0, iter1.lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, iter1.upper_bound()); VerifyVisibleTombstones(&iter1, {{"a", "c", 10}, {"c", "e", 10}, {"e", "g", 8}, @@ -243,6 +262,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( &iter1, {{"a", 10}, {"c", 10}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter2.lower_bound()); + ASSERT_EQ(9, iter2.upper_bound()); VerifyVisibleTombstones(&iter2, {{"c", "e", 8}, {"e", "g", 8}, {"g", "i", 6}, @@ -251,6 +272,8 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( &iter2, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter3.lower_bound()); + ASSERT_EQ(7, iter3.upper_bound()); VerifyVisibleTombstones(&iter3, {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}, @@ -259,10 +282,14 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKey) { VerifyMaxCoveringTombstoneSeqnum( 
&iter3, {{"a", 0}, {"c", 6}, {"e", 6}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter4.lower_bound()); + ASSERT_EQ(5, iter4.upper_bound()); VerifyVisibleTombstones(&iter4, {{"j", "l", 4}, {"l", "n", 4}}); VerifyMaxCoveringTombstoneSeqnum( &iter4, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 4}, {"m", 4}}); + ASSERT_EQ(0, iter5.lower_bound()); + ASSERT_EQ(3, iter5.upper_bound()); VerifyVisibleTombstones(&iter5, {{"j", "l", 2}}); VerifyMaxCoveringTombstoneSeqnum( &iter5, {{"a", 0}, {"c", 0}, {"e", 0}, {"i", 0}, {"j", 2}, {"m", 0}}); @@ -277,8 +304,10 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, 9 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + 9 /* upper_bound */); + ASSERT_EQ(0, iter.lower_bound()); + ASSERT_EQ(9, iter.upper_bound()); VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, {"c", "e", 10}, {"c", "e", 8}, @@ -293,6 +322,116 @@ TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyUnordered) { &iter, {{"a", 0}, {"c", 8}, {"e", 8}, {"i", 0}, {"j", 4}, {"m", 4}}); } +TEST_F(RangeTombstoneFragmenterTest, OverlapAndRepeatedStartKeyForCompaction) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, + {} /* snapshots */); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, + OverlapAndRepeatedStartKeyForCompactionWithSnapshot) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + 
{"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list( + std::move(range_del_iter), bytewise_icmp, true /* for_compaction */, + {20, 9} /* upper_bounds */); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + VerifyFragmentedRangeDels(&iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"c", "e", 8}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, IteratorSplitNoSnapshots) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), + bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + + auto split_iters = iter.SplitBySnapshot({} /* snapshots */); + ASSERT_EQ(1, split_iters.size()); + + auto* split_iter = split_iters[kMaxSequenceNumber].get(); + ASSERT_EQ(0, split_iter->lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, split_iter->upper_bound()); + VerifyVisibleTombstones(split_iter, {{"a", "c", 10}, + {"c", "e", 10}, + {"e", "g", 8}, + {"g", "i", 6}, + {"j", "l", 4}, + {"l", "n", 4}}); +} + +TEST_F(RangeTombstoneFragmenterTest, IteratorSplitWithSnapshots) { + auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, + {"j", "n", 4}, + {"c", "i", 6}, + {"c", "g", 8}, + {"j", "l", 2}}); + + FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), + bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber /* upper_bound */); + + auto split_iters = iter.SplitBySnapshot({3, 5, 7, 9} /* snapshots */); + ASSERT_EQ(5, split_iters.size()); + + auto* split_iter1 = split_iters[3].get(); + ASSERT_EQ(0, split_iter1->lower_bound()); + ASSERT_EQ(3, split_iter1->upper_bound()); + VerifyVisibleTombstones(split_iter1, {{"j", "l", 
2}}); + + auto* split_iter2 = split_iters[5].get(); + ASSERT_EQ(4, split_iter2->lower_bound()); + ASSERT_EQ(5, split_iter2->upper_bound()); + VerifyVisibleTombstones(split_iter2, {{"j", "l", 4}, {"l", "n", 4}}); + + auto* split_iter3 = split_iters[7].get(); + ASSERT_EQ(6, split_iter3->lower_bound()); + ASSERT_EQ(7, split_iter3->upper_bound()); + VerifyVisibleTombstones(split_iter3, + {{"c", "e", 6}, {"e", "g", 6}, {"g", "i", 6}}); + + auto* split_iter4 = split_iters[9].get(); + ASSERT_EQ(8, split_iter4->lower_bound()); + ASSERT_EQ(9, split_iter4->upper_bound()); + VerifyVisibleTombstones(split_iter4, {{"c", "e", 8}, {"e", "g", 8}}); + + auto* split_iter5 = split_iters[kMaxSequenceNumber].get(); + ASSERT_EQ(10, split_iter5->lower_bound()); + ASSERT_EQ(kMaxSequenceNumber, split_iter5->upper_bound()); + VerifyVisibleTombstones(split_iter5, {{"a", "c", 10}, {"c", "e", 10}}); +} + TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { // Same tombstones as OverlapAndRepeatedStartKey. auto range_del_iter = MakeRangeDelIter({{"a", "e", 10}, @@ -304,8 +443,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek( &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); @@ -313,8 +452,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekStartKey) { &iter1, {{"a", {"a", "c", 10}}, {"e", {"e", "g", 8}}, {"l", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"a", {"j", "l", 2}}, {"e", {"j", "l", 2}}, {"l", {}, true /* out of range */}}); @@ -334,8 +473,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) 
{ FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek( &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); @@ -343,8 +482,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekCovered) { &iter1, {{"b", {"a", "c", 10}}, {"f", {"e", "g", 8}}, {"m", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"b", {"j", "l", 2}}, {"f", {"j", "l", 2}}, {"m", {}, true /* out of range */}}); @@ -364,8 +503,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter1(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter1(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); VerifySeek(&iter1, {{"c", {"c", "e", 10}}, {"g", {"g", "i", 6}}, {"i", {"j", "l", 4}}, @@ -375,8 +514,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekEndKey) { {"i", {"g", "i", 6}}, {"n", {"l", "n", 4}}}); - FragmentedRangeTombstoneIterator iter2(&fragment_list, 3 /* snapshot */, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter2(&fragment_list, bytewise_icmp, + 3 /* upper_bound */); VerifySeek(&iter2, {{"c", {"j", "l", 2}}, {"g", {"j", "l", 2}}, {"i", {"j", "l", 2}}, @@ -398,8 +537,8 @@ TEST_F(RangeTombstoneFragmenterTest, SeekOutOfBounds) { FragmentedRangeTombstoneList fragment_list(std::move(range_del_iter), bytewise_icmp); - FragmentedRangeTombstoneIterator iter(&fragment_list, kMaxSequenceNumber, - bytewise_icmp); + FragmentedRangeTombstoneIterator iter(&fragment_list, bytewise_icmp, + kMaxSequenceNumber); 
VerifySeek(&iter, {{"", {"a", "c", 10}}, {"z", {}, true /* out of range */}}); VerifySeekForPrev(&iter, {{"", {}, true /* out of range */}, {"z", {"l", "n", 4}}}); diff --git a/table/block_based_table_reader.cc b/table/block_based_table_reader.cc index 974ba4b24a0d1fbc2d2c4a5571b91c0a00d3d4eb..959044a09395d6a10b6095cd337556329a0d6fec 100644 --- a/table/block_based_table_reader.cc +++ b/table/block_based_table_reader.cc @@ -2412,7 +2412,7 @@ FragmentedRangeTombstoneIterator* BlockBasedTable::NewRangeTombstoneIterator( snapshot = read_options.snapshot->GetSequenceNumber(); } return new FragmentedRangeTombstoneIterator( - rep_->fragmented_range_dels, snapshot, rep_->internal_comparator); + rep_->fragmented_range_dels, rep_->internal_comparator, snapshot); } InternalIterator* BlockBasedTable::NewUnfragmentedRangeTombstoneIterator(