提交 66ff1630 编写于 作者: B bors

Auto merge of #32466 - jooert:btree_append, r=apasel422

Implement `append` for b-trees.

I have finally found time to revive #26227, this time only with an `append` implementation.

The algorithm implemented here is linear in the size of the two b-trees. It firsts creates
a `MergeIter` from the two b-trees and then builds a new b-tree by pushing
key-value pairs from the `MergeIter` into nodes at the right heights.

Three functions for stealing have been added to the implementation of `Handle` as
well as a getter for the height of a `NodeRef`.

The docs have been updated with performance information about `BTreeMap::append` and
the remark about B has been removed now that it is the same for all instances of `BTreeMap`.

cc @gereeter @Gankro @apasel422
......@@ -11,7 +11,7 @@
use core::cmp::Ordering;
use core::fmt::Debug;
use core::hash::{Hash, Hasher};
use core::iter::FromIterator;
use core::iter::{FromIterator, Peekable};
use core::marker::PhantomData;
use core::ops::Index;
use core::{fmt, intrinsics, mem, ptr};
......@@ -348,6 +348,12 @@ pub struct OccupiedEntry<'a, K: 'a, V: 'a> {
_marker: PhantomData<&'a mut (K, V)>,
}
// An iterator for merging two sorted sequences into one
struct MergeIter<K, V, I: Iterator<Item=(K, V)>> {
left: Peekable<I>,
right: Peekable<I>,
}
impl<K: Ord, V> BTreeMap<K, V> {
/// Makes a new empty BTreeMap with a reasonable choice for B.
///
......@@ -535,6 +541,62 @@ pub fn remove<Q: ?Sized>(&mut self, key: &Q) -> Option<V> where K: Borrow<Q>, Q:
}
}
/// Moves all elements from `other` into `Self`, leaving `other` empty.
///
/// # Examples
///
/// ```
/// #![feature(btree_append)]
/// use std::collections::BTreeMap;
///
/// let mut a = BTreeMap::new();
/// a.insert(1, "a");
/// a.insert(2, "b");
/// a.insert(3, "c");
///
/// let mut b = BTreeMap::new();
/// b.insert(3, "d");
/// b.insert(4, "e");
/// b.insert(5, "f");
///
/// a.append(&mut b);
///
/// assert_eq!(a.len(), 5);
/// assert_eq!(b.len(), 0);
///
/// assert_eq!(a[&1], "a");
/// assert_eq!(a[&2], "b");
/// assert_eq!(a[&3], "d");
/// assert_eq!(a[&4], "e");
/// assert_eq!(a[&5], "f");
/// ```
#[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
issue = "19986")]
pub fn append(&mut self, other: &mut Self) {
// Do we have to append anything at all?
if other.len() == 0 {
return;
}
// We can just swap `self` and `other` if `self` is empty.
if self.len() == 0 {
mem::swap(self, other);
return;
}
// First, we merge `self` and `other` into a sorted sequence in linear time.
let self_iter = mem::replace(self, BTreeMap::new()).into_iter();
let other_iter = mem::replace(other, BTreeMap::new()).into_iter();
let iter = MergeIter {
left: self_iter.peekable(),
right: other_iter.peekable(),
};
// Second, we build a tree from the sorted sequence in linear time.
self.from_sorted_iter(iter);
self.fix_right_edge();
}
/// Constructs a double-ended iterator over a sub-range of elements in the map, starting
/// at min, and ending at max. If min is `Unbounded`, then it will be treated as "negative
/// infinity", and if max is `Unbounded`, then it will be treated as "positive infinity".
......@@ -724,6 +786,76 @@ pub fn entry(&mut self, key: K) -> Entry<K, V> {
})
}
}
fn from_sorted_iter<I: Iterator<Item=(K, V)>>(&mut self, iter: I) {
let mut cur_node = last_leaf_edge(self.root.as_mut()).into_node();
// Iterate through all key-value pairs, pushing them into nodes at the right level.
for (key, value) in iter {
// Try to push key-value pair into the current leaf node.
if cur_node.len() < node::CAPACITY {
cur_node.push(key, value);
} else {
// No space left, go up and push there.
let mut open_node;
let mut test_node = cur_node.forget_type();
loop {
match test_node.ascend() {
Ok(parent) => {
let parent = parent.into_node();
if parent.len() < node::CAPACITY {
// Found a node with space left, push here.
open_node = parent;
break;
} else {
// Go up again.
test_node = parent.forget_type();
}
},
Err(node) => {
// We are at the top, create a new root node and push there.
open_node = node.into_root_mut().push_level();
break;
},
}
}
// Push key-value pair and new right subtree.
let tree_height = open_node.height() - 1;
let mut right_tree = node::Root::new_leaf();
for _ in 0..tree_height {
right_tree.push_level();
}
open_node.push(key, value, right_tree);
// Go down to the right-most leaf again.
cur_node = last_leaf_edge(open_node.forget_type()).into_node();
}
self.length += 1;
}
}
fn fix_right_edge(&mut self) {
// Handle underfull nodes, start from the top.
let mut cur_node = self.root.as_mut();
while let Internal(internal) = cur_node.force() {
// Check if right-most child is underfull.
let mut last_edge = internal.last_edge();
let right_child_len = last_edge.reborrow().descend().len();
if right_child_len < node::CAPACITY / 2 {
// We need to steal.
let mut last_kv = match last_edge.left_kv() {
Ok(left) => left,
Err(_) => unreachable!(),
};
last_kv.bulk_steal_left(node::CAPACITY/2 - right_child_len);
last_edge = last_kv.right_edge();
}
// Go further down.
cur_node = last_edge.descend();
}
}
}
impl<'a, K: 'a, V: 'a> IntoIterator for &'a BTreeMap<K, V> {
......@@ -1690,32 +1822,41 @@ fn handle_underfull_node<'a, K, V>(node: NodeRef<marker::Mut<'a>,
};
if handle.can_merge() {
return Merged(handle.merge().into_node());
Merged(handle.merge().into_node())
} else {
unsafe {
let (k, v, edge) = if is_left {
handle.reborrow_mut().left_edge().descend().pop()
} else {
handle.reborrow_mut().right_edge().descend().pop_front()
};
if is_left {
handle.steal_left();
} else {
handle.steal_right();
}
Stole(handle.into_node())
}
}
let k = mem::replace(handle.reborrow_mut().into_kv_mut().0, k);
let v = mem::replace(handle.reborrow_mut().into_kv_mut().1, v);
impl<K: Ord, V, I: Iterator<Item=(K, V)>> Iterator for MergeIter<K, V, I> {
type Item = (K, V);
// FIXME: reuse cur_node?
if is_left {
match handle.reborrow_mut().right_edge().descend().force() {
Leaf(mut leaf) => leaf.push_front(k, v),
Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
}
} else {
match handle.reborrow_mut().left_edge().descend().force() {
Leaf(mut leaf) => leaf.push(k, v),
Internal(mut internal) => internal.push(k, v, edge.unwrap())
}
}
}
fn next(&mut self) -> Option<(K, V)> {
let res = match (self.left.peek(), self.right.peek()) {
(Some(&(ref left_key, _)), Some(&(ref right_key, _))) => left_key.cmp(right_key),
(Some(_), None) => Ordering::Less,
(None, Some(_)) => Ordering::Greater,
(None, None) => return None,
};
return Stole(handle.into_node());
// Check which elements comes first and only advance the corresponding iterator.
// If two keys are equal, take the value from `right`.
match res {
Ordering::Less => {
self.left.next()
},
Ordering::Greater => {
self.right.next()
},
Ordering::Equal => {
self.left.next();
self.right.next()
},
}
}
}
......@@ -328,6 +328,12 @@ pub fn len(&self) -> usize {
self.as_leaf().len as usize
}
/// Returns the height of this node in the whole tree. Zero height denotes the
/// leaf level.
pub fn height(&self) -> usize {
self.height
}
/// Removes any static information about whether this node is a `Leaf` or an
/// `Internal` node.
pub fn forget_type(self) -> NodeRef<BorrowType, K, V, marker::LeafOrInternal> {
......@@ -1233,6 +1239,139 @@ pub fn merge(mut self)
Handle::new_edge(self.node, self.idx)
}
}
/// This removes a key/value pair from the left child and replaces it with the key/value pair
/// pointed to by this handle while pushing the old key/value pair of this handle into the right
/// child.
pub fn steal_left(&mut self) {
unsafe {
let (k, v, edge) = self.reborrow_mut().left_edge().descend().pop();
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
match self.reborrow_mut().right_edge().descend().force() {
ForceResult::Leaf(mut leaf) => leaf.push_front(k, v),
ForceResult::Internal(mut internal) => internal.push_front(k, v, edge.unwrap())
}
}
}
/// This removes a key/value pair from the right child and replaces it with the key/value pair
/// pointed to by this handle while pushing the old key/value pair of this handle into the left
/// child.
pub fn steal_right(&mut self) {
unsafe {
let (k, v, edge) = self.reborrow_mut().right_edge().descend().pop_front();
let k = mem::replace(self.reborrow_mut().into_kv_mut().0, k);
let v = mem::replace(self.reborrow_mut().into_kv_mut().1, v);
match self.reborrow_mut().left_edge().descend().force() {
ForceResult::Leaf(mut leaf) => leaf.push(k, v),
ForceResult::Internal(mut internal) => internal.push(k, v, edge.unwrap())
}
}
}
/// This does stealing similar to `steal_left` but steals multiple elements at once.
pub fn bulk_steal_left(&mut self, n: usize) {
unsafe {
// Get raw pointers to left child's keys, values and edges.
let (left_len, left_k, left_v, left_e) = {
let mut left = self.reborrow_mut().left_edge().descend();
(left.len(),
left.keys_mut().as_mut_ptr(),
left.vals_mut().as_mut_ptr(),
match left.force() {
ForceResult::Leaf(_) => None,
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
})
};
// Get raw pointers to right child's keys, values and edges.
let (right_len, right_k, right_v, right_e) = {
let mut right = self.reborrow_mut().right_edge().descend();
(right.len(),
right.keys_mut().as_mut_ptr(),
right.vals_mut().as_mut_ptr(),
match right.force() {
ForceResult::Leaf(_) => None,
ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()),
})
};
// Get raw pointers to parent's key and value.
let (parent_k, parent_v) = {
let kv = self.reborrow_mut().into_kv_mut();
(kv.0 as *mut K, kv.1 as *mut V)
};
// Make sure that we may steal safely.
debug_assert!(right_len + n <= CAPACITY);
debug_assert!(left_len >= n);
// Make room for stolen elements in right child.
ptr::copy(right_k,
right_k.offset(n as isize),
right_len);
ptr::copy(right_v,
right_v.offset(n as isize),
right_len);
if let Some(edges) = right_e {
ptr::copy(edges,
edges.offset(n as isize),
right_len+1);
}
// Move elements from the left child to the right one.
let left_ind = (left_len - n) as isize;
ptr::copy_nonoverlapping(left_k.offset(left_ind + 1),
right_k,
n - 1);
ptr::copy_nonoverlapping(left_v.offset(left_ind + 1),
right_v,
n - 1);
match (left_e, right_e) {
(Some(left), Some(right)) => {
ptr::copy_nonoverlapping(left.offset(left_ind + 1),
right,
n);
},
(Some(_), None) => unreachable!(),
(None, Some(_)) => unreachable!(),
(None, None) => {},
}
// Copy parent key/value pair to right child.
ptr::copy_nonoverlapping(parent_k,
right_k.offset(n as isize - 1),
1);
ptr::copy_nonoverlapping(parent_v,
right_v.offset(n as isize - 1),
1);
// Copy left-most stolen pair to parent.
ptr::copy_nonoverlapping(left_k.offset(left_ind),
parent_k,
1);
ptr::copy_nonoverlapping(left_v.offset(left_ind),
parent_v,
1);
// Fix lengths of left and right child and parent pointers in children of the right
// child.
self.reborrow_mut().left_edge().descend().as_leaf_mut().len -= n as u16;
let mut right = self.reborrow_mut().right_edge().descend();
right.as_leaf_mut().len += n as u16;
if let ForceResult::Internal(mut node) = right.force() {
for i in 0..(right_len+n+1) {
Handle::new_edge(node.reborrow_mut(), i as usize).correct_parent_link();
}
}
}
}
}
impl<BorrowType, K, V, HandleType>
......
......@@ -545,6 +545,41 @@ pub fn take<Q: ?Sized>(&mut self, value: &Q) -> Option<T>
{
Recover::take(&mut self.map, value)
}
/// Moves all elements from `other` into `Self`, leaving `other` empty.
///
/// # Examples
///
/// ```
/// #![feature(btree_append)]
/// use std::collections::BTreeSet;
///
/// let mut a = BTreeSet::new();
/// a.insert(1);
/// a.insert(2);
/// a.insert(3);
///
/// let mut b = BTreeSet::new();
/// b.insert(3);
/// b.insert(4);
/// b.insert(5);
///
/// a.append(&mut b);
///
/// assert_eq!(a.len(), 5);
/// assert_eq!(b.len(), 0);
///
/// assert!(a.contains(&1));
/// assert!(a.contains(&2));
/// assert!(a.contains(&3));
/// assert!(a.contains(&4));
/// assert!(a.contains(&5));
/// ```
#[unstable(feature = "btree_append", reason = "recently added as part of collections reform 2",
issue = "19986")]
pub fn append(&mut self, other: &mut Self) {
self.map.append(&mut other.map);
}
}
#[stable(feature = "rust1", since = "1.0.0")]
......
......@@ -446,6 +446,58 @@ fn test_vacant_entry_key() {
assert_eq!(a[key], value);
}
macro_rules! create_append_test {
($name:ident, $len:expr) => {
#[test]
fn $name() {
let mut a = BTreeMap::new();
for i in 0..8 {
a.insert(i, i);
}
let mut b = BTreeMap::new();
for i in 5..$len {
b.insert(i, 2*i);
}
a.append(&mut b);
assert_eq!(a.len(), $len);
assert_eq!(b.len(), 0);
for i in 0..$len {
if i < 5 {
assert_eq!(a[&i], i);
} else {
assert_eq!(a[&i], 2*i);
}
}
assert_eq!(a.remove(&($len-1)), Some(2*($len-1)));
assert_eq!(a.insert($len-1, 20), None);
}
};
}
// These are mostly for testing the algorithm that "fixes" the right edge after insertion.
// Single node.
create_append_test!(test_append_9, 9);
// Two leafs that don't need fixing.
create_append_test!(test_append_17, 17);
// Two leafs where the second one ends up underfull and needs stealing at the end.
create_append_test!(test_append_14, 14);
// Two leafs where the second one ends up empty because the insertion finished at the root.
create_append_test!(test_append_12, 12);
// Three levels; insertion finished at the root.
create_append_test!(test_append_144, 144);
// Three levels; insertion finished at leaf while there is an empty node on the second level.
create_append_test!(test_append_145, 145);
// Tests for several randomly chosen sizes.
create_append_test!(test_append_170, 170);
create_append_test!(test_append_181, 181);
create_append_test!(test_append_239, 239);
create_append_test!(test_append_1700, 1700);
mod bench {
use std::collections::BTreeMap;
use std::__rand::{Rng, thread_rng};
......
......@@ -265,3 +265,27 @@ fn iter<'a, 'new>(v: Iter<'a, &'static str>) -> Iter<'a, &'new str> { v }
fn into_iter<'new>(v: IntoIter<&'static str>) -> IntoIter<&'new str> { v }
fn range<'a, 'new>(v: Range<'a, &'static str>) -> Range<'a, &'new str> { v }
}
#[test]
fn test_append() {
let mut a = BTreeSet::new();
a.insert(1);
a.insert(2);
a.insert(3);
let mut b = BTreeSet::new();
b.insert(3);
b.insert(4);
b.insert(5);
a.append(&mut b);
assert_eq!(a.len(), 5);
assert_eq!(b.len(), 0);
assert_eq!(a.contains(&1), true);
assert_eq!(a.contains(&2), true);
assert_eq!(a.contains(&3), true);
assert_eq!(a.contains(&4), true);
assert_eq!(a.contains(&5), true);
}
......@@ -13,6 +13,7 @@
#![feature(binary_heap_extras)]
#![feature(binary_heap_append)]
#![feature(box_syntax)]
#![feature(btree_append)]
#![feature(btree_range)]
#![feature(collections)]
#![feature(collections_bound)]
......
......@@ -120,12 +120,10 @@
//!
//! For Sets, all operations have the cost of the equivalent Map operation.
//!
//! | | get | insert | remove | predecessor |
//! |----------|-----------|----------|----------|-------------|
//! | HashMap | O(1)~ | O(1)~* | O(1)~ | N/A |
//! | BTreeMap | O(log n) | O(log n) | O(log n) | O(log n) |
//!
//! Note that BTreeMap's precise performance depends on the value of B.
//! | | get | insert | remove | predecessor | append |
//! |----------|-----------|----------|----------|-------------|--------|
//! | HashMap | O(1)~ | O(1)~* | O(1)~ | N/A | N/A |
//! | BTreeMap | O(log n) | O(log n) | O(log n) | O(log n) | O(n+m) |
//!
//! # Correct and Efficient Usage of Collections
//!
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册