Commit 1962a70e authored by bors

Auto merge of #50866 - michaelwoerister:relocations-in-vec, r=oli-obk

Use different datastructure for MIRI relocations

This PR makes relocations in MIRI use a sorted vector instead of a `BTreeMap`, which should make a few common operations more efficient. Let's see if that's true.
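For reference, the basic idea is a map backed by a `Vec` that is kept sorted by key. A rough sketch follows (the names `SortedVecMap`, `get`, and `insert` are illustrative only and are not part of this PR; the real type is the `SortedMap` added to `rustc_data_structures` below):

```rust
// Rough sketch of the approach (illustrative only): a map backed by a
// Vec that is kept sorted by key, so lookups are a binary search over
// contiguous memory rather than a walk through `BTreeMap` nodes.
struct SortedVecMap<K: Ord, V> {
    data: Vec<(K, V)>, // sorted by key, no duplicate keys
}

impl<K: Ord, V> SortedVecMap<K, V> {
    fn get(&self, key: &K) -> Option<&V> {
        // Binary search over the contiguous vector; relocation maps are
        // usually tiny, so this is cache-friendly.
        self.data
            .binary_search_by(|(k, _)| k.cmp(key))
            .ok()
            .map(|i| &self.data[i].1)
    }

    fn insert(&mut self, key: K, value: V) {
        match self.data.binary_search_by(|(k, _)| k.cmp(&key)) {
            // Key already present: overwrite the value in place.
            Ok(i) => self.data[i].1 = value,
            // Not present: shift the tail to make room (O(n) worst case).
            Err(i) => self.data.insert(i, (key, value)),
        }
    }
}
```

Iteration and range queries then become plain slice operations, and whole presorted batches can be spliced in at once (see `insert_presorted` below) instead of inserting element by element.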

r? @oli-obk
......@@ -12,7 +12,6 @@
pub use self::value::{PrimVal, PrimValKind, Value, Pointer, ConstValue};
use std::collections::BTreeMap;
use std::fmt;
use mir;
use hir::def_id::DefId;
......@@ -21,9 +20,11 @@
use middle::region;
use std::iter;
use std::io;
use std::ops::{Deref, DerefMut};
use std::hash::Hash;
use syntax::ast::Mutability;
use rustc_serialize::{Encoder, Decoder, Decodable, Encodable};
use rustc_data_structures::sorted_map::SortedMap;
use rustc_data_structures::fx::FxHashMap;
use byteorder::{WriteBytesExt, ReadBytesExt, LittleEndian, BigEndian};
......@@ -341,7 +342,7 @@ pub struct Allocation {
pub bytes: Vec<u8>,
/// Maps from byte addresses to allocations.
/// Only the first byte of a pointer is inserted into the map.
pub relocations: BTreeMap<Size, AllocId>,
pub relocations: Relocations,
/// Denotes undefined memory. Reading from undefined memory is forbidden in miri
pub undef_mask: UndefMask,
/// The alignment of the allocation to detect unaligned reads.
......@@ -358,7 +359,7 @@ pub fn from_bytes(slice: &[u8], align: Align) -> Self {
undef_mask.grow(Size::from_bytes(slice.len() as u64), true);
Self {
bytes: slice.to_owned(),
relocations: BTreeMap::new(),
relocations: Relocations::new(),
undef_mask,
align,
runtime_mutability: Mutability::Immutable,
......@@ -373,7 +374,7 @@ pub fn undef(size: Size, align: Align) -> Self {
assert_eq!(size.bytes() as usize as u64, size.bytes());
Allocation {
bytes: vec![0; size.bytes() as usize],
relocations: BTreeMap::new(),
relocations: Relocations::new(),
undef_mask: UndefMask::new(size),
align,
runtime_mutability: Mutability::Immutable,
......@@ -383,6 +384,35 @@ pub fn undef(size: Size, align: Align) -> Self {
impl<'tcx> ::serialize::UseSpecializedDecodable for &'tcx Allocation {}
#[derive(Clone, PartialEq, Eq, Hash, Debug, RustcEncodable, RustcDecodable)]
pub struct Relocations(SortedMap<Size, AllocId>);
impl Relocations {
pub fn new() -> Relocations {
Relocations(SortedMap::new())
}
// The caller must guarantee that the given relocations are already sorted
// by address and contain no duplicates.
pub fn from_presorted(r: Vec<(Size, AllocId)>) -> Relocations {
Relocations(SortedMap::from_presorted_elements(r))
}
}
impl Deref for Relocations {
type Target = SortedMap<Size, AllocId>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Relocations {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
////////////////////////////////////////////////////////////////////////////////
// Methods to access integers in the target endianness
////////////////////////////////////////////////////////////////////////////////
......
......@@ -83,7 +83,7 @@ pub fn const_alloc_to_llvm(cx: &CodegenCx, alloc: &Allocation) -> ValueRef {
let pointer_size = layout.pointer_size.bytes() as usize;
let mut next_offset = 0;
for (&offset, &alloc_id) in &alloc.relocations {
for &(offset, alloc_id) in alloc.relocations.iter() {
let offset = offset.bytes();
assert_eq!(offset as usize as u64, offset);
let offset = offset as usize;
......
......@@ -73,6 +73,7 @@
pub mod flock;
pub mod sync;
pub mod owning_ref;
pub mod sorted_map;
pub struct OnDrop<F: Fn()>(pub F);
......
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::convert::From;
use std::mem;
use std::ops::{RangeBounds, Bound, Index, IndexMut};
/// `SortedMap` is a data structure with similar characteristics as BTreeMap but
/// slightly different trade-offs: lookup is O(log(N)), insertion and removal
/// are O(N), and elements can be iterated in order cheaply.
///
/// `SortedMap` can be faster than a `BTreeMap` for small sizes (<50) since it
/// stores data in a more compact way. It also supports accessing contiguous
/// ranges of elements as a slice, and slices of already sorted elements can be
/// inserted efficiently.
#[derive(Clone, PartialEq, Eq, Hash, Default, Debug, RustcEncodable, RustcDecodable)]
pub struct SortedMap<K: Ord, V> {
data: Vec<(K,V)>
}
impl<K: Ord, V> SortedMap<K, V> {
#[inline]
pub fn new() -> SortedMap<K, V> {
SortedMap {
data: vec![]
}
}
/// Construct a `SortedMap` from a presorted set of elements. This is faster
/// than creating an empty map and then inserting the elements individually.
///
/// It is up to the caller to make sure that the elements are sorted by key
/// and that there are no duplicates.
#[inline]
pub fn from_presorted_elements(elements: Vec<(K, V)>) -> SortedMap<K, V>
{
debug_assert!(elements.windows(2).all(|w| w[0].0 < w[1].0));
SortedMap {
data: elements
}
}
#[inline]
pub fn insert(&mut self, key: K, mut value: V) -> Option<V> {
match self.lookup_index_for(&key) {
Ok(index) => {
let mut slot = unsafe {
self.data.get_unchecked_mut(index)
};
mem::swap(&mut slot.1, &mut value);
Some(value)
}
Err(index) => {
self.data.insert(index, (key, value));
None
}
}
}
#[inline]
pub fn remove(&mut self, key: &K) -> Option<V> {
match self.lookup_index_for(key) {
Ok(index) => {
Some(self.data.remove(index).1)
}
Err(_) => {
None
}
}
}
#[inline]
pub fn get(&self, key: &K) -> Option<&V> {
match self.lookup_index_for(key) {
Ok(index) => {
unsafe {
Some(&self.data.get_unchecked(index).1)
}
}
Err(_) => {
None
}
}
}
#[inline]
pub fn get_mut(&mut self, key: &K) -> Option<&mut V> {
match self.lookup_index_for(key) {
Ok(index) => {
unsafe {
Some(&mut self.data.get_unchecked_mut(index).1)
}
}
Err(_) => {
None
}
}
}
#[inline]
pub fn clear(&mut self) {
self.data.clear();
}
/// Iterate over elements, sorted by key
#[inline]
pub fn iter(&self) -> ::std::slice::Iter<(K, V)> {
self.data.iter()
}
/// Iterate over the keys, sorted
#[inline]
pub fn keys(&self) -> impl Iterator<Item=&K> + ExactSizeIterator {
self.data.iter().map(|&(ref k, _)| k)
}
/// Iterate over values, sorted by key
#[inline]
pub fn values(&self) -> impl Iterator<Item=&V> + ExactSizeIterator {
self.data.iter().map(|&(_, ref v)| v)
}
#[inline]
pub fn len(&self) -> usize {
self.data.len()
}
#[inline]
pub fn range<R>(&self, range: R) -> &[(K, V)]
where R: RangeBounds<K>
{
let (start, end) = self.range_slice_indices(range);
(&self.data[start .. end])
}
#[inline]
pub fn remove_range<R>(&mut self, range: R)
where R: RangeBounds<K>
{
let (start, end) = self.range_slice_indices(range);
self.data.splice(start .. end, ::std::iter::empty());
}
/// Mutate all keys with the given function `f`. This mutation must not
/// change the sort-order of keys.
#[inline]
pub fn offset_keys<F>(&mut self, f: F)
where F: Fn(&mut K)
{
self.data.iter_mut().map(|&mut (ref mut k, _)| k).for_each(f);
}
/// Inserts a presorted range of elements into the map. If the range can be
/// inserted as a whole in between two existing elements of the map, this
/// will be faster than inserting the elements individually.
///
/// It is up to the caller to make sure that the elements are sorted by key
/// and that there are no duplicates.
#[inline]
pub fn insert_presorted(&mut self, mut elements: Vec<(K, V)>) {
if elements.is_empty() {
return
}
debug_assert!(elements.windows(2).all(|w| w[0].0 < w[1].0));
let start_index = self.lookup_index_for(&elements[0].0);
let drain = match start_index {
Ok(index) => {
let mut drain = elements.drain(..);
self.data[index] = drain.next().unwrap();
drain
}
Err(index) => {
if index == self.data.len() ||
elements.last().unwrap().0 < self.data[index].0 {
// We can copy the whole range without having to mix with
// existing elements.
self.data.splice(index .. index, elements.drain(..));
return
}
let mut drain = elements.drain(..);
self.data.insert(index, drain.next().unwrap());
drain
}
};
// Insert the rest
for (k, v) in drain {
self.insert(k, v);
}
}
/// Looks up the key in `self.data` via `slice::binary_search()`.
#[inline(always)]
fn lookup_index_for(&self, key: &K) -> Result<usize, usize> {
self.data.binary_search_by(|&(ref x, _)| x.cmp(key))
}
#[inline]
fn range_slice_indices<R>(&self, range: R) -> (usize, usize)
where R: RangeBounds<K>
{
let start = match range.start() {
Bound::Included(ref k) => {
match self.lookup_index_for(k) {
Ok(index) | Err(index) => index
}
}
Bound::Excluded(ref k) => {
match self.lookup_index_for(k) {
Ok(index) => index + 1,
Err(index) => index,
}
}
Bound::Unbounded => 0,
};
let end = match range.end() {
Bound::Included(ref k) => {
match self.lookup_index_for(k) {
Ok(index) => index + 1,
Err(index) => index,
}
}
Bound::Excluded(ref k) => {
match self.lookup_index_for(k) {
Ok(index) | Err(index) => index,
}
}
Bound::Unbounded => self.data.len(),
};
(start, end)
}
}
impl<K: Ord, V> IntoIterator for SortedMap<K, V> {
type Item = (K, V);
type IntoIter = ::std::vec::IntoIter<(K, V)>;
fn into_iter(self) -> Self::IntoIter {
self.data.into_iter()
}
}
impl<K: Ord, V, Q: Borrow<K>> Index<Q> for SortedMap<K, V> {
type Output = V;
fn index(&self, index: Q) -> &Self::Output {
let k: &K = index.borrow();
self.get(k).unwrap()
}
}
impl<K: Ord, V, Q: Borrow<K>> IndexMut<Q> for SortedMap<K, V> {
fn index_mut(&mut self, index: Q) -> &mut Self::Output {
let k: &K = index.borrow();
self.get_mut(k).unwrap()
}
}
impl<K: Ord, V, I: Iterator<Item=(K, V)>> From<I> for SortedMap<K, V> {
fn from(data: I) -> Self {
let mut data: Vec<(K, V)> = data.collect();
data.sort_unstable_by(|&(ref k1, _), &(ref k2, _)| k1.cmp(k2));
data.dedup_by(|&mut (ref k1, _), &mut (ref k2, _)| {
k1.cmp(k2) == Ordering::Equal
});
SortedMap {
data
}
}
}
#[cfg(test)]
mod tests {
use super::SortedMap;
#[test]
fn test_insert_and_iter() {
let mut map = SortedMap::new();
let mut expected = Vec::new();
for x in 0 .. 100 {
assert_eq!(map.iter().cloned().collect::<Vec<_>>(), expected);
let x = 1000 - x * 2;
map.insert(x, x);
expected.insert(0, (x, x));
}
}
#[test]
fn test_get_and_index() {
let mut map = SortedMap::new();
let mut expected = Vec::new();
for x in 0 .. 100 {
let x = 1000 - x;
if x & 1 == 0 {
map.insert(x, x);
}
expected.push(x);
}
for mut x in expected {
if x & 1 == 0 {
assert_eq!(map.get(&x), Some(&x));
assert_eq!(map.get_mut(&x), Some(&mut x));
assert_eq!(map[&x], x);
assert_eq!(&mut map[&x], &mut x);
} else {
assert_eq!(map.get(&x), None);
assert_eq!(map.get_mut(&x), None);
}
}
}
#[test]
fn test_range() {
let mut map = SortedMap::new();
map.insert(1, 1);
map.insert(3, 3);
map.insert(6, 6);
map.insert(9, 9);
let keys = |s: &[(_, _)]| {
s.into_iter().map(|e| e.0).collect::<Vec<u32>>()
};
for start in 0 .. 11 {
for end in 0 .. 11 {
if end < start {
continue
}
let mut expected = vec![1, 3, 6, 9];
expected.retain(|&x| x >= start && x < end);
assert_eq!(keys(map.range(start..end)), expected, "range = {}..{}", start, end);
}
}
}
#[test]
fn test_offset_keys() {
let mut map = SortedMap::new();
map.insert(1, 1);
map.insert(3, 3);
map.insert(6, 6);
map.offset_keys(|k| *k += 1);
let mut expected = SortedMap::new();
expected.insert(2, 1);
expected.insert(4, 3);
expected.insert(7, 6);
assert_eq!(map, expected);
}
fn keys(s: SortedMap<u32, u32>) -> Vec<u32> {
s.into_iter().map(|(k, _)| k).collect::<Vec<u32>>()
}
fn elements(s: SortedMap<u32, u32>) -> Vec<(u32, u32)> {
s.into_iter().collect::<Vec<(u32, u32)>>()
}
#[test]
fn test_remove_range() {
let mut map = SortedMap::new();
map.insert(1, 1);
map.insert(3, 3);
map.insert(6, 6);
map.insert(9, 9);
for start in 0 .. 11 {
for end in 0 .. 11 {
if end < start {
continue
}
let mut expected = vec![1, 3, 6, 9];
expected.retain(|&x| x < start || x >= end);
let mut map = map.clone();
map.remove_range(start .. end);
assert_eq!(keys(map), expected, "range = {}..{}", start, end);
}
}
}
#[test]
fn test_remove() {
let mut map = SortedMap::new();
let mut expected = Vec::new();
for x in 0..10 {
map.insert(x, x);
expected.push((x, x));
}
for x in 0 .. 10 {
let mut map = map.clone();
let mut expected = expected.clone();
assert_eq!(map.remove(&x), Some(x));
expected.remove(x as usize);
assert_eq!(map.iter().cloned().collect::<Vec<_>>(), expected);
}
}
#[test]
fn test_insert_presorted_non_overlapping() {
let mut map = SortedMap::new();
map.insert(2, 0);
map.insert(8, 0);
map.insert_presorted(vec![(3, 0), (7, 0)]);
let expected = vec![2, 3, 7, 8];
assert_eq!(keys(map), expected);
}
#[test]
fn test_insert_presorted_first_elem_equal() {
let mut map = SortedMap::new();
map.insert(2, 2);
map.insert(8, 8);
map.insert_presorted(vec![(2, 0), (7, 7)]);
let expected = vec![(2, 0), (7, 7), (8, 8)];
assert_eq!(elements(map), expected);
}
#[test]
fn test_insert_presorted_last_elem_equal() {
let mut map = SortedMap::new();
map.insert(2, 2);
map.insert(8, 8);
map.insert_presorted(vec![(3, 3), (8, 0)]);
let expected = vec![(2, 2), (3, 3), (8, 0)];
assert_eq!(elements(map), expected);
}
#[test]
fn test_insert_presorted_shuffle() {
let mut map = SortedMap::new();
map.insert(2, 2);
map.insert(7, 7);
map.insert_presorted(vec![(1, 1), (3, 3), (8, 8)]);
let expected = vec![(1, 1), (2, 2), (3, 3), (7, 7), (8, 8)];
assert_eq!(elements(map), expected);
}
#[test]
fn test_insert_presorted_at_end() {
let mut map = SortedMap::new();
map.insert(1, 1);
map.insert(2, 2);
map.insert_presorted(vec![(3, 3), (8, 8)]);
let expected = vec![(1, 1), (2, 2), (3, 3), (8, 8)];
assert_eq!(elements(map), expected);
}
}
use std::collections::{btree_map, VecDeque};
use std::collections::VecDeque;
use std::ptr;
use rustc::hir::def_id::DefId;
......@@ -519,7 +519,7 @@ fn get_bytes_unchecked_mut(
fn get_bytes(&self, ptr: MemoryPointer, size: Size, align: Align) -> EvalResult<'tcx, &[u8]> {
assert_ne!(size.bytes(), 0);
if self.relocations(ptr, size)?.count() != 0 {
if self.relocations(ptr, size)?.len() != 0 {
return err!(ReadPointerAsBytes);
}
self.check_defined(ptr, size)?;
......@@ -614,9 +614,9 @@ pub fn copy(
// first copy the relocations to a temporary buffer, because
// `get_bytes_mut` will clear the relocations, which is correct,
// since we don't want to keep any relocations at the target.
let relocations: Vec<_> = self.relocations(src, size)?
.map(|(&offset, &alloc_id)| {
.iter()
.map(|&(offset, alloc_id)| {
// Update relocation offsets for the new positions in the destination allocation.
(offset + dest.offset - src.offset, alloc_id)
})
......@@ -648,7 +648,7 @@ pub fn copy(
self.copy_undef_mask(src, dest, size)?;
// copy back the relocations
self.get_mut(dest.alloc_id)?.relocations.extend(relocations);
self.get_mut(dest.alloc_id)?.relocations.insert_presorted(relocations);
Ok(())
}
......@@ -660,7 +660,7 @@ pub fn read_c_str(&self, ptr: MemoryPointer) -> EvalResult<'tcx, &[u8]> {
match alloc.bytes[offset..].iter().position(|&c| c == 0) {
Some(size) => {
let p1 = Size::from_bytes((size + 1) as u64);
if self.relocations(ptr, p1)?.count() != 0 {
if self.relocations(ptr, p1)?.len() != 0 {
return err!(ReadPointerAsBytes);
}
self.check_defined(ptr, p1)?;
......@@ -720,7 +720,7 @@ pub fn read_primval(&self, ptr: MemoryPointer, ptr_align: Align, size: Size) ->
let bytes = read_target_uint(endianness, bytes).unwrap();
// See if we got a pointer
if size != self.pointer_size() {
if self.relocations(ptr, size)?.count() != 0 {
if self.relocations(ptr, size)?.len() != 0 {
return err!(ReadPointerAsBytes);
}
} else {
......@@ -808,24 +808,26 @@ fn relocations(
&self,
ptr: MemoryPointer,
size: Size,
) -> EvalResult<'tcx, btree_map::Range<Size, AllocId>> {
) -> EvalResult<'tcx, &[(Size, AllocId)]> {
let start = ptr.offset.bytes().saturating_sub(self.pointer_size().bytes() - 1);
let end = ptr.offset + size;
Ok(self.get(ptr.alloc_id)?.relocations.range(Size::from_bytes(start)..end))
}
fn clear_relocations(&mut self, ptr: MemoryPointer, size: Size) -> EvalResult<'tcx> {
// Find all relocations overlapping the given range.
let keys: Vec<_> = self.relocations(ptr, size)?.map(|(&k, _)| k).collect();
if keys.is_empty() {
return Ok(());
}
// Find the start and end of the given range and its outermost relocations.
let (first, last) = {
// Find all relocations overlapping the given range.
let relocations = self.relocations(ptr, size)?;
if relocations.is_empty() {
return Ok(());
}
(relocations.first().unwrap().0,
relocations.last().unwrap().0 + self.pointer_size())
};
let start = ptr.offset;
let end = start + size;
let first = *keys.first().unwrap();
let last = *keys.last().unwrap() + self.pointer_size();
let alloc = self.get_mut(ptr.alloc_id)?;
......@@ -839,16 +841,14 @@ fn clear_relocations(&mut self, ptr: MemoryPointer, size: Size) -> EvalResult<'t
}
// Forget all the relocations.
for k in keys {
alloc.relocations.remove(&k);
}
alloc.relocations.remove_range(first ..= last);
Ok(())
}
fn check_relocation_edges(&self, ptr: MemoryPointer, size: Size) -> EvalResult<'tcx> {
let overlapping_start = self.relocations(ptr, Size::from_bytes(0))?.count();
let overlapping_end = self.relocations(ptr.offset(size, self)?, Size::from_bytes(0))?.count();
let overlapping_start = self.relocations(ptr, Size::from_bytes(0))?.len();
let overlapping_end = self.relocations(ptr.offset(size, self)?, Size::from_bytes(0))?.len();
if overlapping_start + overlapping_end != 0 {
return err!(ReadPointerAsBytes);
}
......