std::str: Tune CharSplitIterator after benchmarks

Implement Huon Wilson's suggestions (since the benchmarks agree!). Use `self.sep.matches(byte as char) && byte < 128u8` to match in the `only_ascii` case, so that mistaken matches outside the ASCII range can't create invalid substrings. Move the conditional on `only_ascii` outside the loop.

std::str: Tune CharSplitIterator after benchmarks
Implement Huon Wilson's suggestions (since the benchmarks agree!). Use `self.sep.matches(byte as char) && byte < 128u8` to match in the `only_ascii` case, so that mistaken matches outside the ASCII range can't create invalid substrings. Move the conditional on `only_ascii` outside the loop.
4de9bca4 · blake2-ppc · 413f8682 · 4de9bca4
隐藏空白更改
内联并排

Showing 1 changed file with 44 additions and 55 deletions

src/libstd/str.rs src/libstd/str.rs +44 -55

未找到文件。
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -21,7 +21,6 @@
 use char::Char;
 use clone::{Clone, DeepClone};
 use container::{Container, Mutable};
-use either::{Left, Right};
 use iter::Times;
 use iterator::{Iterator, FromIterator, Extendable};
 use iterator::{Filter, AdditiveIterator, Map};
@@ -411,36 +410,30 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep>
    fn next(&mut self) -> Option<&'self str> {
        if self.finished { return None }

-        let len = self.string.len();
-        let mut iter = match self.only_ascii {
-            true => Left(self.string.byte_iter().enumerate()),
-            false => Right(self.string.char_offset_iter())
-        };
-
-        loop {
-            let (idx, next) = match iter {
-                // this gives a *huge* speed up for splitting on ASCII
-                // characters (e.g. '\n' or ' ')
-                Left(ref mut it) => match it.next() {
-                    Some((idx, byte)) if byte < 128u8 && self.sep.matches(byte as char) =>
-                        (idx, idx + 1),
-                    Some(*) => loop,
-                    None => break,
-                },
-                Right(ref mut it) => match it.next() {
-                    Some((idx, ch)) if self.sep.matches(ch) =>
-                        (idx, self.string.char_range_at(idx).next),
-                    Some(*) => loop,
-                    None => break,
+        let mut next_split = None;
+        if self.only_ascii {
+            for (idx, byte) in self.string.byte_iter().enumerate() {
+                if self.sep.matches(byte as char) && byte < 128u8 {
+                    next_split = Some((idx, idx + 1));
+                    break;
                }
-            };
-            unsafe {
-                let elt = raw::slice_bytes(self.string, 0, idx);
-                self.string = raw::slice_bytes(self.string, next, len);
-                return Some(elt)
            }
+        } else {
+            for (idx, ch) in self.string.char_offset_iter() {
+                if self.sep.matches(ch) {
+                    next_split = Some((idx, self.string.char_range_at(idx).next));
+                    break;
+                }
+            }
+        }
+        match next_split {
+            Some((a, b)) => unsafe {
+                let elt = raw::slice_unchecked(self.string, 0, a);
+                self.string = raw::slice_unchecked(self.string, b, self.string.len());
+                Some(elt)
+            },
+            None => self.get_end(),
        }
-        self.get_end()
    }
 }

@@ -458,36 +451,32 @@ fn next_back(&mut self) -> Option<&'self str> {
            }
        }
        let len = self.string.len();
-        let mut iter = match self.only_ascii {
-            true => Left(self.string.byte_rev_iter().enumerate()),
-            false => Right(self.string.char_offset_iter())
-        };
-
-        loop {
-            let (idx, next) = match iter {
-                Left(ref mut it) => match it.next() {
-                    Some((j, byte)) if byte < 128u8 && self.sep.matches(byte as char) => {
-                        let idx = self.string.len() - j - 1;
-                        (idx, idx + 1)
-                    },
-                    Some(*) => loop,
-                    None => break,
-                },
-                Right(ref mut it) => match it.next_back() {
-                    Some((idx, ch)) if self.sep.matches(ch) =>
-                        (idx, self.string.char_range_at(idx).next),
-                    Some(*) => loop,
-                    None => break,
+        let mut next_split = None;
+
+        if self.only_ascii {
+            for (j, byte) in self.string.byte_rev_iter().enumerate() {
+                if self.sep.matches(byte as char) && byte < 128u8 {
+                    let idx = len - j - 1;
+                    next_split = Some((idx, idx + 1));
+                    break;
                }
-            };
-            unsafe {
-                let elt = raw::slice_bytes(self.string, next, len);
-                self.string = raw::slice_bytes(self.string, 0, idx);
-                return Some(elt)
            }
+        } else {
+            for (idx, ch) in self.string.char_offset_rev_iter() {
+                if self.sep.matches(ch) {
+                    next_split = Some((idx, self.string.char_range_at(idx).next));
+                    break;
+                }
+            }
+        }
+        match next_split {
+            Some((a, b)) => unsafe {
+                let elt = raw::slice_unchecked(self.string, b, len);
+                self.string = raw::slice_unchecked(self.string, 0, a);
+                Some(elt)
+            },
+            None => { self.finished = true; Some(self.string) }
        }
-        self.finished = true;
-        Some(self.string)
    }
 }