From 4de9bca4d813b88f06f7c5f64d2c452c80611f6b Mon Sep 17 00:00:00 2001 From: blake2-ppc Date: Mon, 26 Aug 2013 11:48:49 +0200 Subject: [PATCH] std::str: Tune CharSplitIterator after benchmarks Implement Huon Wilson's suggestions (since the benchmarks agree!). Use `self.sep.matches(byte as char) && byte < 128u8` to match in the only_ascii case so that mistaken matches outside the ascii range can't create invalid substrings. Put the conditional on only_ascii outside the loop. --- src/libstd/str.rs | 99 +++++++++++++++++++++-------------------------- 1 file changed, 44 insertions(+), 55 deletions(-) diff --git a/src/libstd/str.rs b/src/libstd/str.rs index 057d09bd665..431ce7fb6fe 100644 --- a/src/libstd/str.rs +++ b/src/libstd/str.rs @@ -21,7 +21,6 @@ use char::Char; use clone::{Clone, DeepClone}; use container::{Container, Mutable}; -use either::{Left, Right}; use iter::Times; use iterator::{Iterator, FromIterator, Extendable}; use iterator::{Filter, AdditiveIterator, Map}; @@ -411,36 +410,30 @@ impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep> fn next(&mut self) -> Option<&'self str> { if self.finished { return None } - let len = self.string.len(); - let mut iter = match self.only_ascii { - true => Left(self.string.byte_iter().enumerate()), - false => Right(self.string.char_offset_iter()) - }; - - loop { - let (idx, next) = match iter { - // this gives a *huge* speed up for splitting on ASCII - // characters (e.g. '\n' or ' ') - Left(ref mut it) => match it.next() { - Some((idx, byte)) if byte < 128u8 && self.sep.matches(byte as char) => - (idx, idx + 1), - Some(*) => loop, - None => break, - }, - Right(ref mut it) => match it.next() { - Some((idx, ch)) if self.sep.matches(ch) => - (idx, self.string.char_range_at(idx).next), - Some(*) => loop, - None => break, + let mut next_split = None; + if self.only_ascii { + for (idx, byte) in self.string.byte_iter().enumerate() { + if self.sep.matches(byte as char) && byte < 128u8 { + next_split = Some((idx, idx + 1)); + break; } - }; - unsafe { - let elt = raw::slice_bytes(self.string, 0, idx); - self.string = raw::slice_bytes(self.string, next, len); - return Some(elt) } + } else { + for (idx, ch) in self.string.char_offset_iter() { + if self.sep.matches(ch) { + next_split = Some((idx, self.string.char_range_at(idx).next)); + break; + } + } + } + match next_split { + Some((a, b)) => unsafe { + let elt = raw::slice_unchecked(self.string, 0, a); + self.string = raw::slice_unchecked(self.string, b, self.string.len()); + Some(elt) + }, + None => self.get_end(), } - self.get_end() } } @@ -458,36 +451,32 @@ fn next_back(&mut self) -> Option<&'self str> { } } let len = self.string.len(); - let mut iter = match self.only_ascii { - true => Left(self.string.byte_rev_iter().enumerate()), - false => Right(self.string.char_offset_iter()) - }; - - loop { - let (idx, next) = match iter { - Left(ref mut it) => match it.next() { - Some((j, byte)) if byte < 128u8 && self.sep.matches(byte as char) => { - let idx = self.string.len() - j - 1; - (idx, idx + 1) - }, - Some(*) => loop, - None => break, - }, - Right(ref mut it) => match it.next_back() { - Some((idx, ch)) if self.sep.matches(ch) => - (idx, self.string.char_range_at(idx).next), - Some(*) => loop, - None => break, + let mut next_split = None; + + if self.only_ascii { + for (j, byte) in self.string.byte_rev_iter().enumerate() { + if self.sep.matches(byte as char) && byte < 128u8 { + let idx = len - j - 1; + next_split = Some((idx, idx + 1)); + break; } - }; - unsafe { - let elt = raw::slice_bytes(self.string, next, len); - self.string = raw::slice_bytes(self.string, 0, idx); - return Some(elt) } + } else { + for (idx, ch) in self.string.char_offset_rev_iter() { + if self.sep.matches(ch) { + next_split = Some((idx, self.string.char_range_at(idx).next)); + break; + } + } + } + match next_split { + Some((a, b)) => unsafe { + let elt = raw::slice_unchecked(self.string, b, len); + self.string = raw::slice_unchecked(self.string, 0, a); + Some(elt) + }, + None => { self.finished = true; Some(self.string) } } - self.finished = true; - Some(self.string) } } -- GitLab