提交 b5392f54 编写于 作者: B bors

Auto merge of #47208 - Manishearth:double-ended-searcher, r=pnkfelix

Make double ended searchers use dependent fingers

(fixes #47175)

r? @burntsushi @alexcrichton

needs uplift to beta
......@@ -284,7 +284,7 @@ fn haystack(&self) -> &'a str {
#[inline]
fn next(&mut self) -> SearchStep {
let old_finger = self.finger;
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.haystack.len()) };
let slice = unsafe { self.haystack.get_unchecked(old_finger..self.finger_back) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
if let Some(ch) = iter.next() {
......@@ -304,7 +304,8 @@ fn next(&mut self) -> SearchStep {
fn next_match(&mut self) -> Option<(usize, usize)> {
loop {
// get the haystack after the last character found
let bytes = if let Some(slice) = self.haystack.as_bytes().get(self.finger..) {
let bytes = if let Some(slice) = self.haystack.as_bytes()
.get(self.finger..self.finger_back) {
slice
} else {
return None;
......@@ -340,7 +341,7 @@ fn next_match(&mut self) -> Option<(usize, usize)> {
}
} else {
// found nothing, exit
self.finger = self.haystack.len();
self.finger = self.finger_back;
return None;
}
}
......@@ -353,7 +354,7 @@ unsafe impl<'a> ReverseSearcher<'a> for CharSearcher<'a> {
#[inline]
fn next_back(&mut self) -> SearchStep {
let old_finger = self.finger_back;
let slice = unsafe { self.haystack.slice_unchecked(0, old_finger) };
let slice = unsafe { self.haystack.slice_unchecked(self.finger, old_finger) };
let mut iter = slice.chars();
let old_len = iter.iter.len();
if let Some(ch) = iter.next_back() {
......@@ -374,7 +375,7 @@ fn next_match_back(&mut self) -> Option<(usize, usize)> {
let haystack = self.haystack.as_bytes();
loop {
// get the haystack up to but not including the last character searched
let bytes = if let Some(slice) = haystack.get(..self.finger_back) {
let bytes = if let Some(slice) = haystack.get(self.finger..self.finger_back) {
slice
} else {
return None;
......@@ -382,6 +383,9 @@ fn next_match_back(&mut self) -> Option<(usize, usize)> {
// the last byte of the utf8 encoded needle
let last_byte = unsafe { *self.utf8_encoded.get_unchecked(self.utf8_size - 1) };
if let Some(index) = memchr::memrchr(last_byte, bytes) {
// we searched a slice that was offset by self.finger,
// add self.finger to recoup the original index
let index = self.finger + index;
// memrchr will return the index of the byte we wish to
// find. In case of an ASCII character, this is indeed
// where we wish our new finger to be ("after" the found
......@@ -412,7 +416,7 @@ fn next_match_back(&mut self) -> Option<(usize, usize)> {
// found the last byte when searching in reverse.
self.finger_back = index;
} else {
self.finger_back = 0;
self.finger_back = self.finger;
// found nothing, exit
return None;
}
......
......@@ -262,3 +262,41 @@ fn test_reverse_search_shared_bytes() {
[InRange(37, 40), Rejects(34, 37), InRange(10, 13), Rejects(8, 10), Done]
);
}
// Regression test for mixing front-driven calls (`next`, `next_match`) with
// back-driven calls (`next_back`, `next_match_back`) while searching for a
// single `char`. The expected sequences below only hold if the two ends stop
// at each other instead of re-reporting text the other end already covered.
//
// NOTE(review): `search_asserts!` and `STRESS` are defined outside this block.
// Presumably each invocation is (haystack, needle, description,
// [methods to call in order], [expected result of each call]) and the numbers
// in `InRange`/`Rejects` are byte offsets into the haystack -- confirm against
// the macro definition.
#[test]
fn double_ended_regression_test() {
// https://github.com/rust-lang/rust/issues/47175
// Ensures that double ended searching comes to a convergence

// 'a' sits at bytes 0, 5 and 10 of this haystack. Alternating ends takes
// 0 from the front, 10 from the back, 5 from the front; the last call from
// the back then has nothing left and must report Done.
search_asserts!("abcdeabcdeabcde", 'a', "alternating double ended search",
[next_match, next_match_back, next_match, next_match_back],
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
);
// Same haystack and needle, but after one match from the front every
// remaining call comes from the back: 10, then 5, then Done, because the
// occurrence at 0 was already consumed from the front.
search_asserts!("abcdeabcdeabcde", 'a', "triple double ended search for a",
[next_match, next_match_back, next_match_back, next_match_back],
[InRange(0, 1), InRange(10, 11), InRange(5, 6), Done]
);
// Same call pattern with a needle that is not at the start of the haystack
// ('d' sits at bytes 3, 8 and 13).
search_asserts!("abcdeabcdeabcde", 'd', "triple double ended search for d",
[next_match, next_match_back, next_match_back, next_match_back],
[InRange(3, 4), InRange(13, 14), InRange(8, 9), Done]
);
// The remaining cases use the shared STRESS haystack with multi-byte needles
// (the expected ranges are 2, 3 or 4 bytes wide).
search_asserts!(STRESS, 'Á', "Double ended search for two-byte Latin character",
[next_match, next_match_back, next_match, next_match_back],
[InRange(0, 2), InRange(32, 34), InRange(8, 10), Done]
);
// From here on the single-step calls (`next`, `next_back`) are interleaved
// with the match-seeking calls, so `Rejects` results appear in the expected
// lists as well.
search_asserts!(STRESS, '각', "Reverse double ended search for three-byte Hangul character",
[next_match_back, next_back, next_match, next, next_match_back, next_match],
[InRange(34, 37), Rejects(32, 34), InRange(19, 22), Rejects(22, 25), InRange(28, 31), Done]
);
search_asserts!(STRESS, 'ก', "Double ended search for three-byte Thai character",
[next_match, next_back, next, next_match_back, next_match],
[InRange(22, 25), Rejects(47, 48), Rejects(25, 28), InRange(40, 43), Done]
);
search_asserts!(STRESS, '😁', "Double ended search for four-byte emoji",
[next_match_back, next, next_match, next_back, next_match],
[InRange(43, 47), Rejects(0, 2), InRange(15, 19), Rejects(40, 43), Done]
);
// Per the description string, this needle's UTF-8 encoding contains
// repeated bytes.
search_asserts!(STRESS, 'ꁁ', "Double ended search for three-byte Yi character with repeated bytes",
[next_match, next, next_match_back, next_back, next_match],
[InRange(10, 13), Rejects(13, 14), InRange(37, 40), Rejects(34, 37), Done]
);
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册