提交 9cf6fba9 编写于 作者: B bors

Auto merge of #31253 - ranma42:improve-unicode-iter-offset, r=brson

Improve computation of offset in `EscapeUnicode`

Unify the computation of `offset` and use `leading_zeros` instead of manually scanning the bits.
This PR removes some duplicated code and makes it a little simpler.
The computation of `offset` is also faster, but it is unlikely to have an impact on actual code.

(split from #31049)
...@@ -299,7 +299,20 @@ fn to_digit(self, radix: u32) -> Option<u32> { ...@@ -299,7 +299,20 @@ fn to_digit(self, radix: u32) -> Option<u32> {
#[inline] #[inline]
fn escape_unicode(self) -> EscapeUnicode { fn escape_unicode(self) -> EscapeUnicode {
EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash } let c = self as u32;
// or-ing 1 ensures that for c==0 the code computes that one
// digit should be printed and (which is the same) avoids the
// (31 - 32) underflow
let msb = 31 - (c | 1).leading_zeros();
// the index of the most significant hex digit
let ms_hex_digit = msb / 4;
EscapeUnicode {
c: self,
state: EscapeUnicodeState::Backslash,
hex_digit_idx: ms_hex_digit as usize,
}
} }
#[inline] #[inline]
...@@ -392,7 +405,12 @@ fn encode_utf16(self) -> EncodeUtf16 { ...@@ -392,7 +405,12 @@ fn encode_utf16(self) -> EncodeUtf16 {
#[stable(feature = "rust1", since = "1.0.0")] #[stable(feature = "rust1", since = "1.0.0")]
pub struct EscapeUnicode { pub struct EscapeUnicode {
c: char, c: char,
state: EscapeUnicodeState state: EscapeUnicodeState,
// The index of the next hex digit to be printed (0 if none),
// i.e. the number of remaining hex digits to be printed;
// increasing from the least significant digit: 0x543210
hex_digit_idx: usize,
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
...@@ -400,7 +418,7 @@ enum EscapeUnicodeState { ...@@ -400,7 +418,7 @@ enum EscapeUnicodeState {
Backslash, Backslash,
Type, Type,
LeftBrace, LeftBrace,
Value(usize), Value,
RightBrace, RightBrace,
Done, Done,
} }
...@@ -420,19 +438,16 @@ fn next(&mut self) -> Option<char> { ...@@ -420,19 +438,16 @@ fn next(&mut self) -> Option<char> {
Some('u') Some('u')
} }
EscapeUnicodeState::LeftBrace => { EscapeUnicodeState::LeftBrace => {
let mut n = 0; self.state = EscapeUnicodeState::Value;
while (self.c as u32) >> (4 * (n + 1)) != 0 {
n += 1;
}
self.state = EscapeUnicodeState::Value(n);
Some('{') Some('{')
} }
EscapeUnicodeState::Value(offset) => { EscapeUnicodeState::Value => {
let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap(); let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf;
if offset == 0 { let c = from_digit(hex_digit, 16).unwrap();
if self.hex_digit_idx == 0 {
self.state = EscapeUnicodeState::RightBrace; self.state = EscapeUnicodeState::RightBrace;
} else { } else {
self.state = EscapeUnicodeState::Value(offset - 1); self.hex_digit_idx -= 1;
} }
Some(c) Some(c)
} }
...@@ -445,18 +460,15 @@ fn next(&mut self) -> Option<char> { ...@@ -445,18 +460,15 @@ fn next(&mut self) -> Option<char> {
} }
fn size_hint(&self) -> (usize, Option<usize>) { fn size_hint(&self) -> (usize, Option<usize>) {
let mut n = 0;
while (self.c as usize) >> (4 * (n + 1)) != 0 {
n += 1;
}
let n = match self.state { let n = match self.state {
EscapeUnicodeState::Backslash => n + 5, EscapeUnicodeState::Backslash => 5,
EscapeUnicodeState::Type => n + 4, EscapeUnicodeState::Type => 4,
EscapeUnicodeState::LeftBrace => n + 3, EscapeUnicodeState::LeftBrace => 3,
EscapeUnicodeState::Value(offset) => offset + 2, EscapeUnicodeState::Value => 2,
EscapeUnicodeState::RightBrace => 1, EscapeUnicodeState::RightBrace => 1,
EscapeUnicodeState::Done => 0, EscapeUnicodeState::Done => 0,
}; };
let n = n + self.hex_digit_idx;
(n, Some(n)) (n, Some(n))
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册