提交 1f147a2e 编写于 作者: J Julian Wollersberger

Replace `nth_char(0)` with `next()` in `cursor.first()`

and optimize the iterator returned by `tokenize()`.

This improves lexer performance by 35%
上级 72d66064
......@@ -2,10 +2,11 @@
/// Peekable iterator over a char sequence.
///
/// Next characters can be peeked via `nth_char` method,
/// Next characters can be peeked via `first` method,
/// and position can be shifted forward via `bump` method.
pub(crate) struct Cursor<'a> {
initial_len: usize,
/// Iterator over chars. Slightly faster than a &str.
chars: Chars<'a>,
#[cfg(debug_assertions)]
prev: char,
......@@ -37,22 +38,21 @@ pub(crate) fn prev(&self) -> char {
}
}
/// Returns nth character relative to the current cursor position.
/// Peeks the next symbol from the input stream without consuming it.
/// If requested position doesn't exist, `EOF_CHAR` is returned.
/// However, getting `EOF_CHAR` doesn't always mean actual end of file,
/// it should be checked with `is_eof` method.
fn nth_char(&self, n: usize) -> char {
self.chars().nth(n).unwrap_or(EOF_CHAR)
}
/// Peeks the next symbol from the input stream without consuming it.
pub(crate) fn first(&self) -> char {
self.nth_char(0)
// `.next()` optimizes better than `.nth(0)`
self.chars.clone().next().unwrap_or(EOF_CHAR)
}
/// Peeks the second symbol from the input stream without consuming it.
pub(crate) fn second(&self) -> char {
self.nth_char(1)
// `.next()` optimizes better than `.nth(1)`
let mut iter = self.chars.clone();
iter.next();
iter.next().unwrap_or(EOF_CHAR)
}
/// Checks if there is nothing more to consume.
......@@ -65,9 +65,9 @@ pub(crate) fn len_consumed(&self) -> usize {
self.initial_len - self.chars.as_str().len()
}
/// Returns a `Chars` iterator over the remaining characters.
fn chars(&self) -> Chars<'a> {
self.chars.clone()
/// Resets the number of bytes consumed to 0.
///
/// Stores the byte length of the remaining input as the new `initial_len`
/// baseline. Since `len_consumed` is computed as `initial_len` minus the
/// remaining length, it reports 0 immediately after this call.
pub(crate) fn reset_len_consumed(&mut self) {
self.initial_len = self.chars.as_str().len();
}
/// Moves to the next character.
......@@ -81,4 +81,13 @@ pub(crate) fn bump(&mut self) -> Option<char> {
Some(c)
}
/// Eats symbols while predicate returns true or until the end of file is reached.
///
/// `predicate` is called with the character peeked by `first()`; each time it
/// returns true and the cursor is not at the end of input, one character is
/// consumed via `bump()`.
pub(crate) fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// An optimized version of this was tried for e.g. line comments, but
// LLVM can inline all of this and compile it down to fast iteration over bytes.
//
// `first()` yields `EOF_CHAR` once no characters remain, so a predicate that
// accepts `EOF_CHAR` would never stop the loop by itself; the `is_eof()`
// check is what ends it at the end of input.
while predicate(self.first()) && !self.is_eof() {
// The `Option<char>` returned by `bump` is not needed here.
self.bump();
}
}
}
......@@ -225,14 +225,15 @@ pub fn first_token(input: &str) -> Token {
}
/// Creates an iterator that produces tokens from the input string.
pub fn tokenize(mut input: &str) -> impl Iterator<Item = Token> + '_ {
pub fn tokenize(input: &str) -> impl Iterator<Item = Token> + '_ {
let mut cursor = Cursor::new(input);
std::iter::from_fn(move || {
if input.is_empty() {
return None;
if cursor.is_eof() {
None
} else {
cursor.reset_len_consumed();
Some(cursor.advance_token())
}
let token = first_token(input);
input = &input[token.len..];
Some(token)
})
}
......@@ -808,11 +809,4 @@ fn eat_identifier(&mut self) {
self.eat_while(is_id_continue);
}
/// Eats symbols while predicate returns true or until the end of file is reached.
fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
// The predicate is tested on the peeked character first; the `is_eof()`
// check then stops the loop once the end of input is reached.
while predicate(self.first()) && !self.is_eof() {
self.bump();
}
}
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册