未验证 提交 bc335d6c 作者: Mazdak Farrokhzad 提交者: GitHub

Rollup merge of #62124 - matklad:without-with, r=petrochenkov

refactor lexer to use idiomatic borrowing
use crate::ast;
use crate::parse::ParseSess;
use crate::parse::token::{self, Token, TokenKind};
use crate::symbol::{sym, Symbol};
......@@ -321,33 +320,29 @@ fn src_index(&self, pos: BytePos) -> usize {
(pos - self.source_file.start_pos).to_usize()
}
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `self.pos`, meaning the slice does not include
/// the character `self.ch`.
fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
/// Slice of the source text from `start` up to but excluding `self.pos`,
/// meaning the slice does not include the character `self.ch`.
fn str_from(&self, start: BytePos) -> &str
{
self.with_str_from_to(start, self.pos, f)
self.str_from_to(start, self.pos)
}
/// Creates a Name from a given offset to the current offset.
fn name_from(&self, start: BytePos) -> ast::Name {
/// Creates a Symbol from a given offset to the current offset.
fn symbol_from(&self, start: BytePos) -> Symbol {
debug!("taking an ident from {:?} to {:?}", start, self.pos);
self.with_str_from(start, Symbol::intern)
Symbol::intern(self.str_from(start))
}
/// As name_from, with an explicit endpoint.
fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
/// As symbol_from, with an explicit endpoint.
fn symbol_from_to(&self, start: BytePos, end: BytePos) -> Symbol {
debug!("taking an ident from {:?} to {:?}", start, end);
self.with_str_from_to(start, end, Symbol::intern)
Symbol::intern(self.str_from_to(start, end))
}
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
/// Slice of the source text spanning from `start` up to but excluding `end`.
fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
{
f(&self.src[self.src_index(start)..self.src_index(end)])
&self.src[self.src_index(start)..self.src_index(end)]
}
/// Converts CRLF to LF in the given string, raising an error on bare CR.
......@@ -444,7 +439,7 @@ fn nextnextch_is(&self, c: char) -> bool {
}
/// Eats <XID_start><XID_continue>*, if possible.
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
fn scan_optional_raw_name(&mut self) -> Option<Symbol> {
if !ident_start(self.ch) {
return None;
}
......@@ -456,8 +451,8 @@ fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
self.bump();
}
self.with_str_from(start, |string| {
if string == "_" {
match self.str_from(start) {
"_" => {
self.sess.span_diagnostic
.struct_span_warn(self.mk_sp(start, self.pos),
"underscore literal suffix is not allowed")
......@@ -468,10 +463,9 @@ fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
<https://github.com/rust-lang/rust/issues/42326>")
.emit();
None
} else {
Some(Symbol::intern(string))
}
})
name => Some(Symbol::intern(name))
}
}
/// PRECONDITION: self.ch is not whitespace
......@@ -513,9 +507,7 @@ fn scan_comment(&mut self) -> Option<Token> {
}
let kind = if doc_comment {
self.with_str_from(start_bpos, |string| {
token::DocComment(Symbol::intern(string))
})
token::DocComment(self.symbol_from(start_bpos))
} else {
token::Comment
};
......@@ -544,7 +536,7 @@ fn scan_comment(&mut self) -> Option<Token> {
self.bump();
}
return Some(Token::new(
token::Shebang(self.name_from(start)),
token::Shebang(self.symbol_from(start)),
self.mk_sp(start, self.pos),
));
}
......@@ -615,23 +607,22 @@ fn scan_block_comment(&mut self) -> Option<Token> {
self.bump();
}
self.with_str_from(start_bpos, |string| {
// but comments with only "*"s between two "/"s are not
let kind = if is_block_doc_comment(string) {
let string = if has_cr {
self.translate_crlf(start_bpos,
string,
"bare CR not allowed in block doc-comment")
} else {
string.into()
};
token::DocComment(Symbol::intern(&string[..]))
let string = self.str_from(start_bpos);
// but comments with only "*"s between two "/"s are not
let kind = if is_block_doc_comment(string) {
let string = if has_cr {
self.translate_crlf(start_bpos,
string,
"bare CR not allowed in block doc-comment")
} else {
token::Comment
string.into()
};
token::DocComment(Symbol::intern(&string[..]))
} else {
token::Comment
};
Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
})
Some(Token::new(kind, self.mk_sp(start_bpos, self.pos)))
}
/// Scan through any digits (base `scan_radix`) or underscores,
......@@ -727,17 +718,17 @@ fn scan_number(&mut self, c: char) -> (token::LitKind, Symbol) {
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
(token::Float, self.name_from(start_bpos))
(token::Float, self.symbol_from(start_bpos))
} else {
// it might be a float if it has an exponent
if self.ch_is('e') || self.ch_is('E') {
self.scan_float_exponent();
let pos = self.pos;
self.check_float_base(start_bpos, pos, base);
return (token::Float, self.name_from(start_bpos));
return (token::Float, self.symbol_from(start_bpos));
}
// but we certainly have an integer!
(token::Integer, self.name_from(start_bpos))
(token::Integer, self.symbol_from(start_bpos))
}
}
......@@ -838,20 +829,17 @@ fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
self.bump();
}
return Ok(self.with_str_from(start, |string| {
// FIXME: perform NFKC normalization here. (Issue #2253)
let name = ast::Name::intern(string);
if is_raw_ident {
let span = self.mk_sp(raw_start, self.pos);
if !name.can_be_raw() {
self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
// FIXME: perform NFKC normalization here. (Issue #2253)
let name = self.symbol_from(start);
if is_raw_ident {
let span = self.mk_sp(raw_start, self.pos);
if !name.can_be_raw() {
self.err_span(span, &format!("`{}` cannot be a raw identifier", name));
}
self.sess.raw_identifier_spans.borrow_mut().push(span);
}
token::Ident(name, is_raw_ident)
}));
return Ok(token::Ident(name, is_raw_ident));
}
}
......@@ -1017,7 +1005,7 @@ fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
// lifetimes shouldn't end with a single quote
// if we find one, then this is an invalid character literal
if self.ch_is('\'') {
let symbol = self.name_from(start);
let symbol = self.symbol_from(start);
self.bump();
self.validate_char_escape(start_with_quote);
return Ok(TokenKind::lit(token::Char, symbol, None));
......@@ -1035,7 +1023,7 @@ fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
return Ok(token::Lifetime(self.name_from(start_with_quote)));
return Ok(token::Lifetime(self.symbol_from(start_with_quote)));
}
let msg = "unterminated character literal";
let symbol = self.scan_single_quoted_string(start_with_quote, msg);
......@@ -1063,7 +1051,7 @@ fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
},
Some('r') => {
let (start, end, hash_count) = self.scan_raw_string();
let symbol = self.name_from_to(start, end);
let symbol = self.symbol_from_to(start, end);
self.validate_raw_byte_str_escape(start, end);
(token::ByteStrRaw(hash_count), symbol)
......@@ -1084,7 +1072,7 @@ fn next_token_inner(&mut self) -> Result<TokenKind, ()> {
}
'r' => {
let (start, end, hash_count) = self.scan_raw_string();
let symbol = self.name_from_to(start, end);
let symbol = self.symbol_from_to(start, end);
self.validate_raw_str_escape(start, end);
let suffix = self.scan_optional_raw_name();
......@@ -1185,7 +1173,7 @@ fn peeking_at_comment(&self) -> bool {
fn scan_single_quoted_string(&mut self,
start_with_quote: BytePos,
unterminated_msg: &str) -> ast::Name {
unterminated_msg: &str) -> Symbol {
// assumes that first `'` is consumed
let start = self.pos;
// lex `'''` as a single char, for recovery
......@@ -1217,12 +1205,12 @@ fn scan_single_quoted_string(&mut self,
}
}
let id = self.name_from(start);
let id = self.symbol_from(start);
self.bump();
id
}
fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> Symbol {
debug_assert!(self.ch_is('\"'));
let start_with_quote = self.pos;
self.bump();
......@@ -1237,7 +1225,7 @@ fn scan_double_quoted_string(&mut self, unterminated_msg: &str) -> ast::Name {
}
self.bump();
}
let id = self.name_from(start);
let id = self.symbol_from(start);
self.bump();
id
}
......@@ -1300,101 +1288,95 @@ fn scan_raw_string(&mut self) -> (BytePos, BytePos, u16) {
}
fn validate_char_escape(&self, start_with_quote: BytePos) {
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
if let Err((off, err)) = unescape::unescape_char(lit) {
let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
if let Err((off, err)) = unescape::unescape_char(lit) {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::Char,
0..off,
err,
)
}
}
/// Validates the escape sequence of a byte literal.
///
/// The text checked is the source slice starting one byte after
/// `start_with_quote` and ending one byte before `self.pos` (presumably the
/// literal's contents without its surrounding quotes). If
/// `unescape::unescape_byte` rejects that text, the error is reported through
/// `emit_unescape_error` against the span `start_with_quote..self.pos`, with
/// the offending range given as `0..off`.
fn validate_byte_escape(&self, start_with_quote: BytePos) {
    let content_start = start_with_quote + BytePos(1);
    let content_end = self.pos - BytePos(1);
    let lit = self.str_from_to(content_start, content_end);

    // Nothing to report when the literal unescapes cleanly.
    let (off, err) = match unescape::unescape_byte(lit) {
        Ok(_) => return,
        Err(failure) => failure,
    };

    let literal_span = self.mk_sp(start_with_quote, self.pos);
    emit_unescape_error(
        &self.sess.span_diagnostic,
        lit,
        literal_span,
        unescape::Mode::Byte,
        0..off,
        err,
    );
}
fn validate_str_escape(&self, start_with_quote: BytePos) {
let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
unescape::unescape_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::Char,
0..off,
unescape::Mode::Str,
range,
err,
)
}
});
})
}
fn validate_byte_escape(&self, start_with_quote: BytePos) {
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
if let Err((off, err)) = unescape::unescape_byte(lit) {
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
let lit = self.str_from_to(content_start, content_end);
unescape::unescape_raw_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::Byte,
0..off,
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
unescape::Mode::Str,
range,
err,
)
}
});
}
fn validate_str_escape(&self, start_with_quote: BytePos) {
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
unescape::unescape_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::Str,
range,
err,
)
}
})
});
}
fn validate_raw_str_escape(&self, content_start: BytePos, content_end: BytePos) {
self.with_str_from_to(content_start, content_end, |lit: &str| {
unescape::unescape_raw_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
unescape::Mode::Str,
range,
err,
)
}
})
});
})
}
fn validate_raw_byte_str_escape(&self, content_start: BytePos, content_end: BytePos) {
self.with_str_from_to(content_start, content_end, |lit: &str| {
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
unescape::Mode::ByteStr,
range,
err,
)
}
})
});
let lit = self.str_from_to(content_start, content_end);
unescape::unescape_raw_byte_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1)),
unescape::Mode::ByteStr,
range,
err,
)
}
})
}
fn validate_byte_str_escape(&self, start_with_quote: BytePos) {
self.with_str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1), |lit| {
unescape::unescape_byte_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::ByteStr,
range,
err,
)
}
})
});
let lit = self.str_from_to(start_with_quote + BytePos(1), self.pos - BytePos(1));
unescape::unescape_byte_str(lit, &mut |range, c| {
if let Err(err) = c {
emit_unescape_error(
&self.sess.span_diagnostic,
lit,
self.mk_sp(start_with_quote, self.pos),
unescape::Mode::ByteStr,
range,
err,
)
}
})
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册