remove special handling of \r\n from the lexer

911398b9 · Aleksey Kladov · 004f3ace · 911398b9 · 911398b9 · 911398b9
4 changed files
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -352,7 +352,6 @@ fn line_comment(&mut self) -> TokenKind {
        loop {
            match self.nth_char(0) {
                '\n' => break,
-                '\r' if self.nth_char(1) == '\n' => break,
                EOF_CHAR if self.is_eof() => break,
                _ => {
                    self.bump();
@@ -525,7 +524,6 @@ fn single_quoted_string(&mut self) -> bool {
            match self.nth_char(0) {
                '/' if !first => break,
                '\n' if self.nth_char(1) != '\'' => break,
-                '\r' if self.nth_char(1) == '\n' => break,
                EOF_CHAR if self.is_eof() => break,
                '\'' => {
                    self.bump();

--- a/src/librustc_lexer/src/unescape.rs
+++ b/src/librustc_lexer/src/unescape.rs
@@ -128,11 +128,7 @@ fn scan_escape(first_char: char, chars: &mut Chars<'_>, mode: Mode) -> Result<ch
    if first_char != '\\' {
        return match first_char {
            '\t' | '\n' => Err(EscapeError::EscapeOnlyChar),
-            '\r' => Err(if chars.clone().next() == Some('\n') {
-                EscapeError::EscapeOnlyChar
-            } else {
-                EscapeError::BareCarriageReturn
-            }),
+            '\r' => Err(EscapeError::BareCarriageReturn),
            '\'' if mode.in_single_quotes() => Err(EscapeError::EscapeOnlyChar),
            '"' if mode.in_double_quotes() => Err(EscapeError::EscapeOnlyChar),
            _ => {
@@ -244,27 +240,15 @@ fn unescape_str_or_byte_str<F>(src: &str, mode: Mode, callback: &mut F)

        let unescaped_char = match first_char {
            '\\' => {
-                let (second_char, third_char) = {
-                    let mut chars = chars.clone();
-                    (chars.next(), chars.next())
-                };
-                match (second_char, third_char) {
-                    (Some('\n'), _) | (Some('\r'), Some('\n')) => {
+                let second_char = chars.clone().next();
+                match second_char {
+                    Some('\n') => {
                        skip_ascii_whitespace(&mut chars);
                        continue;
                    }
                    _ => scan_escape(first_char, &mut chars, mode),
                }
            }
-            '\r' => {
-                let second_char = chars.clone().next();
-                if second_char == Some('\n') {
-                    chars.next();
-                    Ok('\n')
-                } else {
-                    scan_escape(first_char, &mut chars, mode)
-                }
-            }
            '\n' => Ok('\n'),
            '\t' => Ok('\t'),
            _ => scan_escape(first_char, &mut chars, mode),
@@ -298,15 +282,11 @@ fn unescape_raw_str_or_byte_str<F>(literal_text: &str, mode: Mode, callback: &mu
    while let Some(curr) = chars.next() {
        let start = initial_len - chars.as_str().len() - curr.len_utf8();

-        let result = match (curr, chars.clone().next()) {
-            ('\r', Some('\n')) => {
-                chars.next();
-                Ok('\n')
-            },
-            ('\r', _) => Err(EscapeError::BareCarriageReturnInRawString),
-            (c, _) if mode.is_bytes() && !c.is_ascii() =>
+        let result = match curr {
+            '\r' => Err(EscapeError::BareCarriageReturnInRawString),
+            c if mode.is_bytes() && !c.is_ascii() =>
                Err(EscapeError::NonAsciiCharInByteString),
-            (c, _) => Ok(c),
+            c => Ok(c),
        };
        let end = initial_len - chars.as_str().len();


--- a/src/librustc_lexer/src/unescape/tests.rs
+++ b/src/librustc_lexer/src/unescape/tests.rs
@@ -11,7 +11,6 @@ fn check(literal_text: &str, expected_error: EscapeError) {
    check(r"\", EscapeError::LoneSlash);

    check("\n", EscapeError::EscapeOnlyChar);
-    check("\r\n", EscapeError::EscapeOnlyChar);
    check("\t", EscapeError::EscapeOnlyChar);
    check("'", EscapeError::EscapeOnlyChar);
    check("\r", EscapeError::BareCarriageReturn);
@@ -31,6 +30,7 @@ fn check(literal_text: &str, expected_error: EscapeError) {
    check(r"\v", EscapeError::InvalidEscape);
    check(r"\💩", EscapeError::InvalidEscape);
    check(r"\●", EscapeError::InvalidEscape);
+    check("\\\r", EscapeError::InvalidEscape);

    check(r"\x", EscapeError::TooShortHexEscape);
    check(r"\x0", EscapeError::TooShortHexEscape);
@@ -116,10 +116,9 @@ fn check(literal_text: &str, expected: &str) {

    check("foo", "foo");
    check("", "");
-    check(" \t\n\r\n", " \t\n\n");
+    check(" \t\n", " \t\n");

    check("hello \\\n     world", "hello world");
-    check("hello \\\r\n     world", "hello world");
    check("thread's", "thread's")
 }

@@ -134,7 +133,6 @@ fn check(literal_text: &str, expected_error: EscapeError) {
    check(r"\", EscapeError::LoneSlash);

    check("\n", EscapeError::EscapeOnlyChar);
-    check("\r\n", EscapeError::EscapeOnlyChar);
    check("\t", EscapeError::EscapeOnlyChar);
    check("'", EscapeError::EscapeOnlyChar);
    check("\r", EscapeError::BareCarriageReturn);
@@ -238,10 +236,9 @@ fn check(literal_text: &str, expected: &[u8]) {

    check("foo", b"foo");
    check("", b"");
-    check(" \t\n\r\n", b" \t\n\n");
+    check(" \t\n", b" \t\n");

    check("hello \\\n     world", b"hello world");
-    check("hello \\\r\n     world", b"hello world");
    check("thread's", b"thread's")
 }

@@ -253,7 +250,6 @@ fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)])
        assert_eq!(unescaped, expected);
    }

-    check("\r\n", &[(0..2, Ok('\n'))]);
    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
    check("\rx", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString)), (1..2, Ok('x'))]);
 }
@@ -266,7 +262,6 @@ fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
        assert_eq!(unescaped, expected);
    }

-    check("\r\n", &[(0..2, Ok(byte_from_char('\n')))]);
    check("\r", &[(0..1, Err(EscapeError::BareCarriageReturnInRawString))]);
    check("🦀", &[(0..4, Err(EscapeError::NonAsciiCharInByteString))]);
    check(

--- a/src/libsyntax/parse/lexer/mod.rs
+++ b/src/libsyntax/parse/lexer/mod.rs
@@ -8,9 +8,7 @@
 use rustc_lexer::Base;
 use rustc_lexer::unescape;

-use std::borrow::Cow;
 use std::char;
-use std::iter;
 use std::convert::TryInto;
 use rustc_data_structures::sync::Lrc;
 use log::debug;
@@ -181,18 +179,7 @@ fn cook_lexer_token(
                let string = self.str_from(start);
                // comments with only more "/"s are not doc comments
                let tok = if is_doc_comment(string) {
-                    let mut idx = 0;
-                    loop {
-                        idx = match string[idx..].find('\r') {
-                            None => break,
-                            Some(it) => idx + it + 1
-                        };
-                        if string[idx..].chars().next() != Some('\n') {
-                            self.err_span_(start + BytePos(idx as u32 - 1),
-                                            start + BytePos(idx as u32),
-                                            "bare CR not allowed in doc-comment");
-                        }
-                    }
+                    self.forbid_bare_cr(start, string, "bare CR not allowed in doc-comment");
                    token::DocComment(Symbol::intern(string))
                } else {
                    token::Comment
@@ -217,15 +204,10 @@ fn cook_lexer_token(
                }

                let tok = if is_doc_comment {
-                    let has_cr = string.contains('\r');
-                    let string = if has_cr {
-                        self.translate_crlf(start,
-                                            string,
-                                            "bare CR not allowed in block doc-comment")
-                    } else {
-                        string.into()
-                    };
-                    token::DocComment(Symbol::intern(&string[..]))
+                    self.forbid_bare_cr(start,
+                                        string,
+                                        "bare CR not allowed in block doc-comment");
+                    token::DocComment(Symbol::intern(string))
                } else {
                    token::Comment
                };
@@ -516,49 +498,16 @@ fn str_from_to(&self, start: BytePos, end: BytePos) -> &str
        &self.src[self.src_index(start)..self.src_index(end)]
    }

-    /// Converts CRLF to LF in the given string, raising an error on bare CR.
-    fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
-        let mut chars = s.char_indices().peekable();
-        while let Some((i, ch)) = chars.next() {
-            if ch == '\r' {
-                if let Some((lf_idx, '\n')) = chars.peek() {
-                    return translate_crlf_(self, start, s, *lf_idx, chars, errmsg).into();
-                }
-                let pos = start + BytePos(i as u32);
-                let end_pos = start + BytePos((i + ch.len_utf8()) as u32);
-                self.err_span_(pos, end_pos, errmsg);
-            }
-        }
-        return s.into();
-
-        fn translate_crlf_(rdr: &StringReader<'_>,
-                           start: BytePos,
-                           s: &str,
-                           mut j: usize,
-                           mut chars: iter::Peekable<impl Iterator<Item = (usize, char)>>,
-                           errmsg: &str)
-                           -> String {
-            let mut buf = String::with_capacity(s.len());
-            // Skip first CR
-            buf.push_str(&s[.. j - 1]);
-            while let Some((i, ch)) = chars.next() {
-                if ch == '\r' {
-                    if j < i {
-                        buf.push_str(&s[j..i]);
-                    }
-                    let next = i + ch.len_utf8();
-                    j = next;
-                    if chars.peek().map(|(_, ch)| *ch) != Some('\n') {
-                        let pos = start + BytePos(i as u32);
-                        let end_pos = start + BytePos(next as u32);
-                        rdr.err_span_(pos, end_pos, errmsg);
-                    }
-                }
-            }
-            if j < s.len() {
-                buf.push_str(&s[j..]);
-            }
-            buf
+    fn forbid_bare_cr(&self, start: BytePos, s: &str, errmsg: &str) {
+        let mut idx = 0;
+        loop {
+            idx = match s[idx..].find('\r') {
+                None => break,
+                Some(it) => idx + it + 1
+            };
+            self.err_span_(start + BytePos(idx as u32 - 1),
+                           start + BytePos(idx as u32),
+                           errmsg);
        }
    }