Commit ef9bf3a4 authored by bors

auto merge of #14630 : cmr/rust/rewrite-lexer, r=alexcrichton

These are a pain to rebase, so I'm separating this from the rest of my work.
Nothing controversial here, just some simple refactoring and removal of an
unused entry in the token table. Brings the lexer into 2012 with methods!
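In other words, free functions that took the reader as an explicit argument become inherent methods, and the free constructor becomes an associated `new`. A toy sketch of the pattern (illustrative names only, not the real lexer API):

```rust
// Toy stand-in for the reader type; not the real StringReader.
struct Reader {
    pos: usize,
}

// Before: free constructor and a free helper threading the reader through.
fn new_reader() -> Reader {
    Reader { pos: 0 }
}
fn bump(rdr: &mut Reader) {
    rdr.pos += 1;
}

// After: associated constructor and an inherent method.
impl Reader {
    fn new() -> Reader {
        Reader { pos: 0 }
    }
    fn bump(&mut self) {
        self.pos += 1;
    }
}

fn main() {
    let mut old_style = new_reader();
    bump(&mut old_style);
    let mut new_style = Reader::new();
    new_style.bump();
    assert_eq!(old_style.pos, new_style.pos);
}
```

The call sites in the diff below change accordingly: `bump(rdr)` becomes `rdr.bump()`, and `lexer::new_string_reader(...)` becomes `lexer::StringReader::new(...)`.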
......@@ -34,7 +34,7 @@ pub fn highlight(src: &str, class: Option<&str>) -> String {
let mut out = io::MemWriter::new();
doit(&sess,
- lexer::new_string_reader(&sess.span_diagnostic, fm),
+ lexer::StringReader::new(&sess.span_diagnostic, fm),
class,
&mut out).unwrap();
str::from_utf8_lossy(out.unwrap().as_slice()).to_string()
......
......@@ -15,7 +15,7 @@
use codemap::{Span, Spanned, spanned, dummy_spanned};
use codemap::BytePos;
use diagnostic::SpanHandler;
- use parse::comments::{doc_comment_style, strip_doc_comment_decoration};
+ use parse::lexer::comments::{doc_comment_style, strip_doc_comment_decoration};
use parse::token::InternedString;
use parse::token;
use crateid::CrateId;
......
This diff is collapsed.
......@@ -11,8 +11,8 @@
use ast;
use codemap::{BytePos, CharPos, CodeMap, Pos};
use diagnostic;
- use parse::lexer::{is_whitespace, with_str_from, Reader};
- use parse::lexer::{StringReader, bump, is_eof, nextch_is, TokenAndSpan};
+ use parse::lexer::{is_whitespace, Reader};
+ use parse::lexer::{StringReader, TokenAndSpan};
use parse::lexer::{is_line_non_doc_comment, is_block_non_doc_comment};
use parse::lexer;
use parse::token;
......@@ -141,31 +141,6 @@ fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
fail!("not a doc-comment: {}", comment);
}
- fn read_to_eol(rdr: &mut StringReader) -> String {
- let mut val = String::new();
- while !rdr.curr_is('\n') && !is_eof(rdr) {
- val.push_char(rdr.curr.unwrap());
- bump(rdr);
- }
- if rdr.curr_is('\n') { bump(rdr); }
- return val
- }
- fn read_one_line_comment(rdr: &mut StringReader) -> String {
- let val = read_to_eol(rdr);
- assert!((val.as_slice()[0] == '/' as u8 &&
- val.as_slice()[1] == '/' as u8) ||
- (val.as_slice()[0] == '#' as u8 &&
- val.as_slice()[1] == '!' as u8));
- return val;
- }
- fn consume_non_eol_whitespace(rdr: &mut StringReader) {
- while is_whitespace(rdr.curr) && !rdr.curr_is('\n') && !is_eof(rdr) {
- bump(rdr);
- }
- }
fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
debug!(">>> blank-line comment");
comments.push(Comment {
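The lexer diff where these removed helpers land is collapsed above; judging by the call sites later in this file (`rdr.read_one_line_comment()`, `rdr.consume_non_eol_whitespace()`), they became methods on `StringReader`. A self-contained sketch of `read_to_eol` in method form, with the reader state reduced to a few assumed fields:

```rust
// Minimal stand-in for StringReader; the fields and helpers are assumed.
struct StringReader {
    src: Vec<char>,
    pos: usize,
    curr: Option<char>,
}

impl StringReader {
    fn new(src: &str) -> StringReader {
        let chars: Vec<char> = src.chars().collect();
        let curr = chars.first().copied();
        StringReader { src: chars, pos: 0, curr }
    }
    fn bump(&mut self) {
        self.pos += 1;
        self.curr = self.src.get(self.pos).copied();
    }
    fn is_eof(&self) -> bool { self.curr.is_none() }
    fn curr_is(&self, c: char) -> bool { self.curr == Some(c) }

    // read_to_eol as a method: collect up to the newline, then eat it.
    fn read_to_eol(&mut self) -> String {
        let mut val = String::new();
        while !self.curr_is('\n') && !self.is_eof() {
            val.push(self.curr.unwrap());
            self.bump();
        }
        if self.curr_is('\n') { self.bump(); }
        val
    }
}

fn main() {
    let mut rdr = StringReader::new("// hello\nnext");
    assert_eq!(rdr.read_to_eol(), "// hello");
    assert!(rdr.curr_is('n')); // the '\n' was consumed as well
}
```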
......@@ -177,11 +152,11 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
comments: &mut Vec<Comment>) {
- while is_whitespace(rdr.curr) && !is_eof(rdr) {
+ while is_whitespace(rdr.curr) && !rdr.is_eof() {
if rdr.col == CharPos(0u) && rdr.curr_is('\n') {
push_blank_line_comment(rdr, &mut *comments);
}
- bump(rdr);
+ rdr.bump();
}
}
......@@ -193,7 +168,7 @@ fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
debug!("<<< shebang comment");
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
- lines: vec!(read_one_line_comment(rdr)),
+ lines: vec!(rdr.read_one_line_comment()),
pos: p
});
}
......@@ -203,15 +178,15 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
debug!(">>> line comments");
let p = rdr.last_pos;
let mut lines: Vec<String> = Vec::new();
- while rdr.curr_is('/') && nextch_is(rdr, '/') {
- let line = read_one_line_comment(rdr);
+ while rdr.curr_is('/') && rdr.nextch_is('/') {
+ let line = rdr.read_one_line_comment();
debug!("{}", line);
// Doc comments are not put in comments.
if is_doc_comment(line.as_slice()) {
break;
}
lines.push(line);
- consume_non_eol_whitespace(rdr);
+ rdr.consume_non_eol_whitespace();
}
debug!("<<< line comments");
if !lines.is_empty() {
......@@ -265,21 +240,21 @@ fn read_block_comment(rdr: &mut StringReader,
let p = rdr.last_pos;
let mut lines: Vec<String> = Vec::new();
let col = rdr.col;
- bump(rdr);
- bump(rdr);
+ rdr.bump();
+ rdr.bump();
let mut curr_line = String::from_str("/*");
// doc-comments are not really comments, they are attributes
- if (rdr.curr_is('*') && !nextch_is(rdr, '*')) || rdr.curr_is('!') {
- while !(rdr.curr_is('*') && nextch_is(rdr, '/')) && !is_eof(rdr) {
+ if (rdr.curr_is('*') && !rdr.nextch_is('*')) || rdr.curr_is('!') {
+ while !(rdr.curr_is('*') && rdr.nextch_is('/')) && !rdr.is_eof() {
curr_line.push_char(rdr.curr.unwrap());
- bump(rdr);
+ rdr.bump();
}
- if !is_eof(rdr) {
+ if !rdr.is_eof() {
curr_line.push_str("*/");
- bump(rdr);
- bump(rdr);
+ rdr.bump();
+ rdr.bump();
}
if !is_block_non_doc_comment(curr_line.as_slice()) {
return
......@@ -290,7 +265,7 @@ fn read_block_comment(rdr: &mut StringReader,
let mut level: int = 1;
while level > 0 {
debug!("=== block comment level {}", level);
- if is_eof(rdr) {
+ if rdr.is_eof() {
rdr.fatal("unterminated block comment");
}
if rdr.curr_is('\n') {
......@@ -298,21 +273,21 @@ fn read_block_comment(rdr: &mut StringReader,
curr_line,
col);
curr_line = String::new();
- bump(rdr);
+ rdr.bump();
} else {
curr_line.push_char(rdr.curr.unwrap());
- if rdr.curr_is('/') && nextch_is(rdr, '*') {
- bump(rdr);
- bump(rdr);
+ if rdr.curr_is('/') && rdr.nextch_is('*') {
+ rdr.bump();
+ rdr.bump();
curr_line.push_char('*');
level += 1;
} else {
- if rdr.curr_is('*') && nextch_is(rdr, '/') {
- bump(rdr);
- bump(rdr);
+ if rdr.curr_is('*') && rdr.nextch_is('/') {
+ rdr.bump();
+ rdr.bump();
curr_line.push_char('/');
level -= 1;
- } else { bump(rdr); }
+ } else { rdr.bump(); }
}
}
}
......@@ -324,31 +299,24 @@ fn read_block_comment(rdr: &mut StringReader,
}
let mut style = if code_to_the_left { Trailing } else { Isolated };
- consume_non_eol_whitespace(rdr);
- if !is_eof(rdr) && !rdr.curr_is('\n') && lines.len() == 1u {
+ rdr.consume_non_eol_whitespace();
+ if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1u {
style = Mixed;
}
debug!("<<< block comment");
comments.push(Comment {style: style, lines: lines, pos: p});
}
- fn peeking_at_comment(rdr: &StringReader) -> bool {
- return (rdr.curr_is('/') && nextch_is(rdr, '/')) ||
- (rdr.curr_is('/') && nextch_is(rdr, '*')) ||
- // consider shebangs comments, but not inner attributes
- (rdr.curr_is('#') && nextch_is(rdr, '!') &&
- !lexer::nextnextch_is(rdr, '['));
- }
fn consume_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment> ) {
debug!(">>> consume comment");
- if rdr.curr_is('/') && nextch_is(rdr, '/') {
+ if rdr.curr_is('/') && rdr.nextch_is('/') {
read_line_comments(rdr, code_to_the_left, comments);
- } else if rdr.curr_is('/') && nextch_is(rdr, '*') {
+ } else if rdr.curr_is('/') && rdr.nextch_is('*') {
read_block_comment(rdr, code_to_the_left, comments);
- } else if rdr.curr_is('#') && nextch_is(rdr, '!') {
+ } else if rdr.curr_is('#') && rdr.nextch_is('!') {
read_shebang_comment(rdr, code_to_the_left, comments);
} else { fail!(); }
debug!("<<< consume comment");
......@@ -362,8 +330,7 @@ pub struct Literal {
// it appears this function is called only from pprust... that's
// probably not a good thing.
- pub fn gather_comments_and_literals(span_diagnostic:
- &diagnostic::SpanHandler,
+ pub fn gather_comments_and_literals(span_diagnostic: &diagnostic::SpanHandler,
path: String,
srdr: &mut io::Reader)
-> (Vec<Comment>, Vec<Literal>) {
......@@ -371,20 +338,20 @@ pub fn gather_comments_and_literals(span_diagnostic:
let src = str::from_utf8(src.as_slice()).unwrap().to_string();
let cm = CodeMap::new();
let filemap = cm.new_filemap(path, src);
- let mut rdr = lexer::new_low_level_string_reader(span_diagnostic, filemap);
+ let mut rdr = lexer::StringReader::new_raw(span_diagnostic, filemap);
let mut comments: Vec<Comment> = Vec::new();
let mut literals: Vec<Literal> = Vec::new();
let mut first_read: bool = true;
- while !is_eof(&rdr) {
+ while !rdr.is_eof() {
loop {
let mut code_to_the_left = !first_read;
- consume_non_eol_whitespace(&mut rdr);
+ rdr.consume_non_eol_whitespace();
if rdr.curr_is('\n') {
code_to_the_left = false;
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
}
- while peeking_at_comment(&rdr) {
+ while rdr.peeking_at_comment() {
consume_comment(&mut rdr, code_to_the_left, &mut comments);
consume_whitespace_counting_blank_lines(&mut rdr, &mut comments);
}
......@@ -397,7 +364,7 @@ pub fn gather_comments_and_literals(span_diagnostic:
//discard, and look ahead; we're working with internal state
let TokenAndSpan {tok: tok, sp: sp} = rdr.peek();
if token::is_lit(&tok) {
- with_str_from(&rdr, bstart, |s| {
+ rdr.with_str_from(bstart, |s| {
debug!("tok lit: {}", s);
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
})
......
This diff is collapsed.
......@@ -25,7 +25,6 @@
pub mod lexer;
pub mod parser;
pub mod token;
- pub mod comments;
pub mod attr;
pub mod common;
......@@ -255,7 +254,7 @@ pub fn filemap_to_tts(sess: &ParseSess, filemap: Rc<FileMap>)
// it appears to me that the cfg doesn't matter here... indeed,
// parsing tt's probably shouldn't require a parser at all.
let cfg = Vec::new();
- let srdr = lexer::new_string_reader(&sess.span_diagnostic, filemap);
+ let srdr = lexer::StringReader::new(&sess.span_diagnostic, filemap);
let mut p1 = Parser::new(sess, cfg, box srdr);
p1.parse_all_token_trees()
}
......
......@@ -136,18 +136,18 @@ fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
}
}
- pub fn binop_to_str(o: BinOp) -> String {
+ pub fn binop_to_str(o: BinOp) -> &'static str {
match o {
PLUS => "+".to_string(),
MINUS => "-".to_string(),
STAR => "*".to_string(),
SLASH => "/".to_string(),
PERCENT => "%".to_string(),
CARET => "^".to_string(),
AND => "&".to_string(),
OR => "|".to_string(),
SHL => "<<".to_string(),
SHR => ">>".to_string()
PLUS => "+",
MINUS => "-",
STAR => "*",
SLASH => "/",
PERCENT => "%",
CARET => "^",
AND => "&",
OR => "|",
SHL => "<<",
SHR => ">>"
}
}
......@@ -164,9 +164,9 @@ pub fn to_str(t: &Token) -> String {
TILDE => "~".to_string(),
OROR => "||".to_string(),
ANDAND => "&&".to_string(),
- BINOP(op) => binop_to_str(op),
+ BINOP(op) => binop_to_str(op).to_string(),
BINOPEQ(op) => {
- let mut s = binop_to_str(op);
+ let mut s = binop_to_str(op).to_string();
s.push_str("=");
s
}
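This pair of hunks is a small allocation fix: the operator spellings are compile-time constants, so returning `&'static str` avoids building a fresh `String` on every call, and only callers that compose a new string (the `BINOPEQ` arm above) pay for an allocation. A minimal sketch of the trade-off, with an assumed two-variant enum standing in for the real token type:

```rust
// Assumed, cut-down version of the BinOp token type for illustration.
#[derive(Clone, Copy)]
enum BinOp {
    Plus,
    Minus,
}

// Fixed spellings: borrow from static memory, no per-call allocation.
fn binop_to_str(o: BinOp) -> &'static str {
    match o {
        BinOp::Plus => "+",
        BinOp::Minus => "-",
    }
}

fn main() {
    // Plain display needs no allocation at all.
    assert_eq!(binop_to_str(BinOp::Minus), "-");
    // Composing "+=" allocates once, at the one call site that needs it.
    let mut s = binop_to_str(BinOp::Plus).to_string();
    s.push_str("=");
    assert_eq!(s, "+=");
}
```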
......@@ -423,6 +423,10 @@ fn mk_fresh_ident_interner() -> IdentInterner {
static SELF_KEYWORD_NAME: Name = 1;
static STATIC_KEYWORD_NAME: Name = 2;
+ // NB: leaving holes in the ident table is bad! a different ident will get
+ // interned with the id from the hole, but it will be between the min and max
+ // of the reserved words, and thus tagged as "reserved".
declare_special_idents_and_keywords! {
pub mod special_idents {
// These ones are statics
......
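The new `NB` comment documents an invariant of the keyword table: reservedness is, per the comment, a check that an interned id falls between the min and max of the reserved words, so a hole in that contiguous range would let an unrelated ident intern into it and be misclassified. A toy illustration with assumed id bounds (the real table is built by the macro above):

```rust
// Assumed bounds; illustrative only.
const FIRST_KEYWORD_ID: u32 = 1;
const LAST_KEYWORD_ID: u32 = 10;

// Reserved-word detection as a numeric range check on the interned id.
fn is_reserved(id: u32) -> bool {
    FIRST_KEYWORD_ID <= id && id <= LAST_KEYWORD_ID
}

fn main() {
    // If id 5 were a hole, the next interned ident would receive it
    // and wrongly test as reserved here.
    assert!(is_reserved(5));
    assert!(!is_reserved(42));
}
```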
......@@ -20,7 +20,8 @@
use diagnostic;
use parse::classify::expr_is_simple_block;
use parse::token::IdentInterner;
- use parse::{comments, token};
+ use parse::token;
+ use parse::lexer::comments;
use parse;
use print::pp::{break_offset, word, space, zerobreak, hardbreak};
use print::pp::{Breaks, Consistent, Inconsistent, eof};
......
......@@ -126,14 +126,14 @@ pub fn new(string: &str) -> RcStr {
}
}
- // A StrInterner differs from Interner<String> in that it accepts
- // &str rather than RcStr, resulting in less allocation.
+ /// A StrInterner differs from Interner<String> in that it accepts
+ /// &str rather than RcStr, resulting in less allocation.
pub struct StrInterner {
map: RefCell<HashMap<RcStr, Name>>,
vect: RefCell<Vec<RcStr> >,
}
- // when traits can extend traits, we should extend index<Name,T> to get []
+ /// When traits can extend traits, we should extend index<Name,T> to get []
impl StrInterner {
pub fn new() -> StrInterner {
StrInterner {
......@@ -177,8 +177,8 @@ pub fn gensym(&self, val: &str) -> Name {
// lightweight way to get what I want, though not
// necessarily the cleanest.
- // create a gensym with the same name as an existing
- // entry.
+ /// Create a gensym with the same name as an existing
+ /// entry.
pub fn gensym_copy(&self, idx : Name) -> Name {
let new_idx = self.len() as Name;
// leave out of map to avoid colliding
......
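As background for the doc comments promoted above, a sketch of the structure they describe (an assumed simplification with owned `String`s instead of `RcStr`/`RefCell`, not the real `StrInterner`): interning maps each distinct string to a small id, while `gensym_copy` mints a fresh id that shares an existing entry's spelling but stays out of the lookup map:

```rust
use std::collections::HashMap;

struct Interner {
    map: HashMap<String, u32>,
    vect: Vec<String>,
}

impl Interner {
    fn new() -> Interner {
        Interner { map: HashMap::new(), vect: Vec::new() }
    }

    // Accepting &str means callers without an owned String allocate
    // only on the first sighting of a name.
    fn intern(&mut self, s: &str) -> u32 {
        if let Some(&id) = self.map.get(s) {
            return id;
        }
        let id = self.vect.len() as u32;
        self.map.insert(s.to_string(), id);
        self.vect.push(s.to_string());
        id
    }

    // gensym_copy-style: same spelling as an existing entry, fresh id,
    // deliberately left out of `map` so interning never collides with it.
    fn gensym_copy(&mut self, idx: u32) -> u32 {
        let new_idx = self.vect.len() as u32;
        let name = self.vect[idx as usize].clone();
        self.vect.push(name);
        new_idx
    }
}

fn main() {
    let mut i = Interner::new();
    let a = i.intern("self");
    assert_eq!(i.intern("self"), a); // same string, same id
    let g = i.gensym_copy(a);
    assert_ne!(g, a);                // same spelling, distinct id
    assert_eq!(i.intern("self"), a); // lookups still find the original
}
```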