auto merge of #6083 : jbclements/rust/parser-cleanup, r=jbclements

r? @pcwalton A month's worth of parser cleanup here. Much of this is new comments and renaming. A number of these commits also remove unneeded code. Probably the biggest refactor here is splitting "parse_item_or_view_item" into two functions; it turns out that the only overlap between items in foreign modules and items in regular modules was macros, so this refactor should make things substantially easier for future maintenance.

auto merge of #6083 : jbclements/rust/parser-cleanup, r=jbclements
r? @pcwalton A month's worth of parser cleanup here. Much of this is new comments and renaming. A number of these commits also remove unneeded code. Probably the biggest refactor here is splitting "parse_item_or_view_item" into two functions; it turns out that the only overlap between items in foreign modules and items in regular modules was macros, so this refactor should make things substantially easier for future maintenance.
ea74f684 · bors · 7b7a0fc2 · cce13c18 · ea74f684 · ea74f684
17 changed files
--- a/src/librustc/driver/driver.rs
+++ b/src/librustc/driver/driver.rs
@@ -149,7 +149,7 @@ pub fn parse_input(sess: Session, cfg: ast::crate_cfg, input: &input)
    -> @ast::crate {
    match *input {
      file_input(ref file) => {
-        parse::parse_crate_from_file_using_tts(&(*file), cfg, sess.parse_sess)
+        parse::parse_crate_from_file(&(*file), cfg, sess.parse_sess)
      }
      str_input(ref src) => {
        // FIXME (#2319): Don't really want to box the source string

--- a/src/librustdoc/attr_parser.rs
+++ b/src/librustdoc/attr_parser.rs
@@ -79,7 +79,7 @@ fn parse_attributes(source: ~str) -> ~[ast::attribute] {

        let parse_sess = syntax::parse::new_parse_sess(None);
        let parser = parse::new_parser_from_source_str(
-            parse_sess, ~[], ~"-", codemap::FssNone, @source);
+            parse_sess, ~[], ~"-", @source);

        parser.parse_outer_attributes()
    }

--- a/src/librustpkg/tests.rs
+++ b/src/librustpkg/tests.rs
@@ -66,7 +66,7 @@ fn test_make_dir_rwx() {
 fn test_install_valid() {
    let ctxt = fake_ctxt();
    let temp_pkg_id = fake_pkg();
-    let temp_workspace() = mk_temp_workspace();
+    let temp_workspace = mk_temp_workspace();
    // should have test, bench, lib, and main
    ctxt.install(&temp_workspace, temp_pkg_id);
    // Check that all files exist

--- a/src/libsyntax/ast_util.rs
+++ b/src/libsyntax/ast_util.rs
@@ -355,6 +355,10 @@ pub fn operator_prec(op: ast::binop) -> uint {
  }
 }

+/// Precedence of the `as` operator, which is a binary operator
+/// not appearing in the prior table.
+pub static as_prec: uint = 11u;
+
 pub fn dtor_ty() -> @ast::Ty {
    @ast::Ty {id: 0, node: ty_nil, span: dummy_sp()}
 }
@@ -756,7 +760,6 @@ fn refold_test_sc(mut sc: SyntaxContext, table : &SCTable) -> ~[TestSC] {
        assert_eq!(refold_test_sc(3,&t),test_sc);
    }

-
    // extend a syntax context with a sequence of marks given
    // in a vector. v[0] will be the outermost mark.
    fn unfold_marks(mrks:~[Mrk],tail:SyntaxContext,table: &mut SCTable) -> SyntaxContext {

--- a/src/libsyntax/ext/base.rs
+++ b/src/libsyntax/ext/base.rs
@@ -461,9 +461,7 @@ fn get_map(&self) -> &'self HashMap<K,@V> {

    // ugh: can't get this to compile with mut because of the
    // lack of flow sensitivity.
-    #[cfg(stage1)]
-    #[cfg(stage2)]
-    #[cfg(stage3)]
+    #[cfg(not(stage0))]
    fn get_map<'a>(&'a self) -> &'a HashMap<K,@V> {
        match *self {
            BaseMapChain (~ref map) => map,

--- a/src/libsyntax/opt_vec.rs
+++ b/src/libsyntax/opt_vec.rs
@@ -69,9 +69,7 @@ fn get(&self, i: uint) -> &'self T {
        }
    }

-    #[cfg(stage1)]
-    #[cfg(stage2)]
-    #[cfg(stage3)]
+    #[cfg(not(stage0))]
    fn get<'a>(&'a self, i: uint) -> &'a T {
        match *self {
            Empty => fail!(fmt!("Invalid index %u", i)),

--- a/src/libsyntax/parse/attr.rs
+++ b/src/libsyntax/parse/attr.rs
@@ -62,12 +62,14 @@ fn parse_outer_attributes(&self) -> ~[ast::attribute] {
        return attrs;
    }

+    // matches attribute = # attribute_naked
    fn parse_attribute(&self, style: ast::attr_style) -> ast::attribute {
        let lo = self.span.lo;
        self.expect(&token::POUND);
        return self.parse_attribute_naked(style, lo);
    }

+    // matches attribute_naked = [ meta_item ]
    fn parse_attribute_naked(&self, style: ast::attr_style, lo: BytePos) ->
        ast::attribute {
        self.expect(&token::LBRACKET);
@@ -86,6 +88,7 @@ fn parse_attribute_naked(&self, style: ast::attr_style, lo: BytePos) ->
    // is an inner attribute of the containing item or an outer attribute of
    // the first contained item until we see the semi).

+    // matches inner_attrs* outer_attr?
    // you can make the 'next' field an Option, but the result is going to be
    // more useful as a vector.
    fn parse_inner_attrs_and_next(&self) ->
@@ -134,6 +137,9 @@ fn parse_inner_attrs_and_next(&self) ->
        (inner_attrs, next_outer_attrs)
    }

+    // matches meta_item = IDENT
+    // | IDENT = lit
+    // | IDENT meta_seq
    fn parse_meta_item(&self) -> @ast::meta_item {
        let lo = self.span.lo;
        let name = self.id_to_str(self.parse_ident());
@@ -156,6 +162,7 @@ fn parse_meta_item(&self) -> @ast::meta_item {
        }
    }

+    // matches meta_seq = ( COMMASEP(meta_item) )
    fn parse_meta_seq(&self) -> ~[@ast::meta_item] {
        copy self.parse_seq(
            &token::LPAREN,

--- a/src/libsyntax/parse/classify.rs
+++ b/src/libsyntax/parse/classify.rs
@@ -15,6 +15,13 @@
 use ast;
 use codemap;

+// does this expression require a semicolon to be treated
+// as a statement? The negation of this -- 'can this expression
+// be used as a statement without a semicolon?' -- is used
+// as an early bail-out in the parser so that, for instance,
+// 'if true {...} else {...}
+//  |x| 5 '
+// isn't parsed as (if true {...} else {...} | x) | 5
 pub fn expr_requires_semi_to_be_stmt(e: @ast::expr) -> bool {
    match e.node {
      ast::expr_if(*)
@@ -40,6 +47,9 @@ pub fn expr_is_simple_block(e: @ast::expr) -> bool {
    }
 }

+// does this statement require a semicolon after it?
+// note that in one case (stmt_semi), we've already
+// seen the semicolon, and thus don't need another.
 pub fn stmt_ends_with_semi(stmt: &ast::stmt) -> bool {
    return match stmt.node {
        ast::stmt_decl(d, _) => {

--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -309,6 +309,8 @@ pub struct lit {
    pos: BytePos
 }

+// it appears this function is called only from pprust... that's
+// probably not a good thing.
 pub fn gather_comments_and_literals(span_diagnostic:
                                    @diagnostic::span_handler,
                                    path: ~str,

--- a/src/libsyntax/parse/lexer.rs
+++ b/src/libsyntax/parse/lexer.rs
@@ -225,20 +225,12 @@ pub fn is_whitespace(c: char) -> bool {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
 }

-fn may_begin_ident(c: char) -> bool { return is_alpha(c) || c == '_'; }
-
 fn in_range(c: char, lo: char, hi: char) -> bool {
    return lo <= c && c <= hi
 }

-fn is_alpha(c: char) -> bool {
-    return in_range(c, 'a', 'z') || in_range(c, 'A', 'Z');
-}
-
 fn is_dec_digit(c: char) -> bool { return in_range(c, '0', '9'); }

-fn is_alnum(c: char) -> bool { return is_alpha(c) || is_dec_digit(c); }
-
 fn is_hex_digit(c: char) -> bool {
    return in_range(c, '0', '9') || in_range(c, 'a', 'f') ||
            in_range(c, 'A', 'F');
@@ -294,6 +286,8 @@ fn consume_any_line_comment(rdr: @mut StringReader)
        }
    } else if rdr.curr == '#' {
        if nextch(rdr) == '!' {
+            // I guess this is the only way to figure out if
+            // we're at the beginning of the file...
            let cmap = @CodeMap::new();
            (*cmap).files.push(rdr.filemap);
            let loc = cmap.lookup_char_pos_adj(rdr.last_pos);
@@ -444,8 +438,7 @@ fn scan_number(c: char, rdr: @mut StringReader) -> token::Token {
        }
    }
    let mut is_float = false;
-    if rdr.curr == '.' && !(is_alpha(nextch(rdr)) || nextch(rdr) == '_' ||
-                            nextch(rdr) == '.') {
+    if rdr.curr == '.' && !(ident_start(nextch(rdr)) || nextch(rdr) == '.') {
        is_float = true;
        bump(rdr);
        let dec_part = scan_digits(rdr, 10u);

--- a/src/libsyntax/parse/mod.rs
+++ b/src/libsyntax/parse/mod.rs
--- a/src/libsyntax/parse/obsolete.rs
+++ b/src/libsyntax/parse/obsolete.rs
@@ -259,7 +259,7 @@ fn eat_obsolete_ident(&self, ident: &str) -> bool {
    fn try_parse_obsolete_struct_ctor(&self) -> bool {
        if self.eat_obsolete_ident("new") {
            self.obsolete(*self.last_span, ObsoleteStructCtor);
-            self.parse_fn_decl(|p| p.parse_arg());
+            self.parse_fn_decl();
            self.parse_block();
            true
        } else {
@@ -288,7 +288,7 @@ fn try_parse_obsolete_priv_section(&self) -> bool {
            self.eat_keyword(&~"priv");
            self.bump();
            while *self.token != token::RBRACE {
-                self.parse_single_class_item(ast::private);
+                self.parse_single_struct_field(ast::private);
            }
            self.bump();
            true

--- a/src/libsyntax/parse/parser.rs
+++ b/src/libsyntax/parse/parser.rs
--- a/src/libsyntax/parse/prec.rs
+++ b/src/libsyntax/parse/prec.rs
-// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
-// file at the top-level directory of this distribution and at
-// http://rust-lang.org/COPYRIGHT.
-//
-// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
-// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
-// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
-// option. This file may not be copied, modified, or distributed
-// except according to those terms.
-
-use ast;
-use ast::*;
-use parse::token::*;
-use parse::token::Token;
-
-/// Unary operators have higher precedence than binary
-pub static unop_prec: uint = 100u;
-
-/**
- * Precedence of the `as` operator, which is a binary operator
- * but is not represented in the precedence table.
- */
-pub static as_prec: uint = 11u;
-
-/**
- * Maps a token to a record specifying the corresponding binary
- * operator and its precedence
- */
-pub fn token_to_binop(tok: Token) -> Option<ast::binop> {
-  match tok {
-      BINOP(STAR)    => Some(mul),
-      BINOP(SLASH)   => Some(quot),
-      BINOP(PERCENT) => Some(rem),
-      // 'as' sits between here with 11
-      BINOP(PLUS)    => Some(add),
-      BINOP(MINUS)   => Some(subtract),
-      BINOP(SHL)     => Some(shl),
-      BINOP(SHR)     => Some(shr),
-      BINOP(AND)     => Some(bitand),
-      BINOP(CARET)   => Some(bitxor),
-      BINOP(OR)      => Some(bitor),
-      LT             => Some(lt),
-      LE             => Some(le),
-      GE             => Some(ge),
-      GT             => Some(gt),
-      EQEQ           => Some(eq),
-      NE             => Some(ne),
-      ANDAND         => Some(and),
-      OROR           => Some(or),
-      _              => None
-  }
-}
--- a/src/libsyntax/parse/token.rs
+++ b/src/libsyntax/parse/token.rs
@@ -364,6 +364,34 @@ fn iter_bytes(&self, lsb0: bool, f: to_bytes::Cb) {
    }
 }

+/**
+ * Maps a token to the corresponding binary operator, or
+ * None if the token is not a binary operator
+ */
+pub fn token_to_binop(tok: Token) -> Option<ast::binop> {
+  match tok {
+      BINOP(STAR)    => Some(ast::mul),
+      BINOP(SLASH)   => Some(ast::quot),
+      BINOP(PERCENT) => Some(ast::rem),
+      BINOP(PLUS)    => Some(ast::add),
+      BINOP(MINUS)   => Some(ast::subtract),
+      BINOP(SHL)     => Some(ast::shl),
+      BINOP(SHR)     => Some(ast::shr),
+      BINOP(AND)     => Some(ast::bitand),
+      BINOP(CARET)   => Some(ast::bitxor),
+      BINOP(OR)      => Some(ast::bitor),
+      LT             => Some(ast::lt),
+      LE             => Some(ast::le),
+      GE             => Some(ast::ge),
+      GT             => Some(ast::gt),
+      EQEQ           => Some(ast::eq),
+      NE             => Some(ast::ne),
+      ANDAND         => Some(ast::and),
+      OROR           => Some(ast::or),
+      _              => None
+  }
+}
+
 pub struct ident_interner {
    priv interner: Interner<@~str>,
 }
@@ -390,60 +418,68 @@ fn find_equiv<Q:Hash + IterBytes + Equiv<@~str>>(&self, val: &Q)
    }
 }

+// return a fresh interner, preloaded with special identifiers.
+// EFFECT: stores this interner in TLS
+pub fn mk_fresh_ident_interner() -> @ident_interner {
+    // the indices here must correspond to the numbers in
+    // special_idents.
+    let init_vec = ~[
+        @~"_",                  // 0
+        @~"anon",               // 1
+        @~"drop",               // 2
+        @~"",                   // 3
+        @~"unary",              // 4
+        @~"!",                  // 5
+        @~"[]",                 // 6
+        @~"unary-",             // 7
+        @~"__extensions__",     // 8
+        @~"self",               // 9
+        @~"item",               // 10
+        @~"block",              // 11
+        @~"stmt",               // 12
+        @~"pat",                // 13
+        @~"expr",               // 14
+        @~"ty",                 // 15
+        @~"ident",              // 16
+        @~"path",               // 17
+        @~"tt",                 // 18
+        @~"matchers",           // 19
+        @~"str",                // 20
+        @~"TyVisitor",          // 21
+        @~"arg",                // 22
+        @~"descrim",            // 23
+        @~"__rust_abi",         // 24
+        @~"__rust_stack_shim",  // 25
+        @~"TyDesc",             // 26
+        @~"dtor",               // 27
+        @~"main",               // 28
+        @~"<opaque>",           // 29
+        @~"blk",                // 30
+        @~"static",             // 31
+        @~"intrinsic",          // 32
+        @~"__foreign_mod__",    // 33
+        @~"__field__",          // 34
+        @~"C",                  // 35
+        @~"Self",               // 36
+    ];
+
+    let rv = @ident_interner {
+        interner: interner::Interner::prefill(init_vec)
+    };
+    unsafe {
+        task::local_data::local_data_set(interner_key!(), @rv);
+    }
+    rv
+}
+
+// if an interner exists in TLS, return it. Otherwise, prepare a
+// fresh one.
 pub fn mk_ident_interner() -> @ident_interner {
    unsafe {
        match task::local_data::local_data_get(interner_key!()) {
            Some(interner) => *interner,
            None => {
-                // the indices here must correspond to the numbers in
-                // special_idents.
-                let init_vec = ~[
-                    @~"_",                  // 0
-                    @~"anon",               // 1
-                    @~"drop",               // 2
-                    @~"",                   // 3
-                    @~"unary",              // 4
-                    @~"!",                  // 5
-                    @~"[]",                 // 6
-                    @~"unary-",             // 7
-                    @~"__extensions__",     // 8
-                    @~"self",               // 9
-                    @~"item",               // 10
-                    @~"block",              // 11
-                    @~"stmt",               // 12
-                    @~"pat",                // 13
-                    @~"expr",               // 14
-                    @~"ty",                 // 15
-                    @~"ident",              // 16
-                    @~"path",               // 17
-                    @~"tt",                 // 18
-                    @~"matchers",           // 19
-                    @~"str",                // 20
-                    @~"TyVisitor",          // 21
-                    @~"arg",                // 22
-                    @~"descrim",            // 23
-                    @~"__rust_abi",         // 24
-                    @~"__rust_stack_shim",  // 25
-                    @~"TyDesc",             // 26
-                    @~"dtor",               // 27
-                    @~"main",               // 28
-                    @~"<opaque>",           // 29
-                    @~"blk",                // 30
-                    @~"static",             // 31
-                    @~"intrinsic",          // 32
-                    @~"__foreign_mod__",    // 33
-                    @~"__field__",          // 34
-                    @~"C",                  // 35
-                    @~"Self",               // 36
-                ];
-
-                let rv = @ident_interner {
-                    interner: interner::Interner::prefill(init_vec)
-                };
-
-                task::local_data::local_data_set(interner_key!(), @rv);
-
-                rv
+                mk_fresh_ident_interner()
            }
        }
    }

--- a/src/libsyntax/syntax.rc
+++ b/src/libsyntax/syntax.rc
@@ -8,6 +8,11 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.

+/*! This module contains the Rust parser. It maps source text
+ *  to token trees and to ASTs. It contains code for expanding
+ *  macros.
+ */
+
 #[link(name = "syntax",
       vers = "0.7-pre",
       uuid = "9311401b-d6ea-4cd9-a1d9-61f89499c645")];

--- a/src/test/compile-fail/enums-pats-not-idents.rs
+++ b/src/test/compile-fail/enums-pats-not-idents.rs
+// Copyright 2012 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//error-pattern:unresolved enum variant
+
+fn main() {
+    // a bug in the parser is allowing this:
+    let a() = 13;
+}