rollup merge of #20330: fhahn/issue-15877-model-lexer-range-2

This patch resolves the second problem mentioned in #15877: function calls on integers, e.g. `3.b()`. It does so by checking whether the character following the first dot of a LIT_FLOAT is a letter or an underscore (either of which starts a valid identifier). This does not look particularly elegant, but it seems that a lookahead of 1 is needed to make this distinction. Another interesting aspect is ranges that start with an integer constant but end with a function call, e.g. `1..b()`. Rust treats this as a range from 1 to `b()`, but given that `1.` is a valid LIT_FLOAT, `1..b()` could be read as a function call on a float as well. cc @cmr

rollup merge of #20330: fhahn/issue-15877-model-lexer-range-2
This patch resolves the second problem mentioned in #15877: function calls on integers, e.g. `3.b()`. It does so by checking whether the character following the first dot of a LIT_FLOAT is a letter or an underscore (either of which starts a valid identifier). This does not look particularly elegant, but it seems that a lookahead of 1 is needed to make this distinction. Another interesting aspect is ranges that start with an integer constant but end with a function call, e.g. `1..b()`. Rust treats this as a range from 1 to `b()`, but given that `1.` is a valid LIT_FLOAT, `1..b()` could be read as a function call on a float as well. cc @cmr
fa130ae6 · Alex Crichton · ecf48fb4 · 1e278c1c · fa130ae6 · fa130ae6
隐藏空白更改
内联并排

Showing 3 changed files with 77 additions and 4 deletions

src/grammar/README.md src/grammar/README.md +2 -2

src/grammar/RustLexer.g4 src/grammar/RustLexer.g4 +58 -2

src/grammar/check.sh src/grammar/check.sh +17 -0

未找到文件。
--- a/src/grammar/README.md
+++ b/src/grammar/README.md
 Reference grammar.

 Uses [antlr4](http://www.antlr.org/) and a custom Rust tool to compare
-ASTs/token streams generated. You can use the `check-syntax` make target to
+ASTs/token streams generated. You can use the `check-lexer` make target to
 run all of the available tests.

 To use manually:
@@ -12,7 +12,7 @@ javac *.java
 rustc -O verify.rs
 for file in ../*/**.rs; do
    echo $file;
-    grun RustLexer tokens -tokens < $file | ./verify $file || break
+    grun RustLexer tokens -tokens < $file | ./verify $file RustLexer.tokens || break
 done
 ```


--- a/src/grammar/RustLexer.g4
+++ b/src/grammar/RustLexer.g4
@@ -112,8 +112,64 @@ LIT_INTEGER
  ;

 LIT_FLOAT
-  : [0-9][0-9_]* ( '.' {_input.LA(1) != '.'}?
-                 | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
+  : [0-9][0-9_]* ('.' {
+        /* dot followed by another dot is a range, no float */
+        _input.LA(1) != '.' &&
+        /* dot followed by an identifier is an integer with a function call, no float */
+        _input.LA(1) != '_' &&
+        _input.LA(1) != 'a' &&
+        _input.LA(1) != 'b' &&
+        _input.LA(1) != 'c' &&
+        _input.LA(1) != 'd' &&
+        _input.LA(1) != 'e' &&
+        _input.LA(1) != 'f' &&
+        _input.LA(1) != 'g' &&
+        _input.LA(1) != 'h' &&
+        _input.LA(1) != 'i' &&
+        _input.LA(1) != 'j' &&
+        _input.LA(1) != 'k' &&
+        _input.LA(1) != 'l' &&
+        _input.LA(1) != 'm' &&
+        _input.LA(1) != 'n' &&
+        _input.LA(1) != 'o' &&
+        _input.LA(1) != 'p' &&
+        _input.LA(1) != 'q' &&
+        _input.LA(1) != 'r' &&
+        _input.LA(1) != 's' &&
+        _input.LA(1) != 't' &&
+        _input.LA(1) != 'u' &&
+        _input.LA(1) != 'v' &&
+        _input.LA(1) != 'w' &&
+        _input.LA(1) != 'x' &&
+        _input.LA(1) != 'y' &&
+        _input.LA(1) != 'z' &&
+        _input.LA(1) != 'A' &&
+        _input.LA(1) != 'B' &&
+        _input.LA(1) != 'C' &&
+        _input.LA(1) != 'D' &&
+        _input.LA(1) != 'E' &&
+        _input.LA(1) != 'F' &&
+        _input.LA(1) != 'G' &&
+        _input.LA(1) != 'H' &&
+        _input.LA(1) != 'I' &&
+        _input.LA(1) != 'J' &&
+        _input.LA(1) != 'K' &&
+        _input.LA(1) != 'L' &&
+        _input.LA(1) != 'M' &&
+        _input.LA(1) != 'N' &&
+        _input.LA(1) != 'O' &&
+        _input.LA(1) != 'P' &&
+        _input.LA(1) != 'Q' &&
+        _input.LA(1) != 'R' &&
+        _input.LA(1) != 'S' &&
+        _input.LA(1) != 'T' &&
+        _input.LA(1) != 'U' &&
+        _input.LA(1) != 'V' &&
+        _input.LA(1) != 'W' &&
+        _input.LA(1) != 'X' &&
+        _input.LA(1) != 'Y' &&
+        _input.LA(1) != 'Z'
+  }? | ('.' [0-9][0-9_]*)? ([eE] [-+]? [0-9][0-9_]*)? SUFFIX?)
  ;

 LIT_STR

--- a/src/grammar/check.sh
+++ b/src/grammar/check.sh
@@ -11,6 +11,10 @@ if [ "${VERBOSE}" == "1" ]; then
    set -x
 fi

+passed=0
+failed=0
+skipped=0
+
 check() {
    grep --silent "// ignore-lexer-test" $1;

@@ -21,14 +25,27 @@ check() {
        # seem to have anny effect.
        if $3 RustLexer tokens -tokens < $1 | $4 $1 $5; then
            echo "pass: $1"
+            passed=`expr $passed + 1`
        else
            echo "fail: $1"
+            failed=`expr $failed + 1`
        fi
    else
        echo "skip: $1"
+        skipped=`expr $skipped + 1`
    fi
 }

 for file in $(find $1 -iname '*.rs' ! -path '*/test/compile-fail*'); do
    check $file $2 $3 $4 $5
 done
+
+printf "\ntest result: "
+
+if [ $failed -eq 0 ]; then
+    printf "ok. $passed passed; $failed failed; $skipped skipped\n\n"
+else
+    printf "failed. $passed passed; $failed failed; $skipped skipped\n\n"
+    exit 1
+fi
+