提交 1a0a4b1c 编写于 作者: H hannesw

8016681: regex capture behaves differently than on V8

Reviewed-by: lagergren, sundar
上级 c1624b82
......@@ -57,7 +57,10 @@ final class RegExpScanner extends Scanner {
private final LinkedList<Integer> forwardReferences = new LinkedList<>();
/** Current level of zero-width negative lookahead assertions. */
private int negativeLookaheadLevel;
private int negLookaheadLevel;
/** Sequential id of current top-level zero-width negative lookahead assertion. */
private int negLookaheadGroup;
/** Are we currently inside a character class? */
private boolean inCharClass = false;
......@@ -68,17 +71,18 @@ final class RegExpScanner extends Scanner {
private static final String NON_IDENT_ESCAPES = "$^*+(){}[]|\\.?-";
private static class Capture {
/**
* Zero-width negative lookaheads enclosing the capture.
*/
private final int negativeLookaheadLevel;
/** Zero-width negative lookaheads enclosing the capture. */
private final int negLookaheadLevel;
/** Sequential id of top-level negative lookaheads containing the capture. */
private final int negLookaheadGroup;
Capture(final int negativeLookaheadLevel) {
this.negativeLookaheadLevel = negativeLookaheadLevel;
Capture(final int negLookaheadGroup, final int negLookaheadLevel) {
this.negLookaheadGroup = negLookaheadGroup;
this.negLookaheadLevel = negLookaheadLevel;
}
public int getNegativeLookaheadLevel() {
return negativeLookaheadLevel;
/**
 * Tells whether a backreference located at the given position may refer to this capture.
 *
 * A capture that is not enclosed by any negative lookahead (its own level is 0) can be
 * referenced from anywhere. Its stored group id is only the value the scanner's counter
 * had when the capture was created, and that counter advances with every later top-level
 * negative lookahead, so the id must not be compared in that case. A capture inside a
 * negative lookahead is only visible from within the same top-level negative lookahead,
 * at the same or a deeper nesting level.
 *
 * @param group sequential id of the top-level negative lookahead at the reference position
 * @param level negative lookahead nesting level at the reference position
 * @return true if the reference may see this capture, false if it must be treated as undefined
 */
boolean isContained(final int group, final int level) {
    if (this.negLookaheadLevel == 0) {
        // Not inside a negative lookahead: always referenceable, regardless of group id.
        return true;
    }
    return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
}
}
......@@ -152,7 +156,7 @@ final class RegExpScanner extends Scanner {
BitVector vec = null;
for (int i = 0; i < caps.size(); i++) {
final Capture cap = caps.get(i);
if (cap.getNegativeLookaheadLevel() > 0) {
if (cap.negLookaheadLevel > 0) {
if (vec == null) {
vec = new BitVector(caps.size() + 1);
}
......@@ -311,11 +315,14 @@ final class RegExpScanner extends Scanner {
commit(3);
if (isNegativeLookahead) {
negativeLookaheadLevel++;
if (negLookaheadLevel == 0) {
negLookaheadGroup++;
}
negLookaheadLevel++;
}
disjunction();
if (isNegativeLookahead) {
negativeLookaheadLevel--;
negLookaheadLevel--;
}
if (ch0 == ')') {
......@@ -432,20 +439,17 @@ final class RegExpScanner extends Scanner {
}
if (ch0 == '(') {
boolean capturingParens = true;
commit(1);
if (ch0 == '?' && ch1 == ':') {
capturingParens = false;
commit(2);
} else {
caps.add(new Capture(negLookaheadGroup, negLookaheadLevel));
}
disjunction();
if (ch0 == ')') {
commit(1);
if (capturingParens) {
caps.add(new Capture(negativeLookaheadLevel));
}
return true;
}
}
......@@ -675,24 +679,22 @@ final class RegExpScanner extends Scanner {
sb.setLength(sb.length() - 1);
octalOrLiteral(Integer.toString(decimalValue), sb);
} else if (decimalValue <= caps.size() && caps.get(decimalValue - 1).getNegativeLookaheadLevel() > 0) {
// Captures that live inside a negative lookahead are dead after the
// lookahead and will be undefined if referenced from outside.
if (caps.get(decimalValue - 1).getNegativeLookaheadLevel() > negativeLookaheadLevel) {
} else if (decimalValue <= caps.size()) {
// Captures inside a negative lookahead are undefined when referenced from the outside.
if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
// Reference to capture in negative lookahead, omit from output buffer.
sb.setLength(sb.length() - 1);
} else {
// Append backreference to output buffer.
sb.append(decimalValue);
}
} else if (decimalValue > caps.size()) {
// Forward reference to a capture group. Forward references are always undefined so we can omit
// it from the output buffer. However, if the target capture does not exist, we need to rewrite
// the reference as hex escape or literal string, so register the reference for later processing.
} else {
// Forward references to a capture group are always undefined so we can omit it from the output buffer.
// However, if the target capture does not exist, we need to rewrite the reference as hex escape
// or literal string, so register the reference for later processing.
sb.setLength(sb.length() - 1);
forwardReferences.add(decimalValue);
forwardReferences.add(sb.length());
} else {
// Append as backreference
sb.append(decimalValue);
}
}
......
/*
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8016681: regex capture behaves differently than on V8
*
* @test
* @run
*/
// regexp similar to the one used in marked.js
// Prints the full match followed by each capture group, one per line.
/^((?:[^\n]+\n?(?!( *[-*_]){3,} *(?:\n+|$)| *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)|([^\n]+)\n *(=|-){3,} *\n*))+)\n*/
.exec("a\n\nb")
.forEach(function(e) { print(e); });
// simplified regexp
// Captures 2 and 3 each live in their own negative lookahead.
/(x(?!(a))(?!(b))y)/
.exec("xy")
.forEach(function(e) { print(e); });
// should not match as cross-negative-lookahead backreference \2 should be undefined
print(/(x(?!(a))(?!(b)\2))/.exec("xbc"));
a
a
undefined
undefined
undefined
undefined
undefined
xy
xy
undefined
undefined
null
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册