提交 25dc9282 编写于 作者: H hannesw

8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines

Reviewed-by: jlaskey, lagergren
上级 cda2da02
......@@ -26,11 +26,10 @@
package jdk.nashorn.internal.runtime.regexp;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.PatternSyntaxException;
import jdk.nashorn.internal.parser.Lexer;
......@@ -58,7 +57,7 @@ final class RegExpScanner extends Scanner {
private final List<Capture> caps = new LinkedList<>();
/** Forward references to capturing parenthesis to be resolved later.*/
private final Set<Integer> forwardReferences = new LinkedHashSet<>();
private final LinkedList<Integer> forwardReferences = new LinkedList<>();
/** Current level of zero-width negative lookahead assertions. */
private int negativeLookaheadLevel;
......@@ -104,10 +103,20 @@ final class RegExpScanner extends Scanner {
return;
}
for (final Integer ref : forwardReferences) {
if (ref.intValue() > caps.size()) {
neverMatches = true;
break;
Iterator<Integer> iterator = forwardReferences.descendingIterator();
while (iterator.hasNext()) {
final int pos = iterator.next();
final int num = iterator.next();
if (num > caps.size()) {
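// A reference to a non-existing capture group should never match. If its number is smaller
// than 8, emit the equivalent \x0N escape instead, to be compatible with other engines
// (which treat such a reference as an octal escape).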
if (num < 8) {
String escape = "\\x0" + num;
sb.insert(pos, escape);
} else {
neverMatches = true;
break;
}
}
}
......@@ -402,6 +411,10 @@ final class RegExpScanner extends Scanner {
if (ch0 == '}') {
pop('}');
commit(1);
} else {
// Bad quantifier should be rejected but is accepted by all major engines
restart(startIn, startOut);
return false;
}
return true;
......@@ -637,7 +650,16 @@ final class RegExpScanner extends Scanner {
throw new RuntimeException("\\ at end of pattern"); // will be converted to PatternSyntaxException
}
// ES 5.1 A.7 requires "not IdentifierPart" here but all major engines accept any character here.
if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
if (ch0 == 'c') {
// Ignore invalid control letter escape if within a character class
if (inCharClass && ch1 != ']') {
sb.setLength(sb.length() - 1);
skip(2);
return true;
} else {
sb.append('\\'); // Treat invalid \c control sequence as \\c
}
} else if (NON_IDENT_ESCAPES.indexOf(ch0) == -1) {
sb.setLength(sb.length() - 1);
}
return commit(1);
......@@ -677,8 +699,9 @@ final class RegExpScanner extends Scanner {
// Forward reference to a capture group. Forward references are always undefined so we
// can omit it from the output buffer. Additionally, if the capture group does not exist
// the whole regexp becomes invalid, so register the reference for later processing.
forwardReferences.add(num);
sb.setLength(sb.length() - 1);
forwardReferences.add(num);
forwardReferences.add(sb.length());
skip(1);
return true;
}
......
/*
* Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8009230: Nashorn rejects extended RegExp syntax accepted by all major JS engines
*
* @test
* @run
*/
// Regression checks for extended (non-standard) RegExp syntax that must be accepted.
// Each statement prints the result of exec(): the matched text, or null when nothing matches.
// Invalid ControlEscape/IdentityEscape character is treated as a literal.
print(/\z/.exec("z")); // Invalid escape, same as /z/
// Incomplete/invalid ControlEscape (\c not followed by a control letter) is treated as "\\c",
// i.e. a literal backslash followed by "c".
print(/\c/.exec("\\c")); // same as /\\c/
print(/\c2/.exec("\\c2")); // same as /\\c2/
print(/\C/.exec("C")); // same as /C/ (upper-case \C is just an identity escape)
print(/\C2/.exec("C2")); // same as /C2/
// Incomplete HexEscapeSequence (\x without two hex digits) is treated as a literal "x".
print(/\x/.exec("x")); // incomplete x-escape
print(/\x1/.exec("x1")); // incomplete x-escape
print(/\x1z/.exec("x1z")); // incomplete x-escape
// Incomplete UnicodeEscapeSequence (\u without four hex digits) is treated as a literal "u".
print(/\u/.exec("u")); // incomplete u-escape
print(/\uz/.exec("uz")); // incomplete u-escape
print(/\u1/.exec("u1")); // incomplete u-escape
print(/\u1z/.exec("u1z")); // incomplete u-escape
print(/\u12/.exec("u12")); // incomplete u-escape
print(/\u12z/.exec("u12z")); // incomplete u-escape
print(/\u123/.exec("u123")); // incomplete u-escape
print(/\u123z/.exec("u123z")); // incomplete u-escape
// Bad quantifier range: an unterminated or malformed {...} is matched literally.
print(/x{z/.exec("x{z")); // same as /x\{z/
print(/x{1z/.exec("x{1z")); // same as /x\{1z/
print(/x{1,z/.exec("x{1,z")); // same as /x\{1,z/
print(/x{1,2z/.exec("x{1,2z")); // same as /x\{1,2z/
print(/x{10000,20000z/.exec("x{10000,20000z")); // same as /x\{10000,20000z/
// Note: detecting that such a quantifier is invalid needs arbitrary lookahead,
// except in Mozilla, which limits the size of the numbers.
// Octal escapes with a leading zero. This one only has to parse; nothing is printed.
/\012/; // same as /\x0a/
// Non-existing back-references smaller than 8 are treated as octal escapes.
// The first two matches are unprintable control characters, so their output lines look empty.
print(/\5/.exec("\u0005")); // same as /\x05/
print(/\7/.exec("\u0007")); // same as /\x07/
print(/\8/.exec("\u0008")); // does not match (prints null)
// Invalid PatternCharacter is accepted unescaped and matched literally.
print(/]/.exec("]"));
print(/{/.exec("{"));
print(/}/.exec("}"));
// Bad escapes are also accepted inside a CharacterClass.
// A class matches a single character, so at most one character is printed per line.
print(/[\z]/.exec("z"));
print(/[\c]/.exec("c"));
print(/[\c2]/.exec("c")); // no match expected (prints null)
print(/[\x]/.exec("x"));
print(/[\x1]/.exec("x1")); // matches only the leading "x"
print(/[\x1z]/.exec("x1z")); // matches only the leading "x"
print(/[\u]/.exec("u"));
print(/[\uz]/.exec("u"));
print(/[\u1]/.exec("u"));
print(/[\u1z]/.exec("u"));
print(/[\u12]/.exec("u"));
print(/[\u12z]/.exec("u"));
print(/[\u123]/.exec("u"));
print(/[\u123z]/.exec("u"));
print(/[\012]/.exec("0")); // no match expected (prints null): the escape is not the digit "0"
print(/[\5]/.exec("5")); // no match expected (prints null): the escape is not the digit "5"
// And in addition:
print(/[\B]/.exec("B")); // \B inside a class matches a literal "B"
print(/()()[\2]/.exec("")); // \2 inside a class must not act as a back-reference to group 2; prints null
z
\c
\c2
C
C2
x
x1
x1z
u
uz
u1
u1z
u12
u12z
u123
u123z
x{z
x{1z
x{1,z
x{1,2z
x{10000,20000z


null
]
{
}
z
c
null
x
x
x
u
u
u
u
u
u
u
u
null
null
B
null
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册