提交 ba2c9571 编写于 作者: H hannesw

8030202: Nashorn: Multiple RegExp#ignoreCase issues

Reviewed-by: sundar, jlaskey
上级 5c466287
...@@ -771,7 +771,7 @@ final class Analyser extends Parser { ...@@ -771,7 +771,7 @@ final class Analyser extends Parser {
while (value < end) { while (value < end) {
int ovalue = value; int ovalue = value;
buf = Character.toLowerCase(chars[value++]); buf = EncodingHelper.toLowerCase(chars[value++]);
if (chars[ovalue] != buf) { if (chars[ovalue] != buf) {
...@@ -779,7 +779,7 @@ final class Analyser extends Parser { ...@@ -779,7 +779,7 @@ final class Analyser extends Parser {
System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p); System.arraycopy(chars, sn.p, sbuf, 0, ovalue - sn.p);
value = ovalue; value = ovalue;
while (value < end) { while (value < end) {
buf = Character.toLowerCase(chars[value++]); buf = EncodingHelper.toLowerCase(chars[value++]);
if (sp >= sbuf.length) { if (sp >= sbuf.length) {
char[]tmp = new char[sbuf.length << 1]; char[]tmp = new char[sbuf.length << 1];
System.arraycopy(sbuf, 0, tmp, 0, sbuf.length); System.arraycopy(sbuf, 0, tmp, 0, sbuf.length);
......
...@@ -20,71 +20,43 @@ ...@@ -20,71 +20,43 @@
package jdk.nashorn.internal.runtime.regexp.joni; package jdk.nashorn.internal.runtime.regexp.joni;
import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode; import jdk.nashorn.internal.runtime.regexp.joni.ast.CClassNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.ConsAltNode;
import jdk.nashorn.internal.runtime.regexp.joni.ast.StringNode;
final class ApplyCaseFold { final class ApplyCaseFold {
// i_apply_case_fold // i_apply_case_fold
public void apply(int from, int[]to, int length, Object o) { public void apply(int from, int to, Object o) {
ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o; ApplyCaseFoldArg arg = (ApplyCaseFoldArg)o;
ScanEnvironment env = arg.env; ScanEnvironment env = arg.env;
CClassNode cc = arg.cc; CClassNode cc = arg.cc;
BitSet bs = cc.bs; BitSet bs = cc.bs;
if (length == 1) {
boolean inCC = cc.isCodeInCC(from); boolean inCC = cc.isCodeInCC(from);
if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) { if (Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS) {
if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) { if ((inCC && !cc.isNot()) || (!inCC && cc.isNot())) {
if (to[0] >= BitSet.SINGLE_BYTE_SIZE) { if (to >= BitSet.SINGLE_BYTE_SIZE) {
cc.addCodeRange(env, to[0], to[0]); cc.addCodeRange(env, to, to);
} else { } else {
/* /(?i:[^A-C])/.match("a") ==> fail. */ /* /(?i:[^A-C])/.match("a") ==> fail. */
bs.set(to[0]); bs.set(to);
} }
} }
} else { } else {
if (inCC) { if (inCC) {
if (to[0] >= BitSet.SINGLE_BYTE_SIZE) { if (to >= BitSet.SINGLE_BYTE_SIZE) {
if (cc.isNot()) cc.clearNotFlag(); if (cc.isNot()) cc.clearNotFlag();
cc.addCodeRange(env, to[0], to[0]); cc.addCodeRange(env, to, to);
} else { } else {
if (cc.isNot()) { if (cc.isNot()) {
bs.clear(to[0]); bs.clear(to);
} else { } else {
bs.set(to[0]); bs.set(to);
} }
} }
} }
} // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS } // CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
} else {
if (cc.isCodeInCC(from) && (!Config.CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS || !cc.isNot())) {
StringNode node = null;
for (int i=0; i<length; i++) {
if (i == 0) {
node = new StringNode();
/* char-class expanded multi-char only
compare with string folded at match time. */
node.setAmbig();
}
node.catCode(to[i]);
}
ConsAltNode alt = ConsAltNode.newAltNode(node, null);
if (arg.tail == null) {
arg.altRoot = alt;
} else {
arg.tail.setCdr(alt);
}
arg.tail = alt;
}
}
} }
static final ApplyCaseFold INSTANCE = new ApplyCaseFold(); static final ApplyCaseFold INSTANCE = new ApplyCaseFold();
......
...@@ -58,8 +58,8 @@ class ByteCodeMachine extends StackMachine { ...@@ -58,8 +58,8 @@ class ByteCodeMachine extends StackMachine {
int end1 = s1 + mbLen; int end1 = s1 + mbLen;
while (s1 < end1) { while (s1 < end1) {
char c1 = Character.toLowerCase(chars[s1++]); char c1 = EncodingHelper.toLowerCase(chars[s1++]);
char c2 = Character.toLowerCase(chars[s2++]); char c2 = EncodingHelper.toLowerCase(chars[s2++]);
if (c1 != c2) { if (c1 != c2) {
return false; return false;
...@@ -367,7 +367,7 @@ class ByteCodeMachine extends StackMachine { ...@@ -367,7 +367,7 @@ class ByteCodeMachine extends StackMachine {
} }
private void opExact1IC() { private void opExact1IC() {
if (s >= range || code[ip] != Character.toLowerCase(chars[s++])) {opFail(); return;} if (s >= range || code[ip] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
ip++; ip++;
sprev = sbegin; // break; sprev = sbegin; // break;
} }
...@@ -380,10 +380,10 @@ class ByteCodeMachine extends StackMachine { ...@@ -380,10 +380,10 @@ class ByteCodeMachine extends StackMachine {
char[] bs = regex.templates[code[ip++]]; char[] bs = regex.templates[code[ip++]];
int ps = code[ip++]; int ps = code[ip++];
while (tlen-- > 0) if (bs[ps++] != Character.toLowerCase(chars[s++])) {opFail(); return;} while (tlen-- > 0) if (bs[ps++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
} else { } else {
while (tlen-- > 0) if (code[ip++] != Character.toLowerCase(chars[s++])) {opFail(); return;} while (tlen-- > 0) if (code[ip++] != EncodingHelper.toLowerCase(chars[s++])) {opFail(); return;}
} }
sprev = s - 1; sprev = s - 1;
} }
......
...@@ -93,41 +93,78 @@ public final class EncodingHelper { ...@@ -93,41 +93,78 @@ public final class EncodingHelper {
return s; return s;
} }
public static int mbcToCode(byte[] bytes, int p, int end) {
int code = 0;
for (int i = p; i < end; i++) {
code = (code << 8) | (bytes[i] & 0xff);
}
return code;
}
public static int mbcodeStartPosition() { public static int mbcodeStartPosition() {
return 0x80; return 0x80;
} }
public static char[] caseFoldCodesByString(int flag, char c) { public static char[] caseFoldCodesByString(int flag, char c) {
if (Character.isUpperCase(c)) { char[] codes = EMPTYCHARS;
return new char[] {Character.toLowerCase(c)}; final char upper = toUpperCase(c);
} else if (Character.isLowerCase(c)) {
return new char[] {Character.toUpperCase(c)}; if (upper != toLowerCase(upper)) {
} else { int count = 0;
return EMPTYCHARS; char ch = 0;
do {
final char u = toUpperCase(ch);
if (u == upper && ch != c) {
// Almost all characters will return array of length 1, very few 2 or 3, so growing by one is fine.
codes = count == 0 ? new char[1] : Arrays.copyOf(codes, count + 1);
codes[count++] = ch;
} }
} while (ch++ < 0xffff);
}
return codes;
} }
public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) { public static void applyAllCaseFold(int flag, ApplyCaseFold fun, Object arg) {
int[] code = new int[1]; for (int c = 0; c < 0xffff; c++) {
if (Character.isLowerCase(c)) {
final int upper = toUpperCase(c);
if (upper != c) {
fun.apply(c, upper, arg);
}
}
}
// Some characters have multiple lower case variants, hence we need to do a second run
for (int c = 0; c < 0xffff; c++) { for (int c = 0; c < 0xffff; c++) {
if (Character.getType(c) == Character.LOWERCASE_LETTER) { if (Character.isLowerCase(c)) {
final int upper = toUpperCase(c);
int upper = code[0] = Character.toUpperCase(c); if (upper != c) {
fun.apply(c, code, 1, arg); fun.apply(upper, c, arg);
}
}
}
}
public static char toLowerCase(char c) {
return (char)toLowerCase((int)c);
}
code[0] = c; public static int toLowerCase(int c) {
fun.apply(upper, code, 1, arg); if (c < 128) {
return ('A' <= c && c <= 'Z') ? (c + ('a' - 'A')) : c;
} }
// Do not convert non-ASCII upper case character to ASCII lower case.
int lower = Character.toLowerCase(c);
return (lower < 128) ? c : lower;
}
public static char toUpperCase(char c) {
return (char)toUpperCase((int)c);
}
public static int toUpperCase(int c) {
if (c < 128) {
return ('a' <= c && c <= 'z') ? c + ('A' - 'a') : c;
} }
// Do not convert non-ASCII lower case character to ASCII upper case.
int upper = Character.toUpperCase(c);
return (upper < 128) ? c : upper;
} }
public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) { public static int[] ctypeCodeRange(int ctype, IntHolder sbOut) {
......
...@@ -168,7 +168,7 @@ public abstract class SearchAlgorithm { ...@@ -168,7 +168,7 @@ public abstract class SearchAlgorithm {
char[] chars, int p, int end) { char[] chars, int p, int end) {
while (tP < tEnd) { while (tP < tEnd) {
if (t[tP++] != Character.toLowerCase(chars[p++])) return false; if (t[tP++] != EncodingHelper.toLowerCase(chars[p++])) return false;
} }
return true; return true;
} }
......
/*
* Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8030202: Nashorn: Multiple RegExp#ignoreCase issues
*
* @test
* @run
*/
// Regression checks for case-insensitive (/i) matching of non-ASCII characters.
// Every pair of characters is exercised both as a literal pattern and inside a
// character class, and in both directions (pattern char vs. subject char).
// Each print() emits one line; the results are compared line by line, in order,
// with the expected output recorded for this test.

// U+2160 (ROMAN NUMERAL ONE) and U+2170 (SMALL ROMAN NUMERAL ONE) are a simple
// upper/lower pair outside ASCII. Expected: true for all four.
print(/\u2160/i.test("\u2170"));
print(/[\u2160]/i.test("\u2170"));
print(/\u2170/i.test("\u2160"));
print(/[\u2170]/i.test("\u2160"));

// U+0130 (LATIN CAPITAL LETTER I WITH DOT ABOVE) versus ASCII 'i' (U+0069).
// Expected: false for all four.
// NOTE(review): presumably because a non-ASCII character must not be folded
// onto an ASCII one under ignoreCase -- confirm against ES5.1 15.10.2.8.
print(/\u0130/i.test("\u0069"));
print(/[\u0130]/i.test("\u0069"));
print(/\u0069/i.test("\u0130"));
print(/[\u0069]/i.test("\u0130"));

// U+1E9E (LATIN CAPITAL LETTER SHARP S) versus U+00DF (LATIN SMALL LETTER SHARP S).
// Expected: false for all four.
// NOTE(review): presumably because U+00DF has no single-character upper case
// mapping, so the two never canonicalize to the same value -- confirm.
print(/\u1e9e/i.test("\u00df"));
print(/[\u1e9e]/i.test("\u00df"));
print(/\u00df/i.test("\u1e9e"));
print(/[\u00df]/i.test("\u1e9e"));

// Negated classes for the same sharp-s pair. Since the two characters are not
// treated as case variants of each other above, each negated class is expected
// to accept the other character. Expected: true for both.
print(/[^\u1e9e]/i.test("\u00df"));
print(/[^\u00df]/i.test("\u1e9e"));

// Four characters expected to be mutually case-equivalent: U+0345 (COMBINING
// GREEK YPOGEGRAMMENI), U+0399 (GREEK CAPITAL LETTER IOTA), U+03B9 (GREEK SMALL
// LETTER IOTA) and U+1FBE (GREEK PROSGEGRAMMENI). The {4} quantifier requires
// the single pattern character to match all four subject characters in a row.
// Literal-pattern form. Expected: true for all four.
print(/\u0345{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/\u0399{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/\u03b9{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/\u1fbe{4}/i.test("\u0345\u0399\u03b9\u1fbe"));

// Same four-way equivalence, character-class form. Expected: true for all four.
print(/[\u0345]{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/[\u0399]{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/[\u03b9]{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
print(/[\u1fbe]{4}/i.test("\u0345\u0399\u03b9\u1fbe"));
true
true
true
true
false
false
false
false
false
false
false
false
true
true
true
true
true
true
true
true
true
true
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册