提交 2fc1a820 编写于 作者: O okutsu

6644493: [Fmt-Da] SimpleDateFormat parsing RFC822 time offset is slow

Reviewed-by: peytoia
上级 53b02057
...@@ -1662,6 +1662,81 @@ public class SimpleDateFormat extends DateFormat { ...@@ -1662,6 +1662,81 @@ public class SimpleDateFormat extends DateFormat {
return 0; return 0;
} }
/**
 * Reads a numeric time zone offset such as "hh:mm" or "hhmm" from the
 * given text and stores the resulting offset in calb.
 *
 * @param text  the text to be parsed
 * @param start the character position at which parsing begins
 * @param sign  1 for a positive offset; -1 for a negative offset
 * @param count 0 for 'Z' or "GMT+hh:mm" parsing; 1 - 3 for the number of 'X's
 * @param colon true if a colon must separate hh and mm; false if the
 *              minutes follow the hours directly
 * @param calb  the CalendarBuilder that receives ZONE_OFFSET and DST_OFFSET
 * @return the position just past the parsed offset on success; otherwise
 *         the negated position of the character at which parsing failed
 */
private int subParseNumericZone(String text, int start, int sign, int count,
                                boolean colon, CalendarBuilder calb) {
    // cursor is advanced before each character is fetched, so (cursor - 1)
    // is always the position of the character being examined -- including
    // the case where charAt() runs off the end of the text.
    int cursor = start;
    try {
        // hh: the first digit is mandatory.
        char ch = text.charAt(cursor++);
        if (!isDigit(ch)) {
            return 1 - cursor;
        }
        int hh = ch - '0';

        // hh: the second digit is optional only for the colon-separated
        // "GMT+h:mm" style (count == 0 with a required colon).
        ch = text.charAt(cursor++);
        if (isDigit(ch)) {
            hh = 10 * hh + (ch - '0');
        } else if (count <= 0 && colon) {
            // Un-read the non-digit so it can be matched as the colon.
            --cursor;
        } else {
            // RFC 822 and 'X' (ISO) forms require two hour digits.
            return 1 - cursor;
        }
        if (hh > 23) {
            return 1 - cursor;
        }

        int mm = 0;
        if (count != 1) {
            // Anything but a single 'X' carries a two-digit minute field.
            ch = text.charAt(cursor++);
            if (colon) {
                if (ch != ':') {
                    return 1 - cursor;
                }
                ch = text.charAt(cursor++);
            }
            if (!isDigit(ch)) {
                return 1 - cursor;
            }
            int tens = ch - '0';
            ch = text.charAt(cursor++);
            if (!isDigit(ch)) {
                return 1 - cursor;
            }
            mm = tens * 10 + (ch - '0');
            if (mm > 59) {
                return 1 - cursor;
            }
        }

        int totalMinutes = mm + hh * 60;
        calb.set(Calendar.ZONE_OFFSET, totalMinutes * MILLIS_PER_MINUTE * sign)
            .set(Calendar.DST_OFFSET, 0);
        return cursor;
    } catch (IndexOutOfBoundsException ignored) {
        // The text ended in the middle of the offset; report it as a
        // parse error at the position that could not be read.
        return 1 - cursor; // -(cursor - 1)
    }
}
/**
 * Tells whether the given character is one of the ASCII digits
 * '0' through '9'.
 */
private boolean isDigit(char c) {
    int value = c - '0';
    return value >= 0 && value <= 9;
}
/** /**
* Private member function that converts the parsed date strings into * Private member function that converts the parsed date strings into
* timeFields. Returns -start (for ParsePosition) if failed. * timeFields. Returns -start (for ParsePosition) if failed.
...@@ -1907,170 +1982,74 @@ public class SimpleDateFormat extends DateFormat { ...@@ -1907,170 +1982,74 @@ public class SimpleDateFormat extends DateFormat {
case PATTERN_ZONE_NAME: // 'z' case PATTERN_ZONE_NAME: // 'z'
case PATTERN_ZONE_VALUE: // 'Z' case PATTERN_ZONE_VALUE: // 'Z'
// First try to parse generic forms such as GMT-07:00. Do this first
// in case localized TimeZoneNames contains the string "GMT"
// for a zone; in that case, we don't want to match the first three
// characters of GMT+/-hh:mm etc.
{ {
int sign = 0; int sign = 0;
int offset; try {
// For time zones that have no known names, look for strings
// of the form:
// GMT[+-]hours:minutes or
// GMT.
if ((text.length() - start) >= GMT.length() &&
text.regionMatches(true, start, GMT, 0, GMT.length())) {
int num;
calb.set(Calendar.DST_OFFSET, 0);
pos.index = start + GMT.length();
try { // try-catch for "GMT" only time zone string
char c = text.charAt(pos.index); char c = text.charAt(pos.index);
if (c == '+') { if (c == '+') {
sign = 1; sign = 1;
} else if (c == '-') { } else if (c == '-') {
sign = -1; sign = -1;
} }
} if (sign == 0) {
catch(StringIndexOutOfBoundsException e) {} // Try parsing a custom time zone "GMT+hh:mm" or "GMT".
if ((c == 'G' || c == 'g')
if (sign == 0) { /* "GMT" without offset */ && (text.length() - start) >= GMT.length()
calb.set(Calendar.ZONE_OFFSET, 0); && text.regionMatches(true, start, GMT, 0, GMT.length())) {
return pos.index; pos.index = start + GMT.length();
}
// Look for hours.
try {
char c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
num = c - '0';
if (text.charAt(++pos.index) != ':') { if ((text.length() - pos.index) > 0) {
c = text.charAt(pos.index); c = text.charAt(pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
num *= 10;
num += c - '0';
pos.index++;
}
if (num > 23) {
--pos.index;
break parsing;
}
if (text.charAt(pos.index) != ':') {
break parsing;
}
// Look for minutes.
offset = num * 60;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
num = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
num *= 10;
num += c - '0';
if (num > 59) {
break parsing;
}
} catch (StringIndexOutOfBoundsException e) {
break parsing;
}
offset += num;
// Fall through for final processing below of 'offset' and 'sign'.
} else {
// If the first character is a sign, look for numeric timezones of
// the form [+-]hhmm as specified by RFC 822. Otherwise, check
// for named time zones by looking through the locale data from
// the TimeZoneNames strings.
try {
char c = text.charAt(pos.index);
if (c == '+') { if (c == '+') {
sign = 1; sign = 1;
} else if (c == '-') { } else if (c == '-') {
sign = -1; sign = -1;
} else {
// Try parsing the text as a time zone name (abbr).
int i = subParseZoneString(text, pos.index, calb);
if (i != 0) {
return i;
}
break parsing;
}
// Parse the text as an RFC 822 time zone string. This code is
// actually a little more permissive than RFC 822. It will
// try to do its best with numbers that aren't strictly 4
// digits long.
// Look for hh.
int hours = 0;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
} }
hours = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
} }
hours *= 10;
hours += c - '0';
if (hours > 23) { if (sign == 0) { /* "GMT" without offset */
break parsing; calb.set(Calendar.ZONE_OFFSET, 0)
.set(Calendar.DST_OFFSET, 0);
return pos.index;
} }
// Look for mm. // Parse the rest as "hh:mm"
int minutes = 0; int i = subParseNumericZone(text, ++pos.index,
c = text.charAt(++pos.index); sign, 0, true, calb);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ if (i > 0) {
break parsing; return i;
} }
minutes = c - '0'; pos.index = -i;
c = text.charAt(++pos.index); } else {
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ // Try parsing the text as a time zone
break parsing; // name or abbreviation.
int i = subParseZoneString(text, pos.index, calb);
if (i > 0) {
return i;
} }
minutes *= 10; pos.index = -i;
minutes += c - '0';
if (minutes > 59) {
break parsing;
} }
} else {
offset = hours * 60 + minutes; // Parse the rest as "hhmm" (RFC 822)
} catch (StringIndexOutOfBoundsException e) { int i = subParseNumericZone(text, ++pos.index,
break parsing; sign, 0, false, calb);
if (i > 0) {
return i;
} }
pos.index = -i;
} }
} catch (IndexOutOfBoundsException e) {
// Do the final processing for both of the above cases. We only
// arrive here if the form GMT+/-... or an RFC 822 form was seen.
if (sign != 0) {
offset *= MILLIS_PER_MINUTE * sign;
calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0);
return ++pos.index;
} }
} }
break parsing; break parsing;
case PATTERN_ISO_ZONE: // 'X' case PATTERN_ISO_ZONE: // 'X'
{ {
int sign = 0; if ((text.length() - pos.index) <= 0) {
int offset = 0; break parsing;
}
iso8601: { int sign = 0;
try {
char c = text.charAt(pos.index); char c = text.charAt(pos.index);
if (c == 'Z') { if (c == 'Z') {
calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0); calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0);
...@@ -2082,73 +2061,16 @@ public class SimpleDateFormat extends DateFormat { ...@@ -2082,73 +2061,16 @@ public class SimpleDateFormat extends DateFormat {
sign = 1; sign = 1;
} else if (c == '-') { } else if (c == '-') {
sign = -1; sign = -1;
}
// Look for hh.
int hours = 0;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
hours = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
hours *= 10;
hours += c - '0';
if (hours > 23) {
break parsing;
}
if (count == 1) { // "X"
offset = hours * 60;
break iso8601;
}
c = text.charAt(++pos.index);
// Skip ':' if "XXX"
if (c == ':') {
if (count == 2) {
break parsing;
}
c = text.charAt(++pos.index);
} else { } else {
if (count == 3) { ++pos.index;
// missing ':'
break parsing;
}
}
// Look for mm.
int minutes = 0;
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes *= 10;
minutes += c - '0';
if (minutes > 59) {
break parsing;
}
offset = hours * 60 + minutes;
} catch (StringIndexOutOfBoundsException e) {
break parsing; break parsing;
} }
int i = subParseNumericZone(text, ++pos.index, sign, count,
count == 3, calb);
if (i > 0) {
return i;
} }
pos.index = -i;
// Do the final processing for both of the above cases. We only
// arrive here if the form GMT+/-... or an RFC 822 form was seen.
if (sign != 0) {
offset *= MILLIS_PER_MINUTE * sign;
calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0);
return ++pos.index;
}
} }
break parsing; break parsing;
......
...@@ -60,48 +60,51 @@ public class ISO8601ZoneTest { ...@@ -60,48 +60,51 @@ public class ISO8601ZoneTest {
"yyyy-MM-dd'T'HH:mm:ss.SSSXXX", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX",
}; };
// badData[][0] - format
// badData[][1] - (bad) text to be parsed
// badData[][2] - subtext at the end of which a parse error is detected
static final String[][] badData = { static final String[][] badData = {
{ "X", "1" }, { "X", "1", "1" },
{ "X", "+1" }, { "X", "+1", "+1" },
{ "X", "-2" }, { "X", "-2", "-2" },
{ "X", "-24" }, { "X", "-24", "-2" },
{ "X", "+24" }, { "X", "+24", "+2" },
{ "XX", "9" }, { "XX", "9", "9" },
{ "XX", "23" }, { "XX", "23", "2" },
{ "XX", "234" }, { "XX", "234", "2" },
{ "XX", "3456" }, { "XX", "3456", "3" },
{ "XX", "23456" }, { "XX", "23456", "2" },
{ "XX", "+1" }, { "XX", "+1", "+1" },
{ "XX", "-12" }, { "XX", "-12", "-12" },
{ "XX", "+123" }, { "XX", "+123", "+123" },
{ "XX", "-12:34" }, { "XX", "-12:34", "-12" },
{ "XX", "+12:34" }, { "XX", "+12:34", "+12" },
{ "XX", "-2423" }, { "XX", "-2423", "-2" },
{ "XX", "+2423" }, { "XX", "+2423", "+2" },
{ "XX", "-1260" }, { "XX", "-1260", "-126" },
{ "XX", "+1260" }, { "XX", "+1260", "+126" },
{ "XXX", "9" }, { "XXX", "9", "9" },
{ "XXX", "23" }, { "XXX", "23", "2" },
{ "XXX", "234" }, { "XXX", "234", "2" },
{ "XXX", "3456" }, { "XXX", "3456", "3" },
{ "XXX", "23456" }, { "XXX", "23456", "2" },
{ "XXX", "2:34" }, { "XXX", "2:34", "2" },
{ "XXX", "12:4" }, { "XXX", "12:4", "1" },
{ "XXX", "12:34" }, { "XXX", "12:34", "1" },
{ "XXX", "-1" }, { "XXX", "-1", "-1" },
{ "XXX", "+1" }, { "XXX", "+1", "+1" },
{ "XXX", "-12" }, { "XXX", "-12", "-12" },
{ "XXX", "+12" }, { "XXX", "+12", "+12" },
{ "XXX", "-123" }, { "XXX", "-123", "-12" },
{ "XXX", "+123" }, { "XXX", "+123", "+12" },
{ "XXX", "-1234" }, { "XXX", "-1234", "-12" },
{ "XXX", "+1234" }, { "XXX", "+1234", "+12" },
{ "XXX", "+24:23" }, { "XXX", "+24:23", "+2" },
{ "XXX", "+12:60" }, { "XXX", "+12:60", "+12:6" },
{ "XXX", "+1:23" }, { "XXX", "+1:23", "+1" },
{ "XXX", "+12:3" }, { "XXX", "+12:3", "+12:3" },
}; };
static String[] badFormats = { static String[] badFormats = {
...@@ -110,6 +113,8 @@ public class ISO8601ZoneTest { ...@@ -110,6 +113,8 @@ public class ISO8601ZoneTest {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
TimeZone tz = TimeZone.getDefault(); TimeZone tz = TimeZone.getDefault();
Locale loc = Locale.getDefault();
Locale.setDefault(Locale.US);
try { try {
for (int i = 0; i < formatData.length; i++) { for (int i = 0; i < formatData.length; i++) {
...@@ -128,7 +133,7 @@ public class ISO8601ZoneTest { ...@@ -128,7 +133,7 @@ public class ISO8601ZoneTest {
} }
for (String[] d : badData) { for (String[] d : badData) {
badDataParsing(d[0], d[1]); badDataParsing(d[0], d[1], d[2].length());
} }
for (String fmt : badFormats) { for (String fmt : badFormats) {
...@@ -136,6 +141,7 @@ public class ISO8601ZoneTest { ...@@ -136,6 +141,7 @@ public class ISO8601ZoneTest {
} }
} finally { } finally {
TimeZone.setDefault(tz); TimeZone.setDefault(tz);
Locale.setDefault(loc);
} }
} }
...@@ -188,15 +194,24 @@ public class ISO8601ZoneTest { ...@@ -188,15 +194,24 @@ public class ISO8601ZoneTest {
} }
static void badDataParsing(String fmt, String text) { static void badDataParsing(String fmt, String text, int expectedErrorIndex) {
try {
SimpleDateFormat sdf = new SimpleDateFormat(fmt); SimpleDateFormat sdf = new SimpleDateFormat(fmt);
try {
sdf.parse(text); sdf.parse(text);
throw new RuntimeException("didn't throw an exception: fmt=" + fmt throw new RuntimeException("didn't throw an exception: fmt=" + fmt
+ ", text=" + text); + ", text=" + text);
} catch (ParseException e) { } catch (ParseException e) {
// OK // OK
} }
ParsePosition pos = new ParsePosition(0);
Date d = sdf.parse(text, pos);
int errorIndex = pos.getErrorIndex();
if (d != null || errorIndex != expectedErrorIndex) {
throw new RuntimeException("Bad error index=" + errorIndex
+ ", expected=" + expectedErrorIndex
+ ", fmt=" + fmt + ", text=" + text);
}
} }
static void badFormat(String fmt) { static void badFormat(String fmt) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册