提交 2fc1a820 编写于 作者: O okutsu

6644493: [Fmt-Da] SimpleDateFormat parsing RFC822 time offset is slow

Reviewed-by: peytoia
上级 53b02057
...@@ -1662,6 +1662,81 @@ public class SimpleDateFormat extends DateFormat { ...@@ -1662,6 +1662,81 @@ public class SimpleDateFormat extends DateFormat {
return 0; return 0;
} }
/**
 * Parses a numeric time zone offset (the part following the sign), such
 * as "hh:mm" or "hhmm", and stores the result in {@code calb}.
 *
 * <p>On success, {@code Calendar.ZONE_OFFSET} is set to the signed offset
 * (minutes multiplied by {@code MILLIS_PER_MINUTE} and {@code sign}) and
 * {@code Calendar.DST_OFFSET} is set to 0.
 *
 * @param text  the text to be parsed
 * @param start the character position at which parsing starts
 * @param sign  1 for a positive offset; -1 for a negative offset
 * @param count 0 for 'Z' or "GMT+hh:mm" parsing; 1 - 3 for the number of 'X's
 * @param colon true if a colon is required between hh and mm;
 *              false if no colon is expected
 * @param calb  the CalendarBuilder that receives the parsed offset
 * @return the position following the parsed offset on success; otherwise
 *         the negated index of the character most recently read (or
 *         attempted to be read) when the error was detected
 */
private int subParseNumericZone(String text, int start, int sign, int count,
                                boolean colon, CalendarBuilder calb) {
    // Always points one past the character most recently requested, so
    // that (cursor - 1) is the error position whenever parsing fails,
    // including when charAt() runs off the end of the text.
    int cursor = start;
    try {
        // --- hh ---
        char ch = text.charAt(cursor++);
        if (!isDigit(ch)) {
            return 1 - cursor;
        }
        int hh = ch - '0';

        ch = text.charAt(cursor++);
        if (isDigit(ch)) {
            hh = hh * 10 + (ch - '0');
        } else if (count > 0 || !colon) {
            // Without a colon (RFC 822) or with 'X' (ISO), two hour
            // digits are mandatory.
            return 1 - cursor;
        } else {
            // Single-digit hour accepted; un-read the non-digit.
            cursor--;
        }
        if (hh > 23) {
            return 1 - cursor;
        }

        // --- mm (skipped entirely for a single 'X') ---
        int mm = 0;
        if (count != 1) {
            ch = text.charAt(cursor++);
            if (colon) {
                if (ch != ':') {
                    return 1 - cursor;
                }
                ch = text.charAt(cursor++);
            }
            if (!isDigit(ch)) {
                return 1 - cursor;
            }
            mm = ch - '0';

            ch = text.charAt(cursor++);
            if (!isDigit(ch)) {
                return 1 - cursor;
            }
            mm = mm * 10 + (ch - '0');
            if (mm > 59) {
                return 1 - cursor;
            }
        }

        int totalMinutes = mm + hh * 60;
        calb.set(Calendar.ZONE_OFFSET, totalMinutes * MILLIS_PER_MINUTE * sign)
            .set(Calendar.DST_OFFSET, 0);
        return cursor;
    } catch (IndexOutOfBoundsException e) {
        // Ran past the end of the text: report it as a parse error.
        return 1 - cursor; // -(cursor - 1)
    }
}
/**
 * Tells whether the given character is one of the ASCII decimal digits
 * '0' through '9'.
 *
 * @param c the character to test
 * @return true if {@code c} lies in the range '0'..'9'; false otherwise
 */
private boolean isDigit(char c) {
    return !(c < '0' || c > '9');
}
/** /**
* Private member function that converts the parsed date strings into * Private member function that converts the parsed date strings into
* timeFields. Returns -start (for ParsePosition) if failed. * timeFields. Returns -start (for ParsePosition) if failed.
...@@ -1907,248 +1982,95 @@ public class SimpleDateFormat extends DateFormat { ...@@ -1907,248 +1982,95 @@ public class SimpleDateFormat extends DateFormat {
case PATTERN_ZONE_NAME: // 'z' case PATTERN_ZONE_NAME: // 'z'
case PATTERN_ZONE_VALUE: // 'Z' case PATTERN_ZONE_VALUE: // 'Z'
// First try to parse generic forms such as GMT-07:00. Do this first
// in case localized TimeZoneNames contains the string "GMT"
// for a zone; in that case, we don't want to match the first three
// characters of GMT+/-hh:mm etc.
{ {
int sign = 0; int sign = 0;
int offset; try {
char c = text.charAt(pos.index);
// For time zones that have no known names, look for strings if (c == '+') {
// of the form: sign = 1;
// GMT[+-]hours:minutes or } else if (c == '-') {
// GMT. sign = -1;
if ((text.length() - start) >= GMT.length() &&
text.regionMatches(true, start, GMT, 0, GMT.length())) {
int num;
calb.set(Calendar.DST_OFFSET, 0);
pos.index = start + GMT.length();
try { // try-catch for "GMT" only time zone string
char c = text.charAt(pos.index);
if (c == '+') {
sign = 1;
} else if (c == '-') {
sign = -1;
}
}
catch(StringIndexOutOfBoundsException e) {}
if (sign == 0) { /* "GMT" without offset */
calb.set(Calendar.ZONE_OFFSET, 0);
return pos.index;
} }
if (sign == 0) {
// Look for hours. // Try parsing a custom time zone "GMT+hh:mm" or "GMT".
try { if ((c == 'G' || c == 'g')
char c = text.charAt(++pos.index); && (text.length() - start) >= GMT.length()
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ && text.regionMatches(true, start, GMT, 0, GMT.length())) {
break parsing; pos.index = start + GMT.length();
}
num = c - '0'; if ((text.length() - pos.index) > 0) {
c = text.charAt(pos.index);
if (text.charAt(++pos.index) != ':') { if (c == '+') {
c = text.charAt(pos.index); sign = 1;
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ } else if (c == '-') {
break parsing; sign = -1;
}
} }
num *= 10;
num += c - '0';
pos.index++;
}
if (num > 23) {
--pos.index;
break parsing;
}
if (text.charAt(pos.index) != ':') {
break parsing;
}
// Look for minutes. if (sign == 0) { /* "GMT" without offset */
offset = num * 60; calb.set(Calendar.ZONE_OFFSET, 0)
c = text.charAt(++pos.index); .set(Calendar.DST_OFFSET, 0);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ return pos.index;
break parsing; }
}
num = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
num *= 10;
num += c - '0';
if (num > 59) { // Parse the rest as "hh:mm"
break parsing; int i = subParseNumericZone(text, ++pos.index,
} sign, 0, true, calb);
} catch (StringIndexOutOfBoundsException e) { if (i > 0) {
break parsing; return i;
} }
offset += num; pos.index = -i;
// Fall through for final processing below of 'offset' and 'sign'.
} else {
// If the first character is a sign, look for numeric timezones of
// the form [+-]hhmm as specified by RFC 822. Otherwise, check
// for named time zones by looking through the locale data from
// the TimeZoneNames strings.
try {
char c = text.charAt(pos.index);
if (c == '+') {
sign = 1;
} else if (c == '-') {
sign = -1;
} else { } else {
// Try parsing the text as a time zone name (abbr). // Try parsing the text as a time zone
// name or abbreviation.
int i = subParseZoneString(text, pos.index, calb); int i = subParseZoneString(text, pos.index, calb);
if (i != 0) { if (i > 0) {
return i; return i;
} }
break parsing; pos.index = -i;
}
// Parse the text as an RFC 822 time zone string. This code is
// actually a little more permissive than RFC 822. It will
// try to do its best with numbers that aren't strictly 4
// digits long.
// Look for hh.
int hours = 0;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
} }
hours = c - '0'; } else {
c = text.charAt(++pos.index); // Parse the rest as "hhmm" (RFC 822)
if (c < '0' || c > '9') { /* must be from '0' to '9'. */ int i = subParseNumericZone(text, ++pos.index,
break parsing; sign, 0, false, calb);
} if (i > 0) {
hours *= 10; return i;
hours += c - '0';
if (hours > 23) {
break parsing;
}
// Look for mm.
int minutes = 0;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes *= 10;
minutes += c - '0';
if (minutes > 59) {
break parsing;
} }
pos.index = -i;
offset = hours * 60 + minutes;
} catch (StringIndexOutOfBoundsException e) {
break parsing;
} }
} } catch (IndexOutOfBoundsException e) {
// Do the final processing for both of the above cases. We only
// arrive here if the form GMT+/-... or an RFC 822 form was seen.
if (sign != 0) {
offset *= MILLIS_PER_MINUTE * sign;
calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0);
return ++pos.index;
} }
} }
break parsing; break parsing;
case PATTERN_ISO_ZONE: // 'X' case PATTERN_ISO_ZONE: // 'X'
{ {
int sign = 0; if ((text.length() - pos.index) <= 0) {
int offset = 0; break parsing;
iso8601: {
try {
char c = text.charAt(pos.index);
if (c == 'Z') {
calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0);
return ++pos.index;
}
// parse text as "+/-hh[[:]mm]" based on count
if (c == '+') {
sign = 1;
} else if (c == '-') {
sign = -1;
}
// Look for hh.
int hours = 0;
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
hours = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
hours *= 10;
hours += c - '0';
if (hours > 23) {
break parsing;
}
if (count == 1) { // "X"
offset = hours * 60;
break iso8601;
}
c = text.charAt(++pos.index);
// Skip ':' if "XXX"
if (c == ':') {
if (count == 2) {
break parsing;
}
c = text.charAt(++pos.index);
} else {
if (count == 3) {
// missing ':'
break parsing;
}
}
// Look for mm.
int minutes = 0;
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes = c - '0';
c = text.charAt(++pos.index);
if (c < '0' || c > '9') { /* must be from '0' to '9'. */
break parsing;
}
minutes *= 10;
minutes += c - '0';
if (minutes > 59) {
break parsing;
}
offset = hours * 60 + minutes;
} catch (StringIndexOutOfBoundsException e) {
break parsing;
}
} }
// Do the final processing for both of the above cases. We only int sign = 0;
// arrive here if the form GMT+/-... or an RFC 822 form was seen. char c = text.charAt(pos.index);
if (sign != 0) { if (c == 'Z') {
offset *= MILLIS_PER_MINUTE * sign; calb.set(Calendar.ZONE_OFFSET, 0).set(Calendar.DST_OFFSET, 0);
calb.set(Calendar.ZONE_OFFSET, offset).set(Calendar.DST_OFFSET, 0);
return ++pos.index; return ++pos.index;
} }
// parse text as "+/-hh[[:]mm]" based on count
if (c == '+') {
sign = 1;
} else if (c == '-') {
sign = -1;
} else {
++pos.index;
break parsing;
}
int i = subParseNumericZone(text, ++pos.index, sign, count,
count == 3, calb);
if (i > 0) {
return i;
}
pos.index = -i;
} }
break parsing; break parsing;
......
...@@ -60,48 +60,51 @@ public class ISO8601ZoneTest { ...@@ -60,48 +60,51 @@ public class ISO8601ZoneTest {
"yyyy-MM-dd'T'HH:mm:ss.SSSXXX", "yyyy-MM-dd'T'HH:mm:ss.SSSXXX",
}; };
// badData[][0] - format
// badData[][1] - (bad) text to be parsed
// badData[][2] - subtext at the end of which a parse error is detected
static final String[][] badData = { static final String[][] badData = {
{ "X", "1" }, { "X", "1", "1" },
{ "X", "+1" }, { "X", "+1", "+1" },
{ "X", "-2" }, { "X", "-2", "-2" },
{ "X", "-24" }, { "X", "-24", "-2" },
{ "X", "+24" }, { "X", "+24", "+2" },
{ "XX", "9" }, { "XX", "9", "9" },
{ "XX", "23" }, { "XX", "23", "2" },
{ "XX", "234" }, { "XX", "234", "2" },
{ "XX", "3456" }, { "XX", "3456", "3" },
{ "XX", "23456" }, { "XX", "23456", "2" },
{ "XX", "+1" }, { "XX", "+1", "+1" },
{ "XX", "-12" }, { "XX", "-12", "-12" },
{ "XX", "+123" }, { "XX", "+123", "+123" },
{ "XX", "-12:34" }, { "XX", "-12:34", "-12" },
{ "XX", "+12:34" }, { "XX", "+12:34", "+12" },
{ "XX", "-2423" }, { "XX", "-2423", "-2" },
{ "XX", "+2423" }, { "XX", "+2423", "+2" },
{ "XX", "-1260" }, { "XX", "-1260", "-126" },
{ "XX", "+1260" }, { "XX", "+1260", "+126" },
{ "XXX", "9" }, { "XXX", "9", "9" },
{ "XXX", "23" }, { "XXX", "23", "2" },
{ "XXX", "234" }, { "XXX", "234", "2" },
{ "XXX", "3456" }, { "XXX", "3456", "3" },
{ "XXX", "23456" }, { "XXX", "23456", "2" },
{ "XXX", "2:34" }, { "XXX", "2:34", "2" },
{ "XXX", "12:4" }, { "XXX", "12:4", "1" },
{ "XXX", "12:34" }, { "XXX", "12:34", "1" },
{ "XXX", "-1" }, { "XXX", "-1", "-1" },
{ "XXX", "+1" }, { "XXX", "+1", "+1" },
{ "XXX", "-12" }, { "XXX", "-12", "-12" },
{ "XXX", "+12" }, { "XXX", "+12", "+12" },
{ "XXX", "-123" }, { "XXX", "-123", "-12" },
{ "XXX", "+123" }, { "XXX", "+123", "+12" },
{ "XXX", "-1234" }, { "XXX", "-1234", "-12" },
{ "XXX", "+1234" }, { "XXX", "+1234", "+12" },
{ "XXX", "+24:23" }, { "XXX", "+24:23", "+2" },
{ "XXX", "+12:60" }, { "XXX", "+12:60", "+12:6" },
{ "XXX", "+1:23" }, { "XXX", "+1:23", "+1" },
{ "XXX", "+12:3" }, { "XXX", "+12:3", "+12:3" },
}; };
static String[] badFormats = { static String[] badFormats = {
...@@ -110,6 +113,8 @@ public class ISO8601ZoneTest { ...@@ -110,6 +113,8 @@ public class ISO8601ZoneTest {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
TimeZone tz = TimeZone.getDefault(); TimeZone tz = TimeZone.getDefault();
Locale loc = Locale.getDefault();
Locale.setDefault(Locale.US);
try { try {
for (int i = 0; i < formatData.length; i++) { for (int i = 0; i < formatData.length; i++) {
...@@ -128,7 +133,7 @@ public class ISO8601ZoneTest { ...@@ -128,7 +133,7 @@ public class ISO8601ZoneTest {
} }
for (String[] d : badData) { for (String[] d : badData) {
badDataParsing(d[0], d[1]); badDataParsing(d[0], d[1], d[2].length());
} }
for (String fmt : badFormats) { for (String fmt : badFormats) {
...@@ -136,6 +141,7 @@ public class ISO8601ZoneTest { ...@@ -136,6 +141,7 @@ public class ISO8601ZoneTest {
} }
} finally { } finally {
TimeZone.setDefault(tz); TimeZone.setDefault(tz);
Locale.setDefault(loc);
} }
} }
...@@ -188,15 +194,24 @@ public class ISO8601ZoneTest { ...@@ -188,15 +194,24 @@ public class ISO8601ZoneTest {
} }
static void badDataParsing(String fmt, String text) { static void badDataParsing(String fmt, String text, int expectedErrorIndex) {
SimpleDateFormat sdf = new SimpleDateFormat(fmt);
try { try {
SimpleDateFormat sdf = new SimpleDateFormat(fmt);
sdf.parse(text); sdf.parse(text);
throw new RuntimeException("didn't throw an exception: fmt=" + fmt throw new RuntimeException("didn't throw an exception: fmt=" + fmt
+ ", text=" + text); + ", text=" + text);
} catch (ParseException e) { } catch (ParseException e) {
// OK // OK
} }
ParsePosition pos = new ParsePosition(0);
Date d = sdf.parse(text, pos);
int errorIndex = pos.getErrorIndex();
if (d != null || errorIndex != expectedErrorIndex) {
throw new RuntimeException("Bad error index=" + errorIndex
+ ", expected=" + expectedErrorIndex
+ ", fmt=" + fmt + ", text=" + text);
}
} }
static void badFormat(String fmt) { static void badFormat(String fmt) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册