From 80edfd76591fdb9beec061de3c05ef4e9d96ce56 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 13 Jun 2012 19:43:35 -0400 Subject: [PATCH] Revisit error message details for JSON input parsing. Instead of identifying error locations only by line number (which could be entirely unhelpful with long input lines), provide a fragment of the input text too, placing this info in a new CONTEXT entry. Make the error detail messages conform more closely to style guidelines, fix failure to expose some of them for translation, ensure compiler can check formats against supplied parameters. --- src/backend/utils/adt/json.c | 211 ++++++++++++++++++++++------- src/test/regress/expected/json.out | 99 +++++++++----- 2 files changed, 224 insertions(+), 86 deletions(-) diff --git a/src/backend/utils/adt/json.c b/src/backend/utils/adt/json.c index e79c2946d0..a7a7c2b3ad 100644 --- a/src/backend/utils/adt/json.c +++ b/src/backend/utils/adt/json.c @@ -43,8 +43,6 @@ typedef struct /* state of JSON lexer */ char *token_start; /* start of current token within input */ char *token_terminator; /* end of previous or current token */ JsonValueType token_type; /* type of current token, once it's known */ - int line_number; /* current line number (counting from 1) */ - char *line_start; /* start of current line within input (BROKEN!!) */ } JsonLexContext; typedef enum /* states of JSON parser */ @@ -78,6 +76,7 @@ static void json_lex_string(JsonLexContext *lex); static void json_lex_number(JsonLexContext *lex, char *s); static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex); static void report_invalid_token(JsonLexContext *lex); +static int report_json_context(JsonLexContext *lex); static char *extract_mb_char(char *s); static void composite_to_json(Datum composite, StringInfo result, bool use_line_feeds); @@ -185,8 +184,6 @@ json_validate_cstring(char *input) /* Set up lexing context. 
*/ lex.input = input; lex.token_terminator = lex.input; - lex.line_number = 1; - lex.line_start = input; /* Set up parse stack. */ stacksize = 32; @@ -335,11 +332,7 @@ json_lex(JsonLexContext *lex) /* Skip leading whitespace. */ s = lex->token_terminator; while (*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r') - { - if (*s == '\n') - lex->line_number++; s++; - } lex->token_start = s; /* Determine token type. */ @@ -350,7 +343,7 @@ json_lex(JsonLexContext *lex) { /* End of string. */ lex->token_start = NULL; - lex->token_terminator = NULL; + lex->token_terminator = s; } else { @@ -397,7 +390,8 @@ json_lex(JsonLexContext *lex) /* * We got some sort of unexpected punctuation or an otherwise * unexpected character, so just complain about that one - * character. + * character. (It can't be multibyte because the above loop + * will advance over any multibyte characters.) */ lex->token_terminator = s + 1; report_invalid_token(lex); @@ -443,11 +437,14 @@ json_lex_string(JsonLexContext *lex) lex->token_terminator = s; report_invalid_token(lex); } + /* Since *s isn't printable, exclude it from the context string */ + lex->token_terminator = s; ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Character with value \"0x%02x\" must be escaped.", - lex->line_number, (unsigned char) *s))); + errdetail("Character with value 0x%02x must be escaped.", + (unsigned char) *s), + report_json_context(lex))); } else if (*s == '\\') { @@ -465,38 +462,39 @@ json_lex_string(JsonLexContext *lex) for (i = 1; i <= 4; i++) { - if (s[i] == '\0') + s++; + if (*s == '\0') { - lex->token_terminator = s + i; + lex->token_terminator = s; report_invalid_token(lex); } - else if (s[i] >= '0' && s[i] <= '9') - ch = (ch * 16) + (s[i] - '0'); - else if (s[i] >= 'a' && s[i] <= 'f') - ch = (ch * 16) + (s[i] - 'a') + 10; - else if (s[i] >= 'A' && s[i] <= 'F') - ch = (ch * 16) + (s[i] - 'A') + 10; + else if (*s >= '0' && *s <= 
'9') + ch = (ch * 16) + (*s - '0'); + else if (*s >= 'a' && *s <= 'f') + ch = (ch * 16) + (*s - 'a') + 10; + else if (*s >= 'A' && *s <= 'F') + ch = (ch * 16) + (*s - 'A') + 10; else { + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: \"\\u\" must be followed by four hexadecimal digits.", - lex->line_number))); + errdetail("\"\\u\" must be followed by four hexadecimal digits."), + report_json_context(lex))); } } - - /* Account for the four additional bytes we just parsed. */ - s += 4; } else if (strchr("\"\\/bfnrt", *s) == NULL) { /* Not a valid string escape, so error out. */ + lex->token_terminator = s + pg_mblen(s); ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Invalid escape \"\\%s\".", - lex->line_number, extract_mb_char(s)))); + errdetail("Escape sequence \"\\%s\" is invalid.", + extract_mb_char(s)), + report_json_context(lex))); } } } @@ -599,68 +597,108 @@ json_lex_number(JsonLexContext *lex, char *s) /* * Report a parse error. + * + * lex->token_start and lex->token_terminator must identify the current token. */ static void report_parse_error(JsonParseStack *stack, JsonLexContext *lex) { - char *detail = NULL; - char *token = NULL; + char *token; int toklen; /* Handle case where the input ended prematurely. */ if (lex->token_start == NULL) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - errdetail("The input string ended unexpectedly."))); + errmsg("invalid input syntax for type json"), + errdetail("The input string ended unexpectedly."), + report_json_context(lex))); - /* Separate out the offending token. */ + /* Separate out the current token. 
*/ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); token[toklen] = '\0'; - /* Select correct detail message. */ + /* Complain, with the appropriate detail message. */ if (stack == NULL) - detail = "line %d: Expected end of input, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected end of input, but found \"%s\".", + token), + report_json_context(lex))); else { switch (stack->state) { case JSON_PARSE_VALUE: - detail = "line %d: Expected string, number, object, array, true, false, or null, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected JSON value, but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_START: - detail = "line %d: Expected array element or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected array element or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_ARRAY_NEXT: - detail = "line %d: Expected \",\" or \"]\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"]\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_START: - detail = "line %d: Expected string or \"}\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_LABEL: - detail = "line %d: Expected \":\", but found \"%s\"."; + ereport(ERROR, + 
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \":\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_NEXT: - detail = "line %d: Expected \",\" or \"}\", but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected \",\" or \"}\", but found \"%s\".", + token), + report_json_context(lex))); break; case JSON_PARSE_OBJECT_COMMA: - detail = "line %d: Expected string, but found \"%s\"."; + ereport(ERROR, + (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), + errmsg("invalid input syntax for type json"), + errdetail("Expected string, but found \"%s\".", + token), + report_json_context(lex))); break; + default: + elog(ERROR, "unexpected json parse state: %d", + (int) stack->state); } } - - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type json: \"%s\"", - lex->input), - detail ? errdetail(detail, lex->line_number, token) : 0)); } /* * Report an invalid input token. + * + * lex->token_start and lex->token_terminator must identify the token. */ static void report_invalid_token(JsonLexContext *lex) @@ -668,6 +706,7 @@ report_invalid_token(JsonLexContext *lex) char *token; int toklen; + /* Separate out the offending token. */ toklen = lex->token_terminator - lex->token_start; token = palloc(toklen + 1); memcpy(token, lex->token_start, toklen); @@ -676,8 +715,80 @@ report_invalid_token(JsonLexContext *lex) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type json"), - errdetail("line %d: Token \"%s\" is invalid.", - lex->line_number, token))); + errdetail("Token \"%s\" is invalid.", token), + report_json_context(lex))); +} + +/* + * Report a CONTEXT line for bogus JSON input. + * + * lex->token_terminator must be set to identify the spot where we detected + * the error. 
Note that lex->token_start might be NULL, in case we recognized + * an error at EOF. + * + * The return value isn't meaningful, but we make it non-void so that this + * can be invoked inside ereport(). + */ +static int +report_json_context(JsonLexContext *lex) +{ + const char *context_start; + const char *context_end; + const char *line_start; + int line_number; + char *ctxt; + int ctxtlen; + const char *prefix; + const char *suffix; + + /* Choose boundaries for the part of the input we will display */ + context_start = lex->input; + context_end = lex->token_terminator; + line_start = context_start; + line_number = 1; + for (;;) + { + /* Always advance over newlines (context_end test is just paranoia) */ + if (*context_start == '\n' && context_start < context_end) + { + context_start++; + line_start = context_start; + line_number++; + continue; + } + /* Otherwise, done as soon as we are close enough to context_end */ + if (context_end - context_start < 50) + break; + /* Advance to next multibyte character */ + if (IS_HIGHBIT_SET(*context_start)) + context_start += pg_mblen(context_start); + else + context_start++; + } + + /* + * We add "..." to indicate that the excerpt doesn't start at the + * beginning of the line ... but if we're within 3 characters of the + * beginning of the line, we might as well just show the whole line. + */ + if (context_start - line_start <= 3) + context_start = line_start; + + /* Get a null-terminated copy of the data to present */ + ctxtlen = context_end - context_start; + ctxt = palloc(ctxtlen + 1); + memcpy(ctxt, context_start, ctxtlen); + ctxt[ctxtlen] = '\0'; + + /* + * Show the context, prefixing "..." if not starting at start of line, and + * suffixing "..." if not ending at end of line. + */ + prefix = (context_start > line_start) ? "..." : ""; + suffix = (*context_end != '\0' && *context_end != '\n' && *context_end != '\r') ? "..." 
: ""; + + return errcontext("JSON data, line %d: %s%s%s", + line_number, prefix, ctxt, suffix); } /* diff --git a/src/test/regress/expected/json.out b/src/test/regress/expected/json.out index 4b1ad89de6..2dfe7bb0ee 100644 --- a/src/test/regress/expected/json.out +++ b/src/test/regress/expected/json.out @@ -9,7 +9,8 @@ SELECT $$''$$::json; -- ERROR, single quotes are not allowed ERROR: invalid input syntax for type json LINE 1: SELECT $$''$$::json; ^ -DETAIL: line 1: Token "'" is invalid. +DETAIL: Token "'" is invalid. +CONTEXT: JSON data, line 1: '... SELECT '"abc"'::json; -- OK json ------- @@ -20,13 +21,15 @@ SELECT '"abc'::json; -- ERROR, quotes not closed ERROR: invalid input syntax for type json LINE 1: SELECT '"abc'::json; ^ -DETAIL: line 1: Token ""abc" is invalid. +DETAIL: Token ""abc" is invalid. +CONTEXT: JSON data, line 1: "abc SELECT '"abc def"'::json; -- ERROR, unescaped newline in string constant ERROR: invalid input syntax for type json LINE 1: SELECT '"abc ^ -DETAIL: line 1: Character with value "0x0a" must be escaped. +DETAIL: Character with value 0x0a must be escaped. +CONTEXT: JSON data, line 1: "abc SELECT '"\n\"\\"'::json; -- OK, legal escapes json ---------- @@ -37,22 +40,26 @@ SELECT '"\v"'::json; -- ERROR, not a valid JSON escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\v"'::json; ^ -DETAIL: line 1: Invalid escape "\v". +DETAIL: Escape sequence "\v" is invalid. +CONTEXT: JSON data, line 1: "\v... SELECT '"\u"'::json; -- ERROR, incomplete escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\u"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u" SELECT '"\u00"'::json; -- ERROR, incomplete escape ERROR: invalid input syntax for type json LINE 1: SELECT '"\u00"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. +DETAIL: "\u" must be followed by four hexadecimal digits. 
+CONTEXT: JSON data, line 1: "\u00" SELECT '"\u000g"'::json; -- ERROR, g is not a hex digit ERROR: invalid input syntax for type json LINE 1: SELECT '"\u000g"'::json; ^ -DETAIL: line 1: "\u" must be followed by four hexadecimal digits. +DETAIL: "\u" must be followed by four hexadecimal digits. +CONTEXT: JSON data, line 1: "\u000g... SELECT '"\u0000"'::json; -- OK, legal escape json ---------- @@ -82,7 +89,8 @@ SELECT '01'::json; -- ERROR, not valid according to JSON spec ERROR: invalid input syntax for type json LINE 1: SELECT '01'::json; ^ -DETAIL: line 1: Token "01" is invalid. +DETAIL: Token "01" is invalid. +CONTEXT: JSON data, line 1: 01 SELECT '0.1'::json; -- OK json ------ @@ -111,17 +119,20 @@ SELECT '1f2'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '1f2'::json; ^ -DETAIL: line 1: Token "1f2" is invalid. +DETAIL: Token "1f2" is invalid. +CONTEXT: JSON data, line 1: 1f2 SELECT '0.x1'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '0.x1'::json; ^ -DETAIL: line 1: Token "0.x1" is invalid. +DETAIL: Token "0.x1" is invalid. +CONTEXT: JSON data, line 1: 0.x1 SELECT '1.3ex100'::json; -- ERROR ERROR: invalid input syntax for type json LINE 1: SELECT '1.3ex100'::json; ^ -DETAIL: line 1: Token "1.3ex100" is invalid. +DETAIL: Token "1.3ex100" is invalid. +CONTEXT: JSON data, line 1: 1.3ex100 -- Arrays. SELECT '[]'::json; -- OK json @@ -142,20 +153,23 @@ SELECT '[1,2]'::json; -- OK (1 row) SELECT '[1,2,]'::json; -- ERROR, trailing comma -ERROR: invalid input syntax for type json: "[1,2,]" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,2,]'::json; ^ -DETAIL: line 1: Expected string, number, object, array, true, false, or null, but found "]". +DETAIL: Expected JSON value, but found "]". 
+CONTEXT: JSON data, line 1: [1,2,] SELECT '[1,2'::json; -- ERROR, no closing bracket -ERROR: invalid input syntax for type json: "[1,2" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,2'::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,2 SELECT '[1,[2]'::json; -- ERROR, no closing bracket -ERROR: invalid input syntax for type json: "[1,[2]" +ERROR: invalid input syntax for type json LINE 1: SELECT '[1,[2]'::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: [1,[2] -- Objects. SELECT '{}'::json; -- OK json @@ -164,10 +178,11 @@ SELECT '{}'::json; -- OK (1 row) SELECT '{"abc"}'::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "{"abc"}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"}'::json; ^ -DETAIL: line 1: Expected ":", but found "}". +DETAIL: Expected ":", but found "}". +CONTEXT: JSON data, line 1: {"abc"} SELECT '{"abc":1}'::json; -- OK json ----------- @@ -175,25 +190,29 @@ SELECT '{"abc":1}'::json; -- OK (1 row) SELECT '{1:"abc"}'::json; -- ERROR, keys must be strings -ERROR: invalid input syntax for type json: "{1:"abc"}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{1:"abc"}'::json; ^ -DETAIL: line 1: Expected string or "}", but found "1". +DETAIL: Expected string or "}", but found "1". +CONTEXT: JSON data, line 1: {1... SELECT '{"abc",1}'::json; -- ERROR, wrong separator -ERROR: invalid input syntax for type json: "{"abc",1}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc",1}'::json; ^ -DETAIL: line 1: Expected ":", but found ",". +DETAIL: Expected ":", but found ",". +CONTEXT: JSON data, line 1: {"abc",... SELECT '{"abc"=1}'::json; -- ERROR, totally wrong separator ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"=1}'::json; ^ -DETAIL: line 1: Token "=" is invalid. +DETAIL: Token "=" is invalid. +CONTEXT: JSON data, line 1: {"abc"=... 
SELECT '{"abc"::1}'::json; -- ERROR, another wrong separator -ERROR: invalid input syntax for type json: "{"abc"::1}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc"::1}'::json; ^ -DETAIL: line 1: Expected string, number, object, array, true, false, or null, but found ":". +DETAIL: Expected JSON value, but found ":". +CONTEXT: JSON data, line 1: {"abc"::... SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK json --------------------------------------------------------- @@ -201,15 +220,17 @@ SELECT '{"abc":1,"def":2,"ghi":[3,4],"hij":{"klm":5,"nop":[6]}}'::json; -- OK (1 row) SELECT '{"abc":1:2}'::json; -- ERROR, colon in wrong spot -ERROR: invalid input syntax for type json: "{"abc":1:2}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc":1:2}'::json; ^ -DETAIL: line 1: Expected "," or "}", but found ":". +DETAIL: Expected "," or "}", but found ":". +CONTEXT: JSON data, line 1: {"abc":1:... SELECT '{"abc":1,3}'::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "{"abc":1,3}" +ERROR: invalid input syntax for type json LINE 1: SELECT '{"abc":1,3}'::json; ^ -DETAIL: line 1: Expected string, but found "3". +DETAIL: Expected string, but found "3". +CONTEXT: JSON data, line 1: {"abc":1,3... -- Miscellaneous stuff. SELECT 'true'::json; -- OK json @@ -236,35 +257,41 @@ SELECT ' true '::json; -- OK, even with extra whitespace (1 row) SELECT 'true false'::json; -- ERROR, too many values -ERROR: invalid input syntax for type json: "true false" +ERROR: invalid input syntax for type json LINE 1: SELECT 'true false'::json; ^ -DETAIL: line 1: Expected end of input, but found "false". +DETAIL: Expected end of input, but found "false". 
+CONTEXT: JSON data, line 1: true false SELECT 'true, false'::json; -- ERROR, too many values -ERROR: invalid input syntax for type json: "true, false" +ERROR: invalid input syntax for type json LINE 1: SELECT 'true, false'::json; ^ -DETAIL: line 1: Expected end of input, but found ",". +DETAIL: Expected end of input, but found ",". +CONTEXT: JSON data, line 1: true,... SELECT 'truf'::json; -- ERROR, not a keyword ERROR: invalid input syntax for type json LINE 1: SELECT 'truf'::json; ^ -DETAIL: line 1: Token "truf" is invalid. +DETAIL: Token "truf" is invalid. +CONTEXT: JSON data, line 1: truf SELECT 'trues'::json; -- ERROR, not a keyword ERROR: invalid input syntax for type json LINE 1: SELECT 'trues'::json; ^ -DETAIL: line 1: Token "trues" is invalid. +DETAIL: Token "trues" is invalid. +CONTEXT: JSON data, line 1: trues SELECT ''::json; -- ERROR, no value -ERROR: invalid input syntax for type json: "" +ERROR: invalid input syntax for type json LINE 1: SELECT ''::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: SELECT ' '::json; -- ERROR, no value -ERROR: invalid input syntax for type json: " " +ERROR: invalid input syntax for type json LINE 1: SELECT ' '::json; ^ DETAIL: The input string ended unexpectedly. +CONTEXT: JSON data, line 1: --constructors -- array_to_json SELECT array_to_json(array(select 1 as a)); -- GitLab