diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e7141677f783479d6304319df14436e9542359e3..4045a61de5a9cb54ad3438b878ef0757c36f3df0 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -441,6 +441,7 @@ class IColumn; M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \ \ M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \ + M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \ \ M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \ M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \ @@ -517,7 +518,7 @@ struct Settings : public BaseSettings }; /* - * User-specified file format settings for File and ULR engines. + * User-specified file format settings for File and URL engines. 
*/ DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS) diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index d3e5541edc7453311197dcef9d77166e0d90a7c7..e5337ad72a735dcb8b23db616acffe453bd4c7fe 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -78,6 +78,7 @@ FormatSettings getFormatSettings(const Context & context, format_settings.import_nested_json = settings.input_format_import_nested_json; format_settings.input_allow_errors_num = settings.input_format_allow_errors_num; format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio; + format_settings.json.array_of_rows = settings.output_format_json_array_of_rows; format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes; format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers; format_settings.json.quote_denormals = settings.output_format_json_quote_denormals; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index b3c01ddcf14ce300e52b507d4ff765edb0cb6b9d..ead0900afc73fff598035bb40bc210abb30dac90 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -86,6 +86,7 @@ struct FormatSettings struct { + bool array_of_rows = false; bool quote_64bit_integers = true; bool quote_denormals = true; bool escape_forward_slashes = true; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp index b3b83949642179de6e7e573fbd6e8fd7039e7eb3..15d8a843f41ca2d3031b62faead2061ebbad0214 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.cpp @@ -12,9 +12,9 @@ JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat( WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, - const FormatSettings & settings_, - bool yield_strings_) - : 
IRowOutputFormat(header_, out_, params_), settings(settings_), yield_strings(yield_strings_) + const FormatSettings & settings_) + : IRowOutputFormat(header_, out_, params_), + settings(settings_) { const auto & sample = getPort(PortKind::Main).getHeader(); size_t columns = sample.columns(); @@ -33,7 +33,7 @@ void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const IDataT writeString(fields[field_number], out); writeChar(':', out); - if (yield_strings) + if (settings.json.serialize_as_strings) { WriteBufferFromOwnString buf; @@ -61,29 +61,94 @@ void JSONEachRowRowOutputFormat::writeRowStartDelimiter() void JSONEachRowRowOutputFormat::writeRowEndDelimiter() { - writeCString("}\n", out); + // Why do we need this weird `if`? + // + // The reason is the formatRow function that is broken with respect to + // row-between delimiters. It should not write them, but it does, and then + // hacks around it by having a special formatRowNoNewline version, which, as + // you guessed, removes the newline from the end of row. But the row-between + // delimiter goes into a second row, so it turns out to be in the beginning + // of the line, and the removal doesn't work. There is also a second bug -- + // the row-between delimiter in this format is written incorrectly. In fact, + // it is not written at all, and the newline is written in a row-end + // delimiter ("}\n" instead of the correct "}"). With these two bugs + // combined, the test 01420_format_row works perfectly. + // + // A proper implementation of formatRow would use IRowOutputFormat directly, + // and not write row-between delimiters, instead of using IOutputFormat + // processor and its crutch row callback. This would require exposing + // IRowOutputFormat, which we don't do now, but which can be generally useful + // for other cases such as parallel formatting, that also require a control + // flow different from the usual IOutputFormat. 
+ // I just don't have time or energy to redo all of this, but I need to + // support JSON array output here, which requires proper ",\n" row-between + // delimiters. For compatibility, I preserve the bug in case of non-array + // output. + if (settings.json.array_of_rows) + { + writeCString("}", out); + } + else + { + writeCString("}\n", out); + } field_number = 0; } +void JSONEachRowRowOutputFormat::writeRowBetweenDelimiter() +{ + // We preserve an existing bug here for compatibility. See the comment above. + if (settings.json.array_of_rows) + { + writeCString(",\n", out); + } +} + + +void JSONEachRowRowOutputFormat::writePrefix() +{ + if (settings.json.array_of_rows) + { + writeCString("[\n", out); + } +} + + +void JSONEachRowRowOutputFormat::writeSuffix() +{ + if (settings.json.array_of_rows) + { + writeCString("\n]\n", out); + } +} + + void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory) { factory.registerOutputFormatProcessor("JSONEachRow", []( WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const FormatSettings & format_settings) + const FormatSettings & _format_settings) { - return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params, format_settings, false); + FormatSettings settings = _format_settings; + settings.json.serialize_as_strings = false; + return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params, + settings); }); factory.registerOutputFormatProcessor("JSONStringsEachRow", []( WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const FormatSettings & format_settings) + const FormatSettings & _format_settings) { - return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params, format_settings, true); + FormatSettings settings = _format_settings; + settings.json.serialize_as_strings = true; + return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params, + settings); }); } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h index 
bd9cfff68c5befce6f8623d879b68913cb486cc5..38760379056f4c78974b00363c4aed81e8f5a63d 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowOutputFormat.h @@ -19,8 +19,7 @@ public: WriteBuffer & out_, const Block & header_, const RowOutputFormatParams & params_, - const FormatSettings & settings_, - bool yield_strings_); + const FormatSettings & settings_); String getName() const override { return "JSONEachRowRowOutputFormat"; } @@ -28,6 +27,9 @@ public: void writeFieldDelimiter() override; void writeRowStartDelimiter() override; void writeRowEndDelimiter() override; + void writeRowBetweenDelimiter() override; + void writePrefix() override; + void writeSuffix() override; protected: /// No totals and extremes. @@ -40,9 +42,6 @@ private: Names fields; FormatSettings settings; - -protected: - bool yield_strings; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp index 48c23abd680945983393aadf416d3048fbc3ee5c..4612ce99f0535ee27f1e3208b33baa20391085ea 100644 --- a/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowWithProgressRowOutputFormat.cpp @@ -34,18 +34,24 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const FormatSettings & format_settings) + const FormatSettings & _format_settings) { - return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, false); + FormatSettings settings = _format_settings; + settings.json.serialize_as_strings = false; + return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, + sample, params, settings); }); factory.registerOutputFormatProcessor("JSONStringsEachRowWithProgress", []( WriteBuffer & buf, const Block & sample, const RowOutputFormatParams & params, - const 
FormatSettings & _format_settings) { - return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, true); + FormatSettings settings = _format_settings; + settings.json.serialize_as_strings = true; + return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, + sample, params, settings); }); }