提交 8cde8844 编写于 作者: A Alexander Kuzmenkov

Write rows as JSON array in JSONEachRow output format

上级 ffb83155
......@@ -441,6 +441,7 @@ class IColumn;
M(Bool, output_format_json_quote_denormals, false, "Enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format.", 0) \
\
M(Bool, output_format_json_escape_forward_slashes, true, "Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped.", 0) \
M(Bool, output_format_json_array_of_rows, false, "Output a JSON array of all rows in JSONEachRow(Compact) format.", 0) \
\
M(UInt64, output_format_pretty_max_rows, 10000, "Rows limit for Pretty formats.", 0) \
M(UInt64, output_format_pretty_max_column_pad_width, 250, "Maximum width to pad all values in a column in Pretty formats.", 0) \
......@@ -517,7 +518,7 @@ struct Settings : public BaseSettings<SettingsTraits>
};
/*
* User-specified file format settings for File and ULR engines.
* User-specified file format settings for File and URL engines.
*/
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
......
......@@ -78,6 +78,7 @@ FormatSettings getFormatSettings(const Context & context,
format_settings.import_nested_json = settings.input_format_import_nested_json;
format_settings.input_allow_errors_num = settings.input_format_allow_errors_num;
format_settings.input_allow_errors_ratio = settings.input_format_allow_errors_ratio;
format_settings.json.array_of_rows = settings.output_format_json_array_of_rows;
format_settings.json.escape_forward_slashes = settings.output_format_json_escape_forward_slashes;
format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
......
......@@ -86,6 +86,7 @@ struct FormatSettings
struct
{
bool array_of_rows = false;
bool quote_64bit_integers = true;
bool quote_denormals = true;
bool escape_forward_slashes = true;
......
......@@ -12,9 +12,9 @@ JSONEachRowRowOutputFormat::JSONEachRowRowOutputFormat(
WriteBuffer & out_,
const Block & header_,
const RowOutputFormatParams & params_,
const FormatSettings & settings_,
bool yield_strings_)
: IRowOutputFormat(header_, out_, params_), settings(settings_), yield_strings(yield_strings_)
const FormatSettings & settings_)
: IRowOutputFormat(header_, out_, params_),
settings(settings_)
{
const auto & sample = getPort(PortKind::Main).getHeader();
size_t columns = sample.columns();
......@@ -33,7 +33,7 @@ void JSONEachRowRowOutputFormat::writeField(const IColumn & column, const IDataT
writeString(fields[field_number], out);
writeChar(':', out);
if (yield_strings)
if (settings.json.serialize_as_strings)
{
WriteBufferFromOwnString buf;
......@@ -61,29 +61,94 @@ void JSONEachRowRowOutputFormat::writeRowStartDelimiter()
/// Closes the JSON object of the current row and resets the per-row field
/// counter so that the next row starts again from the first field name.
void JSONEachRowRowOutputFormat::writeRowEndDelimiter()
{
    // Why is the closing delimiter conditional?
    //
    // The reason is the formatRow function that is broken with respect to
    // row-between delimiters. It should not write them, but it does, and then
    // hacks around it by having a special formatRowNoNewline version, which
    // removes the newline from the end of row. But the row-between delimiter
    // goes into a second row, so it turns out to be in the beginning of the
    // line, and the removal doesn't work. There is also a second bug -- the
    // row-between delimiter in this format is written incorrectly. In fact,
    // it is not written at all, and the newline is written in a row-end
    // delimiter ("}\n" instead of the correct "}"). With these two bugs
    // combined, the test 01420_format_row works perfectly.
    //
    // A proper implementation of formatRow would use IRowOutputFormat
    // directly, and not write row-between delimiters, instead of using the
    // IOutputFormat processor and its crutch row callback. This would require
    // exposing IRowOutputFormat, which is not done now, but which can be
    // generally useful for other cases such as parallel formatting, that also
    // require a control flow different from the usual IOutputFormat.
    //
    // JSON array output requires proper ",\n" row-between delimiters, so in
    // that mode the row end is just "}". For compatibility, the legacy "}\n"
    // row end is preserved for non-array output.
    //
    // Exactly one closing brace must be written per row: an additional
    // unconditional write here would duplicate it.
    if (settings.json.array_of_rows)
    {
        writeCString("}", out);
    }
    else
    {
        writeCString("}\n", out);
    }

    field_number = 0;
}
/// Writes the separator between two consecutive rows.
void JSONEachRowRowOutputFormat::writeRowBetweenDelimiter()
{
    // Outside of array mode nothing is written between rows: the newline is
    // emitted by writeRowEndDelimiter() instead. This is a known quirk that is
    // kept for compatibility.
    if (!settings.json.array_of_rows)
        return;

    writeCString(",\n", out);
}
/// Opens the enclosing JSON array when rows are output as an array;
/// writes nothing otherwise.
void JSONEachRowRowOutputFormat::writePrefix()
{
    if (!settings.json.array_of_rows)
        return;

    writeCString("[\n", out);
}
/// Closes the enclosing JSON array when rows are output as an array;
/// writes nothing otherwise.
void JSONEachRowRowOutputFormat::writeSuffix()
{
    if (!settings.json.array_of_rows)
        return;

    writeCString("\n]\n", out);
}
/// Registers the "JSONEachRow" and "JSONStringsEachRow" output formats.
///
/// Both formats are produced by JSONEachRowRowOutputFormat; they differ only
/// in the json.serialize_as_strings flag, which is forced on a private copy of
/// the caller's format settings (the caller's settings object is not modified).
void registerOutputFormatProcessorJSONEachRow(FormatFactory & factory)
{
    factory.registerOutputFormatProcessor("JSONEachRow", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams & params,
        const FormatSettings & _format_settings)
    {
        // Copy first: the incoming settings are const and shared with the caller.
        FormatSettings settings = _format_settings;
        settings.json.serialize_as_strings = false;
        return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
            settings);
    });

    factory.registerOutputFormatProcessor("JSONStringsEachRow", [](
        WriteBuffer & buf,
        const Block & sample,
        const RowOutputFormatParams & params,
        const FormatSettings & _format_settings)
    {
        // Same output format class, but every value is serialized as a string.
        FormatSettings settings = _format_settings;
        settings.json.serialize_as_strings = true;
        return std::make_shared<JSONEachRowRowOutputFormat>(buf, sample, params,
            settings);
    });
}
......
......@@ -19,8 +19,7 @@ public:
WriteBuffer & out_,
const Block & header_,
const RowOutputFormatParams & params_,
const FormatSettings & settings_,
bool yield_strings_);
const FormatSettings & settings_);
String getName() const override { return "JSONEachRowRowOutputFormat"; }
......@@ -28,6 +27,9 @@ public:
void writeFieldDelimiter() override;
void writeRowStartDelimiter() override;
void writeRowEndDelimiter() override;
void writeRowBetweenDelimiter() override;
void writePrefix() override;
void writeSuffix() override;
protected:
/// No totals and extremes.
......@@ -40,9 +42,6 @@ private:
Names fields;
FormatSettings settings;
protected:
bool yield_strings;
};
}
......@@ -34,18 +34,24 @@ void registerOutputFormatProcessorJSONEachRowWithProgress(FormatFactory & factor
WriteBuffer & buf,
const Block & sample,
const RowOutputFormatParams & params,
const FormatSettings & format_settings)
const FormatSettings & _format_settings)
{
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, false);
FormatSettings settings = _format_settings;
settings.json.serialize_as_strings = false;
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf,
sample, params, settings);
});
factory.registerOutputFormatProcessor("JSONStringsEachRowWithProgress", [](
WriteBuffer & buf,
const Block & sample,
const RowOutputFormatParams & params,
const FormatSettings & format_settings)
const FormatSettings & _format_settings)
{
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf, sample, params, format_settings, true);
FormatSettings settings = _format_settings;
settings.json.serialize_as_strings = true;
return std::make_shared<JSONEachRowWithProgressRowOutputFormat>(buf,
sample, params, settings);
});
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册