From bcac2722d16cb7bd432a91709a853b69176c1208 Mon Sep 17 00:00:00 2001
From: DarkWanderer
Date: Wed, 30 Jan 2019 14:30:43 +0300
Subject: [PATCH] Added RowBinaryWithNamesAndTypes format

* Extended BinaryRowOutputStream with flags allowing to output names/types of columns
* Added one (for now) new output format - RowBinaryWithNamesAndTypes
* Updated docs
---
Review notes (ignored by `git am`):
  * writePrefix(): writeVarUInt() now receives the output buffer `ostr` and the
    statement is terminated; the call previously had neither, so it did not compile.
  * BinaryRowOutputStream.h: the constructor declaration now matches the
    four-argument definition in the .cpp and the calls in the format factory.
  * writePrefix() emits, in order: the column count as a VarUInt (only when
    names or types are requested), then all column names, then all type names.

 dbms/src/Formats/BinaryRowOutputStream.cpp | 42 ++++++++++++++++++++--
 dbms/src/Formats/BinaryRowOutputStream.h   |  6 +++-
 docs/en/interfaces/formats.md              |  7 ++++
 3 files changed, 51 insertions(+), 4 deletions(-)

diff --git a/dbms/src/Formats/BinaryRowOutputStream.cpp b/dbms/src/Formats/BinaryRowOutputStream.cpp
index e659384312..7f1d4e7a4c 100644
--- a/dbms/src/Formats/BinaryRowOutputStream.cpp
+++ b/dbms/src/Formats/BinaryRowOutputStream.cpp
@@ -9,11 +9,37 @@
 namespace DB
 {
 
-BinaryRowOutputStream::BinaryRowOutputStream(WriteBuffer & ostr_)
-    : ostr(ostr_)
+BinaryRowOutputStream::BinaryRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_, bool with_types_)
+    : ostr(ostr_), with_names(with_names_), with_types(with_types_), sample(sample_)
 {
 }
 
+void BinaryRowOutputStream::writePrefix()
+{
+    size_t columns = sample.columns();
+
+    if (with_names || with_types)
+    {
+        writeVarUInt(columns, ostr);
+    }
+
+    if (with_names)
+    {
+        for (size_t i = 0; i < columns; ++i)
+        {
+            writeBinary(sample.safeGetByPosition(i).name, ostr);
+        }
+    }
+
+    if (with_types)
+    {
+        for (size_t i = 0; i < columns; ++i)
+        {
+            writeBinary(sample.safeGetByPosition(i).type->getName(), ostr);
+        }
+    }
+}
+
 void BinaryRowOutputStream::flush()
 {
     ostr.next();
@@ -33,7 +59,17 @@ void registerOutputFormatRowBinary(FormatFactory & factory)
         const FormatSettings &)
     {
         return std::make_shared<BlockOutputStreamFromRowOutputStream>(
-            std::make_shared<BinaryRowOutputStream>(buf), sample);
+            std::make_shared<BinaryRowOutputStream>(buf, sample, false, false), sample);
+    });
+
+    factory.registerOutputFormat("RowBinaryWithNamesAndTypes", [](
+        WriteBuffer & buf,
+        const Block & sample,
+        const Context &,
+        const FormatSettings &)
+    {
+        return std::make_shared<BlockOutputStreamFromRowOutputStream>(
+            std::make_shared<BinaryRowOutputStream>(buf, sample, true, true), sample);
     });
 }
 
diff --git a/dbms/src/Formats/BinaryRowOutputStream.h b/dbms/src/Formats/BinaryRowOutputStream.h
index f63064c27e..ff908ba4a7 100644
--- a/dbms/src/Formats/BinaryRowOutputStream.h
+++ b/dbms/src/Formats/BinaryRowOutputStream.h
@@ -19,6 +19,7 @@ public:
-    BinaryRowOutputStream(WriteBuffer & ostr_);
+    BinaryRowOutputStream(WriteBuffer & ostr_, const Block & sample_, bool with_names_, bool with_types_);
 
     void writeField(const IColumn & column, const IDataType & type, size_t row_num) override;
+    void writePrefix() override;
 
     void flush() override;
 
@@ -26,6 +27,9 @@ public:
 
 protected:
     WriteBuffer & ostr;
+    bool with_names;
+    bool with_types;
+    const Block sample;
 };
 
 }
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 0cb8454239..78bff1c206 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -448,6 +448,13 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia.
 
 For [NULL](../query_language/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../data_types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`.
 
+## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
+
+Similar to [RowBinary](#rowbinary), but with added header:
+* [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
+* N `String`s specifying column names
+* N `String`s specifying column types
+
 ## Values
 
 Prints every row in brackets. Rows are separated by commas. There is no comma after the last row. The values inside the brackets are also comma-separated. Numbers are output in decimal format without quotes. Arrays are output in square brackets. Strings, dates, and dates with times are output in quotes. Escaping rules and parsing are similar to the [TabSeparated](#tabseparated) format. During formatting, extra spaces aren't inserted, but during parsing, they are allowed and skipped (except for spaces inside array values, which are not allowed). [NULL](../query_language/syntax.md) is represented as `NULL`.
-- 
GitLab