Unverified commit 750ff0c7, authored by Alexander Kuzmenkov, committed by GitHub

Merge branch 'master' into trying_parallel_func_tests

......@@ -47,6 +47,10 @@ endif()
target_include_directories(common PUBLIC .. ${CMAKE_CURRENT_BINARY_DIR}/..)
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
target_link_libraries(common PUBLIC -Wl,-U,_inside_main)
endif()
# Allow explicit fallback to readline
if (NOT ENABLE_REPLXX AND ENABLE_READLINE)
message (STATUS "Attempting to fall back to readline explicitly")
......
......@@ -853,15 +853,43 @@ public:
{
    if (hours == 1)
        return toStartOfHour(t);

    /** We will round the hour number since midnight.
      * It may split the day into non-equal intervals.
      * For example, when rounding to an 11-hour interval,
      * the day will be split into the intervals 00:00:00..10:59:59, 11:00:00..21:59:59 and 22:00:00..23:59:59.
      * In case of daylight saving time or other transitions,
      * the intervals can be shortened or prolonged by the amount of the transition.
      */

    UInt64 seconds = hours * 3600;

    const LUTIndex index = findIndex(t);
    const Values & values = lut[index];

    time_t time = t - values.date;
    if (time >= values.time_at_offset_change())
    {
        /// Align to new hour numbers before rounding.
        time += values.amount_of_offset_change();
        time = time / seconds * seconds;

        /// Should subtract the shift back, but only if the rounded time is not before the shift.
        if (time >= values.time_at_offset_change())
        {
            time -= values.amount_of_offset_change();

            /// With cutoff at the time of the shift. Otherwise we may end up with something like 23:00 of the previous day.
            if (time < values.time_at_offset_change())
                time = values.time_at_offset_change();
        }
    }
    else
    {
        time = time / seconds * seconds;
    }

    return values.date + time;
}
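The rounding described in the comment can be observed from SQL through `toStartOfInterval`, which dispatches to this method for hour intervals. A minimal sketch, assuming a timezone without offset changes; the timestamp matches the one used in the unit tests further below, and the result follows from the algorithm rather than from a documented guarantee:

```sql
-- 5-hour intervals counted from midnight: 00:00, 05:00, 10:00, 15:00, 20:00.
-- 2019-09-16 19:20:11 UTC falls into the interval that starts at 15:00.
SELECT toStartOfInterval(toDateTime('2019-09-16 19:20:11', 'UTC'), INTERVAL 5 HOUR);
-- 2019-09-16 15:00:00
```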
inline time_t toStartOfMinuteInterval(time_t t, UInt64 minutes) const
......@@ -869,6 +897,14 @@ public:
if (minutes == 1)
return toStartOfMinute(t);
/** In contrast to the "toStartOfHourInterval" function above,
  * the minute intervals are not aligned to midnight.
  * You will get unexpected results if, for example, you round down
  * to a 60-minute interval and there was a time shift of 30 minutes.
  *
  * But this is not specified in the docs and may be changed in the future.
  */
UInt64 seconds = 60 * minutes;
return roundDown(t, seconds);
}
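The epoch-based minute rounding can likewise be probed from SQL. A hedged sketch: `Australia/Lord_Howe` is chosen only because it is one of the few timezones with a 30-minute DST shift, and the exact result depends on the timezone database:

```sql
-- With a 30-minute offset change, the boundary of a 60-minute interval can land
-- at :30 local time, because rounding is relative to the epoch, not to midnight.
SELECT toStartOfInterval(toDateTime('2021-04-04 03:15:00', 'Australia/Lord_Howe'), INTERVAL 60 MINUTE);
```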
......
......@@ -5,6 +5,11 @@ add_library (daemon
)
target_include_directories (daemon PUBLIC ..)
if (OS_DARWIN AND NOT MAKE_STATIC_LIBRARIES)
target_link_libraries (daemon PUBLIC -Wl,-undefined,dynamic_lookup)
endif()
target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
if (USE_SENTRY)
......
......@@ -4,6 +4,21 @@ ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.4.1.*
ARG gosu_ver=1.10
# set a non-empty deb_location_url to create a docker image
# from debs created by CI build, for example:
# docker build . --network host --build-arg version="21.4.1.6282" --build-arg deb_location_url="https://clickhouse-builds.s3.yandex.net/21852/069cfbff388b3d478d1a16dc7060b48073f5d522/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_deb/" -t filimonovq/clickhouse-server:pr21852
ARG deb_location_url=""
# set a non-empty single_binary_location_url to create a docker image
# from a single binary URL (useful for non-standard builds, e.g. with sanitizers or for arm64).
# for example (run on an aarch64 server):
# docker build . --network host --build-arg single_binary_location_url="https://builds.clickhouse.tech/master/aarch64/clickhouse" -t altinity/clickhouse-server:master-testing-arm
# note: clickhouse-odbc-bridge is not supported in this case.
ARG single_binary_location_url=""
# see https://github.com/moby/moby/issues/4032#issuecomment-192327844
ARG DEBIAN_FRONTEND=noninteractive
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
......@@ -19,20 +34,37 @@ RUN groupadd -r clickhouse --gid=101 \
ca-certificates \
dirmngr \
gnupg \
locales \
wget \
tzdata \
&& mkdir -p /etc/apt/sources.list.d \
&& apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \
&& echo $repository > /etc/apt/sources.list.d/clickhouse.list \
&& if [ -n "$deb_location_url" ]; then \
echo "installing from custom url with deb packages: $deb_location_url" \
rm -rf /tmp/clickhouse_debs \
&& mkdir -p /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-common-static_${version}_amd64.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-client_${version}_all.deb" -P /tmp/clickhouse_debs \
&& wget --progress=bar:force:noscroll "${deb_location_url}/clickhouse-server_${version}_all.deb" -P /tmp/clickhouse_debs \
&& dpkg -i /tmp/clickhouse_debs/*.deb ; \
elif [ -n "$single_binary_location_url" ]; then \
echo "installing from single binary url: $single_binary_location_url" \
&& rm -rf /tmp/clickhouse_binary \
&& mkdir -p /tmp/clickhouse_binary \
&& wget --progress=bar:force:noscroll "$single_binary_location_url" -O /tmp/clickhouse_binary/clickhouse \
&& chmod +x /tmp/clickhouse_binary/clickhouse \
&& /tmp/clickhouse_binary/clickhouse install --user "clickhouse" --group "clickhouse" ; \
else \
echo "installing from repository: $repository" \
&& apt-get update \
&& apt-get --yes -o "Dpkg::Options::=--force-confdef" -o "Dpkg::Options::=--force-confold" upgrade \
&& apt-get install --allow-unauthenticated --yes --no-install-recommends \
clickhouse-common-static=$version \
clickhouse-client=$version \
clickhouse-server=$version ; \
fi \
&& clickhouse-local -q 'SELECT * FROM system.build_options' \
&& rm -rf \
/var/lib/apt/lists/* \
/var/cache/debconf \
......
......@@ -38,9 +38,6 @@ if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
exit 1
fi
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
......@@ -108,6 +105,9 @@ EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
......
......@@ -69,7 +69,7 @@ MySQL DDL queries are converted into the corresponding ClickHouse DDL queries ([
- MySQL `INSERT` query is converted into `INSERT` with `_sign=1`.
- MySQL `DELETE` query is converted into `INSERT` with `_sign=-1`.
- MySQL `UPDATE` query is converted into `INSERT` with `_sign=-1` and `INSERT` with `_sign=1`.
......
......@@ -1514,6 +1514,14 @@ FORMAT PrettyCompactMonoBlock
Default value: 0
## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit}
Limit for number of sharding key values, turns off `optimize_skip_unused_shards` if the limit is reached.
Too many values may require a significant amount of resources to process, while the benefit is doubtful: if you have a huge number of values in `IN (...)`, the query will most likely be sent to all shards anyway.
Default value: 1000
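A hedged sketch of how the limit interacts with `optimize_skip_unused_shards` (`dist` and `sharding_key` are hypothetical names):

```sql
SET optimize_skip_unused_shards = 1;
SET optimize_skip_unused_shards_limit = 1000;

-- If the condition expands to more than 1000 sharding key values,
-- shard pruning is disabled and the query is sent to all shards.
SELECT count() FROM dist WHERE sharding_key IN (1, 2, 3);
```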
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by the sharding key; otherwise the setting does nothing).
......@@ -2728,11 +2736,11 @@ Default value: `0`.
## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}
Enables or disables truncate before insert in [File](../../engines/table-engines/special/file.md) engine tables.
Possible values:
- 0 — `INSERT` query appends new data to the end of the file.
- 1 — `INSERT` query replaces the existing content of the file with the new data.
Default value: `0`.
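A minimal sketch of the difference between the two values (the table name is hypothetical):

```sql
CREATE TABLE file_table (x Int64) ENGINE = File(TabSeparated);
INSERT INTO file_table VALUES (1), (2);

SET engine_file_truncate_on_insert = 1;
-- With the setting enabled, this INSERT replaces the two rows written above
-- instead of appending a third one to the file.
INSERT INTO file_table VALUES (3);
```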
......@@ -2747,4 +2755,39 @@ Possible values:
Default value: `0`.
## allow_experimental_live_view {#allow-experimental-live-view}
Allows creation of experimental [live views](../../sql-reference/statements/create/view.md#live-view).
Possible values:
- 0 — Working with live views is disabled.
- 1 — Working with live views is enabled.
Default value: `0`.
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
Sets the heartbeat interval in seconds to indicate that a [live view](../../sql-reference/statements/create/view.md#live-view) is alive.
Default value: `15`.
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
Sets the maximum number of inserted blocks after which mergeable blocks are dropped and the query for a [live view](../../sql-reference/statements/create/view.md#live-view) is re-executed.
Default value: `64`.
## temporary_live_view_timeout {#temporary-live-view-timeout}
Sets the interval in seconds after which a [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted.
Default value: `5`.
## periodic_live_view_refresh {#periodic-live-view-refresh}
Sets the interval in seconds after which a periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh.
Default value: `60`.
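A hedged sketch of how these session settings combine with the live view clauses described in [CREATE VIEW](../../sql-reference/statements/create/view.md#live-view) (`lv` is a hypothetical view name):

```sql
SET allow_experimental_live_view = 1;
SET periodic_live_view_refresh = 30;

-- With no explicit interval, WITH REFRESH falls back to
-- periodic_live_view_refresh (30 seconds in this session).
CREATE LIVE VIEW lv WITH REFRESH AS SELECT now();
```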
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->
......@@ -68,7 +68,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop
!!! important "Important"
    This is an experimental feature that may change in backwards-incompatible ways in future releases.
    Enable usage of live views and the `WATCH` query using the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Run the command `set allow_experimental_live_view = 1`.
```sql
......@@ -90,7 +90,9 @@ Live views work similarly to how a query in a distributed table works. But inste
See [WITH REFRESH](#live-view-with-refresh) to force periodic updates of a live view that in some cases can be used as a workaround.
### Monitoring Changes {#live-view-monitoring}

You can monitor changes in the `LIVE VIEW` query result using the [WATCH](../../../sql-reference/statements/watch.md) query.
```sql
WATCH [db.]live_view
......@@ -102,11 +104,10 @@ WATCH [db.]live_view
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch a live view while doing a parallel insert into the source table.
```sql
WATCH lv;
```
```bash
......@@ -128,16 +129,16 @@ INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
Or add the [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause to get just the change events.
```sql
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
WATCH lv EVENTS;
```
```bash
......@@ -163,15 +164,15 @@ SELECT * FROM [db.]live_view WHERE ...
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
### WITH TIMEOUT Clause {#live-view-with-timeout}
When a live view is created with a `WITH TIMEOUT` clause, it will be dropped automatically after the specified number of seconds have elapsed since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
```
If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
**Example:**
......@@ -180,7 +181,7 @@ CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
```
### WITH REFRESH Clause {#live-view-with-refresh}
When a live view is created with a `WITH REFRESH` clause, it will be refreshed automatically after the specified number of seconds have elapsed since the last refresh or trigger.
......@@ -188,7 +189,7 @@ When a live view is created with a `WITH REFRESH` clause then it will be automat
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh value is not specified then the value specified by the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used.
**Example:**
......@@ -231,7 +232,7 @@ WATCH lv
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
```
### Usage {#live-view-usage}
Most common uses of live view tables include:
......@@ -240,15 +241,4 @@ Most common uses of live view tables include:
- Watching for table changes and triggering follow-up select queries.
- Watching metrics from system tables using periodic refresh.
### Settings {#live-view-settings}
You can use the following settings to control the behaviour of live views.
- `allow_experimental_live_view` - enable live views. Default is `0`.
- `live_view_heartbeat_interval` - the heartbeat interval in seconds to indicate live query is alive. Default is `15` seconds.
- `max_live_view_insert_blocks_before_refresh` - maximum number of inserted blocks after which mergeable blocks are dropped and the query is re-executed. Default is `64` inserts.
- `temporary_live_view_timeout` - interval after which live view with timeout is deleted. Default is `5` seconds.
- `periodic_live_view_refresh` - interval after which periodically refreshed live view is forced to refresh. Default is `60` seconds.
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/create/view/) <!--hide-->
......@@ -17,19 +17,21 @@ WATCH [db.]live_view
[FORMAT format]
```
The `WATCH` query performs continuous data retrieval from a [LIVE VIEW](./create/view.md#live-view) table. Unless the `LIMIT` clause is specified it provides an infinite stream of query results from a [LIVE VIEW](./create/view.md#live-view).
```sql
WATCH [db.]live_view [EVENTS] [LIMIT n] [FORMAT format]
```
## Virtual columns {#watch-virtual-columns}
The virtual `_version` column in the query result indicates the current result version.
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv;
```
```bash
......@@ -47,6 +49,8 @@ WATCH lv
By default, the requested data is returned to the client, while in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded to a different table.
**Example:**
```sql
INSERT INTO [db.]table WATCH [db.]live_view ...
```
......@@ -56,14 +60,14 @@ INSERT INTO [db.]table WATCH [db.]live_view ...
The `EVENTS` clause can be used to obtain a short form of the `WATCH` query where, instead of the query result, you just get the latest query result version.
```sql
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS;
```
```bash
......@@ -78,17 +82,17 @@ WATCH lv EVENTS
## LIMIT Clause {#limit-clause}
The `LIMIT n` clause specifies the number of updates the `WATCH` query should wait for before terminating. By default there is no limit on the number of updates and therefore the query will not terminate. The value of `0` indicates that the `WATCH` query should not wait for any new query results and therefore will return immediately once the query result is evaluated.
```sql
WATCH [db.]live_view LIMIT 1;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS LIMIT 1;
```
```bash
......@@ -102,5 +106,4 @@ WATCH lv EVENTS LIMIT 1
The `FORMAT` clause works the same way as for the [SELECT](../../sql-reference/statements/select/format.md#format-clause).
!!! info "Note"
    The [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format should be used when watching [LIVE VIEW](./create/view.md#live-view) tables over the HTTP interface. The progress messages will be added to the output to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled using the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
......@@ -2615,14 +2615,69 @@ SELECT * FROM test2;
Note that this setting affects the behavior of [materialized views](../../sql-reference/statements/create/view.md#materialized) and the [MaterializeMySQL](../../engines/database-engines/materialize-mysql.md) database engine.

## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}

Enables or disables the ability to run a `SELECT` query against a [File](../../engines/table-engines/special/file.md) engine table that does not contain a file.

Possible values:

- 0 — The `SELECT` query throws an exception.
- 1 — The `SELECT` query returns an empty result.

Default value: `0`.

## engine_file_truncate_on_insert {#engine-file-truncate-on-insert}

Enables or disables truncation of the existing data before an insert into a [File](../../engines/table-engines/special/file.md) engine table.

Possible values:

- 0 — The `INSERT` query appends new data to the end of the file.
- 1 — The `INSERT` query replaces the existing content of the file with the new data.

Default value: `0`.

## allow_experimental_geo_types {#allow-experimental-geo-types}

Allows the use of experimental data types for working with [geographical structures](../../sql-reference/data-types/geo.md).

Possible values:

- 0 — Working with geo data types is not supported.
- 1 — Working with geo data types is supported.

Default value: `0`.

## allow_experimental_live_view {#allow-experimental-live-view}

Enables the experimental use of [live views](../../sql-reference/statements/create/view.md#live-view).

Possible values:

- 0 — Working with live views is disabled.
- 1 — Working with live views is enabled.

Default value: `0`.

## live_view_heartbeat_interval {#live-view-heartbeat-interval}

Sets the interval in seconds for periodically checking that a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is alive.

Default value: `15`.

## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}

Sets the maximum number of inserts after which the query forming a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is re-executed.

Default value: `64`.

## temporary_live_view_timeout {#temporary-live-view-timeout}

Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is deleted.

Default value: `5`.

## periodic_live_view_refresh {#periodic-live-view-refresh}

Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) with auto-refresh enabled is refreshed.

Default value: `60`.

[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->
......@@ -13,7 +13,7 @@ toc_title: "Представление"
CREATE [OR REPLACE] VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] AS SELECT ...
```
Normal views do not store any data; they perform a read from another table on each access. In other words, a normal view is nothing more than a saved query. When reading from a view, this saved query is used as a subquery in the [FROM](../../../sql-reference/statements/select/from.md) clause.

As an example, assume you have created a view:
......@@ -43,12 +43,12 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
When creating a materialized view without `TO [db].[table]`, you must specify `ENGINE`, the table engine for storing the data.

When creating a materialized view with `TO [db].[table]`, you must not specify `POPULATE`.

A materialized view works as follows: when data is inserted into the table specified in its SELECT query, the inserted chunk of data is transformed by this SELECT query, and the result is inserted into the view.

!!! important "Important"
    Materialized views in ClickHouse are more like `after insert` triggers. If the materialized view query contains aggregation, it is applied only to the freshly inserted block of records. Any changes to existing data in the source table (such as updates, deletes, or partition drops) do not change the materialized view.

If `POPULATE` is specified, the existing table data is inserted into the view at creation time, as if a `CREATE TABLE ... AS SELECT ...` query had been run. Otherwise, the view will contain only the data inserted into the table after the view was created. Using `POPULATE` is not recommended, since data inserted into the table while the view is being created will not end up in it.
......@@ -56,9 +56,177 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na
The execution of `ALTER` queries on materialized views is not fully implemented, so they can be inconvenient to use. If the materialized view uses the `TO [db.]name` construction, you can `DETACH` the view, run `ALTER` on the target table, and then `ATTACH` the previously detached view.

Note that materialized views are affected by the [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before being inserted into the view.

Views look the same as normal tables. For example, they are listed in the result of a `SHOW TABLES` query.

To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop-view). However, `DROP TABLE` also works for views.
## Live Views {#live-view}

!!! important "Important"
    Live views (`LIVE VIEW`) are an experimental feature. Using them may entail a loss of compatibility in future releases.
    To use `LIVE VIEW` and `WATCH` queries, enable the [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting.
```sql
CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ...
```
A `LIVE VIEW` stores the result of the [SELECT](../../../sql-reference/statements/select/index.md) query specified at creation and is updated as soon as this result changes. The final query result, and the intermediate data from which the result is built, are kept in RAM, which provides high processing speed for repeated queries. Live views can send push notifications when the result of the underlying `SELECT` query changes. To receive them, use the [WATCH](../../../sql-reference/statements/watch.md) query.

A `LIVE VIEW` update is triggered by an insert into the table specified in the underlying `SELECT` query.

Live views work on the same principle as distributed tables. But instead of combining separate chunks of data from different servers, they combine the already available result with the new data. If the underlying live view query contains a subquery, its results are not cached; only the result of the main query is kept in the cache.

!!! info "Limitations"
    - [Table functions](../../../sql-reference/table-functions/index.md) are not supported in the main query.
    - Tables that do not support modification via `INSERT` queries, such as [dictionaries](../../../sql-reference/dictionaries/index.md) and [system tables](../../../operations/system-tables/index.md), as well as [normal views](#normal) and [materialized views](#materialized), do not trigger a live view update.
    - Live views can only use queries that combine the results over the old and the new data. Live views do not work with queries that require a full recomputation of the data or stateful aggregation.
    - `LIVE VIEW` does not work for replicated and distributed tables where data is inserted on different nodes.
    - `LIVE VIEW` is not updated if the underlying query uses several tables.

In cases when a `LIVE VIEW` is not updated automatically, use [WITH REFRESH](#live-view-with-refresh) to refresh it forcibly at a given interval.
### Monitoring Changes {#live-view-monitoring}

To monitor changes of a live view, use the [WATCH](../../../sql-reference/statements/watch.md) query.

**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv AS SELECT sum(x) FROM mt;
```
Watch the changes of the live view while inserting data into the source table.
```sql
WATCH lv;
```
```bash
┌─sum(x)─┬─_version─┐
│ 1 │ 1 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 2 │ 2 │
└────────┴──────────┘
┌─sum(x)─┬─_version─┐
│ 6 │ 3 │
└────────┴──────────┘
...
```
```sql
INSERT INTO mt VALUES (1);
INSERT INTO mt VALUES (2);
INSERT INTO mt VALUES (3);
```
To get only the change events, use the [EVENTS](../../../sql-reference/statements/watch.md#events-clause) clause.
```sql
WATCH lv EVENTS;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
┌─version─┐
│ 3 │
└─────────┘
...
```
You can use [SELECT](../../../sql-reference/statements/select/index.md) queries with live views just as with any other view or table. If the query result is cached, it is returned immediately, without querying the underlying tables.
```sql
SELECT * FROM [db.]live_view WHERE ...
```
### Forced Refresh {#live-view-alter-refresh}

To force a live view refresh, use the `ALTER LIVE VIEW [db.]table_name REFRESH` query.

### WITH TIMEOUT Clause {#live-view-with-timeout}

A live view created with a `WITH TIMEOUT` clause is dropped automatically after the specified number of seconds have elapsed since the last [WATCH](../../../sql-reference/statements/watch.md) query applied to this live view.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
```
If the timeout value is not specified, the value of the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.

**Example:**
```sql
CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
```
### WITH REFRESH Clause {#live-view-with-refresh}

A live view created with a `WITH REFRESH` clause is refreshed automatically at the specified interval, counted from the moment of the last refresh.
```sql
CREATE LIVE VIEW [db.]table_name WITH REFRESH [value_in_sec] AS SELECT ...
```
If the refresh interval is not specified, the value of the [periodic_live_view_refresh](../../../operations/settings/settings.md#periodic-live-view-refresh) setting is used.

**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv;
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:05 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:10 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 08:47:15 │ 3 │
└─────────────────────┴──────────┘
```
The `WITH TIMEOUT` and `WITH REFRESH` clauses can be combined using `AND`.
```sql
CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
```
After 15 seconds, the view is automatically dropped if there is no active `WATCH` query.
```sql
WATCH lv;
```
```
Code: 60. DB::Exception: Received from localhost:9000. DB::Exception: Table default.lv doesn't exist..
```
### Usage {#live-view-usage}

The most common uses of `LIVE VIEW` tables include:

- Getting push notifications about data changes without extra periodic queries.
- Caching the results of frequently used queries to get them without delay.
- Watching table changes to trigger other `SELECT` queries.
- Watching metrics from system tables using periodic refresh.
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/create/view) <!--hide-->
---
toc_priority: 53
toc_title: WATCH
---
# WATCH Query {#watch}

!!! important "Important"
    This is an experimental feature. It may entail a loss of compatibility in future releases.
    To use `LIVE VIEW` and `WATCH` queries, enable them with `set allow_experimental_live_view = 1`.

**Syntax**
``` sql
WATCH [db.]live_view [EVENTS] [LIMIT n] [FORMAT format]
```
The `WATCH` query performs continuous retrieval of the contents of a [live view](./create/view.md#live-view). Unless the `LIMIT` clause is specified, the `WATCH` query will keep returning updated contents of the [live view](./create/view.md#live-view) indefinitely.
```sql
WATCH [db.]live_view;
```
## Virtual Columns {#watch-virtual-columns}

The virtual `_version` column in the query result indicates the version of the current result.

**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv;
```
```bash
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:21 │ 1 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:26 │ 2 │
└─────────────────────┴──────────┘
┌───────────────now()─┬─_version─┐
│ 2021-02-21 09:17:31 │ 3 │
└─────────────────────┴──────────┘
...
```
By default, the requested data is returned to the client, but in conjunction with [INSERT INTO](../../sql-reference/statements/insert-into.md) it can be forwarded for insertion into a different table.

**Example:**
```sql
INSERT INTO [db.]table WATCH [db.]live_view ...
```
## EVENTS Clause {#events-clause}

The `EVENTS` clause lets you obtain a compact form of the `WATCH` query result. Instead of the full result, you get the number of the latest result version.
```sql
WATCH [db.]live_view EVENTS;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
┌─version─┐
│ 2 │
└─────────┘
...
```
## LIMIT Clause {#limit-clause}

The `LIMIT n` clause sets the number of updates after which the `WATCH` query stops watching. By default this number is not set, so the query runs indefinitely. `LIMIT 0` means the `WATCH` query returns a single up-to-date query result and stops watching.
```sql
WATCH [db.]live_view LIMIT 1;
```
**Example:**
```sql
CREATE LIVE VIEW lv WITH REFRESH 5 AS SELECT now();
WATCH lv EVENTS LIMIT 1;
```
```bash
┌─version─┐
│ 1 │
└─────────┘
```
## FORMAT Clause {#format-clause}

The `FORMAT` clause works the same way as the clause of the same name in the [SELECT](../../sql-reference/statements/select/format.md#format-clause) query.

!!! info "Note"
    When watching a [LIVE VIEW](./create/view.md#live-view) over the HTTP interface, use the [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress) format. Progress messages are added to the output stream to keep the long-lived HTTP connection alive until the query result changes. The interval between progress messages is controlled by the [live_view_heartbeat_interval](./create/view.md#live-view-settings) setting.
---
toc_folder_title: Getting Started
toc_priority: 2
---
......@@ -9,7 +7,7 @@ toc_priority: 2
If you are new to ClickHouse and want to get a hands-on feel for its performance, first complete the [installation and deployment](install.md).

After that, you can work through the tutorial and the example datasets to take your first steps:
......
---
toc_folder_title: Introduction
toc_priority: 1
---
......
# Usage Recommendations {#usage-recommendations}
## CPU {#cpu}

The SSE4.2 instruction set must be supported. Modern processors (since 2008) support it.

When choosing a processor, prefer a large number of cores at a somewhat slower clock rate over fewer cores at a higher clock rate.
For example, 16 cores at 2600 MHz are better than 8 cores at 3600 MHz.

## Hyper-threading {#hyper-threading}

Do not disable hyper-threading. It helps some queries but not others.

## Turbo Boost {#turbo-boost}

Turbo Boost is highly recommended. It significantly improves performance under a typical load.
You can use `turbostat` to view the actual CPU clock rate under load.

## CPU Scaling Governor {#cpu-scaling-governor}

Always use the `performance` scaling governor. The `on-demand` scaling governor performs much worse under constantly high demand.
``` bash
echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
......@@ -26,68 +10,70 @@ echo 'performance' | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_gover
## CPU Limitations {#cpu-limitations}

Processors can overheat. Use `dmesg` to see whether the CPU clock rate was limited due to overheating.
This limitation can also be set externally at the data center level. You can use `turbostat` to monitor it under load.
## RAM {#ram}
For small amounts of data (up to about 200 GB compressed), it is best to use as much memory as the volume of data.
For large amounts of data, and when processing interactive (online) queries, you should use a reasonable amount of RAM (128 GB or more) so that the hot data subset fits in the page cache.
Even for data volumes of about 50 TB per server, using 128 GB of RAM significantly improves query performance compared to 64 GB.
## Swap File {#swap-file}

Always disable the swap file. The only reason not to do so is if you are running ClickHouse on your personal laptop.

Do not disable overcommit. The value of `cat /proc/sys/vm/overcommit_memory` should be 0 or 1. Run
``` bash
$ echo 0 | sudo tee /proc/sys/vm/overcommit_memory
```
## Huge Pages {#huge-pages}

Always disable transparent huge pages. They interfere with the memory allocator, which leads to significant performance degradation.
``` bash
echo 'never' | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
```
Use `perf top` to watch the time the kernel spends on memory management.
Permanent huge pages also do not need to be allocated.
## Storage Subsystem {#storage-subsystem}

If your budget allows you to use SSDs, use SSDs.
If not, use HDDs. SATA HDDs at 7200 RPM will do.

Give preference to many servers with local hard drives over a smaller number of servers with attached disk shelves.
But shelves will work for storing archives that are rarely queried.
## RAID {#raid}
When using HDDs, you can combine them into RAID-10, RAID-5, RAID-6, or RAID-50.
For Linux, software RAID is better (using `mdadm`). We do not recommend using LVM.
When creating RAID-10, select the `far` layout.
If your budget allows, choose RAID-10.

If you have more than 4 disks, use RAID-6 (preferred) or RAID-50 instead of RAID-5.
When using RAID-5, RAID-6, or RAID-50, always increase stripe_cache_size, since the default value is usually not the best choice.
``` bash
echo 4096 | sudo tee /sys/block/md2/md/stripe_cache_size
```
Calculate the exact value from the number of devices and the chunk size using the formula: `2 * num_devices * chunk_size_in_bytes / 4096`. For example, with 8 devices and a 1024 KB chunk: 2 * 8 * 1048576 / 4096 = 4096.

A chunk size of 1024 KB is sufficient for all RAID configurations.
Never set the chunk size too small or too large.

You can use RAID-0 on SSDs.
Regardless of RAID use, always use replication for data safety.

Enable NCQ with a long queue. For HDDs, choose the CFQ scheduler; for SSDs, choose noop. Do not reduce the 'readahead' setting.
For HDDs, enable the write cache.
## File System {#file-system}

Ext4 is the most reliable option. Set the mount options `noatime, nobarrier`.
XFS is also suitable, but it hasn't been as thoroughly tested with ClickHouse.
Most other file systems should also work fine. File systems with delayed allocation work better.
## Linux Kernel {#linux-kernel}
......@@ -95,26 +81,43 @@ XFS也是合适的,但它还没有经过ClickHouse的彻底测试。
## Network {#network}

If you are using IPv6, increase the size of the route cache.
Linux kernels before 3.2 had numerous problems with the IPv6 implementation.

Use at least a 10 GB network, if possible. 1 Gb will also work, but it will be much worse for patching replicas with tens of terabytes of data, or for processing distributed queries with a large amount of intermediate data.

## Hypervisor Configuration

If you are using OpenStack, set

```
cpu_mode=host-passthrough
```

in nova.conf.

If you are using libvirt, set

```
<cpu mode='host-passthrough'/>
```

in the XML configuration.

This is important for ClickHouse to be able to get correct information with the `cpuid` instruction.
Otherwise you may get `Illegal instruction` crashes when the hypervisor is run on old CPU models.
## Zookeeper {#zookeeper}
You probably already use ZooKeeper for other purposes. You can use the same ZooKeeper installation if it is not yet overloaded.

It's best to use a fresh version of ZooKeeper, 3.4.9 or later. The ZooKeeper version in stable Linux distributions may be outdated.

You should never use manually written scripts to transfer data between different ZooKeeper clusters, as this may result in incorrect data for sequential nodes. Never use the zkcopy tool for the same reason: https://github.com/ksprojects/zkcopy/issues/15

If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters.

Do not run ZooKeeper on the same servers as ClickHouse, because ZooKeeper is very sensitive to latency and ClickHouse may consume all available system resources.

With the default settings, ZooKeeper is a time bomb:

When using the default configuration, the ZooKeeper server does not delete files from old snapshots and logs (see autopurge); this is the responsibility of the operator.

This bomb must be defused.
......@@ -222,7 +225,7 @@ JAVA_OPTS="-Xms{{ '{{' }} cluster.get('xms','128M') {{ '}}' }} \
-XX:+CMSParallelRemarkEnabled"
```
Salt init:
description "zookeeper-{{ '{{' }} cluster['name'] {{ '}}' }} centralized coordination service"
......
......@@ -106,7 +106,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts,
try
{
type->getDefaultSerialization()->deserializeTextQuoted(*column_dummy, rb, FormatSettings());
}
catch (Exception & e)
{
......@@ -1719,7 +1719,7 @@ std::set<String> ClusterCopier::getShardPartitions(const ConnectionTimeouts & ti
for (size_t i = 0; i < column.column->size(); ++i)
{
WriteBufferFromOwnString wb;
column.type->getDefaultSerialization()->serializeTextQuoted(*column.column, i, wb, FormatSettings());
res.emplace(wb.str());
}
}
......
......@@ -100,16 +100,16 @@ class IModel
{
public:
/// Call train iteratively for each block to train a model.
virtual void train(const IColumn & column) = 0;
/// Call finalize one time after training before generating.
virtual void finalize() = 0;
/// Call generate: pass source data column to obtain a column with anonymized data as a result.
virtual ColumnPtr generate(const IColumn & column) = 0;
/// Deterministically change seed to some other value. This can be used to generate more values than were in source.
virtual void updateSeed() = 0;
virtual ~IModel() = default;
};
......
......@@ -39,6 +39,8 @@ class AggregateFunctionArgMinMax final : public IAggregateFunctionTupleArgHelper
private:
const DataTypePtr & type_res;
const DataTypePtr & type_val;
const SerializationPtr serialization_res;
const SerializationPtr serialization_val;
bool tuple_argument;
using Base = IAggregateFunctionTupleArgHelper<Data, AggregateFunctionArgMinMax<Data>, 2>;
......@@ -48,6 +50,8 @@ public:
: Base({type_res_, type_val_}, {}, tuple_argument_)
, type_res(this->argument_types[0])
, type_val(this->argument_types[1])
, serialization_res(type_res->getDefaultSerialization())
, serialization_val(type_val->getDefaultSerialization())
{
if (!type_val->isComparable())
throw Exception(
......@@ -84,14 +88,14 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
{
this->data(place).result.write(buf, *serialization_res);
this->data(place).value.write(buf, *serialization_val);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
{
this->data(place).result.read(buf, *serialization_res, arena);
this->data(place).value.read(buf, *serialization_val, arena);
}
bool allocatesMemoryInArena() const override { return Data::allocatesMemoryInArena(); }
......
......@@ -55,7 +55,8 @@ class AggregateFunctionGroupArrayInsertAtGeneric final
: public IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>
{
private:
DataTypePtr type;
SerializationPtr serialization;
Field default_value;
UInt64 length_to_resize = 0; /// zero means - do not do resizing.
......@@ -63,6 +64,7 @@ public:
AggregateFunctionGroupArrayInsertAtGeneric(const DataTypes & arguments, const Array & params)
: IAggregateFunctionDataHelper<AggregateFunctionGroupArrayInsertAtDataGeneric, AggregateFunctionGroupArrayInsertAtGeneric>(arguments, params)
, type(argument_types[0])
, serialization(type->getDefaultSerialization())
{
if (!params.empty())
{
......@@ -154,7 +156,7 @@ public:
else
{
writeBinary(UInt8(0), buf);
serialization->serializeBinary(elem, buf);
}
}
}
......@@ -175,7 +177,7 @@ public:
UInt8 is_null = 0;
readBinary(is_null, buf);
if (!is_null)
serialization->deserializeBinary(arr[i], buf);
}
}
......
......@@ -50,14 +50,14 @@ public:
assert_cast<ColVecType &>(to).insertDefault();
}
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
{
writeBinary(has(), buf);
if (has())
writeBinary(value, buf);
}
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena *)
{
readBinary(has_value, buf);
if (has())
......@@ -221,14 +221,14 @@ public:
assert_cast<ColumnString &>(to).insertDefault();
}
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
{
writeBinary(size, buf);
if (has())
buf.write(getData(), size);
}
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
{
Int32 rhs_size;
readBinary(rhs_size, buf);
......@@ -427,24 +427,24 @@ public:
to.insertDefault();
}
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
if (!value.isNull())
{
writeBinary(true, buf);
serialization.serializeBinary(value, buf);
}
else
writeBinary(false, buf);
}
void read(ReadBuffer & buf, const ISerialization & serialization, Arena *)
{
bool is_not_null;
readBinary(is_not_null, buf);
if (is_not_null)
serialization.deserializeBinary(value, buf);
}
void change(const IColumn & column, size_t row_num, Arena *)
......@@ -678,15 +678,15 @@ struct AggregateFunctionAnyHeavyData : Data
return false;
}
void write(WriteBuffer & buf, const ISerialization & serialization) const
{
Data::write(buf, serialization);
writeBinary(counter, buf);
}
void read(ReadBuffer & buf, const ISerialization & serialization, Arena * arena)
{
Data::read(buf, serialization, arena);
readBinary(counter, buf);
}
......@@ -698,12 +698,14 @@ template <typename Data>
class AggregateFunctionsSingleValue final : public IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>
{
private:
DataTypePtr type;
SerializationPtr serialization;
public:
AggregateFunctionsSingleValue(const DataTypePtr & type_)
: IAggregateFunctionDataHelper<Data, AggregateFunctionsSingleValue<Data>>({type_}, {})
, type(this->argument_types[0])
, serialization(type->getDefaultSerialization())
{
if (StringRef(Data::name()) == StringRef("min")
|| StringRef(Data::name()) == StringRef("max"))
......@@ -733,12 +735,12 @@ public:
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf) const override
{
this->data(place).write(buf, *serialization);
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, Arena * arena) const override
{
this->data(place).read(buf, *serialization, arena);
}
bool allocatesMemoryInArena() const override
......
......@@ -64,7 +64,9 @@ class AggregateFunctionMapBase : public IAggregateFunctionDataHelper<
{
private:
DataTypePtr keys_type;
SerializationPtr keys_serialization;
DataTypes values_types;
Serializations values_serializations;
public:
using Base = IAggregateFunctionDataHelper<
......@@ -72,9 +74,14 @@ public:
AggregateFunctionMapBase(const DataTypePtr & keys_type_,
const DataTypes & values_types_, const DataTypes & argument_types_)
: Base(argument_types_, {} /* parameters */)
, keys_type(keys_type_)
, keys_serialization(keys_type->getDefaultSerialization())
, values_types(values_types_)
{
values_serializations.reserve(values_types.size());
for (const auto & type : values_types)
values_serializations.emplace_back(type->getDefaultSerialization());
}
DataTypePtr getReturnType() const override
......@@ -248,9 +255,9 @@ public:
for (const auto & elem : merged_maps)
{
keys_serialization->serializeBinary(elem.first, buf);
for (size_t col = 0; col < values_types.size(); ++col)
values_serializations[col]->serializeBinary(elem.second[col], buf);
}
}
......@@ -263,12 +270,12 @@ public:
for (size_t i = 0; i < size; ++i)
{
Field key;
keys_serialization->deserializeBinary(key, buf);
Array values;
values.resize(values_types.size());
for (size_t col = 0; col < values_types.size(); ++col)
values_serializations[col]->deserializeBinary(values[col], buf);
if constexpr (IsDecimalNumber<T>)
merged_maps[key.get<DecimalField<T>>()] = values;
......
......@@ -158,7 +158,11 @@ macro(add_object_library name common_path)
list (APPEND all_modules ${name})
add_headers_and_sources(${name} ${common_path})
add_library(${name} SHARED ${${name}_sources} ${${name}_headers})
if (OS_DARWIN)
target_link_libraries (${name} PRIVATE -Wl,-undefined,dynamic_lookup)
else()
target_link_libraries (${name} PRIVATE -Wl,--unresolved-symbols=ignore-all)
endif()
endif ()
endmacro()
......@@ -168,6 +172,7 @@ add_object_library(clickhouse_core_mysql Core/MySQL)
add_object_library(clickhouse_compression Compression)
add_object_library(clickhouse_datastreams DataStreams)
add_object_library(clickhouse_datatypes DataTypes)
add_object_library(clickhouse_datatypes_serializations DataTypes/Serializations)
add_object_library(clickhouse_databases Databases)
add_object_library(clickhouse_databases_mysql Databases/MySQL)
add_object_library(clickhouse_disks Disks)
......@@ -215,7 +220,11 @@ else()
target_link_libraries (clickhouse_interpreters PRIVATE clickhouse_parsers_new jemalloc libdivide)
list (APPEND all_modules dbms)
# force all split libs to be linked
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
if (OS_DARWIN)
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-undefined,error")
else()
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--no-as-needed")
endif()
endif ()
macro (dbms_target_include_directories)
......
......@@ -362,9 +362,10 @@ int HedgedConnections::getReadyFileDescriptor(AsyncCallback async_callback)
epoll_event event;
event.data.fd = -1;
size_t events_count = 0;
bool blocking = !static_cast<bool>(async_callback);
while (events_count == 0)
{
events_count = epoll.getManyReady(1, &event, blocking);
if (!events_count && async_callback)
async_callback(epoll.getFileDescriptor(), 0, epoll.getDescription());
}
......
......@@ -1211,7 +1211,6 @@ ColumnPtr ColumnArray::replicateTuple(const Offsets & replicate_offsets) const
assert_cast<const ColumnArray &>(*temporary_arrays.front()).getOffsetsPtr());
}
void ColumnArray::gather(ColumnGathererStream & gatherer)
{
gatherer.gather(*this);
......
......@@ -536,7 +536,6 @@ void ColumnString::getExtremes(Field & min, Field & max) const
get(max_idx, max);
}
ColumnPtr ColumnString::compress() const
{
size_t source_chars_size = chars.size();
......
......@@ -279,7 +279,6 @@ public:
return typeid(rhs) == typeid(ColumnString);
}
Chars & getChars() { return chars; }
const Chars & getChars() const { return chars; }
......
......@@ -26,6 +26,9 @@ class ColumnGathererStream;
class Field;
class WeakHash32;
class ISerialization;
using SerializationPtr = std::shared_ptr<const ISerialization>;
/*
* Represents a set of equal ranges in previous column to perform sorting in current column.
......
......@@ -4,6 +4,7 @@
#include <Common/PODArray.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnsCommon.h>
#include <Core/Field.h>
namespace DB
......
......@@ -130,7 +130,6 @@ TEST(DateLUTTest, TimeValuesInMiddleOfRange)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 8078 /*unsigned*/);
EXPECT_EQ(lut.toRelativeHourNum(time), 435736 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 26144180 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 1568646000 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 1568650680 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 1568650811 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 201909 /*UInt32*/);
......@@ -191,7 +190,6 @@ TEST(DateLUTTest, TimeValuesAtLeftBoderOfRange)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 7880 /*unsigned*/); // ?
EXPECT_EQ(lut.toRelativeHourNum(time), 0 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 0 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 0 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 197001 /*UInt32*/);
......@@ -253,7 +251,6 @@ TEST(DateLUTTest, TimeValuesAtRightBoderOfRangeOfOldLUT)
EXPECT_EQ(lut.toRelativeQuarterNum(time), 8424 /*unsigned*/);
EXPECT_EQ(lut.toRelativeHourNum(time), 1192873 /*time_t*/);
EXPECT_EQ(lut.toRelativeMinuteNum(time), 71572397 /*time_t*/);
EXPECT_EQ(lut.toStartOfHourInterval(time, 5), 4294332000 /*time_t*/);
EXPECT_EQ(lut.toStartOfMinuteInterval(time, 6), 4294343520 /*time_t*/);
EXPECT_EQ(lut.toStartOfSecondInterval(time, 7), 4294343872 /*time_t*/);
EXPECT_EQ(lut.toNumYYYYMM(time), 210601 /*UInt32*/);
......
......@@ -107,9 +107,9 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
if (column_type)
{
CompressionCodecPtr prev_codec;
IDataType::StreamCallbackWithType callback = [&](const ISerialization::SubstreamPath & substream_path, const IDataType & substream_type)
{
if (ISerialization::isSpecialCompressionAllowed(substream_path))
{
result_codec = getImpl(codec_family_name, codec_arguments, &substream_type);
......@@ -121,8 +121,8 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(const ASTPtr
}
};
ISerialization::SubstreamPath stream_path;
column_type->enumerateStreams(column_type->getDefaultSerialization(), callback, stream_path);
if (!result_codec)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot find any substream with data type for type {}. It's a bug", column_type->getName());
......
......@@ -12,7 +12,7 @@ namespace MySQLProtocol
namespace ProtocolText
{
ResultSetRow::ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_)
: columns(columns_), row_num(row_num_)
{
for (size_t i = 0; i < columns.size(); i++)
......@@ -25,7 +25,7 @@ ResultSetRow::ResultSetRow(const DataTypes & data_types, const Columns & columns
else
{
WriteBufferFromOwnString ostr;
serializations[i]->serializeText(*columns[i], row_num, ostr, FormatSettings());
payload_size += getLengthEncodedStringSize(ostr.str());
serialized.push_back(std::move(ostr.str()));
}
......
......@@ -76,7 +76,7 @@ protected:
void writePayloadImpl(WriteBuffer & buffer) const override;
public:
ResultSetRow(const DataTypes & data_types, const Columns & columns_, int row_num_);
ResultSetRow(const Serializations & serializations, const Columns & columns_, int row_num_);
};
class ComFieldList : public LimitedReadPacket
......
......@@ -22,7 +22,9 @@ NameAndTypePair::NameAndTypePair(
: name(name_in_storage_ + (subcolumn_name_.empty() ? "" : "." + subcolumn_name_))
, type(subcolumn_type_)
, type_in_storage(type_in_storage_)
, subcolumn_delimiter_position(name_in_storage_.size()) {}
, subcolumn_delimiter_position(subcolumn_name_.empty() ? std::nullopt : std::make_optional(name_in_storage_.size()))
{
}
String NameAndTypePair::getNameInStorage() const
{
......
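The one-line fix above makes the delimiter position meaningful: it is stored only when a subcolumn name is present, so code that checks the optional can tell plain columns from subcolumns. A self-contained sketch (accessor shape assumed, constructor mirroring the diff):

#include <cassert>
#include <optional>
#include <string>

struct NameAndTypePair
{
    std::string name;
    std::optional<size_t> subcolumn_delimiter_position;

    NameAndTypePair(const std::string & name_in_storage, const std::string & subcolumn_name)
        : name(name_in_storage + (subcolumn_name.empty() ? "" : "." + subcolumn_name))
        , subcolumn_delimiter_position(subcolumn_name.empty()
            ? std::nullopt : std::make_optional(name_in_storage.size()))
    {
    }

    std::string getNameInStorage() const
    {
        return subcolumn_delimiter_position
            ? name.substr(0, *subcolumn_delimiter_position) : name;
    }
};

int main()
{
    assert(!NameAndTypePair("arr", "").subcolumn_delimiter_position.has_value());  /// plain column
    assert(NameAndTypePair("arr", "size0").getNameInStorage() == "arr");           /// "arr.size0" -> "arr"
}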
......@@ -116,6 +116,7 @@ class IColumn;
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, do not merge aggregation states from different servers for distributed query processing - in case it is known for certain that there are different keys on different shards. If 2 - same as 1, but also apply ORDER BY and LIMIT stages", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(UInt64, optimize_skip_unused_shards_limit, 1000, "Limit on the number of sharding key values; turns off optimize_skip_unused_shards if the limit is reached", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(Bool, allow_nondeterministic_optimize_skip_unused_shards, false, "Allow non-deterministic functions (includes dictGet) in sharding_key for optimize_skip_unused_shards", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw).", 0) \
......@@ -215,7 +216,7 @@ class IColumn;
\
M(Bool, insert_distributed_sync, false, "If the setting is enabled, the INSERT query into Distributed waits until data has been sent to all nodes in the cluster.", 0) \
M(UInt64, insert_distributed_timeout, 0, "Timeout for insert query into distributed. Setting is used only with insert_distributed_sync enabled. Zero value means no timeout.", 0) \
M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and a request will be executed in an async mode. Negative value means infinite.", 0) \
M(Int64, distributed_ddl_task_timeout, 180, "Timeout for DDL query responses from all hosts in cluster. If a ddl request has not been performed on all hosts, a response will contain a timeout error and the request will be executed in async mode. Negative value means infinite. Zero means async mode.", 0) \
M(Milliseconds, stream_flush_interval_ms, 7500, "Timeout for flushing data from streaming storages.", 0) \
M(Milliseconds, stream_poll_timeout_ms, 500, "Timeout for polling data from/to streaming storages.", 0) \
\
......@@ -437,7 +438,9 @@ class IColumn;
M(Bool, engine_file_truncate_on_insert, false, "Enables or disables truncate before insert in file engine tables", 0) \
M(Bool, allow_experimental_database_replicated, false, "Allow to create databases with Replicated engine", 0) \
M(UInt64, database_replicated_initial_query_timeout_sec, 300, "How long initial DDL query should wait for Replicated database to process previous DDL queue entries", 0) \
M(Bool, database_replicated_ddl_output, true, "Return table with query execution status as a result of DDL query", 0) \
M(Bool, database_replicated_always_detach_permanently, false, "Execute DETACH TABLE as DETACH TABLE PERMANENTLY if database engine is Replicated", 0) \
M(DistributedDDLOutputMode, distributed_ddl_output_mode, DistributedDDLOutputMode::THROW, "Format of distributed DDL query result", 0) \
M(UInt64, distributed_ddl_entry_format_version, 1, "Version of DDL entry to write into ZooKeeper", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
......@@ -449,6 +452,7 @@ class IColumn;
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing. Will be removed after 2021-09-08", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.
......
......@@ -102,4 +102,10 @@ IMPLEMENT_SETTING_ENUM(UnionMode, ErrorCodes::UNKNOWN_UNION,
{"ALL", UnionMode::ALL},
{"DISTINCT", UnionMode::DISTINCT}})
IMPLEMENT_SETTING_ENUM(DistributedDDLOutputMode, ErrorCodes::BAD_ARGUMENTS,
{{"none", DistributedDDLOutputMode::NONE},
{"throw", DistributedDDLOutputMode::THROW},
{"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
{"never_throw", DistributedDDLOutputMode::NEVER_THROW}})
}
......@@ -138,4 +138,15 @@ enum class UnionMode
DECLARE_SETTING_ENUM(UnionMode)
enum class DistributedDDLOutputMode
{
NONE,
THROW,
NULL_STATUS_ON_TIMEOUT,
NEVER_THROW,
};
DECLARE_SETTING_ENUM(DistributedDDLOutputMode)
}
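A rough model of the string-to-enum mapping that IMPLEMENT_SETTING_ENUM is expected to generate for the new setting (assumed macro behavior; the real macro also produces the reverse mapping and reports BAD_ARGUMENTS on unknown names):

#include <map>
#include <stdexcept>
#include <string>

enum class DistributedDDLOutputMode
{
    NONE,
    THROW,
    NULL_STATUS_ON_TIMEOUT,
    NEVER_THROW,
};

DistributedDDLOutputMode distributedDDLOutputModeFromString(const std::string & name)
{
    static const std::map<std::string, DistributedDDLOutputMode> mapping =
    {
        {"none", DistributedDDLOutputMode::NONE},
        {"throw", DistributedDDLOutputMode::THROW},
        {"null_status_on_timeout", DistributedDDLOutputMode::NULL_STATUS_ON_TIMEOUT},
        {"never_throw", DistributedDDLOutputMode::NEVER_THROW},
    };
    auto it = mapping.find(name);
    if (it == mapping.end())
        throw std::invalid_argument("Unknown distributed_ddl_output_mode: " + name);
    return it->second;
}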
......@@ -73,14 +73,16 @@ void NativeBlockInputStream::resetParser()
void NativeBlockInputStream::readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
{
IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
ISerialization::DeserializeBinaryBulkSettings settings;
settings.getter = [&](ISerialization::SubstreamPath) -> ReadBuffer * { return &istr; };
settings.avg_value_size_hint = avg_value_size_hint;
settings.position_independent_encoding = false;
IDataType::DeserializeBinaryBulkStatePtr state;
type.deserializeBinaryBulkStatePrefix(settings, state);
type.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state);
ISerialization::DeserializeBinaryBulkStatePtr state;
auto serialization = type.getDefaultSerialization();
serialization->deserializeBinaryBulkStatePrefix(settings, state);
serialization->deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state, nullptr);
if (column->size() != rows)
throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column->size()) + ". Rows expected: " + toString(rows) + ".",
......
......@@ -48,15 +48,17 @@ void NativeBlockOutputStream::writeData(const IDataType & type, const ColumnPtr
*/
ColumnPtr full_column = column->convertToFullColumnIfConst();
IDataType::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](IDataType::SubstreamPath) -> WriteBuffer * { return &ostr; };
ISerialization::SerializeBinaryBulkSettings settings;
settings.getter = [&ostr](ISerialization::SubstreamPath) -> WriteBuffer * { return &ostr; };
settings.position_independent_encoding = false;
settings.low_cardinality_max_dictionary_size = 0;
IDataType::SerializeBinaryBulkStatePtr state;
type.serializeBinaryBulkStatePrefix(settings, state);
type.serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
type.serializeBinaryBulkStateSuffix(settings, state);
auto serialization = type.getDefaultSerialization();
ISerialization::SerializeBinaryBulkStatePtr state;
serialization->serializeBinaryBulkStatePrefix(settings, state);
serialization->serializeBinaryBulkWithMultipleStreams(*full_column, offset, limit, settings, state);
serialization->serializeBinaryBulkStateSuffix(settings, state);
}
......
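The write path above mirrors the read path shown earlier: state prefix, bulk payload, state suffix. A toy round trip with std streams in place of ClickHouse buffers (shapes assumed, not the real API), collapsing the three calls into one function per direction:

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <sstream>
#include <vector>

static void serializeColumn(const std::vector<uint64_t> & column, size_t offset, size_t limit, std::ostream & ostr)
{
    ostr.write("HDR", 3);  /// state prefix: e.g. version or dictionary info
    size_t end = limit ? std::min(offset + limit, column.size()) : column.size();
    for (size_t i = offset; i < end; ++i)  /// bulk payload
        ostr.write(reinterpret_cast<const char *>(&column[i]), sizeof(uint64_t));
    ostr.write("END", 3);  /// state suffix
}

static std::vector<uint64_t> deserializeColumn(std::istream & istr, size_t rows)
{
    char header[3];
    istr.read(header, 3);  /// the state prefix must be consumed before the payload
    std::vector<uint64_t> column(rows);
    istr.read(reinterpret_cast<char *>(column.data()), rows * sizeof(uint64_t));
    assert(istr.gcount() == static_cast<std::streamsize>(rows * sizeof(uint64_t)));
    return column;
}

int main()
{
    std::stringstream buffer;
    serializeColumn({1, 2, 3, 4}, /*offset=*/ 0, /*limit=*/ 0, buffer);
    assert(deserializeColumn(buffer, 4) == std::vector<uint64_t>({1, 2, 3, 4}));
}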
......@@ -176,7 +176,7 @@ void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view
case ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->deserializeAsWholeText(column, istr, FormatSettings{});
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
case ValueType::vtArray:
......
......@@ -104,7 +104,7 @@ void RemoteQueryExecutorReadContext::setConnectionFD(int fd, const Poco::Timespa
connection_fd_description = fd_description;
}
bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) const
bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking)
{
try
{
......@@ -118,7 +118,7 @@ bool RemoteQueryExecutorReadContext::checkTimeout(bool blocking) const
}
}
bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking)
{
/// Waiting on epoll will not block if it was polled externally.
epoll_event events[3];
......@@ -128,14 +128,13 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
bool is_socket_ready = false;
bool is_pipe_alarmed = false;
bool has_timer_alarm = false;
for (int i = 0; i < num_events; ++i)
{
if (events[i].data.fd == connection_fd)
is_socket_ready = true;
if (events[i].data.fd == timer.getDescriptor())
has_timer_alarm = true;
is_timer_alarmed = true;
if (events[i].data.fd == pipe_fd[0])
is_pipe_alarmed = true;
}
......@@ -143,7 +142,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) const
if (is_pipe_alarmed)
return false;
if (has_timer_alarm && !is_socket_ready)
if (is_timer_alarmed && !is_socket_ready)
{
/// Socket receive timeout. Drain it in case of error, or it may be hidden by the timeout exception.
timer.drain();
......@@ -188,10 +187,18 @@ void RemoteQueryExecutorReadContext::cancel()
/// It is safe to just destroy fiber - we are not in the process of reading from socket.
boost::context::fiber to_destroy = std::move(fiber);
while (is_read_in_progress.load(std::memory_order_relaxed))
/// One should not try to wait for the current packet here in case of
/// timeout, because this would exceed the timeout.
/// Anyway, if the timeout is exceeded, the connection will be shut down
/// (disconnected), so it will not be left in an unsynchronised state.
if (!is_timer_alarmed)
{
checkTimeout(/* blocking= */ true);
to_destroy = std::move(to_destroy).resume();
/// Wait for the current pending packet, to avoid leaving the connection in an unsynchronised state.
while (is_read_in_progress.load(std::memory_order_relaxed))
{
checkTimeout(/* blocking= */ true);
to_destroy = std::move(to_destroy).resume();
}
}
/// Send something to pipe to cancel executor waiting.
......
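The readiness check above boils down to polling a timerfd next to the connection descriptor. A minimal Linux sketch using the same epoll/timerfd primitives (toy wiring, not the ClickHouse class; timeout_sec must be positive, since a zero it_value disarms the timer):

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <unistd.h>
#include <stdexcept>

/// Returns false if only the timer fired (timeout), true if the fd became readable.
bool waitReadableOrTimeout(int data_fd, int timeout_sec)
{
    int timer_fd = timerfd_create(CLOCK_MONOTONIC, 0);
    int epoll_fd = epoll_create1(0);
    if (timer_fd < 0 || epoll_fd < 0)
        throw std::runtime_error("cannot create descriptors");

    itimerspec spec{};
    spec.it_value.tv_sec = timeout_sec;  /// one-shot receive timeout
    timerfd_settime(timer_fd, 0, &spec, nullptr);

    epoll_event ev{};
    ev.events = EPOLLIN;
    ev.data.fd = data_fd;
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, data_fd, &ev);
    ev.data.fd = timer_fd;
    epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_fd, &ev);

    epoll_event events[2];
    int num_events = epoll_wait(epoll_fd, events, 2, -1);

    bool is_socket_ready = false;
    bool is_timer_alarmed = false;
    for (int i = 0; i < num_events; ++i)
    {
        if (events[i].data.fd == data_fd)
            is_socket_ready = true;
        if (events[i].data.fd == timer_fd)
            is_timer_alarmed = true;
    }

    close(epoll_fd);
    close(timer_fd);
    /// As in the loop above: a timer alarm without socket readiness means timeout.
    return is_socket_ready || !is_timer_alarmed;
}

int main()
{
    int fds[2];
    if (pipe(fds) != 0)
        return 1;
    /// Nothing is ever written to the pipe, so only the 1-second timer can fire.
    return waitReadableOrTimeout(fds[0], 1) ? 1 : 0;
}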
......@@ -44,6 +44,7 @@ public:
/// * pipe_fd is a pipe we use to cancel query and socket polling by executor.
/// We put those descriptors into our own epoll which is used by external executor.
TimerDescriptor timer{CLOCK_MONOTONIC, 0};
bool is_timer_alarmed = false;
int connection_fd = -1;
int pipe_fd[2] = { -1, -1 };
......@@ -54,8 +55,8 @@ public:
explicit RemoteQueryExecutorReadContext(IConnections & connections_);
~RemoteQueryExecutorReadContext();
bool checkTimeout(bool blocking = false) const;
bool checkTimeoutImpl(bool blocking) const;
bool checkTimeout(bool blocking = false);
bool checkTimeoutImpl(bool blocking);
void setConnectionFD(int fd, const Poco::Timespan & timeout = 0, const std::string & fd_description = "");
void setTimer() const;
......
......@@ -11,6 +11,7 @@
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeAggregateFunction.h>
#include <DataTypes/Serializations/SerializationAggregateFunction.h>
#include <DataTypes/DataTypeFactory.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
......@@ -58,207 +59,6 @@ std::string DataTypeAggregateFunction::doGetName() const
return stream.str();
}
void DataTypeAggregateFunction::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = get<const String &>(field);
writeVarUInt(s.size(), ostr);
writeString(s, ostr);
}
void DataTypeAggregateFunction::deserializeBinary(Field & field, ReadBuffer & istr) const
{
UInt64 size;
readVarUInt(size, istr);
field = String();
String & s = get<String &>(field);
s.resize(size);
istr.readStrict(s.data(), size);
}
void DataTypeAggregateFunction::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
function->serialize(assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num], ostr);
}
void DataTypeAggregateFunction::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
Arena & arena = column_concrete.createOrGetArena();
size_t size_of_state = function->sizeOfData();
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
function->create(place);
try
{
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
column_concrete.getData().push_back(place);
}
void DataTypeAggregateFunction::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnAggregateFunction & real_column = typeid_cast<const ColumnAggregateFunction &>(column);
const ColumnAggregateFunction::Container & vec = real_column.getData();
ColumnAggregateFunction::Container::const_iterator it = vec.begin() + offset;
ColumnAggregateFunction::Container::const_iterator end = limit ? it + limit : vec.end();
if (end > vec.end())
end = vec.end();
for (; it != end; ++it)
function->serialize(*it, ostr);
}
void DataTypeAggregateFunction::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
ColumnAggregateFunction & real_column = typeid_cast<ColumnAggregateFunction &>(column);
ColumnAggregateFunction::Container & vec = real_column.getData();
Arena & arena = real_column.createOrGetArena();
real_column.set(function);
vec.reserve(vec.size() + limit);
size_t size_of_state = function->sizeOfData();
size_t align_of_state = function->alignOfData();
for (size_t i = 0; i < limit; ++i)
{
if (istr.eof())
break;
AggregateDataPtr place = arena.alignedAlloc(size_of_state, align_of_state);
function->create(place);
try
{
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
vec.push_back(place);
}
}
static String serializeToString(const AggregateFunctionPtr & function, const IColumn & column, size_t row_num)
{
WriteBufferFromOwnString buffer;
function->serialize(assert_cast<const ColumnAggregateFunction &>(column).getData()[row_num], buffer);
return buffer.str();
}
static void deserializeFromString(const AggregateFunctionPtr & function, IColumn & column, const String & s)
{
ColumnAggregateFunction & column_concrete = assert_cast<ColumnAggregateFunction &>(column);
Arena & arena = column_concrete.createOrGetArena();
size_t size_of_state = function->sizeOfData();
AggregateDataPtr place = arena.alignedAlloc(size_of_state, function->alignOfData());
function->create(place);
try
{
ReadBufferFromString istr(s);
function->deserialize(place, istr, &arena);
}
catch (...)
{
function->destroy(place);
throw;
}
column_concrete.getData().push_back(place);
}
void DataTypeAggregateFunction::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeEscapedString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readEscapedString(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeQuotedString(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readQuotedStringWithSQLStyle(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readStringUntilEOF(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(serializeToString(function, column, row_num), ostr, settings);
}
void DataTypeAggregateFunction::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
String s;
readJSONString(s, istr);
deserializeFromString(function, column, s);
}
void DataTypeAggregateFunction::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeXMLStringForTextElement(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeCSV(serializeToString(function, column, row_num), ostr);
}
void DataTypeAggregateFunction::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
deserializeFromString(function, column, s);
}
MutableColumnPtr DataTypeAggregateFunction::createColumn() const
{
return ColumnAggregateFunction::create(function);
......@@ -298,6 +98,11 @@ bool DataTypeAggregateFunction::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this) && getName() == rhs.getName();
}
SerializationPtr DataTypeAggregateFunction::doGetDefaultSerialization() const
{
return std::make_shared<SerializationAggregateFunction>(function);
}
static DataTypePtr create(const ASTPtr & arguments)
{
......
......@@ -39,27 +39,6 @@ public:
DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); }
DataTypes getArgumentsDataTypes() const { return argument_types; }
/// NOTE These two functions for serializing single values are incompatible with the functions below.
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
......@@ -69,6 +48,8 @@ public:
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return false; }
bool shouldAlignRightInPrettyFormats() const override { return false; }
SerializationPtr doGetDefaultSerialization() const override;
};
......
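Taken together, these two hunks show the central refactoring of this commit: the per-format (de)serialization methods leave IDataType and move into an ISerialization subclass, while the type merely constructs its default serialization. A toy model of that split (assumed shapes, heavily simplified):

#include <iostream>
#include <memory>
#include <string>

struct ISerialization
{
    virtual ~ISerialization() = default;
    virtual void serializeText(const std::string & value, std::ostream & ostr) const = 0;
};

struct SerializationAggregateFunction final : ISerialization
{
    void serializeText(const std::string & value, std::ostream & ostr) const override
    {
        ostr << value;  /// the real class serializes the aggregate function state
    }
};

struct IDataType
{
    virtual ~IDataType() = default;
    std::shared_ptr<const ISerialization> getDefaultSerialization() const
    {
        return doGetDefaultSerialization();
    }
protected:
    virtual std::shared_ptr<const ISerialization> doGetDefaultSerialization() const = 0;
};

struct DataTypeAggregateFunction final : IDataType
{
protected:
    std::shared_ptr<const ISerialization> doGetDefaultSerialization() const override
    {
        return std::make_shared<SerializationAggregateFunction>();
    }
};

int main()
{
    DataTypeAggregateFunction type;
    type.getDefaultSerialization()->serializeText("state", std::cout);
}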
......@@ -9,7 +9,9 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeOneElementTuple.h>
#include <DataTypes/Serializations/SerializationArray.h>
#include <DataTypes/Serializations/SerializationTupleElement.h>
#include <DataTypes/Serializations/SerializationNumber.h>
#include <Parsers/IAST.h>
......@@ -24,10 +26,7 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int CANNOT_READ_ARRAY_FROM_TEXT;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
}
......@@ -37,490 +36,6 @@ DataTypeArray::DataTypeArray(const DataTypePtr & nested_)
}
void DataTypeArray::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const Array & a = get<const Array &>(field);
writeVarUInt(a.size(), ostr);
for (size_t i = 0; i < a.size(); ++i)
{
nested->serializeBinary(a[i], ostr);
}
}
void DataTypeArray::deserializeBinary(Field & field, ReadBuffer & istr) const
{
size_t size;
readVarUInt(size, istr);
field = Array(size);
Array & arr = get<Array &>(field);
for (size_t i = 0; i < size; ++i)
nested->deserializeBinary(arr[i], istr);
}
void DataTypeArray::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
size_t size = next_offset - offset;
writeVarUInt(size, ostr);
const IColumn & nested_column = column_array.getData();
for (size_t i = offset; i < next_offset; ++i)
nested->serializeBinary(nested_column, i, ostr);
}
void DataTypeArray::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t size;
readVarUInt(size, istr);
IColumn & nested_column = column_array.getData();
size_t i = 0;
try
{
for (; i < size; ++i)
nested->deserializeBinary(nested_column, istr);
}
catch (...)
{
if (i)
nested_column.popBack(i);
throw;
}
offsets.push_back(offsets.back() + size);
}
namespace
{
void serializeArraySizesPositionIndependent(const IColumn & column, WriteBuffer & ostr, UInt64 offset, UInt64 limit)
{
const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offset_values = column_array.getOffsets();
size_t size = offset_values.size();
if (!size)
return;
size_t end = limit && (offset + limit < size)
? offset + limit
: size;
ColumnArray::Offset prev_offset = offset_values[offset - 1];
for (size_t i = offset; i < end; ++i)
{
ColumnArray::Offset current_offset = offset_values[i];
writeIntBinary(current_offset - prev_offset, ostr);
prev_offset = current_offset;
}
}
void deserializeArraySizesPositionIndependent(IColumn & column, ReadBuffer & istr, UInt64 limit)
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
ColumnArray::Offsets & offset_values = column_array.getOffsets();
size_t initial_size = offset_values.size();
offset_values.resize(initial_size + limit);
size_t i = initial_size;
ColumnArray::Offset current_offset = initial_size ? offset_values[initial_size - 1] : 0;
while (i < initial_size + limit && !istr.eof())
{
ColumnArray::Offset current_size = 0;
readIntBinary(current_size, istr);
current_offset += current_size;
offset_values[i] = current_offset;
++i;
}
offset_values.resize(i);
}
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
if (column_offsets.empty())
return column_sizes;
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
sizes_data.resize(offsets_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
{
auto current_offset = offsets_data[i];
sizes_data[i] = current_offset - prev_offset;
prev_offset = current_offset;
}
return column_sizes;
}
ColumnPtr arraySizesToOffsets(const IColumn & column)
{
const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_offsets = column_sizes.cloneEmpty();
if (column_sizes.empty())
return column_offsets;
const auto & sizes_data = column_sizes.getData();
auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();
offsets_data.resize(sizes_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = sizes_data.size(); i < size; ++i)
{
prev_offset += sizes_data[i];
offsets_data[i] = prev_offset;
}
return column_offsets;
}
}
void DataTypeArray::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::ArraySizes);
callback(path, *this);
path.back() = Substream::ArrayElements;
nested->enumerateStreams(callback, path);
path.pop_back();
}
void DataTypeArray::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->serializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->serializeBinaryBulkStateSuffix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
settings.path.push_back(Substream::ArrayElements);
nested->deserializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void DataTypeArray::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
const ColumnArray & column_array = typeid_cast<const ColumnArray &>(column);
/// First serialize array sizes.
settings.path.push_back(Substream::ArraySizes);
if (auto * stream = settings.getter(settings.path))
{
if (settings.position_independent_encoding)
serializeArraySizesPositionIndependent(column, *stream, offset, limit);
else
DataTypeNumber<ColumnArray::Offset>().serializeBinaryBulk(*column_array.getOffsetsPtr(), *stream, offset, limit);
}
/// Then serialize contents of arrays.
settings.path.back() = Substream::ArrayElements;
const ColumnArray::Offsets & offset_values = column_array.getOffsets();
if (offset > offset_values.size())
return;
/** offset - the first array to write.
* limit - how many arrays to write, or 0 to write everything to the end.
* end - the array at which the written piece ends.
*
* nested_offset - the first element of the nested data to write.
* nested_limit - how many nested elements to write, or 0 to write everything to the end.
*/
size_t end = std::min(offset + limit, offset_values.size());
size_t nested_offset = offset ? offset_values[offset - 1] : 0;
size_t nested_limit = limit
? offset_values[end - 1] - nested_offset
: 0;
if (limit == 0 || nested_limit)
nested->serializeBinaryBulkWithMultipleStreams(column_array.getData(), nested_offset, nested_limit, settings, state);
settings.path.pop_back();
}
void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
settings.path.push_back(Substream::ArraySizes);
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
}
else if (auto * stream = settings.getter(settings.path))
{
if (settings.position_independent_encoding)
deserializeArraySizesPositionIndependent(column, *stream, limit);
else
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
}
settings.path.back() = Substream::ArrayElements;
ColumnArray::Offsets & offset_values = column_array.getOffsets();
ColumnPtr & nested_column = column_array.getDataPtr();
/// The number of values corresponding to `offset_values` must be read.
size_t last_offset = offset_values.back();
if (last_offset < nested_column->size())
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
size_t nested_limit = last_offset - nested_column->size();
/// Adjust the value size hint. Divide it by the average array size.
settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state, cache);
settings.path.pop_back();
/// Check consistency between offsets and elements subcolumns.
/// But if the elements column is empty - it's ok for columns of Nested types that were added by ALTER.
if (!nested_column->empty() && nested_column->size() != last_offset)
throw ParsingException("Cannot read all array values: read just " + toString(nested_column->size()) + " of " + toString(last_offset),
ErrorCodes::CANNOT_READ_ALL_DATA);
}
template <typename Writer>
static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && write_nested)
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
write_nested(nested_column, i);
}
writeChar(']', ostr);
}
template <typename Reader>
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
IColumn & nested_column = column_array.getData();
size_t size = 0;
bool has_braces = false;
if (checkChar('[', istr))
has_braces = true;
else if (!allow_unenclosed)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
try
{
bool first = true;
while (!istr.eof() && *istr.position() != ']')
{
if (!first)
{
if (*istr.position() == ',')
++istr.position();
else
throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
"Cannot read array from text, expected comma or end of array, found '{}'",
*istr.position());
}
first = false;
skipWhitespaceIfAny(istr);
if (*istr.position() == ']')
break;
read_nested(nested_column);
++size;
skipWhitespaceIfAny(istr);
}
if (has_braces)
assertChar(']', istr);
else /// If array is not enclosed in braces, we read until EOF.
assertEOF(istr);
}
catch (...)
{
if (size)
nested_column.popBack(size);
throw;
}
offsets.push_back(offsets.back() + size);
}
void DataTypeArray::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
[&](const IColumn & nested_column, size_t i)
{
nested->serializeAsTextQuoted(nested_column, i, ostr, settings);
});
}
void DataTypeArray::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, istr, settings);
}, false);
}
void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeChar('[', ostr);
for (size_t i = offset; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
nested->serializeAsTextJSON(nested_column, i, ostr, settings);
}
writeChar(']', ostr);
}
void DataTypeArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeAsTextJSON(nested_column, istr, settings);
}, false);
}
void DataTypeArray::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const ColumnArray & column_array = assert_cast<const ColumnArray &>(column);
const ColumnArray::Offsets & offsets = column_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const IColumn & nested_column = column_array.getData();
writeCString("<array>", ostr);
for (size_t i = offset; i < next_offset; ++i)
{
writeCString("<elem>", ostr);
nested->serializeAsTextXML(nested_column, i, ostr, settings);
writeCString("</elem>", ostr);
}
writeCString("</array>", ostr);
}
void DataTypeArray::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
/// There is no good way to serialize an array in CSV. Therefore, we serialize it into a string, and then write the resulting string in CSV.
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
}
void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
if (settings.csv.input_format_arrays_as_nested_csv)
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextCSV(nested_column, rb, settings);
}, true);
}
else
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, rb, settings);
}, true);
}
}
MutableColumnPtr DataTypeArray::createColumn() const
{
return ColumnArray::create(nested->createColumn(), ColumnArray::ColumnOffsets::create());
......@@ -546,7 +61,7 @@ DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) co
DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return createOneElementTuple(std::make_shared<DataTypeUInt64>(), subcolumn_name, false);
return std::make_shared<DataTypeUInt64>();
DataTypePtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
......@@ -554,7 +69,10 @@ DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name
else
subcolumn = nested->tryGetSubcolumnType(subcolumn_name);
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
if (subcolumn && subcolumn_name != MAIN_SUBCOLUMN_NAME)
subcolumn = std::make_shared<DataTypeArray>(std::move(subcolumn));
return subcolumn;
}
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
......@@ -577,6 +95,32 @@ ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const I
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
}
SerializationPtr DataTypeArray::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
return getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, 0);
}
SerializationPtr DataTypeArray::getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return std::make_shared<SerializationTupleElement>(base_serialization_getter(DataTypeUInt64()), subcolumn_name, false);
SerializationPtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnSerializationImpl(subcolumn_name, base_serialization_getter, level + 1);
else
subcolumn = nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
return std::make_shared<SerializationArray>(subcolumn);
}
SerializationPtr DataTypeArray::doGetDefaultSerialization() const
{
return std::make_shared<SerializationArray>(nested->getDefaultSerialization());
}
size_t DataTypeArray::getNumberOfDimensions() const
{
const DataTypeArray * nested_array = typeid_cast<const DataTypeArray *>(nested.get());
......
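The recursive lookup above resolves "sizeN" at nesting level N and re-wraps anything found deeper in Array. A simplified self-contained sketch of that resolution (toy types; MAIN_SUBCOLUMN_NAME handling and delegation to non-array nested types omitted):

#include <iostream>
#include <memory>
#include <string>

struct DataType
{
    bool is_array = false;
    std::shared_ptr<DataType> nested;  /// set when is_array
    std::string name;
};

std::shared_ptr<DataType> tryGetSubcolumnType(const DataType & type, const std::string & subcolumn, size_t level = 0)
{
    if (!type.is_array)
        return nullptr;
    if (subcolumn == "size" + std::to_string(level))
        return std::make_shared<DataType>(DataType{false, nullptr, "UInt64"});
    auto inner = tryGetSubcolumnType(*type.nested, subcolumn, level + 1);
    if (inner)  /// re-wrap whatever was found deeper, as the code above does
        inner = std::make_shared<DataType>(DataType{true, inner, "Array(" + inner->name + ")"});
    return inner;
}

int main()
{
    auto elem = std::make_shared<DataType>(DataType{false, nullptr, "UInt64"});
    auto inner_arr = std::make_shared<DataType>(DataType{true, elem, "Array(UInt64)"});
    DataType arr{true, inner_arr, "Array(Array(UInt64))"};

    std::cout << tryGetSubcolumnType(arr, "size0")->name << '\n';  /// UInt64
    std::cout << tryGetSubcolumnType(arr, "size1")->name << '\n';  /// Array(UInt64)
}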
#pragma once
#include <DataTypes/DataTypeWithSimpleSerialization.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/SerializationArray.h>
namespace DB
{
class DataTypeArray final : public DataTypeWithSimpleSerialization
class DataTypeArray final : public IDataType
{
private:
/// The type of array elements.
......@@ -35,56 +36,6 @@ public:
return false;
}
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Streaming serialization of arrays is arranged in a special way:
* - elements placed in a row are written/read without array sizes;
* - the sizes are written/read in a separate stream.
* This is necessary because, when implementing nested structures, several arrays can have common sizes.
*/
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
......@@ -105,6 +56,10 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
SerializationPtr doGetDefaultSerialization() const override;
const DataTypePtr & getNestedType() const { return nested; }
......@@ -114,6 +69,8 @@ public:
private:
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
SerializationPtr getSubcolumnSerializationImpl(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter, size_t level) const;
};
}
......@@ -3,7 +3,7 @@
#include <memory>
#include <cstddef>
#include <Core/Types.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/ISerialization.h>
namespace DB
{
......@@ -24,106 +24,20 @@ public:
virtual String getName() const = 0;
};
class IDataTypeCustomTextSerialization
{
public:
virtual ~IDataTypeCustomTextSerialization() {}
/** Text serialization for displaying on a terminal or saving into a text file, and the like.
* Without escaping or quoting.
*/
virtual void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
/** Text deserialization without quoting or escaping.
*/
virtual void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization with escaping but without quoting.
*/
virtual void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization as a literal that may be inserted into a query.
*/
virtual void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization for the CSV format.
*/
virtual void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization intended for using in JSON format.
*/
virtual void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const = 0;
virtual void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const = 0;
/** Text serialization for putting into the XML format.
*/
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
};
/** Allows customizing an existing data type by representing it with custom substreams.
* A customized data type will be serialized/deserialized to files with different names than the base type,
* but the binary and text representation will be unchanged.
* E.g. it can be used for reading single subcolumns of complex types.
*/
class IDataTypeCustomStreams
{
public:
virtual ~IDataTypeCustomStreams() = default;
virtual void enumerateStreams(
const IDataType::StreamCallback & callback,
IDataType::SubstreamPath & path) const = 0;
virtual void serializeBinaryBulkStatePrefix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkStateSuffix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkStatePrefix(
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkWithMultipleStreams(
ColumnPtr & column,
size_t limit,
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state,
IDataType::SubstreamsCache * cache) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
/** Describe a data type customization
*/
struct DataTypeCustomDesc
{
DataTypeCustomNamePtr name;
DataTypeCustomTextSerializationPtr text_serialization;
DataTypeCustomStreamsPtr streams;
SerializationPtr serialization;
DataTypeCustomDesc(
DataTypeCustomNamePtr name_,
DataTypeCustomTextSerializationPtr text_serialization_ = nullptr,
DataTypeCustomStreamsPtr streams_ = nullptr)
SerializationPtr serialization_ = nullptr)
: name(std::move(name_))
, text_serialization(std::move(text_serialization_))
, streams(std::move(streams_)) {}
, serialization(std::move(serialization_)) {}
};
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
......
#include <DataTypes/DataTypeCustomGeo.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
......@@ -12,102 +8,20 @@
namespace DB
{
namespace
{
const auto point_data_type = std::make_shared<const DataTypeTuple>(
DataTypes{std::make_shared<const DataTypeFloat64>(), std::make_shared<const DataTypeFloat64>()}
);
const auto ring_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomPointSerialization::nestedDataType());
const auto polygon_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomRingSerialization::nestedDataType());
const auto multipolygon_data_type = std::make_shared<const DataTypeArray>(DataTypeCustomPolygonSerialization::nestedDataType());
}
void DataTypeCustomPointSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomPointSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomPointSerialization::nestedDataType()
{
return point_data_type;
}
void DataTypeCustomRingSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomRingSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomRingSerialization::nestedDataType()
{
return ring_data_type;
}
void DataTypeCustomPolygonSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomPolygonSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomPolygonSerialization::nestedDataType()
{
return polygon_data_type;
}
void DataTypeCustomMultiPolygonSerialization::serializeText(
const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
nestedDataType()->serializeAsText(column, row_num, ostr, settings);
}
void DataTypeCustomMultiPolygonSerialization::deserializeText(
IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
nestedDataType()->deserializeAsWholeText(column, istr, settings);
}
DataTypePtr DataTypeCustomMultiPolygonSerialization::nestedDataType()
{
return multipolygon_data_type;
}
void registerDataTypeDomainGeo(DataTypeFactory & factory)
{
// Custom type for point represented as its coordinates stored as Tuple(Float64, Float64)
factory.registerSimpleDataTypeCustom("Point", []
{
return std::make_pair(DataTypeFactory::instance().get("Tuple(Float64, Float64)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Point"), std::make_unique<DataTypeCustomPointSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePointName>()));
});
// Custom type for simple polygon without holes stored as Array(Point)
factory.registerSimpleDataTypeCustom("Ring", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Point)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Ring"), std::make_unique<DataTypeCustomRingSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeRingName>()));
});
// Custom type for polygon with holes stored as Array(Ring)
......@@ -115,14 +29,14 @@ void registerDataTypeDomainGeo(DataTypeFactory & factory)
factory.registerSimpleDataTypeCustom("Polygon", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Ring)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("Polygon"), std::make_unique<DataTypeCustomPolygonSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypePolygonName>()));
});
// Custom type for multiple polygons with holes stored as Array(Polygon)
factory.registerSimpleDataTypeCustom("MultiPolygon", []
{
return std::make_pair(DataTypeFactory::instance().get("Array(Polygon)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("MultiPolygon"), std::make_unique<DataTypeCustomMultiPolygonSerialization>()));
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeMultiPolygonName>()));
});
}
......
#pragma once
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnTuple.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
class DataTypeCustomPointSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypePointName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypePointName() : DataTypeCustomFixedName("Point") {}
};
class DataTypeCustomRingSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypeRingName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypeRingName() : DataTypeCustomFixedName("Ring") {}
};
class DataTypeCustomPolygonSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypePolygonName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypePolygonName() : DataTypeCustomFixedName("Polygon") {}
};
class DataTypeCustomMultiPolygonSerialization : public DataTypeCustomSimpleTextSerialization
class DataTypeMultiPolygonName : public DataTypeCustomFixedName
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
static DataTypePtr nestedDataType();
DataTypeMultiPolygonName() : DataTypeCustomFixedName("MultiPolygon") {}
};
}
#include <Columns/ColumnsNumber.h>
#include <Common/Exception.h>
#include <Common/formatIPv6.h>
#include <DataTypes/DataTypeCustomSimpleTextSerialization.h>
#include <DataTypes/Serializations/SerializationIP.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>
#include <Functions/FunctionsCoding.h>
namespace DB
{
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING;
}
namespace
{
class DataTypeCustomIPv4Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto * col = checkAndGetColumn<ColumnUInt32>(&column);
if (!col)
{
throw Exception("IPv4 type can only serialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
char * ptr = buffer;
formatIPv4(reinterpret_cast<const unsigned char *>(&col->getData()[row_num]), ptr);
ostr.write(buffer, strlen(buffer));
}
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
ColumnUInt32 * col = typeid_cast<ColumnUInt32 *>(&column);
if (!col)
{
throw Exception("IPv4 type can only deserialize columns of type UInt32." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV4_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
UInt32 ipv4_value = 0;
if (!parseIPv4(buffer, reinterpret_cast<unsigned char *>(&ipv4_value)))
{
throw Exception("Invalid IPv4 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insert(ipv4_value);
}
};
class DataTypeCustomIPv6Serialization : public DataTypeCustomSimpleTextSerialization
{
public:
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override
{
const auto * col = checkAndGetColumn<ColumnFixedString>(&column);
if (!col)
{
throw Exception("IPv6 type domain can only serialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
char * ptr = buffer;
formatIPv6(reinterpret_cast<const unsigned char *>(col->getDataAt(row_num).data), ptr);
ostr.write(buffer, strlen(buffer));
}
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override
{
ColumnFixedString * col = typeid_cast<ColumnFixedString *>(&column);
if (!col)
{
throw Exception("IPv6 type domain can only deserialize columns of type FixedString(16)." + column.getName(), ErrorCodes::ILLEGAL_COLUMN);
}
char buffer[IPV6_MAX_TEXT_LENGTH + 1] = {'\0'};
istr.read(buffer, sizeof(buffer) - 1);
std::string ipv6_value(IPV6_BINARY_LENGTH, '\0');
if (!parseIPv6(buffer, reinterpret_cast<unsigned char *>(ipv6_value.data())))
{
throw Exception("Invalid IPv6 value.", ErrorCodes::CANNOT_PARSE_DOMAIN_VALUE_FROM_STRING);
}
col->insertString(ipv6_value);
}
};
}
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory)
{
factory.registerSimpleDataTypeCustom("IPv4", []
{
return std::make_pair(DataTypeFactory::instance().get("UInt32"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<DataTypeCustomIPv4Serialization>()));
auto type = DataTypeFactory::instance().get("UInt32");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv4"), std::make_unique<SerializationIPv4>(type->getDefaultSerialization())));
});
factory.registerSimpleDataTypeCustom("IPv6", []
{
return std::make_pair(DataTypeFactory::instance().get("FixedString(16)"),
std::make_unique<DataTypeCustomDesc>(std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<DataTypeCustomIPv6Serialization>()));
auto type = DataTypeFactory::instance().get("FixedString(16)");
return std::make_pair(type, std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>("IPv6"), std::make_unique<SerializationIPv6>(type->getDefaultSerialization())));
});
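    /// Hedged usage note (an observation about the factory API as used above, not part
    /// of the original file): after this registration, DataTypeFactory::instance().get("IPv4")
    /// yields a type named "IPv4" whose storage layout is that of UInt32, with the custom
    /// text serialization attached; likewise "IPv6" over FixedString(16).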
/// MySQL, MariaDB
......
#pragma once
#include <memory>
namespace DB
{
class IDataTypeCustomName;
class IDataTypeCustomTextSerialization;
class IDataTypeCustomStreams;
struct DataTypeCustomDesc;
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
}
......@@ -3,6 +3,7 @@
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/Serializations/SerializationDate.h>
#include <DataTypes/DataTypeFactory.h>
#include <Common/assert_cast.h>
......@@ -11,79 +12,15 @@
namespace DB
{
void DataTypeDate::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeDateText(DayNum(assert_cast<const ColumnUInt16 &>(column).getData()[row_num]), ostr);
}
void DataTypeDate::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDate::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
readDateText(x, istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
void DataTypeDate::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void DataTypeDate::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDate::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
assertChar('\'', istr);
readDateText(x, istr);
assertChar('\'', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void DataTypeDate::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDate::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DayNum x;
assertChar('"', istr);
readDateText(x, istr);
assertChar('"', istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(x);
}
void DataTypeDate::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDate::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
LocalDate value;
readCSV(value, istr);
assert_cast<ColumnUInt16 &>(column).getData().push_back(value.getDayNum());
}
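/// Worked example (Date is stored as UInt16 = days since 1970-01-01): parsing the CSV
/// value "1970-01-02" yields a LocalDate whose getDayNum() is 1, so the value 1 is
/// pushed into the ColumnUInt16.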
bool DataTypeDate::equals(const IDataType & rhs) const
{
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeDate::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDate>();
}
void registerDataTypeDate(DataTypeFactory & factory)
{
......
......@@ -14,21 +14,13 @@ public:
TypeIndex getTypeId() const override { return TypeIndex::Date; }
const char * getFamilyName() const override { return family_name; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool equals(const IDataType & rhs) const override;
protected:
SerializationPtr doGetDefaultSerialization() const override;
};
}
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/Serializations/SerializationDateTime.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
......@@ -15,26 +16,6 @@
namespace DB
{
namespace
{
inline void readTextHelper(
time_t & x, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
readDateTimeText(x, istr, time_zone);
return;
case FormatSettings::DateTimeInputFormat::BestEffort:
parseDateTimeBestEffort(x, istr, time_zone, utc_time_zone);
return;
}
}
}
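/// Hedged illustration of the two input modes dispatched in readTextHelper above:
/// Basic expects "YYYY-MM-DD hh:mm:ss" (readDateTimeText is assumed to also accept a
/// bare unix timestamp), while BestEffort additionally recognizes variants such as
/// ISO 8601 "2017-08-31T18:36:48Z". The exact set of accepted forms is a property of
/// readDateTimeText/parseDateTimeBestEffort, not of this file.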
TimezoneMixin::TimezoneMixin(const String & time_zone_name)
: has_explicit_time_zone(!time_zone_name.empty()),
time_zone(DateLUT::instance(time_zone_name)),
......@@ -62,124 +43,6 @@ String DataTypeDateTime::doGetName() const
return out.str();
}
void DataTypeDateTime::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeIntText(value, ostr);
return;
case FormatSettings::DateTimeOutputFormat::ISO:
writeDateTimeTextISO(value, ostr, utc_time_zone);
return;
}
}
void DataTypeDateTime::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
void DataTypeDateTime::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDateTime::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
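    /// DateTime values are unsigned 32-bit unix timestamps, so moments before
    /// 1970-01-01 00:00:00 UTC cannot be represented; negative parses clamp to 0.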
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDateTime::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
{
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
assertChar('\'', istr);
}
else /// Just 1504193808 or 01504193808
{
readIntText(x, istr);
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void DataTypeDateTime::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (checkChar('"', istr))
{
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
assertChar('"', istr);
}
else
{
readIntText(x, istr);
}
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
time_t x = 0;
if (istr.eof())
throwReadAfterEOF();
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();
readTextHelper(x, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);
if (x < 0)
x = 0;
assert_cast<ColumnType &>(column).getData().push_back(x);
}
bool DataTypeDateTime::equals(const IDataType & rhs) const
{
/// DateTime with different timezones are equal, because:
......@@ -187,4 +50,9 @@ bool DataTypeDateTime::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeDateTime::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDateTime>(time_zone, utc_time_zone);
}
}
......@@ -58,21 +58,12 @@ public:
String doGetName() const override;
TypeIndex getTypeId() const override { return TypeIndex::DateTime; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool canBeUsedAsVersion() const override { return true; }
bool canBeInsideNullable() const override { return true; }
bool equals(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;
};
}
......
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/Serializations/SerializationDateTime64.h>
#include <Columns/ColumnVector.h>
#include <Common/assert_cast.h>
......@@ -55,131 +56,6 @@ std::string DataTypeDateTime64::doGetName() const
return out.str();
}
void DataTypeDateTime64::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
auto value = assert_cast<const ColumnType &>(column).getData()[row_num];
switch (settings.date_time_output_format)
{
case FormatSettings::DateTimeOutputFormat::Simple:
writeDateTimeText(value, scale, ostr, time_zone);
return;
case FormatSettings::DateTimeOutputFormat::UnixTimestamp:
writeDateTimeUnixTimestamp(value, scale, ostr);
return;
case FormatSettings::DateTimeOutputFormat::ISO:
writeDateTimeTextISO(value, scale, ostr, utc_time_zone);
return;
}
}
void DataTypeDateTime64::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
DateTime64 result = 0;
readDateTime64Text(result, this->getScale(), istr, time_zone);
assert_cast<ColumnType &>(column).getData().push_back(result);
}
void DataTypeDateTime64::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextEscaped(column, istr, settings);
}
void DataTypeDateTime64::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeText(column, row_num, ostr, settings);
}
static inline void readText(DateTime64 & x, UInt32 scale, ReadBuffer & istr, const FormatSettings & settings, const DateLUTImpl & time_zone, const DateLUTImpl & utc_time_zone)
{
switch (settings.date_time_input_format)
{
case FormatSettings::DateTimeInputFormat::Basic:
readDateTime64Text(x, scale, istr, time_zone);
return;
case FormatSettings::DateTimeInputFormat::BestEffort:
parseDateTime64BestEffort(x, scale, istr, time_zone, utc_time_zone);
return;
}
}
void DataTypeDateTime64::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime64::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('\'', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('\'', ostr);
}
void DataTypeDateTime64::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (checkChar('\'', istr)) /// Cases: '2017-08-31 18:36:48' or '1504193808'
{
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assertChar('\'', istr);
}
else /// Just 1504193808 or 01504193808
{
readIntText(x, istr);
}
assert_cast<ColumnType &>(column).getData().push_back(x); /// It's important to do this at the end - for exception safety.
}
void DataTypeDateTime64::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime64::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (checkChar('"', istr))
{
readText(x, scale, istr, settings, time_zone, utc_time_zone);
assertChar('"', istr);
}
else
{
readIntText(x, istr);
}
assert_cast<ColumnType &>(column).getData().push_back(x);
}
void DataTypeDateTime64::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeChar('"', ostr);
serializeText(column, row_num, ostr, settings);
writeChar('"', ostr);
}
void DataTypeDateTime64::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
DateTime64 x = 0;
if (istr.eof())
throwReadAfterEOF();
char maybe_quote = *istr.position();
if (maybe_quote == '\'' || maybe_quote == '\"')
++istr.position();
readText(x, scale, istr, settings, time_zone, utc_time_zone);
if (maybe_quote == '\'' || maybe_quote == '\"')
assertChar(maybe_quote, istr);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
bool DataTypeDateTime64::equals(const IDataType & rhs) const
{
if (const auto * ptype = typeid_cast<const DataTypeDateTime64 *>(&rhs))
......@@ -187,4 +63,9 @@ bool DataTypeDateTime64::equals(const IDataType & rhs) const
return false;
}
SerializationPtr DataTypeDateTime64::doGetDefaultSerialization() const
{
return std::make_shared<SerializationDateTime64>(time_zone, utc_time_zone, scale);
}
}
......@@ -31,21 +31,12 @@ public:
std::string doGetName() const override;
TypeIndex getTypeId() const override { return type_id; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
bool equals(const IDataType & rhs) const override;
bool canBePromoted() const override { return false; }
protected:
SerializationPtr doGetDefaultSerialization() const override;
};
}
......
......@@ -35,59 +35,6 @@ MutableColumnPtr DataTypeDecimalBase<T>::createColumn() const
return ColumnType::create(0, scale);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
FieldType x = get<DecimalField<T>>(field);
writeBinary(x, ostr);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
const FieldType & x = assert_cast<const ColumnType &>(column).getElement(row_num);
writeBinary(x, ostr);
}
template <typename T>
void DataTypeDecimalBase<T>::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const typename ColumnType::Container & x = typeid_cast<const ColumnType &>(column).getData();
size_t size = x.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(FieldType) * limit);
}
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinary(Field & field, ReadBuffer & istr) const
{
typename FieldType::NativeType x;
readBinary(x, istr);
field = DecimalField(T(x), this->scale);
}
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
typename FieldType::NativeType x;
readBinary(x, istr);
assert_cast<ColumnType &>(column).getData().push_back(FieldType(x));
}
template <typename T>
void DataTypeDecimalBase<T>::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double) const
{
typename ColumnType::Container & x = typeid_cast<ColumnType &>(column).getData();
size_t initial_size = x.size();
x.resize(initial_size + limit);
size_t size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(FieldType) * limit);
x.resize(initial_size + size / sizeof(FieldType));
}
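/// Worked example (assuming Decimal32, i.e. sizeof(FieldType) == 4): with limit = 1000
/// the container grows by 4000 bytes; if readBig returns 3990 bytes, the final resize
/// keeps only 3990 / 4 = 997 whole values, silently discarding the 2 trailing bytes of
/// a torn element.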
template <typename T>
T DataTypeDecimalBase<T>::getScaleMultiplier(UInt32 scale_)
{
......
......@@ -5,7 +5,6 @@
#include <Core/DecimalFunctions.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeWithSimpleSerialization.h>
#include <type_traits>
......@@ -55,7 +54,7 @@ inline UInt32 leastDecimalPrecisionFor(TypeIndex int_type)
/// P is one of (9, 18, 38, 76); equal to the maximum precision for the biggest underlying type of operands.
/// S is the maximum scale of operands. The allowed values are in the range [0, precision].
template <typename T>
class DataTypeDecimalBase : public DataTypeWithSimpleSerialization
{
static_assert(IsDecimalNumber<T>);
......@@ -96,14 +95,6 @@ public:
bool canBeUsedInBooleanContext() const override { return true; }
bool canBeInsideNullable() const override { return true; }
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
/// Decimal specific
UInt32 getPrecision() const { return precision; }
......
#include <IO/WriteBufferFromString.h>
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/Serializations/SerializationEnum.h>
#include <DataTypes/DataTypeFactory.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTFunction.h>
......@@ -19,7 +20,6 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_TYPE_OF_FIELD;
extern const int SYNTAX_ERROR;
extern const int EMPTY_DATA_PASSED;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int ARGUMENT_OUT_OF_BOUND;
......@@ -65,203 +65,22 @@ std::string DataTypeEnum<Type>::generateName(const Values & values)
}
template <typename Type>
void DataTypeEnum<Type>::fillMaps()
{
for (const auto & name_and_value : values)
{
const auto inserted_value = name_to_value_map.insert(
{ StringRef{name_and_value.first}, name_and_value.second });
if (!inserted_value.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and " + toString(inserted_value.first->getMapped()),
ErrorCodes::SYNTAX_ERROR};
const auto inserted_name = value_to_name_map.insert(
{ name_and_value.second, StringRef{name_and_value.first} });
if (!inserted_name.second)
throw Exception{"Duplicate values in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and '" + toString((*inserted_name.first).first) + "'",
ErrorCodes::SYNTAX_ERROR};
}
}
template <typename Type>
DataTypeEnum<Type>::DataTypeEnum(const Values & values_) : values{values_}
{
if (values.empty())
throw Exception{"DataTypeEnum enumeration cannot be empty", ErrorCodes::EMPTY_DATA_PASSED};
std::sort(std::begin(values), std::end(values), [] (auto & left, auto & right)
{
return left.second < right.second;
});
fillMaps();
type_name = generateName(values);
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const FieldType x = get<FieldType>(field);
writeBinary(x, ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinary(Field & field, ReadBuffer & istr) const
{
FieldType x;
readBinary(x, istr);
field = castToNearestFieldType(x);
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
writeBinary(assert_cast<const ColumnType &>(column).getData()[row_num], ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
typename ColumnType::ValueType x;
readBinary(x, istr);
assert_cast<ColumnType &>(column).getData().push_back(x);
}
template <typename Type>
void DataTypeEnum<Type>::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeEscapedString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
/// NOTE It would be nice to do without creating a temporary object - at least extract std::string out.
std::string field_name;
readEscapedString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeQuotedString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
std::string field_name;
readQuotedStringWithSQLStyle(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
template <typename Type>
void DataTypeEnum<Type>::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.tsv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
writeJSONString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr, settings);
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeXMLStringForTextElement(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
if (!istr.eof() && *istr.position() != '"')
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readJSONString(field_name, istr);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name)));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeCSVString(getNameForValue(assert_cast<const ColumnType &>(column).getData()[row_num]), ostr);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
if (settings.csv.input_format_enum_as_number)
assert_cast<ColumnType &>(column).getData().push_back(readValue(istr));
else
{
std::string field_name;
readCSVString(field_name, istr, settings.csv);
assert_cast<ColumnType &>(column).getData().push_back(getValue(StringRef(field_name), true));
}
}
template <typename Type>
void DataTypeEnum<Type>::serializeBinaryBulk(
const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const
{
const auto & x = typeid_cast<const ColumnType &>(column).getData();
const auto size = x.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
ostr.write(reinterpret_cast<const char *>(&x[offset]), sizeof(FieldType) * limit);
}
template <typename Type>
void DataTypeEnum<Type>::deserializeBinaryBulk(
IColumn & column, ReadBuffer & istr, const size_t limit, const double /*avg_value_size_hint*/) const
{
auto & x = typeid_cast<ColumnType &>(column).getData();
const auto initial_size = x.size();
x.resize(initial_size + limit);
const auto size = istr.readBig(reinterpret_cast<char*>(&x[initial_size]), sizeof(FieldType) * limit);
x.resize(initial_size + size / sizeof(FieldType));
}
template <typename Type>
Field DataTypeEnum<Type>::getDefault() const
{
return values.front().second;
}
template <typename Type>
void DataTypeEnum<Type>::insertDefaultInto(IColumn & column) const
{
assert_cast<ColumnType &>(column).getData().push_back(values.front().second);
}
template <typename Type>
......@@ -274,7 +93,7 @@ bool DataTypeEnum<Type>::equals(const IDataType & rhs) const
template <typename Type>
bool DataTypeEnum<Type>::textCanContainOnlyValidUTF8() const
{
for (const auto & elem : values)
{
const char * pos = elem.first.data();
const char * end = pos + elem.first.size();
......@@ -305,14 +124,14 @@ Field DataTypeEnum<Type>::castToName(const Field & value_or_name) const
{
if (value_or_name.getType() == Field::Types::String)
{
getValue(value_or_name.get<String>()); /// Check correctness
return value_or_name.get<String>();
}
else if (value_or_name.getType() == Field::Types::Int64)
{
Int64 value = value_or_name.get<Int64>();
checkOverflow<Type>(value);
return getNameForValue(static_cast<Type>(value)).toString();
}
else
throw Exception(String("DataTypeEnum: Unsupported type of field ") + value_or_name.getTypeName(), ErrorCodes::BAD_TYPE_OF_FIELD);
......@@ -323,14 +142,14 @@ Field DataTypeEnum<Type>::castToValue(const Field & value_or_name) const
{
if (value_or_name.getType() == Field::Types::String)
{
return getValue(value_or_name.get<String>());
}
else if (value_or_name.getType() == Field::Types::Int64
|| value_or_name.getType() == Field::Types::UInt64)
{
Int64 value = value_or_name.get<Int64>();
checkOverflow<Type>(value);
getNameForValue(static_cast<Type>(value)); /// Check correctness
return value;
}
else
......@@ -341,25 +160,19 @@ Field DataTypeEnum<Type>::castToValue(const Field & value_or_name) const
template <typename Type>
bool DataTypeEnum<Type>::contains(const IDataType & rhs) const
{
auto check = [&](const auto & value)
{
auto it = name_to_value_map.find(value.first);
/// If we don't have this name, then we have to be sure
/// that this value exists in the enum.
if (it == name_to_value_map.end())
return value_to_name_map.count(value.second) > 0;
/// If we have this name, then it should have the same value.
return it->value.second == value.second;
};
if (const auto * rhs_enum8 = typeid_cast<const DataTypeEnum8 *>(&rhs))
return std::all_of(rhs_enum8->getValues().begin(), rhs_enum8->getValues().end(), check);
if (const auto * rhs_enum16 = typeid_cast<const DataTypeEnum16 *>(&rhs))
return std::all_of(rhs_enum16->getValues().begin(), rhs_enum16->getValues().end(), check);
return false;
}
template <typename Type>
SerializationPtr DataTypeEnum<Type>::doGetDefaultSerialization() const
{
return std::make_shared<SerializationEnum<Type>>(this->getValues());
}
/// Explicit instantiations.
template class DataTypeEnum<Int8>;
......
#pragma once
#include <DataTypes/IDataType.h>
#include <DataTypes/EnumValues.h>
#include <Columns/ColumnVector.h>
#include <Columns/ColumnConst.h>
#include <Common/HashTable/HashMap.h>
......@@ -11,12 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class IDataTypeEnum : public IDataType
{
public:
......@@ -36,102 +31,37 @@ public:
template <typename Type>
class DataTypeEnum final : public IDataTypeEnum
{
public:
using FieldType = Type;
using ColumnType = ColumnVector<FieldType>;
using Value = std::pair<std::string, FieldType>;
using Values = std::vector<Value>;
using NameToValueMap = HashMap<StringRef, FieldType, StringRefHash>;
using ValueToNameMap = std::unordered_map<FieldType, StringRef>;
static constexpr bool is_parametric = true;
private:
Values values;
NameToValueMap name_to_value_map;
ValueToNameMap value_to_name_map;
std::string type_name;
static std::string generateName(const Values & values);
void fillMaps();
public:
explicit DataTypeEnum(const Values & values_);
const Values & getValues() const { return values; }
std::string doGetName() const override { return type_name; }
const char * getFamilyName() const override;
TypeIndex getTypeId() const override { return sizeof(FieldType) == 1 ? TypeIndex::Enum8 : TypeIndex::Enum16; }
auto findByValue(const FieldType & value) const
{
const auto it = value_to_name_map.find(value);
if (it == std::end(value_to_name_map))
throw Exception{"Unexpected value " + toString(value) + " for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
return it;
}
const StringRef & getNameForValue(const FieldType & value) const
{
return findByValue(value)->second;
}
FieldType getValue(StringRef field_name, bool try_treat_as_id = false) const
{
const auto it = name_to_value_map.find(field_name);
if (!it)
{
/// It is used in CSV and TSV input formats. If we fail to find the given string
/// among the enum names, we will try to treat it as an enum id.
if (try_treat_as_id)
{
FieldType x;
ReadBufferFromMemory tmp_buf(field_name.data, field_name.size);
readText(x, tmp_buf);
/// Check if we reached end of the tmp_buf (otherwise field_name is not a number)
/// and try to find it in enum ids
if (tmp_buf.eof() && value_to_name_map.find(x) != value_to_name_map.end())
return x;
}
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::BAD_ARGUMENTS};
}
return it->getMapped();
}
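/// Hedged examples for Enum8('a' = 1, 'b' = 2):
///     getValue("a")        -> 1   (regular name lookup)
///     getValue("1", true)  -> 1   ("1" is not a name, but parses to an existing id)
///     getValue("c", true)  -> throws (neither a name nor a numeric id)
///     getValue("3", true)  -> throws ("3" parses, but 3 is not a declared value)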
FieldType readValue(ReadBuffer & istr) const
{
FieldType x;
readText(x, istr);
return findByValue(x)->first;
}
Field castToName(const Field & value_or_name) const override;
Field castToValue(const Field & value_or_name) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, const size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, const size_t limit, const double avg_value_size_hint) const override;
MutableColumnPtr createColumn() const override { return ColumnType::create(); }
Field getDefault() const override;
......@@ -147,6 +77,8 @@ public:
/// Enum('a' = 1, 'b' = 2) -> Enum('c' = 1, 'b' = 2, 'd' = 3) OK
/// Enum('a' = 1, 'b' = 2) -> Enum('a' = 2, 'b' = 1) NOT OK
bool contains(const IDataType & rhs) const;
SerializationPtr doGetDefaultSerialization() const override;
};
......
......@@ -3,7 +3,7 @@
#include <DataTypes/IDataType.h>
#include <Parsers/IAST_fwd.h>
#include <Common/IFactoryWithAliases.h>
#include <DataTypes/DataTypeCustom_fwd.h>
#include <DataTypes/DataTypeCustom.h>
#include <functional>
......@@ -86,6 +86,5 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory);
void registerDataTypeOneElementTuple(DataTypeFactory & factory);
}
......@@ -4,6 +4,7 @@
#include <Formats/FormatSettings.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationFixedString.h>
#include <IO/WriteBuffer.h>
#include <IO/ReadHelpers.h>
......@@ -22,10 +23,8 @@ namespace DB
namespace ErrorCodes
{
extern const int CANNOT_READ_ALL_DATA;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int UNEXPECTED_AST_STRUCTURE;
extern const int TOO_LARGE_STRING_SIZE;
}
......@@ -34,184 +33,6 @@ std::string DataTypeFixedString::doGetName() const
return "FixedString(" + toString(n) + ")";
}
void DataTypeFixedString::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const String & s = get<const String &>(field);
ostr.write(s.data(), std::min(s.size(), n));
if (s.size() < n)
for (size_t i = s.size(); i < n; ++i)
ostr.write(0);
}
void DataTypeFixedString::deserializeBinary(Field & field, ReadBuffer & istr) const
{
field = String();
String & s = get<String &>(field);
s.resize(n);
istr.readStrict(s.data(), n);
}
void DataTypeFixedString::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
ostr.write(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n);
}
void DataTypeFixedString::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
ColumnFixedString::Chars & data = assert_cast<ColumnFixedString &>(column).getChars();
size_t old_size = data.size();
data.resize(old_size + n);
try
{
istr.readStrict(reinterpret_cast<char *>(data.data() + old_size), n);
}
catch (...)
{
data.resize_assume_reserved(old_size);
throw;
}
}
void DataTypeFixedString::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
const ColumnFixedString::Chars & data = typeid_cast<const ColumnFixedString &>(column).getChars();
size_t size = data.size() / n;
if (limit == 0 || offset + limit > size)
limit = size - offset;
if (limit)
ostr.write(reinterpret_cast<const char *>(&data[n * offset]), n * limit);
}
void DataTypeFixedString::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t initial_size = data.size();
size_t max_bytes = limit * n;
data.resize(initial_size + max_bytes);
size_t read_bytes = istr.readBig(reinterpret_cast<char *>(&data[initial_size]), max_bytes);
if (read_bytes % n != 0)
throw Exception("Cannot read all data of type FixedString. Bytes read:" + toString(read_bytes) + ". String size:" + toString(n) + ".",
ErrorCodes::CANNOT_READ_ALL_DATA);
data.resize(initial_size + read_bytes);
}
void DataTypeFixedString::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
writeString(reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]), n, ostr);
}
void DataTypeFixedString::serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeAnyEscapedString<'\''>(pos, pos + n, ostr);
}
void DataTypeFixedString::alignStringLength(PaddedPODArray<UInt8> & chars, size_t old_size) const
{
size_t length = chars.size() - old_size;
if (length < n)
{
chars.resize_fill(old_size + n);
}
else if (length > n)
{
chars.resize_assume_reserved(old_size);
throw Exception("Too large value for FixedString(" + std::to_string(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE);
}
}
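/// Worked example for FixedString(5): after reading "abc" the new tail is zero-padded
/// to 'a','b','c',0,0; after reading "abcdef" the column is rolled back to old_size and
/// TOO_LARGE_STRING_SIZE is thrown, so no partial row survives.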
template <typename Reader>
static inline void read(const DataTypeFixedString & self, IColumn & column, Reader && reader)
{
ColumnFixedString::Chars & data = typeid_cast<ColumnFixedString &>(column).getChars();
size_t prev_size = data.size();
try
{
reader(data);
self.alignStringLength(data, prev_size);
}
catch (...)
{
data.resize_assume_reserved(prev_size);
throw;
}
}
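/// The catch-all above is the rollback idiom used throughout this file: restore the
/// column to its previous size before rethrowing, so a failed parse never leaves
/// partially written bytes in the column (compare deserializeBinary above).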
void DataTypeFixedString::deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readEscapedStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeAnyQuotedString<'\''>(pos, pos + n, ostr);
}
void DataTypeFixedString::deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readQuotedStringInto<true>(data, istr); });
}
void DataTypeFixedString::deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeJSONString(pos, pos + n, ostr, settings);
}
void DataTypeFixedString::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const
{
read(*this, column, [&istr](ColumnFixedString::Chars & data) { readJSONStringInto(data, istr); });
}
void DataTypeFixedString::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeXMLStringForTextElement(pos, pos + n, ostr);
}
void DataTypeFixedString::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const
{
const char * pos = reinterpret_cast<const char *>(&assert_cast<const ColumnFixedString &>(column).getChars()[n * row_num]);
writeCSVString(pos, pos + n, ostr);
}
void DataTypeFixedString::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
read(*this, column, [&istr, &csv = settings.csv](ColumnFixedString::Chars & data) { readCSVStringInto(data, istr, csv); });
}
MutableColumnPtr DataTypeFixedString::createColumn() const
{
return ColumnFixedString::create(n);
......@@ -227,6 +48,11 @@ bool DataTypeFixedString::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this) && n == static_cast<const DataTypeFixedString &>(rhs).n;
}
SerializationPtr DataTypeFixedString::doGetDefaultSerialization() const
{
return std::make_shared<SerializationFixedString>(n);
}
static DataTypePtr create(const ASTPtr & arguments)
{
......
......@@ -41,38 +41,14 @@ public:
return n;
}
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
bool equals(const IDataType & rhs) const override;
SerializationPtr doGetDefaultSerialization() const override;
bool isParametric() const override { return true; }
bool haveSubtypes() const override { return false; }
bool isComparable() const override { return true; }
......
......@@ -24,50 +24,6 @@ public:
const char * getFamilyName() const override { return "LowCardinality"; }
TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
......@@ -100,6 +56,7 @@ public:
static MutableColumnUniquePtr createColumnUnique(const IDataType & keys_type, MutableColumnPtr && keys);
private:
SerializationPtr doGetDefaultSerialization() const override;
template <typename ... Params>
using SerializeFunctionPtr = void (IDataType::*)(const IColumn &, size_t, Params ...) const;
......
......@@ -7,6 +7,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/Serializations/SerializationMap.h>
#include <Parsers/IAST.h>
#include <Parsers/ASTNameTypePair.h>
#include <Common/typeid_cast.h>
......@@ -28,7 +29,6 @@ namespace DB
namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int CANNOT_READ_MAP_FROM_TEXT;
extern const int BAD_ARGUMENTS;
}
......@@ -75,11 +75,6 @@ static const IColumn & extractNestedColumn(const IColumn & column)
return assert_cast<const ColumnMap &>(column).getNestedColumn();
}
static IColumn & extractNestedColumn(IColumn & column)
{
return assert_cast<ColumnMap &>(column).getNestedColumn();
}
DataTypePtr DataTypeMap::tryGetSubcolumnType(const String & subcolumn_name) const
{
return nested->tryGetSubcolumnType(subcolumn_name);
......@@ -90,265 +85,10 @@ ColumnPtr DataTypeMap::getSubcolumn(const String & subcolumn_name, const IColumn
return nested->getSubcolumn(subcolumn_name, extractNestedColumn(column));
}
void DataTypeMap::serializeBinary(const Field & field, WriteBuffer & ostr) const
{
const auto & map = get<const Map &>(field);
writeVarUInt(map.size(), ostr);
for (const auto & elem : map)
{
const auto & tuple = elem.safeGet<const Tuple>();
assert(tuple.size() == 2);
key_type->serializeBinary(tuple[0], ostr);
value_type->serializeBinary(tuple[1], ostr);
}
}
void DataTypeMap::deserializeBinary(Field & field, ReadBuffer & istr) const
{
size_t size;
readVarUInt(size, istr);
field = Map(size);
for (auto & elem : field.get<Map &>())
{
Tuple tuple(2);
key_type->deserializeBinary(tuple[0], istr);
value_type->deserializeBinary(tuple[1], istr);
elem = std::move(tuple);
}
}
void DataTypeMap::serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const
{
nested->serializeBinary(extractNestedColumn(column), row_num, ostr);
}
void DataTypeMap::deserializeBinary(IColumn & column, ReadBuffer & istr) const
{
nested->deserializeBinary(extractNestedColumn(column), istr);
}
template <typename Writer>
void DataTypeMap::serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const
{
const auto & column_map = assert_cast<const ColumnMap &>(column);
const auto & nested_array = column_map.getNestedColumn();
const auto & nested_tuple = column_map.getNestedData();
const auto & offsets = nested_array.getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
writeChar('{', ostr);
for (size_t i = offset; i < next_offset; ++i)
{
if (i != offset)
writeChar(',', ostr);
writer(key_type, nested_tuple.getColumn(0), i);
writeChar(':', ostr);
writer(value_type, nested_tuple.getColumn(1), i);
}
writeChar('}', ostr);
}
template <typename Reader>
void DataTypeMap::deserializeTextImpl(IColumn & column, ReadBuffer & istr, bool need_safe_get_int_key, Reader && reader) const
{
auto & column_map = assert_cast<ColumnMap &>(column);
auto & nested_array = column_map.getNestedColumn();
auto & nested_tuple = column_map.getNestedData();
auto & offsets = nested_array.getOffsets();
auto & key_column = nested_tuple.getColumn(0);
auto & value_column = nested_tuple.getColumn(1);
size_t size = 0;
assertChar('{', istr);
try
{
bool first = true;
while (!istr.eof() && *istr.position() != '}')
{
if (!first)
{
if (*istr.position() == ',')
++istr.position();
else
throw Exception("Cannot read Map from text", ErrorCodes::CANNOT_READ_MAP_FROM_TEXT);
}
first = false;
skipWhitespaceIfAny(istr);
if (*istr.position() == '}')
break;
if (need_safe_get_int_key)
{
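            /// Integer keys would otherwise be read with an unsafe integer parser that
            /// can run past the ':' delimiter; overwriting the delimiter with a space
            /// makes it stop at the key boundary. Note this mutates the buffer in
            /// place, presumably relying on callers providing a writable in-memory
            /// buffer (e.g. the CSV path reads via ReadBufferFromString).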
ReadBuffer::Position tmp = istr.position();
while (*tmp != ':' && *tmp != '}')
++tmp;
*tmp = ' ';
reader(key_type, key_column);
}
else
{
reader(key_type, key_column);
skipWhitespaceIfAny(istr);
assertChar(':', istr);
}
++size;
skipWhitespaceIfAny(istr);
reader(value_type, value_column);
skipWhitespaceIfAny(istr);
}
offsets.push_back(offsets.back() + size);
assertChar('}', istr);
}
catch (...)
{
throw;
}
}
void DataTypeMap::serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
[&](const DataTypePtr & subcolumn_type, const IColumn & subcolumn, size_t pos)
{
subcolumn_type->serializeAsTextQuoted(subcolumn, pos, ostr, settings);
});
}
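/// Hedged example: a Map(String, UInt8) row with pairs ('a', 1) and ('b', 2) is written
/// as {'a':1,'b':2}; the quoting of each key and value comes from serializeAsTextQuoted
/// of the respective subtype.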
void DataTypeMap::deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
// need_safe_get_int_key is set for integer keys to prevent readIntTextUnsafe
// from running past the ':' delimiter.
bool need_safe_get_int_key = isInteger(key_type);
deserializeTextImpl(column, istr, need_safe_get_int_key,
[&](const DataTypePtr & subcolumn_type, IColumn & subcolumn)
{
subcolumn_type->deserializeAsTextQuoted(subcolumn, istr, settings);
});
}
void DataTypeMap::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
serializeTextImpl(column, row_num, ostr,
[&](const DataTypePtr & subcolumn_type, const IColumn & subcolumn, size_t pos)
{
subcolumn_type->serializeAsTextJSON(subcolumn, pos, ostr, settings);
});
}
void DataTypeMap::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
// need_safe_get_int_key is set for integer keys to prevent readIntTextUnsafe
// from running past the ':' delimiter.
bool need_safe_get_int_key = isInteger(key_type);
deserializeTextImpl(column, istr, need_safe_get_int_key,
[&](const DataTypePtr & subcolumn_type, IColumn & subcolumn)
{
subcolumn_type->deserializeAsTextJSON(subcolumn, istr, settings);
});
}
void DataTypeMap::serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
const auto & column_map = assert_cast<const ColumnMap &>(column);
const auto & offsets = column_map.getNestedColumn().getOffsets();
size_t offset = offsets[row_num - 1];
size_t next_offset = offsets[row_num];
const auto & nested_data = column_map.getNestedData();
writeCString("<map>", ostr);
for (size_t i = offset; i < next_offset; ++i)
{
writeCString("<elem>", ostr);
writeCString("<key>", ostr);
key_type->serializeAsTextXML(nested_data.getColumn(0), i, ostr, settings);
writeCString("</key>", ostr);
writeCString("<value>", ostr);
value_type->serializeAsTextXML(nested_data.getColumn(1), i, ostr, settings);
writeCString("</value>", ostr);
writeCString("</elem>", ostr);
}
writeCString("</map>", ostr);
}
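/// e.g. a single pair ('a', 1) of a Map(String, UInt8) row renders as
/// <map><elem><key>a</key><value>1</value></elem></map>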
void DataTypeMap::serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
{
WriteBufferFromOwnString wb;
serializeText(column, row_num, wb, settings);
writeCSV(wb.str(), ostr);
}
void DataTypeMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
deserializeText(column, rb, settings);
}
void DataTypeMap::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
nested->enumerateStreams(callback, path);
}
void DataTypeMap::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkStatePrefix(settings, state);
}
void DataTypeMap::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkStateSuffix(settings, state);
}
void DataTypeMap::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
nested->deserializeBinaryBulkStatePrefix(settings, state);
}
SerializationPtr DataTypeMap::getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const
{
return nested->getSubcolumnSerialization(subcolumn_name, base_serialization_getter);
}
void DataTypeMap::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkWithMultipleStreams(extractNestedColumn(column), offset, limit, settings, state);
}
void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
auto & column_map = assert_cast<ColumnMap &>(column);
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
}
MutableColumnPtr DataTypeMap::createColumn() const
......@@ -361,6 +101,14 @@ Field DataTypeMap::getDefault() const
return Map();
}
SerializationPtr DataTypeMap::doGetDefaultSerialization() const
{
return std::make_shared<SerializationMap>(
key_type->getDefaultSerialization(),
value_type->getDefaultSerialization(),
nested->getDefaultSerialization());
}
bool DataTypeMap::equals(const IDataType & rhs) const
{
if (typeid(rhs) != typeid(*this))
......
#pragma once
#include <DataTypes/IDataType.h>
namespace DB
......@@ -11,7 +11,7 @@ namespace DB
* Serialization of type 'Map(K, V)' is similar to serialization
* of 'Array(Tuple(keys K, values V))', or in other words, of 'Nested(keys K, values V)'.
*/
class DataTypeMap final : public IDataType
{
private:
DataTypePtr key_type;
......@@ -34,47 +34,8 @@ public:
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
MutableColumnPtr createColumn() const override;
......@@ -88,16 +49,11 @@ public:
const DataTypePtr & getKeyType() const { return key_type; }
const DataTypePtr & getValueType() const { return value_type; }
DataTypes getKeyValueTypes() const { return {key_type, value_type}; }
const DataTypePtr & getNestedType() const { return nested; }
private:
template <typename Writer>
void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffer & ostr, Writer && writer) const;
template <typename Reader>
void deserializeTextImpl(IColumn & column, ReadBuffer & istr, bool need_safe_get_int_key, Reader && reader) const;
SerializationPtr doGetDefaultSerialization() const override;
void assertKeyType() const;
};
......
......@@ -57,7 +57,7 @@ static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & argum
auto data_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(nested_types, nested_names));
auto custom_name = std::make_unique<DataTypeNestedCustomName>(nested_types, nested_names);
return std::make_pair(std::move(data_type), std::make_unique<DataTypeCustomDesc>(std::move(custom_name)));
}
void registerDataTypeNested(DataTypeFactory & factory)
......
#pragma once
#include <DataTypes/DataTypeCustom.h>
#include <DataTypes/IDataType.h>
namespace DB
......
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeNothing.h>
#include <DataTypes/Serializations/SerializationNothing.h>
#include <DataTypes/DataTypeFactory.h>
#include <Columns/ColumnNothing.h>
#include <IO/ReadBuffer.h>
......@@ -14,25 +15,14 @@ MutableColumnPtr DataTypeNothing::createColumn() const
return ColumnNothing::create(0);
}
void DataTypeNothing::serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const
{
size_t size = column.size();
if (limit == 0 || offset + limit > size)
limit = size - offset;
for (size_t i = 0; i < limit; ++i)
ostr.write('0');
}
void DataTypeNothing::deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double /*avg_value_size_hint*/) const
{
typeid_cast<ColumnNothing &>(column).addSize(istr.tryIgnore(limit));
}
bool DataTypeNothing::equals(const IDataType & rhs) const
{
return typeid(rhs) == typeid(*this);
}
SerializationPtr DataTypeNothing::doGetDefaultSerialization() const
{
return std::make_shared<SerializationNothing>();
}
......
......@@ -20,10 +20,6 @@ public:
MutableColumnPtr createColumn() const override;
/// These methods read and write zero bytes just to allow figuring out the size of the column.
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
bool equals(const IDataType & rhs) const override;
bool isParametric() const override { return false; }
......@@ -31,6 +27,8 @@ public:
bool haveMaximumSizeOfValue() const override { return true; }
size_t getSizeOfValueInMemory() const override { return 0; }
bool canBeInsideNullable() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override;
};
}
......@@ -18,61 +18,6 @@ public:
const char * getFamilyName() const override { return "Nullable"; }
TypeIndex getTypeId() const override { return TypeIndex::Nullable; }
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeTextEscaped(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextQuoted(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeTextCSV(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
/** It is questionable, how NULL values could be represented in CSV. There are three variants:
* 1. \N
* 2. empty string (without quotes)
* 3. NULL
* We support all of them (however, the second variant is supported by CSVRowInputStream, not by deserializeTextCSV).
* (see also input_format_defaults_for_omitted_fields and input_format_csv_unquoted_null_literal_as_null settings)
* In CSV, non-NULL string value, starting with \N characters, must be placed in quotes, to avoid ambiguity.
*/
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
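/// For a concrete (illustrative) Nullable(String) column between two integer
/// columns, the three accepted encodings of NULL look like:
///     1,\N,2
///     1,,2        (handled by CSVRowInputStream, not here)
///     1,NULL,2    (requires input_format_csv_unquoted_null_literal_as_null)
/// while a real string that merely starts with \N must be quoted, e.g.
///     1,"\Nabc",2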
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
MutableColumnPtr createColumn() const override;
Field getDefault() const override;
......@@ -95,25 +40,16 @@ public:
size_t getSizeOfValueInMemory() const override;
bool onlyNull() const override;
bool canBeInsideLowCardinality() const override { return nested_data_type->canBeInsideLowCardinality(); }
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
SerializationPtr getSubcolumnSerialization(
const String & subcolumn_name, const BaseSerializationGetter & base_serialization_getter) const override;
const DataTypePtr & getNestedType() const { return nested_data_type; }
/// If ReturnType is bool, check for NULL and deserialize value into non-nullable column (and return true) or insert default value of nested type (and return false)
/// If ReturnType is void, deserialize Nullable(T)
template <typename ReturnType = bool>
static ReturnType deserializeWholeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextEscaped(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextQuoted(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings, const DataTypePtr & nested);
template <typename ReturnType = bool>
static ReturnType deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &, const DataTypePtr & nested);
private:
SerializationPtr doGetDefaultSerialization() const override;
DataTypePtr nested_data_type;
};
......
......@@ -2,7 +2,7 @@
#include <Common/UInt128.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/Serializations/SerializationNumber.h>
namespace DB
......@@ -14,7 +14,7 @@ class ColumnVector;
/** Implements part of the IDataType interface, common to all numbers and for Date and DateTime.
*/
template <typename T>
class DataTypeNumberBase : public IDataType
{
static_assert(IsNumber<T>);
......@@ -30,21 +30,8 @@ public:
const char * getFamilyName() const override { return family_name; }
TypeIndex getTypeId() const override { return type_id; }
void serializeText(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings &) const override;
void deserializeText(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
void serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const override;
void deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const override;
Field getDefault() const override;
/** Format is platform-dependent. */
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;
void serializeBinary(const IColumn & column, size_t row_num, WriteBuffer & ostr) const override;
void deserializeBinary(IColumn & column, ReadBuffer & istr) const override;
void serializeBinaryBulk(const IColumn & column, WriteBuffer & ostr, size_t offset, size_t limit) const override;
void deserializeBinaryBulk(IColumn & column, ReadBuffer & istr, size_t limit, double avg_value_size_hint) const override;
MutableColumnPtr createColumn() const override;
bool isParametric() const override { return false; }
......@@ -53,7 +40,7 @@ public:
bool shouldAlignRightInPrettyFormats() const override
{
/// Just a number, without customizations. Counterexample: IPv4.
return !custom_serialization;
}
bool textCanContainOnlyValidUTF8() const override { return true; }
......@@ -66,6 +53,8 @@ public:
size_t getSizeOfValueInMemory() const override { return sizeof(T); }
bool isCategorial() const override { return isValueRepresentedByInteger(); }
bool canBeInsideLowCardinality() const override { return true; }
SerializationPtr doGetDefaultSerialization() const override { return std::make_shared<SerializationNumber<T>>(); }
};
/// Prevent implicit template instantiation of DataTypeNumberBase for common numeric types
......
#include <DataTypes/DataTypeOneElementTuple.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeCustom.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/quoteString.h>
#include <Parsers/ASTNameTypePair.h>
#include <Columns/IColumn.h>
namespace DB
{
namespace
{
/** Custom substreams representation for single subcolumn.
* It serializes/deserializes the column as the nested type, but as if it
* were a named tuple with one element with the given name.
*/
class DataTypeOneElementTupleStreams : public IDataTypeCustomStreams
{
private:
DataTypePtr nested;
String name;
bool escape_delimiter;
public:
DataTypeOneElementTupleStreams(const DataTypePtr & nested_, const String & name_, bool escape_delimiter_)
: nested(nested_), name(name_), escape_delimiter(escape_delimiter_) {}
void enumerateStreams(
const IDataType::StreamCallback & callback,
IDataType::SubstreamPath & path) const override
{
addToPath(path);
nested->enumerateStreams(callback, path);
path.pop_back();
}
void serializeBinaryBulkStatePrefix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const override
{
addToPath(settings.path);
nested->serializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void serializeBinaryBulkStateSuffix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const override
{
addToPath(settings.path);
nested->serializeBinaryBulkStateSuffix(settings, state);
settings.path.pop_back();
}
void deserializeBinaryBulkStatePrefix(
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state) const override
{
addToPath(settings.path);
nested->deserializeBinaryBulkStatePrefix(settings, state);
settings.path.pop_back();
}
void serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const override
{
addToPath(settings.path);
nested->serializeBinaryBulkWithMultipleStreams(column, offset, limit, settings, state);
settings.path.pop_back();
}
void deserializeBinaryBulkWithMultipleStreams(
ColumnPtr & column,
size_t limit,
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state,
IDataType::SubstreamsCache * cache) const override
{
addToPath(settings.path);
nested->deserializeBinaryBulkWithMultipleStreams(column, limit, settings, state, cache);
settings.path.pop_back();
}
private:
void addToPath(IDataType::SubstreamPath & path) const
{
path.push_back(IDataType::Substream::TupleElement);
path.back().tuple_element_name = name;
path.back().escape_tuple_delimiter = escape_delimiter;
}
};
}
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter)
{
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeCustomFixedName>(type->getName()), nullptr,
std::make_unique<DataTypeOneElementTupleStreams>(type, name, escape_delimiter));
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
}
}
#pragma once
#include <DataTypes/IDataType.h>
namespace DB
{
DataTypePtr createOneElementTuple(const DataTypePtr & type, const String & name, bool escape_delimiter = true);
}
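A minimal usage sketch; the wrapped type and the element name "size0" are
illustrative assumptions, not taken from this diff:
#include <DataTypes/DataTypeOneElementTuple.h>
#include <DataTypes/DataTypesNumber.h>
using namespace DB;
/// Wrap UInt64 so that its substreams are enumerated as the single named
/// tuple element "size0": the wrapped type keeps the original type name
/// (via DataTypeCustomFixedName above), but every substream path now gets a
/// Substream::TupleElement with tuple_element_name == "size0" prepended,
/// as done by addToPath() in DataTypeOneElementTupleStreams.
DataTypePtr wrapped = createOneElementTuple(std::make_shared<DataTypeUInt64>(), "size0");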