未验证 提交 eedb5a3e 编写于 作者: A alexey-milovidov 提交者: GitHub

Merge pull request #10218 from ClickHouse/fix-generate-random-date

Fixed "generateRandom" function for Date type
......@@ -133,7 +133,10 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
}
/// Fill lookup table for years and months.
for (size_t day = 0; day < DATE_LUT_SIZE && lut[day].year <= DATE_LUT_MAX_YEAR; ++day)
size_t year_months_lut_index = 0;
size_t first_day_of_last_month = 0;
for (size_t day = 0; day < DATE_LUT_SIZE; ++day)
{
const Values & values = lut[day];
......@@ -141,7 +144,16 @@ DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
{
if (values.month == 1)
years_lut[values.year - DATE_LUT_MIN_YEAR] = day;
years_months_lut[(values.year - DATE_LUT_MIN_YEAR) * 12 + values.month - 1] = day;
year_months_lut_index = (values.year - DATE_LUT_MIN_YEAR) * 12 + values.month - 1;
years_months_lut[year_months_lut_index] = day;
first_day_of_last_month = day;
}
}
/// Fill the rest of the lookup table with the first day of the same last month (2106-02-01).
for (; year_months_lut_index < DATE_LUT_YEARS * 12; ++year_months_lut_index)
{
years_months_lut[year_months_lut_index] = first_day_of_last_month;
}
}
......@@ -12,7 +12,7 @@
/// Table size is bigger than DATE_LUT_MAX_DAY_NUM to fill all indices within UInt16 range: this allows removing an extra check.
#define DATE_LUT_SIZE 0x10000
#define DATE_LUT_MIN_YEAR 1970
#define DATE_LUT_MAX_YEAR 2105 /// Last supported year
#define DATE_LUT_MAX_YEAR 2106 /// Last supported year (incomplete)
#define DATE_LUT_YEARS (1 + DATE_LUT_MAX_YEAR - DATE_LUT_MIN_YEAR) /// Number of years in lookup table
#if defined(__PPC__)
......
......@@ -210,8 +210,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa
const auto & date_lut = DateLUT::instance();
DayNum min_month = date_lut.toFirstDayNumOfMonth(DayNum(min_date));
DayNum max_month = date_lut.toFirstDayNumOfMonth(DayNum(max_date));
auto min_month = date_lut.toNumYYYYMM(min_date);
auto max_month = date_lut.toNumYYYYMM(max_date);
if (min_month != max_month)
throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR);
......
......@@ -120,8 +120,8 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D
min_date = date_lut.YYYYMMDDToDayNum(min_yyyymmdd);
max_date = date_lut.YYYYMMDDToDayNum(max_yyyymmdd);
DayNum min_month = date_lut.toFirstDayNumOfMonth(min_date);
DayNum max_month = date_lut.toFirstDayNumOfMonth(max_date);
auto min_month = date_lut.toNumYYYYMM(min_date);
auto max_month = date_lut.toNumYYYYMM(max_date);
if (min_month != max_month)
throw Exception("Part name " + part_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME);
......
......@@ -14,6 +14,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/NestedUtils.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnString.h>
......@@ -57,7 +58,12 @@ void fillBufferWithRandomData(char * __restrict data, size_t size, pcg64 & rng)
ColumnPtr fillColumnWithRandomData(
const DataTypePtr type, UInt64 limit, UInt64 max_array_length, UInt64 max_string_length, pcg64 & rng, const Context & context)
const DataTypePtr type,
UInt64 limit,
UInt64 max_array_length,
UInt64 max_string_length,
pcg64 & rng,
const Context & context)
{
TypeIndex idx = type->getTypeId();
......@@ -205,7 +211,10 @@ ColumnPtr fillColumnWithRandomData(
{
auto column = ColumnUInt16::create();
column->getData().resize(limit);
fillBufferWithRandomData(reinterpret_cast<char *>(column->getData().data()), limit * sizeof(UInt16), rng);
for (size_t i = 0; i < limit; ++i)
column->getData()[i] = rng() % (DATE_LUT_MAX_DAY_NUM + 1); /// Slow
return column;
}
case TypeIndex::UInt32: [[fallthrough]];
......@@ -337,14 +346,24 @@ public:
protected:
Chunk generate() override
{
/// To support Nested types, we collect them into a single Array of Tuple.
auto names_and_types = Nested::collect(block_header.getNamesAndTypesList());
Columns columns;
columns.reserve(block_header.columns());
DataTypes types = block_header.getDataTypes();
columns.reserve(names_and_types.size());
for (const auto & type : types)
columns.emplace_back(fillColumnWithRandomData(type, block_size, max_array_length, max_string_length, rng, context));
Block compact_block;
for (const auto & elem : names_and_types)
{
compact_block.insert(
{
fillColumnWithRandomData(elem.type, block_size, max_array_length, max_string_length, rng, context),
elem.type,
elem.name
});
}
return {std::move(columns), block_size};
return {Nested::flatten(compact_block).getColumns(), block_size};
}
private:
......
UInt64 Int64 UInt32 Int32 UInt16 Int16 UInt8 Int8
2804162938822577320 -2776833771540858 3467776823 1163715250 23903 13655 137 -41
7885388429666205427 -1363628932535403038 484159052 -308788249 56810 -22227 51 -41
4357435422797280898 1355609803008819271 4126129912 -852056475 64304 -11401 139 86
5935810273536892891 -804738887697332962 3109335413 -80126721 258 12889 18 88
368066018677693974 -4927165984347126295 1015254922 2026080544 44305 21973 16 0
8124171311239967992 -1179703908046100129 1720727300 -138469036 61343 10573 252 -32
15657812979985370729 -5733276247123822513 3254757884 -500590428 45913 19153 105 -102
18371568619324220532 -6793779541583578394 1686821450 -455892108 49050 -28603 248 80
821735343441964030 3148260644406230976 256251035 -885069056 58858 -29361 58 61
9558594037060121162 -2907172753635797124 4276198376 1947296644 26801 -13531 204 -66
2804162938822577320 -2776833771540858 3467776823 1163715250 31161 -2916 220 -117
7885388429666205427 -1363628932535403038 484159052 -308788249 43346 13638 143 -105
4357435422797280898 1355609803008819271 4126129912 -852056475 34184 9166 49 33
5935810273536892891 -804738887697332962 3109335413 -80126721 47877 -31421 186 -77
368066018677693974 -4927165984347126295 1015254922 2026080544 46037 -29626 240 108
8124171311239967992 -1179703908046100129 1720727300 -138469036 33028 -12819 138 16
15657812979985370729 -5733276247123822513 3254757884 -500590428 3829 30527 3 -81
18371568619324220532 -6793779541583578394 1686821450 -455892108 43475 2284 252 -90
821735343441964030 3148260644406230976 256251035 -885069056 11643 11455 176 90
9558594037060121162 -2907172753635797124 4276198376 1947296644 45922 26632 97 43
-
Enum8(\'hello\' = 1, \'world\' = 5)
hello
......@@ -47,16 +47,16 @@ h
o
-
Date DateTime DateTime(\'Europe/Moscow\')
2106-02-07 2050-12-17 02:46:35 2096-02-16 22:18:22
2106-02-07 2013-10-17 23:35:26 1976-01-24 12:52:48
2039-08-16 1974-11-17 23:22:46 1980-03-04 21:02:50
1997-04-11 1972-09-18 23:44:08 2040-07-10 14:46:42
2103-11-03 2044-11-23 20:57:12 1970-10-09 02:30:14
2066-11-19 2029-12-10 03:13:55 2106-01-30 21:52:44
2064-08-14 2016-07-14 11:33:45 2096-12-12 00:40:50
2046-09-13 2085-07-10 18:51:14 2096-01-15 16:31:33
2008-03-16 2047-05-16 23:28:36 2103-02-11 16:44:39
2000-07-07 2105-07-19 19:29:06 1980-01-02 05:18:22
2077-09-17 1970-10-09 02:30:14 2074-08-12 11:31:27
2005-11-19 2106-01-30 21:52:44 2097-05-25 07:54:35
2007-02-24 2096-12-12 00:40:50 1988-08-10 11:16:31
2019-06-30 2096-01-15 16:31:33 2063-10-20 08:48:17
2039-01-16 2103-02-11 16:44:39 2036-10-09 04:29:10
1994-11-03 1980-01-02 05:18:22 2055-12-23 12:33:52
2083-08-20 2079-06-11 16:29:02 2000-12-05 17:46:24
2030-06-25 2100-03-01 18:50:22 1993-03-25 01:19:12
2087-03-16 2034-08-25 19:46:33 2045-12-10 16:47:40
2006-04-30 2069-09-30 16:07:48 2084-08-26 03:33:12
-
DateTime64(3) DateTime64(6) DateTime64(6, \'Europe/Moscow\')
1978-06-07 23:50:57.320 2013-08-28 10:21:54.010758 1991-08-25 16:23:26.140215
......@@ -225,14 +225,14 @@ RL,{Xs\\tw
[114] -84125.1554 ('2023-06-06 06:55:06.492','bf9ab359-ef9f-ad11-7e6c-160368b1e5ea')
[124] -114719.5228 ('2010-11-11 22:57:23.722','c1046ffb-3415-cc3a-509a-e0005856d7d7')
-
[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1984-12-06','2028-08-17 06:05:01','2036-04-02 23:52:28.468','4b3d498c-dd44-95c1-5b75-921504ec5d8d') F743
[-102,-118] 392272782 Eb -14818.0200 o -2.664492247169164e59 ('2082-12-26','2052-09-09 06:50:50','2088-04-21 05:07:08.245','aeb9c26e-0ee7-2b8e-802b-2a96319b8e60') CBF4
[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('2106-02-07','2090-07-31 16:45:26','2076-07-10 09:11:06.385','57c69bc6-dddd-0975-e932-a7b5173a1304') EB1D
[-28,100] 3675466147 { -146685.1749 h 3.6676044396877755e142 ('2017-10-25','2100-02-28 18:07:18','2055-10-14 06:36:20.056','14949dae-dfa8-a124-af83-887348b2f609') 6D88
[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2106-02-07','2074-08-10 06:25:12','1976-12-04 18:31:55.745','86a9b3c1-4593-4d56-7762-3aa1dd22cbbf') AD43
[11,-36] 3308237300 \N 171205.1896 \N 5.634708707075817e195 ('1974-10-31','1993-12-24 09:38:45','2038-07-15 05:22:51.805','63d999b8-8cca-e237-c4a4-4dd7d0096f65') 609E
[39] 1614362420 `4A8P 157144.0630 o -1.1843143253872814e-255 ('2106-02-07','2072-09-28 18:27:27','2073-07-10 12:19:58.146','6483f5c0-8733-364c-4fa0-9948d32e8903') A886
[48,-120] 3848918261 1<Lu3 91487.2852 h -1.9300793134783347e263 ('2050-12-04','2076-04-05 09:33:05','2103-12-13 23:48:44.066','e522b794-b8fa-3f11-003b-3b6b088ff941') 556E
[55] 3047524030 li&lF 93462.3661 h 2.8979254388809897e54 ('1976-01-10','1987-07-14 00:25:51','2021-11-19 04:44:08.986','486e5b26-5fe8-fe3e-12ef-09aee40643e0') 9E75
[100,-42] 3999367674 -112975.9852 h 2.658098863752086e-160 ('2081-05-13','2071-08-07 13:34:33','1980-11-11 12:00:44.669','9754e8ac-5145-befb-63d9-a12dd1cf1f3a') DF63
[] 1900051923 { -189530.5846 h -5.6279699579452485e47 ('1980-08-29','2090-10-31 19:35:45','2038-07-15 05:22:51.805','63d9a12d-d1cf-1f3a-57c6-9bc6dddd0975') 8502
[-102,-118] 392272782 Eb -14818.0200 o -2.664492247169164e59 ('2059-02-10','1994-07-16 00:40:02','2034-02-02 05:30:44.960','4fa09948-d32e-8903-63df-43ad759e43f7') DA61
[-71] 775049089 \N -158115.1178 w 4.1323844687113747e-305 ('1997-02-15','2062-08-12 23:41:53','2074-02-13 10:29:40.749','c4a44dd7-d009-6f65-1494-9daedfa8a124') 83A7
[-28,100] 3675466147 { -146685.1749 h 3.6676044396877755e142 ('1997-10-26','2002-06-26 03:33:41','2002-12-02 05:46:03.455','98714b2c-65e7-b5cb-a040-421e260c6d8d') 4B94
[-23] 2514120753 (`u, -119659.6174 w 1.3231258347475906e34 ('2055-11-20','2080-03-28 08:11:25','2073-07-10 12:19:58.146','003b3b6b-088f-f941-aeb9-c26e0ee72b8e') 6B1F
[11,-36] 3308237300 \N 171205.1896 \N 5.634708707075817e195 ('2009-03-18','2041-11-11 13:19:44','2044-03-18 17:34:17.814','9e60f4cb-6e55-1deb-5ac4-d66a86a8886d') 1964
[39] 1614362420 `4A8P 157144.0630 o -1.1843143253872814e-255 ('1991-04-27','2066-03-02 11:07:49','1997-10-22 20:14:13.755','97685503-2609-d2b9-981c-02fd75d106cb') A35B
[48,-120] 3848918261 1<Lu3 91487.2852 h -1.9300793134783347e263 ('1983-09-27','2031-08-07 11:29:42','2085-03-11 15:17:17.374','af838873-48b2-f609-6483-f5c08733364c') 8E7C
[55] 3047524030 li&lF 93462.3661 h 2.8979254388809897e54 ('2011-08-01','2058-07-14 01:47:30','2055-10-14 06:36:20.056','e932a7b5-173a-1304-e522-b794b8fa3f11') 35D9
[100,-42] 3999367674 -112975.9852 h 2.658098863752086e-160 ('2025-12-13','2029-04-21 12:27:59','2068-08-27 07:49:41.825','802b2a96-319b-8e60-63d9-99b88ccae237') 074E
-
-- Fill a MergeTree table (with a Date column `dt`) from generateRandom and print
-- the row count together with the sum of cityHash64 over all columns.
DROP TABLE IF EXISTS mass_table_117;
CREATE TABLE mass_table_117 (`dt` Date, `site_id` Int32, `site_key` String) ENGINE = MergeTree(dt, (site_id, site_key, dt), 8192);
-- Take the first 100 generated rows.
-- NOTE(review): the arguments after the structure string (1, 10, 2) are presumably
-- seed, max string length and max array length -- confirm against the generateRandom docs.
INSERT INTO mass_table_117 SELECT * FROM generateRandom('`dt` Date,`site_id` Int32,`site_key` String', 1, 10, 2) LIMIT 100;
SELECT count(), sum(cityHash64(*)) FROM mass_table_117;
DROP TABLE mass_table_117;
-- Insert two rows whose Date values are written as plain numbers into a table
-- declared as MergeTree(d, x, 8192). No SELECT follows, so the script only
-- exercises the INSERT itself.
DROP TABLE IF EXISTS mt;
CREATE TABLE mt (d Date, x UInt8) ENGINE = MergeTree(d, x, 8192);
-- NOTE(review): 52392 and 62677 are presumably day numbers since 1970-01-01 -- confirm.
INSERT INTO mt VALUES (52392, 1), (62677, 2);
DROP TABLE mt;
Q1 2106-02-07 Hello
Q2 0000-00-00 World
Q1 2106-02-07 Hello
Q2 0000-00-00 World
-- Round-trip check for Date values stored in a table declared as MergeTree(d, x, 8192):
-- the same two lookups are run before and after DETACH/ATTACH.
DROP TABLE IF EXISTS mt;
CREATE TABLE mt (d Date, x String) ENGINE = MergeTree(d, x, 8192);
-- Two rows with dates far apart: '2106-02-07' and '1970-01-01'.
INSERT INTO mt VALUES ('2106-02-07', 'Hello'), ('1970-01-01', 'World');
-- Look up each row by an equality filter on its date; 'Q1'/'Q2' label the output lines.
SELECT 'Q1', * FROM mt WHERE d = '2106-02-07';
SELECT 'Q2', * FROM mt WHERE d = '1970-01-01';
-- Detach and re-attach the table, then repeat the same lookups.
-- NOTE(review): presumably this forces the table's parts to be re-read from disk -- confirm.
DETACH TABLE mt;
ATTACH TABLE mt;
SELECT 'Q1', * FROM mt WHERE d = '2106-02-07';
SELECT 'Q2', * FROM mt WHERE d = '1970-01-01';
DROP TABLE mt;
100 12366141706519416319
109 2990700419202507835
-- generateRandom feeding a table that has a Nested column `n` with fields a and b.
DROP TABLE IF EXISTS mass_table_312;
CREATE TABLE mass_table_312 (d Date DEFAULT '2000-01-01', x UInt64, n Nested(a String, b String)) ENGINE = MergeTree(d, x, 1);
-- The structure string spells the nested fields as separate `n.a` / `n.b` Array(String) columns;
-- only the first 100 generated rows are inserted.
INSERT INTO mass_table_312 SELECT * FROM generateRandom('`d` Date,`x` UInt64,`n.a` Array(String),`n.b` Array(String)', 1, 10, 2) LIMIT 100;
-- Row count and hash sum over the stored rows as-is ...
SELECT count(), sum(cityHash64(*)) FROM mass_table_312;
-- ... and again with the nested column expanded via ARRAY JOIN.
SELECT count(), sum(cityHash64(*)) FROM mass_table_312 ARRAY JOIN n;
DROP TABLE mass_table_312;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册