Commit c48f892e authored by alesapin

Merge with master

@@ -358,11 +358,8 @@ struct Settings : public SettingsCollection<Settings>
M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this setting is enabled, arrays will be resized to the longest one.", 0) \
M(SettingBool, optimize_read_in_order, true, "Enable ORDER BY optimization for reading data in corresponding order in MergeTree tables.", 0) \
M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.", 0) \
M(SettingBool, allow_experimental_multiple_joins_emulation, true, "Emulate multiple joins using subselects", 0) \
M(SettingBool, allow_experimental_cross_to_join_conversion, true, "Convert CROSS JOIN to INNER JOIN if possible", 0) \
M(SettingBool, cancel_http_readonly_queries_on_client_close, false, "Cancel HTTP readonly queries when a client closes the connection without waiting for response.", 0) \
M(SettingBool, external_table_functions_use_nulls, true, "If it is set to true, external table functions will implicitly use Nullable type if needed. Otherwise NULLs will be substituted with default values. Currently supported only by 'mysql' and 'odbc' table functions.", 0) \
M(SettingBool, allow_experimental_data_skipping_indices, false, "If it is set to true, data skipping indices can be used in CREATE TABLE/ALTER TABLE queries.", 0) \
\
M(SettingBool, experimental_use_processors, false, "Use processors pipeline.", 0) \
\
@@ -390,11 +387,13 @@ struct Settings : public SettingsCollection<Settings>
/** Obsolete settings that do nothing but are left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\
M(SettingBool, allow_experimental_low_cardinality_type, true, "Obsolete setting, does nothing. Will be removed after 2019-08-13", 0) \
M(SettingBool, compile, false, "Obsolete setting, does nothing. Will be removed after 2020-03-13", 0) \
M(SettingBool, compile, false, "Whether query compilation is enabled. Will be removed after 2020-03-13", 0) \
M(SettingUInt64, min_count_to_compile, 0, "Obsolete setting, does nothing. Will be removed after 2020-03-13", 0) \
M(SettingBool, allow_experimental_multiple_joins_emulation, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(SettingBool, allow_experimental_cross_to_join_conversion, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(SettingBool, allow_experimental_data_skipping_indices, true, "Obsolete setting, does nothing. Will be removed after 2020-05-31", 0) \
M(SettingBool, merge_tree_uniform_read_distribution, true, "Obsolete setting, does nothing. Will be removed after 2020-05-20", 0) \
DECLARE_SETTINGS_COLLECTION(LIST_OF_SETTINGS)
/** Set multiple settings from "profile" (in server configuration file (users.xml), profiles contain groups of multiple settings).
......
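The settings block above is an X-macro list: each `M(type, name, default, description, flags)` entry is expanded several times so that one list generates both the struct members and their metadata. A minimal sketch of the pattern (toy names, `flags` dropped, and none of ClickHouse's real `SettingsCollection` machinery):

```cpp
#include <cstdio>

// Toy settings list in the style of the M(...) entries above.
#define APPLY_FOR_TOY_SETTINGS(M) \
    M(bool, optimize_read_in_order, true, "Enable ORDER BY optimization") \
    M(bool, experimental_use_processors, false, "Use processors pipeline.")

struct ToySettings
{
    // First expansion: declare one member per entry.
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
    APPLY_FOR_TOY_SETTINGS(DECLARE)
#undef DECLARE
};

int main()
{
    ToySettings settings;
    // Second expansion: walk the same list again to print metadata.
#define PRINT(TYPE, NAME, DEFAULT, DESCRIPTION) \
    std::printf("%s = %d (%s)\n", #NAME, int(settings.NAME), DESCRIPTION);
    APPLY_FOR_TOY_SETTINGS(PRINT)
#undef PRINT
}
```

This is why moving a setting into the obsolete block keeps old clients working: the member still exists, it just no longer influences query processing.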
@@ -261,6 +261,10 @@ void PointInPolygonWithGrid<CoordinateType>::buildGrid()
for (size_t row = 0; row < grid_size; ++row)
{
#pragma GCC diagnostic push
#if !__clang__
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
CoordinateType y_min = min_corner.y() + row * cell_height;
CoordinateType y_max = min_corner.y() + (row + 1) * cell_height;
@@ -268,6 +272,7 @@ void PointInPolygonWithGrid<CoordinateType>::buildGrid()
{
CoordinateType x_min = min_corner.x() + col * cell_width;
CoordinateType x_max = min_corner.x() + (col + 1) * cell_width;
#pragma GCC diagnostic pop
Box cell_box(Point(x_min, y_min), Point(x_max, y_max));
Polygon cell_bound;
......
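For context on the pragmas added above: `-Wmaybe-uninitialized` is a GCC-only diagnostic, so the `ignored` pragma is compiled only when `__clang__` is not defined, while the `push`/`pop` pair keeps the suppression scoped. A standalone sketch of the same guard (the computation is illustrative, not the real grid math):

```cpp
double cellLowerBound(double min_corner_y, double cell_height, unsigned row)
{
#pragma GCC diagnostic push
#if !__clang__
// GCC cannot always prove such values are initialized across the loop.
#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
#endif
    double y_min = min_corner_y + row * cell_height;
#pragma GCC diagnostic pop
    return y_min;
}
```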
@@ -85,8 +85,16 @@ struct BloomFilterHash
throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::LOGICAL_ERROR);
const auto & offsets = array_col->getOffsets();
size_t offset = (pos == 0) ? 0 : offsets[pos - 1];
limit = std::max(array_col->getData().size() - offset, limit);
limit = offsets[pos + limit - 1] - offsets[pos - 1]; /// PaddedPODArray allows access on index -1.
pos = offsets[pos - 1];
if (limit == 0)
{
auto index_column = ColumnUInt64::create(1);
ColumnUInt64::Container & index_column_vec = index_column->getData();
index_column_vec[0] = 0;
return index_column;
}
}
const ColumnPtr actual_col = BloomFilter::getPrimitiveColumn(column);
......
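The arithmetic above maps a granule range over array rows onto the flattened nested values: for rows `[pos, pos + limit)` the nested data spans `[offsets[pos - 1], offsets[pos + limit - 1])`, and the new `limit == 0` branch returns a one-element zero column rather than hashing an empty range. A hedged sketch of the offset arithmetic (a plain `std::vector` stands in for `PaddedPODArray`, so the `pos == 0` case is guarded explicitly instead of relying on index `-1`):

```cpp
#include <cstddef>
#include <vector>

// Not the ClickHouse implementation: offsets[i] is the cumulative number of
// nested elements up to and including row i of an Array column.
std::size_t nestedBegin(const std::vector<std::size_t> & offsets, std::size_t pos)
{
    return pos == 0 ? 0 : offsets[pos - 1];
}

// Number of flattened elements covered by rows [pos, pos + limit), limit > 0.
// E.g. offsets {3, 3, 5} encodes rows of sizes 3, 0 and 2, so rows [1, 3)
// cover flattened elements [3, 5) and the count is 2.
std::size_t nestedCount(const std::vector<std::size_t> & offsets,
                        std::size_t pos, std::size_t limit)
{
    return offsets[pos + limit - 1] - nestedBegin(offsets, pos);
}
```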
@@ -242,17 +242,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
ErrorCodes::TOO_DEEP_SUBQUERIES);
if (settings.allow_experimental_cross_to_join_conversion)
{
CrossToInnerJoinVisitor::Data cross_to_inner;
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
}
CrossToInnerJoinVisitor::Data cross_to_inner;
CrossToInnerJoinVisitor(cross_to_inner).visit(query_ptr);
if (settings.allow_experimental_multiple_joins_emulation)
{
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
}
JoinToSubqueryTransformVisitor::Data join_to_subs_data{*context};
JoinToSubqueryTransformVisitor(join_to_subs_data).visit(query_ptr);
max_streams = settings.max_threads;
auto & query = getSelectQuery();
......
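With this change the `CROSS JOIN` to `INNER JOIN` rewrite and the multiple-join emulation always run; the `allow_experimental_*` settings that used to gate them become the no-op obsolete settings declared earlier in this diff. A hedged sketch of the decision the rewrite makes (invented types, not the real visitor): a `WHERE` conjunct may move into an `INNER JOIN ... ON` clause only if it is an equality between columns of the two joined tables.

```cpp
#include <string>

// Invented stand-in for one AND-ed term of a WHERE clause.
struct Conjunct
{
    std::string left_table;   // table qualifying the left operand
    std::string right_table;  // table qualifying the right operand
    bool is_equality = false;
};

bool canBecomeJoinCondition(const Conjunct & c,
                            const std::string & t1, const std::string & t2)
{
    return c.is_equality
        && ((c.left_table == t1 && c.right_table == t2)
            || (c.left_table == t2 && c.right_table == t1));
}
```

The reference-file changes later in this diff show the effect: `FROM t1_00826 CROSS JOIN t2_00826 WHERE a = t2_00826.a` is now always printed back as `ALL INNER JOIN t2_00826 ON a = t2_00826.a`.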
@@ -239,7 +239,7 @@ static const ASTTablesInSelectQueryElement * getFirstTableJoin(const ASTSelectQu
if (!joined_table)
joined_table = &tables_element;
else
throw Exception("Multiple JOIN disabled or does not support the query.", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("Multiple JOIN does not support the query.", ErrorCodes::NOT_IMPLEMENTED);
}
}
......
@@ -429,21 +429,30 @@ IProcessor::Status SortingAggregatedTransform::prepare()
continue;
}
all_finished = false;
//all_finished = false;
in->setNeeded();
if (!in->hasData())
{
need_data = true;
all_finished = false;
continue;
}
auto chunk = in->pull();
/// If chunk was pulled, then we need data from this port.
need_data = true;
addChunk(std::move(chunk), input_num);
if (in->isFinished())
{
is_input_finished[input_num] = true;
}
else
{
/// If chunk was pulled, then we need data from this port.
need_data = true;
all_finished = false;
}
}
if (pushed_to_output)
......
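The reordering above fixes the bookkeeping for a port that delivers its last chunk: `isFinished()` is re-checked after `pull()`, and only a still-open port keeps `need_data` set and `all_finished` cleared. A hedged model of the corrected loop (toy types, not `IProcessor`):

```cpp
#include <cstddef>
#include <vector>

// Toy stand-in for an input port; not the real pipeline classes.
struct ToyPort
{
    bool has_data = false;
    bool finished = false; // no more chunks will ever arrive
};

struct ScanResult
{
    bool need_data = false;
    bool all_finished = true;
};

ScanResult scanInputs(std::vector<ToyPort> & inputs, std::vector<char> & is_input_finished)
{
    ScanResult res;
    for (std::size_t i = 0; i < inputs.size(); ++i)
    {
        if (is_input_finished[i])
            continue;

        if (!inputs[i].has_data)
        {
            res.need_data = true;
            res.all_finished = false;
            continue;
        }

        inputs[i].has_data = false; // "pull" the chunk and process it

        if (inputs[i].finished)
            is_input_finished[i] = true; // last chunk: do not request more data
        else
        {
            res.need_data = true;
            res.all_finished = false;
        }
    }
    return res;
}
```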
@@ -140,35 +140,36 @@ void registerStorageJoin(StorageFactory & factory)
{
const String strictness_str = Poco::toLower(*opt_strictness_id);
if (strictness_str == "any" || strictness_str == "\'any\'")
if (strictness_str == "any")
{
if (old_any_join)
strictness = ASTTableJoin::Strictness::RightAny;
else
strictness = ASTTableJoin::Strictness::Any;
}
else if (strictness_str == "all" || strictness_str == "\'all\'")
else if (strictness_str == "all")
strictness = ASTTableJoin::Strictness::All;
else if (strictness_str == "semi" || strictness_str == "\'semi\'")
else if (strictness_str == "semi")
strictness = ASTTableJoin::Strictness::Semi;
else if (strictness_str == "anti" || strictness_str == "\'anti\'")
else if (strictness_str == "anti")
strictness = ASTTableJoin::Strictness::Anti;
}
if (strictness == ASTTableJoin::Strictness::Unspecified)
throw Exception("First parameter of storage Join must be ANY or ALL or SEMI or ANTI.", ErrorCodes::BAD_ARGUMENTS);
throw Exception("First parameter of storage Join must be ANY or ALL or SEMI or ANTI (without quotes).",
ErrorCodes::BAD_ARGUMENTS);
if (auto opt_kind_id = tryGetIdentifierName(engine_args[1]))
{
const String kind_str = Poco::toLower(*opt_kind_id);
if (kind_str == "left" || kind_str == "\'left\'")
if (kind_str == "left")
kind = ASTTableJoin::Kind::Left;
else if (kind_str == "inner" || kind_str == "\'inner\'")
else if (kind_str == "inner")
kind = ASTTableJoin::Kind::Inner;
else if (kind_str == "right" || kind_str == "\'right\'")
else if (kind_str == "right")
kind = ASTTableJoin::Kind::Right;
else if (kind_str == "full" || kind_str == "\'full\'")
else if (kind_str == "full")
{
if (strictness == ASTTableJoin::Strictness::Any)
strictness = ASTTableJoin::Strictness::RightAny;
@@ -177,7 +178,8 @@ void registerStorageJoin(StorageFactory & factory)
}
if (kind == ASTTableJoin::Kind::Comma)
throw Exception("Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL.", ErrorCodes::BAD_ARGUMENTS);
throw Exception("Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes).",
ErrorCodes::BAD_ARGUMENTS);
Names key_names;
key_names.reserve(engine_args.size() - 2);
......
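A hedged sketch of the stricter parameter handling above (invented helper, not the real `tryGetIdentifierName`): a quoted literal such as `'any'` is no longer accepted as an identifier, so it falls through to the "... (without quotes)" exceptions added in this diff.

```cpp
#include <algorithm>
#include <cctype>
#include <optional>
#include <string>

// Returns the lower-cased identifier, or nullopt for a quoted string literal.
std::optional<std::string> tryGetLoweredIdentifier(const std::string & arg)
{
    if (arg.size() >= 2 && arg.front() == '\'' && arg.back() == '\'')
        return std::nullopt; // string literal, not a bare identifier

    std::string lowered = arg;
    std::transform(lowered.begin(), lowered.end(), lowered.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
    return lowered;
}
```

With this, `ENGINE = Join(ANY, LEFT, x)` keeps working while `Join('any', 'left', x)` now raises the BAD_ARGUMENTS exceptions above.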
@@ -61,7 +61,6 @@ def test_dictionary_ddl_on_cluster(started_cluster):
node.query("ALTER TABLE sometbl UPDATE value = 'new_key' WHERE 1")
ch1.query("SYSTEM RELOAD DICTIONARY ON CLUSTER 'cluster' `default.somedict`")
time.sleep(2) # SYSTEM RELOAD DICTIONARY is an asynchronous query
for num, node in enumerate([ch1, ch2, ch3, ch4]):
assert node.query("SELECT dictGetString('default.somedict', 'value', toUInt64({}))".format(num)) == 'new_key' + '\n'
......
<test>
<type>once</type>
<stop_conditions>
<all_of>
<total_time_ms>30000</total_time_ms>
</all_of>
</stop_conditions>
<create_query>DROP TABLE IF EXISTS test_bf</create_query>
<create_query>CREATE TABLE test_bf (`id` int, `ary` Array(String), INDEX idx_ary ary TYPE bloom_filter(0.01) GRANULARITY 8192) ENGINE = MergeTree() ORDER BY id</create_query>
<query>SYSTEM STOP MERGES</query>
<query>INSERT INTO test_bf SELECT number AS id, [CAST(id, 'String'), CAST(id + 1, 'String'), CAST(id + 2, 'String')] FROM system.numbers LIMIT 3000000</query>
<query>SYSTEM START MERGES</query>
<drop_query>DROP TABLE IF EXISTS test_bf</drop_query>
</test>
@@ -13,8 +13,6 @@ INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(11);
INSERT INTO table3 SELECT number * 30, number * 300 FROM numbers(10);
INSERT INTO table5 SELECT number * 5, number * 50, number * 500 FROM numbers(10);
SET allow_experimental_multiple_joins_emulation = 1;
select t1.a, t2.b, t3.c from table1 as t1 join table2 as t2 on t1.a = t2.a join table3 as t3 on t2.b = t3.b;
select t1.a, t2.b, t5.c from table1 as t1 join table2 as t2 on t1.a = t2.a join table5 as t5 on t1.a = t5.a AND t2.b = t5.b;
......
@@ -13,7 +13,6 @@ INSERT INTO table2 SELECT number * 2, number * 20 FROM numbers(11);
INSERT INTO table3 SELECT number * 30, number * 300 FROM numbers(10);
INSERT INTO table5 SELECT number * 5, number * 50, number * 500 FROM numbers(10);
SET allow_experimental_multiple_joins_emulation = 1;
SET joined_subquery_requires_alias = 1;
select t1.a, t2.b, t3.c from table1 as t1 join table2 as t2 on t1.a = t2.a join table3 as t3 on t2.b = t3.b;
......
0 0
0 0
cross
1 1 1 1
1 1 1 2
2 2 2 \N
1 1 1 1
1 1 1 2
2 2 2 \N
cross nullable
1 1 1 1
2 2 1 2
1 1 1 1
2 2 1 2
cross nullable vs not nullable
1 1 1 1
2 2 1 2
1 1 1 1
2 2 1 2
cross self
1 1 1 1
2 2 2 2
1 1 1 1
2 2 2 2
cross one table expr
1 1 1 1
1 1 1 2
@@ -31,23 +21,12 @@ cross one table expr
2 2 1 2
2 2 2 \N
2 2 3 \N
1 1 1 1
1 1 1 2
1 1 2 \N
1 1 3 \N
2 2 1 1
2 2 1 2
2 2 2 \N
2 2 3 \N
cross multiple ands
1 1 1 1
1 1 1 1
cross and inside and
1 1 1 1
1 1 1 1
cross split conjunction
1 1 1 1
1 1 1 1
comma
1 1 1 1
1 1 1 2
@@ -56,26 +35,18 @@ comma nullable
1 1 1 1
2 2 1 2
cross
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = t2_00826.a
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a
cross nullable
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\n, t2_00826\nWHERE a = t2_00826.a
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.a\nWHERE a = t2_00826.a
cross nullable vs not nullable
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = t2_00826.b
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON a = t2_00826.b\nWHERE a = t2_00826.b
cross self
SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nCROSS JOIN t1_00826 AS y\nWHERE (a = y.a) AND (b = y.b)
SELECT \n a, \n b, \n y.a, \n y.b\nFROM t1_00826 AS x\nALL INNER JOIN t1_00826 AS y ON (a = y.a) AND (b = y.b)\nWHERE (a = y.a) AND (b = y.b)
cross one table expr
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE a = b
cross multiple ands
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b)
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b)
cross and inside and
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b)))
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (a = t2_00826.a) AND (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND ((a = t2_00826.a) AND ((a = t2_00826.a) AND (b = t2_00826.b)))
cross split conjunction
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nCROSS JOIN t2_00826\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0)
SELECT \n a, \n b, \n t2_00826.a, \n t2_00826.b\nFROM t1_00826\nALL INNER JOIN t2_00826 ON (a = t2_00826.a) AND (b = t2_00826.b)\nWHERE (a = t2_00826.a) AND (b = t2_00826.b) AND (a >= 1) AND (t2_00826.b > 0)
SET enable_debug_queries = 1;
SET enable_optimize_predicate_expression = 0;
set allow_experimental_cross_to_join_conversion = 0;
select * from system.one l cross join system.one r;
set allow_experimental_cross_to_join_conversion = 1;
select * from system.one l cross join system.one r;
DROP TABLE IF EXISTS t1_00826;
@@ -17,50 +14,21 @@ INSERT INTO t2_00826 values (1,1), (1,2);
INSERT INTO t2_00826 (a) values (2), (3);
SELECT 'cross';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
SELECT 'cross nullable';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.b;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.b = t2_00826.b;
SELECT 'cross nullable vs not nullable';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b;
SELECT 'cross self';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 x cross join t1_00826 y where x.a = y.a and x.b = y.b;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 x cross join t1_00826 y where x.a = y.a and x.b = y.b;
SELECT 'cross one table expr';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t1_00826.b order by (t1_00826.a, t2_00826.a, t2_00826.b);
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t1_00826.b order by (t1_00826.a, t2_00826.a, t2_00826.b);
SELECT 'cross multiple ands';
SET allow_experimental_cross_to_join_conversion = 0;
--SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a = t2_00826.a;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b;
SELECT 'cross and inside and';
SET allow_experimental_cross_to_join_conversion = 0;
--SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b));
--SELECT * FROM t1_00826 x cross join t2_00826 y where t1_00826.a = t2_00826.a and (t1_00826.b = t2_00826.b and (x.a = y.a and x.b = y.b));
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.b = t2_00826.b and 1);
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.b = t2_00826.b and 1);
SELECT 'cross split conjunction';
SET allow_experimental_cross_to_join_conversion = 0;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a >= 1 and t2_00826.b = 1;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a >= 1 and t2_00826.b = 1;
SET allow_experimental_cross_to_join_conversion = 1;
SELECT 'comma';
SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a;
@@ -69,30 +37,22 @@ SELECT * FROM t1_00826, t2_00826 where t1_00826.b = t2_00826.b;
SELECT 'cross';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a;
SELECT 'cross nullable';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a;
ANALYZE SELECT * FROM t1_00826, t2_00826 where t1_00826.a = t2_00826.a;
SELECT 'cross nullable vs not nullable';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b;
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.b;
SELECT 'cross self';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 x cross join t1_00826 y where x.a = y.a and x.b = y.b;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 x cross join t1_00826 y where x.a = y.a and x.b = y.b;
ANALYZE SELECT * FROM t1_00826 x cross join t1_00826 y where x.a = y.a and x.b = y.b;
SELECT 'cross one table expr';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t1_00826.b;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t1_00826.b;
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t1_00826.b;
SELECT 'cross multiple ands';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b;
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b;
SELECT 'cross and inside and';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b));
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b));
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and (t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b));
SELECT 'cross split conjunction';
SET allow_experimental_cross_to_join_conversion = 0; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a >= 1 and t2_00826.b > 0;
SET allow_experimental_cross_to_join_conversion = 1; ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a >= 1 and t2_00826.b > 0;
ANALYZE SELECT * FROM t1_00826 cross join t2_00826 where t1_00826.a = t2_00826.a and t1_00826.b = t2_00826.b and t1_00826.a >= 1 and t2_00826.b > 0;
DROP TABLE t1_00826;
DROP TABLE t2_00826;
DROP TABLE IF EXISTS minmax_idx;
DROP TABLE IF EXISTS minmax_idx2;
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE minmax_idx
(
......
@@ -3,7 +3,6 @@ DROP TABLE IF EXISTS test.minmax_idx_r;
DROP TABLE IF EXISTS test.minmax_idx2;
DROP TABLE IF EXISTS test.minmax_idx2_r;
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE test.minmax_idx
(
......
#!/usr/bin/env bash
CLICKHOUSE_CLIENT_OPT="--allow_experimental_data_skipping_indices=1"
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
@@ -9,7 +8,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS minmax_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE minmax_idx
(
u64 UInt64,
......
DROP TABLE IF EXISTS minmax_idx1;
DROP TABLE IF EXISTS minmax_idx2;
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE minmax_idx1
(
......
#!/usr/bin/env bash
CLICKHOUSE_CLIENT_OPT="--allow_experimental_data_skipping_indices=1"
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
@@ -8,7 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE set_idx
(
u64 UInt64,
......
SET joined_subquery_requires_alias = 0;
set allow_experimental_multiple_joins_emulation = 0;
set allow_experimental_cross_to_join_conversion = 0;
select ax, c from (select [1,2] ax, 0 c) array join ax join (select 0 c) using(c);
select ax, c from (select [3,4] ax, 0 c) join (select 0 c) using(c) array join ax;
select ax, c from (select [5,6] ax, 0 c) s1 join system.one s2 ON s1.c = s2.dummy array join ax;
set allow_experimental_multiple_joins_emulation = 1;
set allow_experimental_cross_to_join_conversion = 1;
select ax, c from (select [1,2] ax, 0 c) array join ax join (select 0 c) using(c);
select ax, c from (select [3,4] ax, 0 c) join (select 0 c) using(c) array join ax;
select ax, c from (select [5,6] ax, 0 c) s1 join system.one s2 ON s1.c = s2.dummy array join ax;
......
@@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE set_idx
(
u64 UInt64,
......
SET allow_experimental_data_skipping_indices=1;
drop table if exists nullable_set_index;
create table nullable_set_index (a UInt64, b Nullable(String), INDEX b_index b TYPE set(0) GRANULARITY 8192) engine = MergeTree order by a;
......
SET allow_experimental_data_skipping_indices=1;
drop table if exists null_lc_set_index;
......
@@ -10,7 +10,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx2;"
# NGRAM BF
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx
(
k UInt64,
@@ -21,7 +20,6 @@ ORDER BY k
SETTINGS index_granularity = 2;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx2
(
k UInt64,
@@ -105,7 +103,6 @@ $CLICKHOUSE_CLIENT --query="SELECT * FROM bloom_filter_idx WHERE (s, lower(s)) I
# TOKEN BF
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx3
(
k UInt64,
@@ -144,7 +141,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE bloom_filter_idx3"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx_na;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx_na
(
na Array(Array(String)),
......
@@ -8,7 +8,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices=1;
CREATE TABLE test.minmax_idx
(
u64 UInt64,
......
@@ -35,7 +35,6 @@ $CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2;"
$CLICKHOUSE_CLIENT --query="SELECT count() FROM test.minmax_idx WHERE i64 = 2 FORMAT JSON" | grep "rows_read"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices=1;
ALTER TABLE test.minmax_idx ADD INDEX idx (i64, u64 * i64) TYPE minmax GRANULARITY 1;"
$CLICKHOUSE_CLIENT --query="ALTER TABLE test.minmax_idx MATERIALIZE INDEX idx IN PARTITION 1;"
......
@@ -8,7 +8,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.minmax_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices=1;
CREATE TABLE test.minmax_idx
(
u64 UInt64,
......
@@ -8,5 +8,5 @@ set -e
for sequence in 1 10 100 1000 10000 100000 1000000 10000000 100000000 1000000000; do \
rate=`echo "1 $sequence" | awk '{printf("%0.9f\n",$1/$2)}'`
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.bloom_filter_idx";
$CLICKHOUSE_CLIENT --allow_experimental_data_skipping_indices=1 --query="CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192"
$CLICKHOUSE_CLIENT --query="CREATE TABLE test.bloom_filter_idx ( u64 UInt64, i32 Int32, f64 Float64, d Decimal(10, 2), s String, e Enum8('a' = 1, 'b' = 2, 'c' = 3), dt Date, INDEX bloom_filter_a i32 TYPE bloom_filter($rate) GRANULARITY 1 ) ENGINE = MergeTree() ORDER BY u64 SETTINGS index_granularity = 8192"
done
DROP TABLE IF EXISTS min_max_with_nullable_string;
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE min_max_with_nullable_string (
t DateTime,
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS test.single_column_bloom_filter;
@@ -246,3 +245,21 @@ SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(str, '1
SELECT COUNT() FROM test.bloom_filter_array_lc_null_types_test WHERE has(fixed_string, toFixedString('100', 5));
DROP TABLE IF EXISTS test.bloom_filter_array_lc_null_types_test;
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_lc_str;
CREATE TABLE test.bloom_filter_array_offsets_lc_str (order_key int, str Array(LowCardinality(String)), INDEX idx str TYPE bloom_filter(1.01) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024;
INSERT INTO test.bloom_filter_array_offsets_lc_str SELECT number AS i, if(i%2, ['value'], []) FROM system.numbers LIMIT 10000;
SELECT count() FROM test.bloom_filter_array_offsets_lc_str WHERE has(str, 'value');
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_lc_str;
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_str;
CREATE TABLE test.bloom_filter_array_offsets_str (order_key int, str Array(String), INDEX idx str TYPE bloom_filter(1.01) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024;
INSERT INTO test.bloom_filter_array_offsets_str SELECT number AS i, if(i%2, ['value'], []) FROM system.numbers LIMIT 10000;
SELECT count() FROM test.bloom_filter_array_offsets_str WHERE has(str, 'value');
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_str;
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_i;
CREATE TABLE test.bloom_filter_array_offsets_i (order_key int, i Array(int), INDEX idx i TYPE bloom_filter(1.01) GRANULARITY 1024) ENGINE = MergeTree() ORDER BY order_key SETTINGS index_granularity = 1024;
INSERT INTO test.bloom_filter_array_offsets_i SELECT number AS i, if(i%2, [99999], []) FROM system.numbers LIMIT 10000;
SELECT count() FROM test.bloom_filter_array_offsets_i WHERE has(i, 99999);
DROP TABLE IF EXISTS test.bloom_filter_array_offsets_i;
SET send_logs_level = 'none';
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS mt_with_pk;
......
@@ -7,7 +7,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS bloom_filter_idx;"
# NGRAM BF
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE bloom_filter_idx
(
k UInt64,
......
@@ -6,7 +6,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS set_idx;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices = 1;
CREATE TABLE set_idx
(
k UInt64,
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS indexed_table;
......
@@ -9,7 +9,6 @@ $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS test.indices_mutaions2;"
$CLICKHOUSE_CLIENT -n --query="
SET allow_experimental_data_skipping_indices=1;
CREATE TABLE test.indices_mutaions1
(
u64 UInt64,
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS test.set_index_not;
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS bloom_filter;
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS test.set_array;
......
SET allow_experimental_data_skipping_indices=1;
DROP TABLE IF EXISTS test_vertical_merge;
CREATE TABLE test_vertical_merge (
......
SET allow_experimental_data_skipping_indices=1;
DROP TABLE IF EXISTS bad_skip_idx;
......
SET allow_experimental_data_skipping_indices=1;
CREATE TABLE foo (key int, INDEX i1 key TYPE minmax GRANULARITY 1) Engine=MergeTree() ORDER BY key;
CREATE TABLE as_foo AS foo;
CREATE TABLE dist (key int, INDEX i1 key TYPE minmax GRANULARITY 1) Engine=Distributed(test_shard_localhost, currentDatabase(), 'foo'); -- { serverError 36 }
......
SET allow_experimental_data_skipping_indices = 1;
DROP TABLE IF EXISTS test.bloom_filter_null_array;
CREATE TABLE test.bloom_filter_null_array (v Array(LowCardinality(Nullable(String))), INDEX idx v TYPE bloom_filter(0.1) GRANULARITY 1) ENGINE = MergeTree() ORDER BY v;
INSERT INTO test.bloom_filter_null_array VALUES ([]);
INSERT INTO test.bloom_filter_null_array VALUES (['1', '2']) ([]) ([]);
INSERT INTO test.bloom_filter_null_array VALUES ([]) ([]) (['2', '3']);
SELECT COUNT() FROM test.bloom_filter_null_array;
SELECT COUNT() FROM test.bloom_filter_null_array WHERE has(v, '1');
SELECT COUNT() FROM test.bloom_filter_null_array WHERE has(v, '2');
SELECT COUNT() FROM test.bloom_filter_null_array WHERE has(v, '3');
SELECT COUNT() FROM test.bloom_filter_null_array WHERE has(v, '4');
DROP TABLE IF EXISTS test.bloom_filter_null_array;
left
0 a1
1 a2
2 a3 b1
2 a3 b2
3 a4
4 a5 b3
4 a5 b4
4 a5 b5
inner
2 a3 b1
2 a3 b2
4 a5 b3
4 a5 b4
4 a5 b5
right
2 a3 b1
2 a3 b2
4 a5 b3
4 a5 b4
4 a5 b5
5 b6
full
0 a1
1 a2
2 a3 b1
2 a3 b2
3 a4
4 a5 b3
4 a5 b4
4 a5 b5
5 b6
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS left_join;
DROP TABLE IF EXISTS inner_join;
DROP TABLE IF EXISTS right_join;
DROP TABLE IF EXISTS full_join;
CREATE TABLE t1 (x UInt32, str String) engine = Memory;
CREATE TABLE left_join (x UInt32, s String) engine = Join(ALL, LEFT, x);
CREATE TABLE inner_join (x UInt32, s String) engine = Join(ALL, INNER, x);
CREATE TABLE right_join (x UInt32, s String) engine = Join(ALL, RIGHT, x);
CREATE TABLE full_join (x UInt32, s String) engine = Join(ALL, FULL, x);
INSERT INTO t1 (x, str) VALUES (0, 'a1'), (1, 'a2'), (2, 'a3'), (3, 'a4'), (4, 'a5');
INSERT INTO left_join (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
INSERT INTO inner_join (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
INSERT INTO right_join (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
INSERT INTO full_join (x, s) VALUES (2, 'b1'), (2, 'b2'), (4, 'b3'), (4, 'b4'), (4, 'b5'), (5, 'b6');
SET join_use_nulls = 0;
SELECT 'left';
SELECT * FROM t1 LEFT JOIN left_join j USING(x) ORDER BY x, str, s;
SELECT 'inner';
SELECT * FROM t1 INNER JOIN inner_join j USING(x) ORDER BY x, str, s;
SELECT 'right';
SELECT * FROM t1 RIGHT JOIN right_join j USING(x) ORDER BY x, str, s;
SELECT 'full';
SELECT * FROM t1 FULL JOIN full_join j USING(x) ORDER BY x, str, s;
-- TODO
-- SET join_use_nulls = 1;
--
-- SELECT 'left (join_use_nulls)';
-- SELECT * FROM t1 LEFT JOIN left_join j USING(x) ORDER BY x, str, s;
--
-- SELECT 'inner (join_use_nulls)';
-- SELECT * FROM t1 INNER JOIN inner_join j USING(x) ORDER BY x, str, s;
--
-- SELECT 'right (join_use_nulls)';
-- SELECT * FROM t1 RIGHT JOIN right_join j USING(x) ORDER BY x, str, s;
--
-- SELECT 'full (join_use_nulls)';
-- SELECT * FROM t1 FULL JOIN full_join j USING(x) ORDER BY x, str, s;
DROP TABLE t1;
DROP TABLE left_join;
DROP TABLE inner_join;
DROP TABLE right_join;
DROP TABLE full_join;
@@ -16,7 +16,6 @@ any right
5 b6
semi left
2 a3 b1
2 a6 b1
4 a5 b3
semi right
2 a3 b1
......
@@ -45,9 +45,6 @@ SELECT * FROM t1 ANY INNER JOIN any_inner_join j USING(x) ORDER BY x, str, s;
SELECT 'any right';
SELECT * FROM t1 ANY RIGHT JOIN any_right_join j USING(x) ORDER BY x, str, s;
INSERT INTO t1 (x, str) VALUES (2, 'a6');
SELECT 'semi left';
SELECT * FROM t1 SEMI LEFT JOIN semi_left_join j USING(x) ORDER BY x, str, s;
......
# docker build -t yandex/clickhouse-performance-comparison .
FROM alpine
FROM ubuntu:18.04
RUN apk update && apk add --no-cache bash wget python3 python3-dev g++
RUN pip3 --no-cache-dir install clickhouse_driver
RUN apk del g++ python3-dev
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \
p7zip-full bash ncdu wget python3 python3-pip python3-dev g++ \
&& pip3 --no-cache-dir install clickhouse_driver \
&& apt-get purge --yes python3-dev g++ \
&& apt-get autoremove --yes \
&& apt-get clean
COPY * /
CMD /entrypoint.sh
# docker run --network=host --volume <workspace>:/workspace --volume=<output>:/output -e LEFT_PR=<> -e LEFT_SHA=<> -e RIGHT_PR=<> -e RIGHT_SHA=<> yandex/clickhouse-performance-comparison
@@ -6,8 +6,6 @@ trap "kill 0" EXIT
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
mkdir left ||:
mkdir right ||:
mkdir db0 ||:
left_pr=$1
@@ -18,19 +16,21 @@ right_sha=$4
function download
{
rm -r left ||:
mkdir left ||:
rm -r right ||:
mkdir right ||:
la="$left_pr-$left_sha.tgz"
ra="$right_pr-$right_sha.tgz"
wget -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O "$la" && tar -C left --strip-components=1 -zxvf "$la" &
wget -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O "$ra" && tar -C right --strip-components=1 -zxvf "$ra" &
cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" && tar -xvf hits_10m_single.tar &
cd db0 && wget -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" && tar -xvf hits_100m_single.tar &
cd db0 && wget -nd -c "https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" && tar -xvf hits_v1.tar &
cd db0 && wget -nd -c "https://clickhouse-datasets.s3.yandex.net/visits/partitions/visits_v1.tar" && tar -xvf visits_v1.tar &
wget -q -nd -c "https://clickhouse-builds.s3.yandex.net/$left_pr/$left_sha/performance/performance.tgz" -O "$la" && tar -C left --strip-components=1 -zxvf "$la" &
wget -q -nd -c "https://clickhouse-builds.s3.yandex.net/$right_pr/$right_sha/performance/performance.tgz" -O "$ra" && tar -C right --strip-components=1 -zxvf "$ra" &
cd db0 && wget -q -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_10m_single/partitions/hits_10m_single.tar" && tar -xvf hits_10m_single.tar &
cd db0 && wget -q -nd -c "https://s3.mds.yandex.net/clickhouse-private-datasets/hits_100m_single/partitions/hits_100m_single.tar" && tar -xvf hits_100m_single.tar &
cd db0 && wget -q -nd -c "https://clickhouse-datasets.s3.yandex.net/hits/partitions/hits_v1.tar" && tar -xvf hits_v1.tar &
wait
# Use hardlinks instead of copying
rm -r left/db ||:
rm -r right/db ||:
cp -al db0/ left/db/
cp -al db0/ right/db/
}
@@ -40,16 +40,26 @@ function configure
{
sed -i 's/<tcp_port>9000/<tcp_port>9001/g' right/config/config.xml
cat > right/config/config.d/perf-test-tweaks.xml <<EOF
cat > right/config/config.d/zz-perf-test-tweaks.xml <<EOF
<yandex>
<logger>
<console>true</console>
</logger>
<text_log remove="remove"/>
<text_log remove="remove">
<table remove="remove"/>
</text_log>
<metric_log remove="remove">
<table remove="remove"/>
</metric_log>
</yandex>
EOF
cp right/config/config.d/perf-test-tweaks.xml left/config/config.d/perf-test-tweaks.xml
cp right/config/config.d/zz-perf-test-tweaks.xml left/config/config.d/zz-perf-test-tweaks.xml
rm left/config/config.d/metric_log.xml ||:
rm left/config/config.d/text_log.xml ||:
rm right/config/config.d/metric_log.xml ||:
rm right/config/config.d/text_log.xml ||:
}
configure
@@ -78,6 +88,11 @@ function restart
while ! right/clickhouse client --port 9001 --query "select 1" ; do kill -0 $right_pid ; echo . ; sleep 1 ; done
echo right ok
right/clickhouse client --port 9001 --query "create database test" ||:
right/clickhouse client --port 9001 --query "rename table datasets.hits_v1 to test.hits" ||:
left/clickhouse client --port 9000 --query "create database test" ||:
left/clickhouse client --port 9000 --query "rename table datasets.hits_v1 to test.hits" ||:
}
restart
@@ -90,13 +105,14 @@ function run_tests
for test in left/performance/*.xml
do
test_name=$(basename $test ".xml")
"$script_dir/perf.py" "$test" > "$test_name-raw.tsv" || continue
"$script_dir/perf.py" "$test" > "$test_name-raw.tsv" 2> "$test_name-err.log" || continue
right/clickhouse local --file "$test_name-raw.tsv" --structure 'query text, run int, version UInt32, time float' --query "$(cat $script_dir/eqmed.sql)" > "$test_name-report.tsv"
done
}
run_tests
# Analyze results
result_structure="fail int, left float, right float, diff float, rd Array(float), query text"
result_structure="left float, right float, diff float, rd Array(float), query text"
right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where rd[3] > 0.05 order by rd[3] desc" > flap-prone.tsv
right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > failed.tsv
right/clickhouse local --file '*-report.tsv' -S "$result_structure" --query "select * from table where diff > 0.05 and diff > rd[3] order by diff desc" > bad-perf.tsv
grep Exception:[^:] *-err.log > run-errors.log
#!/bin/bash
cd /workspace
../compare.sh $LEFT_PR $LEFT_SHA $RIGHT_PR $RIGHT_SHA > compare.log 2>&1
7z a /output/output.7z *.log *.tsv
cp compare.log /output
@@ -38,4 +38,4 @@ from
group by query
) original_medians_array
where rd.query = original_medians_array.query
order by fail desc, rd_quantiles_percent[3] asc;
order by rd_quantiles_percent[3] desc;
@@ -15,8 +15,13 @@ root = tree.getroot()
# Check main metric
main_metric_element = root.find('main_metric/*')
if main_metric_element and main_metric_element.tag != 'min_time':
raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric))
if main_metric_element is not None and main_metric_element.tag != 'min_time':
raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't.
infinite_sign = root.find('.//average_speed_not_changing_for_ms')
if infinite_sign is not None:
raise Exception('Looks like the test is infinite (sign 1)')
# Open connections
servers = [{'host': 'localhost', 'port': 9000, 'client_name': 'left'}, {'host': 'localhost', 'port': 9001, 'client_name': 'right'}]
@@ -24,12 +29,9 @@ connections = [clickhouse_driver.Client(**server) for server in servers]
# Check tables that should exist
tables = [e.text for e in root.findall('preconditions/table_exists')]
if tables:
for t in tables:
for c in connections:
tables_list = ", ".join("'{}'".format(t) for t in tables)
res = c.execute("select t from values('t text', {}) anti join system.tables on database = currentDatabase() and name = t".format(tables_list))
if res:
raise Exception('Some tables are not found: {}'.format(res))
res = c.execute("select 1 from {}".format(t))
# Apply settings
settings = root.findall('settings/*')
@@ -76,6 +78,9 @@ for c in connections:
c.execute(q)
# Run test queries
def tsv_escape(s):
return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
test_query_templates = [q.text for q in root.findall('query')]
test_queries = substitute_parameters(test_query_templates, parameter_combinations)
@@ -83,7 +88,7 @@ for q in test_queries:
for run in range(0, 7):
for conn_index, c in enumerate(connections):
res = c.execute(q)
print(q + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
print(tsv_escape(q) + '\t' + str(run) + '\t' + str(conn_index) + '\t' + str(c.last_query.elapsed))
# Run drop queries
drop_query_templates = [q.text for q in root.findall('drop_query')]
......
@@ -21,4 +21,36 @@ If you use Oracle through the ODBC driver as a source of external dictionaries,
NLS_LANG=RUSSIAN_RUSSIA.UTF8
```
## How to export data from ClickHouse to a file?
### Using INTO OUTFILE Clause
Add the [INTO OUTFILE](../query_language/select/#into-outfile-clause) clause to your query.
For example:
```sql
SELECT * FROM table INTO OUTFILE 'file'
```
By default, ClickHouse uses the [TabSeparated](../interfaces/formats.md#tabseparated) format for output data. To select the [data format](../interfaces/formats.md), use the [FORMAT clause](../query_language/select/#format-clause).
For example:
```sql
SELECT * FROM table INTO OUTFILE 'file' FORMAT CSV
```
### Using File-engine Table
See [File](../operations/table_engines/file.md).
### Using Command-line Redirection
```bash
$ clickhouse-client --query "SELECT * from table" > result.txt
```
See [clickhouse-client](../interfaces/cli.md).
[Original article](https://clickhouse.yandex/docs/en/faq/general/) <!--hide-->
@@ -29,6 +29,7 @@ The supported formats are:
| [PrettySpace](#prettyspace) | ✗ | ✔ |
| [Protobuf](#protobuf) | ✔ | ✔ |
| [Parquet](#data-format-parquet) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✗ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
@@ -954,16 +955,57 @@ Data types of a ClickHouse table columns can differ from the corresponding field
You can insert Parquet data from a file into a ClickHouse table with the following command:
```bash
cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
```
You can select data from a ClickHouse table and save it into a file in the Parquet format with the following command:
```sql
clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
```bash
$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_file.pq}
```
To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
## ORC {#data-format-orc}
[Apache ORC](https://orc.apache.org/) is a columnar storage format widespread in the Hadoop ecosystem. ClickHouse supports only read operations for this format.
### Data Types Matching
The table below shows supported data types and how they match ClickHouse [data types](../data_types/index.md) in `INSERT` queries.
| ORC data type (`INSERT`) | ClickHouse data type |
| -------------------- | ------------------ |
| `UINT8`, `BOOL` | [UInt8](../data_types/int_uint.md) |
| `INT8` | [Int8](../data_types/int_uint.md) |
| `UINT16` | [UInt16](../data_types/int_uint.md) |
| `INT16` | [Int16](../data_types/int_uint.md) |
| `UINT32` | [UInt32](../data_types/int_uint.md) |
| `INT32` | [Int32](../data_types/int_uint.md) |
| `UINT64` | [UInt64](../data_types/int_uint.md) |
| `INT64` | [Int64](../data_types/int_uint.md) |
| `FLOAT`, `HALF_FLOAT` | [Float32](../data_types/float.md) |
| `DOUBLE` | [Float64](../data_types/float.md) |
| `DATE32` | [Date](../data_types/date.md) |
| `DATE64`, `TIMESTAMP` | [DateTime](../data_types/datetime.md) |
| `STRING`, `BINARY` | [String](../data_types/string.md) |
| `DECIMAL` | [Decimal](../data_types/decimal.md) |
ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.
Unsupported ORC data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
Data types of ClickHouse table columns can differ from the corresponding fields of the inserted ORC data. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../query_language/functions/type_conversion_functions/#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
### Inserting Data
You can insert ORC data from a file into a ClickHouse table with the following command:
```bash
$ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT ORC"
```
To exchange data with the Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
To exchange data with Hadoop, you can use [HDFS table engine](../operations/table_engines/hdfs.md).
## Format Schema {#formatschema}
......
@@ -1120,7 +1120,7 @@ The structure of results (the number and type of columns) must match for the que
Queries that are parts of UNION ALL can't be enclosed in brackets. ORDER BY and LIMIT are applied to separate queries, not to the final result. If you need to apply a conversion to the final result, you can put all the queries with UNION ALL in a subquery in the FROM clause.
### INTO OUTFILE Clause
### INTO OUTFILE Clause {#into-outfile-clause}
Add the `INTO OUTFILE filename` clause (where filename is a string literal) to redirect query output to the specified file.
In contrast to MySQL, the file is created on the client side. The query will fail if a file with the same filename already exists.
@@ -1128,7 +1128,7 @@ This functionality is available in the command-line client and clickhouse-local
The default output format is TabSeparated (the same as in the command-line client batch mode).
### FORMAT Clause
### FORMAT Clause {#format-clause}
Specify 'FORMAT format' to get data in any specified format.
You can use this for convenience, or for creating dumps.
......