From 5212d43ae4f084847d8b4fc4b76342e0ce15fe4d Mon Sep 17 00:00:00 2001
From: chertus
Date: Mon, 30 Sep 2019 17:36:45 +0300
Subject: [PATCH] simplify pmj settings

---
Review notes (this section is ignored by `git am`):
 * partial_merge_join_rows_in_left_blocks is declared as SettingUInt64 rather
   than SettingFloat: it is a row count that InterpreterSelectQuery.cpp reads
   into a size_t and passes as a row threshold.
 * The template argument of std::make_shared was missing from this copy of the
   patch and has been restored as SquashingBlockInputStream; confirm against
   the target tree.
 * Leading whitespace of the hunks was reconstructed; apply with
   `git apply --ignore-whitespace` if context does not match exactly.

 dbms/src/Core/Settings.h                                 |  4 ++--
 dbms/src/Interpreters/AnalyzedJoin.cpp                   | 12 ++----------
 dbms/src/Interpreters/InterpreterSelectQuery.cpp         |  8 ++------
 .../queries/0_stateless/01010_pmj_skip_blocks.sql        |  1 -
 4 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/dbms/src/Core/Settings.h b/dbms/src/Core/Settings.h
index 4153cae011..5b356d3c04 100644
--- a/dbms/src/Core/Settings.h
+++ b/dbms/src/Core/Settings.h
@@ -293,8 +293,8 @@ struct Settings : public SettingsCollection
     M(SettingBool, join_any_take_last_row, false, "When disabled (default) ANY JOIN will take the first found row for a key. When enabled, it will take the last row seen if there are multiple rows for the same key.") \
     M(SettingBool, partial_merge_join, false, "Use partial merge join instead of hash join for LEFT and INNER JOINs.") \
     M(SettingBool, partial_merge_join_optimizations, false, "Enable optimizations in partial merge join") \
-    M(SettingUInt64, partial_merge_join_rows_in_right_blocks, 10000, "Split right-hand joining data in blocks of specified size.") \
-    M(SettingFloat, partial_merge_join_memory_coefficient, 0.25, "How much query memory would be used for left|right table in join. Do not include result data memory.") \
+    M(SettingUInt64, partial_merge_join_rows_in_right_blocks, 10000, "Split right-hand joining data in blocks of specified size. It's a portion of data indexed by min-max values and possibly unloaded on disk.") \
+    M(SettingUInt64, partial_merge_join_rows_in_left_blocks, 10000, "Group left-hand joining data in bigger blocks. Setting it to a bigger value increase JOIN performance and memory usage.") \
     \
     M(SettingUInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \
     M(SettingUInt64, max_bytes_to_transfer, 0, "Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.") \
diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp
index 4cb6494a60..5c1fe4bcae 100644
--- a/dbms/src/Interpreters/AnalyzedJoin.cpp
+++ b/dbms/src/Interpreters/AnalyzedJoin.cpp
@@ -25,20 +25,12 @@ namespace ErrorCodes
 }
 
 AnalyzedJoin::AnalyzedJoin(const Settings & settings)
-    : size_limits(SizeLimits{settings.max_rows_in_join,
-                (settings.partial_merge_join ?
-                    UInt64(settings.max_bytes_in_join * settings.partial_merge_join_memory_coefficient) :
-                    UInt64(settings.max_bytes_in_join)),
-                settings.join_overflow_mode})
+    : size_limits(SizeLimits{settings.max_rows_in_join, settings.max_bytes_in_join, settings.join_overflow_mode})
     , join_use_nulls(settings.join_use_nulls)
     , partial_merge_join(settings.partial_merge_join)
     , partial_merge_join_optimizations(settings.partial_merge_join_optimizations)
     , partial_merge_join_rows_in_right_blocks(settings.partial_merge_join_rows_in_right_blocks)
-{
-    Float32 memory_coef = settings.partial_merge_join_memory_coefficient;
-    if (memory_coef < 0.0f || memory_coef > 1.0f)
-        throw Exception("Wrond partial_merge_join_memory_coefficient. It should be in range [0,1]", ErrorCodes::PARAMETER_OUT_OF_BOUND);
-}
+{}
 
 void AnalyzedJoin::addUsingKey(const ASTPtr & ast)
 {
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index aed151056e..55f4edc819 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -1120,13 +1120,9 @@ void InterpreterSelectQuery::executeImpl(TPipeline & pipeline, const BlockInputS
 
                     if (isMergeJoin(expressions.before_join->getTableJoinAlgo()) && settings.partial_merge_join_optimizations)
                     {
-                        /// TODO: * min(query_memoty_limit, max_bytes_in_join)
-                        size_t bytes_in_block = settings.partial_merge_join_memory_coefficient * settings.max_bytes_in_join;
-                        if (pipeline.streams.size())
-                            bytes_in_block /= pipeline.streams.size();
-                        if (bytes_in_block)
+                        if (size_t rows_in_block = settings.partial_merge_join_rows_in_left_blocks)
                             for (auto & stream : pipeline.streams)
-                                stream = std::make_shared<SquashingBlockInputStream>(stream, 0, bytes_in_block);
+                                stream = std::make_shared<SquashingBlockInputStream>(stream, rows_in_block, 0);
                     }
                 }
 
diff --git a/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql
index 3554aabe5f..cb66fe6038 100644
--- a/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql
+++ b/dbms/tests/queries/0_stateless/01010_pmj_skip_blocks.sql
@@ -8,7 +8,6 @@ CREATE TABLE t2 (x UInt32, y UInt64) engine = MergeTree ORDER BY (x,y);
 
 SET partial_merge_join = 1;
 SET partial_merge_join_optimizations = 1;
-SET partial_merge_join_rows_in_right_blocks = 2;
 SET any_join_distinct_right_table_keys = 1;
 
 INSERT INTO t1 (x, y) VALUES (0, 0);
-- 
GitLab