提交 e9232fc2 编写于 作者: A Alexey Milovidov

dbms: more scalable aggregator: development [#METR-2944].

上级 716fad23
......@@ -95,6 +95,7 @@ public:
{
typename Source::const_iterator it = src.begin();
/// Предполагается, что нулевой ключ (хранящийся отдельно) при итерировании идёт первым.
if (it != src.end() && it.getPtr()->isZero(src))
{
insert(*it);
......@@ -142,6 +143,8 @@ public:
value_type & operator* () const { return *current_it; }
value_type * operator->() const { return &*current_it; }
Cell * getPtr() const { return current_it.getPtr(); }
};
......@@ -177,6 +180,8 @@ public:
const value_type & operator* () const { return *current_it; }
const value_type * operator->() const { return &*current_it; }
const Cell * getPtr() const { return current_it.getPtr(); }
};
......@@ -205,6 +210,10 @@ public:
std::pair<iterator, bool> res;
emplace(Cell::getKey(x), res.first, res.second, hash_value);
if (res.second)
res.first.getPtr()->setMapped(x);
return res;
}
......
#pragma once
#include <DB/Interpreters/Context.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Columns/ColumnString.h>
#include <set>
#include <DB/Core/Block.h>
#include <DB/Core/NamesAndTypes.h>
#include <DB/Parsers/IAST.h>
namespace DB
{
class Context;
namespace VirtualColumnUtils
{
......
......@@ -3,7 +3,9 @@
#include <Yandex/logger_useful.h>
#include <DB/DataStreams/IProfilingBlockInputStream.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/Interpreters/Context.h>
#include <DB/Client/ConnectionPool.h>
......
......@@ -9,8 +9,6 @@
#include <DB/Core/Defines.h>
#include <DB/Core/StringRef.h>
#include <DB/Columns/IColumn.h>
#include <DB/Common/HashTable/HashMap.h>
#include <DB/Common/HashTable/TwoLevelHashMap.h>
template <>
......
......@@ -7,6 +7,8 @@
#include <DB/Core/StringRef.h>
#include <DB/Common/Arena.h>
#include <DB/Common/HashTable/HashMap.h>
#include <DB/Common/HashTable/TwoLevelHashMap.h>
#include <DB/DataStreams/IBlockInputStream.h>
......
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/Interpreters/Context.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <DB/DataStreams/OneBlockInputStream.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/Interpreters/ExpressionAnalyzer.h>
#include <DB/Parsers/ASTIdentifier.h>
#include <DB/Parsers/ASTExpressionList.h>
#include <DB/Parsers/ASTLiteral.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/Storages/IStorage.h>
#include <DB/Interpreters/InterpreterSelectQuery.h>
#include <DB/Common/VirtualColumnUtils.h>
namespace DB
{
......
......@@ -442,10 +442,7 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl(
}
};
/// future и packaged_task используются, чтобы исключения автоматически прокидывались в основной поток.
std::vector<std::future<void>> futures;
futures.reserve(Method::Data::NUM_BUCKETS);
/// packaged_task используются, чтобы исключения автоматически прокидывались в основной поток.
std::vector<std::packaged_task<void()>> tasks;
tasks.reserve(Method::Data::NUM_BUCKETS);
......@@ -453,7 +450,6 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl(
for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket)
{
tasks.emplace_back(std::bind(merge_bucket, bucket, current_memory_tracker));
futures.emplace_back(tasks.back().get_future());
if (thread_pool)
thread_pool->schedule([bucket, &tasks] { tasks[bucket](); });
......@@ -464,8 +460,8 @@ void NO_INLINE Aggregator::mergeTwoLevelDataImpl(
if (thread_pool)
thread_pool->wait();
for (auto & future : futures)
future.get();
for (auto & task : tasks)
task.get_future().get();
}
......@@ -635,16 +631,17 @@ bool Aggregator::executeOnBlock(Block & block, AggregatedDataVariants & result,
/** Почему выбрано 30 000? Потому что при таком количестве элементов, в TwoLevelHashTable,
* скорее всего, хватит места на все ключи, с размером таблицы по-умолчанию
* (256 корзин по 256 ячеек, fill factor = 0.5)
* TODO Не конвертировать, если запрос выполняется в один поток.
*/
if (result.isConvertibleToTwoLevel() && result_size >= TWO_LEVEL_HASH_TABLE_THRESHOLD)
result.convertToTwoLevel();
/// Проверка ограничений.
if (!no_more_keys && max_rows_to_group_by && result.size() > max_rows_to_group_by)
if (!no_more_keys && max_rows_to_group_by && result_size > max_rows_to_group_by)
{
if (group_by_overflow_mode == OverflowMode::THROW)
throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result.size())
throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result_size)
+ " rows, maximum: " + toString(max_rows_to_group_by),
ErrorCodes::TOO_MUCH_ROWS);
else if (group_by_overflow_mode == OverflowMode::BREAK)
......
......@@ -6,6 +6,7 @@
#include <DB/DataStreams/FilterBlockInputStream.h>
#include <DB/DataStreams/ConcatBlockInputStream.h>
#include <DB/DataStreams/CollapsingFinalBlockInputStream.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <DB/DataTypes/DataTypesNumberFixed.h>
#include <DB/Common/VirtualColumnUtils.h>
......
......@@ -14,7 +14,10 @@
#include <DB/DataStreams/ConcatBlockInputStream.h>
#include <DB/DataStreams/narrowBlockInputStreams.h>
#include <DB/DataStreams/AddingDefaultBlockInputStream.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
namespace DB
......
......@@ -6,6 +6,8 @@
#include <DB/Interpreters/InterpreterDropQuery.h>
#include <DB/Parsers/ASTDropQuery.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
namespace DB
......
#include <DB/DataStreams/narrowBlockInputStreams.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <DB/Storages/StorageMerge.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Storages/VirtualColumnFactory.h>
#include <DB/Parsers/ASTSelectQuery.h>
#include <DB/DataTypes/DataTypeString.h>
#include <DB/Columns/ColumnString.h>
namespace DB
{
......
......@@ -8,8 +8,10 @@
#include <DB/IO/ReadBufferFromString.h>
#include <DB/Interpreters/InterpreterAlterQuery.h>
#include <DB/Common/VirtualColumnUtils.h>
#include <DB/DataStreams/AddingConstColumnBlockInputStream.h>
#include <time.h>
namespace DB
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册