Commit 22badef1 authored by CurtizJ

Merge remote-tracking branch 'upstream/master' into merging_with_ties_and_with_fill

......@@ -21,6 +21,7 @@
#include <Common/StringUtils/StringUtils.h>
#include <common/phdr_cache.h>
#include <ext/scope_guard.h>
/// Universal executable for various clickhouse applications
......@@ -130,8 +131,19 @@ bool isClickhouseApp(const std::string & app_suffix, std::vector<char *> & argv)
}
/// This allows implementing an assert that forbids initialization of a class in static constructors.
/// Usage:
///
/// extern bool inside_main;
/// class C { C() { assert(inside_main); } };
bool inside_main = false;
int main(int argc_, char ** argv_)
{
inside_main = true;
SCOPE_EXIT({ inside_main = false; });
/// Reset new handler to default (that throws std::bad_alloc)
/// It is needed because LLVM library clobbers it.
std::set_new_handler(nullptr);
......
......@@ -447,9 +447,10 @@ namespace ErrorCodes
extern const int QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW = 470;
extern const int SETTINGS_ARE_NOT_SUPPORTED = 471;
extern const int READONLY_SETTING = 472;
extern const int INVALID_TEMPLATE_FORMAT = 473;
extern const int INVALID_WITH_FILL_EXPRESSION = 474;
extern const int WITH_TIES_WITHOUT_ORDER_BY = 475;
extern const int DEADLOCK_AVOIDED = 473;
extern const int INVALID_TEMPLATE_FORMAT = 474;
extern const int INVALID_WITH_FILL_EXPRESSION = 475;
extern const int WITH_TIES_WITHOUT_ORDER_BY = 476;
extern const int KEEPER_EXCEPTION = 999;
extern const int POCO_EXCEPTION = 1000;
......
......@@ -4,6 +4,8 @@
#include <Common/CurrentMetrics.h>
#include <Common/ProfileEvents.h>
#include <cassert>
namespace ProfileEvents
{
......@@ -29,6 +31,7 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int DEADLOCK_AVOIDED;
}
......@@ -52,6 +55,44 @@ public:
};
namespace
{
/// Global information about all read locks that a query holds. It is needed to avoid certain kinds of deadlocks.
class QueryLockInfo
{
private:
std::mutex mutex;
std::map<std::string, size_t> queries;
public:
void add(const String & query_id)
{
std::lock_guard lock(mutex);
++queries[query_id];
}
void remove(const String & query_id)
{
std::lock_guard lock(mutex);
auto it = queries.find(query_id);
assert(it != queries.end());
if (--it->second == 0)
queries.erase(it);
}
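/// Throws DEADLOCK_AVOIDED if this query already holds any read lock: the caller is about to wait for another lock and could deadlock.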
void check(const String & query_id)
{
std::lock_guard lock(mutex);
if (queries.count(query_id))
throw Exception("Possible deadlock avoided. Client should retry.", ErrorCodes::DEADLOCK_AVOIDED);
}
};
QueryLockInfo all_read_locks;
}
RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
{
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
......@@ -68,29 +109,48 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
GroupsContainer::iterator it_group;
ClientsContainer::iterator it_client;
/// This object is placed above unique_lock, because it may lock in destructor.
LockHolder res;
std::unique_lock lock(mutex);
/// Check if the same query is acquiring a previously acquired lock
LockHolder existing_holder_ptr;
if (query_id != RWLockImpl::NO_QUERY)
{
auto it_query = query_id_to_holder.find(query_id);
if (it_query != query_id_to_holder.end())
existing_holder_ptr = it_query->second.lock();
res = it_query->second.lock();
}
if (existing_holder_ptr)
if (res)
{
/// XXX: it means we can't upgrade lock from read to write - with proper waiting!
if (type != Read || existing_holder_ptr->it_group->type != Read)
if (type != Read || res->it_group->type != Read)
throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
return existing_holder_ptr;
else
return res;
}
/** If the query already has any active read lock and tries to acquire another read lock
* but it is not at the front of the queue and has to wait, a deadlock is possible:
*
* Example (four queries, two RWLocks - 'a' and 'b'):
*
* --> time -->
*
* q1: ra rb
* q2: wa
* q3: rb ra
* q4: wb
*
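* Here q1 waits for 'b' behind q4's pending write, and q3 waits for 'a' behind q2's pending write,
* while q2 and q4 in turn wait for the read locks already held by q1 and q3 - a cycle.
*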
* We will throw an exception instead.
*/
if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
{
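/// A new read group would have to wait behind the pending write group; if this query already holds read locks elsewhere, fail fast instead of risking the deadlock described above.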
if (type == Type::Read && !queue.empty() && queue.back().type == Type::Write && query_id != RWLockImpl::NO_QUERY)
all_read_locks.check(query_id);
/// Create new group of clients
it_group = queue.emplace(queue.end(), type);
}
......@@ -98,6 +158,9 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
{
/// Will append myself to last group
it_group = std::prev(queue.end());
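/// The last read group is not at the front of the queue, so we would have to wait; check for a possible deadlock first.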
if (it_group != queue.begin() && query_id != RWLockImpl::NO_QUERY)
all_read_locks.check(query_id);
}
/// Append myself to the end of chosen group
......@@ -114,14 +177,19 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
throw;
}
LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));
res.reset(new LockHolderImpl(shared_from_this(), it_group, it_client));
/// Wait for a notification until our group is at the front of the queue.
it_group->cv.wait(lock, [&] () { return it_group == queue.begin(); });
/// Insert myself (a weak_ptr to the holder) into query_id_to_holder to implement recursive locking
if (query_id != RWLockImpl::NO_QUERY)
{
query_id_to_holder.emplace(query_id, res);
if (type == Type::Read)
all_read_locks.add(query_id);
}
res->query_id = query_id;
finalize_metrics();
......@@ -131,11 +199,14 @@ RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String &
RWLockImpl::LockHolderImpl::~LockHolderImpl()
{
std::unique_lock lock(parent->mutex);
std::lock_guard lock(parent->mutex);
/// Remove weak_ptrs to the holder, since there are no owners of the current lock
parent->query_id_to_holder.erase(query_id);
if (*it_client == RWLockImpl::Read && query_id != RWLockImpl::NO_QUERY)
all_read_locks.remove(query_id);
/// Removes myself from client list of our group
it_group->clients.erase(it_client);
......@@ -156,6 +227,7 @@ RWLockImpl::LockHolderImpl::LockHolderImpl(RWLock && parent_, RWLockImpl::Groups
: parent{std::move(parent_)}, it_group{it_group_}, it_client{it_client_},
active_client_increment{(*it_client == RWLockImpl::Read) ? CurrentMetrics::RWLockActiveReaders
: CurrentMetrics::RWLockActiveWriters}
{}
{
}
}
......@@ -13,6 +13,14 @@
using namespace DB;
namespace DB
{
namespace ErrorCodes
{
extern const int DEADLOCK_AVOIDED;
}
}
TEST(Common, RWLock_1)
{
......@@ -123,6 +131,74 @@ TEST(Common, RWLock_Recursive)
}
TEST(Common, RWLock_Deadlock)
{
static auto lock1 = RWLockImpl::create();
static auto lock2 = RWLockImpl::create();
/**
* q1: r1 r2
* q2: w1
* q3: r2 r1
* q4: w2
*/
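/// The usleep() calls below stagger the threads so that the lock requests arrive roughly in the order shown in the scheme above.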
std::thread t1([&] ()
{
auto holder1 = lock1->getLock(RWLockImpl::Read, "q1");
usleep(100000);
usleep(100000);
usleep(100000);
try
{
auto holder2 = lock2->getLock(RWLockImpl::Read, "q1");
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::DEADLOCK_AVOIDED)
throw;
}
});
std::thread t2([&] ()
{
usleep(100000);
auto holder1 = lock1->getLock(RWLockImpl::Write, "q2");
});
std::thread t3([&] ()
{
usleep(100000);
usleep(100000);
auto holder2 = lock2->getLock(RWLockImpl::Read, "q3");
usleep(100000);
usleep(100000);
try
{
auto holder1 = lock1->getLock(RWLockImpl::Read, "q3");
}
catch (const Exception & e)
{
if (e.code() != ErrorCodes::DEADLOCK_AVOIDED)
throw;
}
});
std::thread t4([&] ()
{
usleep(100000);
usleep(100000);
usleep(100000);
auto holder2 = lock2->getLock(RWLockImpl::Write, "q4");
});
t1.join();
t2.join();
t3.join();
t4.join();
}
TEST(Common, RWLock_PerfTest_Readers)
{
constexpr int cycles = 100000; // 100k
......
......@@ -332,7 +332,7 @@ UInt64 geohashesInBox(const GeohashesInBoxPreparedArgs & args, char * out)
}
}
if (items == 0 && args.items_count != 0)
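/// If nothing was produced, still emit a single geohash for the minimal corner of the box.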
if (items == 0)
{
size_t l = geohashEncodeImpl(args.longitude_min, args.latitude_min, args.precision, out);
out += l;
......
......@@ -336,7 +336,9 @@ void FunctionArrayEnumerateRankedExtended<Derived>::executeMethodImpl(
/// Skipping offsets if no data in this array
if (prev_off == off)
{
want_clear = true;
if (depth_to_look > 2)
want_clear = true;
if (depth_to_look >= 2)
{
......
#include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Interpreters/Join.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTSelectQuery.h>
#include <Core/Block.h>
#include <Storages/IStorage.h>
#include <DataTypes/DataTypeNullable.h>
namespace DB
{
......@@ -26,7 +31,6 @@ void AnalyzedJoin::addUsingKey(const ASTPtr & ast)
void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
{
with_using = false;
key_names_left.push_back(left_table_ast->getColumnName());
key_names_right.push_back(right_table_ast->getAliasOrColumnName());
......@@ -37,7 +41,7 @@ void AnalyzedJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
/// @return how many times right key appears in ON section.
size_t AnalyzedJoin::rightKeyInclusion(const String & name) const
{
if (with_using)
if (hasUsing())
return 0;
size_t count = 0;
......@@ -101,6 +105,120 @@ std::unordered_map<String, String> AnalyzedJoin::getOriginalColumnsMap(const Nam
return out;
}
ASTPtr AnalyzedJoin::leftKeysList() const
{
ASTPtr keys_list = std::make_shared<ASTExpressionList>();
keys_list->children = key_asts_left;
return keys_list;
}
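/// Right keys are returned only for the ON syntax; for USING the list is left empty.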
ASTPtr AnalyzedJoin::rightKeysList() const
{
ASTPtr keys_list = std::make_shared<ASTExpressionList>();
if (hasOn())
keys_list->children = key_asts_right;
return keys_list;
}
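/// Names of columns that must be read from the joined table: the right-side keys plus the columns added to the result.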
Names AnalyzedJoin::requiredJoinedNames() const
{
NameSet required_columns_set(key_names_right.begin(), key_names_right.end());
for (const auto & joined_column : columns_added_by_join)
required_columns_set.insert(joined_column.name);
return Names(required_columns_set.begin(), required_columns_set.end());
}
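/// Extend required_columns with right-side keys and joined columns that are not already present in the sample block.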
void AnalyzedJoin::appendRequiredColumns(const Block & sample, NameSet & required_columns) const
{
for (auto & column : key_names_right)
if (!sample.has(column))
required_columns.insert(column);
for (auto & column : columns_added_by_join)
if (!sample.has(column.name))
required_columns.insert(column.name);
}
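/// For LEFT and FULL joins with join_use_nulls, columns coming from the right table become Nullable.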
void AnalyzedJoin::addJoinedColumn(const NameAndTypePair & joined_column)
{
if (join_use_nulls && isLeftOrFull(table_join.kind))
{
auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
}
else
columns_added_by_join.push_back(joined_column);
}
void AnalyzedJoin::addJoinedColumnsAndCorrectNullability(Block & sample_block) const
{
bool right_or_full_join = isRightOrFull(table_join.kind);
bool left_or_full_join = isLeftOrFull(table_join.kind);
for (auto & col : sample_block)
{
/// Materialize column.
/// Column is not empty if it is constant, but after Join all constants will be materialized.
/// So, we need to remove constants from the header.
if (col.column)
col.column = nullptr;
bool make_nullable = join_use_nulls && right_or_full_join;
if (make_nullable && col.type->canBeInsideNullable())
col.type = makeNullable(col.type);
}
for (const auto & col : columns_added_by_join)
{
auto res_type = col.type;
bool make_nullable = join_use_nulls && left_or_full_join;
if (!make_nullable)
{
/// Keys from right table are usually not stored in Join, but copied from the left one.
/// So, if left key is nullable, let's make right key nullable too.
/// Note: for some join types it's not needed and, probably, may be removed.
/// Note: changing this code, take into account the implementation in Join.cpp.
auto it = std::find(key_names_right.begin(), key_names_right.end(), col.name);
if (it != key_names_right.end())
{
auto pos = it - key_names_right.begin();
const auto & left_key_name = key_names_left[pos];
make_nullable = sample_block.getByName(left_key_name).type->isNullable();
}
}
if (make_nullable && res_type->canBeInsideNullable())
res_type = makeNullable(res_type);
sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name));
}
}
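/// Field-by-field comparison of two join configurations (used to compare JOIN expression actions).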
bool AnalyzedJoin::sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y)
{
if (!x && !y)
return true;
if (!x || !y)
return false;
return x->table_join.kind == y->table_join.kind
&& x->table_join.strictness == y->table_join.strictness
&& x->key_names_left == y->key_names_left
&& x->key_names_right == y->key_names_right
&& x->columns_added_by_join == y->columns_added_by_join;
}
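/// Construct a hash Join over the right-side keys with the kind/strictness of this join and the given size limits.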
JoinPtr AnalyzedJoin::makeHashJoin(const Block & sample_block, const SizeLimits & size_limits_for_join) const
{
auto join = std::make_shared<Join>(key_names_right, join_use_nulls, size_limits_for_join, table_join.kind, table_join.strictness);
join->setSampleBlock(sample_block);
return join;
}
NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context)
{
NamesAndTypesList names_and_type_list;
......
......@@ -2,7 +2,8 @@
#include <Core/Names.h>
#include <Core/NamesAndTypes.h>
#include <Parsers/IAST.h>
#include <Core/SettingsCommon.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <utility>
#include <memory>
......@@ -13,6 +14,10 @@ namespace DB
class Context;
class ASTSelectQuery;
struct DatabaseAndTableWithAlias;
class Block;
class Join;
using JoinPtr = std::shared_ptr<Join>;
struct AnalyzedJoin
{
......@@ -30,18 +35,19 @@ struct AnalyzedJoin
private:
friend class SyntaxAnalyzer;
friend struct SyntaxAnalyzerResult;
friend class ExpressionAnalyzer;
friend class SelectQueryExpressionAnalyzer;
Names key_names_left;
Names key_names_right; /// Duplicating names are qualified.
ASTs key_asts_left;
ASTs key_asts_right;
bool with_using = true;
ASTTableJoin table_join;
bool join_use_nulls = false;
/// All columns which can be read from joined table. Duplicating names are qualified.
NamesAndTypesList columns_from_joined_table;
/// Columns will be added to block by JOIN. It's a subset of columns_from_joined_table with corrected Nullability
NamesAndTypesList columns_added_by_join;
/// Name -> original name. Names are the same as in columns_from_joined_table list.
std::unordered_map<String, String> original_names;
/// Original name -> name. Only renamed columns.
......@@ -51,8 +57,8 @@ public:
void addUsingKey(const ASTPtr & ast);
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);
bool hasUsing() const { return with_using; }
bool hasOn() const { return !with_using; }
bool hasUsing() const { return table_join.using_expression_list != nullptr; }
bool hasOn() const { return !hasUsing(); }
NameSet getQualifiedColumnsSet() const;
NameSet getOriginalColumnsSet() const;
......@@ -60,6 +66,22 @@ public:
void deduplicateAndQualifyColumnNames(const NameSet & left_table_columns, const String & right_table_prefix);
size_t rightKeyInclusion(const String & name) const;
void appendRequiredColumns(const Block & sample, NameSet & required_columns) const;
void addJoinedColumn(const NameAndTypePair & joined_column);
void addJoinedColumnsAndCorrectNullability(Block & sample_block) const;
ASTPtr leftKeysList() const;
ASTPtr rightKeysList() const; /// For ON syntax only
Names requiredJoinedNames() const;
const Names & keyNamesLeft() const { return key_names_left; }
const NamesAndTypesList & columnsFromJoinedTable() const { return columns_from_joined_table; }
const NamesAndTypesList & columnsAddedByJoin() const { return columns_added_by_join; }
JoinPtr makeHashJoin(const Block & sample_block, const SizeLimits & size_limits_for_join) const;
static bool sameJoin(const AnalyzedJoin * x, const AnalyzedJoin * y);
};
struct ASTTableExpression;
......
......@@ -12,7 +12,6 @@
#include <Functions/IFunction.h>
#include <set>
#include <optional>
#include <DataTypes/DataTypeNullable.h>
namespace ProfileEvents
......@@ -45,7 +44,8 @@ Names ExpressionAction::getNeededColumns() const
res.insert(res.end(), array_joined_columns.begin(), array_joined_columns.end());
res.insert(res.end(), join_key_names_left.begin(), join_key_names_left.end());
if (join_params)
res.insert(res.end(), join_params->keyNamesLeft().begin(), join_params->keyNamesLeft().end());
for (const auto & column : projection)
res.push_back(column.first);
......@@ -159,20 +159,12 @@ ExpressionAction ExpressionAction::arrayJoin(const NameSet & array_joined_column
return a;
}
ExpressionAction ExpressionAction::ordinaryJoin(
const ASTTableJoin & join_params,
std::shared_ptr<const Join> join_,
const Names & join_key_names_left,
const Names & join_key_names_right,
const NamesAndTypesList & columns_added_by_join_)
ExpressionAction ExpressionAction::ordinaryJoin(std::shared_ptr<AnalyzedJoin> join_params, std::shared_ptr<const Join> hash_join)
{
ExpressionAction a;
a.type = JOIN;
a.join = std::move(join_);
a.join_kind = join_params.kind;
a.join_key_names_left = join_key_names_left;
a.join_key_names_right = join_key_names_right;
a.columns_added_by_join = columns_added_by_join_;
a.join_params = join_params;
a.join = hash_join;
return a;
}
......@@ -277,51 +269,7 @@ void ExpressionAction::prepare(Block & sample_block, const Settings & settings,
case JOIN:
{
bool is_null_used_as_default = settings.join_use_nulls;
bool right_or_full_join = isRightOrFull(join_kind);
bool left_or_full_join = isLeftOrFull(join_kind);
for (auto & col : sample_block)
{
/// Materialize column.
/// Column is not empty if it is constant, but after Join all constants will be materialized.
/// So, we need remove constants from header.
if (col.column)
col.column = nullptr;
bool make_nullable = is_null_used_as_default && right_or_full_join;
if (make_nullable && col.type->canBeInsideNullable())
col.type = makeNullable(col.type);
}
for (const auto & col : columns_added_by_join)
{
auto res_type = col.type;
bool make_nullable = is_null_used_as_default && left_or_full_join;
if (!make_nullable)
{
/// Keys from right table are usually not stored in Join, but copied from the left one.
/// So, if left key is nullable, let's make right key nullable too.
/// Note: for some join types it's not needed and, probably, may be removed.
/// Note: changing this code, take into account the implementation in Join.cpp.
auto it = std::find(join_key_names_right.begin(), join_key_names_right.end(), col.name);
if (it != join_key_names_right.end())
{
auto pos = it - join_key_names_right.begin();
const auto & left_key_name = join_key_names_left[pos];
make_nullable = sample_block.getByName(left_key_name).type->isNullable();
}
}
if (make_nullable && res_type->canBeInsideNullable())
res_type = makeNullable(res_type);
sample_block.insert(ColumnWithTypeAndName(nullptr, res_type, col.name));
}
join_params->addJoinedColumnsAndCorrectNullability(sample_block);
break;
}
......@@ -527,7 +475,7 @@ void ExpressionAction::execute(Block & block, bool dry_run) const
case JOIN:
{
join->joinBlock(block, join_key_names_left, columns_added_by_join);
join->joinBlock(block, *join_params);
break;
}
......@@ -645,9 +593,10 @@ std::string ExpressionAction::toString() const
case JOIN:
ss << "JOIN ";
for (NamesAndTypesList::const_iterator it = columns_added_by_join.begin(); it != columns_added_by_join.end(); ++it)
for (NamesAndTypesList::const_iterator it = join_params->columnsAddedByJoin().begin();
it != join_params->columnsAddedByJoin().end(); ++it)
{
if (it != columns_added_by_join.begin())
if (it != join_params->columnsAddedByJoin().begin())
ss << ", ";
ss << it->name;
}
......@@ -1220,7 +1169,7 @@ BlockInputStreamPtr ExpressionActions::createStreamWithNonJoinedDataIfFullOrRigh
for (const auto & action : actions)
if (action.join && isRightOrFull(action.join->getKind()))
return action.join->createStreamWithNonJoinedRows(
source_header, action.join_key_names_left, action.columns_added_by_join, max_block_size);
source_header, *action.join_params, max_block_size);
return {};
}
......@@ -1267,7 +1216,7 @@ UInt128 ExpressionAction::ActionHash::operator()(const ExpressionAction & action
hash.update(col);
break;
case JOIN:
for (const auto & col : action.columns_added_by_join)
for (const auto & col : action.join_params->columnsAddedByJoin())
hash.update(col.name);
break;
case PROJECT:
......@@ -1326,9 +1275,7 @@ bool ExpressionAction::operator==(const ExpressionAction & other) const
&& array_joined_columns == other.array_joined_columns
&& array_join_is_left == other.array_join_is_left
&& join == other.join
&& join_key_names_left == other.join_key_names_left
&& join_key_names_right == other.join_key_names_right
&& columns_added_by_join == other.columns_added_by_join
&& AnalyzedJoin::sameJoin(join_params.get(), other.join_params.get())
&& projection == other.projection
&& is_function_compiled == other.is_function_compiled;
}
......
......@@ -6,6 +6,7 @@
#include <Core/Settings.h>
#include <DataStreams/IBlockStream_fwd.h>
#include <Interpreters/Context.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Common/SipHash.h>
#include "config_core.h"
#include <unordered_map>
......@@ -104,11 +105,8 @@ public:
bool unaligned_array_join = false;
/// For JOIN
std::shared_ptr<AnalyzedJoin> join_params = nullptr;
std::shared_ptr<const Join> join;
ASTTableJoin::Kind join_kind;
Names join_key_names_left;
Names join_key_names_right;
NamesAndTypesList columns_added_by_join;
/// For PROJECT.
NamesWithAliases projection;
......@@ -124,9 +122,7 @@ public:
static ExpressionAction project(const Names & projected_columns_);
static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_);
static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context);
static ExpressionAction ordinaryJoin(const ASTTableJoin & join_params, std::shared_ptr<const Join> join_,
const Names & join_key_names_left, const Names & join_key_names_right,
const NamesAndTypesList & columns_added_by_join_);
static ExpressionAction ordinaryJoin(std::shared_ptr<AnalyzedJoin> join_params, std::shared_ptr<const Join> hash_join);
/// Which columns are necessary to perform this action.
Names getNeededColumns() const;
......
......@@ -29,7 +29,6 @@
#include <Interpreters/PredicateExpressionsOptimizer.h>
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/Set.h>
#include <Interpreters/Join.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
......@@ -134,14 +133,8 @@ void ExpressionAnalyzer::analyzeAggregation()
const ASTTablesInSelectQueryElement * join = select_query->join();
if (join)
{
const auto & table_join = join->table_join->as<ASTTableJoin &>();
if (table_join.using_expression_list)
getRootActions(table_join.using_expression_list, true, temp_actions);
if (table_join.on_expression)
for (const auto & key_ast : analyzedJoin().key_asts_left)
getRootActions(key_ast, true, temp_actions);
addJoinAction(table_join, temp_actions);
getRootActions(analyzedJoin().leftKeysList(), true, temp_actions);
addJoinAction(temp_actions);
}
}
......@@ -298,7 +291,8 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
{
NamesAndTypesList temp_columns = sourceColumns();
temp_columns.insert(temp_columns.end(), array_join_columns.begin(), array_join_columns.end());
temp_columns.insert(temp_columns.end(), columnsAddedByJoin().begin(), columnsAddedByJoin().end());
temp_columns.insert(temp_columns.end(),
analyzedJoin().columnsAddedByJoin().begin(), analyzedJoin().columnsAddedByJoin().end());
ExpressionActionsPtr temp_actions = std::make_shared<ExpressionActions>(temp_columns, context);
getRootActions(left_in_operand, true, temp_actions);
......@@ -412,22 +406,10 @@ bool SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActionsChain & cha
return true;
}
static void appendRequiredColumns(
NameSet & required_columns, const Block & sample, const Names & key_names_right, const NamesAndTypesList & columns_added_by_join)
{
for (auto & column : key_names_right)
if (!sample.has(column))
required_columns.insert(column);
for (auto & column : columns_added_by_join)
if (!sample.has(column.name))
required_columns.insert(column.name);
}
/// It's possible to set nullptr as join for only_types mode
void ExpressionAnalyzer::addJoinAction(const ASTTableJoin & join_params, ExpressionActionsPtr & actions, JoinPtr join) const
void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, JoinPtr join) const
{
actions->add(ExpressionAction::ordinaryJoin(join_params, std::move(join), analyzedJoin().key_names_left, analyzedJoin().key_names_right, columnsAddedByJoin()));
actions->add(ExpressionAction::ordinaryJoin(syntax->analyzed_join, join));
}
bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types)
......@@ -438,16 +420,11 @@ bool SelectQueryExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, b
SubqueryForSet & subquery_for_set = getSubqueryForJoin(*ast_join);
ASTPtr left_keys_list = std::make_shared<ASTExpressionList>();
left_keys_list->children = analyzedJoin().key_asts_left;
initChain(chain, sourceColumns());
ExpressionActionsChain::Step & step = chain.steps.back();
auto & join_params = ast_join->table_join->as<ASTTableJoin &>();
getRootActions(left_keys_list, only_types, step.actions);
addJoinAction(join_params, step.actions, subquery_for_set.join);
getRootActions(analyzedJoin().leftKeysList(), only_types, step.actions);
addJoinAction(step.actions, subquery_for_set.join);
return true;
}
......@@ -524,11 +501,9 @@ void SelectQueryExpressionAnalyzer::makeHashJoin(const ASTTablesInSelectQueryEle
Names action_columns = joined_block_actions->getRequiredColumns();
NameSet required_columns(action_columns.begin(), action_columns.end());
auto & analyzed_join = analyzedJoin();
appendRequiredColumns(
required_columns, joined_block_actions->getSampleBlock(), analyzed_join.key_names_right, columnsAddedByJoin());
analyzedJoin().appendRequiredColumns(joined_block_actions->getSampleBlock(), required_columns);
auto original_map = analyzed_join.getOriginalColumnsMap(required_columns);
auto original_map = analyzedJoin().getOriginalColumnsMap(required_columns);
Names original_columns;
for (auto & pr : original_map)
original_columns.push_back(pr.second);
......@@ -542,29 +517,16 @@ void SelectQueryExpressionAnalyzer::makeHashJoin(const ASTTablesInSelectQueryEle
joined_block_actions->execute(sample_block);
/// TODO You do not need to set this up when JOIN is only needed on remote servers.
auto & join_params = join_element.table_join->as<ASTTableJoin &>();
subquery_for_set.join = std::make_shared<Join>(analyzedJoin().key_names_right, settings.join_use_nulls,
settings.size_limits_for_join, join_params.kind, join_params.strictness);
subquery_for_set.join->setSampleBlock(sample_block);
subquery_for_set.join = analyzedJoin().makeHashJoin(sample_block, settings.size_limits_for_join);
subquery_for_set.joined_block_actions = joined_block_actions;
}
ExpressionActionsPtr SelectQueryExpressionAnalyzer::createJoinedBlockActions() const
{
/// Create custom expression list with join keys from right table.
ASTPtr expression_list = std::make_shared<ASTExpressionList>();
ASTs & children = expression_list->children;
if (analyzedJoin().hasOn())
for (const auto & join_right_key : analyzedJoin().key_asts_right)
children.emplace_back(join_right_key);
NameSet required_columns_set(analyzedJoin().key_names_right.begin(), analyzedJoin().key_names_right.end());
for (const auto & joined_column : columnsAddedByJoin())
required_columns_set.insert(joined_column.name);
Names required_columns(required_columns_set.begin(), required_columns_set.end());
ASTPtr expression_list = analyzedJoin().rightKeysList();
Names required_columns = analyzedJoin().requiredJoinedNames();
auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, analyzedJoin().columns_from_joined_table, required_columns);
auto syntax_result = SyntaxAnalyzer(context).analyze(expression_list, analyzedJoin().columnsFromJoinedTable(), required_columns);
return ExpressionAnalyzer(expression_list, syntax_result, context).getActions(true, false);
}
......
......@@ -121,9 +121,8 @@ protected:
SyntaxAnalyzerResultPtr syntax;
const StoragePtr & storage() const { return syntax->storage; } /// The main table in FROM clause, if exists.
const AnalyzedJoin & analyzedJoin() const { return syntax->analyzed_join; }
const AnalyzedJoin & analyzedJoin() const { return *syntax->analyzed_join; }
const NamesAndTypesList & sourceColumns() const { return syntax->required_source_columns; }
const NamesAndTypesList & columnsAddedByJoin() const { return syntax->columns_added_by_join; }
const std::vector<const ASTFunction *> & aggregates() const { return syntax->aggregates; }
/// Find global subqueries in the GLOBAL IN/JOIN sections. Fills in external_tables.
......@@ -131,7 +130,7 @@ protected:
void addMultipleArrayJoinAction(ExpressionActionsPtr & actions, bool is_left) const;
void addJoinAction(const ASTTableJoin & join_params, ExpressionActionsPtr & actions, JoinPtr join = {}) const;
void addJoinAction(ExpressionActionsPtr & actions, JoinPtr join = {}) const;
void getRootActions(const ASTPtr & ast, bool no_subqueries, ExpressionActionsPtr & actions, bool only_consts = false);
......
......@@ -10,6 +10,7 @@
#include <DataTypes/DataTypeNullable.h>
#include <Interpreters/Join.h>
#include <Interpreters/AnalyzedJoin.h>
#include <Interpreters/joinDispatch.h>
#include <Interpreters/NullableUtils.h>
......@@ -1048,8 +1049,11 @@ void Join::joinGet(Block & block, const String & column_name) const
}
void Join::joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const
void Join::joinBlock(Block & block, const AnalyzedJoin & join_params) const
{
const Names & key_names_left = join_params.keyNamesLeft();
const NamesAndTypesList & columns_added_by_join = join_params.columnsAddedByJoin();
std::shared_lock lock(rwlock);
checkTypesOfKeys(block, key_names_left, sample_block_with_keys);
......@@ -1457,10 +1461,11 @@ private:
};
BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left,
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size) const
BlockInputStreamPtr Join::createStreamWithNonJoinedRows(const Block & left_sample_block, const AnalyzedJoin & join_params,
UInt64 max_block_size) const
{
return std::make_shared<NonJoinedBlockInputStream>(*this, left_sample_block, key_names_left, columns_added_by_join, max_block_size);
return std::make_shared<NonJoinedBlockInputStream>(*this, left_sample_block,
join_params.keyNamesLeft(), join_params.columnsAddedByJoin(), max_block_size);
}
......
......@@ -26,6 +26,8 @@
namespace DB
{
struct AnalyzedJoin;
namespace JoinStuff
{
......@@ -141,7 +143,7 @@ public:
/** Join data from the map (that was previously built by calls to insertFromBlock) to the block with data from "left" table.
* Could be called from different threads in parallel.
*/
void joinBlock(Block & block, const Names & key_names_left, const NamesAndTypesList & columns_added_by_join) const;
void joinBlock(Block & block, const AnalyzedJoin & join_params) const;
/// Infer the return type for joinGet function
DataTypePtr joinGetReturnType(const String & column_name) const;
......@@ -161,8 +163,8 @@ public:
* Use only after all calls to joinBlock were done.
* left_sample_block is passed without account of 'use_nulls' setting (columns will be converted to Nullable inside).
*/
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const Names & key_names_left,
const NamesAndTypesList & columns_added_by_join, UInt64 max_block_size) const;
BlockInputStreamPtr createStreamWithNonJoinedRows(const Block & left_sample_block, const AnalyzedJoin & join_params,
UInt64 max_block_size) const;
/// Number of keys in all built JOIN maps.
size_t getTotalRowCount() const;
......
......@@ -489,14 +489,13 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const
}
}
void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, ASTTableJoin::Kind & join_kind)
void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_default_strictness, ASTTableJoin & out_table_join)
{
const ASTTablesInSelectQueryElement * node = select_query.join();
if (!node)
return;
auto & table_join = const_cast<ASTTablesInSelectQueryElement *>(node)->table_join->as<ASTTableJoin &>();
join_kind = table_join.kind;
if (table_join.strictness == ASTTableJoin::Strictness::Unspecified &&
table_join.kind != ASTTableJoin::Kind::Cross)
......@@ -509,6 +508,8 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul
throw Exception("Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty",
DB::ErrorCodes::EXPECTED_ALL_OR_ANY);
}
out_table_join = table_join;
}
/// Find the columns that are obtained by JOIN.
......@@ -609,8 +610,7 @@ std::vector<const ASTFunction *> getAggregates(const ASTPtr & query)
/// Calculate which columns are required to execute the expression.
/// Then, delete all other columns from the list of available columns.
/// After execution, columns will only contain the list of columns needed to read from the table.
void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns,
bool make_joined_columns_nullable)
void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns)
{
/// We calculate required_source_columns with source_columns modifications and swap them on exit
required_source_columns = source_columns;
......@@ -637,8 +637,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
avaliable_columns.insert(name.name);
/// Add columns obtained by JOIN (if needed).
columns_added_by_join.clear();
for (const auto & joined_column : analyzed_join.columns_from_joined_table)
for (const auto & joined_column : analyzed_join->columnsFromJoinedTable())
{
auto & name = joined_column.name;
if (avaliable_columns.count(name))
......@@ -647,16 +646,9 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
if (required.count(name))
{
/// Optimisation: do not add columns needed only in JOIN ON section.
if (columns_context.nameInclusion(name) > analyzed_join.rightKeyInclusion(name))
{
if (make_joined_columns_nullable)
{
auto type = joined_column.type->canBeInsideNullable() ? makeNullable(joined_column.type) : joined_column.type;
columns_added_by_join.emplace_back(NameAndTypePair(joined_column.name, std::move(type)));
}
else
columns_added_by_join.push_back(joined_column);
}
if (columns_context.nameInclusion(name) > analyzed_join->rightKeyInclusion(name))
analyzed_join->addJoinedColumn(joined_column);
required.erase(name);
}
}
......@@ -766,7 +758,7 @@ void SyntaxAnalyzerResult::collectUsedColumns(const ASTPtr & query, const NamesA
if (columns_context.has_table_join)
{
ss << ", joined columns:";
for (const auto & column : analyzed_join.columns_from_joined_table)
for (const auto & column : analyzed_join->columnsFromJoinedTable())
ss << " '" << column.name << "'";
}
......@@ -798,15 +790,17 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
storage = context.tryGetTable(db_and_table->database, db_and_table->table);
}
const auto & settings = context.getSettingsRef();
SyntaxAnalyzerResult result;
result.storage = storage;
result.source_columns = source_columns_;
result.analyzed_join = std::make_shared<AnalyzedJoin>(); /// TODO: move to select_query logic
result.analyzed_join->join_use_nulls = settings.join_use_nulls;
collectSourceColumns(select_query, result.storage, result.source_columns);
NameSet source_columns_set = removeDuplicateColumns(result.source_columns);
const auto & settings = context.getSettingsRef();
Names source_columns_list;
source_columns_list.reserve(result.source_columns.size());
for (const auto & type_name : result.source_columns)
......@@ -831,13 +825,13 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
const auto & joined_expression = node->table_expression->as<ASTTableExpression &>();
DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());
result.analyzed_join.columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context);
result.analyzed_join.deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix());
result.analyzed_join->columns_from_joined_table = getNamesAndTypeListFromTableExpression(joined_expression, context);
result.analyzed_join->deduplicateAndQualifyColumnNames(source_columns_set, table.getQualifiedNamePrefix());
}
translateQualifiedNames(query, *select_query, context,
(storage ? storage->getColumns().getOrdinary().getNames() : source_columns_list), source_columns_set,
result.analyzed_join.getQualifiedColumnsSet());
result.analyzed_join->getQualifiedColumnsSet());
/// Rewrite IN and/or JOIN for distributed tables according to distributed_product_mode setting.
InJoinSubqueriesPreprocessor(context).visit(query);
......@@ -872,7 +866,6 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Optimize 'if' with constant condition after constants were substituted for scalar subqueries.
OptimizeIfWithConstantConditionVisitor(result.aliases).visit(query);
bool make_joined_columns_nullable = false;
if (select_query)
{
/// GROUP BY injective function elimination.
......@@ -893,15 +886,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
/// Push the predicate expression down to the subqueries.
result.rewrite_subqueries = PredicateExpressionsOptimizer(select_query, settings, context).optimize();
ASTTableJoin::Kind join_kind = ASTTableJoin::Kind::Comma;
setJoinStrictness(*select_query, settings.join_default_strictness, join_kind);
make_joined_columns_nullable = settings.join_use_nulls && isLeftOrFull(join_kind);
collectJoinedColumns(result.analyzed_join, *select_query, source_columns_set, result.aliases);
setJoinStrictness(*select_query, settings.join_default_strictness, result.analyzed_join->table_join);
collectJoinedColumns(*result.analyzed_join, *select_query, source_columns_set, result.aliases);
}
result.aggregates = getAggregates(query);
result.collectUsedColumns(query, additional_source_columns, make_joined_columns_nullable);
result.collectUsedColumns(query, additional_source_columns);
return std::make_shared<const SyntaxAnalyzerResult>(result);
}
......
......@@ -15,13 +15,11 @@ class ASTFunction;
struct SyntaxAnalyzerResult
{
StoragePtr storage;
AnalyzedJoin analyzed_join;
std::shared_ptr<AnalyzedJoin> analyzed_join;
NamesAndTypesList source_columns;
/// Set of columns that are enough to read from the table to evaluate the expression. It does not include joined columns.
NamesAndTypesList required_source_columns;
/// Columns will be added to block by JOIN. It's a subset of analyzed_join.columns_from_joined_table with corrected Nullability
NamesAndTypesList columns_added_by_join;
Aliases aliases;
std::vector<const ASTFunction *> aggregates;
......@@ -42,7 +40,7 @@ struct SyntaxAnalyzerResult
/// Predicate optimizer overrides the sub queries
bool rewrite_subqueries = false;
void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns, bool make_joined_columns_nullable);
void collectUsedColumns(const ASTPtr & query, const NamesAndTypesList & additional_source_columns);
Names requiredSourceColumns() const { return required_source_columns.getNames(); }
};
......
......@@ -287,12 +287,6 @@ bool PipelineExecutor::prepareProcessor(UInt64 pid, Stack & children, Stack & pa
switch (node.last_processor_status)
{
case IProcessor::Status::NeedData:
{
add_neighbours_to_prepare_queue();
try_release_ownership();
break;
}
case IProcessor::Status::PortFull:
{
add_neighbours_to_prepare_queue();
......
......@@ -13,6 +13,8 @@
#include <Columns/ColumnString.h>
#include <Columns/ColumnNullable.h>
#include <Interpreters/castColumn.h>
#include <algorithm>
namespace DB
{
......@@ -27,34 +29,28 @@ namespace DB
extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN;
extern const int THERE_IS_NO_COLUMN;
}
const std::unordered_map<arrow::Type::type, std::shared_ptr<IDataType>> arrow_type_to_internal_type = {
//{arrow::Type::DECIMAL, std::make_shared<DataTypeDecimal>()},
{arrow::Type::UINT8, std::make_shared<DataTypeUInt8>()},
{arrow::Type::INT8, std::make_shared<DataTypeInt8>()},
{arrow::Type::UINT16, std::make_shared<DataTypeUInt16>()},
{arrow::Type::INT16, std::make_shared<DataTypeInt16>()},
{arrow::Type::UINT32, std::make_shared<DataTypeUInt32>()},
{arrow::Type::INT32, std::make_shared<DataTypeInt32>()},
{arrow::Type::UINT64, std::make_shared<DataTypeUInt64>()},
{arrow::Type::INT64, std::make_shared<DataTypeInt64>()},
{arrow::Type::HALF_FLOAT, std::make_shared<DataTypeFloat32>()},
{arrow::Type::FLOAT, std::make_shared<DataTypeFloat32>()},
{arrow::Type::DOUBLE, std::make_shared<DataTypeFloat64>()},
{arrow::Type::BOOL, std::make_shared<DataTypeUInt8>()},
//{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
{arrow::Type::DATE32, std::make_shared<DataTypeDate>()},
//{arrow::Type::DATE32, std::make_shared<DataTypeDateTime>()},
{arrow::Type::DATE64, std::make_shared<DataTypeDateTime>()},
{arrow::Type::TIMESTAMP, std::make_shared<DataTypeDateTime>()},
//{arrow::Type::TIME32, std::make_shared<DataTypeDateTime>()},
{arrow::Type::STRING, std::make_shared<DataTypeString>()},
{arrow::Type::BINARY, std::make_shared<DataTypeString>()},
//{arrow::Type::FIXED_SIZE_BINARY, std::make_shared<DataTypeString>()},
//{arrow::Type::UUID, std::make_shared<DataTypeString>()},
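/// Mapping from Arrow types to ClickHouse type names; the IDataType objects are created on demand via DataTypeFactory.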
static const std::initializer_list<std::pair<arrow::Type::type, const char *>> arrow_type_to_internal_type =
{
{arrow::Type::UINT8, "UInt8"},
{arrow::Type::INT8, "Int8"},
{arrow::Type::UINT16, "UInt16"},
{arrow::Type::INT16, "Int16"},
{arrow::Type::UINT32, "UInt32"},
{arrow::Type::INT32, "Int32"},
{arrow::Type::UINT64, "UInt64"},
{arrow::Type::INT64, "Int64"},
{arrow::Type::HALF_FLOAT, "Float32"},
{arrow::Type::FLOAT, "Float32"},
{arrow::Type::DOUBLE, "Float64"},
{arrow::Type::BOOL, "UInt8"},
{arrow::Type::DATE32, "Date"},
{arrow::Type::DATE64, "DateTime"},
{arrow::Type::TIMESTAMP, "DateTime"},
{arrow::Type::STRING, "String"},
{arrow::Type::BINARY, "String"},
// TODO: add other types that are convertible to internal ones:
// 0. ENUM?
......@@ -253,7 +249,7 @@ namespace DB
void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk &res, std::shared_ptr<arrow::Table> &table,
arrow::Status &read_status, const Block &header,
int &row_group_current, const Context &context, std::string format_name)
{
{
Columns columns_list;
UInt64 num_rows = 0;
......@@ -308,15 +304,16 @@ namespace DB
const auto decimal_type = static_cast<arrow::DecimalType *>(arrow_column->type().get());
internal_nested_type = std::make_shared<DataTypeDecimal<Decimal128>>(decimal_type->precision(),
decimal_type->scale());
} else if (arrow_type_to_internal_type.find(arrow_type) != arrow_type_to_internal_type.end())
}
else if (auto internal_type_it = std::find_if(arrow_type_to_internal_type.begin(), arrow_type_to_internal_type.end(),
[=](auto && elem) { return elem.first == arrow_type; });
internal_type_it != arrow_type_to_internal_type.end())
{
internal_nested_type = arrow_type_to_internal_type.at(arrow_type);
internal_nested_type = DataTypeFactory::instance().get(internal_type_it->second);
}
else
{
throw Exception
{
"The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
throw Exception{"The type \"" + arrow_column->type()->name() + "\" of an input column \"" + arrow_column->name()
+ "\" is not supported for conversion from a " + format_name + " data format",
ErrorCodes::CANNOT_CONVERT_TYPE};
}
......
......@@ -21,34 +21,26 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream(
header(header_), sync(sync_), skip_offsets(skip_offsets_),
already_written_offset_columns(already_written_offset_columns_)
{
}
serialization_states.reserve(header.columns());
WrittenOffsetColumns tmp_offset_columns;
IDataType::SerializeBinaryBulkSettings settings;
void MergedColumnOnlyOutputStream::write(const Block & block)
{
if (!initialized)
for (const auto & column_name : header.getNames())
{
column_streams.clear();
serialization_states.clear();
serialization_states.reserve(header.columns());
WrittenOffsetColumns tmp_offset_columns;
IDataType::SerializeBinaryBulkSettings settings;
for (const auto & column_name : header.getNames())
{
const auto & col = block.getByName(column_name);
const auto columns = storage.getColumns();
addStreams(part_path, col.name, *col.type, columns.getCodecOrDefault(col.name, codec), 0, skip_offsets);
serialization_states.emplace_back(nullptr);
settings.getter = createStreamGetter(col.name, tmp_offset_columns, false);
col.type->serializeBinaryBulkStatePrefix(settings, serialization_states.back());
}
initSkipIndices();
initialized = true;
const auto & col = header.getByName(column_name);
const auto columns = storage.getColumns();
addStreams(part_path, col.name, *col.type, columns.getCodecOrDefault(col.name, codec), 0, skip_offsets);
serialization_states.emplace_back(nullptr);
settings.getter = createStreamGetter(col.name, tmp_offset_columns, false);
col.type->serializeBinaryBulkStatePrefix(settings, serialization_states.back());
}
initSkipIndices();
}
void MergedColumnOnlyOutputStream::write(const Block & block)
{
std::set<String> skip_indexes_column_names_set;
for (const auto & index : skip_indices)
std::copy(index->columns.cbegin(), index->columns.cend(),
......@@ -68,7 +60,6 @@ void MergedColumnOnlyOutputStream::write(const Block & block)
if (!rows)
return;
size_t new_index_offset = 0;
size_t new_current_mark = 0;
WrittenOffsetColumns offset_columns = already_written_offset_columns;
......@@ -106,7 +97,8 @@ MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndG
serialize_settings.getter = createStreamGetter(column.name, already_written_offset_columns, skip_offsets);
column.type->serializeBinaryBulkStateSuffix(serialize_settings, serialization_states[i]);
if (with_final_mark)
/// We wrote at least one row
if (with_final_mark && (index_offset != 0 || current_mark != 0))
writeFinalMark(column.name, column.type, offset_columns, skip_offsets, serialize_settings.path);
}
......@@ -125,7 +117,6 @@ MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndG
column_streams.clear();
serialization_states.clear();
initialized = false;
return checksums;
}
......
......@@ -28,7 +28,6 @@ public:
private:
Block header;
bool initialized = false;
bool sync;
bool skip_offsets;
......
......@@ -34,7 +34,8 @@ set the following environment variables:
### Running with runner script
The only requirement is fresh docker configured docker.
The only requirement is a freshly configured Docker and
docker pull yandex/clickhouse-integration-tests-runner
Notes:
* If you want to run integration tests without `sudo` you have to add your user to docker group `sudo usermod -aG docker $USER`. [More information](https://docs.docker.com/install/linux/linux-postinstall/) about docker configuration.
......
......@@ -723,7 +723,8 @@ class ClickHouseInstance:
os.mkdir(config_d_dir)
os.mkdir(users_d_dir)
shutil.copy(p.join(HELPERS_DIR, 'common_instance_config.xml'), config_d_dir)
# The file is named with 0_ prefix to be processed before other configuration overloads.
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), config_d_dir)
# Generate and write macros file
macros = self.macros.copy()
......
<?xml version="1.0"?>
<yandex>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<mark_cache_size>5368709120</mark_cache_size>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
<dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
</yandex>
......@@ -8,7 +8,7 @@
<user>default</user>
<password></password>
<db>test</db>
<table>small_dict_source</table>
<table>elements</table>
</clickhouse>
</source>
<lifetime>5</lifetime>
......
import pytest
import os
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import assert_eq_with_retry
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
DICTIONARY_FILES = ['configs/dictionaries/dep_x.xml', 'configs/dictionaries/dep_y.xml', 'configs/dictionaries/dep_z.xml']
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('''
CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary;
CREATE DATABASE IF NOT EXISTS test;
DROP TABLE IF EXISTS test.elements;
CREATE TABLE test.elements (id UInt64, a String, b Int32, c Float64) ENGINE=Log;
INSERT INTO test.elements VALUES (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7);
''')
yield cluster
finally:
cluster.shutdown()
def get_status(dictionary_name):
return instance.query("SELECT status FROM system.dictionaries WHERE name='" + dictionary_name + "'").rstrip("\n")
def test_get_data(started_cluster):
query = instance.query
# dictionaries_lazy_load == false, so these dictionaries are not loaded.
assert get_status('dep_x') == 'NOT_LOADED'
assert get_status('dep_y') == 'NOT_LOADED'
assert get_status('dep_z') == 'NOT_LOADED'
# Dictionary 'dep_x' depends on 'dep_z', which depends on 'dep_y'.
# So they all should be loaded at once.
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(1))") == "air\n"
assert get_status('dep_x') == 'LOADED'
assert get_status('dep_y') == 'LOADED'
assert get_status('dep_z') == 'LOADED'
# Other dictionaries should work too.
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(1))") == "air\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(1))") == "air\n"
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "YY\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n"
# Update the source table.
query("INSERT INTO test.elements VALUES (3, 'fire', 30, 8)")
# Wait for dictionaries to be reloaded.
assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(3))", "1", sleep_time = 2, retry_count = 10)
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n"
# dep_x and dep_z are updated only when their `intDiv(count(), 4)` value changes.
query("INSERT INTO test.elements VALUES (4, 'ether', 404, 0.001)")
assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time = 2, retry_count = 10)
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n"
<yandex>
<dictionary>
<name>cache</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test</db>
<table>source</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>128</size_in_cells></cache>
</layout>
<structure>
<id>
<name>id</name>
</id>
<attribute>
<name>UInt8_</name>
<type>UInt8</type>
<null_value>1</null_value>
</attribute>
<attribute>
<name>UInt16_</name>
<type>UInt16</type>
<null_value>1</null_value>
</attribute>
<attribute>
<name>UInt32_</name>
<type>UInt32</type>
<null_value>1</null_value>
</attribute>
<attribute>
<name>UInt64_</name>
<type>UInt64</type>
<null_value></null_value>
</attribute>
<attribute>
<name>Int8_</name>
<type>Int8</type>
<null_value>-1</null_value>
</attribute>
<attribute>
<name>Int16_</name>
<type>Int16</type>
<null_value>-1</null_value>
</attribute>
<attribute>
<name>Int32_</name>
<type>Int32</type>
<null_value>-1</null_value>
</attribute>
<attribute>
<name>Int64_</name>
<type>Int64</type>
<null_value>-1</null_value>
</attribute>
<attribute>
<name>Float32_</name>
<type>Float32</type>
<null_value>2.71828</null_value>
</attribute>
<attribute>
<name>Float64_</name>
<type>Float64</type>
<null_value>2.71828</null_value>
</attribute>
<attribute>
<name>String_</name>
<type>String</type>
<null_value>implicit-default</null_value>
</attribute>
<attribute>
<name>Date_</name>
<type>Date</type>
<null_value>2015-11-25</null_value>
</attribute>
<attribute>
<name>DateTime_</name>
<type>DateTime</type>
<null_value></null_value>
</attribute>
<attribute>
<name>Parent</name>
<type>UInt64</type>
<hierarchical>true</hierarchical>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</yandex>
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<password></password>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
</users>
<quotas>
<default>
</default>
</quotas>
</yandex>
import pytest
import os
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV, assert_eq_with_retry
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
DICTIONARY_FILES = ['configs/dictionaries/cache.xml']
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query('''
CREATE DATABASE IF NOT EXISTS test;
DROP TABLE IF EXISTS test.source;
CREATE TABLE test.source (id UInt64, key0 UInt8, key0_str String, key1 UInt8,
StartDate Date, EndDate Date,
UInt8_ UInt8, UInt16_ UInt16, UInt32_ UInt32, UInt64_ UInt64,
Int8_ Int8, Int16_ Int16, Int32_ Int32, Int64_ Int64,
Float32_ Float32, Float64_ Float64,
String_ String,
Date_ Date, DateTime_ DateTime, Parent UInt64) ENGINE=Log;
''')
yield cluster
finally:
cluster.shutdown()
def test_null_value(started_cluster):
query = instance.query
assert query("select dictGetUInt8('cache', 'UInt8_', toUInt64(12121212))") == "1\n"
assert query("select dictGetString('cache', 'String_', toUInt64(12121212))") == "implicit-default\n"
assert query("select dictGetDate('cache', 'Date_', toUInt64(12121212))") == "2015-11-25\n"
# Check that an empty null_value is interpreted as the default value
assert query("select dictGetUInt64('cache', 'UInt64_', toUInt64(12121212))") == "0\n"
assert query("select dictGetDateTime('cache', 'DateTime_', toUInt64(12121212))") == "0000-00-00 00:00:00\n"
<?xml version="1.0"?>
<yandex>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<mark_cache_size>5368709120</mark_cache_size>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
<dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
</yandex>
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<password></password>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
</users>
<quotas>
<default>
</default>
</quotas>
</yandex>
......@@ -12,13 +12,6 @@ types = [
'Date', 'DateTime'
]
explicit_defaults = [
'42', '42', '42', '42',
'-42', '-42', '-42', '-42',
'1.5', '1.6',
"'explicit-default'",
"'2015-01-01'", "'2015-01-01 00:00:00'"
]
implicit_defaults = [
'1', '1', '1', '',
......@@ -182,9 +175,6 @@ def generate_dictionaries(path, structure):
file_names = []
# Add ready dictionaries.
file_names.extend(glob.glob(os.path.join(path, "*dictionary_preset*")))
# Generate dictionaries.
for (name, key_idx, has_parent), (source, layout) in zip(structure, sources_and_layouts):
filename = os.path.join(path, 'dictionary_%s.xml' % name)
......
import pytest
import os
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV, assert_eq_with_retry
from generate_dictionaries import generate_structure, generate_dictionaries, DictionaryTestTable
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
cluster = None
instance = None
test_table = None
def setup_module(module):
global cluster
global instance
global test_table
structure = generate_structure()
dictionary_files = generate_dictionaries(os.path.join(SCRIPT_DIR, 'configs/dictionaries'), structure)
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
instance = cluster.add_instance('instance', main_configs=dictionary_files)
test_table = DictionaryTestTable(os.path.join(SCRIPT_DIR, 'configs/dictionaries/source.tsv'))
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
test_table.create_clickhouse_source(instance)
for line in TSV(instance.query('select name from system.dictionaries')).lines:
print line,
yield cluster
finally:
cluster.shutdown()
@pytest.fixture(params=[
# name, keys, use_parent
('clickhouse_hashed', ('id',), True),
('clickhouse_flat', ('id',), True),
('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False),
],
ids=['clickhouse_hashed', 'clickhouse_flat',
'clickhouse_complex_integers_key_hashed',
'clickhouse_complex_mixed_key_hashed',
'clickhouse_range_hashed']
)
def dictionary_structure(started_cluster, request):
return request.param
def test_select_all(dictionary_structure):
name, keys, use_parent = dictionary_structure
query = instance.query
structure = test_table.get_structure_for_keys(keys, use_parent)
query('''
DROP TABLE IF EXISTS test.{0}
'''.format(name))
create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure)
TSV(query(create_query))
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True)
print test_table.process_diff(diff)
assert not diff
@pytest.fixture(params=[
# name, keys, use_parent
('clickhouse_cache', ('id',), True),
('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False),
('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False)
],
ids=['clickhouse_cache', 'clickhouse_complex_integers_key_cache', 'clickhouse_complex_mixed_key_cache']
)
def cached_dictionary_structure(started_cluster, request):
return request.param
def test_select_all_from_cached(cached_dictionary_structure):
name, keys, use_parent = cached_dictionary_structure
query = instance.query
structure = test_table.get_structure_for_keys(keys, use_parent)
query('''
DROP TABLE IF EXISTS test.{0}
'''.format(name))
create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure)
TSV(query(create_query))
for i in range(4):
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False)
print test_table.process_diff(diff)
assert not diff
key = []
for key_name in keys:
if key_name.endswith('str'):
key.append("'" + str(i) + "'")
else:
key.append(str(i))
if len(key) == 1:
key = 'toUInt64(' + str(i) + ')'
else:
key = str('(' + ','.join(key) + ')')
query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key))
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True)
print test_table.process_diff(diff)
assert not diff
<?xml version="1.0"?>
<yandex>
<logger>
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
<size>1000M</size>
<count>10</count>
</logger>
<tcp_port>9000</tcp_port>
<listen_host>127.0.0.1</listen_host>
<openSSL>
<client>
<cacheSessions>true</cacheSessions>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
<max_concurrent_queries>500</max_concurrent_queries>
<mark_cache_size>5368709120</mark_cache_size>
<path>./clickhouse/</path>
<users_config>users.xml</users_config>
<dictionaries_config>/etc/clickhouse-server/config.d/*.xml</dictionaries_config>
</yandex>
<?xml version="1.0"?>
<yandex>
<dictionary>
<name>cmd</name>
<name>executable</name>
<source>
<executable>
<command>echo '7\t8';</command>
......
......@@ -4,7 +4,7 @@
<name>file</name>
<source>
<file>
<path>/etc/clickhouse-server/config.d/dictionary_preset_file.txt</path>
<path>/etc/clickhouse-server/config.d/file.txt</path>
<format>TabSeparated</format>
</file>
</source>
......@@ -21,7 +21,7 @@
<name>no_file</name>
<source>
<file>
<path>/etc/clickhouse-server/config.d/dictionary_preset_no_file.txt</path>
<path>/etc/clickhouse-server/config.d/no_file.txt</path>
<format>TabSeparated</format>
</file>
</source>
......@@ -38,7 +38,7 @@
<name>no_file_2</name>
<source>
<file>
<path>/etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt</path>
<path>/etc/clickhouse-server/config.d/no_file_2.txt</path>
<format>TabSeparated</format>
</file>
</source>
......
<?xml version="1.0"?>
<yandex>
<dictionary>
<name>longload</name>
<name>slow</name>
<source>
<executable>
<command>sleep 100 &amp;&amp; echo '5\t6';</command>
......
<?xml version="1.0"?>
<yandex>
<profiles>
<default>
</default>
</profiles>
<users>
<default>
<password></password>
<networks incl="networks" replace="replace">
<ip>::/0</ip>
</networks>
<profile>default</profile>
<quota>default</quota>
</default>
</users>
<quotas>
<default>
</default>
</quotas>
</yandex>
import pytest
import os
import time
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV, assert_eq_with_retry
from generate_dictionaries import generate_structure, generate_dictionaries, DictionaryTestTable
from helpers.test_tools import assert_eq_with_retry
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
DICTIONARY_FILES = ['configs/dictionaries/cache_xypairs.xml', 'configs/dictionaries/executable.xml', 'configs/dictionaries/file.xml', 'configs/dictionaries/file.txt', 'configs/dictionaries/slow.xml']
cluster = None
instance = None
test_table = None
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
instance = cluster.add_instance('instance', main_configs=DICTIONARY_FILES)
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query("CREATE DATABASE IF NOT EXISTS test")
yield cluster
finally:
cluster.shutdown()
def get_status(dictionary_name):
......@@ -36,266 +46,92 @@ def replace_in_file_in_container(file_name, what, replace_with):
instance.exec_in_container('sed -i "s/' + what + '/' + replace_with + '/g" ' + file_name)
def setup_module(module):
global cluster
global instance
global test_table
structure = generate_structure()
dictionary_files = generate_dictionaries(os.path.join(SCRIPT_DIR, 'configs/dictionaries'), structure)
cluster = ClickHouseCluster(__file__, base_configs_dir=os.path.join(SCRIPT_DIR, 'configs'))
instance = cluster.add_instance('instance', main_configs=dictionary_files)
test_table = DictionaryTestTable(os.path.join(SCRIPT_DIR, 'configs/dictionaries/source.tsv'))
@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.start()
instance.query("CREATE DATABASE IF NOT EXISTS dict ENGINE=Dictionary")
test_table.create_clickhouse_source(instance)
for line in TSV(instance.query('select name from system.dictionaries')).lines:
print line,
# Create table `test.small_dict_source`
instance.query('''
drop table if exists test.small_dict_source;
create table test.small_dict_source (id UInt64, a String, b Int32, c Float64) engine=Log;
insert into test.small_dict_source values (0, 'water', 10, 1), (1, 'air', 40, 0.01), (2, 'earth', 100, 1.7);
''')
yield cluster
finally:
cluster.shutdown()
@pytest.fixture(params=[
# name, keys, use_parent
('clickhouse_hashed', ('id',), True),
('clickhouse_flat', ('id',), True),
('clickhouse_complex_integers_key_hashed', ('key0', 'key1'), False),
('clickhouse_complex_mixed_key_hashed', ('key0_str', 'key1'), False),
('clickhouse_range_hashed', ('id', 'StartDate', 'EndDate'), False),
],
ids=['clickhouse_hashed', 'clickhouse_flat',
'clickhouse_complex_integers_key_hashed',
'clickhouse_complex_mixed_key_hashed',
'clickhouse_range_hashed']
)
def dictionary_structure(started_cluster, request):
return request.param
def test_select_all(dictionary_structure):
name, keys, use_parent = dictionary_structure
query = instance.query
structure = test_table.get_structure_for_keys(keys, use_parent)
query('''
DROP TABLE IF EXISTS test.{0}
'''.format(name))
create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure)
TSV(query(create_query))
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True)
print test_table.process_diff(diff)
assert not diff
@pytest.fixture(params=[
# name, keys, use_parent
('clickhouse_cache', ('id',), True),
('clickhouse_complex_integers_key_cache', ('key0', 'key1'), False),
('clickhouse_complex_mixed_key_cache', ('key0_str', 'key1'), False)
],
ids=['clickhouse_cache', 'clickhouse_complex_integers_key_cache', 'clickhouse_complex_mixed_key_cache']
)
def cached_dictionary_structure(started_cluster, request):
return request.param
def test_select_all_from_cached(cached_dictionary_structure):
name, keys, use_parent = cached_dictionary_structure
query = instance.query
structure = test_table.get_structure_for_keys(keys, use_parent)
query('''
DROP TABLE IF EXISTS test.{0}
'''.format(name))
create_query = "CREATE TABLE test.{0} ({1}) engine = Dictionary({0})".format(name, structure)
TSV(query(create_query))
for i in range(4):
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=False)
print test_table.process_diff(diff)
assert not diff
key = []
for key_name in keys:
if key_name.endswith('str'):
key.append("'" + str(i) + "'")
else:
key.append(str(i))
if len(key) == 1:
key = 'toUInt64(' + str(i) + ')'
else:
key = str('(' + ','.join(key) + ')')
query("select dictGetUInt8('{0}', 'UInt8_', {1})".format(name, key))
result = TSV(query('select * from test.{0}'.format(name)))
diff = test_table.compare_by_keys(keys, result.lines, use_parent, add_not_found_rows=True)
print test_table.process_diff(diff)
assert not diff
def test_null_value(started_cluster):
query = instance.query
assert TSV(query("select dictGetUInt8('clickhouse_cache', 'UInt8_', toUInt64(12121212))")) == TSV("1")
assert TSV(query("select dictGetString('clickhouse_cache', 'String_', toUInt64(12121212))")) == TSV("implicit-default")
assert TSV(query("select dictGetDate('clickhouse_cache', 'Date_', toUInt64(12121212))")) == TSV("2015-11-25")
# Check that an empty null_value is interpreted as the default value
assert TSV(query("select dictGetUInt64('clickhouse_cache', 'UInt64_', toUInt64(12121212))")) == TSV("0")
assert TSV(query("select dictGetDateTime('clickhouse_cache', 'DateTime_', toUInt64(12121212))")) == TSV("0000-00-00 00:00:00")
def test_dictionary_dependency(started_cluster):
query = instance.query
# dictionaries_lazy_load == false, so these dictionaries are not loaded.
assert get_status('dep_x') == 'NOT_LOADED'
assert get_status('dep_y') == 'NOT_LOADED'
assert get_status('dep_z') == 'NOT_LOADED'
# Dictionary 'dep_x' depends on 'dep_z', which depends on 'dep_y'.
# So they all should be loaded at once.
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(1))") == "air\n"
assert get_status('dep_x') == 'LOADED'
assert get_status('dep_y') == 'LOADED'
assert get_status('dep_z') == 'LOADED'
# Other dictionaries should work too.
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(1))") == "air\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(1))") == "air\n"
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "YY\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n"
# Update the source table.
query("insert into test.small_dict_source values (3, 'fire', 30, 8)")
# Wait for dictionaries to be reloaded.
assert_eq_with_retry(instance, "SELECT dictHas('dep_y', toUInt64(3))", "1", sleep_time = 2, retry_count = 10)
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "XX\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "ZZ\n"
# dep_x and dep_z are updated only when their `intDiv(count(), 4)` value changes.
query("insert into test.small_dict_source values (4, 'ether', 404, 0.001)")
assert_eq_with_retry(instance, "SELECT dictHas('dep_x', toUInt64(4))", "1", sleep_time = 2, retry_count = 10)
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(3))") == "fire\n"
assert query("SELECT dictGetString('dep_x', 'a', toUInt64(4))") == "ether\n"
assert query("SELECT dictGetString('dep_y', 'a', toUInt64(4))") == "ether\n"
assert query("SELECT dictGetString('dep_z', 'a', toUInt64(4))") == "ether\n"
def test_reload_while_loading(started_cluster):
query = instance.query
# dictionaries_lazy_load == false, so this dictionary is not loaded.
assert get_status('longload') == "NOT_LOADED"
assert get_loading_duration('longload') == 0
assert get_status('slow') == "NOT_LOADED"
assert get_loading_duration('slow') == 0
# It's not possible to get a value from the dictionary within 1.0 second, so the following query times out.
assert query("SELECT dictGetInt32('longload', 'a', toUInt64(5))", timeout = 1, ignore_error = True) == ""
assert query("SELECT dictGetInt32('slow', 'a', toUInt64(5))", timeout = 1, ignore_error = True) == ""
# The dictionary is now loading.
assert get_status('longload') == "LOADING"
start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload')
assert get_status('slow') == "LOADING"
start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
assert duration > 0
time.sleep(0.5) # Still loading.
assert get_status('longload') == "LOADING"
assert get_status('slow') == "LOADING"
prev_start_time, prev_duration = start_time, duration
start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload')
start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
assert start_time == prev_start_time
assert duration >= prev_duration
# SYSTEM RELOAD DICTIONARY should restart loading.
query("SYSTEM RELOAD DICTIONARY 'longload'")
assert get_status('longload') == "LOADING"
query("SYSTEM RELOAD DICTIONARY 'slow'")
assert get_status('slow') == "LOADING"
prev_start_time, prev_duration = start_time, duration
start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload')
start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
assert start_time > prev_start_time
assert duration < prev_duration
time.sleep(0.5) # Still loading.
assert get_status('longload') == "LOADING"
assert get_status('slow') == "LOADING"
prev_start_time, prev_duration = start_time, duration
start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload')
start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
assert start_time == prev_start_time
assert duration >= prev_duration
# SYSTEM RELOAD DICTIONARIES should restart loading again.
query("SYSTEM RELOAD DICTIONARIES")
assert get_status('longload') == "LOADING"
assert get_status('slow') == "LOADING"
prev_start_time, prev_duration = start_time, duration
start_time, duration = get_loading_start_time('longload'), get_loading_duration('longload')
start_time, duration = get_loading_start_time('slow'), get_loading_duration('slow')
assert start_time > prev_start_time
assert duration < prev_duration
# Changing the configuration file should restart loading one more time.
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_longload.xml', 'sleep 100', 'sleep 0')
replace_in_file_in_container('/etc/clickhouse-server/config.d/slow.xml', 'sleep 100', 'sleep 0')
time.sleep(5) # Configuration files are reloaded once every 5 seconds.
# This time loading should finish quickly.
assert get_status('longload') == "LOADED"
assert query("SELECT dictGetInt32('longload', 'a', toUInt64(5))") == "6\n"
assert get_status('slow') == "LOADED"
assert query("SELECT dictGetInt32('slow', 'a', toUInt64(5))") == "6\n"
def test_reload_after_loading(started_cluster):
query = instance.query
assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "8\n"
assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "8\n"
assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n"
# Change the dictionaries' data.
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '8', '81')
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '10', '101')
replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '8', '81')
replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '10', '101')
# SYSTEM RELOAD 'name' reloads only the specified dictionary.
query("SYSTEM RELOAD DICTIONARY 'cmd'")
assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "81\n"
query("SYSTEM RELOAD DICTIONARY 'executable'")
assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "81\n"
assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "10\n"
query("SYSTEM RELOAD DICTIONARY 'file'")
assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "81\n"
assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "81\n"
assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "101\n"
# SYSTEM RELOAD DICTIONARIES reloads all loaded dictionaries.
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '81', '82')
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '101', '102')
replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '81', '82')
replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '101', '102')
query("SYSTEM RELOAD DICTIONARIES")
assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "82\n"
assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "82\n"
assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "102\n"
# Configuration files are reloaded and lifetimes are checked automatically once every 5 seconds.
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_cmd.xml', '82', '83')
replace_in_file_in_container('/etc/clickhouse-server/config.d/dictionary_preset_file.txt', '102', '103')
replace_in_file_in_container('/etc/clickhouse-server/config.d/executable.xml', '82', '83')
replace_in_file_in_container('/etc/clickhouse-server/config.d/file.txt', '102', '103')
time.sleep(5)
assert query("SELECT dictGetInt32('file', 'a', toUInt64(9))") == "103\n"
assert query("SELECT dictGetInt32('cmd', 'a', toUInt64(7))") == "83\n"
assert query("SELECT dictGetInt32('executable', 'a', toUInt64(7))") == "83\n"
def test_reload_after_fail_by_system_reload(started_cluster):
......@@ -315,13 +151,13 @@ def test_reload_after_fail_by_system_reload(started_cluster):
assert get_status("no_file") == "FAILED"
# Creating the file source makes the dictionary able to load.
instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/dictionary_preset_file.txt"), "/etc/clickhouse-server/config.d/dictionary_preset_no_file.txt")
instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/file.txt"), "/etc/clickhouse-server/config.d/no_file.txt")
query("SYSTEM RELOAD DICTIONARY 'no_file'")
query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n"
assert get_status("no_file") == "LOADED"
# Removing the file source should not spoil the loaded dictionary.
instance.exec_in_container("rm /etc/clickhouse-server/config.d/dictionary_preset_no_file.txt")
instance.exec_in_container("rm /etc/clickhouse-server/config.d/no_file.txt")
query("SYSTEM RELOAD DICTIONARY 'no_file'")
query("SELECT dictGetInt32('no_file', 'a', toUInt64(9))") == "10\n"
assert get_status("no_file") == "LOADED"
......@@ -344,13 +180,13 @@ def test_reload_after_fail_by_timer(started_cluster):
assert get_status("no_file_2") == "FAILED"
# Creating the file source makes the dictionary able to load.
instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/dictionary_preset_file.txt"), "/etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt")
instance.copy_file_to_container(os.path.join(SCRIPT_DIR, "configs/dictionaries/file.txt"), "/etc/clickhouse-server/config.d/no_file_2.txt")
time.sleep(6)
query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n"
assert get_status("no_file_2") == "LOADED"
# Removing the file source should not spoil the loaded dictionary.
instance.exec_in_container("rm /etc/clickhouse-server/config.d/dictionary_preset_no_file_2.txt")
instance.exec_in_container("rm /etc/clickhouse-server/config.d/no_file_2.txt")
time.sleep(6)
query("SELECT dictGetInt32('no_file_2', 'a', toUInt64(9))") == "10\n"
assert get_status("no_file_2") == "LOADED"
......@@ -368,9 +204,9 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster):
# Create table `test.xypairs`.
query('''
drop table if exists test.xypairs;
create table test.xypairs (x UInt64, y UInt64) engine=Log;
insert into test.xypairs values (1, 56), (3, 78);
DROP TABLE IF EXISTS test.xypairs;
CREATE TABLE test.xypairs (x UInt64, y UInt64) ENGINE=Log;
INSERT INTO test.xypairs VALUES (1, 56), (3, 78);
''')
# Cache dictionary now works.
......@@ -379,7 +215,7 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster):
assert get_last_exception("cache_xypairs") == ""
# Drop table `test.xypairs`.
query('drop table if exists test.xypairs')
query('DROP TABLE test.xypairs')
# Values are cached so we can get them.
query("SELECT dictGet('cache_xypairs', 'y', toUInt64(1))") == "56"
......@@ -399,9 +235,8 @@ def test_reload_after_fail_in_cache_dictionary(started_cluster):
# Create table `test.xypairs` again with changed values.
query('''
drop table if exists test.xypairs;
create table test.xypairs (x UInt64, y UInt64) engine=Log;
insert into test.xypairs values (1, 57), (3, 79);
CREATE TABLE test.xypairs (x UInt64, y UInt64) ENGINE=Log;
INSERT INTO test.xypairs VALUES (1, 57), (3, 79);
''')
# The cache dictionary returns new values now.
......
<?xml version="1.0"?>
<yandex>
<timezone>America/Los_Angeles</timezone>
</yandex>
import pytest
from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node = cluster.add_instance('node', main_configs=['configs/config.xml'])
@pytest.fixture(scope="module")
def start_cluster():
try:
cluster.start()
yield cluster
finally:
cluster.shutdown()
def test_check_timezone_config(start_cluster):
assert node.query("SELECT toDateTime(1111111111)") == "2005-03-17 17:58:31\n"
......@@ -17,7 +17,7 @@ function thread1()
function thread2()
{
seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM test.buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218'
seq 1 1000 | sed -r -e 's/.+/SELECT count() FROM test.buffer_00763_2;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^0$|^10$|^Received exception|^Code: 60|^Code: 218|^Code: 473'
}
thread1 &
......
#!/usr/bin/env bash
set -e
CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
......@@ -18,7 +20,7 @@ function thread1()
function thread2()
{
seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM test.buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --multiquery --server_logs_file='/dev/null' --ignore-error 2>&1 | grep -vP '^3$'
seq 1 2000 | sed -r -e 's/.+/SELECT sum(length(s)) FROM test.buffer_00763_1;/' | ${CLICKHOUSE_CLIENT} --multiquery --ignore-error 2>&1 | grep -vP '(^3$|^Received exception from server|^Code: 473)'
}
thread1 &
......
......@@ -8,6 +8,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS table"
seq 1 100 | sed -r -e "s/.+/CREATE TABLE table (x UInt8) ENGINE = MergeTree ORDER BY x; DROP TABLE table;/" | $CLICKHOUSE_CLIENT -n &
seq 1 100 | sed -r -e "s/.+/SELECT * FROM system.tables WHERE database = '${CLICKHOUSE_DATABASE}' LIMIT 1000000, 1;/" | $CLICKHOUSE_CLIENT -n &
seq 1 100 | sed -r -e "s/.+/SELECT * FROM system.tables WHERE database = '${CLICKHOUSE_DATABASE}' LIMIT 1000000, 1;/" | $CLICKHOUSE_CLIENT -n 2>/dev/null &
wait
......@@ -278,3 +278,9 @@ a1,a2 12 [1,2]
1 2019-06-06 1 4 2 1 5 1 [1,2] [1001,1002] [1,1]
1 2019-06-06 1 4 2 1 5 0 [1,2] [1002,1003] [1,1]
1 2019-06-06 1 4 2 1 6 0 [3] [2001] [1]
-- empty
[[1],[],[2]]
[[1],[],[2]]
[[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8],[],[9]]
[[],[1],[],[2],[],[3],[],[4],[],[5],[],[6],[],[7],[],[8]]
[[1],[2],[],[3]]
......@@ -305,3 +305,11 @@ ARRAY JOIN
Test.PuidVal AS PuidValArr;
DROP TABLE arr_tests_visits;
select '-- empty';
SELECT arrayEnumerateUniqRanked([['a'], [], ['a']]);
SELECT arrayEnumerateUniqRanked([[1], [], [1]]);
SELECT arrayEnumerateUniqRanked([[1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1]]);
SELECT arrayEnumerateUniqRanked([[], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1], [], [1]]);
SELECT arrayEnumerateUniqRanked([[1], [1], [], [1]]);
......@@ -5,6 +5,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1);
insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4);
select sleep(0.7) format Null; -- wait in case a very fast merge happens
optimize table ttl_00933_2 final;
select a from ttl_00933_2 order by a;
......@@ -15,6 +16,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 100);
insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 200);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 300);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 400);
select sleep(0.7) format Null; -- wait in case a very fast merge happens
optimize table ttl_00933_2 final;
select a, b from ttl_00933_2 order by a;
......@@ -25,6 +27,7 @@ insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 1, 5);
insert into ttl_00933_2 values (toDateTime('2000-10-10 00:00:00'), 2, 10);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 3, 15);
insert into ttl_00933_2 values (toDateTime('2100-10-10 00:00:00'), 4, 20);
select sleep(0.7) format Null; -- wait in case a very fast merge happens
optimize table ttl_00933_2 final;
select a, b from ttl_00933_2 order by a;
......
......@@ -37,9 +37,9 @@ SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill;
SELECT '*** a WITH FILL, b WITH fill TO 6 STEP 2 ***';
SELECT * FROM fill ORDER BY a WITH FILL, b WITH fill TO 6 STEP 2;
SELECT * FROM fill ORDER BY a WITH FILL STEP -1; -- { serverError 474 }
SELECT * FROM fill ORDER BY a WITH FILL FROM 10 TO 1; -- { serverError 474 }
SELECT * FROM fill ORDER BY a DESC WITH FILL FROM 1 TO 10; -- { serverError 474 }
SELECT * FROM fill ORDER BY a WITH FILL FROM -10 to 10; -- { serverError 474 }
SELECT * FROM fill ORDER BY a WITH FILL STEP -1; -- { serverError 475 }
SELECT * FROM fill ORDER BY a WITH FILL FROM 10 TO 1; -- { serverError 475 }
SELECT * FROM fill ORDER BY a DESC WITH FILL FROM 1 TO 10; -- { serverError 475 }
SELECT * FROM fill ORDER BY a WITH FILL FROM -10 to 10; -- { serverError 475 }
DROP TABLE fill;
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
. $CURDIR/mergetree_mutations.lib
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_with_empty_part"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE table_with_empty_part
(
id UInt64,
value UInt64
)
ENGINE = MergeTree()
ORDER BY id
PARTITION BY id
SETTINGS vertical_merge_algorithm_min_rows_to_activate=0, vertical_merge_algorithm_min_columns_to_activate=0
"
${CLICKHOUSE_CLIENT} --query="INSERT INTO table_with_empty_part VALUES (1, 1)"
${CLICKHOUSE_CLIENT} --query="INSERT INTO table_with_empty_part VALUES (2, 2)"
${CLICKHOUSE_CLIENT} --query="ALTER TABLE table_with_empty_part DELETE WHERE id % 2 == 0"
sleep 0.5
mutation_id=`${CLICKHOUSE_CLIENT} --query="SELECT max(mutation_id) FROM system.mutations WHERE table='table_with_empty_part'"`
wait_for_mutation "table_with_empty_part" "$mutation_id"
${CLICKHOUSE_CLIENT} --query="SELECT COUNT(DISTINCT value) FROM table_with_empty_part"
${CLICKHOUSE_CLIENT} --query="ALTER TABLE table_with_empty_part MODIFY COLUMN value Nullable(UInt64)"
${CLICKHOUSE_CLIENT} --query="SELECT COUNT(distinct value) FROM table_with_empty_part"
${CLICKHOUSE_CLIENT} --query="OPTIMIZE TABLE table_with_empty_part FINAL"
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS table_with_empty_part"
#!/usr/bin/env bash
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. $CURDIR/../shell_config.sh
set -e
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS a"
$CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS b"
$CLICKHOUSE_CLIENT --query "CREATE TABLE a (x UInt8) ENGINE = MergeTree ORDER BY tuple()"
$CLICKHOUSE_CLIENT --query "CREATE TABLE b (x UInt8) ENGINE = MergeTree ORDER BY tuple()"
function thread1()
{
while true; do
seq 1 100 | awk '{ print "SELECT x FROM a WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
done
}
function thread2()
{
while true; do
seq 1 100 | awk '{ print "SELECT x FROM b WHERE x IN (SELECT toUInt8(count()) FROM system.tables);" }' | $CLICKHOUSE_CLIENT -n
done
}
function thread3()
{
while true; do
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE a MODIFY COLUMN x UInt8"
done
}
function thread4()
{
while true; do
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x Nullable(UInt8)"
$CLICKHOUSE_CLIENT --query "ALTER TABLE b MODIFY COLUMN x UInt8"
done
}
# https://stackoverflow.com/questions/9954794/execute-a-shell-function-with-timeout
export -f thread1;
export -f thread2;
export -f thread3;
export -f thread4;
TIMEOUT=10
timeout $TIMEOUT bash -c thread1 2> /dev/null &
timeout $TIMEOUT bash -c thread2 2> /dev/null &
timeout $TIMEOUT bash -c thread3 2> /dev/null &
timeout $TIMEOUT bash -c thread4 2> /dev/null &
wait
$CLICKHOUSE_CLIENT --query "DROP TABLE a"
$CLICKHOUSE_CLIENT --query "DROP TABLE b"
DROP TABLE IF EXISTS foo;
DROP TABLE IF EXISTS bar;
DROP TABLE IF EXISTS view_foo_bar;
create table foo (ddate Date, id Int64, n String) ENGINE = ReplacingMergeTree(ddate, (id), 8192);
create table bar (ddate Date, id Int64, n String, foo_id Int64) ENGINE = ReplacingMergeTree(ddate, (id), 8192);
insert into bar (id, n, foo_id) values (1, 'bar_n_1', 1);
create MATERIALIZED view view_foo_bar ENGINE = ReplacingMergeTree(ddate, (bar_id), 8192) as select ddate, bar_id, bar_n, foo_id, foo_n from (select ddate, id as bar_id, n as bar_n, foo_id from bar) any left join (select id as foo_id, n as foo_n from foo) using foo_id;
insert into bar (id, n, foo_id) values (1, 'bar_n_1', 1);
SELECT * FROM view_foo_bar;
DROP TABLE foo;
DROP TABLE bar;
DROP TABLE view_foo_bar;
......@@ -74,7 +74,7 @@ If `force_primary_key=1`, ClickHouse checks to see if the query has a primary ke
## format_schema
This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template-template). The value depends on the format.
This parameter is useful when you are using formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](https://clickhouse.yandex/docs/en/interfaces/formats/#template). The value depends on the format.
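For example, for the Cap'n Proto format the value has the form `schema_file:MessageType`. The following sketch assumes a hypothetical `test.hits` table and a `schema.capnp` file with a `Message` struct placed in the server's `format_schema_path` directory:

```sql
SELECT SearchPhrase, count() AS c
FROM test.hits
GROUP BY SearchPhrase
FORMAT CapnProto
SETTINGS format_schema = 'schema:Message'
```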
## fsync_metadata
......
......@@ -214,4 +214,66 @@ SELECT * FROM UAct FINAL
This way of selecting the data is very inefficient. Don't use it for big tables.
## Example of another approach
Example data:
```
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ -5 │ -146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The idea is that merges take into account only key columns, so in the "cancel" row we can specify negative values that neutralize the previous version of the row when summing, without using the Sign column. For this approach we need to change the data types of `PageViews` and `Duration` to store negative values: UInt8 -> Int16.
```sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews Int16,
Duration Int16,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Let's test the approach:
```sql
insert into UAct values(4324182021466249494, 5, 146, 1);
insert into UAct values(4324182021466249494, -5, -146, -1);
insert into UAct values(4324182021466249494, 6, 185, 1);
select * from UAct final; -- avoid using final in production (just for a test or small tables)
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │         6 │      185 │    1 │
└─────────────────────┴───────────┴──────────┴──────┘
SELECT
UserID,
sum(PageViews) AS PageViews,
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │         6 │      185 │
└─────────────────────┴───────────┴──────────┘
select count() FROM UAct
┌─count()─┐
│       3 │
└─────────┘
optimize table UAct final;
select * FROM UAct
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │         6 │      185 │    1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
[Original article](https://clickhouse.yandex/docs/en/operations/table_engines/collapsingmergetree/) <!--hide-->
......@@ -394,6 +394,33 @@ FROM
└─────────┴─────────────────────┴───────┘
```
Please note that the block size affects the result. With each new block, the `runningDifference` state is reset.
``` sql
SELECT
number,
runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1
┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
┌─number─┬─diff─┐
│  65536 │    0 │
└────────┴──────┘
set max_block_size=100000 -- the default value is 65536!
SELECT
number,
runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1
┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
```
## runningDifferenceStartingWithFirstValue
Same as [runningDifference](./other_functions.md#other_functions-runningdifference), but the first row returns the value of the first row itself, and each subsequent row returns the difference from the previous row.
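A small illustration (a sketch, not part of the original reference) comparing the two functions on a few rows:

```sql
SELECT
    number + 10 AS x,
    runningDifference(x) AS diff,
    runningDifferenceStartingWithFirstValue(x) AS diff_from_first
FROM numbers(3)
```

Here `diff` is expected to be 0, 1, 1, while `diff_from_first` is expected to be 10, 1, 1: the first row carries the value itself rather than zero.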
......
......@@ -72,7 +72,7 @@ ClickHouse применяет настройку в тех случаях, ко
## format_schema
This parameter applies when you use formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template-template). The value of the parameter depends on the format.
This parameter applies when you use formats that require a schema definition, such as [Cap'n Proto](https://capnproto.org/), [Protobuf](https://developers.google.com/protocol-buffers/) or [Template](../../interfaces/formats.md#template). The value of the parameter depends on the format.
## fsync_metadata
......
......@@ -220,5 +220,65 @@ SELECT * FROM UAct FINAL
This way of selecting data is very inefficient. Do not use it for big tables.
## Example of another approach
Source data:
```
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │ 5 │ 146 │ 1 │
│ 4324182021466249494 │ -5 │ -146 │ -1 │
│ 4324182021466249494 │ 6 │ 185 │ 1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
The idea is that merges during collapsing take into account only key columns, so in the canceling row we can specify negative values that neutralize the previous version of the row when summing, without taking the Sign column into account.
For this approach we need to change the data types of `PageViews` and `Duration` to store negative values: UInt8 -> Int16.
```sql
CREATE TABLE UAct
(
UserID UInt64,
PageViews Int16,
Duration Int16,
Sign Int8
)
ENGINE = CollapsingMergeTree(Sign)
ORDER BY UserID
```
Let's test the approach:
```sql
insert into UAct values(4324182021466249494, 5, 146, 1);
insert into UAct values(4324182021466249494, -5, -146, -1);
insert into UAct values(4324182021466249494, 6, 185, 1);
select * from UAct final; -- avoid using final (it is suitable only for tests and small tables)
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │         6 │      185 │    1 │
└─────────────────────┴───────────┴──────────┴──────┘
SELECT
UserID,
sum(PageViews) AS PageViews,
sum(Duration) AS Duration
FROM UAct
GROUP BY UserID
┌──────────────UserID─┬─PageViews─┬─Duration─┐
│ 4324182021466249494 │         6 │      185 │
└─────────────────────┴───────────┴──────────┘
select count() FROM UAct
┌─count()─┐
│       3 │
└─────────┘
optimize table UAct final;
select * FROM UAct
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │         6 │      185 │    1 │
└─────────────────────┴───────────┴──────────┴──────┘
```
[Original article](https://clickhouse.yandex/docs/ru/operations/table_engines/collapsingmergetree/) <!--hide-->
......@@ -369,6 +369,39 @@ FROM
└─────────┴─────────────────────┴───────┘
```
Please note that the block size affects the result. With each new block, the `runningDifference` state is reset.
``` sql
SELECT
number,
runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1
┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
┌─number─┬─diff─┐
│  65536 │    0 │
└────────┴──────┘
set max_block_size=100000 -- the default value is 65536!
SELECT
number,
runningDifference(number + 1) AS diff
FROM numbers(100000)
WHERE diff != 1
┌─number─┬─diff─┐
│      0 │    0 │
└────────┴──────┘
```
## runningDifferenceStartingWithFirstValue
Same as [runningDifference](./other_functions.md#other_functions-runningdifference), but the first row returns the value of the first row rather than zero.
## MACNumToString(num)
Takes a UInt64 number. Interprets it as a MAC address in big endian. Returns a string containing the corresponding MAC address in the format AA:BB:CC:DD:EE:FF (colon-separated numbers in hexadecimal form).
......
......@@ -16,6 +16,7 @@
#include <memory>
#include <chrono>
#include <cstring>
#include <cassert>
#include <iostream>
#define DATE_LUT_MIN 0
......@@ -44,9 +45,16 @@ UInt8 getDayOfWeek(const cctz::civil_day & date)
}
__attribute__((__weak__)) extern bool inside_main;
DateLUTImpl::DateLUTImpl(const std::string & time_zone_)
: time_zone(time_zone_)
{
/// DateLUT should not be initialized in global constructors for the following reasons:
/// 1. It is too heavy.
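/// 'inside_main' is a weak symbol (see the declaration above): if the linked binary does not define it,
/// its address is nullptr, so the assert below is evaluated only when the symbol is actually present.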
if (&inside_main)
assert(inside_main);
size_t i = 0;
time_t start_of_day = DATE_LUT_MIN;
......
......@@ -597,10 +597,12 @@ void BaseDaemon::initialize(Application & self)
/// This must be done before any usage of DateLUT. In particular, before any logging.
if (config().has("timezone"))
{
if (0 != setenv("TZ", config().getString("timezone").data(), 1))
const std::string timezone = config().getString("timezone");
if (0 != setenv("TZ", timezone.data(), 1))
throw Poco::Exception("Cannot setenv TZ variable");
tzset();
DateLUT::setDefaultTimezone(timezone);
}
std::string log_path = config().getString("logger.log", "");
......