提交 77c884b7 编写于 作者: A alesapin

Merge branch 'master' of github.com:yandex/ClickHouse

......@@ -50,7 +50,8 @@ IncludeCategories:
- Regex: '.*'
Priority: 40
ReflowComments: false
AlignEscapedNewlinesLeft: true
AlignEscapedNewlinesLeft: false
AlignEscapedNewlines: DontAlign
# Not changed:
AccessModifierOffset: -4
......
# This strings autochanged from release_lib.sh:
set(VERSION_REVISION 54409 CACHE STRING "")
set(VERSION_REVISION 54410 CACHE STRING "") # changed manually for tests
set(VERSION_MAJOR 18 CACHE STRING "")
set(VERSION_MINOR 14 CACHE STRING "")
set(VERSION_PATCH 17 CACHE STRING "")
......
......@@ -43,6 +43,7 @@
#include <IO/WriteHelpers.h>
#include <IO/UseSSL.h>
#include <DataStreams/AsynchronousBlockInputStream.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <DataStreams/InternalTextLogsRowOutputStream.h>
#include <Parsers/ParserQuery.h>
#include <Parsers/ASTSetQuery.h>
......@@ -60,6 +61,7 @@
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Common/Config/configReadClient.h>
#include <Storages/ColumnsDescription.h>
#if USE_READLINE
#include "Suggest.h" // Y_IGNORE
......@@ -69,7 +71,6 @@
#pragma GCC optimize("-fno-var-tracking-assignments")
#endif
/// http://en.wikipedia.org/wiki/ANSI_escape_code
/// Similar codes \e[s, \e[u don't work in VT100 and Mosh.
......@@ -875,11 +876,12 @@ private:
/// Receive description of table structure.
Block sample;
if (receiveSampleBlock(sample))
ColumnsDescription columns_description;
if (receiveSampleBlock(sample, columns_description))
{
/// If structure was received (thus, server has not thrown an exception),
/// send our data with that structure.
sendData(sample);
sendData(sample, columns_description);
receiveEndOfQuery();
}
}
......@@ -917,7 +919,7 @@ private:
}
void sendData(Block & sample)
void sendData(Block & sample, const ColumnsDescription & columns_description)
{
/// If INSERT data must be sent.
const ASTInsertQuery * parsed_insert_query = typeid_cast<const ASTInsertQuery *>(&*parsed_query);
......@@ -928,19 +930,19 @@ private:
{
/// Send data contained in the query.
ReadBufferFromMemory data_in(parsed_insert_query->data, parsed_insert_query->end - parsed_insert_query->data);
sendDataFrom(data_in, sample);
sendDataFrom(data_in, sample, columns_description);
}
else if (!is_interactive)
{
/// Send data read from stdin.
sendDataFrom(std_in, sample);
sendDataFrom(std_in, sample, columns_description);
}
else
throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT);
}
void sendDataFrom(ReadBuffer & buf, Block & sample)
void sendDataFrom(ReadBuffer & buf, Block & sample, const ColumnsDescription & columns_description)
{
String current_format = insert_format;
......@@ -952,6 +954,10 @@ private:
BlockInputStreamPtr block_input = context.getInputFormat(
current_format, buf, sample, insert_format_max_block_size);
const auto & column_defaults = columns_description.defaults;
if (!column_defaults.empty())
block_input = std::make_shared<AddingDefaultsBlockInputStream>(block_input, column_defaults, context);
BlockInputStreamPtr async_block_input = std::make_shared<AsynchronousBlockInputStream>(block_input);
async_block_input->readPrefix();
......@@ -1089,7 +1095,7 @@ private:
/// Receive the block that serves as an example of the structure of table where data will be inserted.
bool receiveSampleBlock(Block & out)
bool receiveSampleBlock(Block & out, ColumnsDescription & columns_description)
{
while (true)
{
......@@ -1110,6 +1116,10 @@ private:
onLogData(packet.block);
break;
case Protocol::Server::TableColumns:
columns_description = ColumnsDescription::parse(packet.multistring_message[1]);
return receiveSampleBlock(out, columns_description);
default:
throw NetException("Unexpected packet from server (expected Data, Exception or Log, got "
+ String(Protocol::Server::toString(packet.type)) + ")", ErrorCodes::UNEXPECTED_PACKET_FROM_SERVER);
......
......@@ -30,6 +30,7 @@
#include <Storages/StorageMemory.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Core/ExternalTable.h>
#include <Storages/ColumnDefault.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include "TCPHandler.h"
......@@ -360,6 +361,14 @@ void TCPHandler::processInsertQuery(const Settings & global_settings)
*/
state.io.out->writePrefix();
/// Send ColumnsDescription for insertion table
if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA)
{
const auto & db_and_table = query_context.getInsertionTable();
if (auto * columns = ColumnsDescription::loadFromContext(query_context, db_and_table.first, db_and_table.second))
sendTableColumns(*columns);
}
/// Send block to the client - table structure.
Block block = state.io.out->getHeader();
......@@ -860,6 +869,16 @@ void TCPHandler::sendLogData(const Block & block)
out->next();
}
/// Sends a TableColumns packet: the packet type followed by two binary strings
/// (a table name and the serialized columns description), then flushes the output buffer.
void TCPHandler::sendTableColumns(const ColumnsDescription & columns)
{
    writeVarUInt(Protocol::Server::TableColumns, *out);

    /// First string: external table name. An empty name denotes the main table.
    writeStringBinary("", *out);

    /// Second string: textual form of the columns description.
    const auto serialized_columns = columns.toString();
    writeStringBinary(serialized_columns, *out);

    out->next();
}
void TCPHandler::sendException(const Exception & e, bool with_stack_trace)
{
......
......@@ -144,6 +144,7 @@ private:
void sendHello();
void sendData(const Block & block); /// Write a block to the network.
void sendLogData(const Block & block);
void sendTableColumns(const ColumnsDescription & columns);
void sendException(const Exception & e, bool with_stack_trace);
void sendProgress();
void sendLogs();
......
......@@ -603,6 +603,10 @@ Connection::Packet Connection::receivePacket()
res.block = receiveLogData();
return res;
case Protocol::Server::TableColumns:
res.multistring_message = receiveMultistringMessage(res.type);
return res;
case Protocol::Server::EndOfStream:
return res;
......@@ -712,6 +716,16 @@ std::unique_ptr<Exception> Connection::receiveException()
}
/// Reads the string payload of a packet of type `msg_type`.
/// The number of strings is fixed per packet type (Protocol::Server::stringsInMessage);
/// each one is read from the input buffer in binary string format, in order.
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
{
    const size_t strings_count = Protocol::Server::stringsInMessage(msg_type);

    std::vector<String> strings(strings_count);
    for (auto & str : strings)
        readStringBinary(str, *in);

    return strings;
}
Progress Connection::receiveProgress()
{
//LOG_TRACE(log_wrapper.get(), "Receiving progress");
......
#pragma once
#include <optional>
#include <common/logger_useful.h>
#include <Poco/Net/StreamSocket.h>
......@@ -96,6 +98,7 @@ public:
Block block;
std::unique_ptr<Exception> exception;
std::vector<String> multistring_message;
Progress progress;
BlockStreamProfileInfo profile_info;
......@@ -254,6 +257,7 @@ private:
Block receiveLogData();
Block receiveDataImpl(BlockInputStreamPtr & stream);
std::vector<String> receiveMultistringMessage(UInt64 msg_type);
std::unique_ptr<Exception> receiveException();
Progress receiveProgress();
BlockStreamProfileInfo receiveProfileInfo();
......
......@@ -99,6 +99,13 @@ void Block::insertUnique(ColumnWithTypeAndName && elem)
}
/// Removes the columns at all given positions.
/// The set is walked from the largest position to the smallest,
/// presumably so that removing one column does not shift the positions still waiting to be erased.
void Block::erase(const std::set<size_t> & positions)
{
    auto it = positions.end();
    while (it != positions.begin())
    {
        --it;
        erase(*it);
    }
}
void Block::erase(size_t position)
{
if (data.empty())
......
......@@ -2,6 +2,7 @@
#include <vector>
#include <list>
#include <set>
#include <map>
#include <initializer_list>
......@@ -51,6 +52,8 @@ public:
void insertUnique(ColumnWithTypeAndName && elem);
/// remove the column at the specified position
void erase(size_t position);
/// remove the columns at the specified positions
void erase(const std::set<size_t> & positions);
/// remove the column with the specified name
void erase(const String & name);
......
......@@ -58,4 +58,20 @@ void BlockInfo::read(ReadBuffer & in)
}
}
/// Marks the value at (column_idx, row_idx) as missing, i.e. to be replaced with the column default.
/// Creates the mask for the column on first use and grows it so that it covers `row_idx`.
void BlockMissingValues::setBit(size_t column_idx, size_t row_idx)
{
    RowsBitMask & mask = rows_mask_by_column_id[column_idx];

    /// Only ever grow the mask: an unconditional resize(row_idx + 1) would truncate it
    /// and drop bits already set for later rows when rows are marked out of order.
    if (mask.size() <= row_idx)
        mask.resize(row_idx + 1);

    mask[row_idx] = true;
}
/// Returns the missing-values mask stored for the column, or a shared empty mask
/// when nothing was recorded for that column.
const BlockMissingValues::RowsBitMask & BlockMissingValues::getDefaultsBitmask(size_t column_idx) const
{
    /// Immutable fallback shared by all callers; const so it can never be modified after initialization.
    static const RowsBitMask none;

    const auto it = rows_mask_by_column_id.find(column_idx);
    if (it == rows_mask_by_column_id.end())
        return none;

    return it->second;
}
}
#pragma once
#include <unordered_map>
#include <Core/Types.h>
......@@ -43,4 +45,24 @@ struct BlockInfo
void read(ReadBuffer & in);
};
/// Block extension to support delayed defaults.
/// AddingDefaultsBlockInputStream uses it to replace missing values with column defaults.
class BlockMissingValues
{
public:
    /// One flag per row of a single column.
    using RowsBitMask = std::vector<bool>;

    /// Flags the value at (column_idx, row_idx) as missing.
    void setBit(size_t column_idx, size_t row_idx);

    /// Mask recorded for the column (see the definition for the case when none was recorded).
    const RowsBitMask & getDefaultsBitmask(size_t column_idx) const;

    /// Number of columns that have a mask.
    size_t size() const { return rows_mask_by_column_id.size(); }

    /// True when no column has a mask.
    bool empty() const { return rows_mask_by_column_id.empty(); }

    /// Drops all masks.
    void clear() { rows_mask_by_column_id.clear(); }

private:
    using RowsMaskByColumnId = std::unordered_map<size_t, RowsBitMask>;

    /// If rows_mask_by_column_id[column_id][row_id] is true, the related value in the Block should be replaced
    /// with the column default. It may contain fewer columns and rows than the related block.
    RowsMaskByColumnId rows_mask_by_column_id;
};
}
......@@ -51,6 +51,7 @@
/// Two-level (bucketed) aggregation is incompatible if servers are inconsistent in these rules
/// (keys will be placed in different buckets and result will not be fully aggregated).
#define DBMS_MIN_REVISION_WITH_CURRENT_AGGREGATION_VARIANT_SELECTION_METHOD 54408
#define DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA 54410
#define DBMS_MIN_REVISION_WITH_LOW_CARDINALITY_TYPE 54405
......
......@@ -69,7 +69,8 @@ namespace Protocol
Totals = 7, /// A block with totals (compressed or not).
Extremes = 8, /// A block with minimums and maximums (compressed or not).
TablesStatusResponse = 9, /// A response to TablesStatus request.
Log = 10 /// System logs of the query execution
Log = 10, /// System logs of the query execution
TableColumns = 11, /// Columns' description for default values calculation
};
/// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
......@@ -78,11 +79,24 @@ namespace Protocol
/// See https://www.securecoding.cert.org/confluence/display/cplusplus/INT36-CPP.+Do+not+use+out-of-range+enumeration+values
inline const char * toString(UInt64 packet)
{
static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals", "Extremes", "TablesStatusResponse", "Log" };
return packet < 11
static const char * data[] = { "Hello", "Data", "Exception", "Progress", "Pong", "EndOfStream", "ProfileInfo", "Totals",
"Extremes", "TablesStatusResponse", "Log", "TableColumns" };
return packet < 12
? data[packet]
: "Unknown packet";
}
/// Number of binary strings that make up the payload of a multi-string packet of the given type.
/// Only TableColumns carries strings (two of them); every other type yields 0.
inline size_t stringsInMessage(UInt64 msg_type)
{
    if (msg_type == TableColumns)
        return 2;

    return 0;
}
}
/// Packet types that client transmits.
......@@ -103,8 +117,8 @@ namespace Protocol
inline const char * toString(UInt64 packet)
{
static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest" };
return packet < 6
static const char * data[] = { "Hello", "Query", "Data", "Cancel", "Ping", "TablesStatusRequest", "KeepAlive" };
return packet < 7
? data[packet]
: "Unknown packet";
}
......
#include <Common/typeid_cast.h>
#include <Functions/FunctionHelpers.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/evaluateMissingDefaults.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnConst.h>
#include <Columns/FilterDescription.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypesDecimal.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeFixedString.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int SIZES_OF_COLUMNS_DOESNT_MATCH;
extern const int TYPE_MISMATCH;
}
/// Wraps `input` as the only child stream; the header is taken from the child unchanged.
/// The column defaults are copied, the context is kept by reference.
AddingDefaultsBlockInputStream::AddingDefaultsBlockInputStream(
    const BlockInputStreamPtr & input,
    const ColumnDefaults & column_defaults_,
    const Context & context_)
    : column_defaults(column_defaults_)
    , context(context_)
{
    children.emplace_back(input);
    header = input->getHeader();
}
/// Reads the next block from the child stream and, for the columns that have a default expression,
/// replaces the values flagged in the child's BlockMissingValues with freshly evaluated defaults.
/// The block is returned untouched when it is empty, when there are no column defaults,
/// or when the child reports no missing values.
Block AddingDefaultsBlockInputStream::readImpl()
{
Block res = children.back()->read();
if (!res)
return res;
if (column_defaults.empty())
return res;
const BlockMissingValues & block_missing_values = children.back()->getMissingValues();
if (block_missing_values.empty())
return res;
/// Work on a copy so that `res` keeps the values that were actually read.
Block evaluate_block{res};
/// Remove the columns that have defaults, so that they are recalculated below.
for (const auto & column : column_defaults)
if (evaluate_block.has(column.first))
evaluate_block.erase(column.first);
/// NOTE(review): if every column of the block has a default, evaluate_block is empty at this point -
/// confirm that evaluateMissingDefaults still produces columns with res.rows() rows in that case
/// (checkCalculated below throws on a size mismatch).
evaluateMissingDefaults(evaluate_block, header.getNamesAndTypesList(), column_defaults, context, false);
/// Rebuilt (non-numeric) columns, keyed by their position in `res`; swapped in after the loop.
std::unordered_map<size_t, MutableColumnPtr> mixed_columns;
for (const ColumnWithTypeAndName & column_def : evaluate_block)
{
const String & column_name = column_def.name;
/// Columns without a default expression were only inputs for the evaluation.
if (column_defaults.count(column_name) == 0)
continue;
size_t block_column_position = res.getPositionByName(column_name);
ColumnWithTypeAndName & column_read = res.getByPosition(block_column_position);
/// The mask is looked up by the column position in the block that was read.
const auto & defaults_mask = block_missing_values.getDefaultsBitmask(block_column_position);
checkCalculated(column_read, column_def, defaults_mask.size());
/// An empty mask means nothing is flagged as missing in this column.
if (!defaults_mask.empty())
{
/// TODO: FixedString
if (isColumnedAsNumber(column_read.type) || isDecimal(column_read.type))
{
/// Number-like and decimal columns are patched in place, directly inside `res`.
MutableColumnPtr column_mixed = (*std::move(column_read.column)).mutate();
mixNumberColumns(column_read.type->getTypeId(), column_mixed, column_def.column, defaults_mask);
column_read.column = std::move(column_mixed);
}
else
{
/// Other columns are rebuilt row by row into a new column.
MutableColumnPtr column_mixed = mixColumns(column_read, column_def, defaults_mask);
mixed_columns.emplace(block_column_position, std::move(column_mixed));
}
}
}
if (!mixed_columns.empty())
{
/// Replace the rebuilt columns, keeping the block structure.
MutableColumns mutation = res.mutateColumns();
for (size_t position = 0; position < mutation.size(); ++position)
{
auto it = mixed_columns.find(position);
if (it != mixed_columns.end())
mutation[position] = std::move(it->second);
}
res.setColumns(std::move(mutation));
}
return res;
}
/// Validates that the evaluated defaults column can be mixed into the column that was read.
/// Throws SIZES_OF_COLUMNS_DOESNT_MATCH when the two columns differ in size or when the mask
/// (`defaults_needed` rows) is longer than the column; throws TYPE_MISMATCH when their types differ.
void AddingDefaultsBlockInputStream::checkCalculated(const ColumnWithTypeAndName & col_read,
                                                     const ColumnWithTypeAndName & col_defaults,
                                                     size_t defaults_needed) const
{
    const size_t column_size = col_read.column->size();

    if (column_size != col_defaults.column->size())
        throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (column_size < defaults_needed)
        throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);

    if (!col_read.type->equals(*col_defaults.type))
        throw Exception("Mismatch column types while adding defaults", ErrorCodes::TYPE_MISMATCH);
}
/// Overwrites, in place, the values of `column_mixed` at the rows flagged in `defaults_mask`
/// with the values from `column_defs`. The concrete column type is selected from `type_idx`
/// through callOnIndexAndDataType. Throws LOGICAL_ERROR when no type handled the columns.
void AddingDefaultsBlockInputStream::mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & column_mixed, const ColumnPtr & column_defs,
const BlockMissingValues::RowsBitMask & defaults_mask) const
{
/// Invoked with the data type candidate; returns true once the columns were handled.
auto call = [&](const auto & types) -> bool
{
using Types = std::decay_t<decltype(types)>;
using DataType = typename Types::LeftType;
/// String and FixedString are excluded at compile time: the branch below needs DataType::FieldType and a vector/decimal column.
if constexpr (!std::is_same_v<DataType, DataTypeString> && !std::is_same_v<DataType, DataTypeFixedString>)
{
using FieldType = typename DataType::FieldType;
using ColVecType = std::conditional_t<IsDecimalNumber<FieldType>, ColumnDecimal<FieldType>, ColumnVector<FieldType>>;
/// The destination must really be a column of this concrete type, otherwise this candidate does not apply.
auto col_read = typeid_cast<ColVecType *>(column_mixed.get());
if (!col_read)
return false;
typename ColVecType::Container & dst = col_read->getData();
if (auto const_col_defs = checkAndGetColumnConst<ColVecType>(column_defs.get()))
{
/// Constant defaults: a single value is written to every flagged row.
FieldType value = checkAndGetColumn<ColVecType>(const_col_defs->getDataColumnPtr().get())->getData()[0];
for (size_t i = 0; i < defaults_mask.size(); ++i)
if (defaults_mask[i])
dst[i] = value;
return true;
}
else if (auto col_defs = checkAndGetColumn<ColVecType>(column_defs.get()))
{
/// Full defaults column: each flagged row takes the value from the same row.
auto & src = col_defs->getData();
for (size_t i = 0; i < defaults_mask.size(); ++i)
if (defaults_mask[i])
dst[i] = src[i];
return true;
}
}
return false;
};
if (!callOnIndexAndDataType<void>(type_idx, call))
throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR);
}
/// Builds a new column with as many rows as `col_read`: a row flagged in `defaults_mask` is taken
/// from `col_defaults`, every other row (including rows beyond the end of the mask) from `col_read`.
MutableColumnPtr AddingDefaultsBlockInputStream::mixColumns(const ColumnWithTypeAndName & col_read,
                                                            const ColumnWithTypeAndName & col_defaults,
                                                            const BlockMissingValues::RowsBitMask & defaults_mask) const
{
    const auto & read_column = *col_read.column;
    const auto & defaults_column = *col_defaults.column;

    const size_t rows = read_column.size();
    const size_t mask_size = defaults_mask.size();
    const bool defaults_are_const = defaults_column.isColumnConst();

    MutableColumnPtr column_mixed = read_column.cloneEmpty();

    /// Rows covered by the mask: pick the source per row.
    for (size_t row = 0; row < mask_size; ++row)
    {
        if (!defaults_mask[row])
            column_mixed->insertFrom(read_column, row);
        else if (defaults_are_const)
            column_mixed->insert(defaults_column[row]);
        else
            column_mixed->insertFrom(defaults_column, row);
    }

    /// Rows past the end of the mask are not flagged: copy them as they were read.
    for (size_t row = mask_size; row < rows; ++row)
        column_mixed->insertFrom(read_column, row);

    return column_mixed;
}
}
#pragma once
#include <DataStreams/IProfilingBlockInputStream.h>
#include <Storages/ColumnDefault.h>
#include <Interpreters/Context.h>
namespace DB
{
/// Adds defaults to columns using the BlockMissingValues bitmask provided by the child input stream:
/// values flagged there are replaced with the evaluated column defaults.
class AddingDefaultsBlockInputStream : public IProfilingBlockInputStream
{
public:
    AddingDefaultsBlockInputStream(
        const BlockInputStreamPtr & input,
        const ColumnDefaults & column_defaults_,
        const Context & context_);

    Block getHeader() const override { return header; }
    String getName() const override { return "AddingDefaults"; }

protected:
    Block readImpl() override;

private:
    /// Sanity checks before mixing: column sizes, mask length and column types.
    void checkCalculated(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults, size_t defaults_needed) const;

    /// In-place replacement of the flagged rows of `col_mixed`.
    void mixNumberColumns(TypeIndex type_idx, MutableColumnPtr & col_mixed, const ColumnPtr & col_defaults,
                          const BlockMissingValues::RowsBitMask & defaults_mask) const;

    /// Row-by-row construction of a new column out of the read values and the defaults.
    MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, const ColumnWithTypeAndName & col_defaults,
                                const BlockMissingValues::RowsBitMask & defaults_mask) const;

    Block header;
    const ColumnDefaults column_defaults;
    const Context & context;
};
}
#include <DataStreams/AddingDefaultBlockInputStream.h>
#include <DataStreams/AddingMissedBlockInputStream.h>
#include <Interpreters/addMissingDefaults.h>
namespace DB
{
AddingDefaultBlockInputStream::AddingDefaultBlockInputStream(
AddingMissedBlockInputStream::AddingMissedBlockInputStream(
const BlockInputStreamPtr & input_,
const Block & header_,
const ColumnDefaults & column_defaults_,
......@@ -16,7 +16,7 @@ AddingDefaultBlockInputStream::AddingDefaultBlockInputStream(
children.emplace_back(input);
}
Block AddingDefaultBlockInputStream::readImpl()
Block AddingMissedBlockInputStream::readImpl()
{
Block src = children.back()->read();
if (!src)
......
......@@ -14,16 +14,16 @@ namespace DB
* 3. Columns that materialized from other columns (materialized columns)
* All three types of columns are materialized (not constants).
*/
class AddingDefaultBlockInputStream : public IProfilingBlockInputStream
class AddingMissedBlockInputStream : public IProfilingBlockInputStream
{
public:
AddingDefaultBlockInputStream(
AddingMissedBlockInputStream(
const BlockInputStreamPtr & input_,
const Block & header_,
const ColumnDefaults & column_defaults_,
const Context & context_);
String getName() const override { return "AddingDefault"; }
String getName() const override { return "AddingMissed"; }
Block getHeader() const override { return header; }
private:
......
......@@ -63,6 +63,12 @@ public:
*/
virtual Block read() = 0;
/// Missing-values information of the stream.
/// This base implementation always returns the same empty BlockMissingValues object.
virtual const BlockMissingValues & getMissingValues() const
{
    static const BlockMissingValues no_missing_values;
    return no_missing_values;
}
/** Read something before starting all data or after the end of all data.
* In the `readSuffix` function, you can implement a finalization that can lead to an exception.
* readPrefix() must be called before the first call to read().
......
......@@ -4,7 +4,8 @@
#include <IO/ReadBufferFromMemory.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/InputStreamFromASTInsertQuery.h>
#include <DataStreams/AddingDefaultsBlockInputStream.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
......@@ -44,6 +45,10 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
input_buffer_contacenated = std::make_unique<ConcatReadBuffer>(buffers);
res_stream = context.getInputFormat(format, *input_buffer_contacenated, streams.out->getHeader(), context.getSettings().max_insert_block_size);
auto columns_description = ColumnsDescription::loadFromContext(context, ast_insert_query->database, ast_insert_query->table);
if (columns_description && !columns_description->defaults.empty())
res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, columns_description->defaults, context);
}
}
......@@ -514,6 +514,13 @@ inline bool isNumber(const T & data_type)
return which.isInt() || which.isUInt() || which.isFloat();
}
template <typename T>
inline bool isColumnedAsNumber(const T & data_type)
{
WhichDataType which(data_type);
return which.isInt() || which.isUInt() || which.isFloat() || which.isDateOrDateTime() || which.isUUID();
}
template <typename T>
inline bool isString(const T & data_type)
{
......
#pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <Common/ArenaWithFreeLists.h>
#include <Common/CurrentMetrics.h>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <ext/bit_cast.h>
#include <cmath>
#include <atomic>
#include <chrono>
#include <vector>
#include <cmath>
#include <map>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <pcg_random.hpp>
#include <shared_mutex>
#include <Common/ArenaWithFreeLists.h>
#include <Common/CurrentMetrics.h>
#include <ext/bit_cast.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB
{
class CacheDictionary final : public IDictionary
{
public:
CacheDictionary(const std::string & name, const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime,
CacheDictionary(
const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
const size_t size);
CacheDictionary(const CacheDictionary & other);
......@@ -42,16 +44,12 @@ public:
double getHitRate() const override
{
return static_cast<double>(hit_count.load(std::memory_order_acquire)) /
query_count.load(std::memory_order_relaxed);
return static_cast<double>(hit_count.load(std::memory_order_acquire)) / query_count.load(std::memory_order_relaxed);
}
size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); }
double getLoadFactor() const override
{
return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size;
}
double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }
bool isCached() const override { return true; }
......@@ -63,10 +61,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; }
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override
{
return creation_time;
}
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
bool isInjective(const std::string & attribute_name) const override
{
......@@ -77,14 +72,15 @@ public:
void toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const override;
void isInVectorVector(const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorVector(
const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
void isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const override;
void isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const override;
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
#define DECLARE(TYPE)\
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -104,9 +100,11 @@ public:
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const PaddedPODArray<TYPE> & def,\
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const PaddedPODArray<Key> & ids, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -124,11 +122,11 @@ public:
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def,
ColumnString * const out) const;
void
getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out)
const;
#define DECLARE(TYPE)\
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const TYPE def, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -146,17 +144,17 @@ public:
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def,
ColumnString * const out) const;
void getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const;
void has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const override;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
template <typename Value> using ContainerType = Value[];
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
template <typename Value>
using ContainerType = Value[];
template <typename Value>
using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
struct CellMetadata final
{
......@@ -183,19 +181,39 @@ private:
{
AttributeUnderlyingType type;
std::variant<
UInt8, UInt16, UInt32, UInt64,
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8, Int16, Int32, Int64,
Decimal32, Decimal64, Decimal128,
Float32, Float64,
String> null_values;
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant<
ContainerPtrType<UInt8>, ContainerPtrType<UInt16>, ContainerPtrType<UInt32>, ContainerPtrType<UInt64>,
ContainerPtrType<UInt8>,
ContainerPtrType<UInt16>,
ContainerPtrType<UInt32>,
ContainerPtrType<UInt64>,
ContainerPtrType<UInt128>,
ContainerPtrType<Int8>, ContainerPtrType<Int16>, ContainerPtrType<Int32>, ContainerPtrType<Int64>,
ContainerPtrType<Decimal32>, ContainerPtrType<Decimal64>, ContainerPtrType<Decimal128>,
ContainerPtrType<Float32>, ContainerPtrType<Float64>,
ContainerPtrType<StringRef>> arrays;
ContainerPtrType<Int8>,
ContainerPtrType<Int16>,
ContainerPtrType<Int32>,
ContainerPtrType<Int64>,
ContainerPtrType<Decimal32>,
ContainerPtrType<Decimal64>,
ContainerPtrType<Decimal128>,
ContainerPtrType<Float32>,
ContainerPtrType<Float64>,
ContainerPtrType<StringRef>>
arrays;
};
void createAttributes();
......@@ -205,29 +223,17 @@ private:
template <typename OutputType, typename DefaultGetter>
void getItemsNumber(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const;
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void getItemsNumberImpl(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const;
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const;
template <typename DefaultGetter>
void getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter && get_default) const;
void getItemsString(Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const;
template <typename PresentIdHandler, typename AbsentIdHandler>
void update(
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const;
void update(const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const;
PaddedPODArray<Key> getCachedIds() const;
......@@ -251,10 +257,7 @@ private:
FindResult findCellIdx(const Key & id, const CellMetadata::time_point_t now) const;
template <typename AncestorType>
void isInImpl(
const PaddedPODArray<Key> & child_ids,
const AncestorType & ancestor_ids,
PaddedPODArray<UInt8> & out) const;
void isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const;
const std::string name;
const DictionaryStructure dict_struct;
......
#include "CacheDictionary.h"
#include <ext/size.h>
#include <ext/map.h>
#include <ext/range.h>
#include <Columns/ColumnsNumber.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnsNumber.h>
#include <ext/map.h>
#include <ext/range.h>
#include <ext/size.h>
namespace ProfileEvents
{
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
}
namespace CurrentMetrics
{
extern const Metric DictCacheRequests;
extern const Metric DictCacheRequests;
}
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
......@@ -36,12 +35,11 @@ namespace ErrorCodes
template <typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumber(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
{
if (false) {}
if (false)
{
}
#define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsNumberImpl<TYPE, OutputType>(attribute, ids, out, std::forward<DefaultGetter>(get_default));
......@@ -60,16 +58,12 @@ void CacheDictionary::getItemsNumber(
DISPATCH(Decimal64)
DISPATCH(Decimal128)
#undef DISPATCH
else
throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
else throw Exception("Unexpected type of attribute: " + toString(attribute.type), ErrorCodes::LOGICAL_ERROR);
}
template <typename AttributeType, typename OutputType, typename DefaultGetter>
void CacheDictionary::getItemsNumberImpl(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ResultArrayType<OutputType> & out,
DefaultGetter && get_default) const
Attribute & attribute, const PaddedPODArray<Key> & ids, ResultArrayType<OutputType> & out, DefaultGetter && get_default) const
{
/// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
std::unordered_map<Key, std::vector<size_t>> outdated_ids;
......@@ -122,31 +116,28 @@ void CacheDictionary::getItemsNumberImpl(
return;
std::vector<Key> required_ids(outdated_ids.size());
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids),
[] (auto & pair) { return pair.first; });
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
/// request new values
update(required_ids,
[&] (const auto id, const auto cell_idx)
{
const auto attribute_value = attribute_array[cell_idx];
update(
required_ids,
[&](const auto id, const auto cell_idx)
{
const auto attribute_value = attribute_array[cell_idx];
for (const size_t row : outdated_ids[id])
out[row] = static_cast<OutputType>(attribute_value);
},
[&] (const auto id, const auto)
{
for (const size_t row : outdated_ids[id])
out[row] = get_default(row);
});
for (const size_t row : outdated_ids[id])
out[row] = static_cast<OutputType>(attribute_value);
},
[&](const auto id, const auto)
{
for (const size_t row : outdated_ids[id])
out[row] = get_default(row);
});
}
template <typename DefaultGetter>
void CacheDictionary::getItemsString(
Attribute & attribute,
const PaddedPODArray<Key> & ids,
ColumnString * out,
DefaultGetter && get_default) const
Attribute & attribute, const PaddedPODArray<Key> & ids, ColumnString * out, DefaultGetter && get_default) const
{
const auto rows = ext::size(ids);
......@@ -245,22 +236,22 @@ void CacheDictionary::getItemsString(
if (!outdated_ids.empty())
{
std::vector<Key> required_ids(outdated_ids.size());
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids),
[] (auto & pair) { return pair.first; });
std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
update(required_ids,
[&] (const auto id, const auto cell_idx)
{
const auto attribute_value = attribute_array[cell_idx];
update(
required_ids,
[&](const auto id, const auto cell_idx)
{
const auto attribute_value = attribute_array[cell_idx];
map[id] = String{attribute_value};
total_length += (attribute_value.size + 1) * outdated_ids[id].size();
},
[&] (const auto id, const auto)
{
for (const auto row : outdated_ids[id])
total_length += get_default(row).size + 1;
});
map[id] = String{attribute_value};
total_length += (attribute_value.size + 1) * outdated_ids[id].size();
},
[&](const auto id, const auto)
{
for (const auto row : outdated_ids[id])
total_length += get_default(row).size + 1;
});
}
out->getChars().reserve(total_length);
......@@ -277,19 +268,13 @@ void CacheDictionary::getItemsString(
template <typename PresentIdHandler, typename AbsentIdHandler>
void CacheDictionary::update(
const std::vector<Key> & requested_ids,
PresentIdHandler && on_cell_updated,
AbsentIdHandler && on_id_not_found) const
const std::vector<Key> & requested_ids, PresentIdHandler && on_cell_updated, AbsentIdHandler && on_id_not_found) const
{
std::unordered_map<Key, UInt8> remaining_ids{requested_ids.size()};
for (const auto id : requested_ids)
remaining_ids.insert({ id, 0 });
remaining_ids.insert({id, 0});
std::uniform_int_distribution<UInt64> distribution
{
dict_lifetime.min_sec,
dict_lifetime.max_sec
};
std::uniform_int_distribution<UInt64> distribution{dict_lifetime.min_sec, dict_lifetime.max_sec};
const ProfilingScopedWriteRWLock write_lock{rw_lock, ProfileEvents::DictCacheLockWriteNs};
......@@ -310,10 +295,8 @@ void CacheDictionary::update(
const auto & ids = id_column->getData();
/// cache column pointers
const auto column_ptrs = ext::map<std::vector>(ext::range(0, attributes.size()), [&block] (size_t i)
{
return block.safeGetByPosition(i + 1).column.get();
});
const auto column_ptrs = ext::map<std::vector>(
ext::range(0, attributes.size()), [&block](size_t i) { return block.safeGetByPosition(i + 1).column.get(); });
for (const auto i : ext::range(0, ids.size()))
{
......
#include "ClickHouseDictionarySource.h"
#include "ExternalQueryBuilder.h"
#include "writeParenthesisedString.h"
#include <memory>
#include <Client/ConnectionPool.h>
#include <DataStreams/RemoteBlockInputStream.h>
#include "readInvalidateQuery.h"
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/executeQuery.h>
#include <Common/isLocalAddress.h>
#include <memory>
#include <ext/range.h>
#include <IO/ConnectionTimeouts.h>
#include "DictionarySourceFactory.h"
#include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h"
#include "readInvalidateQuery.h"
#include "writeParenthesisedString.h"
namespace DB
{
namespace ErrorCodes
{
extern const int UNSUPPORTED_METHOD;
......@@ -25,61 +24,81 @@ namespace ErrorCodes
/// Passed as the first argument of the ConnectionPool built in createPool()
/// (presumably the cap on simultaneously open connections - confirm against ConnectionPool).
static const size_t MAX_CONNECTIONS = 16;
/// Builds a ConnectionPoolWithFailover that wraps exactly one ConnectionPool for the given endpoint.
///   host, port       - address of the ClickHouse server to connect to.
///   secure           - selects Protocol::Secure::Enable or Protocol::Secure::Disable.
///   db, user, password - forwarded unchanged to the ConnectionPool.
///   context          - only its settings are read, to derive the TCP timeouts.
/// Compression is always requested (Protocol::Compression::Enable) and the failover wrapper
/// is created with LoadBalancing::RANDOM.
static ConnectionPoolWithFailoverPtr createPool(
const std::string & host, UInt16 port, bool secure, const std::string & db,
const std::string & user, const std::string & password, const Context & context)
const std::string & host,
UInt16 port,
bool secure,
const std::string & db,
const std::string & user,
const std::string & password,
const Context & context)
{
/// Timeouts are taken from the settings of the supplied context.
auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(context.getSettingsRef());
ConnectionPoolPtrs pools;
/// The string literal below is the client name given to the pool.
pools.emplace_back(std::make_shared<ConnectionPool>(
MAX_CONNECTIONS, host, port, db, user, password, timeouts, "ClickHouseDictionarySource",
Protocol::Compression::Enable,
secure ? Protocol::Secure::Enable : Protocol::Secure::Disable));
MAX_CONNECTIONS,
host,
port,
db,
user,
password,
timeouts,
"ClickHouseDictionarySource",
Protocol::Compression::Enable,
secure ? Protocol::Secure::Enable : Protocol::Secure::Disable));
return std::make_shared<ConnectionPoolWithFailover>(pools, LoadBalancing::RANDOM);
}
/// Constructs the source from the configuration subtree rooted at `config_prefix`.
/// Keys read below `config_prefix`:
///   .host, .port, .table            - read without a default value;
///   .secure                         - defaults to false;
///   .user, .password, .db, .where,
///   .update_field, .invalidate_query - default to an empty string.
/// The top-level key "tcp_port" (default 0) is passed to isLocalAddress() together with
/// {host, port} to decide `is_local`. A connection pool is created only when the source
/// is not local; otherwise `pool` is nullptr. The "load all" query text is composed once here.
/// `update_time` starts at the epoch (from_time_t(0)).
/// NOTE(review): `query_builder`, `is_local` and `pool` are initialized from other members,
/// so they rely on the member declaration order in the header - confirm before reordering.
ClickHouseDictionarySource::ClickHouseDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block, Context & context)
: update_time{std::chrono::system_clock::from_time_t(0)},
dict_struct{dict_struct_},
host{config.getString(config_prefix + ".host")},
port(config.getInt(config_prefix + ".port")),
secure(config.getBool(config_prefix + ".secure", false)),
user{config.getString(config_prefix + ".user", "")},
password{config.getString(config_prefix + ".password", "")},
db{config.getString(config_prefix + ".db", "")},
table{config.getString(config_prefix + ".table")},
where{config.getString(config_prefix + ".where", "")},
update_field{config.getString(config_prefix + ".update_field", "")},
invalidate_query{config.getString(config_prefix + ".invalidate_query", "")},
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks},
sample_block{sample_block}, context(context),
is_local{isLocalAddress({ host, port }, config.getInt("tcp_port", 0))},
pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)},
load_all_query{query_builder.composeLoadAllQuery()}
{}
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block,
Context & context)
: update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, host{config.getString(config_prefix + ".host")}
, port(config.getInt(config_prefix + ".port"))
, secure(config.getBool(config_prefix + ".secure", false))
, user{config.getString(config_prefix + ".user", "")}
, password{config.getString(config_prefix + ".password", "")}
, db{config.getString(config_prefix + ".db", "")}
, table{config.getString(config_prefix + ".table")}
, where{config.getString(config_prefix + ".where", "")}
, update_field{config.getString(config_prefix + ".update_field", "")}
, invalidate_query{config.getString(config_prefix + ".invalidate_query", "")}
, query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
, sample_block{sample_block}
, context(context)
, is_local{isLocalAddress({host, port}, config.getInt("tcp_port", 0))}
, pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}
, load_all_query{query_builder.composeLoadAllQuery()}
{
}
/// Copy constructor. Plain data members are copied from `other`, with two exceptions:
///   - `query_builder` is rebuilt from this object's own freshly copied members
///     (dict_struct, db, table, where) instead of being copied from `other`;
///   - `pool` is not shared with `other`: a new pool is created via createPool()
///     unless the source is local, in which case it stays nullptr.
/// NOTE(review): both exceptions read members of `*this` inside the initializer list,
/// so they rely on the member declaration order in the header - confirm before reordering.
ClickHouseDictionarySource::ClickHouseDictionarySource(const ClickHouseDictionarySource & other)
: update_time{other.update_time},
dict_struct{other.dict_struct},
host{other.host}, port{other.port},
secure{other.secure},
user{other.user}, password{other.password},
db{other.db}, table{other.table},
where{other.where},
update_field{other.update_field},
invalidate_query{other.invalidate_query},
invalidate_query_response{other.invalidate_query_response},
query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks},
sample_block{other.sample_block}, context(other.context),
is_local{other.is_local},
pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)},
load_all_query{other.load_all_query}
{}
: update_time{other.update_time}
, dict_struct{other.dict_struct}
, host{other.host}
, port{other.port}
, secure{other.secure}
, user{other.user}
, password{other.password}
, db{other.db}
, table{other.table}
, where{other.where}
, update_field{other.update_field}
, invalidate_query{other.invalidate_query}
, invalidate_query_response{other.invalidate_query_response}
, query_builder{dict_struct, db, table, where, IdentifierQuotingStyle::Backticks}
, sample_block{other.sample_block}
, context(other.context)
, is_local{other.is_local}
, pool{is_local ? nullptr : createPool(host, port, secure, db, user, password, context)}
, load_all_query{other.load_all_query}
{
}
std::string ClickHouseDictionarySource::getUpdateFieldAndDate()
{
......@@ -119,17 +138,14 @@ BlockInputStreamPtr ClickHouseDictionarySource::loadUpdatedAll()
/// Returns a stream for the given ids: the query text is composed by
/// query_builder.composeLoadIdsQuery(ids) and handed to createStreamForSelectiveLoad().
BlockInputStreamPtr ClickHouseDictionarySource::loadIds(const std::vector<UInt64> & ids)
{
return createStreamForSelectiveLoad(
query_builder.composeLoadIdsQuery(ids));
return createStreamForSelectiveLoad(query_builder.composeLoadIdsQuery(ids));
}
/// Returns a stream for the composite keys found at `requested_rows` of `key_columns`.
/// The query is composed with the ExternalQueryBuilder::IN_WITH_TUPLES method and handed to
/// createStreamForSelectiveLoad().
BlockInputStreamPtr ClickHouseDictionarySource::loadKeys(
const Columns & key_columns, const std::vector<size_t> & requested_rows)
BlockInputStreamPtr ClickHouseDictionarySource::loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows)
{
return createStreamForSelectiveLoad(
query_builder.composeLoadKeysQuery(
key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES));
query_builder.composeLoadKeysQuery(key_columns, requested_rows, ExternalQueryBuilder::IN_WITH_TUPLES));
}
bool ClickHouseDictionarySource::isModified() const
......@@ -167,7 +183,7 @@ std::string ClickHouseDictionarySource::doInvalidateQuery(const std::string & re
if (is_local)
{
auto input_block = executeQuery(request, context, true).in;
return readInvalidateQuery(dynamic_cast<IProfilingBlockInputStream&>((*input_block)));
return readInvalidateQuery(dynamic_cast<IProfilingBlockInputStream &>((*input_block)));
}
else
{
......
#pragma once
#include "IDictionarySource.h"
#include <memory>
#include <Client/ConnectionPoolWithFailover.h>
#include "DictionaryStructure.h"
#include "ExternalQueryBuilder.h"
#include <Client/ConnectionPoolWithFailover.h>
#include <memory>
#include "IDictionarySource.h"
namespace DB
{
/** Allows loading dictionaries from local or remote ClickHouse instance
* @todo use ConnectionPoolWithFailover
* @todo invent a way to keep track of source modifications
......@@ -17,10 +16,12 @@ namespace DB
class ClickHouseDictionarySource final : public IDictionarySource
{
public:
ClickHouseDictionarySource(const DictionaryStructure & dict_struct_,
ClickHouseDictionarySource(
const DictionaryStructure & dict_struct_,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const Block & sample_block, Context & context);
const Block & sample_block,
Context & context);
/// copy-constructor is provided in order to support cloneability
ClickHouseDictionarySource(const ClickHouseDictionarySource & other);
......@@ -31,8 +32,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys(
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override;
bool supportsSelectiveLoad() const override { return true; }
......
#include "ComplexKeyCacheDictionary.h"
#include "DictionaryBlockInputStream.h"
#include <Common/Arena.h>
#include <Common/BitHelpers.h>
#include <Common/randomSeed.h>
#include <Common/Stopwatch.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/ProfileEvents.h>
#include <Common/CurrentMetrics.h>
#include <ext/range.h>
#include <Common/ProfileEvents.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
#include <ext/map.h>
#include <ext/range.h>
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
/// Declarations only: these dictionary-cache profile events are defined in another translation unit.
namespace ProfileEvents
{
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
}
/// Declaration only: the DictCacheRequests metric is defined in another translation unit.
namespace CurrentMetrics
{
extern const Metric DictCacheRequests;
extern const Metric DictCacheRequests;
}
namespace DB
{
namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
......@@ -52,13 +50,19 @@ inline UInt64 ComplexKeyCacheDictionary::getCellIdx(const StringRef key) const
}
ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr, const DictionaryLifetime dict_lifetime,
ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(
const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
const size_t size)
: name{name}, dict_struct(dict_struct), source_ptr{std::move(source_ptr)}, dict_lifetime(dict_lifetime),
size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))},
size_overlap_mask{this->size - 1},
rnd_engine(randomSeed())
: name{name}
, dict_struct(dict_struct)
, source_ptr{std::move(source_ptr)}
, dict_lifetime(dict_lifetime)
, size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}
, size_overlap_mask{this->size - 1}
, rnd_engine(randomSeed())
{
if (!this->source_ptr->supportsSelectiveLoad())
throw Exception{name + ": source cannot be used with ComplexKeyCacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
......@@ -68,47 +72,56 @@ ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const std::string & name, c
/// Copy constructor: delegates to the main constructor with the name, structure, lifetime and
/// size of `other`, and with a clone of its source (other.source_ptr->clone()).
/// No other state of `other` is transferred by this constructor itself.
ComplexKeyCacheDictionary::ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other)
: ComplexKeyCacheDictionary{other.name, other.dict_struct, other.source_ptr->clone(), other.dict_lifetime, other.size}
{}
{
}
/// Fetches a String attribute for every key row into `out`, using the attribute's own
/// null value as the default.
///   attribute_name - attribute to read; looked up through getAttribute().
///   key_columns    - columns forming the composite key.
///   key_types      - passed to dict_struct.validateKeyTypes() before anything else.
///   out            - destination column, forwarded to getItemsString().
/// Throws Exception with ErrorCodes::TYPE_MISMATCH when the attribute type is not
/// convertible to String.
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
ColumnString * out) const
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
/// The default getter ignores the row index: every row gets the same null value.
const auto null_value = StringRef{std::get<String>(attribute.null_values)};
getItemsString(attribute, key_columns, out, [&] (const size_t) { return null_value; });
getItemsString(attribute, key_columns, out, [&](const size_t) { return null_value; });
}
/// Fetches a String attribute for every key row into `out`, with a per-row default:
/// the default getter returns def->getDataAt(row).
///   attribute_name - attribute to read; looked up through getAttribute().
///   key_columns    - columns forming the composite key.
///   key_types      - passed to dict_struct.validateKeyTypes() before anything else.
///   def            - column of default values; dereferenced without a null check.
///   out            - destination column, forwarded to getItemsString().
/// Throws Exception with ErrorCodes::TYPE_MISMATCH when the attribute type is not
/// convertible to String.
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, key_columns, out, [&] (const size_t row) { return def->getDataAt(row); });
getItemsString(attribute, key_columns, out, [&](const size_t row) { return def->getDataAt(row); });
}
/// Fetches a String attribute for every key row into `out`, with one constant default:
/// the default getter ignores the row index and returns a StringRef over `def`.
///   attribute_name - attribute to read; looked up through getAttribute().
///   key_columns    - columns forming the composite key.
///   key_types      - passed to dict_struct.validateKeyTypes() before anything else.
///   def            - default value used for all rows.
///   out            - destination column, forwarded to getItemsString().
/// Throws Exception with ErrorCodes::TYPE_MISMATCH when the attribute type is not
/// convertible to String.
void ComplexKeyCacheDictionary::getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const
{
dict_struct.validateKeyTypes(key_types);
auto & attribute = getAttribute(attribute_name);
if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type), ErrorCodes::TYPE_MISMATCH};
throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
ErrorCodes::TYPE_MISMATCH};
getItemsString(attribute, key_columns, out, [&] (const size_t) { return StringRef{def}; });
getItemsString(attribute, key_columns, out, [&](const size_t) { return StringRef{def}; });
}
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag,
......@@ -118,7 +131,8 @@ void ComplexKeyCacheDictionary::getString(
/// true true impossible
///
/// todo: split this func to two: find_for_get and find_for_set
ComplexKeyCacheDictionary::FindResult ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const
ComplexKeyCacheDictionary::FindResult
ComplexKeyCacheDictionary::findCellIdx(const StringRef & key, const CellMetadata::time_point_t now, const size_t hash) const
{
auto pos = hash;
auto oldest_id = pos;
......@@ -211,17 +225,20 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
return;
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows),
[] (auto & pair) { return pair.second.front(); });
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
/// request new values
update(key_columns, keys_array, required_rows,
[&] (const StringRef key, const auto)
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const auto)
{
for (const auto out_idx : outdated_keys[key])
out[out_idx] = true;
},
[&] (const StringRef key, const auto)
[&](const StringRef key, const auto)
{
for (const auto out_idx : outdated_keys[key])
out[out_idx] = false;
......@@ -242,7 +259,8 @@ void ComplexKeyCacheDictionary::createAttributes()
attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));
if (attribute.hierarchical)
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(), ErrorCodes::TYPE_MISMATCH};
throw Exception{name + ": hierarchical attributes not supported for dictionary of type " + getTypeName(),
ErrorCodes::TYPE_MISMATCH};
}
}
......@@ -273,8 +291,7 @@ void ComplexKeyCacheDictionary::freeKey(const StringRef key) const
template <typename Pool>
StringRef ComplexKeyCacheDictionary::placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
const size_t row, const Columns & key_columns, StringRefs & keys, const std::vector<DictionaryAttribute> & key_attributes, Pool & pool)
{
const auto keys_size = key_columns.size();
size_t sum_keys_size{};
......@@ -313,22 +330,27 @@ StringRef ComplexKeyCacheDictionary::placeKeysInPool(
}
}
return { place, sum_keys_size };
return {place, sum_keys_size};
}
/// Explicit instantiations of placeKeysInPool for the two pool types named below:
/// Arena and ArenaWithFreeLists.
template StringRef ComplexKeyCacheDictionary::placeKeysInPool<Arena>(
const size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, Arena & pool);
const size_t row,
const Columns & key_columns,
StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes,
Arena & pool);
template StringRef ComplexKeyCacheDictionary::placeKeysInPool<ArenaWithFreeLists>(
const size_t row, const Columns & key_columns, StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes, ArenaWithFreeLists & pool);
const size_t row,
const Columns & key_columns,
StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes,
ArenaWithFreeLists & pool);
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(
const size_t row, const Columns & key_columns) const
StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(const size_t row, const Columns & key_columns) const
{
const auto res = fixed_size_keys_pool->alloc();
auto place = res;
......@@ -340,14 +362,14 @@ StringRef ComplexKeyCacheDictionary::placeKeysInFixedSizePool(
place += key.size;
}
return { res, key_size };
return {res, key_size};
}
/// Copies the bytes referenced by `src` into memory obtained from `arena` and returns a
/// StringRef over the copy (same size as `src`).
StringRef ComplexKeyCacheDictionary::copyIntoArena(StringRef src, Arena & arena)
{
char * allocated = arena.alloc(src.size);
memcpy(allocated, src.data, src.size);
return { allocated, src.size };
return {allocated, src.size};
}
StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
......@@ -355,13 +377,14 @@ StringRef ComplexKeyCacheDictionary::copyKey(const StringRef key) const
const auto res = key_size_is_fixed ? fixed_size_keys_pool->alloc() : keys_pool->alloc(key.size);
memcpy(res, key.data, key.size);
return { res, key.size };
return {res, key.size};
}
/// Tells whether cell `idx` is considered empty.
/// A cell is empty when its key equals a default-constructed StringRef and, additionally,
/// either `idx` is not `zero_cell_idx`, or the cell's `data` equals the bit pattern of a
/// default-constructed time point. The second condition means the cell at `zero_cell_idx`
/// needs both an empty key and that default `data` value to count as empty.
bool ComplexKeyCacheDictionary::isEmptyCell(const UInt64 idx) const
{
return (cells[idx].key == StringRef{} && (idx != zero_cell_idx
|| cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
return (
cells[idx].key == StringRef{}
&& (idx != zero_cell_idx || cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t())));
}
BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
......@@ -371,8 +394,7 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};
for (auto idx : ext::range(0, cells.size()))
if (!isEmptyCell(idx)
&& !cells[idx].isDefault())
if (!isEmptyCell(idx) && !cells[idx].isDefault())
keys.push_back(cells[idx].key);
}
......@@ -382,26 +404,25 @@ BlockInputStreamPtr ComplexKeyCacheDictionary::getBlockInputStream(const Names &
void registerDictionaryComplexKeyCache(DictionaryFactory & factory)
{
auto create_layout = [=](
const std::string & name,
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr
) -> DictionaryPtr {
auto create_layout = [=](const std::string & name,
const DictionaryStructure & dict_struct,
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
DictionarySourcePtr source_ptr) -> DictionaryPtr
{
if (!dict_struct.key)
throw Exception {"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS};
throw Exception{"'key' is required for dictionary of layout 'complex_key_hashed'", ErrorCodes::BAD_ARGUMENTS};
const auto & layout_prefix = config_prefix + ".layout";
const auto size = config.getInt(layout_prefix + ".complex_key_cache.size_in_cells");
if (size == 0)
throw Exception {name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};
throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};
const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
if (require_nonempty)
throw Exception {name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
ErrorCodes::BAD_ARGUMENTS};
throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
ErrorCodes::BAD_ARGUMENTS};
const DictionaryLifetime dict_lifetime {config, config_prefix + ".lifetime"};
const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
return std::make_unique<ComplexKeyCacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, size);
};
factory.registerLayout("complex_key_cache", create_layout);
......
......@@ -3,23 +3,23 @@
#include <atomic>
#include <chrono>
#include <map>
#include <shared_mutex>
#include <variant>
#include <vector>
#include <shared_mutex>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <pcg_random.hpp>
#include <Common/ArenaWithFreeLists.h>
#include <Common/HashTable/HashMap.h>
#include <Common/ProfilingScopedRWLock.h>
#include <Common/SmallObjectPool.h>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
#include <common/StringRef.h>
#include <ext/bit_cast.h>
#include <ext/map.h>
#include <ext/scope_guard.h>
#include <pcg_random.hpp>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace ProfileEvents
......@@ -40,7 +40,8 @@ namespace DB
class ComplexKeyCacheDictionary final : public IDictionaryBase
{
public:
ComplexKeyCacheDictionary(const std::string & name,
ComplexKeyCacheDictionary(
const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
......@@ -48,25 +49,13 @@ public:
ComplexKeyCacheDictionary(const ComplexKeyCacheDictionary & other);
/// Returns a copy of the stored `key_description` string.
std::string getKeyDescription() const
{
return key_description;
}
std::string getKeyDescription() const { return key_description; }
/// Always returns an empty (null) exception_ptr.
std::exception_ptr getCreationException() const override
{
return {};
}
std::exception_ptr getCreationException() const override { return {}; }
/// Returns a copy of the dictionary name.
std::string getName() const override
{
return name;
}
std::string getName() const override { return name; }
/// Returns the fixed type name "ComplexKeyCache".
std::string getTypeName() const override
{
return "ComplexKeyCache";
}
std::string getTypeName() const override { return "ComplexKeyCache"; }
size_t getBytesAllocated() const override
{
......@@ -74,55 +63,28 @@ public:
+ (string_arena ? string_arena->size() : 0);
}
/// Returns the current value of the `query_count` counter (relaxed atomic load).
size_t getQueryCount() const override
{
return query_count.load(std::memory_order_relaxed);
}
size_t getQueryCount() const override { return query_count.load(std::memory_order_relaxed); }
/// Ratio of `hit_count` to `query_count`, computed in floating point.
/// No guard exists for a zero `query_count`: since the division is floating-point,
/// that case yields NaN or infinity instead of trapping.
double getHitRate() const override
{
    const auto hits = hit_count.load(std::memory_order_acquire);
    const auto queries = query_count.load(std::memory_order_relaxed);
    return static_cast<double>(hits) / queries;
}
/// Returns the current value of the `element_count` counter (relaxed atomic load).
size_t getElementCount() const override
{
return element_count.load(std::memory_order_relaxed);
}
size_t getElementCount() const override { return element_count.load(std::memory_order_relaxed); }
/// Returns `element_count` divided by `size`, computed in floating point.
double getLoadFactor() const override
{
return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size;
}
double getLoadFactor() const override { return static_cast<double>(element_count.load(std::memory_order_relaxed)) / size; }
/// Always true for this dictionary type.
bool isCached() const override
{
return true;
}
bool isCached() const override { return true; }
/// Creates a new dictionary by copy-constructing from `*this`.
std::unique_ptr<IExternalLoadable> clone() const override
{
return std::make_unique<ComplexKeyCacheDictionary>(*this);
}
std::unique_ptr<IExternalLoadable> clone() const override { return std::make_unique<ComplexKeyCacheDictionary>(*this); }
/// Returns a non-owning pointer to the source held by `source_ptr`.
const IDictionarySource * getSource() const override
{
return source_ptr.get();
}
const IDictionarySource * getSource() const override { return source_ptr.get(); }
/// Returns a reference to the stored lifetime settings.
const DictionaryLifetime & getLifetime() const override
{
return dict_lifetime;
}
const DictionaryLifetime & getLifetime() const override { return dict_lifetime; }
/// Returns a reference to the stored dictionary structure.
const DictionaryStructure & getStructure() const override
{
return dict_struct;
}
const DictionaryStructure & getStructure() const override { return dict_struct; }
/// Returns the stored `creation_time` value.
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override
{
return creation_time;
}
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
bool isInjective(const std::string & attribute_name) const override
{
......@@ -135,7 +97,7 @@ public:
/// In all functions below, key_columns must be full (non-constant) columns.
/// See the requirement in IDataType.h for text-serialization functions.
#define DECLARE(TYPE) \
void get##TYPE( \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -155,11 +117,12 @@ public:
/// String attribute lookup without a caller-supplied default; results are written to `out`.
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -177,17 +140,19 @@ public:
DECLARE(Decimal128)
#undef DECLARE
/// String attribute lookup where `def` is a column of default values; results are written to `out`.
void getString(const std::string & attribute_name,
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE(const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
......@@ -205,7 +170,8 @@ public:
DECLARE(Decimal128)
#undef DECLARE
void getString(const std::string & attribute_name,
void getString(
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
......@@ -216,9 +182,12 @@ public:
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
/// MapType<Value>          - HashMapWithSavedHash from StringRef to Value, hashed with StringRefHash.
/// ContainerType<Value>    - array of Value of unknown bound (Value[]).
/// ContainerPtrType<Value> - std::unique_ptr owning such an array.
template <typename Value> using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value> using ContainerType = Value[];
template <typename Value> using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
template <typename Value>
using MapType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value>
using ContainerType = Value[];
template <typename Value>
using ContainerPtrType = std::unique_ptr<ContainerType<Value>>;
struct CellMetadata final
{
......@@ -235,32 +204,35 @@ private:
time_point_urep_t data;
/// Returns the expiration time: `data` with every bit outside EXPIRES_AT_MASK cleared, bit-cast to time_point_t.
time_point_t expiresAt() const
{
return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK);
}
/// Sets expiration time by overwriting `data` as a whole, which also resets the `is_default` flag to false
/// (assumes the bit pattern of `t` never has the IS_DEFAULT_MASK bits set — TODO confirm).
void setExpiresAt(const time_point_t & t)
{
data = ext::safe_bit_cast<time_point_urep_t>(t);
}
time_point_t expiresAt() const { return ext::safe_bit_cast<time_point_t>(data & EXPIRES_AT_MASK); }
void setExpiresAt(const time_point_t & t) { data = ext::safe_bit_cast<time_point_urep_t>(t); }
/// True when all bits of IS_DEFAULT_MASK are set in `data`.
bool isDefault() const
{
return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK;
}
/// Raises the IS_DEFAULT_MASK bits in `data`; all other bits (the expiration time) are left untouched.
void setDefault()
{
data |= IS_DEFAULT_MASK;
}
bool isDefault() const { return (data & IS_DEFAULT_MASK) == IS_DEFAULT_MASK; }
void setDefault() { data |= IS_DEFAULT_MASK; }
};
struct Attribute final
{
AttributeUnderlyingType type;
std::variant<UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64,
Decimal32, Decimal64, Decimal128,
Float32, Float64, String> null_values;
std::variant<ContainerPtrType<UInt8>,
std::variant<
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
std::variant<
ContainerPtrType<UInt8>,
ContainerPtrType<UInt16>,
ContainerPtrType<UInt32>,
ContainerPtrType<UInt64>,
......@@ -283,13 +255,13 @@ private:
Attribute createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value);
template <typename OutputType, typename DefaultGetter>
void getItemsNumber(
Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
void
getItemsNumber(Attribute & attribute, const Columns & key_columns, PaddedPODArray<OutputType> & out, DefaultGetter && get_default) const
{
if (false)
{
}
#define DISPATCH(TYPE) \
#define DISPATCH(TYPE) \
else if (attribute.type == AttributeUnderlyingType::TYPE) \
getItemsNumberImpl<TYPE, OutputType>(attribute, key_columns, out, std::forward<DefaultGetter>(get_default));
DISPATCH(UInt8)
......@@ -372,7 +344,8 @@ private:
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
/// request new values
update(key_columns,
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const size_t cell_idx)
......@@ -497,7 +470,8 @@ private:
return pair.second.front();
});
update(key_columns,
update(
key_columns,
keys_array,
required_rows,
[&](const StringRef key, const size_t cell_idx)
......@@ -531,7 +505,8 @@ private:
}
template <typename PresentKeyHandler, typename AbsentKeyHandler>
void update(const Columns & in_key_columns,
void update(
const Columns & in_key_columns,
const PODArray<StringRef> & in_keys,
const std::vector<size_t> & in_requested_rows,
PresentKeyHandler && on_cell_updated,
......@@ -561,8 +536,10 @@ private:
const auto key_columns = ext::map<Columns>(
ext::range(0, keys_size), [&](const size_t attribute_idx) { return block.safeGetByPosition(attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size),
[&](const size_t attribute_idx) { return block.safeGetByPosition(keys_size + attribute_idx).column; });
const auto attribute_columns = ext::map<Columns>(ext::range(0, attributes_size), [&](const size_t attribute_idx)
{
return block.safeGetByPosition(keys_size + attribute_idx).column;
});
const auto rows_num = block.rows();
......@@ -693,7 +670,8 @@ private:
void freeKey(const StringRef key) const;
template <typename Arena>
static StringRef placeKeysInPool(const size_t row,
static StringRef placeKeysInPool(
const size_t row,
const Columns & key_columns,
StringRefs & keys,
const std::vector<DictionaryAttribute> & key_attributes,
......
......@@ -2,19 +2,19 @@
namespace DB
{
ComplexKeyCacheDictionary::Attribute ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
ComplexKeyCacheDictionary::Attribute
ComplexKeyCacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
Attribute attr{type, {}, {}};
switch (type)
{
#define DISPATCH(TYPE) \
case AttributeUnderlyingType::TYPE: \
attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
attr.arrays = std::make_unique<ContainerType<TYPE>>(size); \
bytes_allocated += size * sizeof(TYPE); \
break;
case AttributeUnderlyingType::TYPE: \
attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
attr.arrays = std::make_unique<ContainerType<TYPE>>(size); \
bytes_allocated += size * sizeof(TYPE); \
break;
DISPATCH(UInt8)
DISPATCH(UInt16)
DISPATCH(UInt32)
......
......@@ -2,26 +2,53 @@
namespace DB
{
void ComplexKeyCacheDictionary::setAttributeValue(Attribute & attribute, const size_t idx, const Field & value) const
{
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>(); break;
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>(); break;
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>(); break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>(); break;
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>(); break;
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>(); break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>(); break;
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>(); break;
case AttributeUnderlyingType::UInt8:
std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt16:
std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt32:
std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt64:
std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
break;
case AttributeUnderlyingType::Decimal32: std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>(); break;
case AttributeUnderlyingType::Decimal64: std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>(); break;
case AttributeUnderlyingType::Decimal128: std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>(); break;
case AttributeUnderlyingType::Decimal32:
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
break;
case AttributeUnderlyingType::Decimal64:
std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
break;
case AttributeUnderlyingType::Decimal128:
std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
break;
case AttributeUnderlyingType::String:
{
......
......@@ -2,22 +2,43 @@
namespace DB
{
void ComplexKeyCacheDictionary::setDefaultAttributeValue(Attribute & attribute, const size_t idx) const
{
switch (attribute.type)
{
case AttributeUnderlyingType::UInt8: std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values); break;
case AttributeUnderlyingType::UInt16: std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values); break;
case AttributeUnderlyingType::UInt32: std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values); break;
case AttributeUnderlyingType::UInt64: std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values); break;
case AttributeUnderlyingType::UInt128: std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values); break;
case AttributeUnderlyingType::Int8: std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values); break;
case AttributeUnderlyingType::Int16: std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values); break;
case AttributeUnderlyingType::Int32: std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values); break;
case AttributeUnderlyingType::Int64: std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values); break;
case AttributeUnderlyingType::Float32: std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values); break;
case AttributeUnderlyingType::Float64: std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values); break;
case AttributeUnderlyingType::UInt8:
std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt16:
std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt32:
std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt64:
std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
break;
case AttributeUnderlyingType::UInt128:
std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
break;
case AttributeUnderlyingType::Int8:
std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
break;
case AttributeUnderlyingType::Int16:
std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
break;
case AttributeUnderlyingType::Int32:
std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
break;
case AttributeUnderlyingType::Int64:
std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
break;
case AttributeUnderlyingType::Float32:
std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
break;
case AttributeUnderlyingType::Float64:
std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
break;
case AttributeUnderlyingType::Decimal32:
std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);
......
#pragma once
#include "IDictionary.h"
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include <common/StringRef.h>
#include <Common/HashTable/HashMap.h>
#include <atomic>
#include <memory>
#include <variant>
#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnString.h>
#include <Common/Arena.h>
#include <Common/HashTable/HashMap.h>
#include <common/StringRef.h>
#include <ext/range.h>
#include <atomic>
#include <memory>
#include <variant>
#include "DictionaryStructure.h"
#include "IDictionary.h"
#include "IDictionarySource.h"
namespace DB
{
using BlockPtr = std::shared_ptr<Block>;
class ComplexKeyHashedDictionary final : public IDictionaryBase
{
public:
ComplexKeyHashedDictionary(
const std::string & name, const DictionaryStructure & dict_struct, DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime, bool require_nonempty, BlockPtr saved_block = nullptr);
const std::string & name,
const DictionaryStructure & dict_struct,
DictionarySourcePtr source_ptr,
const DictionaryLifetime dict_lifetime,
bool require_nonempty,
BlockPtr saved_block = nullptr);
ComplexKeyHashedDictionary(const ComplexKeyHashedDictionary & other);
......@@ -56,10 +59,7 @@ public:
const DictionaryStructure & getStructure() const override { return dict_struct; }
/// Returns the `creation_time` member as a system_clock time point.
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override
{
return creation_time;
}
std::chrono::time_point<std::chrono::system_clock> getCreationTime() const override { return creation_time; }
bool isInjective(const std::string & attribute_name) const override
{
......@@ -69,10 +69,9 @@ public:
template <typename T>
using ResultArrayType = std::conditional_t<IsDecimalNumber<T>, DecimalPaddedPODArray<T>, PaddedPODArray<T>>;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\
ResultArrayType<TYPE> & out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
......@@ -89,14 +88,15 @@ public:
DECLARE(Decimal128)
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
ColumnString * out) const;
void getString(const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types, ColumnString * out) const;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\
const PaddedPODArray<TYPE> & def, ResultArrayType<TYPE> & out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const PaddedPODArray<TYPE> & def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
......@@ -114,13 +114,19 @@ public:
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const ColumnString * const def, ColumnString * const out) const;
#define DECLARE(TYPE)\
void get##TYPE(\
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,\
const TYPE def, ResultArrayType<TYPE> & out) const;
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnString * const def,
ColumnString * const out) const;
#define DECLARE(TYPE) \
void get##TYPE( \
const std::string & attribute_name, \
const Columns & key_columns, \
const DataTypes & key_types, \
const TYPE def, \
ResultArrayType<TYPE> & out) const;
DECLARE(UInt8)
DECLARE(UInt16)
DECLARE(UInt32)
......@@ -138,33 +144,57 @@ public:
#undef DECLARE
void getString(
const std::string & attribute_name, const Columns & key_columns, const DataTypes & key_types,
const String & def, ColumnString * const out) const;
const std::string & attribute_name,
const Columns & key_columns,
const DataTypes & key_types,
const String & def,
ColumnString * const out) const;
void has(const Columns & key_columns, const DataTypes & key_types, PaddedPODArray<UInt8> & out) const;
BlockInputStreamPtr getBlockInputStream(const Names & column_names, size_t max_block_size) const override;
private:
/// Value container used by Attribute::maps: a hash map from a StringRef key to Value, hashed with StringRefHash.
template <typename Value> using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
template <typename Value>
using ContainerType = HashMapWithSavedHash<StringRef, Value, StringRefHash>;
/// Storage for a single dictionary attribute: its type tag, its null value and the key -> value container.
struct Attribute final
{
/// Tag naming the underlying value type of this attribute.
AttributeUnderlyingType type;
/// The attribute's null value, held as one alternative of the variant
/// (presumably the alternative matching `type` — confirm against the code that fills it).
std::variant<
UInt8, UInt16, UInt32, UInt64,
UInt8,
UInt16,
UInt32,
UInt64,
UInt128,
Int8, Int16, Int32, Int64,
Decimal32, Decimal64, Decimal128,
Float32, Float64,
String> null_values;
Int8,
Int16,
Int32,
Int64,
Decimal32,
Decimal64,
Decimal128,
Float32,
Float64,
String>
null_values;
/// Hash map from key (StringRef) to attribute value, one alternative per value type.
/// Note the last alternative stores StringRef rather than String.
std::variant<
ContainerType<UInt8>, ContainerType<UInt16>, ContainerType<UInt32>, ContainerType<UInt64>,
ContainerType<UInt8>,
ContainerType<UInt16>,
ContainerType<UInt32>,
ContainerType<UInt64>,
ContainerType<UInt128>,
ContainerType<Int8>, ContainerType<Int16>, ContainerType<Int32>, ContainerType<Int64>,
ContainerType<Decimal32>, ContainerType<Decimal64>, ContainerType<Decimal128>,
ContainerType<Float32>, ContainerType<Float64>,
ContainerType<StringRef>> maps;
ContainerType<Int8>,
ContainerType<Int16>,
ContainerType<Int32>,
ContainerType<Int64>,
ContainerType<Decimal32>,
ContainerType<Decimal64>,
ContainerType<Decimal128>,
ContainerType<Float32>,
ContainerType<Float64>,
ContainerType<StringRef>>
maps;
/// Arena owned by the attribute — presumably backs the StringRef values of String attributes; verify against the loader.
std::unique_ptr<Arena> string_arena;
};
......@@ -188,18 +218,12 @@ private:
template <typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsNumber(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
void
getItemsNumber(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename AttributeType, typename OutputType, typename ValueSetter, typename DefaultGetter>
void getItemsImpl(
const Attribute & attribute,
const Columns & key_columns,
ValueSetter && set_value,
DefaultGetter && get_default) const;
void
getItemsImpl(const Attribute & attribute, const Columns & key_columns, ValueSetter && set_value, DefaultGetter && get_default) const;
template <typename T>
......@@ -209,8 +233,7 @@ private:
const Attribute & getAttribute(const std::string & attribute_name) const;
static StringRef placeKeysInPool(
const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
static StringRef placeKeysInPool(const size_t row, const Columns & key_columns, StringRefs & keys, Arena & pool);
template <typename T>
void has(const Attribute & attribute, const Columns & key_columns, PaddedPODArray<UInt8> & out) const;
......
......@@ -2,7 +2,6 @@
namespace DB
{
DictionaryBlockInputStreamBase::DictionaryBlockInputStreamBase(size_t rows_count, size_t max_block_size)
: rows_count(rows_count), max_block_size(max_block_size)
{
......
......@@ -4,7 +4,6 @@
namespace DB
{
class DictionaryBlockInputStreamBase : public IProfilingBlockInputStream
{
protected:
......
......@@ -27,10 +27,10 @@ DictionaryPtr DictionaryFactory::create(
const auto & layout_prefix = config_prefix + ".layout";
config.keys(layout_prefix, keys);
if (keys.size() != 1)
throw Exception {name + ": element dictionary.layout should have exactly one child element",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG};
throw Exception{name + ": element dictionary.layout should have exactly one child element",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG};
const DictionaryStructure dict_struct {config, config_prefix + ".structure"};
const DictionaryStructure dict_struct{config, config_prefix + ".structure"};
auto source_ptr = DictionarySourceFactory::instance().create(name, config, config_prefix + ".source", dict_struct, context);
......@@ -45,7 +45,7 @@ DictionaryPtr DictionaryFactory::create(
}
}
throw Exception {name + ": unknown dictionary layout type: " + layout_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG};
throw Exception{name + ": unknown dictionary layout type: " + layout_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG};
}
}
......@@ -26,7 +26,7 @@ namespace
Block block;
if (dict_struct.id)
block.insert(ColumnWithTypeAndName {ColumnUInt64::create(1, 0), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
block.insert(ColumnWithTypeAndName{ColumnUInt64::create(1, 0), std::make_shared<DataTypeUInt64>(), dict_struct.id->name});
if (dict_struct.key)
{
......@@ -35,7 +35,7 @@ namespace
auto column = attribute.type->createColumn();
column->insertDefault();
block.insert(ColumnWithTypeAndName {std::move(column), attribute.type, attribute.name});
block.insert(ColumnWithTypeAndName{std::move(column), attribute.type, attribute.name});
}
}
......@@ -47,7 +47,7 @@ namespace
auto column = type->createColumn();
column->insertDefault();
block.insert(ColumnWithTypeAndName {std::move(column), type, attribute->name});
block.insert(ColumnWithTypeAndName{std::move(column), type, attribute->name});
}
}
......@@ -56,7 +56,7 @@ namespace
auto column = attribute.type->createColumn();
column->insert(attribute.null_value);
block.insert(ColumnWithTypeAndName {std::move(column), attribute.type, attribute.name});
block.insert(ColumnWithTypeAndName{std::move(column), attribute.type, attribute.name});
}
return block;
......@@ -86,8 +86,8 @@ DictionarySourcePtr DictionarySourceFactory::create(
Poco::Util::AbstractConfiguration::Keys keys;
config.keys(config_prefix, keys);
if (keys.size() != 1)
throw Exception {name + ": element dictionary.source should have exactly one child element",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG};
throw Exception{name + ": element dictionary.source should have exactly one child element",
ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG};
auto sample_block = createSampleBlock(dict_struct);
......@@ -102,7 +102,7 @@ DictionarySourcePtr DictionarySourceFactory::create(
}
}
throw Exception {name + ": unknown dictionary source type: " + source_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG};
throw Exception{name + ": unknown dictionary source type: " + source_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG};
}
}
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
#include <Core/ColumnWithTypeAndName.h>
#include <Core/Block.h>
#include <Columns/ColumnsNumber.h>
#include <DataTypes/DataTypesNumber.h>
#include <Core/Block.h>
#include <Core/ColumnWithTypeAndName.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include "DictionaryStructure.h"
namespace DB
{
/// For simple key
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
{
auto column = ColumnUInt64::create(ids.size());
memcpy(column->getData().data(), ids.data(), ids.size() * sizeof(ids.front()));
Block block{{ std::move(column), std::make_shared<DataTypeUInt64>(), "id" }};
Block block{{std::move(column), std::make_shared<DataTypeUInt64>(), "id"}};
out->writePrefix();
out->write(block);
......@@ -26,8 +25,11 @@ void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids)
}
/// For composite key
void formatKeys(const DictionaryStructure & dict_struct, BlockOutputStreamPtr & out,
const Columns & key_columns, const std::vector<size_t> & requested_rows)
void formatKeys(
const DictionaryStructure & dict_struct,
BlockOutputStreamPtr & out,
const Columns & key_columns,
const std::vector<size_t> & requested_rows)
{
Block block;
for (size_t i = 0, size = key_columns.size(); i < size; ++i)
......@@ -39,7 +41,7 @@ void formatKeys(const DictionaryStructure & dict_struct, BlockOutputStreamPtr &
for (size_t idx : requested_rows)
filtered_column->insertFrom(*source_column, idx);
block.insert({ std::move(filtered_column), (*dict_struct.key)[i].type, toString(i) });
block.insert({std::move(filtered_column), (*dict_struct.key)[i].type, toString(i)});
}
out->writePrefix();
......
#pragma once
#include <vector>
#include <common/Types.h>
#include <Columns/IColumn.h>
#include <common/Types.h>
namespace DB
{
class IBlockOutputStream;
using BlockOutputStreamPtr = std::shared_ptr<IBlockOutputStream>;
......@@ -19,7 +18,10 @@ struct DictionaryStructure;
void formatIDs(BlockOutputStreamPtr & out, const std::vector<UInt64> & ids);
/// For composite key.
/// For every column in `key_columns`, copies the rows listed in `requested_rows` into a new column,
/// typed by the matching entry of `dict_struct.key` and named by its position ("0", "1", ...),
/// and sends the resulting block through `out`.
void formatKeys(const DictionaryStructure & dict_struct, BlockOutputStreamPtr & out,
const Columns & key_columns, const std::vector<size_t> & requested_rows);
void formatKeys(
const DictionaryStructure & dict_struct,
BlockOutputStreamPtr & out,
const Columns & key_columns,
const std::vector<size_t> & requested_rows);
}
#include "DictionaryStructure.h"
#include <Formats/FormatSettings.h>
#include <Columns/IColumn.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/IColumn.h>
#include <Common/StringUtils/StringUtils.h>
#include <Formats/FormatSettings.h>
#include <IO/WriteHelpers.h>
#include <Common/StringUtils/StringUtils.h>
#include <ext/range.h>
#include <numeric>
#include <unordered_set>
#include <unordered_map>
#include <unordered_set>
#include <ext/range.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_TYPE;
......@@ -25,20 +24,18 @@ namespace ErrorCodes
namespace
{
/// Builds a typed special attribute from the configuration subtree rooted at `config_prefix`.
///
/// Reads `<config_prefix>.name` and `<config_prefix>.expression` (both default to an empty string) and
/// `<config_prefix>.type` (defaults to `default_type`); the type name is resolved through DataTypeFactory.
///
/// Throws Exception with BAD_ARGUMENTS when an expression is given but the name is empty.
DictionaryTypedSpecialAttribute makeDictionaryTypedSpecialAttribute(
    const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix, const std::string & default_type)
{
    /// Intentionally not const: both strings are moved into the result below,
    /// and std::move applied to a const object silently degrades to a copy.
    auto name = config.getString(config_prefix + ".name", "");
    auto expression = config.getString(config_prefix + ".expression", "");

    if (name.empty() && !expression.empty())
        throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS};

    const auto type_name = config.getString(config_prefix + ".type", default_type);
    return DictionaryTypedSpecialAttribute{std::move(name), std::move(expression), DataTypeFactory::instance().get(type_name)};
}
} // namespace
......@@ -52,27 +49,27 @@ bool isAttributeTypeConvertibleTo(AttributeUnderlyingType from, AttributeUnderly
* (for example, because integers can not be converted to floats)
* This is normal for a limited usage scope.
*/
if ( (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt16)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt32)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::UInt32)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt32 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int16)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::UInt32 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int16)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int16 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::Int16 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int32 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Float32 && to == AttributeUnderlyingType::Float64))
if ((from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt16)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt32)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::UInt32)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt32 && to == AttributeUnderlyingType::UInt64)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int16)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::UInt8 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::UInt16 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::UInt32 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int16)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::Int8 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int16 && to == AttributeUnderlyingType::Int32)
|| (from == AttributeUnderlyingType::Int16 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Int32 && to == AttributeUnderlyingType::Int64)
|| (from == AttributeUnderlyingType::Float32 && to == AttributeUnderlyingType::Float64))
{
return true;
}
......@@ -84,20 +81,20 @@ bool isAttributeTypeConvertibleTo(AttributeUnderlyingType from, AttributeUnderly
AttributeUnderlyingType getAttributeUnderlyingType(const std::string & type)
{
static const std::unordered_map<std::string, AttributeUnderlyingType> dictionary{
{ "UInt8", AttributeUnderlyingType::UInt8 },
{ "UInt16", AttributeUnderlyingType::UInt16 },
{ "UInt32", AttributeUnderlyingType::UInt32 },
{ "UInt64", AttributeUnderlyingType::UInt64 },
{ "UUID", AttributeUnderlyingType::UInt128 },
{ "Int8", AttributeUnderlyingType::Int8 },
{ "Int16", AttributeUnderlyingType::Int16 },
{ "Int32", AttributeUnderlyingType::Int32 },
{ "Int64", AttributeUnderlyingType::Int64 },
{ "Float32", AttributeUnderlyingType::Float32 },
{ "Float64", AttributeUnderlyingType::Float64 },
{ "String", AttributeUnderlyingType::String },
{ "Date", AttributeUnderlyingType::UInt16 },
{ "DateTime", AttributeUnderlyingType::UInt32 },
{"UInt8", AttributeUnderlyingType::UInt8},
{"UInt16", AttributeUnderlyingType::UInt16},
{"UInt32", AttributeUnderlyingType::UInt32},
{"UInt64", AttributeUnderlyingType::UInt64},
{"UUID", AttributeUnderlyingType::UInt128},
{"Int8", AttributeUnderlyingType::Int8},
{"Int16", AttributeUnderlyingType::Int16},
{"Int32", AttributeUnderlyingType::Int32},
{"Int64", AttributeUnderlyingType::Int64},
{"Float32", AttributeUnderlyingType::Float32},
{"Float64", AttributeUnderlyingType::Float64},
{"String", AttributeUnderlyingType::String},
{"Date", AttributeUnderlyingType::UInt16},
{"DateTime", AttributeUnderlyingType::UInt32},
};
const auto it = dictionary.find(type);
......@@ -123,21 +120,36 @@ std::string toString(const AttributeUnderlyingType type)
{
switch (type)
{
case AttributeUnderlyingType::UInt8: return "UInt8";
case AttributeUnderlyingType::UInt16: return "UInt16";
case AttributeUnderlyingType::UInt32: return "UInt32";
case AttributeUnderlyingType::UInt64: return "UInt64";
case AttributeUnderlyingType::UInt128: return "UUID";
case AttributeUnderlyingType::Int8: return "Int8";
case AttributeUnderlyingType::Int16: return "Int16";
case AttributeUnderlyingType::Int32: return "Int32";
case AttributeUnderlyingType::Int64: return "Int64";
case AttributeUnderlyingType::Float32: return "Float32";
case AttributeUnderlyingType::Float64: return "Float64";
case AttributeUnderlyingType::Decimal32: return "Decimal32";
case AttributeUnderlyingType::Decimal64: return "Decimal64";
case AttributeUnderlyingType::Decimal128: return "Decimal128";
case AttributeUnderlyingType::String: return "String";
case AttributeUnderlyingType::UInt8:
return "UInt8";
case AttributeUnderlyingType::UInt16:
return "UInt16";
case AttributeUnderlyingType::UInt32:
return "UInt32";
case AttributeUnderlyingType::UInt64:
return "UInt64";
case AttributeUnderlyingType::UInt128:
return "UUID";
case AttributeUnderlyingType::Int8:
return "Int8";
case AttributeUnderlyingType::Int16:
return "Int16";
case AttributeUnderlyingType::Int32:
return "Int32";
case AttributeUnderlyingType::Int64:
return "Int64";
case AttributeUnderlyingType::Float32:
return "Float32";
case AttributeUnderlyingType::Float64:
return "Float64";
case AttributeUnderlyingType::Decimal32:
return "Decimal32";
case AttributeUnderlyingType::Decimal64:
return "Decimal64";
case AttributeUnderlyingType::Decimal128:
return "Decimal128";
case AttributeUnderlyingType::String:
return "String";
}
throw Exception{"Unknown attribute_type " + toString(static_cast<int>(type)), ErrorCodes::ARGUMENT_OUT_OF_BOUND};
......@@ -145,8 +157,7 @@ std::string toString(const AttributeUnderlyingType type)
DictionarySpecialAttribute::DictionarySpecialAttribute(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
: name{config.getString(config_prefix + ".name", "")},
expression{config.getString(config_prefix + ".expression", "")}
: name{config.getString(config_prefix + ".name", "")}, expression{config.getString(config_prefix + ".expression", "")}
{
if (name.empty() && !expression.empty())
throw Exception{"Element " + config_prefix + ".name is empty", ErrorCodes::BAD_ARGUMENTS};
......@@ -186,28 +197,31 @@ DictionaryStructure::DictionaryStructure(const Poco::Util::AbstractConfiguration
if (range_min.has_value() != range_max.has_value())
{
throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.", ErrorCodes::BAD_ARGUMENTS};
throw Exception{"Dictionary structure should have both 'range_min' and 'range_max' either specified or not.",
ErrorCodes::BAD_ARGUMENTS};
}
if (range_min && range_max && !range_min->type->equals(*range_max->type))
{
throw Exception{"Dictionary structure 'range_min' and 'range_max' should have same type, "
"'range_min' type: " + range_min->type->getName() + ", "
"'range_max' type: " + range_max->type->getName(),
ErrorCodes::BAD_ARGUMENTS};
"'range_min' type: "
+ range_min->type->getName()
+ ", "
"'range_max' type: "
+ range_max->type->getName(),
ErrorCodes::BAD_ARGUMENTS};
}
if (range_min)
{
if (!range_min->type->isValueRepresentedByInteger())
throw Exception{"Dictionary structure type of 'range_min' and 'range_max' should be an integer, Date, DateTime, or Enum."
" Actual 'range_min' and 'range_max' type is " + range_min->type->getName(),
ErrorCodes::BAD_ARGUMENTS};
" Actual 'range_min' and 'range_max' type is "
+ range_min->type->getName(),
ErrorCodes::BAD_ARGUMENTS};
}
if (!id->expression.empty() ||
(range_min && !range_min->expression.empty()) ||
(range_max && !range_max->expression.empty()))
if (!id->expression.empty() || (range_min && !range_min->expression.empty()) || (range_max && !range_max->expression.empty()))
has_expressions = true;
}
......@@ -228,8 +242,9 @@ void DictionaryStructure::validateKeyTypes(const DataTypes & key_types) const
const auto & actual_type = key_types[i]->getName();
if (expected_type != actual_type)
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type +
", found " + actual_type, ErrorCodes::TYPE_MISMATCH};
throw Exception{"Key type at position " + std::to_string(i) + " does not match, expected " + expected_type + ", found "
+ actual_type,
ErrorCodes::TYPE_MISMATCH};
}
}
......@@ -274,15 +289,17 @@ bool DictionaryStructure::isKeySizeFixed() const
size_t DictionaryStructure::getKeySize() const
{
return std::accumulate(std::begin(*key), std::end(*key), size_t{},
[] (const auto running_size, const auto & key_i) {return running_size + key_i.type->getSizeOfValueInMemory(); });
return std::accumulate(std::begin(*key), std::end(*key), size_t{}, [](const auto running_size, const auto & key_i)
{
return running_size + key_i.type->getSizeOfValueInMemory();
});
}
static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & keys)
{
static const std::unordered_set<std::string> valid_keys =
{ "name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id" };
static const std::unordered_set<std::string> valid_keys
= {"name", "type", "expression", "null_value", "hierarchical", "injective", "is_object_id"};
for (const auto & key : keys)
{
......@@ -293,8 +310,10 @@ static void checkAttributeKeys(const Poco::Util::AbstractConfiguration::Keys & k
std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
const bool hierarchy_allowed, const bool allow_null_values)
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const bool hierarchy_allowed,
const bool allow_null_values)
{
Poco::Util::AbstractConfiguration::Keys config_elems;
config.keys(config_prefix, config_elems);
......@@ -361,9 +380,8 @@ std::vector<DictionaryAttribute> DictionaryStructure::getAttributes(
has_hierarchy = has_hierarchy || hierarchical;
res_attributes.emplace_back(DictionaryAttribute{
name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id
});
res_attributes.emplace_back(
DictionaryAttribute{name, underlying_type, type, expression, null_value, hierarchical, injective, is_object_id});
}
return res_attributes;
......
......@@ -5,15 +5,14 @@
#include <Interpreters/IExternalLoadable.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <vector>
#include <string>
#include <map>
#include <optional>
#include <string>
#include <vector>
namespace DB
{
enum class AttributeUnderlyingType
{
UInt8,
......@@ -104,8 +103,10 @@ struct DictionaryStructure final
private:
std::vector<DictionaryAttribute> getAttributes(
const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix,
const bool hierarchy_allowed = true, const bool allow_null_values = true);
const Poco::Util::AbstractConfiguration & config,
const std::string & config_prefix,
const bool hierarchy_allowed = true,
const bool allow_null_values = true);
};
}
......@@ -4,8 +4,7 @@
#include "GeodataProviders/HierarchiesProvider.h"
#include "GeodataProviders/NamesProvider.h"
std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies(
const Poco::Util::AbstractConfiguration & config)
std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config)
{
static constexpr auto config_key = "path_to_regions_hierarchy_file";
......@@ -17,8 +16,7 @@ std::unique_ptr<RegionsHierarchies> GeoDictionariesLoader::reloadRegionsHierarch
return std::make_unique<RegionsHierarchies>(std::move(data_provider));
}
std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames(
const Poco::Util::AbstractConfiguration & config)
std::unique_ptr<RegionsNames> GeoDictionariesLoader::reloadRegionsNames(const Poco::Util::AbstractConfiguration & config)
{
static constexpr auto config_key = "path_to_regions_names_files";
......
......@@ -7,9 +7,7 @@
class GeoDictionariesLoader : public IGeoDictionariesLoader
{
public:
std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(
const Poco::Util::AbstractConfiguration & config) override;
std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config) override;
std::unique_ptr<RegionsNames> reloadRegionsNames(
const Poco::Util::AbstractConfiguration & config) override;
std::unique_ptr<RegionsNames> reloadRegionsNames(const Poco::Util::AbstractConfiguration & config) override;
};
#pragma once
#include "Types.h"
#include <string>
#include "Types.h"
struct RegionEntry
{
......@@ -17,4 +17,3 @@ struct RegionNameEntry
RegionID id;
std::string name;
};
#include "HierarchiesProvider.h"
#include "HierarchyFormatReader.h"
#include <IO/ReadBufferFromFile.h>
#include <Poco/Util/Application.h>
#include <Poco/Exception.h>
#include <Poco/DirectoryIterator.h>
#include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include "HierarchyFormatReader.h"
bool RegionsHierarchyDataSource::isModified() const
......@@ -20,8 +20,7 @@ IRegionsHierarchyReaderPtr RegionsHierarchyDataSource::createReader()
}
RegionsHierarchiesDataProvider::RegionsHierarchiesDataProvider(const std::string & path)
: path(path)
RegionsHierarchiesDataProvider::RegionsHierarchiesDataProvider(const std::string & path) : path(path)
{
discoverFilesWithCustomHierarchies();
}
......@@ -37,9 +36,8 @@ void RegionsHierarchiesDataProvider::discoverFilesWithCustomHierarchies()
{
std::string candidate_basename = dir_it.path().getBaseName();
if ((0 == candidate_basename.compare(0, basename.size(), basename)) &&
(candidate_basename.size() > basename.size() + 1) &&
(candidate_basename[basename.size()] == '_'))
if ((0 == candidate_basename.compare(0, basename.size(), basename)) && (candidate_basename.size() > basename.size() + 1)
&& (candidate_basename[basename.size()] == '_'))
{
const std::string suffix = candidate_basename.substr(basename.size() + 1);
hierarchy_files.emplace(suffix, dir_it->path());
......
......@@ -2,23 +2,19 @@
#include "IHierarchiesProvider.h"
#include <Common/FileUpdatesTracker.h>
#include <unordered_map>
#include <Common/FileUpdatesTracker.h>
// Represents local file with regions hierarchy dump
class RegionsHierarchyDataSource
: public IRegionsHierarchyDataSource
class RegionsHierarchyDataSource : public IRegionsHierarchyDataSource
{
private:
std::string path;
FileUpdatesTracker updates_tracker;
public:
RegionsHierarchyDataSource(const std::string & path_)
: path(path_)
, updates_tracker(path_)
{}
RegionsHierarchyDataSource(const std::string & path_) : path(path_), updates_tracker(path_) {}
bool isModified() const override;
......@@ -27,8 +23,7 @@ public:
// Provides access to directory with multiple data source files: one file per regions hierarchy
class RegionsHierarchiesDataProvider
: public IRegionsHierarchiesDataProvider
class RegionsHierarchiesDataProvider : public IRegionsHierarchiesDataProvider
{
private:
// path to file with default regions hierarchy
......@@ -55,4 +50,3 @@ public:
private:
void discoverFilesWithCustomHierarchies();
};
......@@ -30,9 +30,8 @@ bool RegionsHierarchyFormatReader::readNext(RegionEntry & entry)
++input->position();
UInt64 population_big = 0;
DB::readIntText(population_big, *input);
population = population_big > std::numeric_limits<RegionPopulation>::max()
? std::numeric_limits<RegionPopulation>::max()
: population_big;
population = population_big > std::numeric_limits<RegionPopulation>::max() ? std::numeric_limits<RegionPopulation>::max()
: population_big;
}
DB::assertChar('\n', *input);
......
#pragma once
#include "IHierarchiesProvider.h"
#include <IO/ReadBuffer.h>
#include "IHierarchiesProvider.h"
// Reads regions hierarchy in geoexport format
......@@ -11,10 +11,7 @@ private:
DB::ReadBufferPtr input;
public:
RegionsHierarchyFormatReader(DB::ReadBufferPtr input_)
: input(std::move(input_))
{}
RegionsHierarchyFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {}
bool readNext(RegionEntry & entry) override;
};
#pragma once
#include "Entries.h"
#include <memory>
#include <string>
#include <vector>
#include "Entries.h"
// Iterates over all regions in data source
......@@ -46,4 +46,3 @@ public:
};
using IRegionsHierarchiesDataProviderPtr = std::shared_ptr<IRegionsHierarchiesDataProvider>;
#pragma once
#include "Entries.h"
#include <memory>
#include "Entries.h"
// Iterates over all name entries in data source
......@@ -42,11 +42,9 @@ using ILanguageRegionsNamesDataSourcePtr = std::unique_ptr<ILanguageRegionsNames
class IRegionsNamesDataProvider
{
public:
virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(
const std::string & language) const = 0;
virtual ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const = 0;
virtual ~IRegionsNamesDataProvider() {}
};
using IRegionsNamesDataProviderPtr = std::unique_ptr<IRegionsNamesDataProvider>;
#pragma once
#include "INamesProvider.h"
#include <IO/ReadBuffer.h>
#include "INamesProvider.h"
// Reads regions names list in geoexport format
......@@ -11,9 +11,7 @@ private:
DB::ReadBufferPtr input;
public:
LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_)
: input(std::move(input_))
{}
LanguageRegionsNamesFormatReader(DB::ReadBufferPtr input_) : input(std::move(input_)) {}
bool readNext(RegionNameEntry & entry) override;
};
#include "NamesProvider.h"
#include "NamesFormatReader.h"
#include <IO/ReadBufferFromFile.h>
#include "NamesFormatReader.h"
bool LanguageRegionsNamesDataSource::isModified() const
......@@ -32,12 +32,11 @@ std::string LanguageRegionsNamesDataSource::getSourceName() const
}
RegionsNamesDataProvider::RegionsNamesDataProvider(const std::string & directory_)
: directory(directory_)
{}
RegionsNamesDataProvider::RegionsNamesDataProvider(const std::string & directory_) : directory(directory_)
{
}
ILanguageRegionsNamesDataSourcePtr RegionsNamesDataProvider::getLanguageRegionsNamesSource(
const std::string & language) const
ILanguageRegionsNamesDataSourcePtr RegionsNamesDataProvider::getLanguageRegionsNamesSource(const std::string & language) const
{
const auto data_file = getDataFilePath(language);
return std::make_unique<LanguageRegionsNamesDataSource>(data_file, language);
......
#pragma once
#include "INamesProvider.h"
#include <Common/FileUpdatesTracker.h>
#include "INamesProvider.h"
// Represents local file with list of regions ids / names
......@@ -14,10 +14,9 @@ private:
public:
LanguageRegionsNamesDataSource(const std::string & path_, const std::string & language_)
: path(path_)
, updates_tracker(path_)
, language(language_)
{}
: path(path_), updates_tracker(path_), language(language_)
{
}
bool isModified() const override;
......@@ -42,8 +41,7 @@ private:
public:
RegionsNamesDataProvider(const std::string & directory_);
ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(
const std::string & language) const override;
ILanguageRegionsNamesDataSourcePtr getLanguageRegionsNamesSource(const std::string & language) const override;
private:
std::string getDataFilePath(const std::string & language) const;
......
#pragma once
#include <memory>
#include "RegionsHierarchies.h"
#include "RegionsNames.h"
#include <memory>
namespace Poco
{
namespace Util
{
class AbstractConfiguration;
}
namespace Util
{
class AbstractConfiguration;
}
class Logger;
class Logger;
}
......@@ -20,11 +20,9 @@ namespace Poco
class IGeoDictionariesLoader
{
public:
virtual std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(
const Poco::Util::AbstractConfiguration & config) = 0;
virtual std::unique_ptr<RegionsHierarchies> reloadRegionsHierarchies(const Poco::Util::AbstractConfiguration & config) = 0;
virtual std::unique_ptr<RegionsNames> reloadRegionsNames(
const Poco::Util::AbstractConfiguration & config) = 0;
virtual std::unique_ptr<RegionsNames> reloadRegionsNames(const Poco::Util::AbstractConfiguration & config) = 0;
virtual ~IGeoDictionariesLoader() {}
};
#include "RegionsHierarchies.h"
#include <common/logger_useful.h>
#include <Poco/DirectoryIterator.h>
#include <common/logger_useful.h>
RegionsHierarchies::RegionsHierarchies(IRegionsHierarchiesDataProviderPtr data_provider)
......
#pragma once
#include "RegionsHierarchy.h"
#include "GeodataProviders/IHierarchiesProvider.h"
#include <Poco/Exception.h>
#include <unordered_map>
#include <Poco/Exception.h>
#include "GeodataProviders/IHierarchiesProvider.h"
#include "RegionsHierarchy.h"
/** Contains several hierarchies of regions.
......
#include "RegionsHierarchy.h"
#include "GeodataProviders/IHierarchiesProvider.h"
#include <Poco/Util/Application.h>
#include <IO/WriteHelpers.h>
#include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include <common/logger_useful.h>
#include <ext/singleton.h>
#include <IO/WriteHelpers.h>
#include "GeodataProviders/IHierarchiesProvider.h"
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
}
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
}
}
RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_)
: data_source(data_source_)
RegionsHierarchy::RegionsHierarchy(IRegionsHierarchyDataSourcePtr data_source_) : data_source(data_source_)
{
}
......@@ -56,7 +55,8 @@ void RegionsHierarchy::reload()
if (region_entry.id > max_region_id)
{
if (region_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size),
throw DB::Exception(
"Region id is too large: " + DB::toString(region_entry.id) + ", should be not more than " + DB::toString(max_size),
DB::ErrorCodes::INCORRECT_DATA);
max_region_id = region_entry.id;
......@@ -74,16 +74,16 @@ void RegionsHierarchy::reload()
types[region_entry.id] = region_entry.type;
}
new_parents .resize(max_region_id + 1);
new_city .resize(max_region_id + 1);
new_country .resize(max_region_id + 1);
new_area .resize(max_region_id + 1);
new_district .resize(max_region_id + 1);
new_continent .resize(max_region_id + 1);
new_parents.resize(max_region_id + 1);
new_city.resize(max_region_id + 1);
new_country.resize(max_region_id + 1);
new_area.resize(max_region_id + 1);
new_district.resize(max_region_id + 1);
new_continent.resize(max_region_id + 1);
new_top_continent.resize(max_region_id + 1);
new_populations .resize(max_region_id + 1);
new_depths .resize(max_region_id + 1);
types .resize(max_region_id + 1);
new_populations.resize(max_region_id + 1);
new_depths.resize(max_region_id + 1);
types.resize(max_region_id + 1);
/// prescribe the cities and countries for the regions
for (RegionID i = 0; i <= max_region_id; ++i)
......@@ -113,14 +113,16 @@ void RegionsHierarchy::reload()
++depth;
if (depth == std::numeric_limits<RegionDepth>::max())
throw Poco::Exception("Logical error in regions hierarchy: region " + DB::toString(current) + " possible is inside infinite loop");
throw Poco::Exception(
"Logical error in regions hierarchy: region " + DB::toString(current) + " possible is inside infinite loop");
current = new_parents[current];
if (current == 0)
break;
if (current > max_region_id)
throw Poco::Exception("Logical error in regions hierarchy: region " + DB::toString(current) + " (specified as parent) doesn't exist");
throw Poco::Exception(
"Logical error in regions hierarchy: region " + DB::toString(current) + " (specified as parent) doesn't exist");
if (types[current] == RegionType::City)
new_city[i] = current;
......
#pragma once
#include "GeodataProviders/IHierarchiesProvider.h"
#include <vector>
#include <boost/noncopyable.hpp>
#include <common/Types.h>
#include "GeodataProviders/IHierarchiesProvider.h"
class IRegionsHierarchyDataProvider;
......
#include "RegionsNames.h"
#include "GeodataProviders/INamesProvider.h"
#include <Poco/Util/Application.h>
#include <IO/WriteHelpers.h>
#include <Poco/Exception.h>
#include <Poco/Util/Application.h>
#include <common/logger_useful.h>
#include <IO/WriteHelpers.h>
#include "GeodataProviders/INamesProvider.h"
namespace DB
{
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
}
namespace ErrorCodes
{
extern const int INCORRECT_DATA;
}
}
......@@ -84,7 +84,8 @@ void RegionsNames::reload()
max_region_id = name_entry.id;
if (name_entry.id > max_size)
throw DB::Exception("Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size),
throw DB::Exception(
"Region id is too large: " + DB::toString(name_entry.id) + ", should be not more than " + DB::toString(max_size),
DB::ErrorCodes::INCORRECT_DATA);
}
......
#include <Common/config.h>
#if USE_MYSQL
#include "TechDataHierarchy.h"
# include "TechDataHierarchy.h"
#include <common/logger_useful.h>
#include <mysqlxx/PoolWithFailover.h>
# include <common/logger_useful.h>
# include <mysqlxx/PoolWithFailover.h>
static constexpr auto config_key = "mysql_metrica";
......
#pragma once
#include "IDictionarySource.h"
#include "DictionaryStructure.h"
#include "IDictionarySource.h"
namespace Poco { class Logger; }
namespace Poco
{
class Logger;
}
namespace DB
{
/// Allows loading dictionaries from executable
class ExecutableDictionarySource final : public IDictionarySource
{
......@@ -29,8 +31,7 @@ public:
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
BlockInputStreamPtr loadKeys(
const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
BlockInputStreamPtr loadKeys(const Columns & key_columns, const std::vector<size_t> & requested_rows) override;
bool isModified() const override;
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册