CacheDictionary.cpp 22.4 KB
Newer Older
1 2
#include "CacheDictionary.h"

3
#include <functional>
4
#include <memory>
P
proller 已提交
5
#include <sstream>
6
#include <Columns/ColumnString.h>
P
proller 已提交
7
#include <Columns/ColumnsNumber.h>
8
#include <Common/BitHelpers.h>
P
proller 已提交
9
#include <Common/CurrentMetrics.h>
10
#include <Common/HashTable/Hash.h>
11
#include <Common/ProfileEvents.h>
P
proller 已提交
12 13 14
#include <Common/ProfilingScopedRWLock.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
15
#include <Common/typeid_cast.h>
16
#include <ext/map.h>
P
proller 已提交
17 18
#include <ext/range.h>
#include <ext/size.h>
19
#include "CacheDictionary.inc.h"
P
proller 已提交
20 21
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
22

23 24
namespace ProfileEvents
{
P
proller 已提交
25 26 27 28 29 30 31 32 33 34
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
35 36 37 38
}

namespace CurrentMetrics
{
P
proller 已提交
39
extern const Metric DictCacheRequests;
40 41 42
}


43 44 45 46
namespace DB
{
namespace ErrorCodes
{
47 48 49
    extern const int TYPE_MISMATCH;
    extern const int BAD_ARGUMENTS;
    extern const int UNSUPPORTED_METHOD;
A
Alexey Milovidov 已提交
50
    extern const int LOGICAL_ERROR;
51
    extern const int TOO_SMALL_BUFFER_SIZE;
52 53 54
}


P
proller 已提交
55
inline size_t CacheDictionary::getCellIdx(const Key id) const
56
{
57 58 59
    const auto hash = intHash64(id);
    const auto idx = hash & size_overlap_mask;
    return idx;
60 61 62
}


P
proller 已提交
63 64 65 66 67
CacheDictionary::CacheDictionary(
    const std::string & name,
    const DictionaryStructure & dict_struct,
    DictionarySourcePtr source_ptr,
    const DictionaryLifetime dict_lifetime,
68
    const size_t size)
P
proller 已提交
69 70 71 72 73 74 75 76
    : name{name}
    , dict_struct(dict_struct)
    , source_ptr{std::move(source_ptr)}
    , dict_lifetime(dict_lifetime)
    , size{roundUpToPowerOfTwoOrZero(std::max(size, size_t(max_collision_length)))}
    , size_overlap_mask{this->size - 1}
    , cells{this->size}
    , rnd_engine(randomSeed())
77
{
78
    if (!this->source_ptr->supportsSelectiveLoad())
79
        throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};
80

81
    createAttributes();
82 83 84
}


A
Alexey Milovidov 已提交
85
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
86
{
87
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
88

P
proller 已提交
89
    getItemsNumber<UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
90 91 92
}


93
/// Allow to use single value in same way as array.
P
proller 已提交
94 95 96 97 98 99 100 101
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx)
{
    return arr[idx];
}
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t)
{
    return value;
}
102 103 104


template <typename AncestorType>
P
proller 已提交
105
void CacheDictionary::isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
106
{
107 108
    /// Transform all children to parents until ancestor id or null_value will be reached.

109
    size_t out_size = out.size();
P
proller 已提交
110
    memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated"
111 112 113

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

114
    PaddedPODArray<Key> children(out_size);
115 116 117 118 119 120 121 122
    PaddedPODArray<Key> parents(child_ids.begin(), child_ids.end());

    while (true)
    {
        size_t out_idx = 0;
        size_t parents_idx = 0;
        size_t new_children_idx = 0;

123
        while (out_idx < out_size)
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
        {
            /// Already calculated
            if (out[out_idx] != 0xFF)
            {
                ++out_idx;
                continue;
            }

            /// No parent
            if (parents[parents_idx] == null_value)
            {
                out[out_idx] = 0;
            }
            /// Found ancestor
            else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx))
            {
                out[out_idx] = 1;
            }
A
alexey-milovidov 已提交
142
            /// Loop detected
143 144
            else if (children[new_children_idx] == parents[parents_idx])
            {
P
proller 已提交
145
                out[out_idx] = 1;
146
            }
A
alexey-milovidov 已提交
147
            /// Found intermediate parent, add this value to search at next loop iteration
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
            else
            {
                children[new_children_idx] = parents[parents_idx];
                ++new_children_idx;
            }

            ++out_idx;
            ++parents_idx;
        }

        if (new_children_idx == 0)
            break;

        /// Transform all children to its parents.
        children.resize(new_children_idx);
        parents.resize(new_children_idx);

        toParent(children, parents);
    }
167 168 169
}

void CacheDictionary::isInVectorVector(
P
proller 已提交
170
    const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
171
{
172
    isInImpl(child_ids, ancestor_ids, out);
173
}
174

P
proller 已提交
175
void CacheDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
176
{
177
    isInImpl(child_ids, ancestor_id, out);
178
}
179

P
proller 已提交
180
void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
181
{
182
    /// Special case with single child value.
183

184
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
185

186 187 188
    PaddedPODArray<Key> child(1, child_id);
    PaddedPODArray<Key> parent(1);
    std::vector<Key> ancestors(1, child_id);
189

190 191 192 193
    /// Iteratively find all ancestors for child.
    while (true)
    {
        toParent(child, parent);
194

195 196
        if (parent[0] == null_value)
            break;
197

198 199 200
        child[0] = parent[0];
        ancestors.push_back(parent[0]);
    }
201

202
    /// Assuming short hierarchy, so linear search is Ok.
203
    for (size_t i = 0, out_size = out.size(); i < out_size; ++i)
204
        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
205
}
206

A
Alexey Milovidov 已提交
207
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
208
{
209 210
    auto & attribute = getAttribute(attribute_name);
    if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
P
proller 已提交
211 212
        throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
                        ErrorCodes::TYPE_MISMATCH};
213

214
    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
215

P
proller 已提交
216
    getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
217 218 219
}

void CacheDictionary::getString(
P
proller 已提交
220
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
221
{
222 223
    auto & attribute = getAttribute(attribute_name);
    if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
P
proller 已提交
224 225
        throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
                        ErrorCodes::TYPE_MISMATCH};
226

P
proller 已提交
227
    getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
228 229 230
}

void CacheDictionary::getString(
P
proller 已提交
231
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
232
{
233 234
    auto & attribute = getAttribute(attribute_name);
    if (!isAttributeTypeConvertibleTo(attribute.type, AttributeUnderlyingType::String))
P
proller 已提交
235 236
        throw Exception{name + ": type mismatch: attribute " + attribute_name + " has type " + toString(attribute.type),
                        ErrorCodes::TYPE_MISMATCH};
237

P
proller 已提交
238
    getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
239 240 241
}


242
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag
243 244 245 246 247 248 249 250
/// true  false   found and valid
/// false true    not found (something outdated, maybe our cell)
/// false false   not found (other id stored with valid data)
/// true  true    impossible
///
/// todo: split this func to two: find_for_get and find_for_set
CacheDictionary::FindResult CacheDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const
{
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
    auto pos = getCellIdx(id);
    auto oldest_id = pos;
    auto oldest_time = CellMetadata::time_point_t::max();
    const auto stop = pos + max_collision_length;
    for (; pos < stop; ++pos)
    {
        const auto cell_idx = pos & size_overlap_mask;
        const auto & cell = cells[cell_idx];

        if (cell.id != id)
        {
            /// maybe we already found nearest expired cell (try minimize collision_length on insert)
            if (oldest_time > now && oldest_time > cell.expiresAt())
            {
                oldest_time = cell.expiresAt();
                oldest_id = cell_idx;
            }
            continue;
        }

        if (cell.expiresAt() < now)
        {
            return {cell_idx, false, true};
        }

        return {cell_idx, true, false};
    }

    return {oldest_id, false, false};
280 281
}

A
Alexey Milovidov 已提交
282
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
283
{
284
    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
285
    std::unordered_map<Key, std::vector<size_t>> outdated_ids;
286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327

    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;

    const auto rows = ext::size(ids);
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto find_result = findCellIdx(id, now);
            const auto & cell_idx = find_result.cell_idx;
            if (!find_result.valid)
            {
                outdated_ids[id].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell = cells[cell_idx];
                out[row] = !cell.isDefault();
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release);

    if (outdated_ids.empty())
        return;

    std::vector<Key> required_ids(outdated_ids.size());
P
proller 已提交
328
    std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
329 330

    /// request new values
P
proller 已提交
331 332 333 334 335 336 337 338 339 340 341 342
    update(
        required_ids,
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = true;
        },
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = false;
        });
343 344 345 346 347
}


void CacheDictionary::createAttributes()
{
348 349
    const auto attributes_size = dict_struct.attributes.size();
    attributes.reserve(attributes_size);
350 351

    bytes_allocated += size * sizeof(CellMetadata);
352
    bytes_allocated += attributes_size * sizeof(attributes.front());
353 354 355 356 357 358 359 360

    for (const auto & attribute : dict_struct.attributes)
    {
        attribute_index_by_name.emplace(attribute.name, attributes.size());
        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));

        if (attribute.hierarchical)
        {
P
proller 已提交
361
            hierarchical_attribute = &attributes.back();
362 363

            if (hierarchical_attribute->type != AttributeUnderlyingType::UInt64)
364
                throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
365 366
        }
    }
367 368
}

A
Alexey Milovidov 已提交
369
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
370
{
A
Alexey Milovidov 已提交
371
    Attribute attr{type, {}, {}};
372 373 374

    switch (type)
    {
P
proller 已提交
375 376
#define DISPATCH(TYPE) \
    case AttributeUnderlyingType::TYPE: \
P
proller 已提交
377
        attr.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
P
proller 已提交
378 379
        attr.arrays = std::make_unique<ContainerType<TYPE>>(size); \
        bytes_allocated += size * sizeof(TYPE); \
P
proller 已提交
380
        break;
A
Amos Bird 已提交
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395
        DISPATCH(UInt8)
        DISPATCH(UInt16)
        DISPATCH(UInt32)
        DISPATCH(UInt64)
        DISPATCH(UInt128)
        DISPATCH(Int8)
        DISPATCH(Int16)
        DISPATCH(Int32)
        DISPATCH(Int64)
        DISPATCH(Decimal32)
        DISPATCH(Decimal64)
        DISPATCH(Decimal128)
        DISPATCH(Float32)
        DISPATCH(Float64)
#undef DISPATCH
396
        case AttributeUnderlyingType::String:
A
Alexey Milovidov 已提交
397 398
            attr.null_values = null_value.get<String>();
            attr.arrays = std::make_unique<ContainerType<StringRef>>(size);
399 400 401 402 403 404 405
            bytes_allocated += size * sizeof(StringRef);
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;
    }

    return attr;
406 407
}

A
Alexey Milovidov 已提交
408
void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const
409
{
410 411
    switch (attribute.type)
    {
P
proller 已提交
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444
        case AttributeUnderlyingType::UInt8:
            std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = std::get<UInt8>(attribute.null_values);
            break;
        case AttributeUnderlyingType::UInt16:
            std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = std::get<UInt16>(attribute.null_values);
            break;
        case AttributeUnderlyingType::UInt32:
            std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = std::get<UInt32>(attribute.null_values);
            break;
        case AttributeUnderlyingType::UInt64:
            std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = std::get<UInt64>(attribute.null_values);
            break;
        case AttributeUnderlyingType::UInt128:
            std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = std::get<UInt128>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Int8:
            std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = std::get<Int8>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Int16:
            std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = std::get<Int16>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Int32:
            std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = std::get<Int32>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Int64:
            std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = std::get<Int64>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Float32:
            std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = std::get<Float32>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Float64:
            std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = std::get<Float64>(attribute.null_values);
            break;
445 446 447 448 449 450 451 452 453 454 455

        case AttributeUnderlyingType::Decimal32:
            std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = std::get<Decimal32>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Decimal64:
            std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = std::get<Decimal64>(attribute.null_values);
            break;
        case AttributeUnderlyingType::Decimal128:
            std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = std::get<Decimal128>(attribute.null_values);
            break;

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
        case AttributeUnderlyingType::String:
        {
            const auto & null_value_ref = std::get<String>(attribute.null_values);
            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            if (string_ref.data != null_value_ref.data())
            {
                if (string_ref.data)
                    string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);

                string_ref = StringRef{null_value_ref};
            }

            break;
        }
    }
472 473
}

A
Alexey Milovidov 已提交
474
void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const
475
{
476 477
    switch (attribute.type)
    {
P
proller 已提交
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520
        case AttributeUnderlyingType::UInt8:
            std::get<ContainerPtrType<UInt8>>(attribute.arrays)[idx] = value.get<UInt64>();
            break;
        case AttributeUnderlyingType::UInt16:
            std::get<ContainerPtrType<UInt16>>(attribute.arrays)[idx] = value.get<UInt64>();
            break;
        case AttributeUnderlyingType::UInt32:
            std::get<ContainerPtrType<UInt32>>(attribute.arrays)[idx] = value.get<UInt64>();
            break;
        case AttributeUnderlyingType::UInt64:
            std::get<ContainerPtrType<UInt64>>(attribute.arrays)[idx] = value.get<UInt64>();
            break;
        case AttributeUnderlyingType::UInt128:
            std::get<ContainerPtrType<UInt128>>(attribute.arrays)[idx] = value.get<UInt128>();
            break;
        case AttributeUnderlyingType::Int8:
            std::get<ContainerPtrType<Int8>>(attribute.arrays)[idx] = value.get<Int64>();
            break;
        case AttributeUnderlyingType::Int16:
            std::get<ContainerPtrType<Int16>>(attribute.arrays)[idx] = value.get<Int64>();
            break;
        case AttributeUnderlyingType::Int32:
            std::get<ContainerPtrType<Int32>>(attribute.arrays)[idx] = value.get<Int64>();
            break;
        case AttributeUnderlyingType::Int64:
            std::get<ContainerPtrType<Int64>>(attribute.arrays)[idx] = value.get<Int64>();
            break;
        case AttributeUnderlyingType::Float32:
            std::get<ContainerPtrType<Float32>>(attribute.arrays)[idx] = value.get<Float64>();
            break;
        case AttributeUnderlyingType::Float64:
            std::get<ContainerPtrType<Float64>>(attribute.arrays)[idx] = value.get<Float64>();
            break;

        case AttributeUnderlyingType::Decimal32:
            std::get<ContainerPtrType<Decimal32>>(attribute.arrays)[idx] = value.get<Decimal32>();
            break;
        case AttributeUnderlyingType::Decimal64:
            std::get<ContainerPtrType<Decimal64>>(attribute.arrays)[idx] = value.get<Decimal64>();
            break;
        case AttributeUnderlyingType::Decimal128:
            std::get<ContainerPtrType<Decimal128>>(attribute.arrays)[idx] = value.get<Decimal128>();
            break;
521

522 523 524 525 526 527 528 529 530 531
        case AttributeUnderlyingType::String:
        {
            const auto & string = value.get<String>();
            auto & string_ref = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_value_ref = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            if (string_ref.data && string_ref.data != null_value_ref.data())
                string_arena->free(const_cast<char *>(string_ref.data), string_ref.size);

532 533
            const auto str_size = string.size();
            if (str_size != 0)
534
            {
535 536 537
                auto string_ptr = string_arena->alloc(str_size + 1);
                std::copy(string.data(), string.data() + str_size + 1, string_ptr);
                string_ref = StringRef{string_ptr, str_size};
538 539 540 541 542 543 544
            }
            else
                string_ref = {};

            break;
        }
    }
545 546
}

A
Alexey Milovidov 已提交
547
CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & attribute_name) const
548
{
549 550
    const auto it = attribute_index_by_name.find(attribute_name);
    if (it == std::end(attribute_index_by_name))
551
        throw Exception{name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
552 553

    return attributes[it->second];
554 555
}

556 557
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
P
proller 已提交
558 559
    return (idx != zero_cell_idx && cells[idx].id == 0)
        || (cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
560 561 562 563
}

PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
564 565
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

566 567 568
    PaddedPODArray<Key> array;
    for (size_t idx = 0; idx < cells.size(); ++idx)
    {
N
Nikolai Kochetov 已提交
569
        auto & cell = cells[idx];
570
        if (!isEmptyCell(idx) && !cells[idx].isDefault())
571 572 573 574 575 576 577
        {
            array.push_back(cell.id);
        }
    }
    return array;
}

578
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
579
{
580
    using BlockInputStreamType = DictionaryBlockInputStream<CacheDictionary, Key>;
581
    return std::make_shared<BlockInputStreamType>(shared_from_this(), max_block_size, getCachedIds(), column_names);
582 583
}

584 585
void registerDictionaryCache(DictionaryFactory & factory)
{
P
proller 已提交
586 587 588 589
    auto create_layout = [=](const std::string & name,
                             const DictionaryStructure & dict_struct,
                             const Poco::Util::AbstractConfiguration & config,
                             const std::string & config_prefix,
P
proller 已提交
590 591
                             DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
592
        if (dict_struct.key)
P
proller 已提交
593
            throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD};
594 595

        if (dict_struct.range_min || dict_struct.range_max)
P
proller 已提交
596 597 598 599
            throw Exception{name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};
600 601 602
        const auto & layout_prefix = config_prefix + ".layout";
        const auto size = config.getInt(layout_prefix + ".cache.size_in_cells");
        if (size == 0)
P
proller 已提交
603
            throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};
604 605 606

        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
        if (require_nonempty)
P
proller 已提交
607 608
            throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
                            ErrorCodes::BAD_ARGUMENTS};
609

P
proller 已提交
610
        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
611 612 613 614 615
        return std::make_unique<CacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, size);
    };
    factory.registerLayout("cache", create_layout);
}

616

617
}