CacheDictionary.cpp 22.2 KB
Newer Older
1 2
#include "CacheDictionary.h"

3
#include <functional>
4 5
#include <memory>
#include <Columns/ColumnString.h>
6
#include <Common/BitHelpers.h>
P
proller 已提交
7
#include <Common/CurrentMetrics.h>
8
#include <Common/HashTable/Hash.h>
9
#include <Common/ProfileEvents.h>
P
proller 已提交
10 11
#include <Common/ProfilingScopedRWLock.h>
#include <Common/randomSeed.h>
12
#include <Common/typeid_cast.h>
P
proller 已提交
13 14
#include <ext/range.h>
#include <ext/size.h>
15
#include "CacheDictionary.inc.h"
P
proller 已提交
16 17
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
18

19 20
namespace ProfileEvents
{
P
proller 已提交
21 22 23 24 25 26 27 28 29 30
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
31 32 33 34
}

namespace CurrentMetrics
{
P
proller 已提交
35
extern const Metric DictCacheRequests;
36 37 38
}


39 40 41 42
namespace DB
{
namespace ErrorCodes
{
43 44 45
    extern const int TYPE_MISMATCH;
    extern const int BAD_ARGUMENTS;
    extern const int UNSUPPORTED_METHOD;
A
Alexey Milovidov 已提交
46
    extern const int LOGICAL_ERROR;
47
    extern const int TOO_SMALL_BUFFER_SIZE;
48 49 50
}


P
proller 已提交
51
inline size_t CacheDictionary::getCellIdx(const Key id) const
52
{
53 54 55
    const auto hash = intHash64(id);
    const auto idx = hash & size_overlap_mask;
    return idx;
56 57 58
}


P
proller 已提交
59
/// Builds an empty cache on top of `source_ptr_`.
/// The requested number of cells is raised to at least max_collision_length and then passed through
/// roundUpToPowerOfTwoOrZero, so that `size - 1` can be used as an index mask.
/// Throws UNSUPPORTED_METHOD if the source does not support selective load.
CacheDictionary::CacheDictionary(
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    const size_t size_)
    : name{name_}
    , dict_struct(dict_struct_)
    , source_ptr{std::move(source_ptr_)}
    , dict_lifetime(dict_lifetime_)
    , log(&Logger::get("ExternalDictionaries"))
    , size{roundUpToPowerOfTwoOrZero(std::max(size_, size_t(max_collision_length)))}
    , size_overlap_mask{this->size - 1}
    , cells{this->size}
    , rnd_engine(randomSeed())
{
    /// A cache requests individual keys on demand, so the source has to be able to load selectively.
    const bool selective_load_supported = this->source_ptr->supportsSelectiveLoad();
    if (!selective_load_supported)
        throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};

    createAttributes();
}


A
Alexey Milovidov 已提交
82
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
83
{
84
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
85

86
    getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
87 88 89
}


90
/// Uniform element access, so that a single value can be used in the same way as an array.

/// Array overload: returns the element at position `idx`.
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx)
{
    return arr[idx];
}

/// Single-value overload: the position is ignored, the same value is returned for every index.
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t /*idx*/)
{
    return value;
}
99 100 101


template <typename AncestorType>
P
proller 已提交
102
void CacheDictionary::isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
103
{
104 105
    /// Transform all children to parents until ancestor id or null_value will be reached.

106
    size_t out_size = out.size();
P
proller 已提交
107
    memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated"
108 109 110

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

111
    PaddedPODArray<Key> children(out_size, 0);
112 113 114 115 116 117 118 119
    PaddedPODArray<Key> parents(child_ids.begin(), child_ids.end());

    while (true)
    {
        size_t out_idx = 0;
        size_t parents_idx = 0;
        size_t new_children_idx = 0;

120
        while (out_idx < out_size)
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
        {
            /// Already calculated
            if (out[out_idx] != 0xFF)
            {
                ++out_idx;
                continue;
            }

            /// No parent
            if (parents[parents_idx] == null_value)
            {
                out[out_idx] = 0;
            }
            /// Found ancestor
            else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx))
            {
                out[out_idx] = 1;
            }
A
alexey-milovidov 已提交
139
            /// Loop detected
140 141
            else if (children[new_children_idx] == parents[parents_idx])
            {
P
proller 已提交
142
                out[out_idx] = 1;
143
            }
A
alexey-milovidov 已提交
144
            /// Found intermediate parent, add this value to search at next loop iteration
145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
            else
            {
                children[new_children_idx] = parents[parents_idx];
                ++new_children_idx;
            }

            ++out_idx;
            ++parents_idx;
        }

        if (new_children_idx == 0)
            break;

        /// Transform all children to its parents.
        children.resize(new_children_idx);
        parents.resize(new_children_idx);

        toParent(children, parents);
    }
164 165 166
}

void CacheDictionary::isInVectorVector(
P
proller 已提交
167
    const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
168
{
169
    isInImpl(child_ids, ancestor_ids, out);
170
}
171

P
proller 已提交
172
void CacheDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
173
{
174
    isInImpl(child_ids, ancestor_id, out);
175
}
176

P
proller 已提交
177
void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
178
{
179
    /// Special case with single child value.
180

181
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
182

183 184 185
    PaddedPODArray<Key> child(1, child_id);
    PaddedPODArray<Key> parent(1);
    std::vector<Key> ancestors(1, child_id);
186

187 188 189 190
    /// Iteratively find all ancestors for child.
    while (true)
    {
        toParent(child, parent);
191

192 193
        if (parent[0] == null_value)
            break;
194

195 196 197
        child[0] = parent[0];
        ancestors.push_back(parent[0]);
    }
198

199
    /// Assuming short hierarchy, so linear search is Ok.
200
    for (size_t i = 0, out_size = out.size(); i < out_size; ++i)
201
        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
202
}
203

A
Alexey Milovidov 已提交
204
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
205
{
206
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
207
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
208

209
    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
210

P
proller 已提交
211
    getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
212 213 214
}

void CacheDictionary::getString(
P
proller 已提交
215
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
216
{
217
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
218
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
219

P
proller 已提交
220
    getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
221 222 223
}

void CacheDictionary::getString(
P
proller 已提交
224
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
225
{
226
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
227
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
228

P
proller 已提交
229
    getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
230 231 232
}


233
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag
234 235 236 237 238 239 240 241
/// true  false   found and valid
/// false true    not found (something outdated, maybe our cell)
/// false false   not found (other id stored with valid data)
/// true  true    impossible
///
/// todo: split this func to two: find_for_get and find_for_set
CacheDictionary::FindResult CacheDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const
{
242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
    auto pos = getCellIdx(id);
    auto oldest_id = pos;
    auto oldest_time = CellMetadata::time_point_t::max();
    const auto stop = pos + max_collision_length;
    for (; pos < stop; ++pos)
    {
        const auto cell_idx = pos & size_overlap_mask;
        const auto & cell = cells[cell_idx];

        if (cell.id != id)
        {
            /// maybe we already found nearest expired cell (try minimize collision_length on insert)
            if (oldest_time > now && oldest_time > cell.expiresAt())
            {
                oldest_time = cell.expiresAt();
                oldest_id = cell_idx;
            }
            continue;
        }

        if (cell.expiresAt() < now)
        {
            return {cell_idx, false, true};
        }

        return {cell_idx, true, false};
    }

    return {oldest_id, false, false};
271 272
}

A
Alexey Milovidov 已提交
273
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
274
{
275
    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
276
    std::unordered_map<Key, std::vector<size_t>> outdated_ids;
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318

    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;

    const auto rows = ext::size(ids);
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto find_result = findCellIdx(id, now);
            const auto & cell_idx = find_result.cell_idx;
            if (!find_result.valid)
            {
                outdated_ids[id].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell = cells[cell_idx];
                out[row] = !cell.isDefault();
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release);

    if (outdated_ids.empty())
        return;

    std::vector<Key> required_ids(outdated_ids.size());
P
proller 已提交
319
    std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
320 321

    /// request new values
P
proller 已提交
322 323 324 325 326 327 328 329 330 331 332 333
    update(
        required_ids,
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = true;
        },
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = false;
        });
334 335 336 337 338
}


void CacheDictionary::createAttributes()
{
339 340
    const auto attributes_size = dict_struct.attributes.size();
    attributes.reserve(attributes_size);
341 342

    bytes_allocated += size * sizeof(CellMetadata);
343
    bytes_allocated += attributes_size * sizeof(attributes.front());
344 345 346 347 348 349 350 351

    for (const auto & attribute : dict_struct.attributes)
    {
        attribute_index_by_name.emplace(attribute.name, attributes.size());
        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));

        if (attribute.hierarchical)
        {
P
proller 已提交
352
            hierarchical_attribute = &attributes.back();
353

K
kreuzerkrieg 已提交
354
            if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
355
                throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
356 357
        }
    }
358 359
}

A
Alexey Milovidov 已提交
360
/// Creates the storage of one attribute: the typed null value plus an array with `size` elements
/// (one per cell), and adds the array's size to bytes_allocated.
/// For String attributes the array holds StringRef and the shared string arena is created on first use.
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    switch (type)
    {
#define CREATE_FIXED_SIZE_ATTRIBUTE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
        result.arrays = std::make_unique<ContainerType<TYPE>>(size); \
        bytes_allocated += size * sizeof(TYPE); \
        break;
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt8)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt16)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt32)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt64)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt128)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int8)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int16)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int64)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal64)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal128)
        CREATE_FIXED_SIZE_ATTRIBUTE(Float32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Float64)
#undef CREATE_FIXED_SIZE_ATTRIBUTE

        case AttributeUnderlyingType::utString:
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            /// One arena is shared by all String attributes.
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;
        }
    }

    return result;
}

A
Alexey Milovidov 已提交
399
/// Stores the attribute's null value into element `idx` of the attribute's array.
/// For String attributes the previously stored string is returned to the arena first, unless the
/// element is empty or already points at the null value.
void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const
{
    switch (attribute.type)
    {
#define RESET_TO_NULL_VALUE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;
        RESET_TO_NULL_VALUE(UInt8)
        RESET_TO_NULL_VALUE(UInt16)
        RESET_TO_NULL_VALUE(UInt32)
        RESET_TO_NULL_VALUE(UInt64)
        RESET_TO_NULL_VALUE(UInt128)
        RESET_TO_NULL_VALUE(Int8)
        RESET_TO_NULL_VALUE(Int16)
        RESET_TO_NULL_VALUE(Int32)
        RESET_TO_NULL_VALUE(Int64)
        RESET_TO_NULL_VALUE(Float32)
        RESET_TO_NULL_VALUE(Float64)
        RESET_TO_NULL_VALUE(Decimal32)
        RESET_TO_NULL_VALUE(Decimal64)
        RESET_TO_NULL_VALUE(Decimal128)
#undef RESET_TO_NULL_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & null_value_ref = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Nothing to do if the element already refers to the null value.
            if (stored.data == null_value_ref.data())
                break;

            /// The old string lives in the arena and has to be released.
            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{null_value_ref};
            break;
        }
    }
}

A
Alexey Milovidov 已提交
465
/// Stores `value` into element `idx` of the attribute's array.
/// Numeric values are read from the Field with the field type listed next to each attribute type below.
/// A non-empty String is copied into the string arena together with its terminating zero;
/// an empty String is stored as an empty StringRef.
void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const
{
    switch (attribute.type)
    {
#define STORE_FIELD_VALUE(TYPE, FIELD_TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
        break;
        STORE_FIELD_VALUE(UInt8, UInt64)
        STORE_FIELD_VALUE(UInt16, UInt64)
        STORE_FIELD_VALUE(UInt32, UInt64)
        STORE_FIELD_VALUE(UInt64, UInt64)
        STORE_FIELD_VALUE(UInt128, UInt128)
        STORE_FIELD_VALUE(Int8, Int64)
        STORE_FIELD_VALUE(Int16, Int64)
        STORE_FIELD_VALUE(Int32, Int64)
        STORE_FIELD_VALUE(Int64, Int64)
        STORE_FIELD_VALUE(Float32, Float64)
        STORE_FIELD_VALUE(Float64, Float64)
        STORE_FIELD_VALUE(Decimal32, Decimal32)
        STORE_FIELD_VALUE(Decimal64, Decimal64)
        STORE_FIELD_VALUE(Decimal128, Decimal128)
#undef STORE_FIELD_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & string = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_value_ref = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool owns_arena_memory = stored.data && stored.data != null_value_ref.data();
            if (owns_arena_memory)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto str_size = string.size();
            if (str_size == 0)
            {
                stored = {};
                break;
            }

            /// One extra byte for the terminating zero, which is copied as well.
            auto string_ptr = string_arena->alloc(str_size + 1);
            std::copy_n(string.data(), str_size + 1, string_ptr);
            stored = StringRef{string_ptr, str_size};
            break;
        }
    }
}

A
Alexey Milovidov 已提交
538
/// Looks the attribute up by name.
/// Throws BAD_ARGUMENTS if the dictionary has no attribute with this name.
CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);

    if (found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}

547 548
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
P
proller 已提交
549 550
    return (idx != zero_cell_idx && cells[idx].id == 0)
        || (cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
551 552 553 554
}

/// Returns the ids of all cells that are neither empty (see isEmptyCell) nor marked as default.
/// The cells are scanned under the read lock.
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    PaddedPODArray<Key> cached_ids;
    const size_t cells_count = cells.size();

    for (size_t idx = 0; idx != cells_count; ++idx)
    {
        if (isEmptyCell(idx))
            continue;

        auto & cell = cells[idx];
        if (cell.isDefault())
            continue;

        cached_ids.push_back(cell.id);
    }

    return cached_ids;
}

569
/// Creates a stream over the ids that are currently cached (see getCachedIds),
/// restricted to `column_names` and split into blocks of at most `max_block_size`.
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    return std::make_shared<DictionaryBlockInputStream<CacheDictionary, Key>>(
        shared_from_this(), max_block_size, getCachedIds(), column_names);
}

575 576 577 578 579 580
/// Returns a copy of `last_exception`, read under the read lock.
std::exception_ptr CacheDictionary::getLastException() const
{
    const ProfilingScopedReadRWLock lock_for_read{rw_lock, ProfileEvents::DictCacheLockReadNs};

    return last_exception;
}

581 582
/// Registers the 'cache' layout in the dictionary factory.
/// The creator validates the configuration and builds a CacheDictionary:
///   - a composite 'key' is rejected (UNSUPPORTED_METHOD);
///   - range_min / range_max are rejected (BAD_ARGUMENTS);
///   - <layout>.cache.size_in_cells must be positive (TOO_SMALL_BUFFER_SIZE);
///   - require_nonempty must not be set (BAD_ARGUMENTS).
void registerDictionaryCache(DictionaryFactory & factory)
{
    auto create_layout = [](const std::string & name,
                            const DictionaryStructure & dict_struct,
                            const Poco::Util::AbstractConfiguration & config,
                            const std::string & config_prefix,
                            DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
        if (dict_struct.key)
            throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD};

        if (dict_struct.range_min || dict_struct.range_max)
            throw Exception{name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};

        const auto layout_prefix = config_prefix + ".layout";
        const auto size = config.getInt(layout_prefix + ".cache.size_in_cells");
        if (size == 0)
            throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};

        /// The value is read as a signed integer. Without this check a negative number would be
        /// converted to a huge unsigned cell count when passed to the constructor.
        if (size < 0)
            throw Exception{name + ": dictionary of layout 'cache' cannot have a negative number of cells",
                            ErrorCodes::TOO_SMALL_BUFFER_SIZE};

        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
        if (require_nonempty)
            throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
                            ErrorCodes::BAD_ARGUMENTS};

        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
        return std::make_unique<CacheDictionary>(name, dict_struct, std::move(source_ptr), dict_lifetime, static_cast<size_t>(size));
    };
    factory.registerLayout("cache", create_layout, false);
}

613

614
}