CacheDictionary.cpp 22.5 KB
Newer Older
1 2
#include "CacheDictionary.h"

3
#include <functional>
4 5
#include <memory>
#include <Columns/ColumnString.h>
6
#include <Common/BitHelpers.h>
P
proller 已提交
7
#include <Common/CurrentMetrics.h>
8
#include <Common/HashTable/Hash.h>
9
#include <Common/ProfileEvents.h>
P
proller 已提交
10 11
#include <Common/ProfilingScopedRWLock.h>
#include <Common/randomSeed.h>
12
#include <Common/typeid_cast.h>
P
proller 已提交
13 14
#include <ext/range.h>
#include <ext/size.h>
15
#include "CacheDictionary.inc.h"
P
proller 已提交
16 17
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
18

19 20
namespace ProfileEvents
{
P
proller 已提交
21 22 23 24 25 26 27 28 29 30
extern const Event DictCacheKeysRequested;
extern const Event DictCacheKeysRequestedMiss;
extern const Event DictCacheKeysRequestedFound;
extern const Event DictCacheKeysExpired;
extern const Event DictCacheKeysNotFound;
extern const Event DictCacheKeysHit;
extern const Event DictCacheRequestTimeNs;
extern const Event DictCacheRequests;
extern const Event DictCacheLockWriteNs;
extern const Event DictCacheLockReadNs;
31 32 33 34
}

namespace CurrentMetrics
{
P
proller 已提交
35
extern const Metric DictCacheRequests;
36 37 38
}


39 40 41 42
namespace DB
{
namespace ErrorCodes
{
43 44 45
    extern const int TYPE_MISMATCH;
    extern const int BAD_ARGUMENTS;
    extern const int UNSUPPORTED_METHOD;
A
Alexey Milovidov 已提交
46
    extern const int LOGICAL_ERROR;
47
    extern const int TOO_SMALL_BUFFER_SIZE;
48 49 50
}


P
proller 已提交
51
inline size_t CacheDictionary::getCellIdx(const Key id) const
52
{
53 54 55
    const auto hash = intHash64(id);
    const auto idx = hash & size_overlap_mask;
    return idx;
56 57 58
}


P
proller 已提交
59
/// Builds a cache dictionary on top of the given source.
///
/// The requested number of cells is raised to at least max_collision_length and then rounded up
/// to a power of two (or zero), which lets `size - 1` serve as the index mask.
///
/// Throws UNSUPPORTED_METHOD if the source does not support selective load.
CacheDictionary::CacheDictionary(
    const std::string & database_,
    const std::string & name_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    const size_t size_)
    : database(database_)
    , name(name_)
    , full_name{database_.empty() ? name_ : (database_ + "." + name_)}
    , dict_struct(dict_struct_)
    , source_ptr{std::move(source_ptr_)}
    , dict_lifetime(dict_lifetime_)
    , log(&Logger::get("ExternalDictionaries"))
    , size{roundUpToPowerOfTwoOrZero(std::max(size_, size_t(max_collision_length)))}
    , size_overlap_mask{this->size - 1}
    , cells{this->size}
    , rnd_engine(randomSeed())
{
    const bool selective_load_supported = source_ptr->supportsSelectiveLoad();
    if (!selective_load_supported)
        throw Exception{full_name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};

    createAttributes();
}


A
Alexey Milovidov 已提交
85
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
86
{
87
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
88

89
    getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
90 91 92
}


93
/// Uniform element access, so that a single key can be used in the same way as an array of keys.

/// Array overload: returns the element stored at position `idx`.
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t idx)
{
    return arr[idx];
}

/// Single-value overload: the index is ignored and the value itself is returned.
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t /*idx*/)
{
    return value;
}
102 103 104


template <typename AncestorType>
P
proller 已提交
105
void CacheDictionary::isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
106
{
107 108
    /// Transform all children to parents until ancestor id or null_value will be reached.

109
    size_t out_size = out.size();
P
proller 已提交
110
    memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated"
111 112 113

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

114
    PaddedPODArray<Key> children(out_size, 0);
115 116 117 118 119 120 121 122
    PaddedPODArray<Key> parents(child_ids.begin(), child_ids.end());

    while (true)
    {
        size_t out_idx = 0;
        size_t parents_idx = 0;
        size_t new_children_idx = 0;

123
        while (out_idx < out_size)
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
        {
            /// Already calculated
            if (out[out_idx] != 0xFF)
            {
                ++out_idx;
                continue;
            }

            /// No parent
            if (parents[parents_idx] == null_value)
            {
                out[out_idx] = 0;
            }
            /// Found ancestor
            else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx))
            {
                out[out_idx] = 1;
            }
A
alexey-milovidov 已提交
142
            /// Loop detected
143 144
            else if (children[new_children_idx] == parents[parents_idx])
            {
P
proller 已提交
145
                out[out_idx] = 1;
146
            }
A
alexey-milovidov 已提交
147
            /// Found intermediate parent, add this value to search at next loop iteration
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
            else
            {
                children[new_children_idx] = parents[parents_idx];
                ++new_children_idx;
            }

            ++out_idx;
            ++parents_idx;
        }

        if (new_children_idx == 0)
            break;

        /// Transform all children to its parents.
        children.resize(new_children_idx);
        parents.resize(new_children_idx);

        toParent(children, parents);
    }
167 168 169
}

void CacheDictionary::isInVectorVector(
P
proller 已提交
170
    const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
171
{
172
    isInImpl(child_ids, ancestor_ids, out);
173
}
174

P
proller 已提交
175
void CacheDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
176
{
177
    isInImpl(child_ids, ancestor_id, out);
178
}
179

P
proller 已提交
180
void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
181
{
182
    /// Special case with single child value.
183

184
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
185

186 187 188
    PaddedPODArray<Key> child(1, child_id);
    PaddedPODArray<Key> parent(1);
    std::vector<Key> ancestors(1, child_id);
189

190 191 192 193
    /// Iteratively find all ancestors for child.
    while (true)
    {
        toParent(child, parent);
194

195 196
        if (parent[0] == null_value)
            break;
197

198 199 200
        child[0] = parent[0];
        ancestors.push_back(parent[0]);
    }
201

202
    /// Assuming short hierarchy, so linear search is Ok.
203
    for (size_t i = 0, out_size = out.size(); i < out_size; ++i)
204
        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
205
}
206

A
Alexey Milovidov 已提交
207
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
208
{
209
    auto & attribute = getAttribute(attribute_name);
210
    checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
211

212
    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
213

P
proller 已提交
214
    getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
215 216 217
}

void CacheDictionary::getString(
P
proller 已提交
218
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
219
{
220
    auto & attribute = getAttribute(attribute_name);
221
    checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
222

P
proller 已提交
223
    getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
224 225 226
}

void CacheDictionary::getString(
P
proller 已提交
227
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
228
{
229
    auto & attribute = getAttribute(attribute_name);
230
    checkAttributeType(full_name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
231

P
proller 已提交
232
    getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
233 234 235
}


236
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag
237 238 239 240 241 242 243 244
/// true  false   found and valid
/// false true    not found (something outdated, maybe our cell)
/// false false   not found (other id stored with valid data)
/// true  true    impossible
///
/// todo: split this func to two: find_for_get and find_for_set
CacheDictionary::FindResult CacheDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const
{
245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
    auto pos = getCellIdx(id);
    auto oldest_id = pos;
    auto oldest_time = CellMetadata::time_point_t::max();
    const auto stop = pos + max_collision_length;
    for (; pos < stop; ++pos)
    {
        const auto cell_idx = pos & size_overlap_mask;
        const auto & cell = cells[cell_idx];

        if (cell.id != id)
        {
            /// maybe we already found nearest expired cell (try minimize collision_length on insert)
            if (oldest_time > now && oldest_time > cell.expiresAt())
            {
                oldest_time = cell.expiresAt();
                oldest_id = cell_idx;
            }
            continue;
        }

        if (cell.expiresAt() < now)
        {
            return {cell_idx, false, true};
        }

        return {cell_idx, true, false};
    }

    return {oldest_id, false, false};
274 275
}

A
Alexey Milovidov 已提交
276
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
277
{
278
    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
279
    std::unordered_map<Key, std::vector<size_t>> outdated_ids;
280

281
    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;
N
mvc  
Nikita Mikhaylov 已提交
282

283 284 285 286 287 288 289 290 291 292 293 294 295
    const auto rows = ext::size(ids);
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto find_result = findCellIdx(id, now);
            const auto & cell_idx = find_result.cell_idx;
            if (!find_result.valid)
            {
296
                outdated_ids[id].push_back(row);
297
                if (find_result.outdated)
298
                    ++cache_expired;
299
                else
300
                    ++cache_not_found;
301 302 303 304
            }
            else
            {
                ++cache_hit;
305 306
                const auto & cell = cells[cell_idx];
                out[row] = !cell.isDefault();
307 308 309 310
            }
        }
    }

311 312
    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
313 314 315
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
316
    hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release);
N
Nikita Mikhaylov 已提交
317

318 319
    if (outdated_ids.empty())
        return;
N
Nikita Mikhaylov 已提交
320

321 322
    std::vector<Key> required_ids(outdated_ids.size());
    std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
N
mvc  
Nikita Mikhaylov 已提交
323

324 325 326 327 328 329 330 331 332 333 334 335 336
    /// request new values
    update(
        required_ids,
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = true;
        },
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = false;
        });
337 338 339 340 341
}


void CacheDictionary::createAttributes()
{
342 343
    const auto attributes_size = dict_struct.attributes.size();
    attributes.reserve(attributes_size);
344 345

    bytes_allocated += size * sizeof(CellMetadata);
346
    bytes_allocated += attributes_size * sizeof(attributes.front());
347 348 349 350 351 352 353 354

    for (const auto & attribute : dict_struct.attributes)
    {
        attribute_index_by_name.emplace(attribute.name, attributes.size());
        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));

        if (attribute.hierarchical)
        {
P
proller 已提交
355
            hierarchical_attribute = &attributes.back();
356

K
kreuzerkrieg 已提交
357
            if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
358
                throw Exception{full_name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
359 360
        }
    }
361 362
}

A
Alexey Milovidov 已提交
363
/// Builds an Attribute of the requested underlying type: stores the converted null value,
/// allocates a container with `size` elements and adds its footprint to bytes_allocated.
/// For String attributes the shared string arena is created on first use.
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    switch (type)
    {
#define CREATE_TYPED_STORAGE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
    { \
        result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
        result.arrays = std::make_unique<ContainerType<TYPE>>(size); \
        bytes_allocated += size * sizeof(TYPE); \
        break; \
    }

        CREATE_TYPED_STORAGE(UInt8)
        CREATE_TYPED_STORAGE(UInt16)
        CREATE_TYPED_STORAGE(UInt32)
        CREATE_TYPED_STORAGE(UInt64)
        CREATE_TYPED_STORAGE(UInt128)
        CREATE_TYPED_STORAGE(Int8)
        CREATE_TYPED_STORAGE(Int16)
        CREATE_TYPED_STORAGE(Int32)
        CREATE_TYPED_STORAGE(Int64)
        CREATE_TYPED_STORAGE(Decimal32)
        CREATE_TYPED_STORAGE(Decimal64)
        CREATE_TYPED_STORAGE(Decimal128)
        CREATE_TYPED_STORAGE(Float32)
        CREATE_TYPED_STORAGE(Float64)

#undef CREATE_TYPED_STORAGE

        case AttributeUnderlyingType::utString:
        {
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);

            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();

            break;
        }
    }

    return result;
}

A
Alexey Milovidov 已提交
402
/// Resets the attribute value stored at cell `idx` to the attribute's null value.
///
/// For String attributes the cell is made to point at the null value's own buffer; a previously
/// stored non-null string is returned to the string arena first.
void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const
{
    switch (attribute.type)
    {
#define ASSIGN_NULL_VALUE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;

        ASSIGN_NULL_VALUE(UInt8)
        ASSIGN_NULL_VALUE(UInt16)
        ASSIGN_NULL_VALUE(UInt32)
        ASSIGN_NULL_VALUE(UInt64)
        ASSIGN_NULL_VALUE(UInt128)
        ASSIGN_NULL_VALUE(Int8)
        ASSIGN_NULL_VALUE(Int16)
        ASSIGN_NULL_VALUE(Int32)
        ASSIGN_NULL_VALUE(Int64)
        ASSIGN_NULL_VALUE(Float32)
        ASSIGN_NULL_VALUE(Float64)
        ASSIGN_NULL_VALUE(Decimal32)
        ASSIGN_NULL_VALUE(Decimal64)
        ASSIGN_NULL_VALUE(Decimal128)

#undef ASSIGN_NULL_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & null_string = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already pointing at the null value: nothing to release, nothing to change.
            if (stored.data == null_string.data())
                break;

            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{null_string};
            break;
        }
    }
}

A
Alexey Milovidov 已提交
468
/// Stores `value` as the attribute value of cell `idx`.
///
/// Numeric attributes are read from the Field using the Field type listed next to each case
/// (e.g. all narrow unsigned types are read as UInt64) and assigned to the typed container.
/// String attributes are copied into the string arena together with the terminating zero byte;
/// the previously stored string is freed unless it is empty or points at the null value.
void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const
{
    switch (attribute.type)
    {
#define STORE_FIELD(TYPE, FIELD_TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
        break;

        STORE_FIELD(UInt8, UInt64)
        STORE_FIELD(UInt16, UInt64)
        STORE_FIELD(UInt32, UInt64)
        STORE_FIELD(UInt64, UInt64)
        STORE_FIELD(UInt128, UInt128)
        STORE_FIELD(Int8, Int64)
        STORE_FIELD(Int16, Int64)
        STORE_FIELD(Int32, Int64)
        STORE_FIELD(Int64, Int64)
        STORE_FIELD(Float32, Float64)
        STORE_FIELD(Float64, Float64)
        STORE_FIELD(Decimal32, Decimal32)
        STORE_FIELD(Decimal64, Decimal64)
        STORE_FIELD(Decimal128, Decimal128)

#undef STORE_FIELD

        case AttributeUnderlyingType::utString:
        {
            const auto & new_value = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & null_string = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool holds_arena_memory = stored.data && stored.data != null_string.data();
            if (holds_arena_memory)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto length = new_value.size();
            if (length == 0)
            {
                stored = {};
                break;
            }

            /// One extra byte for the terminating zero, which is copied along with the data.
            auto buffer = string_arena->alloc(length + 1);
            std::copy(new_value.data(), new_value.data() + length + 1, buffer);
            stored = StringRef{buffer, length};

            break;
        }
    }
}

A
Alexey Milovidov 已提交
541
/// Looks an attribute up by name.
/// Throws BAD_ARGUMENTS if the dictionary has no attribute with that name.
CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);
    if (found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{full_name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}

550 551
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
P
proller 已提交
552 553
    return (idx != zero_cell_idx && cells[idx].id == 0)
        || (cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
554 555 556 557
}

/// Collects, under the read lock, the ids of all cells that are neither empty nor marked as default.
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    PaddedPODArray<Key> cached_ids;
    const size_t cells_count = cells.size();

    for (size_t i = 0; i < cells_count; ++i)
    {
        if (isEmptyCell(i) || cells[i].isDefault())
            continue;

        cached_ids.push_back(cells[i].id);
    }

    return cached_ids;
}

572
/// Creates a DictionaryBlockInputStream over the ids returned by getCachedIds(),
/// restricted to the requested columns.
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    return std::make_shared<DictionaryBlockInputStream<CacheDictionary, Key>>(
        shared_from_this(), max_block_size, getCachedIds(), column_names);
}

578 579 580 581 582 583
/// Returns a copy of last_exception, taken while holding the read lock.
std::exception_ptr CacheDictionary::getLastException() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    std::exception_ptr snapshot = last_exception;
    return snapshot;
}

584 585
/// Registers the 'cache' layout in the dictionary factory.
///
/// The creator validates the configuration and throws:
///  - UNSUPPORTED_METHOD     if the structure defines a composite 'key';
///  - BAD_ARGUMENTS          if range_min / range_max are defined, or 'require_nonempty' is set;
///  - TOO_SMALL_BUFFER_SIZE  if 'size_in_cells' is zero or negative.
void registerDictionaryCache(DictionaryFactory & factory)
{
    auto create_layout = [](const std::string & full_name,
                            const DictionaryStructure & dict_struct,
                            const Poco::Util::AbstractConfiguration & config,
                            const std::string & config_prefix,
                            DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
        if (dict_struct.key)
            throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD};

        if (dict_struct.range_min || dict_struct.range_max)
            throw Exception{full_name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};

        const std::string layout_prefix = config_prefix + ".layout";
        const auto size = config.getInt(layout_prefix + ".cache.size_in_cells");
        if (size == 0)
            throw Exception{full_name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};

        /// The value is read as a signed integer; without this check a negative number would be
        /// silently converted to a huge unsigned cell count when passed to the constructor.
        if (size < 0)
            throw Exception{full_name + ": dictionary of layout 'cache' cannot have a negative number of cells",
                            ErrorCodes::TOO_SMALL_BUFFER_SIZE};

        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
        if (require_nonempty)
            throw Exception{full_name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
                            ErrorCodes::BAD_ARGUMENTS};

        const String database = config.getString(config_prefix + ".database", "");
        const String name = config.getString(config_prefix + ".name");
        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};
        return std::make_unique<CacheDictionary>(
            database, name, dict_struct, std::move(source_ptr), dict_lifetime, static_cast<size_t>(size));
    };
    factory.registerLayout("cache", create_layout, false);
}

N
Nikita Mikhaylov 已提交
618

619
}