CacheDictionary.cpp 22.5 KB
Newer Older
1 2
#include "CacheDictionary.h"

3
#include <functional>
4
#include <memory>
P
proller 已提交
5
#include <sstream>
6
#include <Columns/ColumnString.h>
P
proller 已提交
7
#include <Columns/ColumnsNumber.h>
8
#include <Common/BitHelpers.h>
P
proller 已提交
9
#include <Common/CurrentMetrics.h>
10
#include <Common/HashTable/Hash.h>
11
#include <Common/ProfileEvents.h>
P
proller 已提交
12 13 14
#include <Common/ProfilingScopedRWLock.h>
#include <Common/Stopwatch.h>
#include <Common/randomSeed.h>
15
#include <Common/typeid_cast.h>
16
#include <ext/map.h>
P
proller 已提交
17 18
#include <ext/range.h>
#include <ext/size.h>
19
#include "CacheDictionary.inc.h"
P
proller 已提交
20 21
#include "DictionaryBlockInputStream.h"
#include "DictionaryFactory.h"
22

23 24
/// Profile event counters related to the cache dictionary.
/// They are only declared here (extern); the definitions live elsewhere.
namespace ProfileEvents
{
    /// Per-key counters.
    extern const Event DictCacheKeysRequested;
    extern const Event DictCacheKeysRequestedMiss;
    extern const Event DictCacheKeysRequestedFound;
    extern const Event DictCacheKeysExpired;
    extern const Event DictCacheKeysNotFound;
    extern const Event DictCacheKeysHit;

    /// Per-request counters.
    extern const Event DictCacheRequestTimeNs;
    extern const Event DictCacheRequests;

    /// Time spent waiting for the read/write lock, in nanoseconds.
    extern const Event DictCacheLockWriteNs;
    extern const Event DictCacheLockReadNs;
}

namespace CurrentMetrics
{
P
proller 已提交
39
extern const Metric DictCacheRequests;
40 41 42
}


43 44 45 46
namespace DB
{
/// Error codes referenced by the cache dictionary; declared extern, defined elsewhere.
namespace ErrorCodes
{
    extern const int TYPE_MISMATCH;
    extern const int BAD_ARGUMENTS;
    extern const int UNSUPPORTED_METHOD;
    extern const int LOGICAL_ERROR;
    extern const int TOO_SMALL_BUFFER_SIZE;
}


P
proller 已提交
55
inline size_t CacheDictionary::getCellIdx(const Key id) const
56
{
57 58 59
    const auto hash = intHash64(id);
    const auto idx = hash & size_overlap_mask;
    return idx;
60 61 62
}


P
proller 已提交
63
/** Constructs the cache dictionary.
  *
  * The requested cell count `size_` is raised to at least `max_collision_length` and then
  * passed through roundUpToPowerOfTwoOrZero; `size_overlap_mask` is `size - 1` and is used
  * to wrap cell indices.
  *
  * Throws UNSUPPORTED_METHOD if the source does not support selective load.
  */
CacheDictionary::CacheDictionary(
    const std::string & name_,
    const std::unordered_set<std::string> & allowed_databases_,
    const DictionaryStructure & dict_struct_,
    DictionarySourcePtr source_ptr_,
    const DictionaryLifetime dict_lifetime_,
    const size_t size_)
    : name{name_}
    , allowed_databases{allowed_databases_}
    , dict_struct(dict_struct_)
    , source_ptr{std::move(source_ptr_)}
    , dict_lifetime(dict_lifetime_)
    , log(&Logger::get("ExternalDictionaries"))
    , size{roundUpToPowerOfTwoOrZero(std::max(size_, size_t(max_collision_length)))}
    , size_overlap_mask{this->size - 1}
    , cells{this->size}
    , rnd_engine(randomSeed())
{
    /// The source must be able to load individual keys on demand.
    const bool selective_load_supported = source_ptr->supportsSelectiveLoad();
    if (!selective_load_supported)
        throw Exception{name + ": source cannot be used with CacheDictionary", ErrorCodes::UNSUPPORTED_METHOD};

    createAttributes();
}


A
Alexey Milovidov 已提交
88
void CacheDictionary::toParent(const PaddedPODArray<Key> & ids, PaddedPODArray<Key> & out) const
89
{
90
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
91

92
    getItemsNumberImpl<UInt64, UInt64>(*hierarchical_attribute, ids, out, [&](const size_t) { return null_value; });
93 94 95
}


96
/// Uniform element access, so that a single value can be used the same way as an array.

/// Array overload: returns the element at position `pos`.
static inline CacheDictionary::Key getAt(const PaddedPODArray<CacheDictionary::Key> & arr, const size_t pos)
{
    return arr[pos];
}

/// Scalar overload: the position is ignored, the value itself is returned.
static inline CacheDictionary::Key getAt(const CacheDictionary::Key & value, const size_t /*pos*/)
{
    return value;
}
105 106 107


template <typename AncestorType>
P
proller 已提交
108
void CacheDictionary::isInImpl(const PaddedPODArray<Key> & child_ids, const AncestorType & ancestor_ids, PaddedPODArray<UInt8> & out) const
109
{
110 111
    /// Transform all children to parents until ancestor id or null_value will be reached.

112
    size_t out_size = out.size();
P
proller 已提交
113
    memset(out.data(), 0xFF, out_size); /// 0xFF means "not calculated"
114 115 116

    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);

117
    PaddedPODArray<Key> children(out_size, 0);
118 119 120 121 122 123 124 125
    PaddedPODArray<Key> parents(child_ids.begin(), child_ids.end());

    while (true)
    {
        size_t out_idx = 0;
        size_t parents_idx = 0;
        size_t new_children_idx = 0;

126
        while (out_idx < out_size)
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
        {
            /// Already calculated
            if (out[out_idx] != 0xFF)
            {
                ++out_idx;
                continue;
            }

            /// No parent
            if (parents[parents_idx] == null_value)
            {
                out[out_idx] = 0;
            }
            /// Found ancestor
            else if (parents[parents_idx] == getAt(ancestor_ids, parents_idx))
            {
                out[out_idx] = 1;
            }
A
alexey-milovidov 已提交
145
            /// Loop detected
146 147
            else if (children[new_children_idx] == parents[parents_idx])
            {
P
proller 已提交
148
                out[out_idx] = 1;
149
            }
A
alexey-milovidov 已提交
150
            /// Found intermediate parent, add this value to search at next loop iteration
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
            else
            {
                children[new_children_idx] = parents[parents_idx];
                ++new_children_idx;
            }

            ++out_idx;
            ++parents_idx;
        }

        if (new_children_idx == 0)
            break;

        /// Transform all children to its parents.
        children.resize(new_children_idx);
        parents.resize(new_children_idx);

        toParent(children, parents);
    }
170 171 172
}

void CacheDictionary::isInVectorVector(
P
proller 已提交
173
    const PaddedPODArray<Key> & child_ids, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
174
{
175
    isInImpl(child_ids, ancestor_ids, out);
176
}
177

P
proller 已提交
178
void CacheDictionary::isInVectorConstant(const PaddedPODArray<Key> & child_ids, const Key ancestor_id, PaddedPODArray<UInt8> & out) const
179
{
180
    isInImpl(child_ids, ancestor_id, out);
181
}
182

P
proller 已提交
183
void CacheDictionary::isInConstantVector(const Key child_id, const PaddedPODArray<Key> & ancestor_ids, PaddedPODArray<UInt8> & out) const
184
{
185
    /// Special case with single child value.
186

187
    const auto null_value = std::get<UInt64>(hierarchical_attribute->null_values);
188

189 190 191
    PaddedPODArray<Key> child(1, child_id);
    PaddedPODArray<Key> parent(1);
    std::vector<Key> ancestors(1, child_id);
192

193 194 195 196
    /// Iteratively find all ancestors for child.
    while (true)
    {
        toParent(child, parent);
197

198 199
        if (parent[0] == null_value)
            break;
200

201 202 203
        child[0] = parent[0];
        ancestors.push_back(parent[0]);
    }
204

205
    /// Assuming short hierarchy, so linear search is Ok.
206
    for (size_t i = 0, out_size = out.size(); i < out_size; ++i)
207
        out[i] = std::find(ancestors.begin(), ancestors.end(), ancestor_ids[i]) != ancestors.end();
208
}
209

A
Alexey Milovidov 已提交
210
void CacheDictionary::getString(const std::string & attribute_name, const PaddedPODArray<Key> & ids, ColumnString * out) const
211
{
212
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
213
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
214

215
    const auto null_value = StringRef{std::get<String>(attribute.null_values)};
216

P
proller 已提交
217
    getItemsString(attribute, ids, out, [&](const size_t) { return null_value; });
218 219 220
}

void CacheDictionary::getString(
P
proller 已提交
221
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const ColumnString * const def, ColumnString * const out) const
222
{
223
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
224
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
225

P
proller 已提交
226
    getItemsString(attribute, ids, out, [&](const size_t row) { return def->getDataAt(row); });
227 228 229
}

void CacheDictionary::getString(
P
proller 已提交
230
    const std::string & attribute_name, const PaddedPODArray<Key> & ids, const String & def, ColumnString * const out) const
231
{
232
    auto & attribute = getAttribute(attribute_name);
K
kreuzerkrieg 已提交
233
    checkAttributeType(name, attribute_name, attribute.type, AttributeUnderlyingType::utString);
234

P
proller 已提交
235
    getItemsString(attribute, ids, out, [&](const size_t) { return StringRef{def}; });
236 237 238
}


239
/// returns cell_idx (always valid for replacing), 'cell is valid' flag, 'cell is outdated' flag
240 241 242 243 244 245 246 247
/// true  false   found and valid
/// false true    not found (something outdated, maybe our cell)
/// false false   not found (other id stored with valid data)
/// true  true    impossible
///
/// todo: split this func to two: find_for_get and find_for_set
CacheDictionary::FindResult CacheDictionary::findCellIdx(const Key & id, const CellMetadata::time_point_t now) const
{
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
    auto pos = getCellIdx(id);
    auto oldest_id = pos;
    auto oldest_time = CellMetadata::time_point_t::max();
    const auto stop = pos + max_collision_length;
    for (; pos < stop; ++pos)
    {
        const auto cell_idx = pos & size_overlap_mask;
        const auto & cell = cells[cell_idx];

        if (cell.id != id)
        {
            /// maybe we already found nearest expired cell (try minimize collision_length on insert)
            if (oldest_time > now && oldest_time > cell.expiresAt())
            {
                oldest_time = cell.expiresAt();
                oldest_id = cell_idx;
            }
            continue;
        }

        if (cell.expiresAt() < now)
        {
            return {cell_idx, false, true};
        }

        return {cell_idx, true, false};
    }

    return {oldest_id, false, false};
277 278
}

A
Alexey Milovidov 已提交
279
void CacheDictionary::has(const PaddedPODArray<Key> & ids, PaddedPODArray<UInt8> & out) const
280
{
281
    /// Mapping: <id> -> { all indices `i` of `ids` such that `ids[i]` = <id> }
282
    std::unordered_map<Key, std::vector<size_t>> outdated_ids;
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324

    size_t cache_expired = 0, cache_not_found = 0, cache_hit = 0;

    const auto rows = ext::size(ids);
    {
        const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

        const auto now = std::chrono::system_clock::now();
        /// fetch up-to-date values, decide which ones require update
        for (const auto row : ext::range(0, rows))
        {
            const auto id = ids[row];
            const auto find_result = findCellIdx(id, now);
            const auto & cell_idx = find_result.cell_idx;
            if (!find_result.valid)
            {
                outdated_ids[id].push_back(row);
                if (find_result.outdated)
                    ++cache_expired;
                else
                    ++cache_not_found;
            }
            else
            {
                ++cache_hit;
                const auto & cell = cells[cell_idx];
                out[row] = !cell.isDefault();
            }
        }
    }

    ProfileEvents::increment(ProfileEvents::DictCacheKeysExpired, cache_expired);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysNotFound, cache_not_found);
    ProfileEvents::increment(ProfileEvents::DictCacheKeysHit, cache_hit);

    query_count.fetch_add(rows, std::memory_order_relaxed);
    hit_count.fetch_add(rows - outdated_ids.size(), std::memory_order_release);

    if (outdated_ids.empty())
        return;

    std::vector<Key> required_ids(outdated_ids.size());
P
proller 已提交
325
    std::transform(std::begin(outdated_ids), std::end(outdated_ids), std::begin(required_ids), [](auto & pair) { return pair.first; });
326 327

    /// request new values
P
proller 已提交
328 329 330 331 332 333 334 335 336 337 338 339
    update(
        required_ids,
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = true;
        },
        [&](const auto id, const auto)
        {
            for (const auto row : outdated_ids[id])
                out[row] = false;
        });
340 341 342 343 344
}


void CacheDictionary::createAttributes()
{
345 346
    const auto attributes_size = dict_struct.attributes.size();
    attributes.reserve(attributes_size);
347 348

    bytes_allocated += size * sizeof(CellMetadata);
349
    bytes_allocated += attributes_size * sizeof(attributes.front());
350 351 352 353 354 355 356 357

    for (const auto & attribute : dict_struct.attributes)
    {
        attribute_index_by_name.emplace(attribute.name, attributes.size());
        attributes.push_back(createAttributeWithType(attribute.underlying_type, attribute.null_value));

        if (attribute.hierarchical)
        {
P
proller 已提交
358
            hierarchical_attribute = &attributes.back();
359

K
kreuzerkrieg 已提交
360
            if (hierarchical_attribute->type != AttributeUnderlyingType::utUInt64)
361
                throw Exception{name + ": hierarchical attribute must be UInt64.", ErrorCodes::TYPE_MISMATCH};
362 363
        }
    }
364 365
}

A
Alexey Milovidov 已提交
366
/** Builds an Attribute of the given underlying type: stores its null value, allocates a
  * container of `size` elements and adds the container size to bytes_allocated.
  *
  * For String attributes the container holds StringRef, and the shared string arena is
  * created on first use.
  */
CacheDictionary::Attribute CacheDictionary::createAttributeWithType(const AttributeUnderlyingType type, const Field & null_value)
{
    Attribute result{type, {}, {}};

    switch (type)
    {
        case AttributeUnderlyingType::utString:
            result.null_values = null_value.get<String>();
            result.arrays = std::make_unique<ContainerType<StringRef>>(size);
            bytes_allocated += size * sizeof(StringRef);
            if (!string_arena)
                string_arena = std::make_unique<ArenaWithFreeLists>();
            break;

/// All non-string types are handled uniformly.
#define CREATE_FIXED_SIZE_ATTRIBUTE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        result.null_values = TYPE(null_value.get<NearestFieldType<TYPE>>()); \
        result.arrays = std::make_unique<ContainerType<TYPE>>(size); \
        bytes_allocated += size * sizeof(TYPE); \
        break;
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt8)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt16)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt32)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt64)
        CREATE_FIXED_SIZE_ATTRIBUTE(UInt128)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int8)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int16)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Int64)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal64)
        CREATE_FIXED_SIZE_ATTRIBUTE(Decimal128)
        CREATE_FIXED_SIZE_ATTRIBUTE(Float32)
        CREATE_FIXED_SIZE_ATTRIBUTE(Float64)
#undef CREATE_FIXED_SIZE_ATTRIBUTE
    }

    return result;
}

A
Alexey Milovidov 已提交
405
/** Writes the attribute's null value into position `idx` of the attribute's container.
  *
  * For String attributes the cell is pointed at the null value string itself (no copy);
  * a previous string stored in the cell is returned to the string arena first, unless the
  * cell was empty or already pointed at the null value.
  */
void CacheDictionary::setDefaultAttributeValue(Attribute & attribute, const Key idx) const
{
    switch (attribute.type)
    {
/// All non-string types: plain assignment of the stored null value.
#define SET_DEFAULT_VALUE(TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = std::get<TYPE>(attribute.null_values); \
        break;
        SET_DEFAULT_VALUE(UInt8)
        SET_DEFAULT_VALUE(UInt16)
        SET_DEFAULT_VALUE(UInt32)
        SET_DEFAULT_VALUE(UInt64)
        SET_DEFAULT_VALUE(UInt128)
        SET_DEFAULT_VALUE(Int8)
        SET_DEFAULT_VALUE(Int16)
        SET_DEFAULT_VALUE(Int32)
        SET_DEFAULT_VALUE(Int64)
        SET_DEFAULT_VALUE(Float32)
        SET_DEFAULT_VALUE(Float64)
        SET_DEFAULT_VALUE(Decimal32)
        SET_DEFAULT_VALUE(Decimal64)
        SET_DEFAULT_VALUE(Decimal128)
#undef SET_DEFAULT_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & default_string = std::get<String>(attribute.null_values);
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];

            /// Already refers to the null value - nothing to do.
            if (stored.data == default_string.data())
                break;

            /// Release the previously stored string, if any.
            if (stored.data)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            stored = StringRef{default_string};
            break;
        }
    }
}

A
Alexey Milovidov 已提交
471
/** Writes `value` into position `idx` of the attribute's container.
  *
  * Integer and floating point attributes read the Field as UInt64 / Int64 / Float64 and
  * narrow on assignment; UInt128 and Decimal attributes read the Field as their own type.
  *
  * String attributes: the previously stored string is returned to the string arena (unless
  * the cell was empty or referred to the null value), then a non-empty value is copied into
  * the arena together with its terminating zero byte; an empty value is stored as an empty
  * StringRef.
  */
void CacheDictionary::setAttributeValue(Attribute & attribute, const Key idx, const Field & value) const
{
    switch (attribute.type)
    {
/// TYPE is the stored type, FIELD_TYPE is the type the Field is read as.
#define SET_VALUE(TYPE, FIELD_TYPE) \
    case AttributeUnderlyingType::ut##TYPE: \
        std::get<ContainerPtrType<TYPE>>(attribute.arrays)[idx] = value.get<FIELD_TYPE>(); \
        break;
        SET_VALUE(UInt8, UInt64)
        SET_VALUE(UInt16, UInt64)
        SET_VALUE(UInt32, UInt64)
        SET_VALUE(UInt64, UInt64)
        SET_VALUE(UInt128, UInt128)
        SET_VALUE(Int8, Int64)
        SET_VALUE(Int16, Int64)
        SET_VALUE(Int32, Int64)
        SET_VALUE(Int64, Int64)
        SET_VALUE(Float32, Float64)
        SET_VALUE(Float64, Float64)
        SET_VALUE(Decimal32, Decimal32)
        SET_VALUE(Decimal64, Decimal64)
        SET_VALUE(Decimal128, Decimal128)
#undef SET_VALUE

        case AttributeUnderlyingType::utString:
        {
            const auto & new_string = value.get<String>();
            auto & stored = std::get<ContainerPtrType<StringRef>>(attribute.arrays)[idx];
            const auto & default_string = std::get<String>(attribute.null_values);

            /// free memory unless it points to a null_value
            const bool owns_arena_memory = stored.data && stored.data != default_string.data();
            if (owns_arena_memory)
                string_arena->free(const_cast<char *>(stored.data), stored.size);

            const auto new_size = new_string.size();
            if (new_size == 0)
            {
                stored = {};
                break;
            }

            /// NOTE(review): the buffer is allocated with new_size + 1 bytes (terminating zero included),
            /// while the free above passes StringRef::size, i.e. one byte less - confirm that
            /// ArenaWithFreeLists tolerates this difference.
            auto * buffer = string_arena->alloc(new_size + 1);
            std::copy(new_string.data(), new_string.data() + new_size + 1, buffer);
            stored = StringRef{buffer, new_size};
            break;
        }
    }
}

A
Alexey Milovidov 已提交
544
/// Returns the attribute registered under `attribute_name`.
/// Throws BAD_ARGUMENTS if there is no attribute with this name.
CacheDictionary::Attribute & CacheDictionary::getAttribute(const std::string & attribute_name) const
{
    const auto found = attribute_index_by_name.find(attribute_name);

    if (found != std::end(attribute_index_by_name))
        return attributes[found->second];

    throw Exception{name + ": no such attribute '" + attribute_name + "'", ErrorCodes::BAD_ARGUMENTS};
}

553 554
bool CacheDictionary::isEmptyCell(const UInt64 idx) const
{
P
proller 已提交
555 556
    return (idx != zero_cell_idx && cells[idx].id == 0)
        || (cells[idx].data == ext::safe_bit_cast<CellMetadata::time_point_urep_t>(CellMetadata::time_point_t()));
557 558 559 560
}

/// Returns the ids of all cells that are neither empty nor default, in cell order.
/// The scan is performed under the read lock.
PaddedPODArray<CacheDictionary::Key> CacheDictionary::getCachedIds() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    PaddedPODArray<Key> cached_ids;
    for (size_t idx = 0; idx < cells.size(); ++idx)
    {
        if (isEmptyCell(idx))
            continue;

        const auto & cell = cells[idx];
        if (cell.isDefault())
            continue;

        cached_ids.push_back(cell.id);
    }
    return cached_ids;
}

575
/// Creates a DictionaryBlockInputStream over the ids that are currently cached
/// (see getCachedIds) for the requested columns.
BlockInputStreamPtr CacheDictionary::getBlockInputStream(const Names & column_names, size_t max_block_size) const
{
    using Stream = DictionaryBlockInputStream<CacheDictionary, Key>;

    auto cached_ids = getCachedIds();
    return std::make_shared<Stream>(shared_from_this(), max_block_size, std::move(cached_ids), column_names);
}

581 582 583 584 585 586
/// Returns a copy of `last_exception`, read under the read lock.
std::exception_ptr CacheDictionary::getLastException() const
{
    const ProfilingScopedReadRWLock read_lock{rw_lock, ProfileEvents::DictCacheLockReadNs};

    std::exception_ptr result = last_exception;
    return result;
}

587 588
/** Registers the 'cache' layout in the dictionary factory.
  *
  * The creator validates the configuration and throws:
  *  - UNSUPPORTED_METHOD    if the structure defines a composite 'key';
  *  - BAD_ARGUMENTS         if range_min / range_max are defined, or 'require_nonempty' is set;
  *  - TOO_SMALL_BUFFER_SIZE if <layout>.cache.size_in_cells is zero or negative.
  *
  * size_in_cells is read as a signed integer, so a negative value has to be rejected here:
  * it would otherwise be converted to a huge unsigned cell count by the CacheDictionary constructor.
  */
void registerDictionaryCache(DictionaryFactory & factory)
{
    auto create_layout = [=](const std::string & name,
                             const std::unordered_set<std::string> & allowed_databases,
                             const DictionaryStructure & dict_struct,
                             const Poco::Util::AbstractConfiguration & config,
                             const std::string & config_prefix,
                             DictionarySourcePtr source_ptr) -> DictionaryPtr
    {
        if (dict_struct.key)
            throw Exception{"'key' is not supported for dictionary of layout 'cache'", ErrorCodes::UNSUPPORTED_METHOD};

        if (dict_struct.range_min || dict_struct.range_max)
            throw Exception{name
                                + ": elements .structure.range_min and .structure.range_max should be defined only "
                                  "for a dictionary of layout 'range_hashed'",
                            ErrorCodes::BAD_ARGUMENTS};

        const std::string layout_prefix = config_prefix + ".layout";
        const auto size = config.getInt(layout_prefix + ".cache.size_in_cells");
        if (size == 0)
            throw Exception{name + ": dictionary of layout 'cache' cannot have 0 cells", ErrorCodes::TOO_SMALL_BUFFER_SIZE};
        if (size < 0)
            throw Exception{name + ": dictionary of layout 'cache' cannot have a negative number of cells",
                            ErrorCodes::TOO_SMALL_BUFFER_SIZE};

        const bool require_nonempty = config.getBool(config_prefix + ".require_nonempty", false);
        if (require_nonempty)
            throw Exception{name + ": dictionary of layout 'cache' cannot have 'require_nonempty' attribute set",
                            ErrorCodes::BAD_ARGUMENTS};

        const DictionaryLifetime dict_lifetime{config, config_prefix + ".lifetime"};

        /// `size` is known to be positive here, so the conversion is lossless.
        return std::make_unique<CacheDictionary>(
            name, allowed_databases, dict_struct, std::move(source_ptr), dict_lifetime, static_cast<size_t>(size));
    };
    factory.registerLayout("cache", create_layout);
}

620

621
}