ExecExprVisitor.cpp 76.5 KB
Newer Older
F
FluorineDog 已提交
1 2 3 4 5 6 7 8 9 10 11
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

Y
yah01 已提交
12 13 14
#include "query/generated/ExecExprVisitor.h"

#include <boost/variant.hpp>
15
#include <boost/utility/binary.hpp>
16 17
#include <cmath>
#include <cstdint>
18
#include <ctime>
19
#include <deque>
N
neza2017 已提交
20
#include <optional>
21 22 23
#include <string>
#include <string_view>
#include <type_traits>
24
#include <unordered_set>
25 26
#include <utility>

27
#include "arrow/type_fwd.h"
28 29 30 31
#include "common/Json.h"
#include "common/Types.h"
#include "exceptions/EasyAssert.h"
#include "pb/plan.pb.h"
G
GuoRentong 已提交
32
#include "query/ExprImpl.h"
33
#include "query/Relational.h"
Y
yah01 已提交
34 35
#include "query/Utils.h"
#include "segcore/SegmentGrowingImpl.h"
36
#include "simdjson/error.h"
37
#include "query/PlanProto.h"
N
neza2017 已提交
38 39 40 41 42 43
namespace milvus::query {
// THIS CONTAINS EXTRA BODY FOR VISITOR
// WILL BE USED BY GENERATOR
namespace impl {
class ExecExprVisitor : ExprVisitor {
 public:
Y
yah01 已提交
44 45 46
    ExecExprVisitor(const segcore::SegmentInternalInterface& segment,
                    int64_t row_count,
                    Timestamp timestamp)
47
        : segment_(segment), row_count_(row_count), timestamp_(timestamp) {
N
neza2017 已提交
48
    }
49 50

    BitsetType
N
neza2017 已提交
51
    call_child(Expr& expr) {
Y
yah01 已提交
52 53
        AssertInfo(!bitset_opt_.has_value(),
                   "[ExecExprVisitor]Bitset already has value before accept");
N
neza2017 已提交
54
        expr.accept(*this);
Y
yah01 已提交
55 56
        AssertInfo(bitset_opt_.has_value(),
                   "[ExecExprVisitor]Bitset doesn't have value after accept");
57 58
        auto res = std::move(bitset_opt_);
        bitset_opt_ = std::nullopt;
59
        return std::move(res.value());
N
neza2017 已提交
60 61
    }

G
GuoRentong 已提交
62
 public:
F
FluorineDog 已提交
63
    template <typename T, typename IndexFunc, typename ElementFunc>
G
GuoRentong 已提交
64
    auto
Y
yah01 已提交
65 66 67
    ExecRangeVisitorImpl(FieldId field_id,
                         IndexFunc func,
                         ElementFunc element_func) -> BitsetType;
G
GuoRentong 已提交
68 69 70

    template <typename T>
    auto
71
    ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> BitsetType;
72

73 74
    template <typename T>
    auto
Y
yah01 已提交
75 76
    ExecBinaryArithOpEvalRangeVisitorDispatcher(
        BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType;
77

78 79
    template <typename T>
    auto
80
    ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) -> BitsetType;
G
GuoRentong 已提交
81

S
sunby 已提交
82 83
    template <typename T>
    auto
84
    ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType;
S
sunby 已提交
85

86 87 88 89
    template <typename T>
    auto
    ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType;

90 91
    template <typename CmpFunc>
    auto
Y
yah01 已提交
92 93
    ExecCompareExprDispatcher(CompareExpr& expr, CmpFunc cmp_func)
        -> BitsetType;
94

N
neza2017 已提交
95
 private:
96 97
    const segcore::SegmentInternalInterface& segment_;
    int64_t row_count_;
98
    Timestamp timestamp_;
99
    BitsetTypeOpt bitset_opt_;
N
neza2017 已提交
100 101 102 103
};
}  // namespace impl

void
F
FluorineDog 已提交
104 105
ExecExprVisitor::visit(LogicalUnaryExpr& expr) {
    using OpType = LogicalUnaryExpr::OpType;
106
    auto child_res = call_child(*expr.child_);
107
    BitsetType res = std::move(child_res);
108 109 110 111 112 113 114
    switch (expr.op_type_) {
        case OpType::LogicalNot: {
            res.flip();
            break;
        }
        default: {
            PanicInfo("Invalid Unary Op");
F
FluorineDog 已提交
115 116
        }
    }
Y
yah01 已提交
117 118
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
119
    bitset_opt_ = std::move(res);
N
neza2017 已提交
120 121 122
}

void
F
FluorineDog 已提交
123 124
ExecExprVisitor::visit(LogicalBinaryExpr& expr) {
    using OpType = LogicalBinaryExpr::OpType;
F
FluorineDog 已提交
125 126
    auto left = call_child(*expr.left_);
    auto right = call_child(*expr.right_);
Y
yah01 已提交
127 128
    AssertInfo(left.size() == right.size(),
               "[ExecExprVisitor]Left size not equal to right size");
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
    auto res = std::move(left);
    switch (expr.op_type_) {
        case OpType::LogicalAnd: {
            res &= right;
            break;
        }
        case OpType::LogicalOr: {
            res |= right;
            break;
        }
        case OpType::LogicalXor: {
            res ^= right;
            break;
        }
        case OpType::LogicalMinus: {
            res -= right;
            break;
        }
        default: {
            PanicInfo("Invalid Binary Op");
        }
    }
Y
yah01 已提交
151 152
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
153
    bitset_opt_ = std::move(res);
154
}
F
FluorineDog 已提交
155

156
static auto
157 158
Assemble(const std::deque<BitsetType>& srcs) -> BitsetType {
    BitsetType res;
159

160 161 162 163
    if (srcs.size() == 1) {
        return srcs[0];
    }

164 165 166 167 168 169 170 171 172 173
    int64_t total_size = 0;
    for (auto& chunk : srcs) {
        total_size += chunk.size();
    }
    res.resize(total_size);

    int64_t counter = 0;
    for (auto& chunk : srcs) {
        for (int64_t i = 0; i < chunk.size(); ++i) {
            res[counter + i] = chunk[i];
F
FluorineDog 已提交
174
        }
175
        counter += chunk.size();
F
FluorineDog 已提交
176
    }
177
    return res;
N
neza2017 已提交
178 179
}

180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
// Appends the booleans in `chunk_res` to the end of `result`, one bit per
// bool, in order.
//
// The chunk is split into three parts so that most bits are appended a whole
// block at a time:
//   1. a prefix appended bit-by-bit until result.size() is a multiple of
//      BITSET_BLOCK_BIT_SIZE (empty when it already is),
//   2. as many full blocks as fit, each appended with one append() call,
//   3. the remaining suffix, appended bit-by-bit.
//
// NOTE(review): the block path assumes BITSET_BLOCK_BIT_SIZE ==
// BITSET_BLOCK_SIZE * 8 and that BitSetBlockType holds BITSET_BLOCK_SIZE
// bytes - confirm against the definitions of those constants.
void
AppendOneChunk(BitsetType& result, const FixedVector<bool>& chunk_res) {
    // Append a value once instead of BITSET_BLOCK_BIT_SIZE times.
    // Packs `n` consecutive groups of BITSET_BLOCK_SIZE * 8 bools starting at
    // `ptr` into block values and appends each block to `result`.
    auto AppendBlock = [&result](const bool* ptr, size_t n) {
        for (size_t i = 0; i < n; ++i) {
            BitSetBlockType val = 0;
            // This can use CPU SIMD optimization
            // vals[k] collects byte k of the block: bit j of that byte comes
            // from ptr[k * 8 + j].
            uint8_t vals[BITSET_BLOCK_SIZE] = {0};
            for (size_t j = 0; j < 8; ++j) {
                for (size_t k = 0; k < BITSET_BLOCK_SIZE; ++k) {
                    vals[k] |= uint8_t(*(ptr + k * 8 + j)) << j;
                }
            }
            // Byte j lands in bits [8 * j, 8 * j + 8) of the block value.
            for (size_t j = 0; j < BITSET_BLOCK_SIZE; ++j) {
                val |= BitSetBlockType(vals[j]) << (8 * j);
            }
            result.append(val);
            ptr += BITSET_BLOCK_SIZE * 8;
        }
    };
    // Append bit for these bits that can not be union as a block
    // Usually n less than BITSET_BLOCK_BIT_SIZE.
    auto AppendBit = [&result](const bool* ptr, size_t n) {
        for (size_t i = 0; i < n; ++i) {
            result.push_back(*ptr++);
        }
    };

    const size_t res_len = result.size();
    const size_t chunk_len = chunk_res.size();
    const bool* chunk_ptr = chunk_res.data();

    // Number of bits needed to bring `result` up to a block boundary, capped
    // by the chunk length. Kept as size_t: every operand is a size, so an
    // int here would narrow and make the comparison below signed/unsigned.
    const size_t misaligned = res_len % BITSET_BLOCK_BIT_SIZE;
    const size_t n_prefix =
        misaligned == 0
            ? 0
            : std::min<size_t>(BITSET_BLOCK_BIT_SIZE - misaligned, chunk_len);

    AppendBit(chunk_ptr, n_prefix);

    // The whole chunk fit into the prefix (this also covers an empty chunk).
    if (n_prefix == chunk_len) {
        return;
    }

    const size_t remaining = chunk_len - n_prefix;
    const size_t n_block = remaining / BITSET_BLOCK_BIT_SIZE;
    const size_t n_suffix = remaining % BITSET_BLOCK_BIT_SIZE;

    AppendBlock(chunk_ptr + n_prefix, n_block);

    AppendBit(chunk_ptr + n_prefix + n_block * BITSET_BLOCK_BIT_SIZE, n_suffix);
}

// Builds one bitset by appending every per-chunk result in `results`, in
// order, via AppendOneChunk.
BitsetType
AssembleChunk(const std::vector<FixedVector<bool>>& results) {
    BitsetType merged;
    for (size_t i = 0; i < results.size(); ++i) {
        AppendOneChunk(merged, results[i]);
    }
    return merged;
}

F
FluorineDog 已提交
243
template <typename T, typename IndexFunc, typename ElementFunc>
G
GuoRentong 已提交
244
auto
Y
yah01 已提交
245 246 247
ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id,
                                      IndexFunc index_func,
                                      ElementFunc element_func) -> BitsetType {
G
GuoRentong 已提交
248
    auto& schema = segment_.get_schema();
249 250
    auto& field_meta = schema[field_id];
    auto indexing_barrier = segment_.num_chunk_index(field_id);
B
BossZou 已提交
251 252
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
253
    std::vector<FixedVector<bool>> results;
254 255
    results.reserve(num_chunk);

Y
yah01 已提交
256 257 258
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
259
    using Index = index::ScalarIndex<IndexInnerType>;
F
FluorineDog 已提交
260
    for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
Y
yah01 已提交
261 262
        const Index& indexing =
            segment_.chunk_scalar_index<IndexInnerType>(field_id, chunk_id);
263 264 265
        // NOTE: knowhere is not const-ready
        // This is a dirty workaround
        auto data = index_func(const_cast<Index*>(&indexing));
266
        AssertInfo(data.size() == size_per_chunk,
Y
yah01 已提交
267
                   "[ExecExprVisitor]Data size not equal to size_per_chunk");
268
        results.emplace_back(std::move(data));
F
FluorineDog 已提交
269
    }
270
    for (auto chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
Y
yah01 已提交
271 272 273
        auto this_size = chunk_id == num_chunk - 1
                             ? row_count_ - chunk_id * size_per_chunk
                             : size_per_chunk;
274
        FixedVector<bool> chunk_res(this_size);
275
        auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
G
GuoRentong 已提交
276
        const T* data = chunk.data();
277
        // Can use CPU SIMD optimazation to speed up
278
        for (int index = 0; index < this_size; ++index) {
279
            chunk_res[index] = element_func(data[index]);
G
GuoRentong 已提交
280
        }
281
        results.emplace_back(std::move(chunk_res));
G
GuoRentong 已提交
282
    }
283
    auto final_result = AssembleChunk(results);
Y
yah01 已提交
284 285
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Final result size not equal to row count");
286
    return final_result;
G
GuoRentong 已提交
287
}
288

289
template <typename T, typename IndexFunc, typename ElementFunc>
290
auto
Y
yah01 已提交
291 292 293
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id,
                                          IndexFunc index_func,
                                          ElementFunc element_func)
294
    -> BitsetType {
295
    auto& schema = segment_.get_schema();
296
    auto& field_meta = schema[field_id];
297 298
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
299 300 301 302
    auto indexing_barrier = segment_.num_chunk_index(field_id);
    auto data_barrier = segment_.num_chunk_data(field_id);
    AssertInfo(std::max(data_barrier, indexing_barrier) == num_chunk,
               "max(data_barrier, index_barrier) not equal to num_chunk");
303
    std::vector<FixedVector<bool>> results;
304
    results.reserve(num_chunk);
305

306 307 308 309 310
    // for growing segment, indexing_barrier will always less than data_barrier
    // so growing segment will always execute expr plan using raw data
    // if sealed segment has loaded raw data on this field, then index_barrier = 0 and data_barrier = 1
    // in this case, sealed segment execute expr plan using raw data
    for (auto chunk_id = 0; chunk_id < data_barrier; ++chunk_id) {
Y
yah01 已提交
311 312 313
        auto this_size = chunk_id == num_chunk - 1
                             ? row_count_ - chunk_id * size_per_chunk
                             : size_per_chunk;
314
        FixedVector<bool> result(this_size);
315
        auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
316 317 318 319
        const T* data = chunk.data();
        for (int index = 0; index < this_size; ++index) {
            result[index] = element_func(data[index]);
        }
320 321 322
        AssertInfo(result.size() == this_size,
                   "[ExecExprVisitor]Chunk result size not equal to "
                   "expected size");
323 324
        results.emplace_back(std::move(result));
    }
325 326 327

    // if sealed segment has loaded scalar index for this field, then index_barrier = 1 and data_barrier = 0
    // in this case, sealed segment execute expr plan using scalar index
Y
yah01 已提交
328 329 330
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
331
    using Index = index::ScalarIndex<IndexInnerType>;
Y
yah01 已提交
332 333 334 335
    for (auto chunk_id = data_barrier; chunk_id < indexing_barrier;
         ++chunk_id) {
        auto& indexing =
            segment_.chunk_scalar_index<IndexInnerType>(field_id, chunk_id);
336
        auto this_size = const_cast<Index*>(&indexing)->Count();
337
        FixedVector<bool> result(this_size);
338 339 340 341 342 343
        for (int offset = 0; offset < this_size; ++offset) {
            result[offset] = index_func(const_cast<Index*>(&indexing), offset);
        }
        results.emplace_back(std::move(result));
    }

344
    auto final_result = AssembleChunk(results);
Y
yah01 已提交
345 346
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Final result size not equal to row count");
347 348 349
    return final_result;
}

G
GuoRentong 已提交
350 351 352 353
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
354 355 356 357 358
ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw)
    -> BitsetType {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
359 360 361
    using Index = index::ScalarIndex<IndexInnerType>;
    auto& expr = static_cast<UnaryRangeExprImpl<IndexInnerType>&>(expr_raw);

362
    auto op = expr.op_type_;
Y
yah01 已提交
363
    auto val = IndexInnerType(expr.value_);
364
    auto field_id = expr.column_.field_id;
365 366
    switch (op) {
        case OpType::Equal: {
367
            auto index_func = [&](Index* index) { return index->In(1, &val); };
368
            auto elem_func = [&](MayConstRef<T> x) { return (x == val); };
369
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
370 371
        }
        case OpType::NotEqual: {
372
            auto index_func = [&](Index* index) {
Y
yah01 已提交
373 374
                return index->NotIn(1, &val);
            };
375
            auto elem_func = [&](MayConstRef<T> x) { return (x != val); };
376
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
377 378
        }
        case OpType::GreaterEqual: {
379
            auto index_func = [&](Index* index) {
Y
yah01 已提交
380 381
                return index->Range(val, OpType::GreaterEqual);
            };
382
            auto elem_func = [&](MayConstRef<T> x) { return (x >= val); };
383
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
G
GuoRentong 已提交
384
        }
385
        case OpType::GreaterThan: {
386
            auto index_func = [&](Index* index) {
Y
yah01 已提交
387 388
                return index->Range(val, OpType::GreaterThan);
            };
389
            auto elem_func = [&](MayConstRef<T> x) { return (x > val); };
390
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
391 392
        }
        case OpType::LessEqual: {
393
            auto index_func = [&](Index* index) {
Y
yah01 已提交
394 395
                return index->Range(val, OpType::LessEqual);
            };
396
            auto elem_func = [&](MayConstRef<T> x) { return (x <= val); };
397
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
398 399
        }
        case OpType::LessThan: {
400
            auto index_func = [&](Index* index) {
Y
yah01 已提交
401 402
                return index->Range(val, OpType::LessThan);
            };
403
            auto elem_func = [&](MayConstRef<T> x) { return (x < val); };
404
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
405 406
        }
        case OpType::PrefixMatch: {
407
            auto index_func = [&](Index* index) {
P
presburger 已提交
408
                auto dataset = std::make_unique<Dataset>();
409 410
                dataset->Set(milvus::index::OPERATOR_TYPE, OpType::PrefixMatch);
                dataset->Set(milvus::index::PREFIX_VALUE, val);
411 412
                return index->Query(std::move(dataset));
            };
413 414 415
            auto elem_func = [&](MayConstRef<T> x) {
                return Match(x, val, op);
            };
416
            return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
417 418
        }
        // TODO: PostfixMatch
419
        default: {
G
GuoRentong 已提交
420 421
            PanicInfo("unsupported range node");
        }
422 423 424 425
    }
}
#pragma clang diagnostic pop

426 427 428 429 430 431 432 433 434
// Evaluates `json_field[nested_path] <op> value` for a JSON field.
//
// Every operator is evaluated element by element: the index predicate passed
// to ExecRangeVisitorImpl is a stub that returns an empty TargetBitmap. The
// value at the nested path is read as GetType, which is std::string_view
// when the expression value is a std::string and ExprValueType otherwise.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecUnaryRangeVisitorDispatcherJson(UnaryRangeExpr& expr_raw)
    -> BitsetType {
    using Index = index::ScalarIndex<milvus::Json>;
    using GetType =
        std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                           std::string_view,
                           ExprValueType>;
    auto& expr = static_cast<UnaryRangeExprImpl<ExprValueType>&>(expr_raw);

    auto op = expr.op_type_;
    auto val = expr.value_;
    auto pointer = milvus::Json::pointer(expr.column_.nested_path);
    auto field_id = expr.column_.field_id;
    auto index_func = [](Index*) { return TargetBitmap{}; };

// Body of an element predicate: reads the value at `pointer` as GetType and
// returns `cmp` (an expression over `x` and `val`). If the read fails and
// GetType is int64_t, the read is retried as double; a value that still
// cannot be read makes the predicate false.
#define UnaryRangeJSONCompare(cmp)                            \
    do {                                                      \
        auto x = json.template at<GetType>(pointer);          \
        if (x.error()) {                                      \
            if constexpr (std::is_same_v<GetType, int64_t>) { \
                auto x = json.template at<double>(pointer);   \
                return !x.error() && (cmp);                   \
            }                                                 \
            return false;                                     \
        }                                                     \
        return (cmp);                                         \
    } while (false)

// Same as UnaryRangeJSONCompare, except that a value that cannot be read
// makes the predicate true.
#define UnaryRangeJSONCompareNotEqual(cmp)                    \
    do {                                                      \
        auto x = json.template at<GetType>(pointer);          \
        if (x.error()) {                                      \
            if constexpr (std::is_same_v<GetType, int64_t>) { \
                auto x = json.template at<double>(pointer);   \
                return x.error() || (cmp);                    \
            }                                                 \
            return true;                                      \
        }                                                     \
        return (cmp);                                         \
    } while (false)

    switch (op) {
        case OpType::Equal:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(x.value() == val);
                });
        case OpType::NotEqual:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompareNotEqual(x.value() != val);
                });
        case OpType::GreaterEqual:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(x.value() >= val);
                });
        case OpType::GreaterThan:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(x.value() > val);
                });
        case OpType::LessEqual:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(x.value() <= val);
                });
        case OpType::LessThan:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(x.value() < val);
                });
        case OpType::PrefixMatch:
            return ExecRangeVisitorImpl<milvus::Json>(
                field_id, index_func, [&](const milvus::Json& json) {
                    UnaryRangeJSONCompare(
                        Match(ExprValueType(x.value()), val, op));
                });
        // TODO: PostfixMatch
        default: {
            PanicInfo("unsupported range node");
        }
    }
}

526 527 528 529
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
530 531
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
    BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
532
    auto& expr = static_cast<BinaryArithOpEvalRangeExprImpl<T>&>(expr_raw);
533
    using Index = index::ScalarIndex<T>;
534 535 536 537
    auto arith_op = expr.arith_op_;
    auto right_operand = expr.right_operand_;
    auto op = expr.op_type_;
    auto val = expr.value_;
538
    auto& nested_path = expr.column_.nested_path;
539 540 541 542 543

    switch (op) {
        case OpType::Equal: {
            switch (arith_op) {
                case ArithOpType::Add: {
Y
yah01 已提交
544 545
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
546 547 548
                        auto x = index->Reverse_Lookup(offset);
                        return (x + right_operand) == val;
                    };
549 550 551 552 553 554
                    auto elem_func =
                        [val, right_operand, &nested_path](MayConstRef<T> x) {
                            // visit the nested field
                            // now it must be Json
                            return ((x + right_operand) == val);
                        };
Y
yah01 已提交
555
                    return ExecDataRangeVisitorImpl<T>(
556
                        expr.column_.field_id, index_func, elem_func);
557 558
                }
                case ArithOpType::Sub: {
Y
yah01 已提交
559 560
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
561 562 563
                        auto x = index->Reverse_Lookup(offset);
                        return (x - right_operand) == val;
                    };
564
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
565 566 567
                        return ((x - right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
568
                        expr.column_.field_id, index_func, elem_func);
569 570
                }
                case ArithOpType::Mul: {
Y
yah01 已提交
571 572
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
573 574 575
                        auto x = index->Reverse_Lookup(offset);
                        return (x * right_operand) == val;
                    };
576
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
577 578 579
                        return ((x * right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
580
                        expr.column_.field_id, index_func, elem_func);
581 582
                }
                case ArithOpType::Div: {
Y
yah01 已提交
583 584
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
585 586 587
                        auto x = index->Reverse_Lookup(offset);
                        return (x / right_operand) == val;
                    };
588
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
589 590 591
                        return ((x / right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
592
                        expr.column_.field_id, index_func, elem_func);
593 594
                }
                case ArithOpType::Mod: {
Y
yah01 已提交
595 596
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
597 598 599
                        auto x = index->Reverse_Lookup(offset);
                        return static_cast<T>(fmod(x, right_operand)) == val;
                    };
600
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
601 602
                        return (static_cast<T>(fmod(x, right_operand)) == val);
                    };
Y
yah01 已提交
603
                    return ExecDataRangeVisitorImpl<T>(
604
                        expr.column_.field_id, index_func, elem_func);
605 606 607 608 609 610 611 612 613
                }
                default: {
                    PanicInfo("unsupported arithmetic operation");
                }
            }
        }
        case OpType::NotEqual: {
            switch (arith_op) {
                case ArithOpType::Add: {
Y
yah01 已提交
614 615
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
616 617 618
                        auto x = index->Reverse_Lookup(offset);
                        return (x + right_operand) != val;
                    };
619
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
620 621 622
                        return ((x + right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
623
                        expr.column_.field_id, index_func, elem_func);
624 625
                }
                case ArithOpType::Sub: {
Y
yah01 已提交
626 627
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
628 629 630
                        auto x = index->Reverse_Lookup(offset);
                        return (x - right_operand) != val;
                    };
631
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
632 633 634
                        return ((x - right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
635
                        expr.column_.field_id, index_func, elem_func);
636 637
                }
                case ArithOpType::Mul: {
Y
yah01 已提交
638 639
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
640 641 642
                        auto x = index->Reverse_Lookup(offset);
                        return (x * right_operand) != val;
                    };
643
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
644 645 646
                        return ((x * right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
647
                        expr.column_.field_id, index_func, elem_func);
648 649
                }
                case ArithOpType::Div: {
Y
yah01 已提交
650 651
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
652 653 654
                        auto x = index->Reverse_Lookup(offset);
                        return (x / right_operand) != val;
                    };
655
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
Y
yah01 已提交
656 657 658
                        return ((x / right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
659
                        expr.column_.field_id, index_func, elem_func);
660 661
                }
                case ArithOpType::Mod: {
Y
yah01 已提交
662 663
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
664 665 666
                        auto x = index->Reverse_Lookup(offset);
                        return static_cast<T>(fmod(x, right_operand)) != val;
                    };
667
                    auto elem_func = [val, right_operand](MayConstRef<T> x) {
668 669
                        return (static_cast<T>(fmod(x, right_operand)) != val);
                    };
Y
yah01 已提交
670
                    return ExecDataRangeVisitorImpl<T>(
671
                        expr.column_.field_id, index_func, elem_func);
672 673 674 675 676 677 678 679 680 681 682 683 684
                }
                default: {
                    PanicInfo("unsupported arithmetic operation");
                }
            }
        }
        default: {
            PanicInfo("unsupported range node with arithmetic operation");
        }
    }
}
#pragma clang diagnostic pop

685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700
// Evaluates `(json_value <arith_op> right_operand) <op> value` for each row of
// a JSON column, where <op> is Equal or NotEqual and json_value is read at the
// expression's nested path as ExprValueType (string_view when ExprValueType is
// std::string).
//
// A row whose value cannot be read never matches an Equal filter and always
// matches a NotEqual filter. When the requested type is int64_t, a failed read
// is retried as double before the row is treated as unreadable.
// Any other comparison or arithmetic operator panics.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcherJson(
    BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
    using Index = index::ScalarIndex<milvus::Json>;
    using GetType =
        std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                           std::string_view,
                           ExprValueType>;

    auto& expr =
        static_cast<BinaryArithOpEvalRangeExprImpl<ExprValueType>&>(expr_raw);
    auto compare_op = expr.op_type_;
    auto arith_op = expr.arith_op_;
    auto operand = expr.right_operand_;
    auto target = expr.value_;
    auto pointer = milvus::Json::pointer(expr.column_.nested_path);

    // The index callback is identical for every operator: it never matches.
    auto index_func = [](Index* index, size_t offset) { return false; };

// Body of an element callback for Equal filters. Expects `json` and `pointer`
// in scope and exposes the parsed result as `x` to the `cmp` expression.
#define BinaryArithRangeJSONCompare(cmp)                  \
    do {                                                  \
        auto x = json.template at<GetType>(pointer);      \
        if (!x.error()) {                                 \
            return (cmp);                                 \
        }                                                 \
        if constexpr (std::is_same_v<GetType, int64_t>) { \
            auto x = json.template at<double>(pointer);   \
            return !x.error() && (cmp);                   \
        }                                                 \
        return false;                                     \
    } while (false)

// Same as above for NotEqual filters: an unreadable value counts as a match.
#define BinaryArithRangeJSONCompareNotEqual(cmp)          \
    do {                                                  \
        auto x = json.template at<GetType>(pointer);      \
        if (!x.error()) {                                 \
            return (cmp);                                 \
        }                                                 \
        if constexpr (std::is_same_v<GetType, int64_t>) { \
            auto x = json.template at<double>(pointer);   \
            return x.error() || (cmp);                    \
        }                                                 \
        return true;                                      \
    } while (false)

    switch (compare_op) {
        case OpType::Equal:
            switch (arith_op) {
                case ArithOpType::Add: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompare(x.value() + operand ==
                                                    target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Sub: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompare(x.value() - operand ==
                                                    target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Mul: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompare(x.value() * operand ==
                                                    target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Div: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompare(x.value() / operand ==
                                                    target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Mod: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompare(
                            static_cast<ExprValueType>(
                                fmod(x.value(), operand)) == target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                default:
                    PanicInfo("unsupported arithmetic operation");
            }
        case OpType::NotEqual:
            switch (arith_op) {
                case ArithOpType::Add: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompareNotEqual(
                            x.value() + operand != target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Sub: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompareNotEqual(
                            x.value() - operand != target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Mul: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompareNotEqual(
                            x.value() * operand != target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Div: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompareNotEqual(
                            x.value() / operand != target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                case ArithOpType::Mod: {
                    auto elem_func = [&](const milvus::Json& json) {
                        BinaryArithRangeJSONCompareNotEqual(
                            static_cast<ExprValueType>(
                                fmod(x.value(), operand)) != target);
                    };
                    return ExecDataRangeVisitorImpl<milvus::Json>(
                        expr.column_.field_id, index_func, elem_func);
                }
                default:
                    PanicInfo("unsupported arithmetic operation");
            }
        default:
            PanicInfo("unsupported range node with arithmetic operation");
    }
}
871

872 873 874 875
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
876 877 878 879 880
ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw)
    -> BitsetType {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
881 882 883
    using Index = index::ScalarIndex<IndexInnerType>;
    auto& expr = static_cast<BinaryRangeExprImpl<IndexInnerType>&>(expr_raw);

884 885
    bool lower_inclusive = expr.lower_inclusive_;
    bool upper_inclusive = expr.upper_inclusive_;
886 887
    IndexInnerType val1 = expr.lower_value_;
    IndexInnerType val2 = expr.upper_value_;
888

889
    auto index_func = [&](Index* index) {
Y
yah01 已提交
890 891
        return index->Range(val1, lower_inclusive, val2, upper_inclusive);
    };
892
    if (lower_inclusive && upper_inclusive) {
893 894 895
        auto elem_func = [val1, val2](MayConstRef<T> x) {
            return (val1 <= x && x <= val2);
        };
896 897
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
898
    } else if (lower_inclusive && !upper_inclusive) {
899 900 901
        auto elem_func = [val1, val2](MayConstRef<T> x) {
            return (val1 <= x && x < val2);
        };
902 903
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
904
    } else if (!lower_inclusive && upper_inclusive) {
905 906 907
        auto elem_func = [val1, val2](MayConstRef<T> x) {
            return (val1 < x && x <= val2);
        };
908 909
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
G
GuoRentong 已提交
910
    } else {
911 912 913
        auto elem_func = [val1, val2](MayConstRef<T> x) {
            return (val1 < x && x < val2);
        };
914 915
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
G
GuoRentong 已提交
916 917 918 919
    }
}
#pragma clang diagnostic pop

920 921 922 923 924 925 926 927 928 929 930 931 932 933 934
// Evaluates a two-sided range filter on a JSON column. The value at the
// expression's nested path is read as ExprValueType (string_view when
// ExprValueType is std::string) and compared against both bounds.
//
// Rows whose value cannot be read never match. When the requested type is
// int64_t, a failed read is retried as double before giving up.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecBinaryRangeVisitorDispatcherJson(BinaryRangeExpr& expr_raw)
    -> BitsetType {
    using Index = index::ScalarIndex<milvus::Json>;
    using GetType =
        std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                           std::string_view,
                           ExprValueType>;

    auto& expr = static_cast<BinaryRangeExprImpl<ExprValueType>&>(expr_raw);
    bool lower_inclusive = expr.lower_inclusive_;
    bool upper_inclusive = expr.upper_inclusive_;
    ExprValueType lower = expr.lower_value_;
    ExprValueType upper = expr.upper_value_;
    auto pointer = milvus::Json::pointer(expr.column_.nested_path);

    // no json index now
    auto index_func = [](Index*) { return TargetBitmap{}; };

// Body of an element callback. Expects `json` and `pointer` in scope and
// exposes the extracted value as `value` to the `cmp` expression.
#define BinaryRangeJSONCompare(cmp)                       \
    do {                                                  \
        auto x = json.template at<GetType>(pointer);      \
        if (!x.error()) {                                 \
            auto value = x.value();                       \
            return (cmp);                                 \
        }                                                 \
        if constexpr (std::is_same_v<GetType, int64_t>) { \
            auto x = json.template at<double>(pointer);   \
            if (!x.error()) {                             \
                auto value = x.value();                   \
                return (cmp);                             \
            }                                             \
        }                                                 \
        return false;                                     \
    } while (false)

    // [lower, upper]
    if (lower_inclusive && upper_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            BinaryRangeJSONCompare(lower <= value && value <= upper);
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    }

    // [lower, upper)
    if (lower_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            BinaryRangeJSONCompare(lower <= value && value < upper);
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    }

    // (lower, upper]
    if (upper_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            BinaryRangeJSONCompare(lower < value && value <= upper);
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    }

    // (lower, upper)
    auto elem_func = [&](const milvus::Json& json) {
        BinaryRangeJSONCompare(lower < value && value < upper);
    };
    return ExecRangeVisitorImpl<milvus::Json>(
        expr.column_.field_id, index_func, elem_func);
}

N
neza2017 已提交
984
void
985
ExecExprVisitor::visit(UnaryRangeExpr& expr) {
986 987
    auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
988
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");
989
    BitsetType res;
990
    switch (expr.column_.data_type) {
N
neza2017 已提交
991
        case DataType::BOOL: {
992
            res = ExecUnaryRangeVisitorDispatcher<bool>(expr);
N
neza2017 已提交
993 994
            break;
        }
G
GuoRentong 已提交
995
        case DataType::INT8: {
996
            res = ExecUnaryRangeVisitorDispatcher<int8_t>(expr);
G
GuoRentong 已提交
997 998 999
            break;
        }
        case DataType::INT16: {
1000
            res = ExecUnaryRangeVisitorDispatcher<int16_t>(expr);
G
GuoRentong 已提交
1001 1002 1003
            break;
        }
        case DataType::INT32: {
1004
            res = ExecUnaryRangeVisitorDispatcher<int32_t>(expr);
G
GuoRentong 已提交
1005 1006 1007
            break;
        }
        case DataType::INT64: {
1008
            res = ExecUnaryRangeVisitorDispatcher<int64_t>(expr);
G
GuoRentong 已提交
1009 1010 1011
            break;
        }
        case DataType::FLOAT: {
1012
            res = ExecUnaryRangeVisitorDispatcher<float>(expr);
G
GuoRentong 已提交
1013 1014 1015
            break;
        }
        case DataType::DOUBLE: {
1016 1017 1018
            res = ExecUnaryRangeVisitorDispatcher<double>(expr);
            break;
        }
1019
        case DataType::VARCHAR: {
Y
yah01 已提交
1020 1021 1022 1023 1024
            if (segment_.type() == SegmentType::Growing) {
                res = ExecUnaryRangeVisitorDispatcher<std::string>(expr);
            } else {
                res = ExecUnaryRangeVisitorDispatcher<std::string_view>(expr);
            }
1025 1026
            break;
        }
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
        case DataType::JSON: {
            switch (expr.val_case_) {
                case proto::plan::GenericValue::ValCase::kBoolVal:
                    res = ExecUnaryRangeVisitorDispatcherJson<bool>(expr);
                    break;
                case proto::plan::GenericValue::ValCase::kInt64Val:
                    res = ExecUnaryRangeVisitorDispatcherJson<int64_t>(expr);
                    break;
                case proto::plan::GenericValue::ValCase::kFloatVal:
                    res = ExecUnaryRangeVisitorDispatcherJson<double>(expr);
                    break;
                case proto::plan::GenericValue::ValCase::kStringVal:
                    res =
                        ExecUnaryRangeVisitorDispatcherJson<std::string>(expr);
                    break;
                default:
                    PanicInfo(
                        fmt::format("unknown data type: {}", expr.val_case_));
            }
            break;
        }
1048
        default:
1049 1050
            PanicInfo(fmt::format("unsupported data type: {}",
                                  expr.column_.data_type));
1051
    }
Y
yah01 已提交
1052 1053
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
1054
    bitset_opt_ = std::move(res);
1055 1056
}

1057 1058
// Evaluates an arithmetic-then-compare expression against the segment and
// stores the resulting bitset in bitset_opt_.
//
// Only integer, floating-point and JSON columns are handled; JSON columns are
// dispatched on the type of the literal carried by the expression (bool,
// int64 or float). Any other column or literal type panics.
void
ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
    auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");

    auto evaluate = [this, &expr]() -> BitsetType {
        switch (expr.column_.data_type) {
            case DataType::INT8:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int8_t>(
                    expr);
            case DataType::INT16:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int16_t>(
                    expr);
            case DataType::INT32:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int32_t>(
                    expr);
            case DataType::INT64:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int64_t>(
                    expr);
            case DataType::FLOAT:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<float>(expr);
            case DataType::DOUBLE:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<double>(
                    expr);
            case DataType::JSON:
                switch (expr.val_case_) {
                    case proto::plan::GenericValue::ValCase::kBoolVal:
                        return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<
                            bool>(expr);
                    case proto::plan::GenericValue::ValCase::kInt64Val:
                        return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<
                            int64_t>(expr);
                    case proto::plan::GenericValue::ValCase::kFloatVal:
                        return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<
                            double>(expr);
                    default:
                        PanicInfo(fmt::format(
                            "unsupported value type {} in expression",
                            expr.val_case_));
                }
            default:
                PanicInfo(fmt::format("unsupported data type: {}",
                                      expr.column_.data_type));
        }
    };

    BitsetType res = evaluate();
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

1123 1124
// Evaluates a two-sided range expression against the segment and stores the
// resulting bitset in bitset_opt_.
//
// The column's data type selects the dispatcher instantiation. VARCHAR is read
// as std::string on growing segments and as std::string_view otherwise; JSON
// columns are dispatched on the type of the literal carried by the expression.
// Unsupported column or literal types panic.
void
ExecExprVisitor::visit(BinaryRangeExpr& expr) {
    auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");

    auto evaluate = [this, &expr]() -> BitsetType {
        switch (expr.column_.data_type) {
            case DataType::BOOL:
                return ExecBinaryRangeVisitorDispatcher<bool>(expr);
            case DataType::INT8:
                return ExecBinaryRangeVisitorDispatcher<int8_t>(expr);
            case DataType::INT16:
                return ExecBinaryRangeVisitorDispatcher<int16_t>(expr);
            case DataType::INT32:
                return ExecBinaryRangeVisitorDispatcher<int32_t>(expr);
            case DataType::INT64:
                return ExecBinaryRangeVisitorDispatcher<int64_t>(expr);
            case DataType::FLOAT:
                return ExecBinaryRangeVisitorDispatcher<float>(expr);
            case DataType::DOUBLE:
                return ExecBinaryRangeVisitorDispatcher<double>(expr);
            case DataType::VARCHAR:
                if (segment_.type() == SegmentType::Growing) {
                    return ExecBinaryRangeVisitorDispatcher<std::string>(expr);
                }
                return ExecBinaryRangeVisitorDispatcher<std::string_view>(expr);
            case DataType::JSON:
                switch (expr.val_case_) {
                    case proto::plan::GenericValue::ValCase::kBoolVal:
                        return ExecBinaryRangeVisitorDispatcherJson<bool>(expr);
                    case proto::plan::GenericValue::ValCase::kInt64Val:
                        return ExecBinaryRangeVisitorDispatcherJson<int64_t>(
                            expr);
                    case proto::plan::GenericValue::ValCase::kFloatVal:
                        return ExecBinaryRangeVisitorDispatcherJson<double>(
                            expr);
                    case proto::plan::GenericValue::ValCase::kStringVal:
                        return ExecBinaryRangeVisitorDispatcherJson<
                            std::string>(expr);
                    default:
                        PanicInfo(fmt::format(
                            "unsupported value type {} in expression",
                            expr.val_case_));
                }
            default:
                PanicInfo(fmt::format("unsupported data type: {}",
                                      expr.column_.data_type));
        }
    };

    BitsetType res = evaluate();
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215
// Call wrapper around a binary comparison functor type Op.
//
// Invoked with two operands, it forwards them to a default-constructed Op and
// returns the result as bool. The variadic overload is the fallback chosen by
// overload resolution for any other call shape; it panics.
template <typename Op>
struct relational {
    template <typename Lhs, typename Rhs>
    bool
    operator()(Lhs const& lhs, Rhs const& rhs) const {
        return Op{}(lhs, rhs);
    }

    template <typename... Operands>
    bool
    operator()(Operands const&...) const {
        PanicInfo("incompatible operands");
    }
};

1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246
// Compares one chunk of a left column, already fetched as raw data of type T,
// element-wise against the same chunk of the right column read as type U.
//
// left_raw_data    pointer to the left column's data for this chunk
// right_field_id   field whose chunk `current_chunk_id` supplies the right side
// current_chunk_id index of the chunk being compared
// cmp_func         callable invoked as cmp_func(left, right) for each row
//
// Returns a bitmap with one entry per row of the chunk.
template <typename T, typename U, typename CmpFunc>
TargetBitmap
ExecExprVisitor::ExecCompareRightType(const T* left_raw_data,
                                      const FieldId& right_field_id,
                                      const int64_t current_chunk_id,
                                      CmpFunc cmp_func) {
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunks = upper_div(row_count_, size_per_chunk);
    // Every chunk is full except possibly the last one, whose length is
    // whatever remains of row_count_.
    auto size = current_chunk_id == num_chunks - 1
                    ? row_count_ - current_chunk_id * size_per_chunk
                    : size_per_chunk;

    TargetBitmap result(size);
    const U* right_raw_data =
        segment_.chunk_data<U>(right_field_id, current_chunk_id).data();

    // Index with the same type as `size` so the counter cannot overflow or be
    // compared against a wider integer.
    for (decltype(size) i = 0; i < size; ++i) {
        result[i] = cmp_func(left_raw_data[i], right_raw_data[i]);
    }

    return result;
}

// Compares two columns chunk by chunk, with the left column read as type T
// and the right column's element type selected at runtime from
// right_field_type.
//
// left_field_id    field supplying the left operand
// right_field_id   field supplying the right operand
// right_field_type data type of the right field; only BOOL, the integer types,
//                  FLOAT and DOUBLE are handled, anything else panics
// cmp_func         callable invoked as cmp_func(left, right) for each row
//
// Returns the per-chunk results assembled into one bitset; asserts that it has
// row_count_ entries.
template <typename T, typename CmpFunc>
BitsetType
ExecExprVisitor::ExecCompareLeftType(const FieldId& left_field_id,
                                     const FieldId& right_field_id,
                                     const DataType& right_field_type,
                                     CmpFunc cmp_func) {
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunks = upper_div(row_count_, size_per_chunk);
    std::vector<FixedVector<bool>> results;
    results.reserve(num_chunks);

    for (int64_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
        FixedVector<bool> result;
        const T* left_raw_data =
            segment_.chunk_data<T>(left_field_id, chunk_id).data();

        switch (right_field_type) {
            case DataType::BOOL:
                result = ExecCompareRightType<T, bool, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::INT8:
                result = ExecCompareRightType<T, int8_t, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::INT16:
                result = ExecCompareRightType<T, int16_t, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::INT32:
                result = ExecCompareRightType<T, int32_t, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::INT64:
                result = ExecCompareRightType<T, int64_t, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::FLOAT:
                result = ExecCompareRightType<T, float, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            case DataType::DOUBLE:
                result = ExecCompareRightType<T, double, CmpFunc>(
                    left_raw_data, right_field_id, chunk_id, cmp_func);
                break;
            default:
                // This switch is over the RIGHT field's type.
                PanicInfo("unsupported right datatype of compare expr");
        }
        // `result` is not used again in this iteration; move it instead of
        // copying the whole chunk bitmap.
        results.push_back(std::move(result));
    }
    auto final_result = AssembleChunk(results);
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    return final_result;
}

// Entry point for comparing two columns through ExecCompareLeftType: selects
// the left column's element type from expr.left_data_type_ and forwards the
// right field's id and data type together with the comparison functor.
//
// Only BOOL, the integer types, FLOAT and DOUBLE are handled for the left
// column; any other left data type panics.
template <typename CmpFunc>
BitsetType
ExecExprVisitor::ExecCompareExprDispatcherForNonIndexedSegment(
    CompareExpr& expr, CmpFunc cmp_func) {
    switch (expr.left_data_type_) {
        case DataType::BOOL:
            return ExecCompareLeftType<bool, CmpFunc>(expr.left_field_id_,
                                                      expr.right_field_id_,
                                                      expr.right_data_type_,
                                                      cmp_func);
        case DataType::INT8:
            return ExecCompareLeftType<int8_t, CmpFunc>(expr.left_field_id_,
                                                        expr.right_field_id_,
                                                        expr.right_data_type_,
                                                        cmp_func);
        case DataType::INT16:
            return ExecCompareLeftType<int16_t, CmpFunc>(expr.left_field_id_,
                                                         expr.right_field_id_,
                                                         expr.right_data_type_,
                                                         cmp_func);
        case DataType::INT32:
            return ExecCompareLeftType<int32_t, CmpFunc>(expr.left_field_id_,
                                                         expr.right_field_id_,
                                                         expr.right_data_type_,
                                                         cmp_func);
        case DataType::INT64:
            return ExecCompareLeftType<int64_t, CmpFunc>(expr.left_field_id_,
                                                         expr.right_field_id_,
                                                         expr.right_data_type_,
                                                         cmp_func);
        case DataType::FLOAT:
            return ExecCompareLeftType<float, CmpFunc>(expr.left_field_id_,
                                                       expr.right_field_id_,
                                                       expr.right_data_type_,
                                                       cmp_func);
        case DataType::DOUBLE:
            return ExecCompareLeftType<double, CmpFunc>(expr.left_field_id_,
                                                        expr.right_field_id_,
                                                        expr.right_data_type_,
                                                        cmp_func);
        default:
            // This switch is over the LEFT field's type.
            PanicInfo("unsupported left datatype of compare expr");
    }
}

1340 1341
template <typename Op>
auto
Y
yah01 已提交
1342 1343 1344 1345 1346 1347 1348 1349 1350 1351
ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op)
    -> BitsetType {
    using number = boost::variant<bool,
                                  int8_t,
                                  int16_t,
                                  int32_t,
                                  int64_t,
                                  float,
                                  double,
                                  std::string>;
1352 1353 1354 1355 1356
    auto is_string_expr = [&expr]() -> bool {
        return expr.left_data_type_ == DataType::VARCHAR ||
               expr.right_data_type_ == DataType::VARCHAR;
    };

1357 1358
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
1359
    std::deque<BitsetType> bitsets;
1360 1361 1362 1363

    // check for sealed segment, load either raw field data or index
    auto left_indexing_barrier = segment_.num_chunk_index(expr.left_field_id_);
    auto left_data_barrier = segment_.num_chunk_data(expr.left_field_id_);
1364 1365 1366
    AssertInfo(std::max(left_data_barrier, left_indexing_barrier) == num_chunk,
               "max(left_data_barrier, left_indexing_barrier) not equal to "
               "num_chunk");
1367

Y
yah01 已提交
1368 1369
    auto right_indexing_barrier =
        segment_.num_chunk_index(expr.right_field_id_);
1370
    auto right_data_barrier = segment_.num_chunk_data(expr.right_field_id_);
Y
yah01 已提交
1371 1372 1373 1374
    AssertInfo(
        std::max(right_data_barrier, right_indexing_barrier) == num_chunk,
        "max(right_data_barrier, right_indexing_barrier) not equal to "
        "num_chunk");
1375

1376 1377 1378 1379 1380 1381 1382 1383
    // For segment both fields has no index, can use SIMD to speed up.
    // Avoiding too much call stack that blocks SIMD.
    if (left_indexing_barrier == 0 && right_indexing_barrier == 0 &&
        !is_string_expr()) {
        return ExecCompareExprDispatcherForNonIndexedSegment<Op>(expr, op);
    }

    // TODO: refactoring the code that contains too much call stack.
1384
    for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
Y
yah01 已提交
1385 1386 1387 1388 1389 1390
        auto size = chunk_id == num_chunk - 1
                        ? row_count_ - chunk_id * size_per_chunk
                        : size_per_chunk;
        auto getChunkData =
            [&, chunk_id](DataType type, FieldId field_id, int64_t data_barrier)
            -> std::function<const number(int)> {
1391 1392
            switch (type) {
                case DataType::BOOL: {
1393
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1394 1395 1396 1397 1398 1399
                        auto chunk_data =
                            segment_.chunk_data<bool>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1400 1401
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1402 1403 1404 1405 1406
                        auto& indexing = segment_.chunk_scalar_index<bool>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1407
                    }
1408 1409
                }
                case DataType::INT8: {
1410
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1411 1412 1413 1414 1415 1416
                        auto chunk_data =
                            segment_.chunk_data<int8_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1417 1418
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1419 1420 1421 1422 1423
                        auto& indexing = segment_.chunk_scalar_index<int8_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1424
                    }
1425 1426
                }
                case DataType::INT16: {
1427
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1428 1429 1430 1431 1432 1433
                        auto chunk_data =
                            segment_.chunk_data<int16_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1434 1435
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1436 1437 1438 1439 1440
                        auto& indexing = segment_.chunk_scalar_index<int16_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1441
                    }
1442 1443
                }
                case DataType::INT32: {
1444
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1445 1446 1447 1448 1449 1450
                        auto chunk_data =
                            segment_.chunk_data<int32_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1451 1452
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1453 1454 1455 1456 1457
                        auto& indexing = segment_.chunk_scalar_index<int32_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1458
                    }
1459 1460
                }
                case DataType::INT64: {
1461
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1462 1463 1464 1465 1466 1467
                        auto chunk_data =
                            segment_.chunk_data<int64_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1468 1469
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1470 1471 1472 1473 1474
                        auto& indexing = segment_.chunk_scalar_index<int64_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1475
                    }
1476 1477
                }
                case DataType::FLOAT: {
1478
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1479 1480 1481 1482 1483 1484
                        auto chunk_data =
                            segment_.chunk_data<float>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1485 1486
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1487 1488 1489 1490 1491
                        auto& indexing = segment_.chunk_scalar_index<float>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1492
                    }
1493 1494
                }
                case DataType::DOUBLE: {
1495
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1496 1497 1498 1499 1500 1501
                        auto chunk_data =
                            segment_.chunk_data<double>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1502 1503
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1504 1505 1506 1507 1508
                        auto& indexing = segment_.chunk_scalar_index<double>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1509
                    }
1510 1511
                }
                case DataType::VARCHAR: {
1512
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1513
                        if (segment_.type() == SegmentType::Growing) {
Y
yah01 已提交
1514 1515 1516 1517 1518 1519 1520
                            auto chunk_data =
                                segment_
                                    .chunk_data<std::string>(field_id, chunk_id)
                                    .data();
                            return [chunk_data](int i) -> const number {
                                return chunk_data[i];
                            };
Y
yah01 已提交
1521
                        } else {
Y
yah01 已提交
1522 1523 1524 1525 1526 1527 1528
                            auto chunk_data = segment_
                                                  .chunk_data<std::string_view>(
                                                      field_id, chunk_id)
                                                  .data();
                            return [chunk_data](int i) -> const number {
                                return std::string(chunk_data[i]);
                            };
Y
yah01 已提交
1529
                        }
1530 1531
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1532 1533 1534 1535 1536 1537
                        auto& indexing =
                            segment_.chunk_scalar_index<std::string>(field_id,
                                                                     chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1538
                    }
1539 1540
                }
                default:
1541
                    PanicInfo(fmt::format("unsupported data type: {}", type));
1542 1543
            }
        };
Y
yah01 已提交
1544 1545 1546 1547
        auto left = getChunkData(
            expr.left_data_type_, expr.left_field_id_, left_data_barrier);
        auto right = getChunkData(
            expr.right_data_type_, expr.right_field_id_, right_data_barrier);
1548

1549
        BitsetType bitset(size);
1550
        for (int i = 0; i < size; ++i) {
Y
yah01 已提交
1551 1552
            bool is_in = boost::apply_visitor(
                Relational<decltype(op)>{}, left(i), right(i));
1553 1554 1555 1556
            bitset[i] = is_in;
        }
        bitsets.emplace_back(std::move(bitset));
    }
1557
    auto final_result = Assemble(bitsets);
Y
yah01 已提交
1558 1559
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
1560
    return final_result;
1561 1562 1563 1564 1565
}

// Visitor entry point for a field-versus-field comparison expression.
//
// Checks that the data types recorded in the expression match the schema,
// maps the operator enum onto a comparison functor, runs the comparison and
// stores the resulting per-row bitset in bitset_opt_.
void
ExecExprVisitor::visit(CompareExpr& expr) {
    auto& schema = segment_.get_schema();
    auto& left_field_meta = schema[expr.left_field_id_];
    auto& right_field_meta = schema[expr.right_field_id_];
    AssertInfo(expr.left_data_type_ == left_field_meta.get_data_type(),
               "[ExecExprVisitor]Left data type not equal to left field "
               "meta type");
    AssertInfo(expr.right_data_type_ == right_field_meta.get_data_type(),
               "[ExecExprVisitor]right data type not equal to right field "
               "meta type");

    // Forwards the chosen functor to the templated dispatcher.
    auto compare_with = [this, &expr](auto op) -> BitsetType {
        return this->ExecCompareExprDispatcher(expr, op);
    };

    BitsetType res;
    switch (expr.op_type_) {
        case OpType::Equal:
            res = compare_with(std::equal_to<>{});
            break;
        case OpType::NotEqual:
            res = compare_with(std::not_equal_to<>{});
            break;
        case OpType::GreaterEqual:
            res = compare_with(std::greater_equal<>{});
            break;
        case OpType::GreaterThan:
            res = compare_with(std::greater<>{});
            break;
        case OpType::LessEqual:
            res = compare_with(std::less_equal<>{});
            break;
        case OpType::LessThan:
            res = compare_with(std::less<>{});
            break;
        case OpType::PrefixMatch:
            res = compare_with(MatchOp<OpType::PrefixMatch>{});
            break;
        // OpType::PostfixMatch has no handler here and falls into default.
        default:
            PanicInfo("unsupported optype");
    }

    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

S
sunby 已提交
1617 1618
template <typename T>
auto
1619
ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
S
sunby 已提交
1620 1621
    auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
    auto& schema = segment_.get_schema();
1622
    auto primary_filed_id = schema.get_primary_field_id();
1623
    auto field_id = expr_raw.column_.field_id;
1624
    auto& field_meta = schema[field_id];
1625 1626

    bool use_pk_index = false;
1627
    if (primary_filed_id.has_value()) {
Y
yah01 已提交
1628 1629
        use_pk_index = primary_filed_id.value() == field_id &&
                       IsPrimaryKeyDataType(field_meta.get_data_type());
1630 1631 1632 1633
    }

    if (use_pk_index) {
        auto id_array = std::make_unique<IdArray>();
1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651
        switch (field_meta.get_data_type()) {
            case DataType::INT64: {
                auto dst_ids = id_array->mutable_int_id();
                for (const auto& id : expr.terms_) {
                    dst_ids->add_data((int64_t&)id);
                }
                break;
            }
            case DataType::VARCHAR: {
                auto dst_ids = id_array->mutable_str_id();
                for (const auto& id : expr.terms_) {
                    dst_ids->add_data((std::string&)id);
                }
                break;
            }
            default: {
                PanicInfo("unsupported type");
            }
1652
        }
1653

1654 1655 1656 1657 1658 1659
        auto [uids, seg_offsets] = segment_.search_ids(*id_array, timestamp_);
        BitsetType bitset(row_count_);
        for (const auto& offset : seg_offsets) {
            auto _offset = (int64_t)offset.get();
            bitset[_offset] = true;
        }
Y
yah01 已提交
1660 1661
        AssertInfo(bitset.size() == row_count_,
                   "[ExecExprVisitor]Size of results not equal row count");
1662 1663 1664
        return bitset;
    }

1665
    return ExecTermVisitorImplTemplate<T>(expr_raw);
S
sunby 已提交
1666 1667
}

1668 1669
template <>
auto
Y
yah01 已提交
1670 1671
ExecExprVisitor::ExecTermVisitorImpl<std::string>(TermExpr& expr_raw)
    -> BitsetType {
1672 1673 1674
    return ExecTermVisitorImplTemplate<std::string>(expr_raw);
}

Y
yah01 已提交
1675 1676
template <>
auto
Y
yah01 已提交
1677 1678
ExecExprVisitor::ExecTermVisitorImpl<std::string_view>(TermExpr& expr_raw)
    -> BitsetType {
Y
yah01 已提交
1679 1680 1681
    return ExecTermVisitorImplTemplate<std::string_view>(expr_raw);
}

1682 1683 1684
// Generic term ("IN") evaluation for element type T.
//
// Two predicates are handed to ExecRangeVisitorImpl<T>:
//   * index_func asks a scalar index for all rows whose value is one of
//     the terms (Index::In);
//   * elem_func tests a single raw value against a hash set of the terms.
//
// For T == std::string_view the expression and the index are typed on
// std::string, while the hash set holds string_views built from the
// expression's terms.
template <typename T>
auto
ExecExprVisitor::ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType {
    using IndexInnerType =
        std::conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
    using Index = index::ScalarIndex<IndexInnerType>;

    auto& expr = static_cast<TermExprImpl<IndexInnerType>&>(expr_raw);
    const auto& raw_terms = expr.terms_;

    // Contiguous copy of the terms in the index's element type, for In().
    const std::vector<IndexInnerType> index_terms(raw_terms.begin(),
                                                  raw_terms.end());
    const auto term_count = index_terms.size();
    // Hash set used for per-element membership tests.
    const std::unordered_set<T> lookup(raw_terms.begin(), raw_terms.end());

    auto index_func = [&index_terms, term_count](Index* index) {
        return index->In(term_count, index_terms.data());
    };
    auto elem_func = [&lookup](MayConstRef<T> x) {
        return lookup.find(x) != lookup.end();
    };

    return ExecRangeVisitorImpl<T>(
        expr.column_.field_id, index_func, elem_func);
}

1708 1709 1710
// TODO: bool is so ugly here.
template <>
auto
Y
yah01 已提交
1711 1712
ExecExprVisitor::ExecTermVisitorImplTemplate<bool>(TermExpr& expr_raw)
    -> BitsetType {
1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730
    using T = bool;
    auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
    using Index = index::ScalarIndex<T>;
    const auto& terms = expr.terms_;
    auto n = terms.size();
    std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());

    auto index_func = [&terms, n](Index* index) {
        auto bool_arr_copy = new bool[terms.size()];
        int it = 0;
        for (auto elem : terms) {
            bool_arr_copy[it++] = elem;
        }
        auto bitset = index->In(n, bool_arr_copy);
        delete[] bool_arr_copy;
        return bitset;
    };

1731
    auto elem_func = [&terms, &term_set](MayConstRef<T> x) {
1732 1733 1734 1735 1736
        //// terms has already been sorted.
        // return std::binary_search(terms.begin(), terms.end(), x);
        return term_set.find(x) != term_set.end();
    };

1737 1738 1739 1740 1741 1742 1743 1744 1745 1746
    return ExecRangeVisitorImpl<T>(
        expr.column_.field_id, index_func, elem_func);
}

// Term ("IN") evaluation against a value nested inside a JSON field.
//
// ExprValueType is the type of the literals in the expression. For every row
// the value at expr.column_.nested_path is read from the JSON document and
// looked up in a hash set of the terms. A row whose value is missing or has
// an incompatible type does not match. When the terms are int64 and the
// stored value only parses as a double, the row matches if that double is
// integral, fits in int64 and is contained in the term set.
//
// The index callback passed along always yields an empty TargetBitmap.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecTermVisitorImplTemplateJson(TermExpr& expr_raw)
    -> BitsetType {
    using Index = index::ScalarIndex<milvus::Json>;
    auto& expr = static_cast<TermExprImpl<ExprValueType>&>(expr_raw);
    auto pointer = milvus::Json::pointer(expr.column_.nested_path);
    auto index_func = [](Index* index) { return TargetBitmap{}; };

    std::unordered_set<ExprValueType> term_set(expr.terms_.begin(),
                                               expr.terms_.end());

    // Nothing can match an empty term list.
    if (term_set.empty()) {
        auto elem_func = [](const milvus::Json& json) { return false; };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    }

    auto elem_func = [&term_set, &pointer](const milvus::Json& json) {
        // Strings are fetched as views and converted when looked up.
        using GetType =
            std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                               std::string_view,
                               ExprValueType>;
        auto x = json.template at<GetType>(pointer);
        if (x.error()) {
            if constexpr (std::is_same_v<GetType, std::int64_t>) {
                // The value may be stored as a floating point number.
                auto as_double = json.template at<double>(pointer);
                if (as_double.error()) {
                    return false;
                }

                auto value = as_double.value();
                // Converting a double outside the int64 range is undefined
                // behaviour, and such a value cannot equal any int64 term.
                // The bounds are -2^63 (inclusive) and 2^63 (exclusive).
                constexpr double kInt64Lowest = -9223372036854775808.0;
                constexpr double kInt64EndExclusive = 9223372036854775808.0;
                if (!(value >= kInt64Lowest && value < kInt64EndExclusive)) {
                    return false;
                }
                // if the term set is {1}, and the value is 1.1, we should not return true.
                return std::floor(value) == value &&
                       term_set.find(ExprValueType(value)) != term_set.end();
            }
            return false;
        }
        return term_set.find(ExprValueType(x.value())) != term_set.end();
    };

    return ExecRangeVisitorImpl<milvus::Json>(
        expr.column_.field_id, index_func, elem_func);
}

S
sunby 已提交
1786 1787
// Visitor entry point for a term ("IN") expression.
//
// Verifies that the expression's column type matches the schema, dispatches
// on that type to the matching ExecTermVisitorImpl instantiation (or, for
// JSON columns, on the literal kind to ExecTermVisitorImplTemplateJson) and
// stores the per-row result in bitset_opt_.
void
ExecExprVisitor::visit(TermExpr& expr) {
    auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta "
               "data type ");

    using ValCase = proto::plan::GenericValue::ValCase;

    // JSON columns are dispatched on the kind of the literals rather than on
    // the column type. An unset value kind is evaluated as bool.
    auto eval_json_terms = [this, &expr]() -> BitsetType {
        switch (expr.val_case_) {
            case ValCase::kBoolVal:
                return ExecTermVisitorImplTemplateJson<bool>(expr);
            case ValCase::kInt64Val:
                return ExecTermVisitorImplTemplateJson<int64_t>(expr);
            case ValCase::kFloatVal:
                return ExecTermVisitorImplTemplateJson<double>(expr);
            case ValCase::kStringVal:
                return ExecTermVisitorImplTemplateJson<std::string>(expr);
            case ValCase::VAL_NOT_SET:
                return ExecTermVisitorImplTemplateJson<bool>(expr);
            default:
                PanicInfo(
                    fmt::format("unknown data type: {}", expr.val_case_));
        }
    };

    BitsetType res;
    switch (expr.column_.data_type) {
        case DataType::BOOL:
            res = ExecTermVisitorImpl<bool>(expr);
            break;
        case DataType::INT8:
            res = ExecTermVisitorImpl<int8_t>(expr);
            break;
        case DataType::INT16:
            res = ExecTermVisitorImpl<int16_t>(expr);
            break;
        case DataType::INT32:
            res = ExecTermVisitorImpl<int32_t>(expr);
            break;
        case DataType::INT64:
            res = ExecTermVisitorImpl<int64_t>(expr);
            break;
        case DataType::FLOAT:
            res = ExecTermVisitorImpl<float>(expr);
            break;
        case DataType::DOUBLE:
            res = ExecTermVisitorImpl<double>(expr);
            break;
        case DataType::VARCHAR:
            // Growing segments are evaluated with std::string elements, all
            // other segment types with std::string_view elements.
            res = segment_.type() == SegmentType::Growing
                      ? ExecTermVisitorImpl<std::string>(expr)
                      : ExecTermVisitorImpl<std::string_view>(expr);
            break;
        case DataType::JSON:
            res = eval_json_terms();
            break;
        default:
            PanicInfo(fmt::format("unsupported data type: {}",
                                  expr.column_.data_type));
    }

    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}
1861 1862 1863 1864 1865 1866 1867

// Visitor entry point for an "exists" expression on a JSON column.
//
// For every row the JSON document is asked whether a value exists at
// expr.column_.nested_path. Only DataType::JSON columns are supported; any
// other column type triggers a panic. The per-row result is stored in
// bitset_opt_.
void
ExecExprVisitor::visit(ExistsExpr& expr) {
    using Index = index::ScalarIndex<milvus::Json>;

    auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");

    auto pointer = milvus::Json::pointer(expr.column_.nested_path);

    BitsetType res;
    switch (expr.column_.data_type) {
        case DataType::JSON: {
            // The index callback always yields an empty bitmap.
            auto empty_index_result = [](Index* index) {
                return TargetBitmap{};
            };
            auto path_exists = [&pointer](const milvus::Json& json) {
                return json.exist(pointer);
            };
            res = ExecRangeVisitorImpl<milvus::Json>(
                expr.column_.field_id, empty_index_result, path_exists);
            break;
        }
        default:
            PanicInfo(fmt::format("unsupported data type {}",
                                  expr.column_.data_type));
    }

    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

N
neza2017 已提交
1890
}  // namespace milvus::query