ExecExprVisitor.cpp 56.4 KB
Newer Older
F
FluorineDog 已提交
1 2 3 4 5 6 7 8 9 10 11
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

Y
yah01 已提交
12 13 14
#include "query/generated/ExecExprVisitor.h"

#include <boost/variant.hpp>
15
#include <deque>
N
neza2017 已提交
16
#include <optional>
17 18 19
#include <string>
#include <string_view>
#include <type_traits>
20
#include <unordered_set>
21 22
#include <utility>

23 24 25 26
#include "common/Json.h"
#include "common/Types.h"
#include "exceptions/EasyAssert.h"
#include "pb/plan.pb.h"
G
GuoRentong 已提交
27
#include "query/ExprImpl.h"
28
#include "query/Relational.h"
Y
yah01 已提交
29 30
#include "query/Utils.h"
#include "segcore/SegmentGrowingImpl.h"
31
#include "simdjson/error.h"
N
neza2017 已提交
32 33 34 35 36 37 38

namespace milvus::query {
// THIS CONTAINS EXTRA BODY FOR VISITOR
// WILL BE USED BY GENERATOR
namespace impl {
class ExecExprVisitor : ExprVisitor {
 public:
Y
yah01 已提交
39 40 41
    ExecExprVisitor(const segcore::SegmentInternalInterface& segment,
                    int64_t row_count,
                    Timestamp timestamp)
42
        : segment_(segment), row_count_(row_count), timestamp_(timestamp) {
N
neza2017 已提交
43
    }
44 45

    BitsetType
N
neza2017 已提交
46
    call_child(Expr& expr) {
Y
yah01 已提交
47 48
        AssertInfo(!bitset_opt_.has_value(),
                   "[ExecExprVisitor]Bitset already has value before accept");
N
neza2017 已提交
49
        expr.accept(*this);
Y
yah01 已提交
50 51
        AssertInfo(bitset_opt_.has_value(),
                   "[ExecExprVisitor]Bitset doesn't have value after accept");
52 53
        auto res = std::move(bitset_opt_);
        bitset_opt_ = std::nullopt;
54
        return std::move(res.value());
N
neza2017 已提交
55 56
    }

G
GuoRentong 已提交
57
 public:
F
FluorineDog 已提交
58
    template <typename T, typename IndexFunc, typename ElementFunc>
G
GuoRentong 已提交
59
    auto
Y
yah01 已提交
60 61 62
    ExecRangeVisitorImpl(FieldId field_id,
                         IndexFunc func,
                         ElementFunc element_func) -> BitsetType;
G
GuoRentong 已提交
63 64 65

    template <typename T>
    auto
66
    ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) -> BitsetType;
67

68 69
    template <typename T>
    auto
Y
yah01 已提交
70 71
    ExecBinaryArithOpEvalRangeVisitorDispatcher(
        BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType;
72

73 74
    template <typename T>
    auto
75
    ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) -> BitsetType;
G
GuoRentong 已提交
76

S
sunby 已提交
77 78
    template <typename T>
    auto
79
    ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType;
S
sunby 已提交
80

81 82 83 84
    template <typename T>
    auto
    ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType;

85 86
    template <typename CmpFunc>
    auto
Y
yah01 已提交
87 88
    ExecCompareExprDispatcher(CompareExpr& expr, CmpFunc cmp_func)
        -> BitsetType;
89

N
neza2017 已提交
90
 private:
91 92
    const segcore::SegmentInternalInterface& segment_;
    int64_t row_count_;
93
    Timestamp timestamp_;
94
    BitsetTypeOpt bitset_opt_;
N
neza2017 已提交
95 96 97 98
};
}  // namespace impl

void
F
FluorineDog 已提交
99 100
ExecExprVisitor::visit(LogicalUnaryExpr& expr) {
    using OpType = LogicalUnaryExpr::OpType;
101
    auto child_res = call_child(*expr.child_);
102
    BitsetType res = std::move(child_res);
103 104 105 106 107 108 109
    switch (expr.op_type_) {
        case OpType::LogicalNot: {
            res.flip();
            break;
        }
        default: {
            PanicInfo("Invalid Unary Op");
F
FluorineDog 已提交
110 111
        }
    }
Y
yah01 已提交
112 113
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
114
    bitset_opt_ = std::move(res);
N
neza2017 已提交
115 116 117
}

void
F
FluorineDog 已提交
118 119
ExecExprVisitor::visit(LogicalBinaryExpr& expr) {
    using OpType = LogicalBinaryExpr::OpType;
F
FluorineDog 已提交
120 121
    auto left = call_child(*expr.left_);
    auto right = call_child(*expr.right_);
Y
yah01 已提交
122 123
    AssertInfo(left.size() == right.size(),
               "[ExecExprVisitor]Left size not equal to right size");
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
    auto res = std::move(left);
    switch (expr.op_type_) {
        case OpType::LogicalAnd: {
            res &= right;
            break;
        }
        case OpType::LogicalOr: {
            res |= right;
            break;
        }
        case OpType::LogicalXor: {
            res ^= right;
            break;
        }
        case OpType::LogicalMinus: {
            res -= right;
            break;
        }
        default: {
            PanicInfo("Invalid Binary Op");
        }
    }
Y
yah01 已提交
146 147
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
148
    bitset_opt_ = std::move(res);
149
}
F
FluorineDog 已提交
150

151
static auto
152 153
Assemble(const std::deque<BitsetType>& srcs) -> BitsetType {
    BitsetType res;
154 155 156 157 158 159 160 161 162 163 164

    int64_t total_size = 0;
    for (auto& chunk : srcs) {
        total_size += chunk.size();
    }
    res.resize(total_size);

    int64_t counter = 0;
    for (auto& chunk : srcs) {
        for (int64_t i = 0; i < chunk.size(); ++i) {
            res[counter + i] = chunk[i];
F
FluorineDog 已提交
165
        }
166
        counter += chunk.size();
F
FluorineDog 已提交
167
    }
168
    return res;
N
neza2017 已提交
169 170
}

F
FluorineDog 已提交
171
template <typename T, typename IndexFunc, typename ElementFunc>
G
GuoRentong 已提交
172
auto
Y
yah01 已提交
173 174 175
ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id,
                                      IndexFunc index_func,
                                      ElementFunc element_func) -> BitsetType {
G
GuoRentong 已提交
176
    auto& schema = segment_.get_schema();
177 178
    auto& field_meta = schema[field_id];
    auto indexing_barrier = segment_.num_chunk_index(field_id);
B
BossZou 已提交
179 180
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
181
    std::deque<BitsetType> results;
182

Y
yah01 已提交
183 184 185
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
186
    using Index = index::ScalarIndex<IndexInnerType>;
F
FluorineDog 已提交
187
    for (auto chunk_id = 0; chunk_id < indexing_barrier; ++chunk_id) {
Y
yah01 已提交
188 189
        const Index& indexing =
            segment_.chunk_scalar_index<IndexInnerType>(field_id, chunk_id);
190 191 192
        // NOTE: knowhere is not const-ready
        // This is a dirty workaround
        auto data = index_func(const_cast<Index*>(&indexing));
Y
yah01 已提交
193 194
        AssertInfo(data->size() == size_per_chunk,
                   "[ExecExprVisitor]Data size not equal to size_per_chunk");
195
        results.emplace_back(std::move(*data));
F
FluorineDog 已提交
196
    }
197
    for (auto chunk_id = indexing_barrier; chunk_id < num_chunk; ++chunk_id) {
Y
yah01 已提交
198 199 200
        auto this_size = chunk_id == num_chunk - 1
                             ? row_count_ - chunk_id * size_per_chunk
                             : size_per_chunk;
201
        BitsetType result(this_size);
202
        auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
G
GuoRentong 已提交
203
        const T* data = chunk.data();
204
        for (int index = 0; index < this_size; ++index) {
205
            result[index] = element_func(data[index]);
G
GuoRentong 已提交
206
        }
Y
yah01 已提交
207

208
        results.emplace_back(std::move(result));
G
GuoRentong 已提交
209
    }
210
    auto final_result = Assemble(results);
Y
yah01 已提交
211 212
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Final result size not equal to row count");
213
    return final_result;
G
GuoRentong 已提交
214
}
215

216
template <typename T, typename IndexFunc, typename ElementFunc>
217
auto
Y
yah01 已提交
218 219 220
ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id,
                                          IndexFunc index_func,
                                          ElementFunc element_func)
221
    -> BitsetType {
222
    auto& schema = segment_.get_schema();
223
    auto& field_meta = schema[field_id];
224 225
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
226 227 228 229
    auto indexing_barrier = segment_.num_chunk_index(field_id);
    auto data_barrier = segment_.num_chunk_data(field_id);
    AssertInfo(std::max(data_barrier, indexing_barrier) == num_chunk,
               "max(data_barrier, index_barrier) not equal to num_chunk");
230 231
    std::deque<BitsetType> results;

232 233 234 235 236
    // for growing segment, indexing_barrier will always less than data_barrier
    // so growing segment will always execute expr plan using raw data
    // if sealed segment has loaded raw data on this field, then index_barrier = 0 and data_barrier = 1
    // in this case, sealed segment execute expr plan using raw data
    for (auto chunk_id = 0; chunk_id < data_barrier; ++chunk_id) {
Y
yah01 已提交
237 238 239
        auto this_size = chunk_id == num_chunk - 1
                             ? row_count_ - chunk_id * size_per_chunk
                             : size_per_chunk;
240
        BitsetType result(this_size);
241
        auto chunk = segment_.chunk_data<T>(field_id, chunk_id);
242 243 244 245
        const T* data = chunk.data();
        for (int index = 0; index < this_size; ++index) {
            result[index] = element_func(data[index]);
        }
Y
yah01 已提交
246 247 248
        AssertInfo(
            result.size() == this_size,
            "[ExecExprVisitor]Chunk result size not equal to expected size");
249 250
        results.emplace_back(std::move(result));
    }
251 252 253

    // if sealed segment has loaded scalar index for this field, then index_barrier = 1 and data_barrier = 0
    // in this case, sealed segment execute expr plan using scalar index
Y
yah01 已提交
254 255 256
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
257
    using Index = index::ScalarIndex<IndexInnerType>;
Y
yah01 已提交
258 259 260 261
    for (auto chunk_id = data_barrier; chunk_id < indexing_barrier;
         ++chunk_id) {
        auto& indexing =
            segment_.chunk_scalar_index<IndexInnerType>(field_id, chunk_id);
262 263 264 265 266 267 268 269
        auto this_size = const_cast<Index*>(&indexing)->Count();
        BitsetType result(this_size);
        for (int offset = 0; offset < this_size; ++offset) {
            result[offset] = index_func(const_cast<Index*>(&indexing), offset);
        }
        results.emplace_back(std::move(result));
    }

270
    auto final_result = Assemble(results);
Y
yah01 已提交
271 272
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Final result size not equal to row count");
273 274 275
    return final_result;
}

G
GuoRentong 已提交
276 277 278 279
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
280 281 282 283 284
ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw)
    -> BitsetType {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
285 286 287
    using Index = index::ScalarIndex<IndexInnerType>;
    auto& expr = static_cast<UnaryRangeExprImpl<IndexInnerType>&>(expr_raw);

288
    auto op = expr.op_type_;
Y
yah01 已提交
289
    auto val = IndexInnerType(expr.value_);
290 291
    switch (op) {
        case OpType::Equal: {
Y
yah01 已提交
292 293 294
            auto index_func = [val](Index* index) {
                return index->In(1, &val);
            };
295
            auto elem_func = [val](T x) { return (x == val); };
Y
yah01 已提交
296 297
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
298 299
        }
        case OpType::NotEqual: {
Y
yah01 已提交
300 301 302
            auto index_func = [val](Index* index) {
                return index->NotIn(1, &val);
            };
303
            auto elem_func = [val](T x) { return (x != val); };
Y
yah01 已提交
304 305
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
306 307
        }
        case OpType::GreaterEqual: {
Y
yah01 已提交
308 309 310
            auto index_func = [val](Index* index) {
                return index->Range(val, OpType::GreaterEqual);
            };
311
            auto elem_func = [val](T x) { return (x >= val); };
Y
yah01 已提交
312 313
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
G
GuoRentong 已提交
314
        }
315
        case OpType::GreaterThan: {
Y
yah01 已提交
316 317 318
            auto index_func = [val](Index* index) {
                return index->Range(val, OpType::GreaterThan);
            };
319
            auto elem_func = [val](T x) { return (x > val); };
Y
yah01 已提交
320 321
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
322 323
        }
        case OpType::LessEqual: {
Y
yah01 已提交
324 325 326
            auto index_func = [val](Index* index) {
                return index->Range(val, OpType::LessEqual);
            };
327
            auto elem_func = [val](T x) { return (x <= val); };
Y
yah01 已提交
328 329
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
330 331
        }
        case OpType::LessThan: {
Y
yah01 已提交
332 333 334
            auto index_func = [val](Index* index) {
                return index->Range(val, OpType::LessThan);
            };
335
            auto elem_func = [val](T x) { return (x < val); };
Y
yah01 已提交
336 337
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
338 339 340
        }
        case OpType::PrefixMatch: {
            auto index_func = [val](Index* index) {
P
presburger 已提交
341
                auto dataset = std::make_unique<Dataset>();
342 343
                dataset->Set(milvus::index::OPERATOR_TYPE, OpType::PrefixMatch);
                dataset->Set(milvus::index::PREFIX_VALUE, val);
344 345 346
                return index->Query(std::move(dataset));
            };
            auto elem_func = [val, op](T x) { return Match(x, val, op); };
Y
yah01 已提交
347 348
            return ExecRangeVisitorImpl<T>(
                expr.field_id_, index_func, elem_func);
349 350
        }
        // TODO: PostfixMatch
351
        default: {
G
GuoRentong 已提交
352 353
            PanicInfo("unsupported range node");
        }
354 355 356 357
    }
}
#pragma clang diagnostic pop

358 359 360 361
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
362 363
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
    BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
364
    auto& expr = static_cast<BinaryArithOpEvalRangeExprImpl<T>&>(expr_raw);
365
    using Index = index::ScalarIndex<T>;
366 367 368 369
    auto arith_op = expr.arith_op_;
    auto right_operand = expr.right_operand_;
    auto op = expr.op_type_;
    auto val = expr.value_;
370
    auto& nested_path = expr.column_.nested_path;
371 372 373 374 375

    switch (op) {
        case OpType::Equal: {
            switch (arith_op) {
                case ArithOpType::Add: {
Y
yah01 已提交
376 377
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
378 379 380
                        auto x = index->Reverse_Lookup(offset);
                        return (x + right_operand) == val;
                    };
381 382 383
                    auto elem_func = [val, right_operand, &nested_path](T x) {
                        // visit the nested field
                        // now it must be Json
Y
yah01 已提交
384 385 386
                        return ((x + right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
387
                        expr.column_.field_id, index_func, elem_func);
388 389
                }
                case ArithOpType::Sub: {
Y
yah01 已提交
390 391
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
392 393 394
                        auto x = index->Reverse_Lookup(offset);
                        return (x - right_operand) == val;
                    };
Y
yah01 已提交
395 396 397 398
                    auto elem_func = [val, right_operand](T x) {
                        return ((x - right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
399
                        expr.column_.field_id, index_func, elem_func);
400 401
                }
                case ArithOpType::Mul: {
Y
yah01 已提交
402 403
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
404 405 406
                        auto x = index->Reverse_Lookup(offset);
                        return (x * right_operand) == val;
                    };
Y
yah01 已提交
407 408 409 410
                    auto elem_func = [val, right_operand](T x) {
                        return ((x * right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
411
                        expr.column_.field_id, index_func, elem_func);
412 413
                }
                case ArithOpType::Div: {
Y
yah01 已提交
414 415
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
416 417 418
                        auto x = index->Reverse_Lookup(offset);
                        return (x / right_operand) == val;
                    };
Y
yah01 已提交
419 420 421 422
                    auto elem_func = [val, right_operand](T x) {
                        return ((x / right_operand) == val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
423
                        expr.column_.field_id, index_func, elem_func);
424 425
                }
                case ArithOpType::Mod: {
Y
yah01 已提交
426 427
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
428 429 430
                        auto x = index->Reverse_Lookup(offset);
                        return static_cast<T>(fmod(x, right_operand)) == val;
                    };
431 432 433
                    auto elem_func = [val, right_operand](T x) {
                        return (static_cast<T>(fmod(x, right_operand)) == val);
                    };
Y
yah01 已提交
434
                    return ExecDataRangeVisitorImpl<T>(
435
                        expr.column_.field_id, index_func, elem_func);
436 437 438 439 440 441 442 443 444
                }
                default: {
                    PanicInfo("unsupported arithmetic operation");
                }
            }
        }
        case OpType::NotEqual: {
            switch (arith_op) {
                case ArithOpType::Add: {
Y
yah01 已提交
445 446
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
447 448 449
                        auto x = index->Reverse_Lookup(offset);
                        return (x + right_operand) != val;
                    };
Y
yah01 已提交
450 451 452 453
                    auto elem_func = [val, right_operand](T x) {
                        return ((x + right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
454
                        expr.column_.field_id, index_func, elem_func);
455 456
                }
                case ArithOpType::Sub: {
Y
yah01 已提交
457 458
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
459 460 461
                        auto x = index->Reverse_Lookup(offset);
                        return (x - right_operand) != val;
                    };
Y
yah01 已提交
462 463 464 465
                    auto elem_func = [val, right_operand](T x) {
                        return ((x - right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
466
                        expr.column_.field_id, index_func, elem_func);
467 468
                }
                case ArithOpType::Mul: {
Y
yah01 已提交
469 470
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
471 472 473
                        auto x = index->Reverse_Lookup(offset);
                        return (x * right_operand) != val;
                    };
Y
yah01 已提交
474 475 476 477
                    auto elem_func = [val, right_operand](T x) {
                        return ((x * right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
478
                        expr.column_.field_id, index_func, elem_func);
479 480
                }
                case ArithOpType::Div: {
Y
yah01 已提交
481 482
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
483 484 485
                        auto x = index->Reverse_Lookup(offset);
                        return (x / right_operand) != val;
                    };
Y
yah01 已提交
486 487 488 489
                    auto elem_func = [val, right_operand](T x) {
                        return ((x / right_operand) != val);
                    };
                    return ExecDataRangeVisitorImpl<T>(
490
                        expr.column_.field_id, index_func, elem_func);
491 492
                }
                case ArithOpType::Mod: {
Y
yah01 已提交
493 494
                    auto index_func = [val, right_operand](Index* index,
                                                           size_t offset) {
495 496 497
                        auto x = index->Reverse_Lookup(offset);
                        return static_cast<T>(fmod(x, right_operand)) != val;
                    };
498 499 500
                    auto elem_func = [val, right_operand](T x) {
                        return (static_cast<T>(fmod(x, right_operand)) != val);
                    };
Y
yah01 已提交
501
                    return ExecDataRangeVisitorImpl<T>(
502
                        expr.column_.field_id, index_func, elem_func);
503 504 505 506 507 508 509 510 511 512 513 514 515
                }
                default: {
                    PanicInfo("unsupported arithmetic operation");
                }
            }
        }
        default: {
            PanicInfo("unsupported range node with arithmetic operation");
        }
    }
}
#pragma clang diagnostic pop

516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
// JSON variant of the arithmetic range evaluation: for every row, reads the
// value at `nested_path` from the JSON document and evaluates
// `(json[path] <arith_op> right_operand) <op> value`.
//
// Supported comparisons: Equal, NotEqual.
// Supported arithmetic: Add, Sub, Mul, Div, Mod.
// Any other combination panics.
// When the path cannot be read as GetType, the row yields false for Equal
// and true for NotEqual. The index path always yields false.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcherJson(
    BinaryArithOpEvalRangeExpr& expr_raw) -> BitsetType {
    auto& expr =
        static_cast<BinaryArithOpEvalRangeExprImpl<ExprValueType>&>(expr_raw);
    using Index = index::ScalarIndex<milvus::Json>;
    // Strings are read from the document as views rather than copies.
    using GetType =
        std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                           std::string_view,
                           ExprValueType>;

    auto arith_op = expr.arith_op_;
    auto right_operand = expr.right_operand_;
    auto op = expr.op_type_;
    auto val = expr.value_;
    auto& nested_path = expr.column_.nested_path;

    // Runs the scan for one (arithmetic, comparison) pair. `on_error` is the
    // row result when the JSON lookup fails. The reference captures are safe
    // because the predicates are only invoked inside this call.
    const auto scan = [&](auto arith, auto compare, bool on_error)
        -> BitsetType {
        auto index_func = [](Index* /*index*/, size_t /*offset*/) {
            return false;
        };
        auto elem_func = [&](const milvus::Json& json) -> bool {
            auto x = json.template at<GetType>(nested_path);
            if (x.error()) {
                return on_error;
            }
            return compare(arith(x.value()));
        };
        return this->ExecDataRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    };

    // Picks the arithmetic for an already chosen comparison.
    // NOTE(review): Div has no zero check on right_operand here; presumably
    // it is validated upstream -- confirm.
    const auto with_arith = [&](auto compare, bool on_error) -> BitsetType {
        switch (arith_op) {
            case ArithOpType::Add:
                return scan(
                    [right_operand](auto x) { return x + right_operand; },
                    compare,
                    on_error);
            case ArithOpType::Sub:
                return scan(
                    [right_operand](auto x) { return x - right_operand; },
                    compare,
                    on_error);
            case ArithOpType::Mul:
                return scan(
                    [right_operand](auto x) { return x * right_operand; },
                    compare,
                    on_error);
            case ArithOpType::Div:
                return scan(
                    [right_operand](auto x) { return x / right_operand; },
                    compare,
                    on_error);
            case ArithOpType::Mod:
                return scan(
                    [right_operand](auto x) {
                        return static_cast<ExprValueType>(
                            fmod(x, right_operand));
                    },
                    compare,
                    on_error);
            default: {
                PanicInfo("unsupported arithmetic operation");
            }
        }
    };

    switch (op) {
        case OpType::Equal:
            return with_arith([val](auto lhs) { return lhs == val; }, false);
        case OpType::NotEqual:
            return with_arith([val](auto lhs) { return lhs != val; }, true);
        default: {
            PanicInfo("unsupported range node with arithmetic operation");
        }
    }
}

687 688 689 690
#pragma clang diagnostic push
#pragma ide diagnostic ignored "Simplify"
template <typename T>
auto
Y
yah01 已提交
691 692 693 694 695
ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw)
    -> BitsetType {
    typedef std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
            IndexInnerType;
Y
yah01 已提交
696 697 698
    using Index = index::ScalarIndex<IndexInnerType>;
    auto& expr = static_cast<BinaryRangeExprImpl<IndexInnerType>&>(expr_raw);

699 700
    bool lower_inclusive = expr.lower_inclusive_;
    bool upper_inclusive = expr.upper_inclusive_;
701 702
    IndexInnerType val1 = expr.lower_value_;
    IndexInnerType val2 = expr.upper_value_;
703

Y
yah01 已提交
704 705 706
    auto index_func = [=](Index* index) {
        return index->Range(val1, lower_inclusive, val2, upper_inclusive);
    };
707 708
    if (lower_inclusive && upper_inclusive) {
        auto elem_func = [val1, val2](T x) { return (val1 <= x && x <= val2); };
709 710
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
711 712
    } else if (lower_inclusive && !upper_inclusive) {
        auto elem_func = [val1, val2](T x) { return (val1 <= x && x < val2); };
713 714
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
715 716
    } else if (!lower_inclusive && upper_inclusive) {
        auto elem_func = [val1, val2](T x) { return (val1 < x && x <= val2); };
717 718
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
G
GuoRentong 已提交
719
    } else {
720
        auto elem_func = [val1, val2](T x) { return (val1 < x && x < val2); };
721 722
        return ExecRangeVisitorImpl<T>(
            expr.column_.field_id, index_func, elem_func);
G
GuoRentong 已提交
723 724 725 726
    }
}
#pragma clang diagnostic pop

727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780
// Evaluates a two-sided range predicate against a value nested inside a JSON
// field. ExprValueType is the type of the bounds carried by the expression;
// string bounds are compared against std::string_view values extracted from
// the JSON document. Rows whose nested path cannot be read as GetType (the
// lookup reports an error) never match.
template <typename ExprValueType>
auto
ExecExprVisitor::ExecBinaryRangeVisitorDispatcherJson(BinaryRangeExpr& expr_raw)
    -> BitsetType {
    using Index = index::ScalarIndex<milvus::Json>;
    using GetType =
        std::conditional_t<std::is_same_v<ExprValueType, std::string>,
                           std::string_view,
                           ExprValueType>;

    auto& expr = static_cast<BinaryRangeExprImpl<ExprValueType>&>(expr_raw);
    bool lower_inclusive = expr.lower_inclusive_;
    bool upper_inclusive = expr.upper_inclusive_;
    ExprValueType val1 = expr.lower_value_;
    ExprValueType val2 = expr.upper_value_;

    // no json index now
    auto index_func = [=](Index* index) { return TargetBitmapPtr{}; };

    // In every branch the lookup result is checked with error() BEFORE
    // value() is touched: the result holds no usable value when the path is
    // missing or has a different type, and reading it first would raise
    // instead of simply rejecting the row (presumably a simdjson result,
    // whose value() throws on error).
    if (lower_inclusive && upper_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            auto x = json.template at<GetType>(expr.column_.nested_path);
            if (x.error()) {
                return false;
            }
            auto value = x.value();
            return val1 <= value && value <= val2;
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    } else if (lower_inclusive && !upper_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            auto x = json.template at<GetType>(expr.column_.nested_path);
            if (x.error()) {
                return false;
            }
            auto value = x.value();
            return val1 <= value && value < val2;
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    } else if (!lower_inclusive && upper_inclusive) {
        auto elem_func = [&](const milvus::Json& json) {
            auto x = json.template at<GetType>(expr.column_.nested_path);
            if (x.error()) {
                return false;
            }
            auto value = x.value();
            return val1 < value && value <= val2;
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    } else {
        auto elem_func = [&](const milvus::Json& json) {
            auto x = json.template at<GetType>(expr.column_.nested_path);
            if (x.error()) {
                return false;
            }
            auto value = x.value();
            return val1 < value && value < val2;
        };
        return ExecRangeVisitorImpl<milvus::Json>(
            expr.column_.field_id, index_func, elem_func);
    }
}

N
neza2017 已提交
781
void
782
ExecExprVisitor::visit(UnaryRangeExpr& expr) {
783
    auto& field_meta = segment_.get_schema()[expr.field_id_];
784 785
    AssertInfo(expr.data_type_ == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");
786
    BitsetType res;
G
GuoRentong 已提交
787
    switch (expr.data_type_) {
N
neza2017 已提交
788
        case DataType::BOOL: {
789
            res = ExecUnaryRangeVisitorDispatcher<bool>(expr);
N
neza2017 已提交
790 791
            break;
        }
G
GuoRentong 已提交
792
        case DataType::INT8: {
793
            res = ExecUnaryRangeVisitorDispatcher<int8_t>(expr);
G
GuoRentong 已提交
794 795 796
            break;
        }
        case DataType::INT16: {
797
            res = ExecUnaryRangeVisitorDispatcher<int16_t>(expr);
G
GuoRentong 已提交
798 799 800
            break;
        }
        case DataType::INT32: {
801
            res = ExecUnaryRangeVisitorDispatcher<int32_t>(expr);
G
GuoRentong 已提交
802 803 804
            break;
        }
        case DataType::INT64: {
805
            res = ExecUnaryRangeVisitorDispatcher<int64_t>(expr);
G
GuoRentong 已提交
806 807 808
            break;
        }
        case DataType::FLOAT: {
809
            res = ExecUnaryRangeVisitorDispatcher<float>(expr);
G
GuoRentong 已提交
810 811 812
            break;
        }
        case DataType::DOUBLE: {
813 814 815
            res = ExecUnaryRangeVisitorDispatcher<double>(expr);
            break;
        }
816
        case DataType::VARCHAR: {
Y
yah01 已提交
817 818 819 820 821
            if (segment_.type() == SegmentType::Growing) {
                res = ExecUnaryRangeVisitorDispatcher<std::string>(expr);
            } else {
                res = ExecUnaryRangeVisitorDispatcher<std::string_view>(expr);
            }
822 823
            break;
        }
824 825 826
        default:
            PanicInfo("unsupported");
    }
Y
yah01 已提交
827 828
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
829
    bitset_opt_ = std::move(res);
830 831
}

832 833
// Executes an arithmetic-then-compare range expression. Numeric columns are
// dispatched on the column type; JSON columns are dispatched on the type of
// the literal carried by the expression (val_case_). The resulting bitset
// (one bit per row) is stored in bitset_opt_.
void
ExecExprVisitor::visit(BinaryArithOpEvalRangeExpr& expr) {
    const auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");

    using ValCase = proto::plan::GenericValue::ValCase;

    // JSON columns: the literal's type decides how the nested value is read.
    const auto dispatch_json = [&]() -> BitsetType {
        switch (expr.val_case_) {
            case ValCase::kBoolVal:
                return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<bool>(
                    expr);
            case ValCase::kInt64Val:
                return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<
                    int64_t>(expr);
            case ValCase::kFloatVal:
                return ExecBinaryArithOpEvalRangeVisitorDispatcherJson<double>(
                    expr);
            default:
                PanicInfo("unsupported value type {} in expression");
        }
    };

    const auto dispatch = [&]() -> BitsetType {
        switch (expr.column_.data_type) {
            case DataType::INT8:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int8_t>(
                    expr);
            case DataType::INT16:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int16_t>(
                    expr);
            case DataType::INT32:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int32_t>(
                    expr);
            case DataType::INT64:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<int64_t>(
                    expr);
            case DataType::FLOAT:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<float>(expr);
            case DataType::DOUBLE:
                return ExecBinaryArithOpEvalRangeVisitorDispatcher<double>(
                    expr);
            case DataType::JSON:
                return dispatch_json();
            default:
                PanicInfo("unsupported");
        }
    };

    BitsetType res = dispatch();
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

895 896
// Executes a two-sided range expression. Scalar columns are dispatched on the
// column type; JSON columns are dispatched on the type of the bound literal
// (val_case_). The resulting bitset (one bit per row) is stored in
// bitset_opt_.
void
ExecExprVisitor::visit(BinaryRangeExpr& expr) {
    const auto& field_meta = segment_.get_schema()[expr.column_.field_id];
    AssertInfo(expr.column_.data_type == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta data type");

    using ValCase = proto::plan::GenericValue::ValCase;

    // JSON columns: the literal's type decides how the nested value is read.
    const auto dispatch_json = [&]() -> BitsetType {
        switch (expr.val_case_) {
            case ValCase::kBoolVal:
                return ExecBinaryRangeVisitorDispatcherJson<bool>(expr);
            case ValCase::kInt64Val:
                return ExecBinaryRangeVisitorDispatcherJson<int64_t>(expr);
            case ValCase::kFloatVal:
                return ExecBinaryRangeVisitorDispatcherJson<double>(expr);
            case ValCase::kStringVal:
                return ExecBinaryRangeVisitorDispatcherJson<std::string>(expr);
            default:
                PanicInfo("unsupported value type {} in expression");
        }
    };

    const auto dispatch = [&]() -> BitsetType {
        switch (expr.column_.data_type) {
            case DataType::BOOL:
                return ExecBinaryRangeVisitorDispatcher<bool>(expr);
            case DataType::INT8:
                return ExecBinaryRangeVisitorDispatcher<int8_t>(expr);
            case DataType::INT16:
                return ExecBinaryRangeVisitorDispatcher<int16_t>(expr);
            case DataType::INT32:
                return ExecBinaryRangeVisitorDispatcher<int32_t>(expr);
            case DataType::INT64:
                return ExecBinaryRangeVisitorDispatcher<int64_t>(expr);
            case DataType::FLOAT:
                return ExecBinaryRangeVisitorDispatcher<float>(expr);
            case DataType::DOUBLE:
                return ExecBinaryRangeVisitorDispatcher<double>(expr);
            case DataType::VARCHAR:
                // Growing segments are read as std::string, every other
                // segment type as std::string_view.
                if (segment_.type() == SegmentType::Growing) {
                    return ExecBinaryRangeVisitorDispatcher<std::string>(expr);
                }
                return ExecBinaryRangeVisitorDispatcher<std::string_view>(
                    expr);
            case DataType::JSON:
                return dispatch_json();
            default:
                PanicInfo("unsupported");
        }
    };

    BitsetType res = dispatch();
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
// Callable adapter around a binary predicate type Op.
template <typename Op>
struct relational {
    // Two operands: evaluate a default-constructed Op on them.
    template <typename Lhs, typename Rhs>
    bool
    operator()(Lhs const& lhs, Rhs const& rhs) const {
        return Op{}(lhs, rhs);
    }

    // Variadic fallback for any other argument list: always panics.
    template <typename... Args>
    bool
    operator()(Args const&...) const {
        PanicInfo("incompatible operands");
    }
};

template <typename Op>
auto
Y
yah01 已提交
987 988 989 990 991 992 993 994 995 996
ExecExprVisitor::ExecCompareExprDispatcher(CompareExpr& expr, Op op)
    -> BitsetType {
    using number = boost::variant<bool,
                                  int8_t,
                                  int16_t,
                                  int32_t,
                                  int64_t,
                                  float,
                                  double,
                                  std::string>;
997 998
    auto size_per_chunk = segment_.size_per_chunk();
    auto num_chunk = upper_div(row_count_, size_per_chunk);
999
    std::deque<BitsetType> bitsets;
1000 1001 1002 1003

    // check for sealed segment, load either raw field data or index
    auto left_indexing_barrier = segment_.num_chunk_index(expr.left_field_id_);
    auto left_data_barrier = segment_.num_chunk_data(expr.left_field_id_);
1004 1005 1006
    AssertInfo(std::max(left_data_barrier, left_indexing_barrier) == num_chunk,
               "max(left_data_barrier, left_indexing_barrier) not equal to "
               "num_chunk");
1007

Y
yah01 已提交
1008 1009
    auto right_indexing_barrier =
        segment_.num_chunk_index(expr.right_field_id_);
1010
    auto right_data_barrier = segment_.num_chunk_data(expr.right_field_id_);
Y
yah01 已提交
1011 1012 1013 1014
    AssertInfo(
        std::max(right_data_barrier, right_indexing_barrier) == num_chunk,
        "max(right_data_barrier, right_indexing_barrier) not equal to "
        "num_chunk");
1015

1016
    for (int64_t chunk_id = 0; chunk_id < num_chunk; ++chunk_id) {
Y
yah01 已提交
1017 1018 1019 1020 1021 1022
        auto size = chunk_id == num_chunk - 1
                        ? row_count_ - chunk_id * size_per_chunk
                        : size_per_chunk;
        auto getChunkData =
            [&, chunk_id](DataType type, FieldId field_id, int64_t data_barrier)
            -> std::function<const number(int)> {
1023 1024
            switch (type) {
                case DataType::BOOL: {
1025
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1026 1027 1028 1029 1030 1031
                        auto chunk_data =
                            segment_.chunk_data<bool>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1032 1033
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1034 1035 1036 1037 1038
                        auto& indexing = segment_.chunk_scalar_index<bool>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1039
                    }
1040 1041
                }
                case DataType::INT8: {
1042
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1043 1044 1045 1046 1047 1048
                        auto chunk_data =
                            segment_.chunk_data<int8_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1049 1050
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1051 1052 1053 1054 1055
                        auto& indexing = segment_.chunk_scalar_index<int8_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1056
                    }
1057 1058
                }
                case DataType::INT16: {
1059
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1060 1061 1062 1063 1064 1065
                        auto chunk_data =
                            segment_.chunk_data<int16_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1066 1067
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1068 1069 1070 1071 1072
                        auto& indexing = segment_.chunk_scalar_index<int16_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1073
                    }
1074 1075
                }
                case DataType::INT32: {
1076
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1077 1078 1079 1080 1081 1082
                        auto chunk_data =
                            segment_.chunk_data<int32_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1083 1084
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1085 1086 1087 1088 1089
                        auto& indexing = segment_.chunk_scalar_index<int32_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1090
                    }
1091 1092
                }
                case DataType::INT64: {
1093
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1094 1095 1096 1097 1098 1099
                        auto chunk_data =
                            segment_.chunk_data<int64_t>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1100 1101
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1102 1103 1104 1105 1106
                        auto& indexing = segment_.chunk_scalar_index<int64_t>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1107
                    }
1108 1109
                }
                case DataType::FLOAT: {
1110
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1111 1112 1113 1114 1115 1116
                        auto chunk_data =
                            segment_.chunk_data<float>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1117 1118
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1119 1120 1121 1122 1123
                        auto& indexing = segment_.chunk_scalar_index<float>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1124
                    }
1125 1126
                }
                case DataType::DOUBLE: {
1127
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1128 1129 1130 1131 1132 1133
                        auto chunk_data =
                            segment_.chunk_data<double>(field_id, chunk_id)
                                .data();
                        return [chunk_data](int i) -> const number {
                            return chunk_data[i];
                        };
1134 1135
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1136 1137 1138 1139 1140
                        auto& indexing = segment_.chunk_scalar_index<double>(
                            field_id, chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1141
                    }
1142 1143
                }
                case DataType::VARCHAR: {
1144
                    if (chunk_id < data_barrier) {
Y
yah01 已提交
1145
                        if (segment_.type() == SegmentType::Growing) {
Y
yah01 已提交
1146 1147 1148 1149 1150 1151 1152
                            auto chunk_data =
                                segment_
                                    .chunk_data<std::string>(field_id, chunk_id)
                                    .data();
                            return [chunk_data](int i) -> const number {
                                return chunk_data[i];
                            };
Y
yah01 已提交
1153
                        } else {
Y
yah01 已提交
1154 1155 1156 1157 1158 1159 1160
                            auto chunk_data = segment_
                                                  .chunk_data<std::string_view>(
                                                      field_id, chunk_id)
                                                  .data();
                            return [chunk_data](int i) -> const number {
                                return std::string(chunk_data[i]);
                            };
Y
yah01 已提交
1161
                        }
1162 1163
                    } else {
                        // for case, sealed segment has loaded index for scalar field instead of raw data
Y
yah01 已提交
1164 1165 1166 1167 1168 1169
                        auto& indexing =
                            segment_.chunk_scalar_index<std::string>(field_id,
                                                                     chunk_id);
                        return [&indexing](int i) -> const number {
                            return indexing.Reverse_Lookup(i);
                        };
1170
                    }
1171 1172 1173 1174 1175
                }
                default:
                    PanicInfo("unsupported datatype");
            }
        };
Y
yah01 已提交
1176 1177 1178 1179
        auto left = getChunkData(
            expr.left_data_type_, expr.left_field_id_, left_data_barrier);
        auto right = getChunkData(
            expr.right_data_type_, expr.right_field_id_, right_data_barrier);
1180

1181
        BitsetType bitset(size);
1182
        for (int i = 0; i < size; ++i) {
Y
yah01 已提交
1183 1184
            bool is_in = boost::apply_visitor(
                Relational<decltype(op)>{}, left(i), right(i));
1185 1186 1187 1188
            bitset[i] = is_in;
        }
        bitsets.emplace_back(std::move(bitset));
    }
1189
    auto final_result = Assemble(bitsets);
Y
yah01 已提交
1190 1191
    AssertInfo(final_result.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
1192
    return final_result;
1193 1194 1195 1196 1197
}

// Executes a column-versus-column comparison. Both operand types are checked
// against the schema, then the comparison functor matching op_type_ is passed
// to ExecCompareExprDispatcher. The resulting bitset (one bit per row) is
// stored in bitset_opt_.
void
ExecExprVisitor::visit(CompareExpr& expr) {
    const auto& schema = segment_.get_schema();
    const auto& left_field_meta = schema[expr.left_field_id_];
    const auto& right_field_meta = schema[expr.right_field_id_];
    AssertInfo(expr.left_data_type_ == left_field_meta.get_data_type(),
               "[ExecExprVisitor]Left data type not equal to left "
               "field meta type");
    AssertInfo(expr.right_data_type_ == right_field_meta.get_data_type(),
               "[ExecExprVisitor]right data type not equal to right field "
               "meta type");

    // PostfixMatch has no case here and ends up in the default branch.
    const auto dispatch = [&]() -> BitsetType {
        switch (expr.op_type_) {
            case OpType::Equal:
                return ExecCompareExprDispatcher(expr, std::equal_to<>{});
            case OpType::NotEqual:
                return ExecCompareExprDispatcher(expr, std::not_equal_to<>{});
            case OpType::GreaterEqual:
                return ExecCompareExprDispatcher(expr, std::greater_equal<>{});
            case OpType::GreaterThan:
                return ExecCompareExprDispatcher(expr, std::greater<>{});
            case OpType::LessEqual:
                return ExecCompareExprDispatcher(expr, std::less_equal<>{});
            case OpType::LessThan:
                return ExecCompareExprDispatcher(expr, std::less<>{});
            case OpType::PrefixMatch:
                return ExecCompareExprDispatcher(
                    expr, MatchOp<OpType::PrefixMatch>{});
            default:
                PanicInfo("unsupported optype");
        }
    };

    BitsetType res = dispatch();
    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}

S
sunby 已提交
1249 1250
template <typename T>
auto
1251
ExecExprVisitor::ExecTermVisitorImpl(TermExpr& expr_raw) -> BitsetType {
S
sunby 已提交
1252 1253
    auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
    auto& schema = segment_.get_schema();
1254 1255 1256
    auto primary_filed_id = schema.get_primary_field_id();
    auto field_id = expr_raw.field_id_;
    auto& field_meta = schema[field_id];
1257 1258

    bool use_pk_index = false;
1259
    if (primary_filed_id.has_value()) {
Y
yah01 已提交
1260 1261
        use_pk_index = primary_filed_id.value() == field_id &&
                       IsPrimaryKeyDataType(field_meta.get_data_type());
1262 1263 1264 1265
    }

    if (use_pk_index) {
        auto id_array = std::make_unique<IdArray>();
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283
        switch (field_meta.get_data_type()) {
            case DataType::INT64: {
                auto dst_ids = id_array->mutable_int_id();
                for (const auto& id : expr.terms_) {
                    dst_ids->add_data((int64_t&)id);
                }
                break;
            }
            case DataType::VARCHAR: {
                auto dst_ids = id_array->mutable_str_id();
                for (const auto& id : expr.terms_) {
                    dst_ids->add_data((std::string&)id);
                }
                break;
            }
            default: {
                PanicInfo("unsupported type");
            }
1284
        }
1285

1286 1287 1288 1289 1290 1291
        auto [uids, seg_offsets] = segment_.search_ids(*id_array, timestamp_);
        BitsetType bitset(row_count_);
        for (const auto& offset : seg_offsets) {
            auto _offset = (int64_t)offset.get();
            bitset[_offset] = true;
        }
Y
yah01 已提交
1292 1293
        AssertInfo(bitset.size() == row_count_,
                   "[ExecExprVisitor]Size of results not equal row count");
1294 1295 1296
        return bitset;
    }

1297
    return ExecTermVisitorImplTemplate<T>(expr_raw);
S
sunby 已提交
1298 1299
}

1300 1301
template <>
auto
Y
yah01 已提交
1302 1303
ExecExprVisitor::ExecTermVisitorImpl<std::string>(TermExpr& expr_raw)
    -> BitsetType {
1304 1305 1306
    return ExecTermVisitorImplTemplate<std::string>(expr_raw);
}

Y
yah01 已提交
1307 1308
template <>
auto
Y
yah01 已提交
1309 1310
ExecExprVisitor::ExecTermVisitorImpl<std::string_view>(TermExpr& expr_raw)
    -> BitsetType {
Y
yah01 已提交
1311 1312 1313
    return ExecTermVisitorImplTemplate<std::string_view>(expr_raw);
}

1314 1315 1316
// Evaluates a term ("IN") expression for values read as T.
//
// Two callables are handed to ExecRangeVisitorImpl: index_func asks a scalar
// index which rows hold one of the terms (Index::In), elem_func tests a
// single value for membership in a hash set built from the terms.
//
// The term list is used in place. This assumes expr.terms_ is a contiguous
// std::vector<IndexInnerType> (NOTE(review): confirm against TermExprImpl),
// so no private copy is needed to obtain the pointer passed to In().
template <typename T>
auto
ExecExprVisitor::ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType {
    // Values read as std::string_view are matched against std::string terms
    // and use a std::string-typed index.
    using IndexInnerType = std::
        conditional_t<std::is_same_v<T, std::string_view>, std::string, T>;
    using Index = index::ScalarIndex<IndexInnerType>;
    auto& expr = static_cast<TermExprImpl<IndexInnerType>&>(expr_raw);

    const auto& terms = expr.terms_;
    // When T is std::string_view the set elements are built from, and refer
    // to, the strings in expr.terms_.
    std::unordered_set<T> term_set(terms.begin(), terms.end());

    auto index_func = [&terms](Index* index) {
        return index->In(terms.size(), terms.data());
    };
    auto elem_func = [&term_set](T x) {
        return term_set.find(x) != term_set.end();
    };

    return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
}

1339 1340 1341
// TODO: bool is so ugly here.
template <>
auto
Y
yah01 已提交
1342 1343
ExecExprVisitor::ExecTermVisitorImplTemplate<bool>(TermExpr& expr_raw)
    -> BitsetType {
1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
    using T = bool;
    auto& expr = static_cast<TermExprImpl<T>&>(expr_raw);
    using Index = index::ScalarIndex<T>;
    const auto& terms = expr.terms_;
    auto n = terms.size();
    std::unordered_set<T> term_set(expr.terms_.begin(), expr.terms_.end());

    auto index_func = [&terms, n](Index* index) {
        auto bool_arr_copy = new bool[terms.size()];
        int it = 0;
        for (auto elem : terms) {
            bool_arr_copy[it++] = elem;
        }
        auto bitset = index->In(n, bool_arr_copy);
        delete[] bool_arr_copy;
        return bitset;
    };

    auto elem_func = [&terms, &term_set](T x) {
        //// terms has already been sorted.
        // return std::binary_search(terms.begin(), terms.end(), x);
        return term_set.find(x) != term_set.end();
    };

    return ExecRangeVisitorImpl<T>(expr.field_id_, index_func, elem_func);
}

S
sunby 已提交
1371 1372
// Evaluates a term (IN) expression. Checks that the expression's data type
// matches the field's type in the segment schema, runs the
// ExecTermVisitorImpl instantiation for that type, verifies the result holds
// row_count_ entries, and stores it in bitset_opt_.
void
ExecExprVisitor::visit(TermExpr& expr) {
    auto& field_meta = segment_.get_schema()[expr.field_id_];
    AssertInfo(expr.data_type_ == field_meta.get_data_type(),
               "[ExecExprVisitor]DataType of expr isn't field_meta "
               "data type ");

    BitsetType res;
    switch (expr.data_type_) {
        case DataType::BOOL:
            res = ExecTermVisitorImpl<bool>(expr);
            break;
        case DataType::INT8:
            res = ExecTermVisitorImpl<int8_t>(expr);
            break;
        case DataType::INT16:
            res = ExecTermVisitorImpl<int16_t>(expr);
            break;
        case DataType::INT32:
            res = ExecTermVisitorImpl<int32_t>(expr);
            break;
        case DataType::INT64:
            res = ExecTermVisitorImpl<int64_t>(expr);
            break;
        case DataType::FLOAT:
            res = ExecTermVisitorImpl<float>(expr);
            break;
        case DataType::DOUBLE:
            res = ExecTermVisitorImpl<double>(expr);
            break;
        case DataType::VARCHAR:
            // Growing segments are evaluated over std::string values, every
            // other segment type over std::string_view.
            res = segment_.type() == SegmentType::Growing
                      ? ExecTermVisitorImpl<std::string>(expr)
                      : ExecTermVisitorImpl<std::string_view>(expr);
            break;
        default:
            PanicInfo("unsupported");
    }

    AssertInfo(res.size() == row_count_,
               "[ExecExprVisitor]Size of results not equal row count");
    bitset_opt_ = std::move(res);
}
N
neza2017 已提交
1422
}  // namespace milvus::query