未验证 提交 7721cc73 编写于 作者: Y yah01 提交者: GitHub

Perf: Avoid copying string/json when filter on them (#24381)

Signed-off-by: Nyah01 <yang.cen@zilliz.com>
上级 59b0a61c
...@@ -28,6 +28,7 @@ ...@@ -28,6 +28,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <type_traits>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <variant> #include <variant>
...@@ -143,21 +144,11 @@ using BitSetBlockType = BitsetType::block_type; ...@@ -143,21 +144,11 @@ using BitSetBlockType = BitsetType::block_type;
constexpr size_t BITSET_BLOCK_SIZE = sizeof(BitsetType::block_type); constexpr size_t BITSET_BLOCK_SIZE = sizeof(BitsetType::block_type);
constexpr size_t BITSET_BLOCK_BIT_SIZE = sizeof(BitsetType::block_type) * 8; constexpr size_t BITSET_BLOCK_BIT_SIZE = sizeof(BitsetType::block_type) * 8;
template <typename T> template <typename T>
using MayRef = std::conditional_t<!std::is_trivially_copyable_v<T> || using MayConstRef = std::conditional_t<std::is_same_v<T, std::string> ||
sizeof(T) >= REF_SIZE_THRESHOLD, std::is_same_v<T, milvus::Json>,
T&, const T&,
T>; T>;
template <typename T> static_assert(std::is_same_v<const std::string&, MayConstRef<std::string>>);
using Parameter = std::
conditional_t<std::is_same_v<T, std::string>, std::string_view, MayRef<T>>;
static_assert(std::is_same_v<int64_t, Parameter<int64_t>>);
static_assert(std::is_same_v<std::string_view, Parameter<std::string>>);
struct LargeType {
int64_t x, y, z;
};
static_assert(std::is_same_v<LargeType&, Parameter<LargeType>>);
} // namespace milvus } // namespace milvus
// //
template <> template <>
......
...@@ -249,6 +249,8 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id, ...@@ -249,6 +249,8 @@ ExecExprVisitor::ExecRangeVisitorImpl(FieldId field_id,
auto size_per_chunk = segment_.size_per_chunk(); auto size_per_chunk = segment_.size_per_chunk();
auto num_chunk = upper_div(row_count_, size_per_chunk); auto num_chunk = upper_div(row_count_, size_per_chunk);
std::vector<FixedVector<bool>> results; std::vector<FixedVector<bool>> results;
results.reserve(num_chunk);
typedef std:: typedef std::
conditional_t<std::is_same_v<T, std::string_view>, std::string, T> conditional_t<std::is_same_v<T, std::string_view>, std::string, T>
IndexInnerType; IndexInnerType;
...@@ -297,6 +299,7 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id, ...@@ -297,6 +299,7 @@ ExecExprVisitor::ExecDataRangeVisitorImpl(FieldId field_id,
AssertInfo(std::max(data_barrier, indexing_barrier) == num_chunk, AssertInfo(std::max(data_barrier, indexing_barrier) == num_chunk,
"max(data_barrier, index_barrier) not equal to num_chunk"); "max(data_barrier, index_barrier) not equal to num_chunk");
std::vector<FixedVector<bool>> results; std::vector<FixedVector<bool>> results;
results.reserve(num_chunk);
// for growing segment, indexing_barrier will always less than data_barrier // for growing segment, indexing_barrier will always less than data_barrier
// so growing segment will always execute expr plan using raw data // so growing segment will always execute expr plan using raw data
...@@ -360,42 +363,42 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) ...@@ -360,42 +363,42 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw)
switch (op) { switch (op) {
case OpType::Equal: { case OpType::Equal: {
auto index_func = [&](Index* index) { return index->In(1, &val); }; auto index_func = [&](Index* index) { return index->In(1, &val); };
auto elem_func = [&](T x) { return (x == val); }; auto elem_func = [&](MayConstRef<T> x) { return (x == val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::NotEqual: { case OpType::NotEqual: {
auto index_func = [&](Index* index) { auto index_func = [&](Index* index) {
return index->NotIn(1, &val); return index->NotIn(1, &val);
}; };
auto elem_func = [&](T x) { return (x != val); }; auto elem_func = [&](MayConstRef<T> x) { return (x != val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::GreaterEqual: { case OpType::GreaterEqual: {
auto index_func = [&](Index* index) { auto index_func = [&](Index* index) {
return index->Range(val, OpType::GreaterEqual); return index->Range(val, OpType::GreaterEqual);
}; };
auto elem_func = [&](T x) { return (x >= val); }; auto elem_func = [&](MayConstRef<T> x) { return (x >= val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::GreaterThan: { case OpType::GreaterThan: {
auto index_func = [&](Index* index) { auto index_func = [&](Index* index) {
return index->Range(val, OpType::GreaterThan); return index->Range(val, OpType::GreaterThan);
}; };
auto elem_func = [&](T x) { return (x > val); }; auto elem_func = [&](MayConstRef<T> x) { return (x > val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::LessEqual: { case OpType::LessEqual: {
auto index_func = [&](Index* index) { auto index_func = [&](Index* index) {
return index->Range(val, OpType::LessEqual); return index->Range(val, OpType::LessEqual);
}; };
auto elem_func = [&](T x) { return (x <= val); }; auto elem_func = [&](MayConstRef<T> x) { return (x <= val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::LessThan: { case OpType::LessThan: {
auto index_func = [&](Index* index) { auto index_func = [&](Index* index) {
return index->Range(val, OpType::LessThan); return index->Range(val, OpType::LessThan);
}; };
auto elem_func = [&](T x) { return (x < val); }; auto elem_func = [&](MayConstRef<T> x) { return (x < val); };
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
case OpType::PrefixMatch: { case OpType::PrefixMatch: {
...@@ -405,7 +408,9 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw) ...@@ -405,7 +408,9 @@ ExecExprVisitor::ExecUnaryRangeVisitorDispatcher(UnaryRangeExpr& expr_raw)
dataset->Set(milvus::index::PREFIX_VALUE, val); dataset->Set(milvus::index::PREFIX_VALUE, val);
return index->Query(std::move(dataset)); return index->Query(std::move(dataset));
}; };
auto elem_func = [&](T x) { return Match(x, val, op); }; auto elem_func = [&](MayConstRef<T> x) {
return Match(x, val, op);
};
return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func); return ExecRangeVisitorImpl<T>(field_id, index_func, elem_func);
} }
// TODO: PostfixMatch // TODO: PostfixMatch
...@@ -539,11 +544,12 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -539,11 +544,12 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x + right_operand) == val; return (x + right_operand) == val;
}; };
auto elem_func = [val, right_operand, &nested_path](T x) { auto elem_func =
// visit the nested field [val, right_operand, &nested_path](MayConstRef<T> x) {
// now it must be Json // visit the nested field
return ((x + right_operand) == val); // now it must be Json
}; return ((x + right_operand) == val);
};
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
expr.column_.field_id, index_func, elem_func); expr.column_.field_id, index_func, elem_func);
} }
...@@ -553,7 +559,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -553,7 +559,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x - right_operand) == val; return (x - right_operand) == val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x - right_operand) == val); return ((x - right_operand) == val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -565,7 +571,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -565,7 +571,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x * right_operand) == val; return (x * right_operand) == val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x * right_operand) == val); return ((x * right_operand) == val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -577,7 +583,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -577,7 +583,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x / right_operand) == val; return (x / right_operand) == val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x / right_operand) == val); return ((x / right_operand) == val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -589,7 +595,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -589,7 +595,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return static_cast<T>(fmod(x, right_operand)) == val; return static_cast<T>(fmod(x, right_operand)) == val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return (static_cast<T>(fmod(x, right_operand)) == val); return (static_cast<T>(fmod(x, right_operand)) == val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -608,7 +614,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -608,7 +614,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x + right_operand) != val; return (x + right_operand) != val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x + right_operand) != val); return ((x + right_operand) != val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -620,7 +626,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -620,7 +626,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x - right_operand) != val; return (x - right_operand) != val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x - right_operand) != val); return ((x - right_operand) != val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -632,7 +638,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -632,7 +638,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x * right_operand) != val; return (x * right_operand) != val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x * right_operand) != val); return ((x * right_operand) != val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -644,7 +650,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -644,7 +650,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return (x / right_operand) != val; return (x / right_operand) != val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return ((x / right_operand) != val); return ((x / right_operand) != val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -656,7 +662,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher( ...@@ -656,7 +662,7 @@ ExecExprVisitor::ExecBinaryArithOpEvalRangeVisitorDispatcher(
auto x = index->Reverse_Lookup(offset); auto x = index->Reverse_Lookup(offset);
return static_cast<T>(fmod(x, right_operand)) != val; return static_cast<T>(fmod(x, right_operand)) != val;
}; };
auto elem_func = [val, right_operand](T x) { auto elem_func = [val, right_operand](MayConstRef<T> x) {
return (static_cast<T>(fmod(x, right_operand)) != val); return (static_cast<T>(fmod(x, right_operand)) != val);
}; };
return ExecDataRangeVisitorImpl<T>( return ExecDataRangeVisitorImpl<T>(
...@@ -882,19 +888,27 @@ ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw) ...@@ -882,19 +888,27 @@ ExecExprVisitor::ExecBinaryRangeVisitorDispatcher(BinaryRangeExpr& expr_raw)
return index->Range(val1, lower_inclusive, val2, upper_inclusive); return index->Range(val1, lower_inclusive, val2, upper_inclusive);
}; };
if (lower_inclusive && upper_inclusive) { if (lower_inclusive && upper_inclusive) {
auto elem_func = [val1, val2](T x) { return (val1 <= x && x <= val2); }; auto elem_func = [val1, val2](MayConstRef<T> x) {
return (val1 <= x && x <= val2);
};
return ExecRangeVisitorImpl<T>( return ExecRangeVisitorImpl<T>(
expr.column_.field_id, index_func, elem_func); expr.column_.field_id, index_func, elem_func);
} else if (lower_inclusive && !upper_inclusive) { } else if (lower_inclusive && !upper_inclusive) {
auto elem_func = [val1, val2](T x) { return (val1 <= x && x < val2); }; auto elem_func = [val1, val2](MayConstRef<T> x) {
return (val1 <= x && x < val2);
};
return ExecRangeVisitorImpl<T>( return ExecRangeVisitorImpl<T>(
expr.column_.field_id, index_func, elem_func); expr.column_.field_id, index_func, elem_func);
} else if (!lower_inclusive && upper_inclusive) { } else if (!lower_inclusive && upper_inclusive) {
auto elem_func = [val1, val2](T x) { return (val1 < x && x <= val2); }; auto elem_func = [val1, val2](MayConstRef<T> x) {
return (val1 < x && x <= val2);
};
return ExecRangeVisitorImpl<T>( return ExecRangeVisitorImpl<T>(
expr.column_.field_id, index_func, elem_func); expr.column_.field_id, index_func, elem_func);
} else { } else {
auto elem_func = [val1, val2](T x) { return (val1 < x && x < val2); }; auto elem_func = [val1, val2](MayConstRef<T> x) {
return (val1 < x && x < val2);
};
return ExecRangeVisitorImpl<T>( return ExecRangeVisitorImpl<T>(
expr.column_.field_id, index_func, elem_func); expr.column_.field_id, index_func, elem_func);
} }
...@@ -1226,10 +1240,10 @@ ExecExprVisitor::ExecCompareLeftType(const FieldId& left_field_id, ...@@ -1226,10 +1240,10 @@ ExecExprVisitor::ExecCompareLeftType(const FieldId& left_field_id,
const FieldId& right_field_id, const FieldId& right_field_id,
const DataType& right_field_type, const DataType& right_field_type,
CmpFunc cmp_func) { CmpFunc cmp_func) {
std::vector<FixedVector<bool>> results;
auto size_per_chunk = segment_.size_per_chunk(); auto size_per_chunk = segment_.size_per_chunk();
auto num_chunks = upper_div(row_count_, size_per_chunk); auto num_chunks = upper_div(row_count_, size_per_chunk);
std::vector<FixedVector<bool>> results;
results.reserve(num_chunks);
for (int64_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) { for (int64_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
FixedVector<bool> result; FixedVector<bool> result;
...@@ -1679,7 +1693,7 @@ ExecExprVisitor::ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType { ...@@ -1679,7 +1693,7 @@ ExecExprVisitor::ExecTermVisitorImplTemplate(TermExpr& expr_raw) -> BitsetType {
auto index_func = [&terms, n](Index* index) { auto index_func = [&terms, n](Index* index) {
return index->In(n, terms.data()); return index->In(n, terms.data());
}; };
auto elem_func = [&terms, &term_set](T x) { auto elem_func = [&terms, &term_set](MayConstRef<T> x) {
//// terms has already been sorted. //// terms has already been sorted.
// return std::binary_search(terms.begin(), terms.end(), x); // return std::binary_search(terms.begin(), terms.end(), x);
return term_set.find(x) != term_set.end(); return term_set.find(x) != term_set.end();
...@@ -1712,7 +1726,7 @@ ExecExprVisitor::ExecTermVisitorImplTemplate<bool>(TermExpr& expr_raw) ...@@ -1712,7 +1726,7 @@ ExecExprVisitor::ExecTermVisitorImplTemplate<bool>(TermExpr& expr_raw)
return bitset; return bitset;
}; };
auto elem_func = [&terms, &term_set](T x) { auto elem_func = [&terms, &term_set](MayConstRef<T> x) {
//// terms has already been sorted. //// terms has already been sorted.
// return std::binary_search(terms.begin(), terms.end(), x); // return std::binary_search(terms.begin(), terms.end(), x);
return term_set.find(x) != term_set.end(); return term_set.find(x) != term_set.end();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册