未验证 提交 54d17bc5 编写于 作者: X xige-16 提交者: GitHub

Fix query too slow when insert multi repeated pk data (#18231)

Signed-off-by: xige-16 <xi.ge@zilliz.com>
上级 e5fe4612
...@@ -22,6 +22,7 @@ ...@@ -22,6 +22,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include <tbb/concurrent_unordered_map.h> #include <tbb/concurrent_unordered_map.h>
#include <tbb/concurrent_unordered_set.h>
#include <boost/align/aligned_allocator.hpp> #include <boost/align/aligned_allocator.hpp>
#include <boost/container/vector.hpp> #include <boost/container/vector.hpp>
#include <boost/dynamic_bitset.hpp> #include <boost/dynamic_bitset.hpp>
...@@ -70,7 +71,9 @@ using VectorArray = proto::schema::VectorField; ...@@ -70,7 +71,9 @@ using VectorArray = proto::schema::VectorField;
using IdArray = proto::schema::IDs; using IdArray = proto::schema::IDs;
using InsertData = proto::segcore::InsertRecord; using InsertData = proto::segcore::InsertRecord;
using PkType = std::variant<std::monostate, int64_t, std::string>; using PkType = std::variant<std::monostate, int64_t, std::string>;
using Pk2OffsetType = tbb::concurrent_unordered_multimap<PkType, int64_t, std::hash<PkType>>; // tbb::concurrent_unordered_multimap equal_range too slow when multi repeated key
// using Pk2OffsetType = tbb::concurrent_unordered_multimap<PkType, int64_t, std::hash<PkType>>;
using Pk2OffsetType = tbb::concurrent_unordered_map<PkType, tbb::concurrent_unordered_set<int64_t>, std::hash<PkType>>;
inline bool inline bool
IsPrimaryKeyDataType(DataType data_type) { IsPrimaryKeyDataType(DataType data_type) {
......
...@@ -42,6 +42,21 @@ struct DeletedRecord { ...@@ -42,6 +42,21 @@ struct DeletedRecord {
return lru_; return lru_;
} }
// Snapshot the cached LRU bitmap entry under a single shared (reader) lock, so
// the clone, the recorded old del_barrier, and the cache-hit test all observe
// one consistent lru_ state (no writer can swap lru_ between those reads).
//
// @param insert_barrier        size the cloned bitmap is extended/truncated to
// @param del_barrier           delete-record barrier the caller wants evaluated
// @param[out] old_del_barrier  del_barrier stored in the cached entry; callers
//                              use it to decide which delete range to (re)apply
// @param[out] hit_cache        set true when the cached entry already matches
//                              both barriers exactly, i.e. the returned clone
//                              is usable as-is with no further updates
// @return a private copy of the cached TmpBitmap (safe to mutate by the caller)
std::shared_ptr<TmpBitmap>
clone_lru_entry(int64_t insert_barrier, int64_t del_barrier, int64_t& old_del_barrier, bool& hit_cache) {
    std::shared_lock lck(shared_mutex_);
    // Clone first: the caller always gets its own copy, even on a cache hit,
    // so it never aliases the shared lru_ entry outside the lock.
    auto res = lru_->clone(insert_barrier);
    old_del_barrier = lru_->del_barrier;

    if (lru_->bitmap_ptr->size() == insert_barrier && lru_->del_barrier == del_barrier) {
        hit_cache = true;
    } else {
        // Cache miss: stamp the clone with the requested barrier; the caller
        // is expected to apply delete records between the old and new barriers.
        res->del_barrier = del_barrier;
    }

    return res;
}
void void
insert_lru_entry(std::shared_ptr<TmpBitmap> new_entry, bool force = false) { insert_lru_entry(std::shared_ptr<TmpBitmap> new_entry, bool force = false) {
std::lock_guard lck(shared_mutex_); std::lock_guard lck(shared_mutex_);
...@@ -59,7 +74,6 @@ struct DeletedRecord { ...@@ -59,7 +74,6 @@ struct DeletedRecord {
AckResponder ack_responder_; AckResponder ack_responder_;
ConcurrentVector<Timestamp> timestamps_; ConcurrentVector<Timestamp> timestamps_;
ConcurrentVector<PkType> pks_; ConcurrentVector<PkType> pks_;
int64_t record_size_ = 0;
private: private:
std::shared_ptr<TmpBitmap> lru_; std::shared_ptr<TmpBitmap> lru_;
......
...@@ -43,11 +43,12 @@ struct InsertRecord { ...@@ -43,11 +43,12 @@ struct InsertRecord {
std::vector<SegOffset> std::vector<SegOffset>
search_pk(const PkType pk, Timestamp timestamp) const { search_pk(const PkType pk, Timestamp timestamp) const {
std::vector<SegOffset> res_offsets; std::vector<SegOffset> res_offsets;
auto [iter_b, iter_e] = pk2offset_.equal_range(pk); auto offset_iter = pk2offset_.find(pk);
for (auto iter = iter_b; iter != iter_e; ++iter) { if (offset_iter != pk2offset_.end()) {
auto offset = SegOffset(iter->second); for (auto offset : offset_iter->second) {
if (timestamps_[offset.get()] <= timestamp) { if (timestamps_[offset] <= timestamp) {
res_offsets.push_back(offset); res_offsets.push_back(SegOffset(offset));
}
} }
} }
...@@ -57,11 +58,12 @@ struct InsertRecord { ...@@ -57,11 +58,12 @@ struct InsertRecord {
std::vector<SegOffset> std::vector<SegOffset>
search_pk(const PkType pk, int64_t insert_barrier) const { search_pk(const PkType pk, int64_t insert_barrier) const {
std::vector<SegOffset> res_offsets; std::vector<SegOffset> res_offsets;
auto [iter_b, iter_e] = pk2offset_.equal_range(pk); auto offset_iter = pk2offset_.find(pk);
for (auto iter = iter_b; iter != iter_e; ++iter) { if (offset_iter != pk2offset_.end()) {
auto offset = SegOffset(iter->second); for (auto offset : offset_iter->second) {
if (offset.get() < insert_barrier) { if (offset < insert_barrier) {
res_offsets.push_back(offset); res_offsets.push_back(SegOffset(offset));
}
} }
} }
...@@ -70,7 +72,7 @@ struct InsertRecord { ...@@ -70,7 +72,7 @@ struct InsertRecord {
void void
insert_pk(const PkType pk, int64_t offset) { insert_pk(const PkType pk, int64_t offset) {
pk2offset_.insert(std::make_pair(pk, offset)); pk2offset_[pk].insert(offset);
} }
bool bool
......
...@@ -157,11 +157,10 @@ SegmentGrowingImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) { ...@@ -157,11 +157,10 @@ SegmentGrowingImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps); auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps);
// step 2: fill pks and timestamps // step 2: fill pks and timestamps
deleted_record_.pks_.set_data_raw(0, pks.data(), size); auto reserved_begin = deleted_record_.reserved.fetch_add(size);
deleted_record_.timestamps_.set_data_raw(0, timestamps, size); deleted_record_.pks_.set_data_raw(reserved_begin, pks.data(), size);
deleted_record_.ack_responder_.AddSegment(0, size); deleted_record_.timestamps_.set_data_raw(reserved_begin, timestamps, size);
deleted_record_.reserved.fetch_add(size); deleted_record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
deleted_record_.record_size_ = size;
} }
SpanBase SpanBase
......
...@@ -64,6 +64,11 @@ class SegmentGrowingImpl : public SegmentGrowing { ...@@ -64,6 +64,11 @@ class SegmentGrowingImpl : public SegmentGrowing {
std::string std::string
debug() const override; debug() const override;
// Accessor for this segment's numeric ID (the id_ member).
// Overrides the pure-virtual declared on SegmentInterface.
int64_t
get_segment_id() const override {
    return id_;
}
public: public:
const InsertRecord& const InsertRecord&
get_insert_record() const { get_insert_record() const {
......
...@@ -69,6 +69,9 @@ class SegmentInterface { ...@@ -69,6 +69,9 @@ class SegmentInterface {
virtual void virtual void
LoadDeletedRecord(const LoadDeletedRecordInfo& info) = 0; LoadDeletedRecord(const LoadDeletedRecordInfo& info) = 0;
virtual int64_t
get_segment_id() const = 0;
}; };
// internal API for DSL calculation // internal API for DSL calculation
......
...@@ -254,11 +254,10 @@ SegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) { ...@@ -254,11 +254,10 @@ SegmentSealedImpl::LoadDeletedRecord(const LoadDeletedRecordInfo& info) {
auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps); auto timestamps = reinterpret_cast<const Timestamp*>(info.timestamps);
// step 2: fill pks and timestamps // step 2: fill pks and timestamps
deleted_record_.pks_.set_data_raw(0, pks.data(), size); auto reserved_begin = deleted_record_.reserved.fetch_add(size);
deleted_record_.timestamps_.set_data_raw(0, timestamps, size); deleted_record_.pks_.set_data_raw(reserved_begin, pks.data(), size);
deleted_record_.ack_responder_.AddSegment(0, size); deleted_record_.timestamps_.set_data_raw(reserved_begin, timestamps, size);
deleted_record_.reserved.fetch_add(size); deleted_record_.ack_responder_.AddSegment(reserved_begin, reserved_begin + size);
deleted_record_.record_size_ = size;
} }
// internal API: support scalar index only // internal API: support scalar index only
......
...@@ -50,6 +50,11 @@ class SegmentSealedImpl : public SegmentSealed { ...@@ -50,6 +50,11 @@ class SegmentSealedImpl : public SegmentSealed {
bool bool
HasFieldData(FieldId field_id) const override; HasFieldData(FieldId field_id) const override;
// Accessor for this sealed segment's numeric ID (the id_ member).
// Overrides the pure-virtual declared on SegmentInterface.
int64_t
get_segment_id() const override {
    return id_;
}
public: public:
int64_t int64_t
GetMemoryUsageInBytes() const override; GetMemoryUsageInBytes() const override;
......
...@@ -380,37 +380,43 @@ get_deleted_bitmap(int64_t del_barrier, ...@@ -380,37 +380,43 @@ get_deleted_bitmap(int64_t del_barrier,
DeletedRecord& delete_record, DeletedRecord& delete_record,
const InsertRecord& insert_record, const InsertRecord& insert_record,
Timestamp query_timestamp) { Timestamp query_timestamp) {
auto old = delete_record.get_lru_entry();
// if insert_barrier and del_barrier have not changed, use cache data directly // if insert_barrier and del_barrier have not changed, use cache data directly
if (old->bitmap_ptr->size() == insert_barrier) { bool hit_cache = false;
if (old->del_barrier == del_barrier) { int64_t old_del_barrier = 0;
return old; auto current = delete_record.clone_lru_entry(insert_barrier, del_barrier, old_del_barrier, hit_cache);
} if (hit_cache) {
return current;
} }
auto current = old->clone(insert_barrier);
current->del_barrier = del_barrier;
auto bitmap = current->bitmap_ptr; auto bitmap = current->bitmap_ptr;
int64_t start, end; int64_t start, end;
if (del_barrier < old->del_barrier) { if (del_barrier < old_del_barrier) {
// in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp // in this case, ts of delete record[current_del_barrier : old_del_barrier] > query_timestamp
// so these deletion records do not take effect in query/search // so these deletion records do not take effect in query/search
// so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] wil be reset to 0 // so bitmap corresponding to those pks in delete record[current_del_barrier:old_del_barrier] wil be reset to 0
// for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0] // for example, current_del_barrier = 2, query_time = 120, the bitmap will be reset to [0, 1, 1, 0, 0, 0, 0, 0]
start = del_barrier; start = del_barrier;
end = old->del_barrier; end = old_del_barrier;
} else { } else {
// the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier] // the cache is not enough, so update bitmap using new pks in delete record[old_del_barrier:current_del_barrier]
// for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0] // for example, current_del_barrier = 4, query_time = 300, bitmap will be updated to [0, 1, 1, 0, 1, 1, 0, 0]
start = old->del_barrier; start = old_del_barrier;
end = del_barrier; end = del_barrier;
} }
// Avoid invalid calculations when there are a lot of repeated delete pks
std::unordered_map<PkType, Timestamp> delete_timestamps;
for (auto del_index = start; del_index < end; ++del_index) { for (auto del_index = start; del_index < end; ++del_index) {
// get pk in delete logs
auto pk = delete_record.pks_[del_index]; auto pk = delete_record.pks_[del_index];
// find insert data which has same pk auto timestamp = delete_record.timestamps_[del_index];
delete_timestamps[pk] = timestamp > delete_timestamps[pk] ? timestamp : delete_timestamps[pk];
}
for (auto iter = delete_timestamps.begin(); iter != delete_timestamps.end(); iter++) {
auto pk = iter->first;
auto delete_timestamp = iter->second;
auto segOffsets = insert_record.search_pk(pk, insert_barrier); auto segOffsets = insert_record.search_pk(pk, insert_barrier);
for (auto offset : segOffsets) { for (auto offset : segOffsets) {
int64_t insert_row_offset = offset.get(); int64_t insert_row_offset = offset.get();
...@@ -419,22 +425,22 @@ get_deleted_bitmap(int64_t del_barrier, ...@@ -419,22 +425,22 @@ get_deleted_bitmap(int64_t del_barrier,
// insert after delete with same pk, delete will not task effect on this insert record // insert after delete with same pk, delete will not task effect on this insert record
// and reset bitmap to 0 // and reset bitmap to 0
if (insert_record.timestamps_[insert_row_offset] > delete_record.timestamps_[del_index]) { if (insert_record.timestamps_[insert_row_offset] > delete_timestamp) {
bitmap->reset(insert_row_offset); bitmap->reset(insert_row_offset);
continue; continue;
} }
// the deletion record do not take effect in search/query // the deletion record do not take effect in search/query
// and reset bitmap to 0 // and reset bitmap to 0
if (delete_record.timestamps_[del_index] > query_timestamp) { if (delete_timestamp > query_timestamp) {
bitmap->reset(insert_row_offset); bitmap->reset(insert_row_offset);
continue; continue;
} }
// insert data corresponding to the insert_row_offset will be ignored in search/query // insert data corresponding to the insert_row_offset will be ignored in search/query
bitmap->set(insert_row_offset); bitmap->set(insert_row_offset);
} }
} }
delete_record.insert_lru_entry(current); delete_record.insert_lru_entry(current);
return current; return current;
} }
......
...@@ -54,7 +54,7 @@ func (q *queryTask) PreExecute(ctx context.Context) error { ...@@ -54,7 +54,7 @@ func (q *queryTask) PreExecute(ctx context.Context) error {
func (q *queryTask) queryOnStreaming() error { func (q *queryTask) queryOnStreaming() error {
// check ctx timeout // check ctx timeout
if !funcutil.CheckCtxValid(q.Ctx()) { if !funcutil.CheckCtxValid(q.Ctx()) {
return errors.New("search context timeout") return errors.New("query context timeout")
} }
// check if collection has been released, check streaming since it's released first // check if collection has been released, check streaming since it's released first
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册