From 5ad4cdda251b448d4d5d5f7ee83ee3d3f5d10410 Mon Sep 17 00:00:00 2001 From: Cai Yudong Date: Wed, 27 Oct 2021 20:02:26 +0800 Subject: [PATCH] Clean search result duplicates removal debug log (#10769) Signed-off-by: yudong.cai --- internal/core/src/segcore/reduce_c.cpp | 6 +++++- internal/proxy/task.go | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/internal/core/src/segcore/reduce_c.cpp b/internal/core/src/segcore/reduce_c.cpp index 0f2ccb56f..614963e78 100644 --- a/internal/core/src/segcore/reduce_c.cpp +++ b/internal/core/src/segcore/reduce_c.cpp @@ -83,6 +83,7 @@ GetResultData(std::vector>& search_records, search_records[index].push_back(result_pair.offset_++); } #else + int64_t skip_dup_cnt = 0; float prev_dis = MAXFLOAT; std::unordered_set prev_pk_set; while (loc_offset - query_offset < topk) { @@ -111,11 +112,14 @@ GetResultData(std::vector>& search_records, prev_pk_set.insert(curr_pk); } else { // the entity with same distance and same primary key must be duplicated - LOG_SEGCORE_DEBUG_ << "skip duplicated search result, primary key " << curr_pk; + skip_dup_cnt++; } } result_pair.offset_++; } + if (skip_dup_cnt > 0) { + LOG_SEGCORE_DEBUG_ << "skip duplicated search result, count = " << skip_dup_cnt; + } #endif } diff --git a/internal/proxy/task.go b/internal/proxy/task.go index 24b4285d1..f27597129 100644 --- a/internal/proxy/task.go +++ b/internal/proxy/task.go @@ -1925,6 +1925,7 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in //printSearchResultData(sData, strconv.FormatInt(int64(i), 10)) } + var skipDupCnt int64 = 0 var realTopK int64 = -1 for i := int64(0); i < nq; i++ { offsets := make([]int64, len(searchResultData)) @@ -1967,10 +1968,7 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in j++ } else { // entity with same id and same score must be duplicated - log.Debug("skip duplicated search result", - zap.Int64("id", id), - zap.Float32("score", score), - zap.Float32("prevScore", prevScore)) + skipDupCnt++ } } offsets[sel]++ @@ -1982,7 +1980,9 @@ func reduceSearchResultData(searchResultData []*schemapb.SearchResultData, nq in realTopK = j ret.Results.Topks = append(ret.Results.Topks, realTopK) } - + if skipDupCnt > 0 { + log.Debug("skip duplicated search result", zap.Int64("count", skipDupCnt)) + } ret.Results.TopK = realTopK if metricType != "IP" { -- GitLab