From 6f18587f3590a1557d9e13700074a978d19d6325 Mon Sep 17 00:00:00 2001 From: xige-16 Date: Wed, 26 Jul 2023 14:49:01 +0800 Subject: [PATCH] Fix small segment compaction (#21327) Signed-off-by: xige-16 --- configs/milvus.yaml | 2 +- internal/datacoord/compaction_trigger.go | 15 +++++++++++++-- internal/datacoord/compaction_trigger_test.go | 4 ++-- pkg/util/paramtable/component_param.go | 2 +- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/configs/milvus.yaml b/configs/milvus.yaml index 42b2ac86d..960543ce1 100644 --- a/configs/milvus.yaml +++ b/configs/milvus.yaml @@ -338,7 +338,7 @@ dataCoord: smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than # (smallProportion * segment max # of rows). # A compaction will happen on small segments if the segment after compaction will have - compactableProportion: 0.5 + compactableProportion: 0.85 # over (compactableProportion * segment max # of rows) rows. # MUST BE GREATER THAN OR EQUAL TO smallProportion!!! # During compaction, the size of segment # of rows is able to exceed segment max # of rows by (expansionRate-1) * 100%. 
diff --git a/internal/datacoord/compaction_trigger.go b/internal/datacoord/compaction_trigger.go index ac008a209..f83a583ee 100644 --- a/internal/datacoord/compaction_trigger.go +++ b/internal/datacoord/compaction_trigger.go @@ -654,8 +654,7 @@ func (t *compactionTrigger) generatePlans(segments []*SegmentInfo, force bool, i } // only merge if candidate number is large than MinSegmentToMerge or if target row is large enough if len(bucket) >= Params.DataCoordCfg.MinSegmentToMerge.GetAsInt() || - len(bucket) > 1 && - targetRow > int64(float64(segment.GetMaxRowNum())*Params.DataCoordCfg.SegmentCompactableProportion.GetAsFloat()) { + len(bucket) > 1 && t.isCompactableSegment(targetRow, segment) { plan := segmentsToPlan(bucket, compactTime) log.Info("generate a plan for small candidates", zap.Int64s("plan segmentIDs", lo.Map(bucket, getSegmentIDs)), @@ -798,6 +797,18 @@ func (t *compactionTrigger) isSmallSegment(segment *SegmentInfo) bool { return segment.GetNumOfRows() < int64(float64(segment.GetMaxRowNum())*Params.DataCoordCfg.SegmentSmallProportion.GetAsFloat()) } +func (t *compactionTrigger) isCompactableSegment(targetRow int64, segment *SegmentInfo) bool { + smallProportion := Params.DataCoordCfg.SegmentSmallProportion.GetAsFloat() + compactableProportion := Params.DataCoordCfg.SegmentCompactableProportion.GetAsFloat() + + // avoid invalid single segment compaction + if compactableProportion < smallProportion { + compactableProportion = smallProportion + } + + return targetRow > int64(float64(segment.GetMaxRowNum())*compactableProportion) +} + func isExpandableSmallSegment(segment *SegmentInfo) bool { return segment.GetNumOfRows() < int64(float64(segment.GetMaxRowNum())*(Params.DataCoordCfg.SegmentExpansionRate.GetAsFloat()-1)) } diff --git a/internal/datacoord/compaction_trigger_test.go b/internal/datacoord/compaction_trigger_test.go index 3ad34d514..2d89ffaa5 100644 --- a/internal/datacoord/compaction_trigger_test.go +++ 
b/internal/datacoord/compaction_trigger_test.go @@ -1156,8 +1156,8 @@ func Test_compactionTrigger_PrioritizedCandi(t *testing.T) { spy := (tt.fields.compactionHandler).(*spyCompactionHandler) select { case val := <-spy.spyChan: - // 5 segments in the final pick list - assert.Equal(t, len(val.SegmentBinlogs), 5) + // 6 segments in the final pick list + assert.Equal(t, len(val.SegmentBinlogs), 6) return case <-time.After(3 * time.Second): assert.Fail(t, "failed to get plan") diff --git a/pkg/util/paramtable/component_param.go b/pkg/util/paramtable/component_param.go index d632affbb..370485b0b 100644 --- a/pkg/util/paramtable/component_param.go +++ b/pkg/util/paramtable/component_param.go @@ -2177,7 +2177,7 @@ the number of binlog file reaches to max value.`, p.SegmentCompactableProportion = ParamItem{ Key: "dataCoord.segment.compactableProportion", Version: "2.2.1", - DefaultValue: "0.5", + DefaultValue: "0.85", Doc: `(smallProportion * segment max # of rows). A compaction will happen on small segments if the segment after compaction will have`, Export: true, -- GitLab