未验证 提交 6f18587f 编写于 作者: X xige-16 提交者: GitHub

Fix small segment compaction (#21327)

Signed-off-by: xige-16 <xi.ge@zilliz.com>
上级 b5e79e7f
......@@ -338,7 +338,7 @@ dataCoord:
smallProportion: 0.5 # The segment is considered as "small segment" when its # of rows is smaller than
# (smallProportion * segment max # of rows).
# A compaction will happen on small segments if the segment after compaction will have
compactableProportion: 0.5
compactableProportion: 0.85
# over (compactableProportion * segment max # of rows) rows.
# MUST BE GREATER THAN OR EQUAL TO <smallProportion>!!!
# During compaction, a segment's # of rows may exceed segment max # of rows by up to (expansionRate-1) * 100%.
......
......@@ -654,8 +654,7 @@ func (t *compactionTrigger) generatePlans(segments []*SegmentInfo, force bool, i
}
// only merge if the candidate count is at least MinSegmentToMerge, or if the target row count is large enough
if len(bucket) >= Params.DataCoordCfg.MinSegmentToMerge.GetAsInt() ||
len(bucket) > 1 &&
targetRow > int64(float64(segment.GetMaxRowNum())*Params.DataCoordCfg.SegmentCompactableProportion.GetAsFloat()) {
len(bucket) > 1 && t.isCompactableSegment(targetRow, segment) {
plan := segmentsToPlan(bucket, compactTime)
log.Info("generate a plan for small candidates",
zap.Int64s("plan segmentIDs", lo.Map(bucket, getSegmentIDs)),
......@@ -798,6 +797,18 @@ func (t *compactionTrigger) isSmallSegment(segment *SegmentInfo) bool {
return segment.GetNumOfRows() < int64(float64(segment.GetMaxRowNum())*Params.DataCoordCfg.SegmentSmallProportion.GetAsFloat())
}
// isCompactableSegment reports whether targetRow is strictly greater than the
// compactable threshold of segment, i.e. the segment's max row number scaled
// by the configured compactable proportion (truncated to int64).
//
// The effective proportion is never lower than the configured small-segment
// proportion: a smaller configured value is raised to it, which avoids
// invalid single segment compaction.
func (t *compactionTrigger) isCompactableSegment(targetRow int64, segment *SegmentInfo) bool {
	floor := Params.DataCoordCfg.SegmentSmallProportion.GetAsFloat()
	proportion := Params.DataCoordCfg.SegmentCompactableProportion.GetAsFloat()
	if proportion < floor {
		// Clamp a misconfigured value up to the small-segment proportion.
		proportion = floor
	}

	threshold := int64(float64(segment.GetMaxRowNum()) * proportion)
	return targetRow > threshold
}
// isExpandableSmallSegment reports whether the segment's current row count is
// strictly below (expansionRate - 1) times its max row number, with the
// product truncated to int64 before the comparison.
func isExpandableSmallSegment(segment *SegmentInfo) bool {
	// Fraction of the max row number a segment may grow beyond its limit.
	extraRate := Params.DataCoordCfg.SegmentExpansionRate.GetAsFloat() - 1
	limit := int64(float64(segment.GetMaxRowNum()) * extraRate)
	return segment.GetNumOfRows() < limit
}
......
......@@ -1156,8 +1156,8 @@ func Test_compactionTrigger_PrioritizedCandi(t *testing.T) {
spy := (tt.fields.compactionHandler).(*spyCompactionHandler)
select {
case val := <-spy.spyChan:
// 5 segments in the final pick list
assert.Equal(t, len(val.SegmentBinlogs), 5)
// 6 segments in the final pick list
assert.Equal(t, len(val.SegmentBinlogs), 6)
return
case <-time.After(3 * time.Second):
assert.Fail(t, "failed to get plan")
......
......@@ -2177,7 +2177,7 @@ the number of binlog file reaches to max value.`,
p.SegmentCompactableProportion = ParamItem{
Key: "dataCoord.segment.compactableProportion",
Version: "2.2.1",
DefaultValue: "0.5",
DefaultValue: "0.85",
Doc: `(smallProportion * segment max # of rows).
A compaction will happen on small segments if the segment after compaction will have`,
Export: true,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册