未验证 提交 4f6b8731 编写于 作者: Xiaofan 提交者: GitHub

Refine request merger (#20189)

Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>
Signed-off-by: xiaofan-luan <xiaofan.luan@zilliz.com>
上级 d737a661
...@@ -83,7 +83,7 @@ func (mgr *TargetManager) RemoveSegment(segmentID int64) { ...@@ -83,7 +83,7 @@ func (mgr *TargetManager) RemoveSegment(segmentID int64) {
func (mgr *TargetManager) removeSegment(segmentID int64) { func (mgr *TargetManager) removeSegment(segmentID int64) {
delete(mgr.segments, segmentID) delete(mgr.segments, segmentID)
log.Info("segment removed from targets") log.Info("segment removed from targets", zap.Int64("segment", segmentID))
} }
// AddSegment adds segment into target set, // AddSegment adds segment into target set,
......
...@@ -133,6 +133,7 @@ func (ex *Executor) scheduleRequests() { ...@@ -133,6 +133,7 @@ func (ex *Executor) scheduleRequests() {
} }
func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) { func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) {
startTs := time.Now()
task := mergeTask.tasks[0] task := mergeTask.tasks[0]
action := task.Actions()[mergeTask.steps[0]] action := task.Actions()[mergeTask.steps[0]]
...@@ -152,6 +153,7 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) { ...@@ -152,6 +153,7 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) {
log := log.With( log := log.With(
zap.Int64s("taskIDs", taskIDs), zap.Int64s("taskIDs", taskIDs),
zap.Int64("collectionID", task.CollectionID()), zap.Int64("collectionID", task.CollectionID()),
zap.String("shard", task.Shard()),
zap.Int64s("segmentIDs", segments), zap.Int64s("segmentIDs", segments),
zap.Int64("nodeID", action.Node()), zap.Int64("nodeID", action.Node()),
zap.Int64("source", task.SourceID()), zap.Int64("source", task.SourceID()),
...@@ -177,7 +179,8 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) { ...@@ -177,7 +179,8 @@ func (ex *Executor) processMergeTask(mergeTask *LoadSegmentsTask) {
log.Warn("failed to load segment", zap.String("reason", status.GetReason())) log.Warn("failed to load segment", zap.String("reason", status.GetReason()))
return return
} }
log.Info("load segments done") elapsed := time.Since(startTs)
log.Info("load segments done", zap.Int64("taskID", task.ID()), zap.Duration("timeTaken", elapsed))
} }
func (ex *Executor) removeAction(task Task, step int) { func (ex *Executor) removeAction(task Task, step int) {
...@@ -275,7 +278,7 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error { ...@@ -275,7 +278,7 @@ func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
func (ex *Executor) releaseSegment(task *SegmentTask, step int) { func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
defer ex.removeAction(task, step) defer ex.removeAction(task, step)
startTs := time.Now()
action := task.Actions()[step].(*SegmentAction) action := task.Actions()[step].(*SegmentAction)
defer action.isReleaseCommitted.Store(true) defer action.isReleaseCommitted.Store(true)
...@@ -332,7 +335,8 @@ func (ex *Executor) releaseSegment(task *SegmentTask, step int) { ...@@ -332,7 +335,8 @@ func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
log.Warn("failed to release segment", zap.String("reason", status.GetReason())) log.Warn("failed to release segment", zap.String("reason", status.GetReason()))
return return
} }
log.Info("release segment done") elapsed := time.Since(startTs)
log.Info("release segment done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
} }
func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) { func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) {
...@@ -347,7 +351,7 @@ func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) { ...@@ -347,7 +351,7 @@ func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) {
func (ex *Executor) subDmChannel(task *ChannelTask, step int) error { func (ex *Executor) subDmChannel(task *ChannelTask, step int) error {
defer ex.removeAction(task, step) defer ex.removeAction(task, step)
startTs := time.Now()
action := task.Actions()[step].(*ChannelAction) action := task.Actions()[step].(*ChannelAction)
log := log.With( log := log.With(
zap.Int64("taskID", task.ID()), zap.Int64("taskID", task.ID()),
...@@ -407,13 +411,14 @@ func (ex *Executor) subDmChannel(task *ChannelTask, step int) error { ...@@ -407,13 +411,14 @@ func (ex *Executor) subDmChannel(task *ChannelTask, step int) error {
log.Warn("failed to subscribe DmChannel", zap.String("reason", status.GetReason())) log.Warn("failed to subscribe DmChannel", zap.String("reason", status.GetReason()))
return err return err
} }
log.Info("subscribe DmChannel done") elapsed := time.Since(startTs)
log.Info("subscribe DmChannel done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
return nil return nil
} }
func (ex *Executor) unsubDmChannel(task *ChannelTask, step int) error { func (ex *Executor) unsubDmChannel(task *ChannelTask, step int) error {
defer ex.removeAction(task, step) defer ex.removeAction(task, step)
startTs := time.Now()
action := task.Actions()[step].(*ChannelAction) action := task.Actions()[step].(*ChannelAction)
log := log.With( log := log.With(
zap.Int64("taskID", task.ID()), zap.Int64("taskID", task.ID()),
...@@ -444,5 +449,8 @@ func (ex *Executor) unsubDmChannel(task *ChannelTask, step int) error { ...@@ -444,5 +449,8 @@ func (ex *Executor) unsubDmChannel(task *ChannelTask, step int) error {
log.Warn("failed to unsubscribe DmChannel", zap.String("reason", status.GetReason())) log.Warn("failed to unsubscribe DmChannel", zap.String("reason", status.GetReason()))
return err return err
} }
elapsed := time.Since(startTs)
log.Info("unsubscribe DmChannel done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
return nil return nil
} }
...@@ -23,7 +23,6 @@ import ( ...@@ -23,7 +23,6 @@ import (
"github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/log"
. "github.com/milvus-io/milvus/internal/querycoordv2/params" . "github.com/milvus-io/milvus/internal/querycoordv2/params"
"github.com/milvus-io/milvus/internal/util/typeutil"
"go.uber.org/zap" "go.uber.org/zap"
) )
...@@ -31,24 +30,21 @@ import ( ...@@ -31,24 +30,21 @@ import (
const waitQueueCap = 256 const waitQueueCap = 256
type Merger[K comparable, R any] struct { type Merger[K comparable, R any] struct {
stopCh chan struct{} stopCh chan struct{}
wg sync.WaitGroup wg sync.WaitGroup
queues map[K][]MergeableTask[K, R] // TaskID -> Queue
processors *typeutil.ConcurrentSet[K] // Tasks of having processor waitQueue chan MergeableTask[K, R]
queues map[K]chan MergeableTask[K, R] // TaskID -> Queue outCh chan MergeableTask[K, R]
waitQueue chan MergeableTask[K, R]
outCh chan MergeableTask[K, R]
stopOnce sync.Once stopOnce sync.Once
} }
func NewMerger[K comparable, R any]() *Merger[K, R] { func NewMerger[K comparable, R any]() *Merger[K, R] {
return &Merger[K, R]{ return &Merger[K, R]{
stopCh: make(chan struct{}), stopCh: make(chan struct{}),
processors: typeutil.NewConcurrentSet[K](), queues: make(map[K][]MergeableTask[K, R]),
queues: make(map[K]chan MergeableTask[K, R]), waitQueue: make(chan MergeableTask[K, R], waitQueueCap),
waitQueue: make(chan MergeableTask[K, R], waitQueueCap), outCh: make(chan MergeableTask[K, R], Params.QueryCoordCfg.TaskMergeCap),
outCh: make(chan MergeableTask[K, R], Params.QueryCoordCfg.TaskMergeCap),
} }
} }
...@@ -60,7 +56,6 @@ func (merger *Merger[K, R]) Stop() { ...@@ -60,7 +56,6 @@ func (merger *Merger[K, R]) Stop() {
merger.stopOnce.Do(func() { merger.stopOnce.Do(func() {
close(merger.stopCh) close(merger.stopCh)
merger.wg.Wait() merger.wg.Wait()
close(merger.outCh)
}) })
} }
...@@ -73,40 +68,23 @@ func (merger *Merger[K, R]) schedule(ctx context.Context) { ...@@ -73,40 +68,23 @@ func (merger *Merger[K, R]) schedule(ctx context.Context) {
go func() { go func() {
defer merger.wg.Done() defer merger.wg.Done()
ticker := time.NewTicker(500 * time.Millisecond) ticker := time.NewTicker(500 * time.Millisecond)
defer ticker.Stop()
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
close(merger.outCh)
log.Info("Merger stopped due to context canceled") log.Info("Merger stopped due to context canceled")
return return
case <-merger.stopCh: case <-merger.stopCh:
close(merger.outCh)
log.Info("Merger stopped") log.Info("Merger stopped")
return return
case task := <-merger.waitQueue:
queue, ok := merger.queues[task.ID()]
if !ok {
queue = make(chan MergeableTask[K, R], Params.QueryCoordCfg.TaskMergeCap)
merger.queues[task.ID()] = queue
}
outer:
for {
select {
case queue <- task:
break outer
default: // Queue full, flush and retry
merger.merge(task.ID(), queue)
}
}
case <-ticker.C: case <-ticker.C:
for id, queue := range merger.queues { merger.drain()
if len(queue) > 0 { for id := range merger.queues {
merger.merge(id, queue) merger.triggerExecution(id)
} else {
// Release resource if no task for the queue
delete(merger.queues, id)
}
} }
} }
} }
...@@ -126,32 +104,44 @@ func (merger *Merger[K, R]) Add(task MergeableTask[K, R]) { ...@@ -126,32 +104,44 @@ func (merger *Merger[K, R]) Add(task MergeableTask[K, R]) {
merger.waitQueue <- task merger.waitQueue <- task
} }
func (merger *Merger[K, R]) merge(id K, queue chan MergeableTask[K, R]) { func (merger *Merger[K, R]) drain() {
if merger.isStopped() { for {
return select {
} case task := <-merger.waitQueue:
if !merger.processors.Insert(id) { queue, ok := merger.queues[task.ID()]
return if !ok {
queue = []MergeableTask[K, R]{}
}
queue = append(queue, task)
merger.queues[task.ID()] = queue
default:
return
}
} }
merger.wg.Add(1)
go merger.mergeQueue(id, queue)
} }
// mergeQueue merges tasks in the given queue, func (merger *Merger[K, R]) triggerExecution(id K) {
// it only processes tasks with the number of the length of queue at the time, tasks := merger.queues[id]
// to avoid leaking goroutines delete(merger.queues, id)
func (merger *Merger[K, R]) mergeQueue(id K, queue chan MergeableTask[K, R]) {
defer merger.wg.Done() var task MergeableTask[K, R]
defer merger.processors.Remove(id) merged := 0
for i := 0; i < len(tasks); i++ {
len := len(queue) if merged == 0 {
task := <-queue task = tasks[i]
for i := 1; i < len; i++ { } else {
task.Merge(<-queue) task.Merge(tasks[i])
}
merged++
if merged >= int(Params.QueryCoordCfg.TaskMergeCap) {
merger.outCh <- task
merged = 0
}
}
if merged != 0 {
merger.outCh <- task
} }
log.Info("merge tasks done", log.Info("merge tasks done, trigger execution", zap.Any("mergeID", task.ID()))
zap.Any("mergeID", task.ID()))
merger.outCh <- task
} }
...@@ -134,6 +134,9 @@ func (suite *MergerSuite) TestMerge() { ...@@ -134,6 +134,9 @@ func (suite *MergerSuite) TestMerge() {
suite.Len(task.steps, 3) suite.Len(task.steps, 3)
suite.EqualValues(1, task.Result().DeltaPositions[0].Timestamp) suite.EqualValues(1, task.Result().DeltaPositions[0].Timestamp)
suite.EqualValues(1, task.Result().DeltaPositions[1].Timestamp) suite.EqualValues(1, task.Result().DeltaPositions[1].Timestamp)
suite.merger.Stop()
_, ok := <-suite.merger.Chan()
suite.Equal(ok, false)
} }
func TestMerger(t *testing.T) { func TestMerger(t *testing.T) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册