impl.go 28.7 KB
Newer Older
1 2 3 4 5 6
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
7 8
// with the License. You may obtain a copy of the License at
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11 12 13 14 15
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16

17 18 19 20
package querynode

import (
	"context"
21
	"errors"
22
	"fmt"
23
	"sync"
24 25 26

	"go.uber.org/zap"

27
	"github.com/milvus-io/milvus/internal/common"
X
Xiangyu Wang 已提交
28
	"github.com/milvus-io/milvus/internal/log"
29
	"github.com/milvus-io/milvus/internal/metrics"
X
Xiangyu Wang 已提交
30 31 32
	"github.com/milvus-io/milvus/internal/proto/commonpb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/proto/milvuspb"
33
	"github.com/milvus-io/milvus/internal/proto/querypb"
X
Xiangyu Wang 已提交
34
	queryPb "github.com/milvus-io/milvus/internal/proto/querypb"
35
	"github.com/milvus-io/milvus/internal/util/metricsinfo"
36
	"github.com/milvus-io/milvus/internal/util/timerecord"
X
Xiangyu Wang 已提交
37
	"github.com/milvus-io/milvus/internal/util/typeutil"
38 39
)

40
// GetComponentStates returns information about whether the node is healthy
41 42 43 44 45 46
func (node *QueryNode) GetComponentStates(ctx context.Context) (*internalpb.ComponentStates, error) {
	stats := &internalpb.ComponentStates{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
	}
47 48 49
	code, ok := node.stateCode.Load().(internalpb.StateCode)
	if !ok {
		errMsg := "unexpected error in type assertion"
50 51
		stats.Status = &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
52
			Reason:    errMsg,
53
		}
G
godchen 已提交
54
		return stats, nil
55 56 57 58
	}
	nodeID := common.NotRegisteredID
	if node.session != nil && node.session.Registered() {
		nodeID = node.session.ServerID
59 60
	}
	info := &internalpb.ComponentInfo{
61
		NodeID:    nodeID,
62 63 64 65
		Role:      typeutil.QueryNodeRole,
		StateCode: code,
	}
	stats.State = info
66
	log.Debug("Get QueryNode component state done", zap.Any("stateCode", info.StateCode))
67 68 69
	return stats, nil
}

70 71
// GetTimeTickChannel returns the time tick channel
// TimeTickChannel contains many time tick messages, which will be sent by query nodes
72 73 74 75 76 77
func (node *QueryNode) GetTimeTickChannel(ctx context.Context) (*milvuspb.StringResponse, error) {
	return &milvuspb.StringResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
			Reason:    "",
		},
78
		Value: Params.CommonCfg.QueryCoordTimeTick,
79 80 81
	}, nil
}

82
// GetStatisticsChannel returns the statistics channel
83
// Statistics channel contains statistics infos of query nodes, such as segment infos, memory infos
84 85 86 87 88 89
func (node *QueryNode) GetStatisticsChannel(ctx context.Context) (*milvuspb.StringResponse, error) {
	return &milvuspb.StringResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
			Reason:    "",
		},
90
		Value: Params.CommonCfg.QueryNodeStats,
91 92 93
	}, nil
}

G
godchen 已提交
94
// WatchDmChannels create consumers on dmChannels to receive Incremental data,which is the important part of real-time query
95
func (node *QueryNode) WatchDmChannels(ctx context.Context, in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error) {
96 97
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
98
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
99 100 101 102
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
103
		return status, nil
104
	}
105 106 107 108 109 110 111
	dct := &watchDmChannelsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
112 113
	}

114 115
	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
116 117
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
118
			Reason:    err.Error(),
119
		}
X
Xiaofan 已提交
120
		log.Warn(err.Error())
G
godchen 已提交
121
		return status, nil
122
	}
X
Xiaofan 已提交
123
	log.Info("watchDmChannelsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()), zap.Int64("replicaID", in.GetReplicaID()))
124
	waitFunc := func() (*commonpb.Status, error) {
125
		err = dct.WaitToFinish()
126
		if err != nil {
127 128 129 130
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
X
Xiaofan 已提交
131
			log.Warn(err.Error())
G
godchen 已提交
132
			return status, nil
133
		}
X
Xiaofan 已提交
134
		log.Info("watchDmChannelsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
135 136 137
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
138
	}
139 140

	return waitFunc()
141 142
}

G
godchen 已提交
143
// WatchDeltaChannels create consumers on dmChannels to receive Incremental data,which is the important part of real-time query
144
func (node *QueryNode) WatchDeltaChannels(ctx context.Context, in *queryPb.WatchDeltaChannelsRequest) (*commonpb.Status, error) {
145 146
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
147
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
148 149 150 151
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
152
		return status, nil
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168
	}
	dct := &watchDeltaChannelsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
X
Xiaofan 已提交
169
		log.Warn(err.Error())
G
godchen 已提交
170
		return status, nil
171
	}
X
Xiaofan 已提交
172 173

	log.Info("watchDeltaChannelsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
174 175 176 177 178 179 180 181

	waitFunc := func() (*commonpb.Status, error) {
		err = dct.WaitToFinish()
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
X
Xiaofan 已提交
182
			log.Warn(err.Error())
G
godchen 已提交
183
			return status, nil
184
		}
X
Xiaofan 已提交
185 186

		log.Info("watchDeltaChannelsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
187 188 189 190 191 192
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
	}

	return waitFunc()
193 194
}

195
// LoadSegments load historical data into query node, historical data can be vector data or index
196
func (node *QueryNode) LoadSegments(ctx context.Context, in *queryPb.LoadSegmentsRequest) (*commonpb.Status, error) {
197 198
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
199
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
200 201 202 203
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
204
		return status, nil
205
	}
206 207 208 209 210 211 212 213 214
	dct := &loadSegmentsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

215 216 217 218
	segmentIDs := make([]UniqueID, 0, len(in.GetInfos()))
	for _, info := range in.Infos {
		segmentIDs = append(segmentIDs, info.SegmentID)
	}
219 220 221 222 223 224
	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
X
Xiaofan 已提交
225
		log.Warn(err.Error())
G
godchen 已提交
226
		return status, nil
227
	}
228

X
Xiaofan 已提交
229
	log.Info("loadSegmentsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", segmentIDs), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
230

231
	waitFunc := func() (*commonpb.Status, error) {
232 233
		err = dct.WaitToFinish()
		if err != nil {
234 235 236 237
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
X
Xiaofan 已提交
238
			log.Warn(err.Error())
G
godchen 已提交
239
			return status, nil
240
		}
X
Xiaofan 已提交
241
		log.Info("loadSegmentsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", segmentIDs), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
242 243 244
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
245
	}
246 247

	return waitFunc()
248 249
}

G
godchen 已提交
250
// ReleaseCollection clears all data related to this collection on the querynode
251
func (node *QueryNode) ReleaseCollection(ctx context.Context, in *queryPb.ReleaseCollectionRequest) (*commonpb.Status, error) {
252 253
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
254
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
255 256 257 258
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
259
		return status, nil
260
	}
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
	dct := &releaseCollectionTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
X
Xiaofan 已提交
276
		log.Warn(err.Error())
G
godchen 已提交
277
		return status, nil
278
	}
X
Xiaofan 已提交
279
	log.Info("releaseCollectionTask Enqueue done", zap.Int64("collectionID", in.CollectionID))
280

281
	func() {
282 283
		err = dct.WaitToFinish()
		if err != nil {
X
Xiaofan 已提交
284
			log.Warn(err.Error())
285
			return
286
		}
X
Xiaofan 已提交
287
		log.Info("releaseCollectionTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID))
288
	}()
289 290 291 292 293 294 295

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	return status, nil
}

296
// ReleasePartitions clears all data related to this partition on the querynode
297
func (node *QueryNode) ReleasePartitions(ctx context.Context, in *queryPb.ReleasePartitionsRequest) (*commonpb.Status, error) {
298 299
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
300
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
301 302 303 304
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
305
		return status, nil
306
	}
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
	dct := &releasePartitionsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
X
Xiaofan 已提交
322
		log.Warn(err.Error())
G
godchen 已提交
323
		return status, nil
324
	}
X
Xiaofan 已提交
325
	log.Info("releasePartitionsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("partitionIDs", in.PartitionIDs))
326

327
	func() {
328 329
		err = dct.WaitToFinish()
		if err != nil {
X
Xiaofan 已提交
330
			log.Warn(err.Error())
331
			return
332
		}
X
Xiaofan 已提交
333
		log.Info("releasePartitionsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("partitionIDs", in.PartitionIDs))
334
	}()
335 336 337 338 339 340 341

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	return status, nil
}

342
// ReleaseSegments remove the specified segments from query node according segmentIDs, partitionIDs, and collectionID
343
func (node *QueryNode) ReleaseSegments(ctx context.Context, in *queryPb.ReleaseSegmentsRequest) (*commonpb.Status, error) {
344 345
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
346
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
347 348 349 350
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
351
		return status, nil
352
	}
353

354
	// collection lock is not needed since we guarantee not query/search will be dispatch from leader
355
	for _, id := range in.SegmentIDs {
356 357
		node.metaReplica.removeSegment(id, segmentTypeSealed)
		node.metaReplica.removeSegment(id, segmentTypeGrowing)
358
	}
X
xige-16 已提交
359

X
Xiaofan 已提交
360
	log.Info("release segments done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", in.SegmentIDs))
361 362 363
	return &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}, nil
364 365
}

366
// GetSegmentInfo returns segment information of the collection on the queryNode, and the information includes memSize, numRow, indexName, indexID ...
367
func (node *QueryNode) GetSegmentInfo(ctx context.Context, in *queryPb.GetSegmentInfoRequest) (*queryPb.GetSegmentInfoResponse, error) {
368 369
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
370
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
371 372 373 374 375 376
		res := &queryPb.GetSegmentInfoResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}
G
godchen 已提交
377
		return res, nil
378
	}
379 380 381 382 383 384
	var segmentInfos []*queryPb.SegmentInfo

	segmentIDs := make(map[int64]struct{})
	for _, segmentID := range in.GetSegmentIDs() {
		segmentIDs[segmentID] = struct{}{}
	}
385

386 387
	infos := node.metaReplica.getSegmentInfosByColID(in.CollectionID)
	segmentInfos = append(segmentInfos, filterSegmentInfo(infos, segmentIDs)...)
388

389 390 391 392
	return &queryPb.GetSegmentInfoResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
393
		Infos: segmentInfos,
394 395
	}, nil
}
396

397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
// filterSegmentInfo returns segment info which segment id in segmentIDs map
func filterSegmentInfo(segmentInfos []*queryPb.SegmentInfo, segmentIDs map[int64]struct{}) []*queryPb.SegmentInfo {
	// An empty filter means "keep everything".
	if len(segmentIDs) == 0 {
		return segmentInfos
	}
	result := make([]*queryPb.SegmentInfo, 0, len(segmentIDs))
	for _, si := range segmentInfos {
		if _, keep := segmentIDs[si.GetSegmentID()]; keep {
			result = append(result, si)
		}
	}
	return result
}

413
// isHealthy checks if QueryNode is healthy
414 415 416 417 418
func (node *QueryNode) isHealthy() bool {
	code := node.stateCode.Load().(internalpb.StateCode)
	return code == internalpb.StateCode_Healthy
}

419
// Search performs replica search tasks.
420
func (node *QueryNode) Search(ctx context.Context, req *queryPb.SearchRequest) (*internalpb.SearchResults, error) {
421
	metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel, metrics.TotalLabel).Inc()
422 423 424 425 426
	failRet := &internalpb.SearchResults{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
		},
	}
427 428 429 430 431 432

	defer func() {
		if failRet.Status.ErrorCode != commonpb.ErrorCode_Success {
			metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel, metrics.FailLabel).Inc()
		}
	}()
433
	if !node.isHealthy() {
434 435
		failRet.Status.Reason = msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())
		return failRet, nil
436 437
	}

438
	msgID := req.GetReq().GetBase().GetMsgID()
439
	log.Debug("Received SearchRequest",
440 441
		zap.Int64("msgID", msgID),
		zap.Bool("fromShardLeader", req.GetFromShardLeader()),
442 443 444 445
		zap.String("vChannel", req.GetDmlChannel()),
		zap.Int64s("segmentIDs", req.GetSegmentIDs()),
		zap.Uint64("guaranteeTimestamp", req.GetReq().GetGuaranteeTimestamp()),
		zap.Uint64("timeTravel", req.GetReq().GetTravelTimestamp()))
446 447

	if node.queryShardService == nil {
448 449
		failRet.Status.Reason = "queryShardService is nil"
		return failRet, nil
450 451 452 453
	}

	qs, err := node.queryShardService.getQueryShard(req.GetDmlChannel())
	if err != nil {
454 455 456 457
		log.Warn("Search failed, failed to get query shard",
			zap.Int64("msgID", msgID),
			zap.String("dml channel", req.GetDmlChannel()),
			zap.Error(err))
458 459 460
		failRet.Status.ErrorCode = commonpb.ErrorCode_NotShardLeader
		failRet.Status.Reason = err.Error()
		return failRet, nil
461 462
	}

463 464 465 466 467 468
	log.Debug("start do search",
		zap.Int64("msgID", msgID),
		zap.Bool("fromShardLeader", req.GetFromShardLeader()),
		zap.String("vChannel", req.GetDmlChannel()),
		zap.Int64s("segmentIDs", req.GetSegmentIDs()))
	tr := timerecord.NewTimeRecorder("")
469

470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488
	if req.FromShardLeader {
		historicalTask, err2 := newSearchTask(ctx, req)
		if err2 != nil {
			failRet.Status.Reason = err2.Error()
			return failRet, nil
		}
		historicalTask.QS = qs
		historicalTask.DataScope = querypb.DataScope_Historical
		err2 = node.scheduler.AddReadTask(ctx, historicalTask)
		if err2 != nil {
			failRet.Status.Reason = err2.Error()
			return failRet, nil
		}

		err2 = historicalTask.WaitToFinish()
		if err2 != nil {
			failRet.Status.Reason = err2.Error()
			return failRet, nil
		}
489

490 491 492
		tr.Elapse(fmt.Sprintf("do search done, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
			msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

493 494 495 496 497 498 499 500
		failRet.Status.ErrorCode = commonpb.ErrorCode_Success
		metrics.QueryNodeSQLatencyInQueue.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.SearchLabel).Observe(float64(historicalTask.queueDur.Milliseconds()))
		metrics.QueryNodeReduceLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.SearchLabel).Observe(float64(historicalTask.reduceDur.Milliseconds()))
		latency := tr.ElapseSpan()
		metrics.QueryNodeSQReqLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel).Observe(float64(latency.Milliseconds()))
		metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel, metrics.SuccessLabel).Inc()
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525
		return historicalTask.Ret, nil
	}

	//from Proxy
	cluster, ok := qs.clusterService.getShardCluster(req.GetDmlChannel())
	if !ok {
		failRet.Status.Reason = fmt.Sprintf("channel %s leader is not here", req.GetDmlChannel())
		return failRet, nil
	}

	searchCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	var results []*internalpb.SearchResults
	var streamingResult *internalpb.SearchResults

	var wg sync.WaitGroup
	var errCluster error

	wg.Add(1) // search cluster
	go func() {
		defer wg.Done()
		// shard leader dispatches request to its shard cluster
		oResults, cErr := cluster.Search(searchCtx, req)
		if cErr != nil {
526
			log.Warn("search cluster failed", zap.Int64("msgID", msgID), zap.Int64("collectionID", req.Req.GetCollectionID()), zap.Error(cErr))
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559
			cancel()
			errCluster = cErr
			return
		}
		results = oResults
	}()

	var errStreaming error
	wg.Add(1) // search streaming
	go func() {
		defer func() {
			if errStreaming != nil {
				cancel()
			}
		}()

		defer wg.Done()
		streamingTask, err2 := newSearchTask(searchCtx, req)
		if err2 != nil {
			errStreaming = err2
		}
		streamingTask.QS = qs
		streamingTask.DataScope = querypb.DataScope_Streaming
		err2 = node.scheduler.AddReadTask(searchCtx, streamingTask)
		if err2 != nil {
			errStreaming = err2
			return
		}
		err2 = streamingTask.WaitToFinish()
		if err2 != nil {
			errStreaming = err2
			return
		}
560 561 562 563
		metrics.QueryNodeSQLatencyInQueue.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.SearchLabel).Observe(float64(streamingTask.queueDur.Milliseconds()))
		metrics.QueryNodeReduceLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.SearchLabel).Observe(float64(streamingTask.reduceDur.Milliseconds()))
564 565 566 567 568 569 570 571 572 573 574 575 576 577
		streamingResult = streamingTask.Ret
	}()
	wg.Wait()

	var mainErr error
	if errCluster != nil {
		mainErr = errCluster
		if errors.Is(errCluster, context.Canceled) {
			if errStreaming != nil {
				mainErr = errStreaming
			}
		}
	} else if errStreaming != nil {
		mainErr = errStreaming
578 579
	}

580 581 582 583
	if mainErr != nil {
		failRet.Status.Reason = mainErr.Error()
		return failRet, nil
	}
584 585 586 587

	tr.Elapse(fmt.Sprintf("start reduce search result, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
		msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

588 589 590 591 592 593
	results = append(results, streamingResult)
	ret, err2 := reduceSearchResults(results, req.Req.GetNq(), req.Req.GetTopk(), req.Req.GetMetricType())
	if err2 != nil {
		failRet.Status.Reason = err2.Error()
		return failRet, nil
	}
594

595 596 597
	tr.Elapse(fmt.Sprintf("do search done, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
		msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

598 599 600 601
	failRet.Status.ErrorCode = commonpb.ErrorCode_Success
	latency := tr.ElapseSpan()
	metrics.QueryNodeSQReqLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel).Observe(float64(latency.Milliseconds()))
	metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel, metrics.SuccessLabel).Inc()
602 603
	metrics.QueryNodeSearchNQ.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID())).Observe(float64(req.Req.GetNq()))
	metrics.QueryNodeSearchTopK.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID())).Observe(float64(req.Req.GetTopk()))
604
	return ret, nil
605 606 607
}

// Query performs replica query tasks.
608
func (node *QueryNode) Query(ctx context.Context, req *queryPb.QueryRequest) (*internalpb.RetrieveResults, error) {
609
	metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel, metrics.TotalLabel).Inc()
610 611 612 613 614
	failRet := &internalpb.RetrieveResults{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
		},
	}
615 616 617 618 619 620

	defer func() {
		if failRet.Status.ErrorCode != commonpb.ErrorCode_Success {
			metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.SearchLabel, metrics.FailLabel).Inc()
		}
	}()
621
	if !node.isHealthy() {
622 623
		failRet.Status.Reason = msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())
		return failRet, nil
624
	}
625

626
	msgID := req.GetReq().GetBase().GetMsgID()
627
	log.Debug("Received QueryRequest",
628 629
		zap.Int64("msgID", msgID),
		zap.Bool("fromShardLeader", req.GetFromShardLeader()),
630 631 632 633
		zap.String("vChannel", req.GetDmlChannel()),
		zap.Int64s("segmentIDs", req.GetSegmentIDs()),
		zap.Uint64("guaranteeTimestamp", req.GetReq().GetGuaranteeTimestamp()),
		zap.Uint64("timeTravel", req.GetReq().GetTravelTimestamp()))
634 635

	if node.queryShardService == nil {
636 637 638 639
		failRet.Status.Reason = "queryShardService is nil"
		return failRet, nil
	}

640 641
	qs, err := node.queryShardService.getQueryShard(req.GetDmlChannel())
	if err != nil {
642
		log.Warn("Query failed, failed to get query shard", zap.Int64("msgID", msgID), zap.String("dml channel", req.GetDmlChannel()), zap.Error(err))
643 644 645 646
		failRet.Status.Reason = err.Error()
		return failRet, nil
	}

647 648 649 650 651 652
	log.Debug("start do query",
		zap.Int64("msgID", msgID),
		zap.Bool("fromShardLeader", req.GetFromShardLeader()),
		zap.String("vChannel", req.GetDmlChannel()),
		zap.Int64s("segmentIDs", req.GetSegmentIDs()))
	tr := timerecord.NewTimeRecorder("")
653

654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
	if req.FromShardLeader {
		// construct a queryTask
		queryTask := newQueryTask(ctx, req)
		queryTask.QS = qs
		queryTask.DataScope = querypb.DataScope_Historical
		err2 := node.scheduler.AddReadTask(ctx, queryTask)
		if err2 != nil {
			failRet.Status.Reason = err2.Error()
			return failRet, nil
		}

		err2 = queryTask.WaitToFinish()
		if err2 != nil {
			failRet.Status.Reason = err2.Error()
			return failRet, nil
		}
670 671 672 673

		tr.Elapse(fmt.Sprintf("do query done, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
			msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

674 675 676 677 678 679 680 681
		failRet.Status.ErrorCode = commonpb.ErrorCode_Success
		metrics.QueryNodeSQLatencyInQueue.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.QueryLabel).Observe(float64(queryTask.queueDur.Milliseconds()))
		metrics.QueryNodeReduceLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.QueryLabel).Observe(float64(queryTask.reduceDur.Milliseconds()))
		latency := tr.ElapseSpan()
		metrics.QueryNodeSQReqLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel).Observe(float64(latency.Milliseconds()))
		metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel, metrics.SuccessLabel).Inc()
682
		return queryTask.Ret, nil
683 684
	}

685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
	cluster, ok := qs.clusterService.getShardCluster(req.GetDmlChannel())
	if !ok {
		failRet.Status.Reason = fmt.Sprintf("channel %s leader is not here", req.GetDmlChannel())
		return failRet, nil
	}

	// add cancel when error occurs
	queryCtx, cancel := context.WithCancel(ctx)
	defer cancel()

	var results []*internalpb.RetrieveResults
	var streamingResult *internalpb.RetrieveResults
	var wg sync.WaitGroup

	var errCluster error
	wg.Add(1)
	go func() {
		defer wg.Done()
		// shard leader dispatches request to its shard cluster
		oResults, cErr := cluster.Query(queryCtx, req)
		if cErr != nil {
706
			log.Warn("failed to query cluster", zap.Int64("msgID", msgID), zap.Int64("collectionID", req.Req.GetCollectionID()), zap.Error(cErr))
707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
			errCluster = cErr
			cancel()
			return
		}
		results = oResults
	}()

	var errStreaming error
	wg.Add(1)
	go func() {
		defer wg.Done()
		streamingTask := newQueryTask(queryCtx, req)
		streamingTask.DataScope = querypb.DataScope_Streaming
		streamingTask.QS = qs
		err2 := node.scheduler.AddReadTask(queryCtx, streamingTask)
		defer func() {
			errStreaming = err2
			if err2 != nil {
				cancel()
			}
		}()
		if err2 != nil {
			return
		}
		err2 = streamingTask.WaitToFinish()
		if err2 != nil {
			return
		}
735 736 737 738
		metrics.QueryNodeSQLatencyInQueue.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.QueryLabel).Observe(float64(streamingTask.queueDur.Milliseconds()))
		metrics.QueryNodeReduceLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()),
			metrics.QueryLabel).Observe(float64(streamingTask.reduceDur.Milliseconds()))
739 740 741 742 743 744 745 746 747 748 749 750 751 752
		streamingResult = streamingTask.Ret
	}()
	wg.Wait()

	var mainErr error
	if errCluster != nil {
		mainErr = errCluster
		if errors.Is(errCluster, context.Canceled) {
			if errStreaming != nil {
				mainErr = errStreaming
			}
		}
	} else if errStreaming != nil {
		mainErr = errStreaming
753 754
	}

755 756 757 758
	if mainErr != nil {
		failRet.Status.Reason = mainErr.Error()
		return failRet, nil
	}
759 760 761 762

	tr.Elapse(fmt.Sprintf("start reduce query result, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
		msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

763 764 765 766 767 768
	results = append(results, streamingResult)
	ret, err2 := mergeInternalRetrieveResults(results)
	if err2 != nil {
		failRet.Status.Reason = err2.Error()
		return failRet, nil
	}
769 770 771 772

	tr.Elapse(fmt.Sprintf("do query done, msgID = %d, fromSharedLeader = %t, vChannel = %s, segmentIDs = %v",
		msgID, req.GetFromShardLeader(), req.GetDmlChannel(), req.GetSegmentIDs()))

773 774 775 776
	failRet.Status.ErrorCode = commonpb.ErrorCode_Success
	latency := tr.ElapseSpan()
	metrics.QueryNodeSQReqLatency.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel).Observe(float64(latency.Milliseconds()))
	metrics.QueryNodeSQCount.WithLabelValues(fmt.Sprint(Params.QueryNodeCfg.GetNodeID()), metrics.QueryLabel, metrics.SuccessLabel).Inc()
777
	return ret, nil
778 779
}

780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
// SyncReplicaSegments syncs replica node & segments states
func (node *QueryNode) SyncReplicaSegments(ctx context.Context, req *querypb.SyncReplicaSegmentsRequest) (*commonpb.Status, error) {
	if !node.isHealthy() {
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
		}, nil
	}

	log.Debug("Received SyncReplicaSegments request", zap.String("vchannelName", req.GetVchannelName()))

	// Delegate the actual state sync to the shard cluster service.
	if err := node.ShardClusterService.SyncReplicaSegments(req.GetVchannelName(), req.GetReplicaSegments()); err != nil {
		log.Warn("failed to sync replica semgents,", zap.String("vchannel", req.GetVchannelName()), zap.Error(err))
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}, nil
	}

	log.Debug("SyncReplicaSegments Done", zap.String("vchannel", req.GetVchannelName()))

	return &commonpb.Status{ErrorCode: commonpb.ErrorCode_Success}, nil
}

G
godchen 已提交
805
// GetMetrics return system infos of the query node, such as total memory, memory usage, cpu usage ...
// TODO(dragondriver): cache the Metrics and set a retention to the cache
func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
	// Refuse to report metrics while the node is unhealthy.
	if !node.isHealthy() {
		log.Warn("QueryNode.GetMetrics failed",
			zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
			zap.String("req", req.Request),
			zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())))

		return &milvuspb.GetMetricsResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
			},
			Response: "",
		}, nil
	}

	// The metric type is encoded inside the request payload string.
	metricType, err := metricsinfo.ParseMetricType(req.Request)
	if err != nil {
		log.Warn("QueryNode.GetMetrics failed to parse metric type",
			zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
			zap.String("req", req.Request),
			zap.Error(err))

		return &milvuspb.GetMetricsResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
			Response: "",
		}, nil
	}

	// Only system-info metrics are implemented; anything else falls through
	// to the "not implemented" response below.
	if metricType == metricsinfo.SystemInfoMetrics {
		metrics, err := getSystemInfoMetrics(ctx, req, node)
		if err != nil {
			log.Warn("QueryNode.GetMetrics failed",
				zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
				zap.String("req", req.Request),
				zap.String("metric_type", metricType),
				zap.Error(err))
		}

		// NOTE(review): on error the response is still returned as-is —
		// presumably getSystemInfoMetrics embeds the failure in its Status;
		// confirm it never returns a nil response alongside an error.
		return metrics, nil
	}

	log.Debug("QueryNode.GetMetrics failed, request metric type is not implemented yet",
		zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
		zap.String("req", req.Request),
		zap.String("metric_type", metricType))

	return &milvuspb.GetMetricsResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    metricsinfo.MsgUnimplementedMetric,
		},
		Response: "",
	}, nil
}