impl.go 24.3 KB
Newer Older
1 2 3 4 5 6
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
7 8
// with the License. You may obtain a copy of the License at
//
9
//     http://www.apache.org/licenses/LICENSE-2.0
10
//
11 12 13 14 15
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
16

17 18 19 20
package querynode

import (
	"context"
21
	"fmt"
22 23 24

	"go.uber.org/zap"

25
	"github.com/milvus-io/milvus/internal/common"
X
Xiangyu Wang 已提交
26 27 28 29
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/commonpb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/proto/milvuspb"
30
	"github.com/milvus-io/milvus/internal/proto/querypb"
X
Xiangyu Wang 已提交
31
	queryPb "github.com/milvus-io/milvus/internal/proto/querypb"
32
	"github.com/milvus-io/milvus/internal/util/metricsinfo"
X
Xiangyu Wang 已提交
33
	"github.com/milvus-io/milvus/internal/util/typeutil"
34 35
)

36
// GetComponentStates returns information about whether the node is healthy
37
func (node *QueryNode) GetComponentStates(ctx context.Context) (*internalpb.ComponentStates, error) {
38
	log.Debug("Get QueryNode component states")
39 40 41 42 43
	stats := &internalpb.ComponentStates{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
	}
44 45 46
	code, ok := node.stateCode.Load().(internalpb.StateCode)
	if !ok {
		errMsg := "unexpected error in type assertion"
47 48
		stats.Status = &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
49
			Reason:    errMsg,
50
		}
G
godchen 已提交
51
		return stats, nil
52 53 54 55
	}
	nodeID := common.NotRegisteredID
	if node.session != nil && node.session.Registered() {
		nodeID = node.session.ServerID
56 57
	}
	info := &internalpb.ComponentInfo{
58
		NodeID:    nodeID,
59 60 61 62
		Role:      typeutil.QueryNodeRole,
		StateCode: code,
	}
	stats.State = info
63
	log.Debug("Get QueryNode component state done", zap.Any("stateCode", info.StateCode))
64 65 66
	return stats, nil
}

67 68
// GetTimeTickChannel returns the time tick channel
// TimeTickChannel contains many time tick messages, which will be sent by query nodes
69 70 71 72 73 74
func (node *QueryNode) GetTimeTickChannel(ctx context.Context) (*milvuspb.StringResponse, error) {
	return &milvuspb.StringResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
			Reason:    "",
		},
75
		Value: Params.CommonCfg.QueryCoordTimeTick,
76 77 78
	}, nil
}

79
// GetStatisticsChannel returns the statistics channel
80
// Statistics channel contains statistics infos of query nodes, such as segment infos, memory infos
81 82 83 84 85 86
func (node *QueryNode) GetStatisticsChannel(ctx context.Context) (*milvuspb.StringResponse, error) {
	return &milvuspb.StringResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
			Reason:    "",
		},
87
		Value: Params.CommonCfg.QueryNodeStats,
88 89 90
	}, nil
}

91
// AddQueryChannel watch queryChannel of the collection to receive query message
92
func (node *QueryNode) AddQueryChannel(ctx context.Context, in *queryPb.AddQueryChannelRequest) (*commonpb.Status, error) {
93 94
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
95
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
96 97 98 99
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
100
		return status, nil
101
	}
102 103 104 105 106 107 108
	dct := &addQueryChannelTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
109
	}
110

111
	err := node.scheduler.queue.Enqueue(dct)
112 113 114 115
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
116
		}
117
		log.Error(err.Error())
G
godchen 已提交
118
		return status, nil
119
	}
120 121 122 123 124
	log.Debug("addQueryChannelTask Enqueue done",
		zap.Int64("collectionID", in.CollectionID),
		zap.String("queryChannel", in.QueryChannel),
		zap.String("queryResultChannel", in.QueryResultChannel),
	)
125

126 127 128 129 130 131
	waitFunc := func() (*commonpb.Status, error) {
		err = dct.WaitToFinish()
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
132
			}
133
			log.Error(err.Error())
G
godchen 已提交
134
			return status, nil
135
		}
136 137 138 139 140 141
		log.Debug("addQueryChannelTask WaitToFinish done",
			zap.Int64("collectionID", in.CollectionID),
			zap.String("queryChannel", in.QueryChannel),
			zap.String("queryResultChannel", in.QueryResultChannel),
		)

142 143 144
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
145
	}
146

147
	return waitFunc()
148 149
}

150
// RemoveQueryChannel remove queryChannel of the collection to stop receiving query message
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
func (node *QueryNode) RemoveQueryChannel(ctx context.Context, in *queryPb.RemoveQueryChannelRequest) (*commonpb.Status, error) {
	// if node.searchService == nil || node.searchService.searchMsgStream == nil {
	// 	errMsg := "null search service or null search result message stream"
	// 	status := &commonpb.Status{
	// 		ErrorCode: commonpb.ErrorCode_UnexpectedError,
	// 		Reason:    errMsg,
	// 	}

	// 	return status, errors.New(errMsg)
	// }

	// searchStream, ok := node.searchService.searchMsgStream.(*pulsarms.PulsarMsgStream)
	// if !ok {
	// 	errMsg := "type assertion failed for search message stream"
	// 	status := &commonpb.Status{
	// 		ErrorCode: commonpb.ErrorCode_UnexpectedError,
	// 		Reason:    errMsg,
	// 	}

	// 	return status, errors.New(errMsg)
	// }

	// resultStream, ok := node.searchService.searchResultMsgStream.(*pulsarms.PulsarMsgStream)
	// if !ok {
	// 	errMsg := "type assertion failed for search result message stream"
	// 	status := &commonpb.Status{
	// 		ErrorCode: commonpb.ErrorCode_UnexpectedError,
	// 		Reason:    errMsg,
	// 	}

	// 	return status, errors.New(errMsg)
	// }

	// // remove request channel
	// consumeChannels := []string{in.RequestChannelID}
	// consumeSubName := Params.MsgChannelSubName
	// // TODO: searchStream.RemovePulsarConsumers(producerChannels)
	// searchStream.AsConsumer(consumeChannels, consumeSubName)

	// // remove result channel
	// producerChannels := []string{in.ResultChannelID}
	// // TODO: resultStream.RemovePulsarProducer(producerChannels)
	// resultStream.AsProducer(producerChannels)

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	return status, nil
}

G
godchen 已提交
201
// WatchDmChannels create consumers on dmChannels to receive Incremental data,which is the important part of real-time query
202
func (node *QueryNode) WatchDmChannels(ctx context.Context, in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error) {
203 204
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
205
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
206 207 208 209
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
210
		return status, nil
211
	}
212 213 214 215 216 217 218
	dct := &watchDmChannelsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
219 220
	}

221 222
	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
223 224
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
225
			Reason:    err.Error(),
226
		}
227
		log.Error(err.Error())
G
godchen 已提交
228
		return status, nil
229
	}
X
Xiaofan 已提交
230
	log.Debug("watchDmChannelsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()), zap.Int64("replicaID", in.GetReplicaID()))
231

232
	waitFunc := func() (*commonpb.Status, error) {
233
		err = dct.WaitToFinish()
234
		if err != nil {
235 236 237 238
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
239
			log.Error(err.Error())
G
godchen 已提交
240
			return status, nil
241
		}
X
Xiaofan 已提交
242
		log.Debug("watchDmChannelsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
243 244 245
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
246
	}
247 248

	return waitFunc()
249 250
}

G
godchen 已提交
251
// WatchDeltaChannels create consumers on dmChannels to receive Incremental data,which is the important part of real-time query
252
func (node *QueryNode) WatchDeltaChannels(ctx context.Context, in *queryPb.WatchDeltaChannelsRequest) (*commonpb.Status, error) {
253 254
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
255
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
256 257 258 259
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
260
		return status, nil
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
	}
	dct := &watchDeltaChannelsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
277
		log.Error(err.Error())
G
godchen 已提交
278
		return status, nil
279
	}
X
Xiaofan 已提交
280
	log.Debug("watchDeltaChannelsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
281 282 283 284 285 286 287 288

	waitFunc := func() (*commonpb.Status, error) {
		err = dct.WaitToFinish()
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
289
			log.Error(err.Error())
G
godchen 已提交
290
			return status, nil
291
		}
X
Xiaofan 已提交
292
		log.Debug("watchDeltaChannelsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
293 294 295 296 297 298
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
	}

	return waitFunc()
299 300
}

301
// LoadSegments load historical data into query node, historical data can be vector data or index
302
func (node *QueryNode) LoadSegments(ctx context.Context, in *queryPb.LoadSegmentsRequest) (*commonpb.Status, error) {
303 304
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
305
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
306 307 308 309
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
310
		return status, nil
311
	}
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
	dct := &loadSegmentsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
327
		log.Error(err.Error())
G
godchen 已提交
328
		return status, nil
329
	}
330 331 332 333
	segmentIDs := make([]UniqueID, 0)
	for _, info := range in.Infos {
		segmentIDs = append(segmentIDs, info.SegmentID)
	}
X
Xiaofan 已提交
334
	log.Debug("loadSegmentsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", segmentIDs), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
335

336
	waitFunc := func() (*commonpb.Status, error) {
337 338
		err = dct.WaitToFinish()
		if err != nil {
339 340 341 342
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			}
343
			log.Error(err.Error())
G
godchen 已提交
344
			return status, nil
345
		}
X
Xiaofan 已提交
346
		log.Debug("loadSegmentsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", segmentIDs), zap.Int64("nodeID", Params.QueryNodeCfg.GetNodeID()))
347 348 349
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		}, nil
350
	}
351 352

	return waitFunc()
353 354
}

G
godchen 已提交
355
// ReleaseCollection clears all data related to this collection on the querynode
356
func (node *QueryNode) ReleaseCollection(ctx context.Context, in *queryPb.ReleaseCollectionRequest) (*commonpb.Status, error) {
357 358
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
359
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
360 361 362 363
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
364
		return status, nil
365
	}
366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
	dct := &releaseCollectionTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
381
		log.Error(err.Error())
G
godchen 已提交
382
		return status, nil
383
	}
X
xige-16 已提交
384
	log.Debug("releaseCollectionTask Enqueue done", zap.Int64("collectionID", in.CollectionID))
385

386
	func() {
387 388
		err = dct.WaitToFinish()
		if err != nil {
389
			log.Error(err.Error())
390
			return
391
		}
X
xige-16 已提交
392
		log.Debug("releaseCollectionTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID))
393
	}()
394 395 396 397 398 399 400

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	return status, nil
}

401
// ReleasePartitions clears all data related to this partition on the querynode
402
func (node *QueryNode) ReleasePartitions(ctx context.Context, in *queryPb.ReleasePartitionsRequest) (*commonpb.Status, error) {
403 404
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
405
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
406 407 408 409
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
410
		return status, nil
411
	}
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
	dct := &releasePartitionsTask{
		baseTask: baseTask{
			ctx:  ctx,
			done: make(chan error),
		},
		req:  in,
		node: node,
	}

	err := node.scheduler.queue.Enqueue(dct)
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
427
		log.Error(err.Error())
G
godchen 已提交
428
		return status, nil
429
	}
X
xige-16 已提交
430
	log.Debug("releasePartitionsTask Enqueue done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("partitionIDs", in.PartitionIDs))
431

432
	func() {
433 434
		err = dct.WaitToFinish()
		if err != nil {
435
			log.Error(err.Error())
436
			return
437
		}
X
xige-16 已提交
438
		log.Debug("releasePartitionsTask WaitToFinish done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("partitionIDs", in.PartitionIDs))
439
	}()
440 441 442 443 444 445 446

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	return status, nil
}

447
// ReleaseSegments remove the specified segments from query node according segmentIDs, partitionIDs, and collectionID
448
func (node *QueryNode) ReleaseSegments(ctx context.Context, in *queryPb.ReleaseSegmentsRequest) (*commonpb.Status, error) {
449 450
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
451
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
452 453 454 455
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}
G
godchen 已提交
456
		return status, nil
457
	}
458 459 460 461
	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
	}
	for _, id := range in.SegmentIDs {
462 463 464 465 466 467 468 469
		err := node.historical.replica.removeSegment(id)
		if err != nil {
			// not return, try to release all segments
			status.ErrorCode = commonpb.ErrorCode_UnexpectedError
			status.Reason = err.Error()
		}
		err = node.streaming.replica.removeSegment(id)
		if err != nil {
470 471
			// not return, try to release all segments
			status.ErrorCode = commonpb.ErrorCode_UnexpectedError
472
			status.Reason = err.Error()
473 474
		}
	}
X
xige-16 已提交
475 476

	log.Debug("release segments done", zap.Int64("collectionID", in.CollectionID), zap.Int64s("segmentIDs", in.SegmentIDs))
477 478 479
	return status, nil
}

480
// GetSegmentInfo returns segment information of the collection on the queryNode, and the information includes memSize, numRow, indexName, indexID ...
481
func (node *QueryNode) GetSegmentInfo(ctx context.Context, in *queryPb.GetSegmentInfoRequest) (*queryPb.GetSegmentInfoResponse, error) {
482 483
	code := node.stateCode.Load().(internalpb.StateCode)
	if code != internalpb.StateCode_Healthy {
X
Xiaofan 已提交
484
		err := fmt.Errorf("query node %d is not ready", Params.QueryNodeCfg.GetNodeID())
485 486 487 488 489 490
		res := &queryPb.GetSegmentInfoResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}
G
godchen 已提交
491
		return res, nil
492
	}
493 494 495 496 497 498
	var segmentInfos []*queryPb.SegmentInfo

	segmentIDs := make(map[int64]struct{})
	for _, segmentID := range in.GetSegmentIDs() {
		segmentIDs[segmentID] = struct{}{}
	}
499

500
	// get info from historical
501
	historicalSegmentInfos, err := node.historical.replica.getSegmentInfosByColID(in.CollectionID)
502
	if err != nil {
503
		log.Debug("GetSegmentInfo: get historical segmentInfo failed", zap.Int64("collectionID", in.CollectionID), zap.Error(err))
504 505 506 507 508 509
		res := &queryPb.GetSegmentInfoResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}
G
godchen 已提交
510
		return res, nil
511
	}
512
	segmentInfos = append(segmentInfos, filterSegmentInfo(historicalSegmentInfos, segmentIDs)...)
513

514
	// get info from streaming
515
	streamingSegmentInfos, err := node.streaming.replica.getSegmentInfosByColID(in.CollectionID)
516
	if err != nil {
517
		log.Debug("GetSegmentInfo: get streaming segmentInfo failed", zap.Int64("collectionID", in.CollectionID), zap.Error(err))
518 519 520 521 522 523
		res := &queryPb.GetSegmentInfoResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}
G
godchen 已提交
524
		return res, nil
525
	}
526
	segmentInfos = append(segmentInfos, filterSegmentInfo(streamingSegmentInfos, segmentIDs)...)
527

528 529 530 531
	return &queryPb.GetSegmentInfoResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
532
		Infos: segmentInfos,
533 534
	}, nil
}
535

536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
// filterSegmentInfo returns segment info which segment id in segmentIDs map
func filterSegmentInfo(segmentInfos []*queryPb.SegmentInfo, segmentIDs map[int64]struct{}) []*queryPb.SegmentInfo {
	if len(segmentIDs) == 0 {
		return segmentInfos
	}
	filtered := make([]*queryPb.SegmentInfo, 0, len(segmentIDs))
	for _, info := range segmentInfos {
		_, ok := segmentIDs[info.GetSegmentID()]
		if !ok {
			continue
		}
		filtered = append(filtered, info)
	}
	return filtered
}

552
// isHealthy checks if QueryNode is healthy
553 554 555 556 557
func (node *QueryNode) isHealthy() bool {
	code := node.stateCode.Load().(internalpb.StateCode)
	return code == internalpb.StateCode_Healthy
}

558
// Search performs replica search tasks.
559
func (node *QueryNode) Search(ctx context.Context, req *queryPb.SearchRequest) (*internalpb.SearchResults, error) {
560 561 562 563
	if !node.isHealthy() {
		return &internalpb.SearchResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
X
Xiaofan 已提交
564
				Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
			},
		}, nil
	}

	log.Debug("Received SearchRequest", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()))

	if node.queryShardService == nil {
		return &internalpb.SearchResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    "queryShardService is nil",
			},
		}, nil
	}

	if !node.queryShardService.hasQueryShard(req.GetDmlChannel()) {
		err := node.queryShardService.addQueryShard(req.Req.CollectionID, req.GetDmlChannel(), 0) // TODO: add replicaID in request or remove it in query shard
		if err != nil {
			return &internalpb.SearchResults{
				Status: &commonpb.Status{
					ErrorCode: commonpb.ErrorCode_UnexpectedError,
					Reason:    err.Error(),
				},
			}, nil
		}
	}

	qs, err := node.queryShardService.getQueryShard(req.GetDmlChannel())
	if err != nil {
		return &internalpb.SearchResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}, nil
	}

	results, err := qs.search(ctx, req)
	if err != nil {
		log.Warn("QueryService failed to search", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()), zap.Error(err))
		return &internalpb.SearchResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}, nil
	}
	log.Debug("Search Shard Done", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()))

	return results, err
615 616 617
}

// Query performs replica query tasks.
618
func (node *QueryNode) Query(ctx context.Context, req *queryPb.QueryRequest) (*internalpb.RetrieveResults, error) {
619 620 621 622
	if !node.isHealthy() {
		return &internalpb.RetrieveResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
X
Xiaofan 已提交
623
				Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672
			},
		}, nil
	}
	log.Debug("Received QueryRequest", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()))

	if node.queryShardService == nil {
		return &internalpb.RetrieveResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    "queryShardService is nil",
			},
		}, nil
	}

	if !node.queryShardService.hasQueryShard(req.GetDmlChannel()) {
		err := node.queryShardService.addQueryShard(req.Req.CollectionID, req.GetDmlChannel(), 0) // TODO: add replicaID in request or remove it in query shard
		if err != nil {
			return &internalpb.RetrieveResults{
				Status: &commonpb.Status{
					ErrorCode: commonpb.ErrorCode_UnexpectedError,
					Reason:    err.Error(),
				},
			}, nil
		}
	}

	qs, err := node.queryShardService.getQueryShard(req.GetDmlChannel())
	if err != nil {
		return &internalpb.RetrieveResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}, nil
	}

	results, err := qs.query(ctx, req)
	if err != nil {
		log.Warn("QueryService failed to query", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()), zap.Error(err))
		return &internalpb.RetrieveResults{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
		}, nil
	}
	log.Debug("Query Shard Done", zap.String("vchannel", req.GetDmlChannel()), zap.Int64s("segmentIDs", req.GetSegmentIDs()))

	return results, nil
673 674
}

675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699
// SyncReplicaSegments syncs replica node & segments states
func (node *QueryNode) SyncReplicaSegments(ctx context.Context, req *querypb.SyncReplicaSegmentsRequest) (*commonpb.Status, error) {
	if !node.isHealthy() {
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
		}, nil
	}

	log.Debug("Received SyncReplicaSegments request", zap.String("vchannelName", req.GetVchannelName()))

	err := node.ShardClusterService.SyncReplicaSegments(req.GetVchannelName(), req.GetReplicaSegments())
	if err != nil {
		log.Warn("failed to sync replica semgents,", zap.String("vchannel", req.GetVchannelName()), zap.Error(err))
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    err.Error(),
		}, nil
	}

	log.Debug("SyncReplicaSegments Done", zap.String("vchannel", req.GetVchannelName()))

	return &commonpb.Status{ErrorCode: commonpb.ErrorCode_Success}, nil
}

G
godchen 已提交
700
// GetMetrics return system infos of the query node, such as total memory, memory usage, cpu usage ...
701
// TODO(dragondriver): cache the Metrics and set a retention to the cache
702 703 704
func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
	if !node.isHealthy() {
		log.Warn("QueryNode.GetMetrics failed",
X
Xiaofan 已提交
705
			zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
706
			zap.String("req", req.Request),
X
Xiaofan 已提交
707
			zap.Error(errQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID())))
708 709 710 711

		return &milvuspb.GetMetricsResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
X
Xiaofan 已提交
712
				Reason:    msgQueryNodeIsUnhealthy(Params.QueryNodeCfg.GetNodeID()),
713 714 715 716 717 718 719 720
			},
			Response: "",
		}, nil
	}

	metricType, err := metricsinfo.ParseMetricType(req.Request)
	if err != nil {
		log.Warn("QueryNode.GetMetrics failed to parse metric type",
X
Xiaofan 已提交
721
			zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
722 723 724 725 726 727 728 729 730 731 732 733 734 735
			zap.String("req", req.Request),
			zap.Error(err))

		return &milvuspb.GetMetricsResponse{
			Status: &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UnexpectedError,
				Reason:    err.Error(),
			},
			Response: "",
		}, nil
	}

	if metricType == metricsinfo.SystemInfoMetrics {
		metrics, err := getSystemInfoMetrics(ctx, req, node)
X
Xiaofan 已提交
736 737
		if err != nil {
			log.Warn("QueryNode.GetMetrics failed",
X
Xiaofan 已提交
738
				zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
X
Xiaofan 已提交
739 740 741 742
				zap.String("req", req.Request),
				zap.String("metric_type", metricType),
				zap.Error(err))
		}
743

G
godchen 已提交
744
		return metrics, nil
745 746 747
	}

	log.Debug("QueryNode.GetMetrics failed, request metric type is not implemented yet",
X
Xiaofan 已提交
748
		zap.Int64("node_id", Params.QueryNodeCfg.GetNodeID()),
749 750 751 752 753 754 755 756 757 758 759
		zap.String("req", req.Request),
		zap.String("metric_type", metricType))

	return &milvuspb.GetMetricsResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UnexpectedError,
			Reason:    metricsinfo.MsgUnimplementedMetric,
		},
		Response: "",
	}, nil
}