query_node.go 15.2 KB
Newer Older
1
package querynode
B
bigsheeper 已提交
2

3 4
/*

#cgo CFLAGS: -I${SRCDIR}/../core/output/include

#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib

#include "segcore/collection_c.h"
#include "segcore/segment_c.h"

*/
B
bigsheeper 已提交
13
import "C"
14

B
bigsheeper 已提交
15
import (
16
	"context"
17
	"fmt"
18
	"log"
C
cai.zhang 已提交
19
	"sync/atomic"
20

21
	"github.com/zilliztech/milvus-distributed/internal/errors"
G
groot 已提交
22
	"github.com/zilliztech/milvus-distributed/internal/msgstream"
X
Xiangyu Wang 已提交
23
	"github.com/zilliztech/milvus-distributed/internal/msgstream/pulsarms"
G
groot 已提交
24
	"github.com/zilliztech/milvus-distributed/internal/msgstream/rmqms"
25
	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
C
cai.zhang 已提交
26
	"github.com/zilliztech/milvus-distributed/internal/proto/internalpb2"
27
	queryPb "github.com/zilliztech/milvus-distributed/internal/proto/querypb"
28
	"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
B
bigsheeper 已提交
29 30
)

31 32 33
type Node interface {
	typeutil.Component

G
godchen 已提交
34 35 36 37 38 39 40 41
	AddQueryChannel(ctx context.Context, in *queryPb.AddQueryChannelsRequest) (*commonpb.Status, error)
	RemoveQueryChannel(ctx context.Context, in *queryPb.RemoveQueryChannelsRequest) (*commonpb.Status, error)
	WatchDmChannels(ctx context.Context, in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error)
	LoadSegments(ctx context.Context, in *queryPb.LoadSegmentRequest) (*commonpb.Status, error)
	ReleaseCollection(ctx context.Context, in *queryPb.ReleaseCollectionRequest) (*commonpb.Status, error)
	ReleasePartitions(ctx context.Context, in *queryPb.ReleasePartitionRequest) (*commonpb.Status, error)
	ReleaseSegments(ctx context.Context, in *queryPb.ReleaseSegmentRequest) (*commonpb.Status, error)
	GetSegmentInfo(ctx context.Context, in *queryPb.SegmentInfoRequest) (*queryPb.SegmentInfoResponse, error)
42 43
}

X
xige-16 已提交
44
// QueryService is an alias for typeutil.QueryServiceInterface.
type QueryService = typeutil.QueryServiceInterface
45

B
bigsheeper 已提交
46
type QueryNode struct {
47 48
	typeutil.Service

X
XuanYang-cn 已提交
49
	queryNodeLoopCtx    context.Context
50
	queryNodeLoopCancel context.CancelFunc
51

52
	QueryNodeID UniqueID
C
cai.zhang 已提交
53
	stateCode   atomic.Value
B
bigsheeper 已提交
54

X
XuanYang-cn 已提交
55
	replica collectionReplica
B
bigsheeper 已提交
56

57
	// internal services
58 59 60 61 62
	dataSyncService *dataSyncService
	metaService     *metaService
	searchService   *searchService
	loadService     *loadService
	statsService    *statsService
63

64
	// clients
B
bigsheeper 已提交
65 66 67 68
	masterClient MasterServiceInterface
	queryClient  QueryServiceInterface
	indexClient  IndexServiceInterface
	dataClient   DataServiceInterface
G
groot 已提交
69 70

	msFactory msgstream.Factory
B
bigsheeper 已提交
71
}
72

73
func NewQueryNode(ctx context.Context, queryNodeID UniqueID, factory msgstream.Factory) *QueryNode {
X
XuanYang-cn 已提交
74
	ctx1, cancel := context.WithCancel(ctx)
C
cai.zhang 已提交
75
	node := &QueryNode{
76 77 78 79 80 81 82 83
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,
		QueryNodeID:         queryNodeID,

		dataSyncService: nil,
		metaService:     nil,
		searchService:   nil,
		statsService:    nil,
G
groot 已提交
84 85

		msFactory: factory,
86 87
	}

B
bigsheeper 已提交
88
	node.replica = newCollectionReplicaImpl()
89
	node.UpdateStateCode(internalpb2.StateCode_ABNORMAL)
C
cai.zhang 已提交
90 91
	return node
}
G
godchen 已提交
92

G
groot 已提交
93
func NewQueryNodeWithoutID(ctx context.Context, factory msgstream.Factory) *QueryNode {
94 95 96 97 98 99 100 101 102
	ctx1, cancel := context.WithCancel(ctx)
	node := &QueryNode{
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,

		dataSyncService: nil,
		metaService:     nil,
		searchService:   nil,
		statsService:    nil,
G
groot 已提交
103 104

		msFactory: factory,
105 106
	}

B
bigsheeper 已提交
107
	node.replica = newCollectionReplicaImpl()
108
	node.UpdateStateCode(internalpb2.StateCode_ABNORMAL)
109

110
	return node
B
bigsheeper 已提交
111 112
}

N
neza2017 已提交
113
func (node *QueryNode) Init() error {
G
godchen 已提交
114
	ctx := context.Background()
X
xige-16 已提交
115
	registerReq := &queryPb.RegisterNodeRequest{
116 117 118 119
		Base: &commonpb.MsgBase{
			MsgType:  commonpb.MsgType_kNone,
			SourceID: Params.QueryNodeID,
		},
C
cai.zhang 已提交
120 121 122 123 124
		Address: &commonpb.Address{
			Ip:   Params.QueryNodeIP,
			Port: Params.QueryNodePort,
		},
	}
125

G
godchen 已提交
126
	resp, err := node.queryClient.RegisterNode(ctx, registerReq)
C
cai.zhang 已提交
127 128 129
	if err != nil {
		panic(err)
	}
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
	if resp.Status.ErrorCode != commonpb.ErrorCode_SUCCESS {
		panic(resp.Status.Reason)
	}

	for _, kv := range resp.InitParams.StartParams {
		switch kv.Key {
		case "StatsChannelName":
			Params.StatsChannelName = kv.Value
		case "TimeTickChannelName":
			Params.QueryTimeTickChannelName = kv.Value
		case "QueryChannelName":
			Params.SearchChannelNames = append(Params.SearchChannelNames, kv.Value)
		case "QueryResultChannelName":
			Params.SearchResultChannelNames = append(Params.SearchResultChannelNames, kv.Value)
		default:
			return errors.Errorf("Invalid key: %v", kv.Key)
		}
C
cai.zhang 已提交
147 148
	}

149
	fmt.Println("QueryNodeID is", Params.QueryNodeID)
C
cai.zhang 已提交
150

151 152 153 154
	if node.masterClient == nil {
		log.Println("WARN: null master service detected")
	}

155 156 157 158 159 160 161 162
	if node.indexClient == nil {
		log.Println("WARN: null index service detected")
	}

	if node.dataClient == nil {
		log.Println("WARN: null data service detected")
	}

163 164 165 166
	return nil
}

func (node *QueryNode) Start() error {
G
groot 已提交
167 168 169 170 171 172 173 174 175 176
	var err error
	m := map[string]interface{}{
		"PulsarAddress":  Params.PulsarAddress,
		"ReceiveBufSize": 1024,
		"PulsarBufSize":  1024}
	err = node.msFactory.SetParams(m)
	if err != nil {
		return err
	}

X
XuanYang-cn 已提交
177
	// init services and manager
G
groot 已提交
178 179
	node.dataSyncService = newDataSyncService(node.queryNodeLoopCtx, node.replica, node.msFactory)
	node.searchService = newSearchService(node.queryNodeLoopCtx, node.replica, node.msFactory)
B
bigsheeper 已提交
180
	//node.metaService = newMetaService(node.queryNodeLoopCtx, node.replica)
G
groot 已提交
181

182
	node.loadService = newLoadService(node.queryNodeLoopCtx, node.masterClient, node.dataClient, node.indexClient, node.replica, node.dataSyncService.dmStream)
G
groot 已提交
183
	node.statsService = newStatsService(node.queryNodeLoopCtx, node.replica, node.loadService.segLoader.indexLoader.fieldStatsChan, node.msFactory)
B
bigsheeper 已提交
184

X
XuanYang-cn 已提交
185
	// start services
186
	go node.dataSyncService.start()
N
neza2017 已提交
187
	go node.searchService.start()
B
bigsheeper 已提交
188
	//go node.metaService.start()
189
	go node.loadService.start()
X
XuanYang-cn 已提交
190
	go node.statsService.start()
191
	node.UpdateStateCode(internalpb2.StateCode_HEALTHY)
N
neza2017 已提交
192
	return nil
B
bigsheeper 已提交
193
}
B
bigsheeper 已提交
194

N
neza2017 已提交
195
func (node *QueryNode) Stop() error {
196
	node.UpdateStateCode(internalpb2.StateCode_ABNORMAL)
X
XuanYang-cn 已提交
197 198
	node.queryNodeLoopCancel()

B
bigsheeper 已提交
199
	// free collectionReplica
X
XuanYang-cn 已提交
200
	node.replica.freeAll()
B
bigsheeper 已提交
201 202 203

	// close services
	if node.dataSyncService != nil {
X
XuanYang-cn 已提交
204
		node.dataSyncService.close()
B
bigsheeper 已提交
205 206
	}
	if node.searchService != nil {
X
XuanYang-cn 已提交
207
		node.searchService.close()
B
bigsheeper 已提交
208
	}
209 210
	if node.loadService != nil {
		node.loadService.close()
B
bigsheeper 已提交
211
	}
B
bigsheeper 已提交
212
	if node.statsService != nil {
X
XuanYang-cn 已提交
213
		node.statsService.close()
B
bigsheeper 已提交
214
	}
N
neza2017 已提交
215
	return nil
X
XuanYang-cn 已提交
216 217
}

218 219 220 221
// UpdateStateCode stores code in the node's atomic state value; it is the
// value GetComponentStates later reports.
func (node *QueryNode) UpdateStateCode(code internalpb2.StateCode) {
	node.stateCode.Store(code)
}

B
bigsheeper 已提交
222 223 224 225 226 227 228 229
// SetMasterService installs master as the node's master service client.
// A nil master is rejected with an error and the current client is kept.
func (node *QueryNode) SetMasterService(master MasterServiceInterface) error {
	if master != nil {
		node.masterClient = master
		return nil
	}
	return errors.New("null master service interface")
}

230 231
func (node *QueryNode) SetQueryService(query QueryServiceInterface) error {
	if query == nil {
B
bigsheeper 已提交
232
		return errors.New("null query service interface")
233 234 235 236 237
	}
	node.queryClient = query
	return nil
}

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
// SetIndexService installs index as the node's index service client.
// A nil index is rejected with an error and the current client is kept.
func (node *QueryNode) SetIndexService(index IndexServiceInterface) error {
	if index != nil {
		node.indexClient = index
		return nil
	}
	return errors.New("null index service interface")
}

// SetDataService installs data as the node's data service client.
// A nil data is rejected with an error and the current client is kept.
func (node *QueryNode) SetDataService(data DataServiceInterface) error {
	if data != nil {
		node.dataClient = data
		return nil
	}
	return errors.New("null data service interface")
}

C
cai.zhang 已提交
254
func (node *QueryNode) GetComponentStates() (*internalpb2.ComponentStates, error) {
255 256 257 258 259
	stats := &internalpb2.ComponentStates{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_SUCCESS,
		},
	}
C
cai.zhang 已提交
260 261
	code, ok := node.stateCode.Load().(internalpb2.StateCode)
	if !ok {
262 263 264 265 266 267
		errMsg := "unexpected error in type assertion"
		stats.Status = &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}
		return stats, errors.New(errMsg)
C
cai.zhang 已提交
268 269 270
	}
	info := &internalpb2.ComponentInfo{
		NodeID:    Params.QueryNodeID,
X
XuanYang-cn 已提交
271
		Role:      typeutil.QueryNodeRole,
C
cai.zhang 已提交
272 273
		StateCode: code,
	}
274
	stats.State = info
C
cai.zhang 已提交
275 276 277 278
	return stats, nil
}

func (node *QueryNode) GetTimeTickChannel() (string, error) {
N
neza2017 已提交
279
	return Params.QueryTimeTickChannelName, nil
C
cai.zhang 已提交
280 281 282 283 284 285
}

// GetStatisticsChannel returns the statistics channel name held in
// Params.StatsChannelName. The error is always nil.
func (node *QueryNode) GetStatisticsChannel() (string, error) {
	return Params.StatsChannelName, nil
}

X
XuanYang-cn 已提交
286 287 288 289 290 291 292 293 294 295 296 297 298 299
func (node *QueryNode) AddQueryChannel(in *queryPb.AddQueryChannelsRequest) (*commonpb.Status, error) {
	if node.searchService == nil || node.searchService.searchMsgStream == nil {
		errMsg := "null search service or null search message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// add request channel
	consumeChannels := []string{in.RequestChannelID}
	consumeSubName := Params.MsgChannelSubName
X
xige-16 已提交
300
	node.searchService.searchMsgStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
301 302 303

	// add result channel
	producerChannels := []string{in.ResultChannelID}
X
xige-16 已提交
304
	node.searchService.searchResultMsgStream.AsProducer(producerChannels)
X
XuanYang-cn 已提交
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) RemoveQueryChannel(in *queryPb.RemoveQueryChannelsRequest) (*commonpb.Status, error) {
	if node.searchService == nil || node.searchService.searchMsgStream == nil {
		errMsg := "null search service or null search result message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

X
Xiangyu Wang 已提交
323
	searchStream, ok := node.searchService.searchMsgStream.(*pulsarms.PulsarMsgStream)
X
XuanYang-cn 已提交
324 325 326 327 328 329 330 331 332 333
	if !ok {
		errMsg := "type assertion failed for search message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

X
Xiangyu Wang 已提交
334
	resultStream, ok := node.searchService.searchResultMsgStream.(*pulsarms.PulsarMsgStream)
X
XuanYang-cn 已提交
335 336 337 338 339 340 341 342 343 344 345 346 347 348
	if !ok {
		errMsg := "type assertion failed for search result message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// remove request channel
	consumeChannels := []string{in.RequestChannelID}
	consumeSubName := Params.MsgChannelSubName
	// TODO: searchStream.RemovePulsarConsumers(producerChannels)
Z
zhenshan.cao 已提交
349
	searchStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
350 351 352 353

	// remove result channel
	producerChannels := []string{in.ResultChannelID}
	// TODO: resultStream.RemovePulsarProducer(producerChannels)
Z
zhenshan.cao 已提交
354
	resultStream.AsProducer(producerChannels)
X
XuanYang-cn 已提交
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) WatchDmChannels(in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error) {
	if node.dataSyncService == nil || node.dataSyncService.dmStream == nil {
		errMsg := "null data sync service or null data manipulation stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

G
groot 已提交
373 374 375 376 377
	switch t := node.dataSyncService.dmStream.(type) {
	case *pulsarms.PulsarTtMsgStream:
	case *rmqms.RmqTtMsgStream:
	default:
		_ = t
X
XuanYang-cn 已提交
378 379 380 381 382 383 384 385 386 387 388 389
		errMsg := "type assertion failed for dm message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// add request channel
	consumeChannels := in.ChannelIDs
	consumeSubName := Params.MsgChannelSubName
G
groot 已提交
390
	node.dataSyncService.dmStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
391 392 393 394 395 396 397 398 399

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) LoadSegments(in *queryPb.LoadSegmentRequest) (*commonpb.Status, error) {
	// TODO: support db
Z
zhenshan.cao 已提交
400
	collectionID := in.CollectionID
C
cai.zhang 已提交
401 402
	partitionID := in.PartitionID
	segmentIDs := in.SegmentIDs
X
XuanYang-cn 已提交
403
	fieldIDs := in.FieldIDs
404
	schema := in.Schema
405

X
xige-16 已提交
406 407 408 409 410
	fmt.Println("query node load segment ,info = ", in)

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
411 412 413 414 415
	hasCollection := node.replica.hasCollection(collectionID)
	hasPartition := node.replica.hasPartition(partitionID)
	if !hasCollection {
		err := node.replica.addCollection(collectionID, schema)
		if err != nil {
X
xige-16 已提交
416 417
			status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
			status.Reason = err.Error()
418 419 420 421 422 423
			return status, err
		}
	}
	if !hasPartition {
		err := node.replica.addPartition(collectionID, partitionID)
		if err != nil {
X
xige-16 已提交
424 425
			status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
			status.Reason = err.Error()
426 427 428
			return status, err
		}
	}
429
	err := node.replica.enablePartition(partitionID)
C
cai.zhang 已提交
430
	if err != nil {
X
xige-16 已提交
431 432 433 434 435 436 437 438 439 440 441 442 443
		status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
		status.Reason = err.Error()
		return status, err
	}

	if len(segmentIDs) == 0 {
		return status, nil
	}

	if len(in.SegmentIDs) != len(in.SegmentStates) {
		err := errors.New("len(segmentIDs) should equal to len(segmentStates)")
		status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
		status.Reason = err.Error()
C
cai.zhang 已提交
444 445 446
		return status, err
	}

447
	// segments are ordered before LoadSegments calling
X
xige-16 已提交
448
	var position *internalpb2.MsgPosition = nil
449
	for i, state := range in.SegmentStates {
X
xige-16 已提交
450 451 452 453 454
		thisPosition := state.StartPosition
		if state.State <= commonpb.SegmentState_SegmentGrowing {
			if position == nil {
				position = &internalpb2.MsgPosition{
					ChannelName: thisPosition.ChannelName,
455
				}
C
cai.zhang 已提交
456
			}
457 458
			segmentIDs = segmentIDs[:i]
			break
C
cai.zhang 已提交
459
		}
X
xige-16 已提交
460
		position = state.StartPosition
461 462
	}

X
xige-16 已提交
463
	err = node.dataSyncService.seekSegment(position)
C
cai.zhang 已提交
464 465 466 467
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    err.Error(),
Z
zhenshan.cao 已提交
468
		}
C
cai.zhang 已提交
469
		return status, err
Z
zhenshan.cao 已提交
470
	}
X
xige-16 已提交
471 472 473 474 475 476 477 478

	err = node.loadService.loadSegment(collectionID, partitionID, segmentIDs, fieldIDs)
	if err != nil {
		status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
		status.Reason = err.Error()
		return status, err
	}
	return status, nil
C
cai.zhang 已提交
479 480
}

B
bigsheeper 已提交
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
// ReleaseCollection removes the collection identified by in.CollectionID from
// the replica. On failure it returns an UNEXPECTED_ERROR status carrying the
// error text, together with the error itself.
func (node *QueryNode) ReleaseCollection(in *queryPb.ReleaseCollectionRequest) (*commonpb.Status, error) {
	if err := node.replica.removeCollection(in.CollectionID); err != nil {
		return &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    err.Error(),
		}, err
	}

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) ReleasePartitions(in *queryPb.ReleasePartitionRequest) (*commonpb.Status, error) {
	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
C
cai.zhang 已提交
500
	for _, id := range in.PartitionIDs {
B
bigsheeper 已提交
501
		err := node.loadService.segLoader.replica.removePartition(id)
C
cai.zhang 已提交
502
		if err != nil {
B
bigsheeper 已提交
503 504 505
			// not return, try to release all partitions
			status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
			status.Reason = err.Error()
C
cai.zhang 已提交
506 507
		}
	}
B
bigsheeper 已提交
508 509
	return status, nil
}
C
cai.zhang 已提交
510

B
bigsheeper 已提交
511 512 513 514
func (node *QueryNode) ReleaseSegments(in *queryPb.ReleaseSegmentRequest) (*commonpb.Status, error) {
	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
C
cai.zhang 已提交
515
	for _, id := range in.SegmentIDs {
B
bigsheeper 已提交
516 517 518 519 520
		err2 := node.loadService.segLoader.replica.removeSegment(id)
		if err2 != nil {
			// not return, try to release all segments
			status.ErrorCode = commonpb.ErrorCode_UNEXPECTED_ERROR
			status.Reason = err2.Error()
X
XuanYang-cn 已提交
521 522
		}
	}
B
bigsheeper 已提交
523
	return status, nil
X
XuanYang-cn 已提交
524
}
B
bigsheeper 已提交
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550

// GetSegmentInfo collects ID, collection, partition, memory size, row count
// and index name/ID for each requested segment that the replica knows about.
// Segment IDs the replica cannot resolve are skipped silently, so the result
// may hold fewer entries than were requested. The status is always SUCCESS
// and the error is always nil.
func (node *QueryNode) GetSegmentInfo(in *queryPb.SegmentInfoRequest) (*queryPb.SegmentInfoResponse, error) {
	resp := &queryPb.SegmentInfoResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_SUCCESS,
		},
		Infos: make([]*queryPb.SegmentInfo, 0),
	}

	for _, segmentID := range in.SegmentIDs {
		seg, err := node.replica.getSegmentByID(segmentID)
		if err != nil {
			// Lookup failed for this ID: leave it out of the response.
			continue
		}
		resp.Infos = append(resp.Infos, &queryPb.SegmentInfo{
			SegmentID:    seg.ID(),
			CollectionID: seg.collectionID,
			PartitionID:  seg.partitionID,
			MemSize:      seg.getMemSize(),
			NumRows:      seg.getRowCount(),
			IndexName:    seg.getIndexName(),
			IndexID:      seg.getIndexID(),
		})
	}

	return resp, nil
}