query_node.go 13.2 KB
Newer Older
1
package querynode
B
bigsheeper 已提交
2

3 4
/*

5
#cgo CFLAGS: -I${SRCDIR}/../core/output/include
6

G
GuoRentong 已提交
7
#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib
8

F
FluorineDog 已提交
9 10
#include "segcore/collection_c.h"
#include "segcore/segment_c.h"
11 12

*/
B
bigsheeper 已提交
13
import "C"
14

B
bigsheeper 已提交
15
import (
16
	"context"
X
XuanYang-cn 已提交
17
	"errors"
18 19
	"fmt"
	"io"
20
	"log"
C
cai.zhang 已提交
21
	"sync/atomic"
22

G
groot 已提交
23
	"github.com/zilliztech/milvus-distributed/internal/msgstream"
X
Xiangyu Wang 已提交
24
	"github.com/zilliztech/milvus-distributed/internal/msgstream/pulsarms"
G
groot 已提交
25
	"github.com/zilliztech/milvus-distributed/internal/msgstream/rmqms"
26
	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
C
cai.zhang 已提交
27
	"github.com/zilliztech/milvus-distributed/internal/proto/internalpb2"
28
	queryPb "github.com/zilliztech/milvus-distributed/internal/proto/querypb"
29
	"github.com/zilliztech/milvus-distributed/internal/util/typeutil"
B
bigsheeper 已提交
30 31
)

32 33 34 35 36 37 38 39
type Node interface {
	typeutil.Component

	AddQueryChannel(in *queryPb.AddQueryChannelsRequest) (*commonpb.Status, error)
	RemoveQueryChannel(in *queryPb.RemoveQueryChannelsRequest) (*commonpb.Status, error)
	WatchDmChannels(in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error)
	LoadSegments(in *queryPb.LoadSegmentRequest) (*commonpb.Status, error)
	ReleaseSegments(in *queryPb.ReleaseSegmentRequest) (*commonpb.Status, error)
B
bigsheeper 已提交
40
	GetSegmentInfo(in *queryPb.SegmentInfoRequest) (*queryPb.SegmentInfoResponse, error)
41 42
}

X
xige-16 已提交
43
// QueryService is an alias for typeutil.QueryServiceInterface.
type QueryService = typeutil.QueryServiceInterface
44

B
bigsheeper 已提交
45
type QueryNode struct {
46 47
	typeutil.Service

X
XuanYang-cn 已提交
48
	queryNodeLoopCtx    context.Context
49
	queryNodeLoopCancel context.CancelFunc
50

B
bigsheeper 已提交
51
	QueryNodeID uint64
C
cai.zhang 已提交
52
	stateCode   atomic.Value
B
bigsheeper 已提交
53

X
XuanYang-cn 已提交
54
	replica collectionReplica
B
bigsheeper 已提交
55

56
	// internal services
57 58 59 60 61
	dataSyncService *dataSyncService
	metaService     *metaService
	searchService   *searchService
	loadService     *loadService
	statsService    *statsService
62

63 64
	//opentracing
	closer io.Closer
65 66

	// clients
B
bigsheeper 已提交
67 68 69 70
	masterClient MasterServiceInterface
	queryClient  QueryServiceInterface
	indexClient  IndexServiceInterface
	dataClient   DataServiceInterface
G
groot 已提交
71 72

	msFactory msgstream.Factory
B
bigsheeper 已提交
73
}
74

G
groot 已提交
75
func NewQueryNode(ctx context.Context, queryNodeID uint64, factory msgstream.Factory) *QueryNode {
X
XuanYang-cn 已提交
76
	ctx1, cancel := context.WithCancel(ctx)
C
cai.zhang 已提交
77
	node := &QueryNode{
78 79 80 81 82 83 84 85
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,
		QueryNodeID:         queryNodeID,

		dataSyncService: nil,
		metaService:     nil,
		searchService:   nil,
		statsService:    nil,
G
groot 已提交
86 87

		msFactory: factory,
88 89
	}

B
bigsheeper 已提交
90
	node.replica = newCollectionReplicaImpl()
C
cai.zhang 已提交
91 92 93
	node.stateCode.Store(internalpb2.StateCode_INITIALIZING)
	return node
}
G
godchen 已提交
94

G
groot 已提交
95
func NewQueryNodeWithoutID(ctx context.Context, factory msgstream.Factory) *QueryNode {
96 97 98 99 100 101 102 103 104
	ctx1, cancel := context.WithCancel(ctx)
	node := &QueryNode{
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,

		dataSyncService: nil,
		metaService:     nil,
		searchService:   nil,
		statsService:    nil,
G
groot 已提交
105 106

		msFactory: factory,
107 108
	}

B
bigsheeper 已提交
109
	node.replica = newCollectionReplicaImpl()
110 111 112 113
	node.stateCode.Store(internalpb2.StateCode_INITIALIZING)
	return node
}

C
cai.zhang 已提交
114 115 116
// TODO: delete this and call node.Init()
func Init() {
	Params.Init()
B
bigsheeper 已提交
117 118
}

N
neza2017 已提交
119
func (node *QueryNode) Init() error {
X
xige-16 已提交
120
	registerReq := &queryPb.RegisterNodeRequest{
C
cai.zhang 已提交
121 122 123 124 125
		Address: &commonpb.Address{
			Ip:   Params.QueryNodeIP,
			Port: Params.QueryNodePort,
		},
	}
126 127

	response, err := node.queryClient.RegisterNode(registerReq)
C
cai.zhang 已提交
128 129 130 131 132 133 134
	if err != nil {
		panic(err)
	}
	if response.Status.ErrorCode != commonpb.ErrorCode_SUCCESS {
		panic(response.Status.Reason)
	}

135 136
	Params.QueryNodeID = response.InitParams.NodeID
	fmt.Println("QueryNodeID is", Params.QueryNodeID)
C
cai.zhang 已提交
137

138 139 140 141
	if node.masterClient == nil {
		log.Println("WARN: null master service detected")
	}

142 143 144 145 146 147 148 149
	if node.indexClient == nil {
		log.Println("WARN: null index service detected")
	}

	if node.dataClient == nil {
		log.Println("WARN: null data service detected")
	}

150 151 152 153
	return nil
}

func (node *QueryNode) Start() error {
G
groot 已提交
154 155 156 157 158 159 160 161 162 163
	var err error
	m := map[string]interface{}{
		"PulsarAddress":  Params.PulsarAddress,
		"ReceiveBufSize": 1024,
		"PulsarBufSize":  1024}
	err = node.msFactory.SetParams(m)
	if err != nil {
		return err
	}

X
XuanYang-cn 已提交
164
	// init services and manager
G
groot 已提交
165 166
	node.dataSyncService = newDataSyncService(node.queryNodeLoopCtx, node.replica, node.msFactory)
	node.searchService = newSearchService(node.queryNodeLoopCtx, node.replica, node.msFactory)
B
bigsheeper 已提交
167
	//node.metaService = newMetaService(node.queryNodeLoopCtx, node.replica)
G
groot 已提交
168

169
	node.loadService = newLoadService(node.queryNodeLoopCtx, node.masterClient, node.dataClient, node.indexClient, node.replica, node.dataSyncService.dmStream)
G
groot 已提交
170
	node.statsService = newStatsService(node.queryNodeLoopCtx, node.replica, node.loadService.segLoader.indexLoader.fieldStatsChan, node.msFactory)
B
bigsheeper 已提交
171

X
XuanYang-cn 已提交
172
	// start services
173
	go node.dataSyncService.start()
N
neza2017 已提交
174
	go node.searchService.start()
B
bigsheeper 已提交
175
	//go node.metaService.start()
176
	go node.loadService.start()
X
XuanYang-cn 已提交
177
	go node.statsService.start()
178

C
cai.zhang 已提交
179
	node.stateCode.Store(internalpb2.StateCode_HEALTHY)
180
	<-node.queryNodeLoopCtx.Done()
N
neza2017 已提交
181
	return nil
B
bigsheeper 已提交
182
}
B
bigsheeper 已提交
183

N
neza2017 已提交
184
func (node *QueryNode) Stop() error {
C
cai.zhang 已提交
185
	node.stateCode.Store(internalpb2.StateCode_ABNORMAL)
X
XuanYang-cn 已提交
186 187
	node.queryNodeLoopCancel()

B
bigsheeper 已提交
188
	// free collectionReplica
X
XuanYang-cn 已提交
189
	node.replica.freeAll()
B
bigsheeper 已提交
190 191 192

	// close services
	if node.dataSyncService != nil {
X
XuanYang-cn 已提交
193
		node.dataSyncService.close()
B
bigsheeper 已提交
194 195
	}
	if node.searchService != nil {
X
XuanYang-cn 已提交
196
		node.searchService.close()
B
bigsheeper 已提交
197
	}
198 199
	if node.loadService != nil {
		node.loadService.close()
B
bigsheeper 已提交
200
	}
B
bigsheeper 已提交
201
	if node.statsService != nil {
X
XuanYang-cn 已提交
202
		node.statsService.close()
B
bigsheeper 已提交
203
	}
204 205 206
	if node.closer != nil {
		node.closer.Close()
	}
N
neza2017 已提交
207
	return nil
X
XuanYang-cn 已提交
208 209
}

B
bigsheeper 已提交
210 211 212 213 214 215 216 217
// SetMasterService stores master as the node's master service client.
// A nil master is rejected with an error and the current client is kept.
func (node *QueryNode) SetMasterService(master MasterServiceInterface) error {
	if master != nil {
		node.masterClient = master
		return nil
	}
	return errors.New("null master service interface")
}

218 219
func (node *QueryNode) SetQueryService(query QueryServiceInterface) error {
	if query == nil {
B
bigsheeper 已提交
220
		return errors.New("null query service interface")
221 222 223 224 225
	}
	node.queryClient = query
	return nil
}

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241
// SetIndexService stores index as the node's index service client.
// A nil index is rejected with an error and the current client is kept.
func (node *QueryNode) SetIndexService(index IndexServiceInterface) error {
	if index != nil {
		node.indexClient = index
		return nil
	}
	return errors.New("null index service interface")
}

// SetDataService stores data as the node's data service client.
// A nil data is rejected with an error and the current client is kept.
func (node *QueryNode) SetDataService(data DataServiceInterface) error {
	if data != nil {
		node.dataClient = data
		return nil
	}
	return errors.New("null data service interface")
}

C
cai.zhang 已提交
242 243 244 245 246 247 248
func (node *QueryNode) GetComponentStates() (*internalpb2.ComponentStates, error) {
	code, ok := node.stateCode.Load().(internalpb2.StateCode)
	if !ok {
		return nil, errors.New("unexpected error in type assertion")
	}
	info := &internalpb2.ComponentInfo{
		NodeID:    Params.QueryNodeID,
X
XuanYang-cn 已提交
249
		Role:      typeutil.QueryNodeRole,
C
cai.zhang 已提交
250 251 252 253 254 255 256 257 258
		StateCode: code,
	}
	stats := &internalpb2.ComponentStates{
		State: info,
	}
	return stats, nil
}

func (node *QueryNode) GetTimeTickChannel() (string, error) {
N
neza2017 已提交
259
	return Params.QueryTimeTickChannelName, nil
C
cai.zhang 已提交
260 261 262 263 264 265
}

// GetStatisticsChannel returns the configured statistics channel name.
// The error is always nil.
func (node *QueryNode) GetStatisticsChannel() (string, error) {
	channelName := Params.StatsChannelName
	return channelName, nil
}

X
XuanYang-cn 已提交
266 267 268 269 270 271 272 273 274 275 276 277 278 279
func (node *QueryNode) AddQueryChannel(in *queryPb.AddQueryChannelsRequest) (*commonpb.Status, error) {
	if node.searchService == nil || node.searchService.searchMsgStream == nil {
		errMsg := "null search service or null search message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// add request channel
	consumeChannels := []string{in.RequestChannelID}
	consumeSubName := Params.MsgChannelSubName
X
xige-16 已提交
280
	node.searchService.searchMsgStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
281 282 283

	// add result channel
	producerChannels := []string{in.ResultChannelID}
X
xige-16 已提交
284
	node.searchService.searchResultMsgStream.AsProducer(producerChannels)
X
XuanYang-cn 已提交
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) RemoveQueryChannel(in *queryPb.RemoveQueryChannelsRequest) (*commonpb.Status, error) {
	if node.searchService == nil || node.searchService.searchMsgStream == nil {
		errMsg := "null search service or null search result message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

X
Xiangyu Wang 已提交
303
	searchStream, ok := node.searchService.searchMsgStream.(*pulsarms.PulsarMsgStream)
X
XuanYang-cn 已提交
304 305 306 307 308 309 310 311 312 313
	if !ok {
		errMsg := "type assertion failed for search message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

X
Xiangyu Wang 已提交
314
	resultStream, ok := node.searchService.searchResultMsgStream.(*pulsarms.PulsarMsgStream)
X
XuanYang-cn 已提交
315 316 317 318 319 320 321 322 323 324 325 326 327 328
	if !ok {
		errMsg := "type assertion failed for search result message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// remove request channel
	consumeChannels := []string{in.RequestChannelID}
	consumeSubName := Params.MsgChannelSubName
	// TODO: searchStream.RemovePulsarConsumers(producerChannels)
Z
zhenshan.cao 已提交
329
	searchStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
330 331 332 333

	// remove result channel
	producerChannels := []string{in.ResultChannelID}
	// TODO: resultStream.RemovePulsarProducer(producerChannels)
Z
zhenshan.cao 已提交
334
	resultStream.AsProducer(producerChannels)
X
XuanYang-cn 已提交
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) WatchDmChannels(in *queryPb.WatchDmChannelsRequest) (*commonpb.Status, error) {
	if node.dataSyncService == nil || node.dataSyncService.dmStream == nil {
		errMsg := "null data sync service or null data manipulation stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

G
groot 已提交
353 354 355 356 357
	switch t := node.dataSyncService.dmStream.(type) {
	case *pulsarms.PulsarTtMsgStream:
	case *rmqms.RmqTtMsgStream:
	default:
		_ = t
X
XuanYang-cn 已提交
358 359 360 361 362 363 364 365 366 367 368 369
		errMsg := "type assertion failed for dm message stream"
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    errMsg,
		}

		return status, errors.New(errMsg)
	}

	// add request channel
	consumeChannels := in.ChannelIDs
	consumeSubName := Params.MsgChannelSubName
G
groot 已提交
370
	node.dataSyncService.dmStream.AsConsumer(consumeChannels, consumeSubName)
X
XuanYang-cn 已提交
371 372 373 374 375 376 377 378 379

	status := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}
	return status, nil
}

func (node *QueryNode) LoadSegments(in *queryPb.LoadSegmentRequest) (*commonpb.Status, error) {
	// TODO: support db
Z
zhenshan.cao 已提交
380
	collectionID := in.CollectionID
C
cai.zhang 已提交
381 382
	partitionID := in.PartitionID
	segmentIDs := in.SegmentIDs
X
XuanYang-cn 已提交
383
	fieldIDs := in.FieldIDs
384
	schema := in.Schema
385

386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407
	hasCollection := node.replica.hasCollection(collectionID)
	hasPartition := node.replica.hasPartition(partitionID)
	if !hasCollection {
		err := node.replica.addCollection(collectionID, schema)
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
				Reason:    err.Error(),
			}
			return status, err
		}
	}
	if !hasPartition {
		err := node.replica.addPartition(collectionID, partitionID)
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
				Reason:    err.Error(),
			}
			return status, err
		}
	}
408
	err := node.replica.enablePartition(partitionID)
C
cai.zhang 已提交
409 410 411 412 413 414 415 416
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    err.Error(),
		}
		return status, err
	}

417
	// segments are ordered before LoadSegments calling
418 419 420
	for i, state := range in.SegmentStates {
		if state.State == commonpb.SegmentState_SegmentGrowing {
			position := state.StartPosition
421
			err := node.loadService.segLoader.seekSegment(position)
422 423 424 425 426 427
			if err != nil {
				status := &commonpb.Status{
					ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
					Reason:    err.Error(),
				}
				return status, err
C
cai.zhang 已提交
428
			}
429 430
			segmentIDs = segmentIDs[:i]
			break
C
cai.zhang 已提交
431
		}
432 433
	}

434
	err = node.loadService.loadSegment(collectionID, partitionID, segmentIDs, fieldIDs)
C
cai.zhang 已提交
435 436 437 438
	if err != nil {
		status := &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
			Reason:    err.Error(),
Z
zhenshan.cao 已提交
439
		}
C
cai.zhang 已提交
440
		return status, err
Z
zhenshan.cao 已提交
441
	}
442 443 444
	return &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}, nil
C
cai.zhang 已提交
445 446 447
}

func (node *QueryNode) ReleaseSegments(in *queryPb.ReleaseSegmentRequest) (*commonpb.Status, error) {
C
cai.zhang 已提交
448
	for _, id := range in.PartitionIDs {
449
		err := node.replica.enablePartition(id)
C
cai.zhang 已提交
450 451 452 453 454 455 456 457 458
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
				Reason:    err.Error(),
			}
			return status, err
		}
	}

C
cai.zhang 已提交
459 460
	// release all fields in the segments
	for _, id := range in.SegmentIDs {
461
		err := node.loadService.segLoader.releaseSegment(id)
X
XuanYang-cn 已提交
462 463 464 465 466 467 468 469
		if err != nil {
			status := &commonpb.Status{
				ErrorCode: commonpb.ErrorCode_UNEXPECTED_ERROR,
				Reason:    err.Error(),
			}
			return status, err
		}
	}
470 471 472
	return &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_SUCCESS,
	}, nil
X
XuanYang-cn 已提交
473
}
B
bigsheeper 已提交
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499

// GetSegmentInfo collects ID, collection, partition, memory size, row count
// and index name/ID for each requested segment found in the replica.
// Segments the replica cannot return are skipped silently, so the response
// may hold fewer entries than were requested. The status is always SUCCESS
// and the error is always nil.
func (node *QueryNode) GetSegmentInfo(in *queryPb.SegmentInfoRequest) (*queryPb.SegmentInfoResponse, error) {
	resp := &queryPb.SegmentInfoResponse{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_SUCCESS,
		},
		Infos: make([]*queryPb.SegmentInfo, 0),
	}

	for _, segmentID := range in.SegmentIDs {
		seg, err := node.replica.getSegmentByID(segmentID)
		if err == nil {
			resp.Infos = append(resp.Infos, &queryPb.SegmentInfo{
				SegmentID:    seg.ID(),
				CollectionID: seg.collectionID,
				PartitionID:  seg.partitionID,
				MemSize:      seg.getMemSize(),
				NumRows:      seg.getRowCount(),
				IndexName:    seg.getIndexName(),
				IndexID:      seg.getIndexID(),
			})
		}
	}

	return resp, nil
}