query_node.go 6.7 KB
Newer Older
1
package querynode
B
bigsheeper 已提交
2

3 4
/*

#cgo CFLAGS: -I${SRCDIR}/../core/output/include

#cgo LDFLAGS: -L${SRCDIR}/../core/output/lib -lmilvus_segcore -Wl,-rpath=${SRCDIR}/../core/output/lib

#include "segcore/collection_c.h"
#include "segcore/segment_c.h"

*/
B
bigsheeper 已提交
13
import "C"
14

B
bigsheeper 已提交
15
import (
16
	"context"
Z
zhenshan.cao 已提交
17
	"errors"
18
	"fmt"
S
sunby 已提交
19
	"math/rand"
20
	"sync"
C
cai.zhang 已提交
21
	"sync/atomic"
S
sunby 已提交
22
	"time"
23

B
bigsheeper 已提交
24 25 26
	"go.uber.org/zap"

	"github.com/zilliztech/milvus-distributed/internal/log"
G
groot 已提交
27
	"github.com/zilliztech/milvus-distributed/internal/msgstream"
28
	"github.com/zilliztech/milvus-distributed/internal/proto/commonpb"
G
godchen 已提交
29
	"github.com/zilliztech/milvus-distributed/internal/proto/internalpb"
30
	queryPb "github.com/zilliztech/milvus-distributed/internal/proto/querypb"
Z
zhenshan.cao 已提交
31
	"github.com/zilliztech/milvus-distributed/internal/types"
B
bigsheeper 已提交
32 33 34
)

type QueryNode struct {
X
XuanYang-cn 已提交
35
	queryNodeLoopCtx    context.Context
36
	queryNodeLoopCancel context.CancelFunc
37

38
	QueryNodeID UniqueID
C
cai.zhang 已提交
39
	stateCode   atomic.Value
B
bigsheeper 已提交
40

41
	replica ReplicaInterface
B
bigsheeper 已提交
42

43
	// internal services
Z
zhenshan.cao 已提交
44 45 46 47
	metaService      *metaService
	searchService    *searchService
	loadService      *loadService
	statsService     *statsService
48 49
	dsServicesMu     sync.Mutex // guards dataSyncServices
	dataSyncServices map[UniqueID]*dataSyncService
50

51
	// clients
T
ThreadDao 已提交
52 53 54 55
	masterService types.MasterService
	queryService  types.QueryService
	indexService  types.IndexService
	dataService   types.DataService
G
groot 已提交
56 57

	msFactory msgstream.Factory
58
	scheduler *taskScheduler
B
bigsheeper 已提交
59
}
60

61
func NewQueryNode(ctx context.Context, queryNodeID UniqueID, factory msgstream.Factory) *QueryNode {
S
sunby 已提交
62
	rand.Seed(time.Now().UnixNano())
X
XuanYang-cn 已提交
63
	ctx1, cancel := context.WithCancel(ctx)
C
cai.zhang 已提交
64
	node := &QueryNode{
65 66 67 68
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,
		QueryNodeID:         queryNodeID,

Z
zhenshan.cao 已提交
69 70 71 72
		dataSyncServices: make(map[UniqueID]*dataSyncService),
		metaService:      nil,
		searchService:    nil,
		statsService:     nil,
G
groot 已提交
73 74

		msFactory: factory,
75 76
	}

77
	node.scheduler = newTaskScheduler(ctx1)
78
	node.replica = newCollectionReplica()
G
godchen 已提交
79
	node.UpdateStateCode(internalpb.StateCode_Abnormal)
C
cai.zhang 已提交
80 81
	return node
}
G
godchen 已提交
82

G
groot 已提交
83
func NewQueryNodeWithoutID(ctx context.Context, factory msgstream.Factory) *QueryNode {
84 85 86 87 88
	ctx1, cancel := context.WithCancel(ctx)
	node := &QueryNode{
		queryNodeLoopCtx:    ctx1,
		queryNodeLoopCancel: cancel,

Z
zhenshan.cao 已提交
89 90 91 92
		dataSyncServices: make(map[UniqueID]*dataSyncService),
		metaService:      nil,
		searchService:    nil,
		statsService:     nil,
G
groot 已提交
93 94

		msFactory: factory,
95 96
	}

97
	node.scheduler = newTaskScheduler(ctx1)
98
	node.replica = newCollectionReplica()
G
godchen 已提交
99
	node.UpdateStateCode(internalpb.StateCode_Abnormal)
100

101
	return node
B
bigsheeper 已提交
102 103
}

N
neza2017 已提交
104
func (node *QueryNode) Init() error {
G
godchen 已提交
105
	ctx := context.Background()
X
xige-16 已提交
106
	registerReq := &queryPb.RegisterNodeRequest{
107 108 109
		Base: &commonpb.MsgBase{
			SourceID: Params.QueryNodeID,
		},
C
cai.zhang 已提交
110 111 112 113 114
		Address: &commonpb.Address{
			Ip:   Params.QueryNodeIP,
			Port: Params.QueryNodePort,
		},
	}
115

T
ThreadDao 已提交
116
	resp, err := node.queryService.RegisterNode(ctx, registerReq)
C
cai.zhang 已提交
117 118 119
	if err != nil {
		panic(err)
	}
120
	if resp.Status.ErrorCode != commonpb.ErrorCode_Success {
121 122 123 124 125 126 127 128 129 130 131 132 133 134
		panic(resp.Status.Reason)
	}

	for _, kv := range resp.InitParams.StartParams {
		switch kv.Key {
		case "StatsChannelName":
			Params.StatsChannelName = kv.Value
		case "TimeTickChannelName":
			Params.QueryTimeTickChannelName = kv.Value
		case "QueryChannelName":
			Params.SearchChannelNames = append(Params.SearchChannelNames, kv.Value)
		case "QueryResultChannelName":
			Params.SearchResultChannelNames = append(Params.SearchResultChannelNames, kv.Value)
		default:
S
sunby 已提交
135
			return fmt.Errorf("Invalid key: %v", kv.Key)
136
		}
C
cai.zhang 已提交
137 138
	}

B
bigsheeper 已提交
139
	log.Debug("", zap.Int64("QueryNodeID", Params.QueryNodeID))
C
cai.zhang 已提交
140

T
ThreadDao 已提交
141
	if node.masterService == nil {
B
bigsheeper 已提交
142
		log.Error("null master service detected")
143 144
	}

T
ThreadDao 已提交
145
	if node.indexService == nil {
B
bigsheeper 已提交
146
		log.Error("null index service detected")
147 148
	}

T
ThreadDao 已提交
149
	if node.dataService == nil {
B
bigsheeper 已提交
150
		log.Error("null data service detected")
151 152
	}

153 154 155 156
	return nil
}

func (node *QueryNode) Start() error {
G
groot 已提交
157 158 159 160 161 162 163 164 165 166
	var err error
	m := map[string]interface{}{
		"PulsarAddress":  Params.PulsarAddress,
		"ReceiveBufSize": 1024,
		"PulsarBufSize":  1024}
	err = node.msFactory.SetParams(m)
	if err != nil {
		return err
	}

X
XuanYang-cn 已提交
167
	// init services and manager
G
groot 已提交
168
	node.searchService = newSearchService(node.queryNodeLoopCtx, node.replica, node.msFactory)
Z
zhenshan.cao 已提交
169
	node.loadService = newLoadService(node.queryNodeLoopCtx, node.masterService, node.dataService, node.indexService, node.replica)
G
groot 已提交
170
	node.statsService = newStatsService(node.queryNodeLoopCtx, node.replica, node.loadService.segLoader.indexLoader.fieldStatsChan, node.msFactory)
B
bigsheeper 已提交
171

172 173 174
	// start task scheduler
	go node.scheduler.Start()

X
XuanYang-cn 已提交
175
	// start services
N
neza2017 已提交
176
	go node.searchService.start()
177
	go node.loadService.start()
X
XuanYang-cn 已提交
178
	go node.statsService.start()
G
godchen 已提交
179
	node.UpdateStateCode(internalpb.StateCode_Healthy)
N
neza2017 已提交
180
	return nil
B
bigsheeper 已提交
181
}
B
bigsheeper 已提交
182

N
neza2017 已提交
183
func (node *QueryNode) Stop() error {
G
godchen 已提交
184
	node.UpdateStateCode(internalpb.StateCode_Abnormal)
X
XuanYang-cn 已提交
185 186
	node.queryNodeLoopCancel()

B
bigsheeper 已提交
187
	// free collectionReplica
X
XuanYang-cn 已提交
188
	node.replica.freeAll()
B
bigsheeper 已提交
189 190

	// close services
Z
zhenshan.cao 已提交
191 192 193 194
	for _, dsService := range node.dataSyncServices {
		if dsService != nil {
			dsService.close()
		}
B
bigsheeper 已提交
195 196
	}
	if node.searchService != nil {
X
XuanYang-cn 已提交
197
		node.searchService.close()
B
bigsheeper 已提交
198
	}
199 200
	if node.loadService != nil {
		node.loadService.close()
B
bigsheeper 已提交
201
	}
B
bigsheeper 已提交
202
	if node.statsService != nil {
X
XuanYang-cn 已提交
203
		node.statsService.close()
B
bigsheeper 已提交
204
	}
N
neza2017 已提交
205
	return nil
X
XuanYang-cn 已提交
206 207
}

G
godchen 已提交
208
func (node *QueryNode) UpdateStateCode(code internalpb.StateCode) {
209 210 211
	node.stateCode.Store(code)
}

T
ThreadDao 已提交
212
func (node *QueryNode) SetMasterService(master types.MasterService) error {
B
bigsheeper 已提交
213 214 215
	if master == nil {
		return errors.New("null master service interface")
	}
T
ThreadDao 已提交
216
	node.masterService = master
B
bigsheeper 已提交
217 218 219
	return nil
}

T
ThreadDao 已提交
220
func (node *QueryNode) SetQueryService(query types.QueryService) error {
221
	if query == nil {
B
bigsheeper 已提交
222
		return errors.New("null query service interface")
223
	}
T
ThreadDao 已提交
224
	node.queryService = query
225 226 227
	return nil
}

T
ThreadDao 已提交
228
func (node *QueryNode) SetIndexService(index types.IndexService) error {
229 230 231
	if index == nil {
		return errors.New("null index service interface")
	}
T
ThreadDao 已提交
232
	node.indexService = index
233 234 235
	return nil
}

T
ThreadDao 已提交
236
func (node *QueryNode) SetDataService(data types.DataService) error {
237 238 239
	if data == nil {
		return errors.New("null data service interface")
	}
T
ThreadDao 已提交
240
	node.dataService = data
241 242 243
	return nil
}

244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268
// getDataSyncService returns the dataSyncService registered for collectionID.
// It returns an error when no entry exists for that collection. The lookup is
// performed while holding dsServicesMu.
func (node *QueryNode) getDataSyncService(collectionID UniqueID) (*dataSyncService, error) {
	node.dsServicesMu.Lock()
	defer node.dsServicesMu.Unlock()

	ds, ok := node.dataSyncServices[collectionID]
	if !ok {
		// Formatted with %v rather than fmt.Sprintln so the message does not
		// end in a stray newline.
		return nil, fmt.Errorf("cannot found dataSyncService, collectionID =%v", collectionID)
	}
	return ds, nil
}

// addDataSyncService registers ds for collectionID. It returns an error, and
// leaves the existing entry in place, when the collection already has a
// dataSyncService. The update is performed while holding dsServicesMu.
func (node *QueryNode) addDataSyncService(collectionID UniqueID, ds *dataSyncService) error {
	node.dsServicesMu.Lock()
	defer node.dsServicesMu.Unlock()

	if _, ok := node.dataSyncServices[collectionID]; ok {
		// Formatted with %v rather than fmt.Sprintln so the message does not
		// end in a stray newline.
		return fmt.Errorf("dataSyncService has been existed, collectionID =%v", collectionID)
	}
	// Writing to a nil map panics; allocate lazily for nodes that were not
	// built by one of the constructors.
	if node.dataSyncServices == nil {
		node.dataSyncServices = make(map[UniqueID]*dataSyncService)
	}
	node.dataSyncServices[collectionID] = ds
	return nil
}

// removeDataSyncService drops the dataSyncService entry for collectionID, if
// any, while holding dsServicesMu. It does not close the removed service.
func (node *QueryNode) removeDataSyncService(collectionID UniqueID) {
	node.dsServicesMu.Lock()
	delete(node.dataSyncServices, collectionID)
	node.dsServicesMu.Unlock()
}