Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
cc69c5cd
M
milvus
项目概览
milvus
/
milvus
11 个月 前同步成功
通知
260
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
cc69c5cd
编写于
6月 02, 2022
作者:
Y
yah01
提交者:
GitHub
6月 02, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make Cluster interface's methods called outside public (#17315)
Signed-off-by:
N
yah01
<
yang.cen@zilliz.com
>
上级
c76b4ade
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
144 addition
and
144 deletion
+144
-144
internal/querycoord/channel_allocator.go
internal/querycoord/channel_allocator.go
+2
-2
internal/querycoord/channel_allocator_test.go
internal/querycoord/channel_allocator_test.go
+2
-2
internal/querycoord/cluster.go
internal/querycoord/cluster.go
+63
-63
internal/querycoord/cluster_test.go
internal/querycoord/cluster_test.go
+16
-16
internal/querycoord/impl.go
internal/querycoord/impl.go
+3
-3
internal/querycoord/impl_test.go
internal/querycoord/impl_test.go
+4
-4
internal/querycoord/meta.go
internal/querycoord/meta.go
+1
-1
internal/querycoord/metrics_info.go
internal/querycoord/metrics_info.go
+1
-1
internal/querycoord/mock_cluster.go
internal/querycoord/mock_cluster.go
+2
-2
internal/querycoord/query_coord.go
internal/querycoord/query_coord.go
+8
-8
internal/querycoord/query_coord_test.go
internal/querycoord/query_coord_test.go
+4
-4
internal/querycoord/querynode_test.go
internal/querycoord/querynode_test.go
+4
-4
internal/querycoord/segment_allocator.go
internal/querycoord/segment_allocator.go
+4
-4
internal/querycoord/segment_allocator_test.go
internal/querycoord/segment_allocator_test.go
+3
-3
internal/querycoord/task.go
internal/querycoord/task.go
+25
-25
internal/querycoord/task_scheduler.go
internal/querycoord/task_scheduler.go
+1
-1
internal/querycoord/util.go
internal/querycoord/util.go
+1
-1
未找到文件。
internal/querycoord/channel_allocator.go
浏览文件 @
cc69c5cd
...
...
@@ -42,7 +42,7 @@ func shuffleChannelsToQueryNode(ctx context.Context, reqs []*querypb.WatchDmChan
var
onlineNodeIDs
[]
int64
for
{
if
replicaID
==
-
1
{
onlineNodeIDs
=
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
=
cluster
.
O
nlineNodeIDs
()
}
else
{
replica
,
err
:=
metaCache
.
getReplicaByID
(
replicaID
)
if
err
!=
nil
{
...
...
@@ -50,7 +50,7 @@ func shuffleChannelsToQueryNode(ctx context.Context, reqs []*querypb.WatchDmChan
}
replicaNodes
:=
replica
.
GetNodeIds
()
for
_
,
nodeID
:=
range
replicaNodes
{
if
ok
,
err
:=
cluster
.
i
sOnline
(
nodeID
);
err
==
nil
&&
ok
{
if
ok
,
err
:=
cluster
.
I
sOnline
(
nodeID
);
err
==
nil
&&
ok
{
onlineNodeIDs
=
append
(
onlineNodeIDs
,
nodeID
)
}
}
...
...
internal/querycoord/channel_allocator_test.go
浏览文件 @
cc69c5cd
...
...
@@ -50,7 +50,7 @@ func TestShuffleChannelsToQueryNode(t *testing.T) {
}
meta
,
err
:=
newMeta
(
baseCtx
,
kv
,
nil
,
idAllocator
)
assert
.
Nil
(
t
,
err
)
cluster
:
=
&
queryNodeCluster
{
var
cluster
Cluster
=
&
queryNodeCluster
{
ctx
:
baseCtx
,
cancel
:
cancel
,
client
:
kv
,
...
...
@@ -87,7 +87,7 @@ func TestShuffleChannelsToQueryNode(t *testing.T) {
assert
.
Nil
(
t
,
err
)
nodeSession
:=
node
.
session
nodeID
:=
node
.
queryNodeID
cluster
.
r
egisterNode
(
baseCtx
,
nodeSession
,
nodeID
,
disConnect
)
cluster
.
R
egisterNode
(
baseCtx
,
nodeSession
,
nodeID
,
disConnect
)
waitQueryNodeOnline
(
cluster
,
nodeID
)
err
=
shuffleChannelsToQueryNode
(
baseCtx
,
reqs
,
cluster
,
meta
,
false
,
nil
,
nil
,
-
1
)
...
...
internal/querycoord/cluster.go
浏览文件 @
cc69c5cd
...
...
@@ -46,40 +46,43 @@ const (
// Cluster manages all query node connections and grpc requests
type
Cluster
interface
{
// Collection/Parition
ReleaseCollection
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseCollectionRequest
)
error
ReleasePartitions
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleasePartitionsRequest
)
error
// Segment
LoadSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
LoadSegmentsRequest
)
error
ReleaseSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseSegmentsRequest
)
error
GetSegmentInfo
(
ctx
context
.
Context
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
GetSegmentInfoByNode
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
GetSegmentInfoByID
(
ctx
context
.
Context
,
segmentID
UniqueID
)
(
*
querypb
.
SegmentInfo
,
error
)
SyncReplicaSegments
(
ctx
context
.
Context
,
leaderID
UniqueID
,
in
*
querypb
.
SyncReplicaSegmentsRequest
)
error
// Channel
WatchDmChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDmChannelsRequest
)
error
WatchDeltaChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDeltaChannelsRequest
)
error
HasWatchedDeltaChannel
(
ctx
context
.
Context
,
nodeID
int64
,
collectionID
UniqueID
)
bool
// Node
RegisterNode
(
ctx
context
.
Context
,
session
*
sessionutil
.
Session
,
id
UniqueID
,
state
nodeState
)
error
GetNodeInfoByID
(
nodeID
int64
)
(
Node
,
error
)
RemoveNodeInfo
(
nodeID
int64
)
error
StopNode
(
nodeID
int64
)
OnlineNodeIDs
()
[]
int64
IsOnline
(
nodeID
int64
)
(
bool
,
error
)
OfflineNodeIDs
()
[]
int64
HasNode
(
nodeID
int64
)
bool
GetMetrics
(
ctx
context
.
Context
,
in
*
milvuspb
.
GetMetricsRequest
)
[]
queryNodeGetMetricsResponse
AllocateSegmentsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
LoadSegmentsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
AllocateChannelsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
WatchDmChannelsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
AssignNodesToReplicas
(
ctx
context
.
Context
,
replicas
[]
*
milvuspb
.
ReplicaInfo
,
collectionSize
uint64
)
error
GetSessionVersion
()
int64
// Inner
reloadFromKV
()
error
getComponentInfos
(
ctx
context
.
Context
)
[]
*
internalpb
.
ComponentInfo
loadSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
LoadSegmentsRequest
)
error
releaseSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseSegmentsRequest
)
error
watchDmChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDmChannelsRequest
)
error
watchDeltaChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDeltaChannelsRequest
)
error
hasWatchedDeltaChannel
(
ctx
context
.
Context
,
nodeID
int64
,
collectionID
UniqueID
)
bool
releaseCollection
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseCollectionRequest
)
error
releasePartitions
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleasePartitionsRequest
)
error
getSegmentInfo
(
ctx
context
.
Context
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
getSegmentInfoByNode
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
getSegmentInfoByID
(
ctx
context
.
Context
,
segmentID
UniqueID
)
(
*
querypb
.
SegmentInfo
,
error
)
syncReplicaSegments
(
ctx
context
.
Context
,
leaderID
UniqueID
,
in
*
querypb
.
SyncReplicaSegmentsRequest
)
error
registerNode
(
ctx
context
.
Context
,
session
*
sessionutil
.
Session
,
id
UniqueID
,
state
nodeState
)
error
getNodeInfoByID
(
nodeID
int64
)
(
Node
,
error
)
removeNodeInfo
(
nodeID
int64
)
error
stopNode
(
nodeID
int64
)
onlineNodeIDs
()
[]
int64
isOnline
(
nodeID
int64
)
(
bool
,
error
)
offlineNodeIDs
()
[]
int64
hasNode
(
nodeID
int64
)
bool
allocateSegmentsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
LoadSegmentsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
allocateChannelsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
WatchDmChannelsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
assignNodesToReplicas
(
ctx
context
.
Context
,
replicas
[]
*
milvuspb
.
ReplicaInfo
,
collectionSize
uint64
)
error
getSessionVersion
()
int64
getMetrics
(
ctx
context
.
Context
,
in
*
milvuspb
.
GetMetricsRequest
)
[]
queryNodeGetMetricsResponse
}
type
newQueryNodeFn
func
(
ctx
context
.
Context
,
address
string
,
id
UniqueID
,
kv
*
etcdkv
.
EtcdKV
)
(
Node
,
error
)
...
...
@@ -135,7 +138,6 @@ func newQueryNodeCluster(ctx context.Context, clusterMeta Meta, kv *etcdkv.EtcdK
// Reload trigger task, trigger task states, internal task, internal task state from etcd
// Assign the internal task to the corresponding trigger task as a child task
func
(
c
*
queryNodeCluster
)
reloadFromKV
()
error
{
toLoadMetaNodeIDs
:=
make
([]
int64
,
0
)
// get current online session
onlineNodeSessions
,
version
,
_
:=
c
.
session
.
GetSessions
(
typeutil
.
QueryNodeRole
)
onlineSessionMap
:=
make
(
map
[
int64
]
*
sessionutil
.
Session
)
...
...
@@ -145,12 +147,11 @@ func (c *queryNodeCluster) reloadFromKV() error {
}
for
nodeID
,
session
:=
range
onlineSessionMap
{
log
.
Info
(
"reloadFromKV: register a queryNode to cluster"
,
zap
.
Any
(
"nodeID"
,
nodeID
))
err
:=
c
.
r
egisterNode
(
c
.
ctx
,
session
,
nodeID
,
disConnect
)
err
:=
c
.
R
egisterNode
(
c
.
ctx
,
session
,
nodeID
,
disConnect
)
if
err
!=
nil
{
log
.
Warn
(
"QueryNode failed to register"
,
zap
.
Int64
(
"nodeID"
,
nodeID
),
zap
.
String
(
"error info"
,
err
.
Error
()))
return
err
}
toLoadMetaNodeIDs
=
append
(
toLoadMetaNodeIDs
,
nodeID
)
}
c
.
sessionVersion
=
version
...
...
@@ -173,19 +174,18 @@ func (c *queryNodeCluster) reloadFromKV() error {
log
.
Warn
(
"watchNodeLoop: unmarshal session error"
,
zap
.
Error
(
err
))
return
err
}
err
=
c
.
r
egisterNode
(
context
.
Background
(),
session
,
nodeID
,
offline
)
err
=
c
.
R
egisterNode
(
context
.
Background
(),
session
,
nodeID
,
offline
)
if
err
!=
nil
{
log
.
Warn
(
"reloadFromKV: failed to add queryNode to cluster"
,
zap
.
Int64
(
"nodeID"
,
nodeID
),
zap
.
String
(
"error info"
,
err
.
Error
()))
return
err
}
toLoadMetaNodeIDs
=
append
(
toLoadMetaNodeIDs
,
nodeID
)
}
}
return
nil
}
func
(
c
*
queryNodeCluster
)
g
etSessionVersion
()
int64
{
func
(
c
*
queryNodeCluster
)
G
etSessionVersion
()
int64
{
return
c
.
sessionVersion
}
...
...
@@ -201,7 +201,7 @@ func (c *queryNodeCluster) getComponentInfos(ctx context.Context) []*internalpb.
return
subComponentInfos
}
func
(
c
*
queryNodeCluster
)
l
oadSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
LoadSegmentsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
L
oadSegments
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
LoadSegmentsRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
nodeID
];
ok
{
...
...
@@ -221,7 +221,7 @@ func (c *queryNodeCluster) loadSegments(ctx context.Context, nodeID int64, in *q
return
fmt
.
Errorf
(
"loadSegments: can't find QueryNode by nodeID, nodeID = %d"
,
nodeID
)
}
func
(
c
*
queryNodeCluster
)
r
eleaseSegments
(
ctx
context
.
Context
,
leaderID
int64
,
in
*
querypb
.
ReleaseSegmentsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
R
eleaseSegments
(
ctx
context
.
Context
,
leaderID
int64
,
in
*
querypb
.
ReleaseSegmentsRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
leaderID
];
ok
{
...
...
@@ -246,7 +246,7 @@ func (c *queryNodeCluster) releaseSegments(ctx context.Context, leaderID int64,
return
fmt
.
Errorf
(
"releaseSegments: can't find QueryNode by nodeID, nodeID = %d"
,
leaderID
)
}
func
(
c
*
queryNodeCluster
)
w
atchDmChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDmChannelsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
W
atchDmChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDmChannelsRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
nodeID
];
ok
{
...
...
@@ -281,7 +281,7 @@ func (c *queryNodeCluster) watchDmChannels(ctx context.Context, nodeID int64, in
return
fmt
.
Errorf
(
"watchDmChannels: can't find QueryNode by nodeID, nodeID = %d"
,
nodeID
)
}
func
(
c
*
queryNodeCluster
)
w
atchDeltaChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDeltaChannelsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
W
atchDeltaChannels
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
WatchDeltaChannelsRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
nodeID
];
ok
{
...
...
@@ -301,14 +301,14 @@ func (c *queryNodeCluster) watchDeltaChannels(ctx context.Context, nodeID int64,
return
fmt
.
Errorf
(
"watchDeltaChannels: can't find QueryNode by nodeID, nodeID = %d"
,
nodeID
)
}
func
(
c
*
queryNodeCluster
)
h
asWatchedDeltaChannel
(
ctx
context
.
Context
,
nodeID
int64
,
collectionID
UniqueID
)
bool
{
func
(
c
*
queryNodeCluster
)
H
asWatchedDeltaChannel
(
ctx
context
.
Context
,
nodeID
int64
,
collectionID
UniqueID
)
bool
{
c
.
RLock
()
defer
c
.
RUnlock
()
return
c
.
nodes
[
nodeID
]
.
hasWatchedDeltaChannel
(
collectionID
)
}
func
(
c
*
queryNodeCluster
)
r
eleaseCollection
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseCollectionRequest
)
error
{
func
(
c
*
queryNodeCluster
)
R
eleaseCollection
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleaseCollectionRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
nodeID
];
ok
{
...
...
@@ -328,7 +328,7 @@ func (c *queryNodeCluster) releaseCollection(ctx context.Context, nodeID int64,
return
fmt
.
Errorf
(
"releaseCollection: can't find QueryNode by nodeID, nodeID = %d"
,
nodeID
)
}
func
(
c
*
queryNodeCluster
)
r
eleasePartitions
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleasePartitionsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
R
eleasePartitions
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
ReleasePartitionsRequest
)
error
{
c
.
RLock
()
var
targetNode
Node
if
node
,
ok
:=
c
.
nodes
[
nodeID
];
ok
{
...
...
@@ -348,7 +348,7 @@ func (c *queryNodeCluster) releasePartitions(ctx context.Context, nodeID int64,
return
fmt
.
Errorf
(
"releasePartitions: can't find QueryNode by nodeID, nodeID = %d"
,
nodeID
)
}
func
(
c
*
queryNodeCluster
)
g
etSegmentInfoByID
(
ctx
context
.
Context
,
segmentID
UniqueID
)
(
*
querypb
.
SegmentInfo
,
error
)
{
func
(
c
*
queryNodeCluster
)
G
etSegmentInfoByID
(
ctx
context
.
Context
,
segmentID
UniqueID
)
(
*
querypb
.
SegmentInfo
,
error
)
{
segmentInfo
,
err
:=
c
.
clusterMeta
.
getSegmentInfoByID
(
segmentID
)
if
err
!=
nil
{
return
nil
,
err
...
...
@@ -381,7 +381,7 @@ func (c *queryNodeCluster) getSegmentInfoByID(ctx context.Context, segmentID Uni
return
nil
,
fmt
.
Errorf
(
"updateSegmentInfo: can't find segment %d on QueryNode %d"
,
segmentID
,
segmentInfo
.
NodeID
)
}
func
(
c
*
queryNodeCluster
)
g
etSegmentInfo
(
ctx
context
.
Context
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
{
func
(
c
*
queryNodeCluster
)
G
etSegmentInfo
(
ctx
context
.
Context
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
{
type
respTuple
struct
{
res
*
querypb
.
GetSegmentInfoResponse
err
error
...
...
@@ -447,7 +447,7 @@ func (c *queryNodeCluster) getSegmentInfo(ctx context.Context, in *querypb.GetSe
return
segmentInfos
,
nil
}
func
(
c
*
queryNodeCluster
)
g
etSegmentInfoByNode
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
{
func
(
c
*
queryNodeCluster
)
G
etSegmentInfoByNode
(
ctx
context
.
Context
,
nodeID
int64
,
in
*
querypb
.
GetSegmentInfoRequest
)
([]
*
querypb
.
SegmentInfo
,
error
)
{
c
.
RLock
()
node
,
ok
:=
c
.
nodes
[
nodeID
]
c
.
RUnlock
()
...
...
@@ -462,7 +462,7 @@ func (c *queryNodeCluster) getSegmentInfoByNode(ctx context.Context, nodeID int6
return
res
.
GetInfos
(),
nil
}
func
(
c
*
queryNodeCluster
)
s
yncReplicaSegments
(
ctx
context
.
Context
,
leaderID
UniqueID
,
in
*
querypb
.
SyncReplicaSegmentsRequest
)
error
{
func
(
c
*
queryNodeCluster
)
S
yncReplicaSegments
(
ctx
context
.
Context
,
leaderID
UniqueID
,
in
*
querypb
.
SyncReplicaSegmentsRequest
)
error
{
c
.
RLock
()
leader
,
ok
:=
c
.
nodes
[
leaderID
]
c
.
RUnlock
()
...
...
@@ -478,7 +478,7 @@ type queryNodeGetMetricsResponse struct {
err
error
}
func
(
c
*
queryNodeCluster
)
g
etMetrics
(
ctx
context
.
Context
,
in
*
milvuspb
.
GetMetricsRequest
)
[]
queryNodeGetMetricsResponse
{
func
(
c
*
queryNodeCluster
)
G
etMetrics
(
ctx
context
.
Context
,
in
*
milvuspb
.
GetMetricsRequest
)
[]
queryNodeGetMetricsResponse
{
c
.
RLock
()
var
wg
sync
.
WaitGroup
cnt
:=
len
(
c
.
nodes
)
...
...
@@ -527,7 +527,7 @@ func (c *queryNodeCluster) setNodeState(nodeID int64, node Node, state nodeState
node
.
setState
(
state
)
}
func
(
c
*
queryNodeCluster
)
r
egisterNode
(
ctx
context
.
Context
,
session
*
sessionutil
.
Session
,
id
UniqueID
,
state
nodeState
)
error
{
func
(
c
*
queryNodeCluster
)
R
egisterNode
(
ctx
context
.
Context
,
session
*
sessionutil
.
Session
,
id
UniqueID
,
state
nodeState
)
error
{
c
.
Lock
()
defer
c
.
Unlock
()
...
...
@@ -559,7 +559,7 @@ func (c *queryNodeCluster) registerNode(ctx context.Context, session *sessionuti
return
fmt
.
Errorf
(
"registerNode: QueryNode %d alredy exists in cluster"
,
id
)
}
func
(
c
*
queryNodeCluster
)
g
etNodeInfoByID
(
nodeID
int64
)
(
Node
,
error
)
{
func
(
c
*
queryNodeCluster
)
G
etNodeInfoByID
(
nodeID
int64
)
(
Node
,
error
)
{
c
.
RLock
()
node
,
ok
:=
c
.
nodes
[
nodeID
]
c
.
RUnlock
()
...
...
@@ -574,7 +574,7 @@ func (c *queryNodeCluster) getNodeInfoByID(nodeID int64) (Node, error) {
return
nodeInfo
,
nil
}
func
(
c
*
queryNodeCluster
)
r
emoveNodeInfo
(
nodeID
int64
)
error
{
func
(
c
*
queryNodeCluster
)
R
emoveNodeInfo
(
nodeID
int64
)
error
{
c
.
Lock
()
defer
c
.
Unlock
()
...
...
@@ -591,7 +591,7 @@ func (c *queryNodeCluster) removeNodeInfo(nodeID int64) error {
return
nil
}
func
(
c
*
queryNodeCluster
)
s
topNode
(
nodeID
int64
)
{
func
(
c
*
queryNodeCluster
)
S
topNode
(
nodeID
int64
)
{
c
.
RLock
()
defer
c
.
RUnlock
()
...
...
@@ -602,7 +602,7 @@ func (c *queryNodeCluster) stopNode(nodeID int64) {
}
}
func
(
c
*
queryNodeCluster
)
o
nlineNodeIDs
()
[]
int64
{
func
(
c
*
queryNodeCluster
)
O
nlineNodeIDs
()
[]
int64
{
c
.
RLock
()
defer
c
.
RUnlock
()
...
...
@@ -616,7 +616,7 @@ func (c *queryNodeCluster) onlineNodeIDs() []int64 {
return
onlineNodeIDs
}
func
(
c
*
queryNodeCluster
)
o
fflineNodeIDs
()
[]
int64
{
func
(
c
*
queryNodeCluster
)
O
fflineNodeIDs
()
[]
int64
{
c
.
RLock
()
defer
c
.
RUnlock
()
...
...
@@ -630,7 +630,7 @@ func (c *queryNodeCluster) offlineNodeIDs() []int64 {
return
offlineNodeIDs
}
func
(
c
*
queryNodeCluster
)
h
asNode
(
nodeID
int64
)
bool
{
func
(
c
*
queryNodeCluster
)
H
asNode
(
nodeID
int64
)
bool
{
c
.
RLock
()
defer
c
.
RUnlock
()
...
...
@@ -641,7 +641,7 @@ func (c *queryNodeCluster) hasNode(nodeID int64) bool {
return
false
}
func
(
c
*
queryNodeCluster
)
i
sOnline
(
nodeID
int64
)
(
bool
,
error
)
{
func
(
c
*
queryNodeCluster
)
I
sOnline
(
nodeID
int64
)
(
bool
,
error
)
{
c
.
RLock
()
defer
c
.
RUnlock
()
...
...
@@ -667,17 +667,17 @@ func (c *queryNodeCluster) isOnline(nodeID int64) (bool, error) {
// }
//}
func
(
c
*
queryNodeCluster
)
a
llocateSegmentsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
LoadSegmentsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
{
func
(
c
*
queryNodeCluster
)
A
llocateSegmentsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
LoadSegmentsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
{
return
c
.
segmentAllocator
(
ctx
,
reqs
,
c
,
c
.
clusterMeta
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
}
func
(
c
*
queryNodeCluster
)
a
llocateChannelsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
WatchDmChannelsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
{
func
(
c
*
queryNodeCluster
)
A
llocateChannelsToQueryNode
(
ctx
context
.
Context
,
reqs
[]
*
querypb
.
WatchDmChannelsRequest
,
wait
bool
,
excludeNodeIDs
[]
int64
,
includeNodeIDs
[]
int64
,
replicaID
int64
)
error
{
return
c
.
channelAllocator
(
ctx
,
reqs
,
c
,
c
.
clusterMeta
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
}
// Return error if no enough nodes/resources to create replicas
func
(
c
*
queryNodeCluster
)
a
ssignNodesToReplicas
(
ctx
context
.
Context
,
replicas
[]
*
milvuspb
.
ReplicaInfo
,
collectionSize
uint64
)
error
{
nodeIds
:=
c
.
o
nlineNodeIDs
()
func
(
c
*
queryNodeCluster
)
A
ssignNodesToReplicas
(
ctx
context
.
Context
,
replicas
[]
*
milvuspb
.
ReplicaInfo
,
collectionSize
uint64
)
error
{
nodeIds
:=
c
.
O
nlineNodeIDs
()
if
len
(
nodeIds
)
<
len
(
replicas
)
{
return
fmt
.
Errorf
(
"no enough nodes to create replicas, node_num=%d replica_num=%d"
,
len
(
nodeIds
),
len
(
replicas
))
}
...
...
@@ -725,7 +725,7 @@ func getNodeInfos(cluster Cluster, nodeIds []UniqueID) []*queryNode {
wg
.
Add
(
1
)
go
func
(
id
UniqueID
)
{
defer
wg
.
Done
()
info
,
err
:=
cluster
.
g
etNodeInfoByID
(
id
)
info
,
err
:=
cluster
.
G
etNodeInfoByID
(
id
)
if
err
!=
nil
{
return
}
...
...
internal/querycoord/cluster_test.go
浏览文件 @
cc69c5cd
...
...
@@ -430,7 +430,7 @@ func TestGrpcRequest(t *testing.T) {
handler
,
err
:=
newChannelUnsubscribeHandler
(
baseCtx
,
kv
,
factory
)
assert
.
Nil
(
t
,
err
)
cluster
:
=
&
queryNodeCluster
{
var
cluster
Cluster
=
&
queryNodeCluster
{
ctx
:
baseCtx
,
cancel
:
cancel
,
client
:
kv
,
...
...
@@ -442,7 +442,7 @@ func TestGrpcRequest(t *testing.T) {
}
t
.
Run
(
"Test GetNodeInfoByIDWithNodeNotExist"
,
func
(
t
*
testing
.
T
)
{
_
,
err
:=
cluster
.
g
etNodeInfoByID
(
defaultQueryNodeID
)
_
,
err
:=
cluster
.
G
etNodeInfoByID
(
defaultQueryNodeID
)
assert
.
NotNil
(
t
,
err
)
})
...
...
@@ -453,7 +453,7 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfoByNode
(
baseCtx
,
defaultQueryNodeID
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfoByNode
(
baseCtx
,
defaultQueryNodeID
,
getSegmentInfoReq
)
assert
.
NotNil
(
t
,
err
)
})
...
...
@@ -461,7 +461,7 @@ func TestGrpcRequest(t *testing.T) {
assert
.
Nil
(
t
,
err
)
nodeSession
:=
node
.
session
nodeID
:=
node
.
queryNodeID
cluster
.
r
egisterNode
(
baseCtx
,
nodeSession
,
nodeID
,
disConnect
)
cluster
.
R
egisterNode
(
baseCtx
,
nodeSession
,
nodeID
,
disConnect
)
waitQueryNodeOnline
(
cluster
,
nodeID
)
t
.
Run
(
"Test GetComponentInfos"
,
func
(
t
*
testing
.
T
)
{
...
...
@@ -481,7 +481,7 @@ func TestGrpcRequest(t *testing.T) {
Schema
:
genDefaultCollectionSchema
(
false
),
CollectionID
:
defaultCollectionID
,
}
err
:=
cluster
.
l
oadSegments
(
baseCtx
,
nodeID
,
loadSegmentReq
)
err
:=
cluster
.
L
oadSegments
(
baseCtx
,
nodeID
,
loadSegmentReq
)
assert
.
Nil
(
t
,
err
)
})
...
...
@@ -492,7 +492,7 @@ func TestGrpcRequest(t *testing.T) {
PartitionIDs
:
[]
UniqueID
{
defaultPartitionID
},
SegmentIDs
:
[]
UniqueID
{
defaultSegmentID
},
}
err
:=
cluster
.
r
eleaseSegments
(
baseCtx
,
nodeID
,
releaseSegmentReq
)
err
:=
cluster
.
R
eleaseSegments
(
baseCtx
,
nodeID
,
releaseSegmentReq
)
assert
.
Nil
(
t
,
err
)
})
...
...
@@ -503,7 +503,7 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfo
(
baseCtx
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfo
(
baseCtx
,
getSegmentInfoReq
)
assert
.
Nil
(
t
,
err
)
})
...
...
@@ -514,7 +514,7 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
assert
.
Nil
(
t
,
err
)
})
...
...
@@ -527,7 +527,7 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfo
(
baseCtx
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfo
(
baseCtx
,
getSegmentInfoReq
)
assert
.
NotNil
(
t
,
err
)
})
...
...
@@ -538,14 +538,14 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
assert
.
NotNil
(
t
,
err
)
})
node
.
getSegmentInfos
=
returnSuccessGetSegmentInfoResult
t
.
Run
(
"Test GetNodeInfoByID"
,
func
(
t
*
testing
.
T
)
{
res
,
err
:=
cluster
.
g
etNodeInfoByID
(
nodeID
)
res
,
err
:=
cluster
.
G
etNodeInfoByID
(
nodeID
)
assert
.
Nil
(
t
,
err
)
assert
.
NotNil
(
t
,
res
)
})
...
...
@@ -553,13 +553,13 @@ func TestGrpcRequest(t *testing.T) {
node
.
getMetrics
=
returnFailedGetMetricsResult
t
.
Run
(
"Test GetNodeInfoByIDFailed"
,
func
(
t
*
testing
.
T
)
{
_
,
err
:=
cluster
.
g
etNodeInfoByID
(
nodeID
)
_
,
err
:=
cluster
.
G
etNodeInfoByID
(
nodeID
)
assert
.
NotNil
(
t
,
err
)
})
node
.
getMetrics
=
returnSuccessGetMetricsResult
cluster
.
s
topNode
(
nodeID
)
cluster
.
S
topNode
(
nodeID
)
t
.
Run
(
"Test GetSegmentInfoByNodeAfterNodeStop"
,
func
(
t
*
testing
.
T
)
{
getSegmentInfoReq
:=
&
querypb
.
GetSegmentInfoRequest
{
Base
:
&
commonpb
.
MsgBase
{
...
...
@@ -567,7 +567,7 @@ func TestGrpcRequest(t *testing.T) {
},
CollectionID
:
defaultCollectionID
,
}
_
,
err
=
cluster
.
g
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
_
,
err
=
cluster
.
G
etSegmentInfoByNode
(
baseCtx
,
nodeID
,
getSegmentInfoReq
)
assert
.
NotNil
(
t
,
err
)
})
...
...
@@ -608,7 +608,7 @@ func TestSetNodeState(t *testing.T) {
node
,
err
:=
startQueryNodeServer
(
baseCtx
)
assert
.
Nil
(
t
,
err
)
err
=
cluster
.
r
egisterNode
(
baseCtx
,
node
.
session
,
node
.
queryNodeID
,
disConnect
)
err
=
cluster
.
R
egisterNode
(
baseCtx
,
node
.
session
,
node
.
queryNodeID
,
disConnect
)
assert
.
Nil
(
t
,
err
)
waitQueryNodeOnline
(
cluster
,
node
.
queryNodeID
)
...
...
@@ -627,7 +627,7 @@ func TestSetNodeState(t *testing.T) {
err
=
meta
.
setDeltaChannel
(
defaultCollectionID
,
[]
*
datapb
.
VchannelInfo
{
deltaChannelInfo
})
assert
.
Nil
(
t
,
err
)
nodeInfo
,
err
:=
cluster
.
g
etNodeInfoByID
(
node
.
queryNodeID
)
nodeInfo
,
err
:=
cluster
.
G
etNodeInfoByID
(
node
.
queryNodeID
)
assert
.
Nil
(
t
,
err
)
cluster
.
setNodeState
(
node
.
queryNodeID
,
nodeInfo
,
offline
)
assert
.
Equal
(
t
,
1
,
len
(
handler
.
downNodeChan
))
...
...
internal/querycoord/impl.go
浏览文件 @
cc69c5cd
...
...
@@ -850,7 +850,7 @@ func (qc *QueryCoord) GetSegmentInfo(ctx context.Context, req *querypb.GetSegmen
//TODO::get segment infos from MetaReplica
//segmentIDs := req.SegmentIDs
//segmentInfos, err := qs.MetaReplica.getSegmentInfos(segmentIDs)
segmentInfos
,
err
:=
qc
.
cluster
.
g
etSegmentInfo
(
ctx
,
req
)
segmentInfos
,
err
:=
qc
.
cluster
.
G
etSegmentInfo
(
ctx
,
req
)
if
err
!=
nil
{
status
.
ErrorCode
=
commonpb
.
ErrorCode_UnexpectedError
status
.
Reason
=
err
.
Error
()
...
...
@@ -1134,7 +1134,7 @@ func (qc *QueryCoord) GetShardLeaders(ctx context.Context, req *querypb.GetShard
}
}
isShardAvailable
,
err
:=
qc
.
cluster
.
i
sOnline
(
shard
.
LeaderID
)
isShardAvailable
,
err
:=
qc
.
cluster
.
I
sOnline
(
shard
.
LeaderID
)
if
err
!=
nil
||
!
isShardAvailable
{
log
.
Warn
(
"shard leader is unavailable"
,
zap
.
Int64
(
"collectionID"
,
replica
.
CollectionID
),
...
...
@@ -1148,7 +1148,7 @@ func (qc *QueryCoord) GetShardLeaders(ctx context.Context, req *querypb.GetShard
nodes
:=
shardNodes
[
shard
.
DmChannelName
]
for
_
,
nodeID
:=
range
replica
.
NodeIds
{
if
_
,
ok
:=
nodes
[
nodeID
];
ok
{
if
ok
,
err
:=
qc
.
cluster
.
i
sOnline
(
nodeID
);
err
!=
nil
||
!
ok
{
if
ok
,
err
:=
qc
.
cluster
.
I
sOnline
(
nodeID
);
err
!=
nil
||
!
ok
{
isShardAvailable
=
false
break
}
...
...
internal/querycoord/impl_test.go
浏览文件 @
cc69c5cd
...
...
@@ -441,7 +441,7 @@ func TestGrpcTaskEnqueueFail(t *testing.T) {
queryCoord
.
scheduler
.
taskIDAllocator
=
failedAllocator
waitQueryNodeOnline
(
queryCoord
.
cluster
,
queryNode
.
queryNodeID
)
assert
.
NotEmpty
(
t
,
queryCoord
.
cluster
.
o
nlineNodeIDs
())
assert
.
NotEmpty
(
t
,
queryCoord
.
cluster
.
O
nlineNodeIDs
())
t
.
Run
(
"Test LoadPartition"
,
func
(
t
*
testing
.
T
)
{
status
,
err
:=
queryCoord
.
LoadPartitions
(
ctx
,
&
querypb
.
LoadPartitionsRequest
{
...
...
@@ -579,7 +579,7 @@ func TestLoadBalanceTask(t *testing.T) {
}
}
nodeID
:=
queryNode1
.
queryNodeID
queryCoord
.
cluster
.
s
topNode
(
nodeID
)
queryCoord
.
cluster
.
S
topNode
(
nodeID
)
loadBalanceSegment
:=
&
querypb
.
LoadBalanceRequest
{
Base
:
&
commonpb
.
MsgBase
{
MsgType
:
commonpb
.
MsgType_LoadBalanceSegments
,
...
...
@@ -914,7 +914,7 @@ func TestLoadCollectionWithReplicas(t *testing.T) {
}
// load collection with 3 replicas, but no enough querynodes
assert
.
Equal
(
t
,
2
,
len
(
queryCoord
.
cluster
.
o
nlineNodeIDs
()))
assert
.
Equal
(
t
,
2
,
len
(
queryCoord
.
cluster
.
O
nlineNodeIDs
()))
status
,
err
:=
queryCoord
.
LoadCollection
(
ctx
,
loadCollectionReq
)
assert
.
NoError
(
t
,
err
)
assert
.
Equal
(
t
,
commonpb
.
ErrorCode_UnexpectedError
,
status
.
ErrorCode
)
...
...
@@ -988,7 +988,7 @@ func TestLoadPartitionsWithReplicas(t *testing.T) {
}
// load collection with 3 replicas, but no enough querynodes
assert
.
Equal
(
t
,
2
,
len
(
queryCoord
.
cluster
.
o
nlineNodeIDs
()))
assert
.
Equal
(
t
,
2
,
len
(
queryCoord
.
cluster
.
O
nlineNodeIDs
()))
status
,
err
:=
queryCoord
.
LoadPartitions
(
ctx
,
loadPartitionsReq
)
assert
.
NoError
(
t
,
err
)
assert
.
Equal
(
t
,
commonpb
.
ErrorCode_UnexpectedError
,
status
.
ErrorCode
)
...
...
internal/querycoord/meta.go
浏览文件 @
cc69c5cd
...
...
@@ -306,7 +306,7 @@ func reloadShardLeaderAddress(meta Meta, cluster Cluster) error {
isModified
:=
false
for
_
,
shard
:=
range
replica
.
ShardReplicas
{
if
len
(
shard
.
LeaderAddr
)
==
0
{
nodeInfo
,
err
:=
cluster
.
g
etNodeInfoByID
(
shard
.
LeaderID
)
nodeInfo
,
err
:=
cluster
.
G
etNodeInfoByID
(
shard
.
LeaderID
)
if
err
!=
nil
{
log
.
Warn
(
"failed to retrieve the node's address"
,
zap
.
Int64
(
"nodeID"
,
shard
.
LeaderID
),
...
...
internal/querycoord/metrics_info.go
浏览文件 @
cc69c5cd
...
...
@@ -66,7 +66,7 @@ func getSystemInfoMetrics(
}
metricsinfo
.
FillDeployMetricsWithEnv
(
&
clusterTopology
.
Self
.
SystemInfo
)
nodesMetrics
:=
qc
.
cluster
.
g
etMetrics
(
ctx
,
req
)
nodesMetrics
:=
qc
.
cluster
.
G
etMetrics
(
ctx
,
req
)
for
_
,
nodeMetrics
:=
range
nodesMetrics
{
if
nodeMetrics
.
err
!=
nil
{
log
.
Warn
(
"invalid metrics of query node was found"
,
...
...
internal/querycoord/mock_cluster.go
浏览文件 @
cc69c5cd
...
...
@@ -28,10 +28,10 @@ func NewMockCluster(cluster Cluster) *MockCluster {
}
}
func
(
mock
*
MockCluster
)
i
sOnline
(
nodeID
int64
)
(
bool
,
error
)
{
func
(
mock
*
MockCluster
)
I
sOnline
(
nodeID
int64
)
(
bool
,
error
)
{
if
mock
.
isOnlineHandler
!=
nil
{
return
mock
.
isOnlineHandler
(
nodeID
)
}
return
mock
.
Cluster
.
i
sOnline
(
nodeID
)
return
mock
.
Cluster
.
I
sOnline
(
nodeID
)
}
internal/querycoord/query_coord.go
浏览文件 @
cc69c5cd
...
...
@@ -346,7 +346,7 @@ func (qc *QueryCoord) watchNodeLoop() {
}
}
offlineNodeIDs
:=
qc
.
cluster
.
o
fflineNodeIDs
()
offlineNodeIDs
:=
qc
.
cluster
.
O
fflineNodeIDs
()
if
len
(
offlineNodeIDs
)
!=
0
{
loadBalanceSegment
:=
&
querypb
.
LoadBalanceRequest
{
Base
:
&
commonpb
.
MsgBase
{
...
...
@@ -371,7 +371,7 @@ func (qc *QueryCoord) watchNodeLoop() {
}
// TODO silverxia add Rewatch logic
qc
.
eventChan
=
qc
.
session
.
WatchServices
(
typeutil
.
QueryNodeRole
,
qc
.
cluster
.
g
etSessionVersion
()
+
1
,
nil
)
qc
.
eventChan
=
qc
.
session
.
WatchServices
(
typeutil
.
QueryNodeRole
,
qc
.
cluster
.
G
etSessionVersion
()
+
1
,
nil
)
qc
.
handleNodeEvent
(
ctx
)
}
...
...
@@ -388,7 +388,7 @@ func (qc *QueryCoord) allocateNode(nodeID int64) error {
return
nil
}
func
(
qc
*
QueryCoord
)
getUnallocatedNodes
()
[]
int64
{
onlines
:=
qc
.
cluster
.
o
nlineNodeIDs
()
onlines
:=
qc
.
cluster
.
O
nlineNodeIDs
()
var
ret
[]
int64
for
_
,
n
:=
range
onlines
{
replica
,
err
:=
qc
.
meta
.
getReplicasByNodeID
(
n
)
...
...
@@ -429,7 +429,7 @@ func (qc *QueryCoord) handleNodeEvent(ctx context.Context) {
case
sessionutil
.
SessionAddEvent
:
serverID
:=
event
.
Session
.
ServerID
log
.
Info
(
"start add a QueryNode to cluster"
,
zap
.
Any
(
"nodeID"
,
serverID
))
err
:=
qc
.
cluster
.
r
egisterNode
(
ctx
,
event
.
Session
,
serverID
,
disConnect
)
err
:=
qc
.
cluster
.
R
egisterNode
(
ctx
,
event
.
Session
,
serverID
,
disConnect
)
if
err
!=
nil
{
log
.
Error
(
"QueryCoord failed to register a QueryNode"
,
zap
.
Int64
(
"nodeID"
,
serverID
),
zap
.
String
(
"error info"
,
err
.
Error
()))
continue
...
...
@@ -444,13 +444,13 @@ func (qc *QueryCoord) handleNodeEvent(ctx context.Context) {
case
sessionutil
.
SessionDelEvent
:
serverID
:=
event
.
Session
.
ServerID
log
.
Info
(
"get a del event after QueryNode down"
,
zap
.
Int64
(
"nodeID"
,
serverID
))
nodeExist
:=
qc
.
cluster
.
h
asNode
(
serverID
)
nodeExist
:=
qc
.
cluster
.
H
asNode
(
serverID
)
if
!
nodeExist
{
log
.
Error
(
"QueryNode not exist"
,
zap
.
Int64
(
"nodeID"
,
serverID
))
continue
}
qc
.
cluster
.
s
topNode
(
serverID
)
qc
.
cluster
.
S
topNode
(
serverID
)
offlineNodeCh
<-
serverID
}
}
...
...
@@ -599,7 +599,7 @@ func (qc *QueryCoord) loadBalanceSegmentLoop() {
nodeID2SegmentInfos
:=
make
(
map
[
int64
]
map
[
UniqueID
]
*
querypb
.
SegmentInfo
)
for
_
,
nodeID
:=
range
onlineNodeIDs
{
if
_
,
ok
:=
nodeID2MemUsage
[
nodeID
];
!
ok
{
nodeInfo
,
err
:=
qc
.
cluster
.
g
etNodeInfoByID
(
nodeID
)
nodeInfo
,
err
:=
qc
.
cluster
.
G
etNodeInfoByID
(
nodeID
)
if
err
!=
nil
{
log
.
Warn
(
"loadBalanceSegmentLoop: get node info from QueryNode failed"
,
zap
.
Int64
(
"nodeID"
,
nodeID
),
zap
.
Int64
(
"collection"
,
replica
.
CollectionID
),
zap
.
Int64
(
"replica"
,
replica
.
ReplicaID
),
...
...
@@ -615,7 +615,7 @@ func (qc *QueryCoord) loadBalanceSegmentLoop() {
leastSegmentInfos
:=
make
(
map
[
UniqueID
]
*
querypb
.
SegmentInfo
)
segmentInfos
:=
qc
.
meta
.
getSegmentInfosByNodeAndCollection
(
nodeID
,
replica
.
GetCollectionID
())
for
_
,
segmentInfo
:=
range
segmentInfos
{
leastInfo
,
err
:=
qc
.
cluster
.
g
etSegmentInfoByID
(
ctx
,
segmentInfo
.
SegmentID
)
leastInfo
,
err
:=
qc
.
cluster
.
G
etSegmentInfoByID
(
ctx
,
segmentInfo
.
SegmentID
)
if
err
!=
nil
{
log
.
Warn
(
"loadBalanceSegmentLoop: get segment info from QueryNode failed"
,
zap
.
Int64
(
"nodeID"
,
nodeID
),
zap
.
Int64
(
"collection"
,
replica
.
CollectionID
),
zap
.
Int64
(
"replica"
,
replica
.
ReplicaID
),
...
...
internal/querycoord/query_coord_test.go
浏览文件 @
cc69c5cd
...
...
@@ -187,7 +187,7 @@ func TestWatchNodeLoop(t *testing.T) {
assert
.
Nil
(
t
,
err
)
for
{
offlineNodeIDs
:=
queryCoord
.
cluster
.
o
fflineNodeIDs
()
offlineNodeIDs
:=
queryCoord
.
cluster
.
O
fflineNodeIDs
()
if
len
(
offlineNodeIDs
)
!=
0
{
log
.
Warn
(
"find offline Nodes"
,
zap
.
Int64s
(
"offlineNodeIDs"
,
offlineNodeIDs
))
break
...
...
@@ -233,7 +233,7 @@ func TestWatchNodeLoop(t *testing.T) {
nodeID
:=
queryNode1
.
queryNodeID
waitQueryNodeOnline
(
queryCoord
.
cluster
,
nodeID
)
onlineNodeIDs
:=
queryCoord
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
queryCoord
.
cluster
.
O
nlineNodeIDs
()
assert
.
Equal
(
t
,
1
,
len
(
onlineNodeIDs
))
queryNode1
.
stop
()
...
...
@@ -598,7 +598,7 @@ func TestLoadBalanceSegmentLoop(t *testing.T) {
err
=
queryCoord
.
scheduler
.
Enqueue
(
loadPartitionTask
)
assert
.
Nil
(
t
,
err
)
waitTaskFinalState
(
loadPartitionTask
,
taskExpired
)
nodeInfo
,
err
:=
queryCoord
.
cluster
.
g
etNodeInfoByID
(
queryNode1
.
queryNodeID
)
nodeInfo
,
err
:=
queryCoord
.
cluster
.
G
etNodeInfoByID
(
queryNode1
.
queryNodeID
)
assert
.
Nil
(
t
,
err
)
if
nodeInfo
.
(
*
queryNode
)
.
memUsageRate
>=
Params
.
QueryCoordCfg
.
OverloadedMemoryThresholdPercentage
{
break
...
...
@@ -612,7 +612,7 @@ func TestLoadBalanceSegmentLoop(t *testing.T) {
// if sealed has been balance to query node2, than balance work
for
{
segmentInfos
,
err
:=
queryCoord
.
cluster
.
g
etSegmentInfoByNode
(
baseCtx
,
queryNode2
.
queryNodeID
,
&
querypb
.
GetSegmentInfoRequest
{
segmentInfos
,
err
:=
queryCoord
.
cluster
.
G
etSegmentInfoByNode
(
baseCtx
,
queryNode2
.
queryNodeID
,
&
querypb
.
GetSegmentInfoRequest
{
Base
:
&
commonpb
.
MsgBase
{
MsgType
:
commonpb
.
MsgType_LoadBalanceSegments
,
},
...
...
internal/querycoord/querynode_test.go
浏览文件 @
cc69c5cd
...
...
@@ -69,7 +69,7 @@ func waitAllQueryNodeOffline(cluster Cluster, nodeIDs []int64) bool {
for
{
allOffline
:=
true
for
_
,
nodeID
:=
range
nodeIDs
{
isOnline
,
err
:=
cluster
.
i
sOnline
(
nodeID
)
isOnline
,
err
:=
cluster
.
I
sOnline
(
nodeID
)
if
err
==
nil
&&
isOnline
{
allOffline
=
false
break
...
...
@@ -85,7 +85,7 @@ func waitAllQueryNodeOffline(cluster Cluster, nodeIDs []int64) bool {
func
waitQueryNodeOnline
(
cluster
Cluster
,
nodeID
int64
)
{
for
{
online
,
err
:=
cluster
.
i
sOnline
(
nodeID
)
online
,
err
:=
cluster
.
I
sOnline
(
nodeID
)
if
err
!=
nil
{
continue
}
...
...
@@ -130,7 +130,7 @@ func TestQueryNode_MultiNode_stop(t *testing.T) {
})
assert
.
Nil
(
t
,
err
)
time
.
Sleep
(
100
*
time
.
Millisecond
)
onlineNodeIDs
:=
queryCoord
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
queryCoord
.
cluster
.
O
nlineNodeIDs
()
assert
.
NotEqual
(
t
,
0
,
len
(
onlineNodeIDs
))
queryNode2
.
stop
()
err
=
removeNodeSession
(
queryNode2
.
queryNodeID
)
...
...
@@ -176,7 +176,7 @@ func TestQueryNode_MultiNode_reStart(t *testing.T) {
CollectionID
:
defaultCollectionID
,
})
assert
.
Nil
(
t
,
err
)
onlineNodeIDs
:=
queryCoord
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
queryCoord
.
cluster
.
O
nlineNodeIDs
()
assert
.
NotEqual
(
t
,
0
,
len
(
onlineNodeIDs
))
queryNode3
.
stop
()
err
=
removeNodeSession
(
queryNode3
.
queryNodeID
)
...
...
internal/querycoord/segment_allocator.go
浏览文件 @
cc69c5cd
...
...
@@ -45,7 +45,7 @@ func shuffleSegmentsToQueryNode(ctx context.Context, reqs []*querypb.LoadSegment
}
for
{
onlineNodeIDs
:=
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
cluster
.
O
nlineNodeIDs
()
if
len
(
onlineNodeIDs
)
==
0
{
err
:=
errors
.
New
(
"no online QueryNode to allocate"
)
log
.
Error
(
"shuffleSegmentsToQueryNode failed"
,
zap
.
Error
(
err
))
...
...
@@ -117,7 +117,7 @@ func shuffleSegmentsToQueryNodeV2(ctx context.Context, reqs []*querypb.LoadSegme
memUsageRate
:=
make
(
map
[
int64
]
float64
)
var
onlineNodeIDs
[]
int64
if
replicaID
==
-
1
{
onlineNodeIDs
=
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
=
cluster
.
O
nlineNodeIDs
()
}
else
{
replica
,
err
:=
metaCache
.
getReplicaByID
(
replicaID
)
if
err
!=
nil
{
...
...
@@ -125,7 +125,7 @@ func shuffleSegmentsToQueryNodeV2(ctx context.Context, reqs []*querypb.LoadSegme
}
replicaNodes
:=
replica
.
GetNodeIds
()
for
_
,
nodeID
:=
range
replicaNodes
{
if
ok
,
err
:=
cluster
.
i
sOnline
(
nodeID
);
err
==
nil
&&
ok
{
if
ok
,
err
:=
cluster
.
I
sOnline
(
nodeID
);
err
==
nil
&&
ok
{
onlineNodeIDs
=
append
(
onlineNodeIDs
,
nodeID
)
}
}
...
...
@@ -148,7 +148,7 @@ func shuffleSegmentsToQueryNodeV2(ctx context.Context, reqs []*querypb.LoadSegme
continue
}
// statistic nodeInfo, used memory, memory usage of every query node
nodeInfo
,
err
:=
cluster
.
g
etNodeInfoByID
(
nodeID
)
nodeInfo
,
err
:=
cluster
.
G
etNodeInfoByID
(
nodeID
)
if
err
!=
nil
{
log
.
Warn
(
"shuffleSegmentsToQueryNodeV2: getNodeInfoByID failed"
,
zap
.
Error
(
err
))
continue
...
...
internal/querycoord/segment_allocator_test.go
浏览文件 @
cc69c5cd
...
...
@@ -99,7 +99,7 @@ func TestShuffleSegmentsToQueryNode(t *testing.T) {
assert
.
Nil
(
t
,
err
)
node1Session
:=
node1
.
session
node1ID
:=
node1
.
queryNodeID
cluster
.
r
egisterNode
(
baseCtx
,
node1Session
,
node1ID
,
disConnect
)
cluster
.
R
egisterNode
(
baseCtx
,
node1Session
,
node1ID
,
disConnect
)
waitQueryNodeOnline
(
cluster
,
node1ID
)
t
.
Run
(
"Test shuffleSegmentsToQueryNode"
,
func
(
t
*
testing
.
T
)
{
...
...
@@ -114,9 +114,9 @@ func TestShuffleSegmentsToQueryNode(t *testing.T) {
assert
.
Nil
(
t
,
err
)
node2Session
:=
node2
.
session
node2ID
:=
node2
.
queryNodeID
cluster
.
r
egisterNode
(
baseCtx
,
node2Session
,
node2ID
,
disConnect
)
cluster
.
R
egisterNode
(
baseCtx
,
node2Session
,
node2ID
,
disConnect
)
waitQueryNodeOnline
(
cluster
,
node2ID
)
cluster
.
s
topNode
(
node1ID
)
cluster
.
S
topNode
(
node1ID
)
t
.
Run
(
"Test shuffleSegmentsToQueryNodeV2"
,
func
(
t
*
testing
.
T
)
{
err
=
shuffleSegmentsToQueryNodeV2
(
baseCtx
,
reqs
,
cluster
,
meta
,
false
,
nil
,
nil
,
-
1
)
...
...
internal/querycoord/task.go
浏览文件 @
cc69c5cd
...
...
@@ -336,7 +336,7 @@ func (lct *loadCollectionTask) updateTaskProcess() {
// wait watchDeltaChannel task done after loading segment
nodeID
:=
getDstNodeIDByTask
(
t
)
if
t
.
msgType
()
==
commonpb
.
MsgType_LoadSegments
{
if
!
lct
.
cluster
.
h
asWatchedDeltaChannel
(
lct
.
ctx
,
nodeID
,
collectionID
)
{
if
!
lct
.
cluster
.
H
asWatchedDeltaChannel
(
lct
.
ctx
,
nodeID
,
collectionID
)
{
allDone
=
false
break
}
...
...
@@ -456,7 +456,7 @@ func (lct *loadCollectionTask) execute(ctx context.Context) error {
replicaIds
[
i
]
=
replica
.
ReplicaID
}
err
=
lct
.
cluster
.
a
ssignNodesToReplicas
(
ctx
,
replicas
,
collectionSize
)
err
=
lct
.
cluster
.
A
ssignNodesToReplicas
(
ctx
,
replicas
,
collectionSize
)
if
err
!=
nil
{
log
.
Error
(
"failed to assign nodes to replicas"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
...
...
@@ -524,7 +524,7 @@ func (lct *loadCollectionTask) execute(ctx context.Context) error {
for
_
,
internalTask
:=
range
internalTasks
{
lct
.
addChildTask
(
internalTask
)
if
task
,
ok
:=
internalTask
.
(
*
watchDmChannelTask
);
ok
{
nodeInfo
,
err
:=
lct
.
cluster
.
g
etNodeInfoByID
(
task
.
NodeID
)
nodeInfo
,
err
:=
lct
.
cluster
.
G
etNodeInfoByID
(
task
.
NodeID
)
if
err
!=
nil
{
log
.
Error
(
"loadCollectionTask: get shard leader node info failed"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
...
...
@@ -593,7 +593,7 @@ func (lct *loadCollectionTask) postExecute(ctx context.Context) error {
}
func
(
lct
*
loadCollectionTask
)
rollBack
(
ctx
context
.
Context
)
[]
task
{
onlineNodeIDs
:=
lct
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
lct
.
cluster
.
O
nlineNodeIDs
()
resultTasks
:=
make
([]
task
,
0
)
for
_
,
nodeID
:=
range
onlineNodeIDs
{
//brute force rollBack, should optimize
...
...
@@ -686,7 +686,7 @@ func (rct *releaseCollectionTask) execute(ctx context.Context) error {
}
// TODO(yah01): broadcast to all nodes? Or only nodes serve the collection
onlineNodeIDs
:=
rct
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
rct
.
cluster
.
O
nlineNodeIDs
()
for
_
,
nodeID
:=
range
onlineNodeIDs
{
req
:=
proto
.
Clone
(
rct
.
ReleaseCollectionRequest
)
.
(
*
querypb
.
ReleaseCollectionRequest
)
req
.
NodeID
=
nodeID
...
...
@@ -704,7 +704,7 @@ func (rct *releaseCollectionTask) execute(ctx context.Context) error {
}
else
{
// If the node crashed or be offline, the loaded segments are lost
defer
rct
.
reduceRetryCount
()
err
:=
rct
.
cluster
.
r
eleaseCollection
(
ctx
,
rct
.
NodeID
,
rct
.
ReleaseCollectionRequest
)
err
:=
rct
.
cluster
.
R
eleaseCollection
(
ctx
,
rct
.
NodeID
,
rct
.
ReleaseCollectionRequest
)
if
err
!=
nil
{
log
.
Warn
(
"releaseCollectionTask: release collection end, node occur error"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
zap
.
Int64
(
"nodeID"
,
rct
.
NodeID
))
// after release failed, the task will always redo
...
...
@@ -780,7 +780,7 @@ func (lpt *loadPartitionTask) updateTaskProcess() {
// wait watchDeltaChannel task done after loading segment
nodeID
:=
getDstNodeIDByTask
(
t
)
if
t
.
msgType
()
==
commonpb
.
MsgType_LoadSegments
{
if
!
lpt
.
cluster
.
h
asWatchedDeltaChannel
(
lpt
.
ctx
,
nodeID
,
collectionID
)
{
if
!
lpt
.
cluster
.
H
asWatchedDeltaChannel
(
lpt
.
ctx
,
nodeID
,
collectionID
)
{
allDone
=
false
break
}
...
...
@@ -889,7 +889,7 @@ func (lpt *loadPartitionTask) execute(ctx context.Context) error {
replicaIds
[
i
]
=
replica
.
ReplicaID
}
err
=
lpt
.
cluster
.
a
ssignNodesToReplicas
(
ctx
,
replicas
,
collectionSize
)
err
=
lpt
.
cluster
.
A
ssignNodesToReplicas
(
ctx
,
replicas
,
collectionSize
)
if
err
!=
nil
{
log
.
Error
(
"failed to assign nodes to replicas"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
...
...
@@ -954,7 +954,7 @@ func (lpt *loadPartitionTask) execute(ctx context.Context) error {
for
_
,
internalTask
:=
range
internalTasks
{
lpt
.
addChildTask
(
internalTask
)
if
task
,
ok
:=
internalTask
.
(
*
watchDmChannelTask
);
ok
{
nodeInfo
,
err
:=
lpt
.
cluster
.
g
etNodeInfoByID
(
task
.
NodeID
)
nodeInfo
,
err
:=
lpt
.
cluster
.
G
etNodeInfoByID
(
task
.
NodeID
)
if
err
!=
nil
{
log
.
Error
(
"loadCollectionTask: get shard leader node info failed"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
...
...
@@ -1031,7 +1031,7 @@ func (lpt *loadPartitionTask) rollBack(ctx context.Context) []task {
collectionID
:=
lpt
.
CollectionID
resultTasks
:=
make
([]
task
,
0
)
//brute force rollBack, should optimize
onlineNodeIDs
:=
lpt
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
lpt
.
cluster
.
O
nlineNodeIDs
()
for
_
,
nodeID
:=
range
onlineNodeIDs
{
req
:=
&
querypb
.
ReleaseCollectionRequest
{
Base
:
&
commonpb
.
MsgBase
{
...
...
@@ -1119,7 +1119,7 @@ func (rpt *releasePartitionTask) execute(ctx context.Context) error {
// if nodeID ==0, it means that the release request has not been assigned to the specified query node
if
rpt
.
NodeID
<=
0
{
onlineNodeIDs
:=
rpt
.
cluster
.
o
nlineNodeIDs
()
onlineNodeIDs
:=
rpt
.
cluster
.
O
nlineNodeIDs
()
for
_
,
nodeID
:=
range
onlineNodeIDs
{
req
:=
proto
.
Clone
(
rpt
.
ReleasePartitionsRequest
)
.
(
*
querypb
.
ReleasePartitionsRequest
)
req
.
NodeID
=
nodeID
...
...
@@ -1137,7 +1137,7 @@ func (rpt *releasePartitionTask) execute(ctx context.Context) error {
}
else
{
// If the node crashed or be offline, the loaded segments are lost
defer
rpt
.
reduceRetryCount
()
err
:=
rpt
.
cluster
.
r
eleasePartitions
(
ctx
,
rpt
.
NodeID
,
rpt
.
ReleasePartitionsRequest
)
err
:=
rpt
.
cluster
.
R
eleasePartitions
(
ctx
,
rpt
.
NodeID
,
rpt
.
ReleasePartitionsRequest
)
if
err
!=
nil
{
log
.
Warn
(
"ReleasePartitionsTask: release partition end, node occur error"
,
zap
.
Int64
(
"collectionID"
,
collectionID
),
zap
.
String
(
"nodeID"
,
fmt
.
Sprintln
(
rpt
.
NodeID
)))
// after release failed, the task will always redo
...
...
@@ -1195,7 +1195,7 @@ func (lst *loadSegmentTask) marshal() ([]byte, error) {
}
func
(
lst
*
loadSegmentTask
)
isValid
()
bool
{
online
,
err
:=
lst
.
cluster
.
i
sOnline
(
lst
.
DstNodeID
)
online
,
err
:=
lst
.
cluster
.
I
sOnline
(
lst
.
DstNodeID
)
if
err
!=
nil
{
return
false
}
...
...
@@ -1242,7 +1242,7 @@ func (lst *loadSegmentTask) preExecute(ctx context.Context) error {
func
(
lst
*
loadSegmentTask
)
execute
(
ctx
context
.
Context
)
error
{
defer
lst
.
reduceRetryCount
()
err
:=
lst
.
cluster
.
l
oadSegments
(
ctx
,
lst
.
DstNodeID
,
lst
.
LoadSegmentsRequest
)
err
:=
lst
.
cluster
.
L
oadSegments
(
ctx
,
lst
.
DstNodeID
,
lst
.
LoadSegmentsRequest
)
if
err
!=
nil
{
log
.
Warn
(
"loadSegmentTask: loadSegment occur error"
,
zap
.
Int64
(
"taskID"
,
lst
.
getTaskID
()))
lst
.
setResultInfo
(
err
)
...
...
@@ -1322,7 +1322,7 @@ func (rst *releaseSegmentTask) marshal() ([]byte, error) {
}
func
(
rst
*
releaseSegmentTask
)
isValid
()
bool
{
online
,
err
:=
rst
.
cluster
.
i
sOnline
(
rst
.
NodeID
)
online
,
err
:=
rst
.
cluster
.
I
sOnline
(
rst
.
NodeID
)
if
err
!=
nil
{
return
false
}
...
...
@@ -1350,7 +1350,7 @@ func (rst *releaseSegmentTask) preExecute(context.Context) error {
func
(
rst
*
releaseSegmentTask
)
execute
(
ctx
context
.
Context
)
error
{
defer
rst
.
reduceRetryCount
()
err
:=
rst
.
cluster
.
r
eleaseSegments
(
rst
.
ctx
,
rst
.
leaderID
,
rst
.
ReleaseSegmentsRequest
)
err
:=
rst
.
cluster
.
R
eleaseSegments
(
rst
.
ctx
,
rst
.
leaderID
,
rst
.
ReleaseSegmentsRequest
)
if
err
!=
nil
{
log
.
Warn
(
"releaseSegmentTask: releaseSegment occur error"
,
zap
.
Int64
(
"taskID"
,
rst
.
getTaskID
()))
rst
.
setResultInfo
(
err
)
...
...
@@ -1388,7 +1388,7 @@ func (wdt *watchDmChannelTask) marshal() ([]byte, error) {
}
func
(
wdt
*
watchDmChannelTask
)
isValid
()
bool
{
online
,
err
:=
wdt
.
cluster
.
i
sOnline
(
wdt
.
NodeID
)
online
,
err
:=
wdt
.
cluster
.
I
sOnline
(
wdt
.
NodeID
)
if
err
!=
nil
{
return
false
}
...
...
@@ -1429,7 +1429,7 @@ func (wdt *watchDmChannelTask) preExecute(context.Context) error {
func
(
wdt
*
watchDmChannelTask
)
execute
(
ctx
context
.
Context
)
error
{
defer
wdt
.
reduceRetryCount
()
err
:=
wdt
.
cluster
.
w
atchDmChannels
(
wdt
.
ctx
,
wdt
.
NodeID
,
wdt
.
WatchDmChannelsRequest
)
err
:=
wdt
.
cluster
.
W
atchDmChannels
(
wdt
.
ctx
,
wdt
.
NodeID
,
wdt
.
WatchDmChannelsRequest
)
if
err
!=
nil
{
log
.
Warn
(
"watchDmChannelTask: watchDmChannel occur error"
,
zap
.
Int64
(
"taskID"
,
wdt
.
getTaskID
()))
wdt
.
setResultInfo
(
err
)
...
...
@@ -1502,7 +1502,7 @@ func (wdt *watchDeltaChannelTask) marshal() ([]byte, error) {
}
func
(
wdt
*
watchDeltaChannelTask
)
isValid
()
bool
{
online
,
err
:=
wdt
.
cluster
.
i
sOnline
(
wdt
.
NodeID
)
online
,
err
:=
wdt
.
cluster
.
I
sOnline
(
wdt
.
NodeID
)
if
err
!=
nil
{
return
false
}
...
...
@@ -1544,7 +1544,7 @@ func (wdt *watchDeltaChannelTask) preExecute(context.Context) error {
func
(
wdt
*
watchDeltaChannelTask
)
execute
(
ctx
context
.
Context
)
error
{
defer
wdt
.
reduceRetryCount
()
err
:=
wdt
.
cluster
.
w
atchDeltaChannels
(
wdt
.
ctx
,
wdt
.
NodeID
,
wdt
.
WatchDeltaChannelsRequest
)
err
:=
wdt
.
cluster
.
W
atchDeltaChannels
(
wdt
.
ctx
,
wdt
.
NodeID
,
wdt
.
WatchDeltaChannelsRequest
)
if
err
!=
nil
{
log
.
Warn
(
"watchDeltaChannelTask: watchDeltaChannel occur error"
,
zap
.
Int64
(
"taskID"
,
wdt
.
getTaskID
()),
zap
.
Error
(
err
))
wdt
.
setResultInfo
(
err
)
...
...
@@ -2042,7 +2042,7 @@ func (lbt *loadBalanceTask) processManualLoadBalance(ctx context.Context) error
balancedSegmentInfos
:=
make
(
map
[
UniqueID
]
*
querypb
.
SegmentInfo
)
balancedSegmentIDs
:=
make
([]
UniqueID
,
0
)
for
_
,
nodeID
:=
range
lbt
.
SourceNodeIDs
{
nodeExist
:=
lbt
.
cluster
.
h
asNode
(
nodeID
)
nodeExist
:=
lbt
.
cluster
.
H
asNode
(
nodeID
)
if
!
nodeExist
{
err
:=
fmt
.
Errorf
(
"loadBalanceTask: query node %d is not exist to balance"
,
nodeID
)
log
.
Error
(
err
.
Error
())
...
...
@@ -2302,7 +2302,7 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
// then the queryCoord will panic, and the nodeInfo should not be removed immediately
// after queryCoord recovery, the balanceTask will redo
for
_
,
offlineNodeID
:=
range
lbt
.
SourceNodeIDs
{
err
:=
lbt
.
cluster
.
r
emoveNodeInfo
(
offlineNodeID
)
err
:=
lbt
.
cluster
.
R
emoveNodeInfo
(
offlineNodeID
)
if
err
!=
nil
{
log
.
Error
(
"loadBalanceTask: occur error when removing node info from cluster"
,
zap
.
Int64
(
"nodeID"
,
offlineNodeID
),
...
...
@@ -2345,7 +2345,7 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
leaderID
:=
task
.
NodeID
dmChannel
:=
task
.
Infos
[
0
]
.
ChannelName
nodeInfo
,
err
:=
lbt
.
cluster
.
g
etNodeInfoByID
(
leaderID
)
nodeInfo
,
err
:=
lbt
.
cluster
.
G
etNodeInfoByID
(
leaderID
)
if
err
!=
nil
{
log
.
Error
(
"failed to get node info to update shard leader info"
,
zap
.
Int64
(
"triggerTaskID"
,
lbt
.
getTaskID
()),
...
...
@@ -2399,14 +2399,14 @@ func assignInternalTask(ctx context.Context,
broker
*
globalMetaBroker
)
([]
task
,
error
)
{
internalTasks
:=
make
([]
task
,
0
)
err
:=
cluster
.
a
llocateSegmentsToQueryNode
(
ctx
,
loadSegmentRequests
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
err
:=
cluster
.
A
llocateSegmentsToQueryNode
(
ctx
,
loadSegmentRequests
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
if
err
!=
nil
{
log
.
Error
(
"assignInternalTask: assign segment to node failed"
,
zap
.
Error
(
err
))
return
nil
,
err
}
log
.
Info
(
"assignInternalTask: assign segment to node success"
,
zap
.
Int
(
"load segments"
,
len
(
loadSegmentRequests
)))
err
=
cluster
.
a
llocateChannelsToQueryNode
(
ctx
,
watchDmChannelRequests
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
err
=
cluster
.
A
llocateChannelsToQueryNode
(
ctx
,
watchDmChannelRequests
,
wait
,
excludeNodeIDs
,
includeNodeIDs
,
replicaID
)
if
err
!=
nil
{
log
.
Error
(
"assignInternalTask: assign dmChannel to node failed"
,
zap
.
Error
(
err
))
return
nil
,
err
...
...
internal/querycoord/task_scheduler.go
浏览文件 @
cc69c5cd
...
...
@@ -1019,7 +1019,7 @@ func generateDerivedInternalTasks(triggerTask task, meta Meta, cluster Cluster)
collectionID
:=
loadSegmentTask
.
CollectionID
replicaID
:=
loadSegmentTask
.
GetReplicaID
()
nodeID
:=
loadSegmentTask
.
DstNodeID
if
!
cluster
.
h
asWatchedDeltaChannel
(
triggerTask
.
traceCtx
(),
nodeID
,
collectionID
)
{
if
!
cluster
.
H
asWatchedDeltaChannel
(
triggerTask
.
traceCtx
(),
nodeID
,
collectionID
)
{
addChannelWatchInfoFn
(
nodeID
,
collectionID
,
replicaID
,
watchDeltaChannelInfo
)
}
}
...
...
internal/querycoord/util.go
浏览文件 @
cc69c5cd
...
...
@@ -188,7 +188,7 @@ func syncReplicaSegments(ctx context.Context, cluster Cluster, childTasks []task
}
}
err
:=
cluster
.
s
yncReplicaSegments
(
ctx
,
leader
.
LeaderID
,
&
req
)
err
:=
cluster
.
S
yncReplicaSegments
(
ctx
,
leader
.
LeaderID
,
&
req
)
if
err
!=
nil
{
return
err
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录