Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
080bed90
M
milvus
项目概览
milvus
/
milvus
大约 1 年 前同步成功
通知
261
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
080bed90
编写于
1月 09, 2023
作者:
S
SimFG
提交者:
GitHub
1月 09, 2023
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Improvement apis and error messages about the graceful stop (#21580)
Signed-off-by:
N
SimFG
<
bang.fu@zilliz.com
>
上级
7b12865c
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
52 addition
and
6 deletion
+52
-6
configs/milvus.yaml
configs/milvus.yaml
+2
-0
internal/querycoordv2/dist/dist_handler.go
internal/querycoordv2/dist/dist_handler.go
+6
-6
internal/querycoordv2/services.go
internal/querycoordv2/services.go
+22
-0
internal/querycoordv2/services_test.go
internal/querycoordv2/services_test.go
+20
-0
internal/util/paramtable/component_param_test.go
internal/util/paramtable/component_param_test.go
+2
-0
未找到文件。
configs/milvus.yaml
浏览文件 @
080bed90
...
...
@@ -198,6 +198,7 @@ queryNode:
loadMemoryUsageFactor
:
3
# The multiply factor of calculating the memory usage while loading segments
enableDisk
:
true
# enable querynode load disk index, and search on disk index
maxDiskUsagePercentage
:
95
gracefulStopTimeout
:
30
stats
:
publishInterval
:
1000
# Interval for querynode to report node information (milliseconds)
...
...
@@ -256,6 +257,7 @@ indexNode:
port
:
21121
enableDisk
:
true
# enable index node build disk vector index
maxDiskUsagePercentage
:
95
gracefulStopTimeout
:
30
scheduler
:
buildParallel
:
1
...
...
internal/querycoordv2/dist/dist_handler.go
浏览文件 @
080bed90
...
...
@@ -68,8 +68,8 @@ func (dh *distHandler) start(ctx context.Context) {
logger
.
Info
(
"close dist handelr"
)
return
case
<-
ticker
.
C
:
dh
.
getDistribution
(
ctx
,
func
(
is
Success
bool
)
{
if
!
isSuccess
{
dh
.
getDistribution
(
ctx
,
func
(
is
Fail
bool
)
{
if
isFail
{
failures
++
}
else
{
failures
=
0
...
...
@@ -199,7 +199,7 @@ func (dh *distHandler) updateLeaderView(resp *querypb.GetDataDistributionRespons
dh
.
dist
.
LeaderViewManager
.
Update
(
resp
.
GetNodeID
(),
updates
...
)
}
func
(
dh
*
distHandler
)
getDistribution
(
ctx
context
.
Context
,
fn
func
(
is
Success
bool
))
{
func
(
dh
*
distHandler
)
getDistribution
(
ctx
context
.
Context
,
fn
func
(
is
Fail
bool
))
{
dh
.
mu
.
Lock
()
defer
dh
.
mu
.
Unlock
()
cctx
,
cancel
:=
context
.
WithTimeout
(
ctx
,
distReqTimeout
)
...
...
@@ -210,15 +210,15 @@ func (dh *distHandler) getDistribution(ctx context.Context, fn func(isSuccess bo
})
cancel
()
is
Success
:=
err
!=
nil
||
resp
.
GetStatus
()
.
GetErrorCode
()
!=
commonpb
.
ErrorCode_Success
if
is
Success
{
is
Fail
:=
err
!=
nil
||
resp
.
GetStatus
()
.
GetErrorCode
()
!=
commonpb
.
ErrorCode_Success
if
is
Fail
{
dh
.
logFailureInfo
(
resp
,
err
)
}
else
{
dh
.
handleDistResp
(
resp
)
}
if
fn
!=
nil
{
fn
(
is
Success
)
fn
(
is
Fail
)
}
}
...
...
internal/querycoordv2/services.go
浏览文件 @
080bed90
...
...
@@ -452,6 +452,20 @@ func (s *Server) GetSegmentInfo(ctx context.Context, req *querypb.GetSegmentInfo
},
nil
}
func
(
s
*
Server
)
isStoppingNode
(
nodeID
int64
)
error
{
isStopping
,
err
:=
s
.
nodeMgr
.
IsStoppingNode
(
nodeID
)
if
err
!=
nil
{
log
.
Warn
(
"fail to check whether the node is stopping"
,
zap
.
Int64
(
"node_id"
,
nodeID
),
zap
.
Error
(
err
))
return
err
}
if
isStopping
{
msg
:=
fmt
.
Sprintf
(
"failed to balance due to the source/destination node[%d] is stopping"
,
nodeID
)
log
.
Warn
(
msg
)
return
errors
.
New
(
msg
)
}
return
nil
}
func
(
s
*
Server
)
LoadBalance
(
ctx
context
.
Context
,
req
*
querypb
.
LoadBalanceRequest
)
(
*
commonpb
.
Status
,
error
)
{
log
:=
log
.
With
(
zap
.
Int64
(
"msgID"
,
req
.
GetBase
()
.
GetMsgID
()),
...
...
@@ -487,12 +501,20 @@ func (s *Server) LoadBalance(ctx context.Context, req *querypb.LoadBalanceReques
log
.
Warn
(
msg
)
return
utils
.
WrapStatus
(
commonpb
.
ErrorCode_UnexpectedError
,
msg
),
nil
}
if
err
:=
s
.
isStoppingNode
(
srcNode
);
err
!=
nil
{
return
utils
.
WrapStatus
(
commonpb
.
ErrorCode_UnexpectedError
,
fmt
.
Sprintf
(
"can't balance, because the source node[%d] is invalid"
,
srcNode
),
err
),
nil
}
for
_
,
dstNode
:=
range
req
.
GetDstNodeIDs
()
{
if
!
replica
.
Nodes
.
Contain
(
dstNode
)
{
msg
:=
"destination nodes have to be in the same replica of source node"
log
.
Warn
(
msg
)
return
utils
.
WrapStatus
(
commonpb
.
ErrorCode_UnexpectedError
,
msg
),
nil
}
if
err
:=
s
.
isStoppingNode
(
dstNode
);
err
!=
nil
{
return
utils
.
WrapStatus
(
commonpb
.
ErrorCode_UnexpectedError
,
fmt
.
Sprintf
(
"can't balance, because the destination node[%d] is invalid"
,
dstNode
),
err
),
nil
}
}
err
:=
s
.
balanceSegments
(
ctx
,
req
,
replica
)
...
...
internal/querycoordv2/services_test.go
浏览文件 @
080bed90
...
...
@@ -745,6 +745,26 @@ func (suite *ServiceSuite) TestLoadBalanceFailed() {
suite
.
Equal
(
commonpb
.
ErrorCode_UnexpectedError
,
resp
.
ErrorCode
)
suite
.
Contains
(
resp
.
Reason
,
"failed to balance segments"
)
suite
.
Contains
(
resp
.
Reason
,
task
.
ErrTaskCanceled
.
Error
())
suite
.
meta
.
ReplicaManager
.
AddNode
(
replicas
[
0
]
.
ID
,
10
)
req
.
SourceNodeIDs
=
[]
int64
{
10
}
resp
,
err
=
server
.
LoadBalance
(
ctx
,
req
)
suite
.
NoError
(
err
)
suite
.
Equal
(
commonpb
.
ErrorCode_UnexpectedError
,
resp
.
ErrorCode
)
req
.
SourceNodeIDs
=
[]
int64
{
srcNode
}
req
.
DstNodeIDs
=
[]
int64
{
10
}
resp
,
err
=
server
.
LoadBalance
(
ctx
,
req
)
suite
.
NoError
(
err
)
suite
.
Equal
(
commonpb
.
ErrorCode_UnexpectedError
,
resp
.
ErrorCode
)
suite
.
nodeMgr
.
Add
(
session
.
NewNodeInfo
(
10
,
"localhost"
))
suite
.
nodeMgr
.
Stopping
(
10
)
resp
,
err
=
server
.
LoadBalance
(
ctx
,
req
)
suite
.
NoError
(
err
)
suite
.
Equal
(
commonpb
.
ErrorCode_UnexpectedError
,
resp
.
ErrorCode
)
suite
.
nodeMgr
.
Remove
(
10
)
suite
.
meta
.
ReplicaManager
.
RemoveNode
(
replicas
[
0
]
.
ID
,
10
)
}
}
...
...
internal/util/paramtable/component_param_test.go
浏览文件 @
080bed90
...
...
@@ -73,6 +73,8 @@ func TestComponentParam(t *testing.T) {
assert
.
Equal
(
t
,
CParams
.
IndexNodeCfg
.
GracefulStopTimeout
,
Params
.
GracefulStopTimeout
)
t
.
Logf
(
"default grafeful stop timeout = %d"
,
Params
.
GracefulStopTimeout
)
Params
.
Base
.
Save
(
"common.gracefulStopTimeout"
,
"50"
)
Params
.
Base
.
Remove
(
"queryNode.gracefulStopTimeout"
)
Params
.
Base
.
Remove
(
"indexNode.gracefulStopTimeout"
)
Params
.
initGracefulStopTimeout
()
assert
.
Equal
(
t
,
Params
.
GracefulStopTimeout
,
int64
(
50
))
CParams
.
QueryNodeCfg
.
initGracefulStopTimeout
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录