Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
8b4d60b0
M
milvus
项目概览
milvus
/
milvus
大约 1 年 前同步成功
通知
261
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
8b4d60b0
编写于
12月 29, 2022
作者:
C
congqixia
提交者:
GitHub
12月 29, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add guarantee and serviceable lag too large check (#21441)
Signed-off-by:
N
Congqi Xia
<
congqi.xia@zilliz.com
>
上级
3a58ac9f
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
85 addition
and
6 deletion
+85
-6
configs/milvus.yaml
configs/milvus.yaml
+4
-0
internal/querynode/errors.go
internal/querynode/errors.go
+10
-0
internal/querynode/task_read.go
internal/querynode/task_read.go
+17
-6
internal/querynode/task_read_test.go
internal/querynode/task_read_test.go
+46
-0
internal/util/paramtable/component_param.go
internal/util/paramtable/component_param.go
+8
-0
未找到文件。
configs/milvus.yaml
浏览文件 @
8b4d60b0
...
...
@@ -230,6 +230,10 @@ queryNode:
# Max read concurrency must be greater than or equal to 1, and less than or equal to runtime.NumCPU * 100.
maxReadConcurrentRatio
:
2.0
# (0, 100]
cpuRatio
:
10.0
# ratio used to estimate read task cpu usage.
# maxTimestampLag is the maximum allowed lag, in seconds, between the guarantee timestamp and the serviceable timestamp.
# If the lag is larger than this value, the scheduler returns an error immediately instead of waiting.
# The valid range is [3600, infinite).
maxTimestampLag
:
86400
grouping
:
enabled
:
true
...
...
internal/querynode/errors.go
浏览文件 @
8b4d60b0
...
...
@@ -19,16 +19,26 @@ package querynode
import
(
"errors"
"fmt"
"time"
)
// Sentinel errors of the querynode package. Callers are expected to match
// them with errors.Is, since the Wrap* helpers attach extra context via %w.
var (
	// ErrShardNotAvailable is the base error reported when a shard is not available.
	ErrShardNotAvailable = errors.New("ShardNotAvailable")

	// ErrTsLagTooLarge is the base error reported when the lag between the
	// guarantee timestamp and the serviceable timestamp is too large.
	ErrTsLagTooLarge = errors.New("Timestamp lag too large")
)
// WrapErrShardNotAvailable wraps ErrShardNotAvailable with replica id and channel name.
func
WrapErrShardNotAvailable
(
replicaID
int64
,
shard
string
)
error
{
return
fmt
.
Errorf
(
"%w(replica=%d, shard=%s)"
,
ErrShardNotAvailable
,
replicaID
,
shard
)
}
// WrapErrTsLagTooLarge wraps ErrTsLagTooLarge with lag and max value.
func
WrapErrTsLagTooLarge
(
duration
time
.
Duration
,
maxLag
time
.
Duration
)
error
{
return
fmt
.
Errorf
(
"%w lag(%s) max(%s)"
,
ErrTsLagTooLarge
,
duration
,
maxLag
)
}
// msgQueryNodeIsUnhealthy is the error msg of unhealthy query node
func
msgQueryNodeIsUnhealthy
(
nodeID
UniqueID
)
string
{
return
fmt
.
Sprintf
(
"query node %d is not ready"
,
nodeID
)
...
...
internal/querynode/task_read.go
浏览文件 @
8b4d60b0
...
...
@@ -168,15 +168,26 @@ func (b *baseReadTask) Ready() (bool, error) {
gt
,
_
:=
tsoutil
.
ParseTS
(
guaranteeTs
)
st
,
_
:=
tsoutil
.
ParseTS
(
serviceTime
)
if
guaranteeTs
>
serviceTime
{
lag
:=
gt
.
Sub
(
st
)
maxLag
:=
Params
.
QueryNodeCfg
.
MaxTimestampLag
.
GetAsDuration
(
time
.
Second
)
if
lag
>
maxLag
{
log
.
Warn
(
"guarantee and servicable ts larger than MaxLag"
,
zap
.
Time
(
"guaranteeTime"
,
gt
),
zap
.
Time
(
"serviceableTime"
,
st
),
zap
.
Duration
(
"lag"
,
lag
),
zap
.
Duration
(
"maxTsLag"
,
maxLag
),
)
return
false
,
WrapErrTsLagTooLarge
(
lag
,
maxLag
)
}
return
false
,
nil
}
log
.
Debug
(
"query msg can do"
,
zap
.
Any
(
"collectionID"
,
b
.
CollectionID
),
zap
.
Any
(
"sm.GuaranteeTimestamp"
,
gt
),
zap
.
Any
(
"serviceTime"
,
st
),
zap
.
Any
(
"delta milliseconds"
,
gt
.
Sub
(
st
)
.
Milliseconds
()),
zap
.
Any
(
"channel"
,
channel
),
zap
.
Any
(
"msgID"
,
b
.
ID
()))
zap
.
Int64
(
"collectionID"
,
b
.
CollectionID
),
zap
.
Time
(
"sm.GuaranteeTimestamp"
,
gt
),
zap
.
Time
(
"serviceTime"
,
st
),
zap
.
Int64
(
"delta milliseconds"
,
gt
.
Sub
(
st
)
.
Milliseconds
()),
zap
.
String
(
"channel"
,
channel
),
zap
.
Int64
(
"msgID"
,
b
.
ID
()))
b
.
waitTsDur
=
b
.
waitTSafeTr
.
Elapse
(
"wait for tsafe done"
)
return
true
,
nil
}
internal/querynode/task_read_test.go
浏览文件 @
8b4d60b0
...
...
@@ -6,7 +6,10 @@ import (
"time"
"github.com/milvus-io/milvus/internal/mocks"
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/util/timerecord"
"github.com/milvus-io/milvus/internal/util/tsoutil"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
)
...
...
@@ -30,6 +33,7 @@ func (s *baseReadTaskSuite) SetupSuite() {
lcm
:=
&
mocks
.
ChunkManager
{}
tsafe
:=
&
MockTSafeReplicaInterface
{}
s
.
tsafe
=
tsafe
qs
,
err
:=
newQueryShard
(
context
.
Background
(),
defaultCollectionID
,
defaultDMLChannel
,
defaultReplicaID
,
nil
,
meta
,
tsafe
,
lcm
,
rcm
,
false
)
s
.
Require
()
.
NoError
(
err
)
...
...
@@ -107,7 +111,49 @@ func (s *baseReadTaskSuite) TestTimeoutError() {
s
.
Assert
()
.
ErrorIs
(
s
.
task
.
TimeoutError
(),
context
.
DeadlineExceeded
)
})
}
func
(
s
*
baseReadTaskSuite
)
TestReady
()
{
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
defer
cancel
()
s
.
task
.
ctx
=
ctx
baseTime
:=
time
.
Now
()
serviceable
:=
tsoutil
.
ComposeTSByTime
(
baseTime
,
0
)
s
.
tsafe
.
EXPECT
()
.
getTSafe
(
mock
.
AnythingOfType
(
"string"
))
.
Return
(
serviceable
,
nil
)
s
.
Run
(
"lag too large"
,
func
()
{
tooLargeGuarantee
:=
baseTime
.
Add
(
Params
.
QueryNodeCfg
.
MaxTimestampLag
.
GetAsDuration
(
time
.
Second
))
.
Add
(
time
.
Second
)
guaranteeTs
:=
tsoutil
.
ComposeTSByTime
(
tooLargeGuarantee
,
0
)
s
.
task
.
GuaranteeTimestamp
=
guaranteeTs
s
.
task
.
DataScope
=
querypb
.
DataScope_Historical
ready
,
err
:=
s
.
task
.
Ready
()
s
.
False
(
ready
)
s
.
Error
(
err
)
s
.
ErrorIs
(
err
,
ErrTsLagTooLarge
)
})
s
.
Run
(
"not ready"
,
func
()
{
guarantee
:=
baseTime
.
Add
(
Params
.
QueryNodeCfg
.
MaxTimestampLag
.
GetAsDuration
(
time
.
Second
))
.
Add
(
-
time
.
Second
)
guaranteeTs
:=
tsoutil
.
ComposeTSByTime
(
guarantee
,
0
)
s
.
task
.
GuaranteeTimestamp
=
guaranteeTs
s
.
task
.
DataScope
=
querypb
.
DataScope_Historical
ready
,
err
:=
s
.
task
.
Ready
()
s
.
False
(
ready
)
s
.
NoError
(
err
)
})
s
.
Run
(
"ready"
,
func
()
{
guarantee
:=
baseTime
.
Add
(
-
time
.
Second
)
guaranteeTs
:=
tsoutil
.
ComposeTSByTime
(
guarantee
,
0
)
s
.
task
.
GuaranteeTimestamp
=
guaranteeTs
s
.
task
.
DataScope
=
querypb
.
DataScope_Historical
ready
,
err
:=
s
.
task
.
Ready
()
s
.
True
(
ready
)
s
.
NoError
(
err
)
})
}
func
TestBaseReadTask
(
t
*
testing
.
T
)
{
...
...
internal/util/paramtable/component_param.go
浏览文件 @
8b4d60b0
...
...
@@ -1041,6 +1041,7 @@ type queryNodeConfig struct {
MaxGroupNQ
ParamItem
`refreshable:"true"`
TopKMergeRatio
ParamItem
`refreshable:"true"`
CPURatio
ParamItem
`refreshable:"true"`
MaxTimestampLag
ParamItem
`refreshable:"true"`
GCHelperEnabled
ParamItem
`refreshable:"false"`
MinimumGOGCConfig
ParamItem
`refreshable:"false"`
...
...
@@ -1254,6 +1255,13 @@ func (p *queryNodeConfig) init(base *BaseTable) {
}
p
.
MaxDiskUsagePercentage
.
Init
(
base
.
mgr
)
p
.
MaxTimestampLag
=
ParamItem
{
Key
:
"queryNode.scheduler.maxTimestampLag"
,
Version
:
"2.2.3"
,
DefaultValue
:
"86400"
,
}
p
.
MaxTimestampLag
.
Init
(
base
.
mgr
)
p
.
GCHelperEnabled
=
ParamItem
{
Key
:
"queryNode.gchelper.enabled"
,
Version
:
"2.0.0"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录