Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
milvus
milvus
提交
c7f55c2e
M
milvus
项目概览
milvus
/
milvus
大约 1 年 前同步成功
通知
261
Star
22476
Fork
2472
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
milvus
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
c7f55c2e
编写于
2月 15, 2022
作者:
C
congqixia
提交者:
GitHub
2月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Make SessionWatch keep watch even no Rewatch func when find ErrCompacted (#15497)
Signed-off-by:
N
Congqi Xia
<
congqi.xia@zilliz.com
>
上级
684110bc
变更
10
隐藏空白更改
内联
并排
Showing
10 changed files
with
215 additions
and
33 deletions
+215
-33
internal/datacoord/server.go
internal/datacoord/server.go
+8
-2
internal/datacoord/server_test.go
internal/datacoord/server_test.go
+22
-5
internal/indexcoord/index_coord.go
internal/indexcoord/index_coord.go
+7
-1
internal/indexcoord/index_coord_test.go
internal/indexcoord/index_coord_test.go
+23
-4
internal/querycoord/query_coord.go
internal/querycoord/query_coord.go
+11
-0
internal/querycoord/query_coord_test.go
internal/querycoord/query_coord_test.go
+40
-0
internal/querynode/query_node.go
internal/querynode/query_node.go
+7
-1
internal/querynode/query_node_test.go
internal/querynode/query_node_test.go
+80
-0
internal/util/sessionutil/session_util.go
internal/util/sessionutil/session_util.go
+6
-9
internal/util/sessionutil/session_util_test.go
internal/util/sessionutil/session_util_test.go
+11
-11
未找到文件。
internal/datacoord/server.go
浏览文件 @
c7f55c2e
...
...
@@ -406,6 +406,7 @@ func (s *Server) initServiceDiscovery() error {
s
.
cluster
.
Startup
(
datanodes
)
// TODO implement rewatch logic
s
.
eventCh
=
s
.
session
.
WatchServices
(
typeutil
.
DataNodeRole
,
rev
+
1
,
nil
)
return
nil
}
...
...
@@ -607,7 +608,13 @@ func (s *Server) watchService(ctx context.Context) {
return
case
event
,
ok
:=
<-
s
.
eventCh
:
if
!
ok
{
//TODO add retry logic
// ErrCompacted is handled inside SessionWatcher
// So there is some other error occurred, closing DataCoord server
logutil
.
Logger
(
s
.
ctx
)
.
Error
(
"watch service channel closed"
,
zap
.
Int64
(
"serverID"
,
s
.
session
.
ServerID
))
go
s
.
Stop
()
if
s
.
session
.
TriggerKill
{
syscall
.
Kill
(
syscall
.
Getpid
(),
syscall
.
SIGINT
)
}
return
}
if
err
:=
s
.
handleSessionEvent
(
ctx
,
event
);
err
!=
nil
{
...
...
@@ -620,7 +627,6 @@ func (s *Server) watchService(ctx context.Context) {
}
}
}
}
// handles session events - DataNodes Add/Del
...
...
internal/datacoord/server_test.go
浏览文件 @
c7f55c2e
...
...
@@ -22,9 +22,11 @@ import (
"fmt"
"math/rand"
"os"
"os/signal"
"path"
"strconv"
"sync/atomic"
"syscall"
"testing"
"time"
...
...
@@ -611,25 +613,40 @@ func TestGetFlushedSegments(t *testing.T) {
}
func
TestService_WatchServices
(
t
*
testing
.
T
)
{
sc
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
sc
,
syscall
.
SIGINT
)
defer
signal
.
Reset
(
syscall
.
SIGINT
)
factory
:=
msgstream
.
NewPmsFactory
()
svr
:=
CreateServer
(
context
.
TODO
(),
factory
)
svr
.
session
=
&
sessionutil
.
Session
{
TriggerKill
:
true
,
}
svr
.
serverLoopWg
.
Add
(
1
)
ech
:=
make
(
chan
*
sessionutil
.
SessionEvent
)
svr
.
eventCh
=
ech
flag
:=
false
signal
:=
make
(
chan
struct
{},
1
)
closed
:=
false
sigDone
:=
make
(
chan
struct
{},
1
)
sigQuit
:=
make
(
chan
struct
{},
1
)
go
func
()
{
svr
.
watchService
(
context
.
Background
())
flag
=
true
signal
<-
struct
{}{}
sigDone
<-
struct
{}{}
}()
go
func
()
{
<-
sc
closed
=
true
sigQuit
<-
struct
{}{}
}()
close
(
ech
)
<-
signal
<-
sigDone
<-
sigQuit
assert
.
True
(
t
,
flag
)
assert
.
True
(
t
,
closed
)
ech
=
make
(
chan
*
sessionutil
.
SessionEvent
)
...
...
@@ -641,12 +658,12 @@ func TestService_WatchServices(t *testing.T) {
go
func
()
{
svr
.
watchService
(
ctx
)
flag
=
true
sig
nal
<-
struct
{}{}
sig
Done
<-
struct
{}{}
}()
ech
<-
nil
cancel
()
<-
sig
nal
<-
sig
Done
assert
.
True
(
t
,
flag
)
}
...
...
internal/indexcoord/index_coord.go
浏览文件 @
c7f55c2e
...
...
@@ -200,6 +200,7 @@ func (i *IndexCoord) Init() error {
}
log
.
Debug
(
"IndexCoord"
,
zap
.
Int
(
"IndexNode number"
,
len
(
i
.
nodeManager
.
nodeClients
)))
// TODO silverxia add Rewatch logic
i
.
eventChan
=
i
.
session
.
WatchServices
(
typeutil
.
IndexNodeRole
,
revision
+
1
,
nil
)
nodeTasks
:=
i
.
metaTable
.
GetNodeTaskStats
()
for
nodeID
,
taskNum
:=
range
nodeTasks
{
...
...
@@ -758,7 +759,12 @@ func (i *IndexCoord) watchNodeLoop() {
return
case
event
,
ok
:=
<-
i
.
eventChan
:
if
!
ok
{
//TODO silverxia add retry
// ErrCompacted is handled inside SessionWatcher
log
.
Error
(
"Session Watcher channel closed"
,
zap
.
Int64
(
"server id"
,
i
.
session
.
ServerID
))
go
i
.
Stop
()
if
i
.
session
.
TriggerKill
{
syscall
.
Kill
(
syscall
.
Getpid
(),
syscall
.
SIGINT
)
}
return
}
log
.
Debug
(
"IndexCoord watchNodeLoop event updated"
)
...
...
internal/indexcoord/index_coord_test.go
浏览文件 @
c7f55c2e
...
...
@@ -19,7 +19,10 @@ package indexcoord
import
(
"context"
"math/rand"
"os"
"os/signal"
"sync"
"syscall"
"testing"
"time"
...
...
@@ -227,21 +230,37 @@ func TestIndexCoord_watchNodeLoop(t *testing.T) {
loopWg
:
sync
.
WaitGroup
{},
loopCtx
:
context
.
Background
(),
eventChan
:
ech
,
session
:
&
sessionutil
.
Session
{
TriggerKill
:
true
,
ServerID
:
0
,
},
}
in
.
loopWg
.
Add
(
1
)
flag
:=
false
signal
:=
make
(
chan
struct
{},
1
)
closed
:=
false
sigDone
:=
make
(
chan
struct
{},
1
)
sigQuit
:=
make
(
chan
struct
{},
1
)
sc
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
sc
,
syscall
.
SIGINT
)
defer
signal
.
Reset
(
syscall
.
SIGINT
)
go
func
()
{
in
.
watchNodeLoop
()
flag
=
true
signal
<-
struct
{}{}
sigDone
<-
struct
{}{}
}()
go
func
()
{
<-
sc
closed
=
true
sigQuit
<-
struct
{}{}
}()
close
(
ech
)
<-
signal
<-
sigDone
<-
sigQuit
assert
.
True
(
t
,
flag
)
assert
.
True
(
t
,
closed
)
}
func
TestIndexCoord_GetComponentStates
(
t
*
testing
.
T
)
{
...
...
internal/querycoord/query_coord.go
浏览文件 @
c7f55c2e
...
...
@@ -367,13 +367,24 @@ func (qc *QueryCoord) watchNodeLoop() {
log
.
Debug
(
"start a loadBalance task"
,
zap
.
Any
(
"task"
,
loadBalanceTask
))
}
// TODO silverxia add Rewatch logic
qc
.
eventChan
=
qc
.
session
.
WatchServices
(
typeutil
.
QueryNodeRole
,
qc
.
cluster
.
getSessionVersion
()
+
1
,
nil
)
qc
.
handleNodeEvent
(
ctx
)
}
func
(
qc
*
QueryCoord
)
handleNodeEvent
(
ctx
context
.
Context
)
{
for
{
select
{
case
<-
ctx
.
Done
()
:
return
case
event
,
ok
:=
<-
qc
.
eventChan
:
if
!
ok
{
// ErrCompacted is handled inside SessionWatcher
log
.
Error
(
"Session Watcher channel closed"
,
zap
.
Int64
(
"server id"
,
qc
.
session
.
ServerID
))
go
qc
.
Stop
()
if
qc
.
session
.
TriggerKill
{
syscall
.
Kill
(
syscall
.
Getpid
(),
syscall
.
SIGINT
)
}
return
}
switch
event
.
EventType
{
...
...
internal/querycoord/query_coord_test.go
浏览文件 @
c7f55c2e
...
...
@@ -22,7 +22,9 @@ import (
"fmt"
"math/rand"
"os"
"os/signal"
"strconv"
"syscall"
"testing"
"time"
...
...
@@ -243,6 +245,44 @@ func TestWatchNodeLoop(t *testing.T) {
})
}
func
TestHandleNodeEventClosed
(
t
*
testing
.
T
)
{
ech
:=
make
(
chan
*
sessionutil
.
SessionEvent
)
qc
:=
&
QueryCoord
{
eventChan
:
ech
,
session
:
&
sessionutil
.
Session
{
TriggerKill
:
true
,
ServerID
:
0
,
},
}
flag
:=
false
closed
:=
false
sigDone
:=
make
(
chan
struct
{},
1
)
sigQuit
:=
make
(
chan
struct
{},
1
)
sc
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
sc
,
syscall
.
SIGINT
)
defer
signal
.
Reset
(
syscall
.
SIGINT
)
go
func
()
{
qc
.
handleNodeEvent
(
context
.
Background
())
flag
=
true
sigDone
<-
struct
{}{}
}()
go
func
()
{
<-
sc
closed
=
true
sigQuit
<-
struct
{}{}
}()
close
(
ech
)
<-
sigDone
<-
sigQuit
assert
.
True
(
t
,
flag
)
assert
.
True
(
t
,
closed
)
}
func
TestHandoffSegmentLoop
(
t
*
testing
.
T
)
{
refreshParams
()
baseCtx
:=
context
.
Background
()
...
...
internal/querynode/query_node.go
浏览文件 @
c7f55c2e
...
...
@@ -217,7 +217,13 @@ func (node *QueryNode) watchService(ctx context.Context) {
return
case
event
,
ok
:=
<-
node
.
eventCh
:
if
!
ok
{
//TODO add retry logic
// ErrCompacted is handled inside SessionWatcher
log
.
Error
(
"Session Watcher channel closed"
,
zap
.
Int64
(
"server id"
,
node
.
session
.
ServerID
))
// need to call stop in separate goroutine
go
node
.
Stop
()
if
node
.
session
.
TriggerKill
{
syscall
.
Kill
(
syscall
.
Getpid
(),
syscall
.
SIGINT
)
}
return
}
if
err
:=
node
.
handleSessionEvent
(
ctx
,
event
);
err
!=
nil
{
...
...
internal/querynode/query_node_test.go
浏览文件 @
c7f55c2e
...
...
@@ -20,8 +20,10 @@ import (
"context"
"math/rand"
"os"
"os/signal"
"strconv"
"sync"
"syscall"
"testing"
"time"
...
...
@@ -36,6 +38,7 @@ import (
"github.com/milvus-io/milvus/internal/proto/schemapb"
"github.com/milvus-io/milvus/internal/types"
"github.com/milvus-io/milvus/internal/util/etcd"
"github.com/milvus-io/milvus/internal/util/sessionutil"
)
// mock of query coordinator client
...
...
@@ -425,3 +428,80 @@ func TestQueryNode_watchChangeInfo(t *testing.T) {
})
wg
.
Wait
()
}
func
TestQueryNode_watchService
(
t
*
testing
.
T
)
{
t
.
Run
(
"watch channel closed"
,
func
(
t
*
testing
.
T
)
{
ech
:=
make
(
chan
*
sessionutil
.
SessionEvent
)
qn
:=
&
QueryNode
{
session
:
&
sessionutil
.
Session
{
TriggerKill
:
true
,
ServerID
:
0
,
},
wg
:
sync
.
WaitGroup
{},
eventCh
:
ech
,
queryNodeLoopCancel
:
func
()
{},
}
flag
:=
false
closed
:=
false
sigDone
:=
make
(
chan
struct
{},
1
)
sigQuit
:=
make
(
chan
struct
{},
1
)
sc
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
sc
,
syscall
.
SIGINT
)
defer
signal
.
Reset
(
syscall
.
SIGINT
)
qn
.
wg
.
Add
(
1
)
go
func
()
{
qn
.
watchService
(
context
.
Background
())
flag
=
true
sigDone
<-
struct
{}{}
}()
go
func
()
{
<-
sc
closed
=
true
sigQuit
<-
struct
{}{}
}()
close
(
ech
)
<-
sigDone
<-
sigQuit
assert
.
True
(
t
,
flag
)
assert
.
True
(
t
,
closed
)
})
t
.
Run
(
"context done"
,
func
(
t
*
testing
.
T
)
{
ech
:=
make
(
chan
*
sessionutil
.
SessionEvent
)
qn
:=
&
QueryNode
{
session
:
&
sessionutil
.
Session
{
TriggerKill
:
true
,
ServerID
:
0
,
},
wg
:
sync
.
WaitGroup
{},
eventCh
:
ech
,
}
flag
:=
false
sigDone
:=
make
(
chan
struct
{},
1
)
sc
:=
make
(
chan
os
.
Signal
,
1
)
signal
.
Notify
(
sc
,
syscall
.
SIGINT
)
defer
signal
.
Reset
(
syscall
.
SIGINT
)
qn
.
wg
.
Add
(
1
)
ctx
,
cancel
:=
context
.
WithCancel
(
context
.
Background
())
go
func
()
{
qn
.
watchService
(
ctx
)
flag
=
true
sigDone
<-
struct
{}{}
}()
assert
.
False
(
t
,
flag
)
cancel
()
<-
sigDone
assert
.
True
(
t
,
flag
)
})
}
internal/util/sessionutil/session_util.go
浏览文件 @
c7f55c2e
...
...
@@ -403,21 +403,18 @@ func (w *sessionWatcher) handleWatchErr(err error) error {
return
err
}
// rewatch is nil, no logic to handle
if
w
.
rewatch
==
nil
{
log
.
Warn
(
"Watch service with ErrCompacted but no rewatch logic provided"
)
close
(
w
.
eventCh
)
return
err
}
sessions
,
revision
,
err
:=
w
.
s
.
GetSessions
(
w
.
prefix
)
if
err
!=
nil
{
log
.
Warn
(
"GetSession before rewatch failed"
,
zap
.
String
(
"prefix"
,
w
.
prefix
),
zap
.
Error
(
err
))
close
(
w
.
eventCh
)
return
err
}
err
=
w
.
rewatch
(
sessions
)
// rewatch is nil, no logic to handle
if
w
.
rewatch
==
nil
{
log
.
Warn
(
"Watch service with ErrCompacted but no rewatch logic provided"
)
}
else
{
err
=
w
.
rewatch
(
sessions
)
}
if
err
!=
nil
{
log
.
Warn
(
"WatchServices rewatch failed"
,
zap
.
String
(
"prefix"
,
w
.
prefix
),
zap
.
Error
(
err
))
close
(
w
.
eventCh
)
...
...
internal/util/sessionutil/session_util_test.go
浏览文件 @
c7f55c2e
...
...
@@ -18,7 +18,6 @@ import (
"github.com/stretchr/testify/require"
"go.etcd.io/etcd/api/v3/mvccpb"
v3rpc
"go.etcd.io/etcd/api/v3/v3rpc/rpctypes"
clientv3
"go.etcd.io/etcd/client/v3"
)
...
...
@@ -303,8 +302,7 @@ func TestWatcherHandleWatchResp(t *testing.T) {
CompactRevision
:
1
,
}
err
:=
w
.
handleWatchResponse
(
wresp
)
assert
.
Error
(
t
,
err
)
assert
.
Equal
(
t
,
v3rpc
.
ErrCompacted
,
err
)
assert
.
NoError
(
t
,
err
)
})
t
.
Run
(
"err compacted resp, valid Rewatch"
,
func
(
t
*
testing
.
T
)
{
...
...
@@ -327,31 +325,33 @@ func TestWatcherHandleWatchResp(t *testing.T) {
assert
.
Error
(
t
,
err
)
})
t
.
Run
(
"err handled but list failed"
,
func
(
t
*
testing
.
T
)
{
s
:=
NewSession
(
ctx
,
"/by-dev/session-ut"
,
etcdCli
)
s
.
etcdCli
.
Close
()
t
.
Run
(
"err handled but rewatch failed"
,
func
(
t
*
testing
.
T
)
{
w
:=
getWatcher
(
s
,
func
(
sessions
map
[
string
]
*
Session
)
error
{
return
nil
return
errors
.
New
(
"mocked"
)
})
wresp
:=
clientv3
.
WatchResponse
{
CompactRevision
:
1
,
}
err
:=
w
.
handleWatchResponse
(
wresp
)
t
.
Log
(
err
.
Error
())
err
=
w
.
handleWatchResponse
(
wresp
)
assert
.
Error
(
t
,
err
)
})
t
.
Run
(
"err handled but rewatch failed"
,
func
(
t
*
testing
.
T
)
{
t
.
Run
(
"err handled but list failed"
,
func
(
t
*
testing
.
T
)
{
s
:=
NewSession
(
ctx
,
"/by-dev/session-ut"
,
etcdCli
)
s
.
etcdCli
.
Close
()
w
:=
getWatcher
(
s
,
func
(
sessions
map
[
string
]
*
Session
)
error
{
return
errors
.
New
(
"mocked"
)
return
nil
})
wresp
:=
clientv3
.
WatchResponse
{
CompactRevision
:
1
,
}
err
:=
w
.
handleWatchResponse
(
wresp
)
err
=
w
.
handleWatchResponse
(
wresp
)
assert
.
Error
(
t
,
err
)
})
}
func
TestSessionRevoke
(
t
*
testing
.
T
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录