未验证 提交 fc42ee5b 编写于 作者: T Ten Thousand Leaves 提交者: GitHub

Fix # of rows of recovering segment (#18736)

Signed-off-by: Yuchen Gao <yuchen.gao@zilliz.com>
Signed-off-by: Yuchen Gao <yuchen.gao@zilliz.com>
上级 516fd928
......@@ -42,13 +42,13 @@ type SegmentInfo struct {
}
// NewSegmentInfo create `SegmentInfo` wrapper from `datapb.SegmentInfo`
// assign current rows to 0 and pre-allocate `allocations` slice
// assign current rows to last checkpoint and pre-allocate `allocations` slice
// Note that the allocation information is not preserved,
// the worst-case scenario is ending up with a segment twice the size we expect
func NewSegmentInfo(info *datapb.SegmentInfo) *SegmentInfo {
return &SegmentInfo{
SegmentInfo: info,
currRows: 0,
currRows: info.GetNumOfRows(),
allocations: make([]*Allocation, 0, 16),
lastFlushTime: time.Now().Add(-1 * flushInterval),
}
......
......@@ -308,7 +308,7 @@ func (s *Server) Start() error {
// data from all DataNode.
// This will prevent DataCoord from missing out any important segment stats
// data while offline.
log.Info("DataNode (re)starts successfully and re-collecting segment stats from DataNodes")
log.Info("DataCoord (re)starts successfully and re-collecting segment stats from DataNodes")
s.reCollectSegmentStats(s.ctx)
return nil
......@@ -621,6 +621,16 @@ func (s *Server) handleTimetickMessage(ctx context.Context, ttMsg *msgstream.Dat
// updateSegmentStatistics applies the row counts reported in stats to the
// segment metadata held by s.meta, one entry at a time.
//
// For every entry, the reported row count is written via SetCurrentRows.
// When the segment is present in meta and its recorded row count differs from
// the reported one, the change is logged at debug level before it is applied.
func (s *Server) updateSegmentStatistics(stats []*datapb.SegmentStats) {
	for _, stat := range stats {
		segmentID := stat.GetSegmentID()
		newRows := stat.GetNumRows()

		// Look the segment up once and reuse the result, so the nil check and
		// the row-count read are guaranteed to refer to the same value instead
		// of relying on several independent lookups returning the same thing.
		if segment := s.meta.GetAllSegment(segmentID); segment != nil {
			// Log if # of rows is updated.
			if oldRows := segment.GetNumOfRows(); oldRows != newRows {
				log.Debug("Updating segment number of rows",
					zap.Int64("segment ID", segmentID),
					zap.Int64("old value", oldRows),
					zap.Int64("new value", newRows),
					zap.Any("seg info", s.meta.GetSegment(segmentID)),
				)
			}
		}

		s.meta.SetCurrentRows(segmentID, newRows)
	}
}
......@@ -958,7 +968,7 @@ func (s *Server) reCollectSegmentStats(ctx context.Context) {
log.Error("null channel manager found, which should NOT happen in non-testing environment")
return
}
nodes := s.channelManager.store.GetNodes()
nodes := s.sessionManager.getLiveNodeIDs()
log.Info("re-collecting segment stats from DataNodes",
zap.Int64s("DataNode IDs", nodes))
for _, node := range nodes {
......
......@@ -95,6 +95,18 @@ func (c *SessionManager) DeleteSession(node *NodeInfo) {
}
}
// getLiveNodeIDs returns the IDs of all live DataNodes, i.e. the keys of the
// session table (c.sessions.data). The table is read under its read lock.
// The returned slice is freshly allocated and is never nil; its order follows
// map iteration and is therefore unspecified.
func (c *SessionManager) getLiveNodeIDs() []int64 {
	c.sessions.RLock()
	defer c.sessions.RUnlock()

	// Allocate the exact length up front and fill by index.
	nodeIDs := make([]int64, len(c.sessions.data))
	next := 0
	for nodeID := range c.sessions.data {
		nodeIDs[next] = nodeID
		next++
	}
	return nodeIDs
}
// GetSessions gets all node sessions
func (c *SessionManager) GetSessions() []*Session {
c.sessions.RLock()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册