diff --git a/internal/querycoordv2/job/job_load.go b/internal/querycoordv2/job/job_load.go index 51598171ce55f2525e5d0a9c5415273134580cd1..4031b76dfe09fcf4b9ae4334a8038a320dedc0fd 100644 --- a/internal/querycoordv2/job/job_load.go +++ b/internal/querycoordv2/job/job_load.go @@ -204,9 +204,6 @@ func (job *LoadCollectionJob) Execute() error { return utils.WrapError(msg, err) } - if !colExisted { - metrics.QueryCoordNumCollections.WithLabelValues().Inc() - } metrics.QueryCoordNumPartitions.WithLabelValues().Add(float64(len(partitions))) return nil } @@ -380,7 +377,6 @@ func (job *LoadPartitionJob) Execute() error { log.Error(msg, zap.Error(err)) return utils.WrapError(msg, err) } - metrics.QueryCoordNumCollections.WithLabelValues().Inc() } else { // collection exists, put partitions only err = job.meta.CollectionManager.PutPartition(partitions...) if err != nil { diff --git a/internal/querycoordv2/meta/collection_manager.go b/internal/querycoordv2/meta/collection_manager.go index f6194dd952f2de65513e113b9e5bf15be5b0e1b8..8f40f6b4275ebcd3faee55d64f30fb0de2006083 100644 --- a/internal/querycoordv2/meta/collection_manager.go +++ b/internal/querycoordv2/meta/collection_manager.go @@ -491,6 +491,11 @@ func (m *CollectionManager) UpdateLoadPercent(partitionID int64, loadPercent int saveCollection = true newCollection.Status = querypb.LoadStatus_Loaded elapsed := time.Since(newCollection.CreatedAt) + + // TODO: what if part of the collection has been unloaded? Now we decrease the metric only after + // `ReleaseCollection` is triggered. Maybe it's hard to make this metric really accurate. 
+ metrics.QueryCoordNumCollections.WithLabelValues().Inc() + metrics.QueryCoordLoadLatency.WithLabelValues().Observe(float64(elapsed.Milliseconds())) } return collectionPercent, m.putCollection(saveCollection, newCollection) diff --git a/internal/querycoordv2/server.go b/internal/querycoordv2/server.go index 8a36f86fd395882ba35a85f8388694ba47ed0345..daf0b03d71bef4bb7340de315dd11fe0b5ac780c 100644 --- a/internal/querycoordv2/server.go +++ b/internal/querycoordv2/server.go @@ -311,7 +311,10 @@ func (s *Server) initMeta() error { } collections := s.meta.GetAll() log.Info("recovering collections...", zap.Int64s("collections", collections)) - metrics.QueryCoordNumCollections.WithLabelValues().Set(float64(len(collections))) + + // Reset the metric to 0 here; it is incremented later, once the observers consider a collection loaded. + metrics.QueryCoordNumCollections.WithLabelValues().Set(0) + metrics.QueryCoordNumPartitions.WithLabelValues().Set(float64(len(s.meta.GetAllPartitions()))) err = s.meta.ReplicaManager.Recover(collections)