未验证 提交 a68cec85 编写于 作者: Y yah01 提交者: GitHub

Refine QueryCoordV2 metrics (#20461)

Signed-off-by: Nyah01 <yang.cen@zilliz.com>
Signed-off-by: Nyah01 <yang.cen@zilliz.com>
上级 6bff0b66
...@@ -77,29 +77,12 @@ var ( ...@@ -77,29 +77,12 @@ var (
Buckets: []float64{0, 5, 10, 20, 40, 100, 200, 400, 1000, 10000}, Buckets: []float64{0, 5, 10, 20, 40, 100, 200, 400, 1000, 10000},
}, []string{}) }, []string{})
QueryCoordNumChildTasks = prometheus.NewGaugeVec( QueryCoordTaskNum = prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Namespace: milvusNamespace, Namespace: milvusNamespace,
Subsystem: typeutil.QueryCoordRole, Subsystem: typeutil.QueryCoordRole,
Name: "child_task_num", Name: "task_num",
Help: "number of child tasks in QueryCoord's queue", Help: "the number of tasks in QueryCoord's scheduler",
}, []string{})
QueryCoordNumParentTasks = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.QueryCoordRole,
Name: "parent_task_num",
Help: "number of parent tasks in QueryCoord's queue",
}, []string{})
QueryCoordChildTaskLatency = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.QueryCoordRole,
Name: "child_task_latency",
Help: "latency of child tasks",
Buckets: buckets,
}, []string{}) }, []string{})
QueryCoordNumQueryNodes = prometheus.NewGaugeVec( QueryCoordNumQueryNodes = prometheus.NewGaugeVec(
...@@ -119,8 +102,6 @@ func RegisterQueryCoord(registry *prometheus.Registry) { ...@@ -119,8 +102,6 @@ func RegisterQueryCoord(registry *prometheus.Registry) {
registry.MustRegister(QueryCoordReleaseCount) registry.MustRegister(QueryCoordReleaseCount)
registry.MustRegister(QueryCoordLoadLatency) registry.MustRegister(QueryCoordLoadLatency)
registry.MustRegister(QueryCoordReleaseLatency) registry.MustRegister(QueryCoordReleaseLatency)
registry.MustRegister(QueryCoordNumChildTasks) registry.MustRegister(QueryCoordTaskNum)
registry.MustRegister(QueryCoordNumParentTasks)
registry.MustRegister(QueryCoordChildTaskLatency)
registry.MustRegister(QueryCoordNumQueryNodes) registry.MustRegister(QueryCoordNumQueryNodes)
} }
...@@ -25,6 +25,7 @@ import ( ...@@ -25,6 +25,7 @@ import (
"go.uber.org/zap" "go.uber.org/zap"
"github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta" "github.com/milvus-io/milvus/internal/querycoordv2/meta"
"github.com/milvus-io/milvus/internal/querycoordv2/session" "github.com/milvus-io/milvus/internal/querycoordv2/session"
...@@ -227,6 +228,7 @@ func (job *LoadCollectionJob) Execute() error { ...@@ -227,6 +228,7 @@ func (job *LoadCollectionJob) Execute() error {
return utils.WrapError(msg, err) return utils.WrapError(msg, err)
} }
metrics.QueryCoordNumCollections.WithLabelValues().Inc()
return nil return nil
} }
...@@ -285,6 +287,7 @@ func (job *ReleaseCollectionJob) Execute() error { ...@@ -285,6 +287,7 @@ func (job *ReleaseCollectionJob) Execute() error {
job.targetMgr.RemoveCollection(req.GetCollectionID()) job.targetMgr.RemoveCollection(req.GetCollectionID())
waitCollectionReleased(job.dist, req.GetCollectionID()) waitCollectionReleased(job.dist, req.GetCollectionID())
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
return nil return nil
} }
...@@ -422,6 +425,7 @@ func (job *LoadPartitionJob) Execute() error { ...@@ -422,6 +425,7 @@ func (job *LoadPartitionJob) Execute() error {
return utils.WrapError(msg, err) return utils.WrapError(msg, err)
} }
metrics.QueryCoordNumCollections.WithLabelValues().Inc()
return nil return nil
} }
...@@ -510,5 +514,6 @@ func (job *ReleasePartitionJob) Execute() error { ...@@ -510,5 +514,6 @@ func (job *ReleasePartitionJob) Execute() error {
job.targetMgr.RemovePartition(req.GetCollectionID(), toRelease...) job.targetMgr.RemovePartition(req.GetCollectionID(), toRelease...)
waitCollectionReleased(job.dist, req.GetCollectionID(), toRelease...) waitCollectionReleased(job.dist, req.GetCollectionID(), toRelease...)
} }
metrics.QueryCoordNumCollections.WithLabelValues().Dec()
return nil return nil
} }
...@@ -27,6 +27,7 @@ import ( ...@@ -27,6 +27,7 @@ import (
"syscall" "syscall"
"time" "time"
"github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/util/timerecord" "github.com/milvus-io/milvus/internal/util/timerecord"
"github.com/milvus-io/milvus-proto/go-api/commonpb" "github.com/milvus-io/milvus-proto/go-api/commonpb"
...@@ -253,6 +254,8 @@ func (s *Server) initMeta() error { ...@@ -253,6 +254,8 @@ func (s *Server) initMeta() error {
log.Error("failed to recover collections") log.Error("failed to recover collections")
return err return err
} }
metrics.QueryCoordNumCollections.WithLabelValues().Set(float64(len(s.meta.GetAll())))
err = s.meta.ReplicaManager.Recover() err = s.meta.ReplicaManager.Recover()
if err != nil { if err != nil {
log.Error("failed to recover replicas") log.Error("failed to recover replicas")
......
...@@ -16,7 +16,11 @@ ...@@ -16,7 +16,11 @@
package session package session
import "sync" import (
"sync"
"github.com/milvus-io/milvus/internal/metrics"
)
type Manager interface { type Manager interface {
Add(node *NodeInfo) Add(node *NodeInfo)
...@@ -34,12 +38,14 @@ func (m *NodeManager) Add(node *NodeInfo) { ...@@ -34,12 +38,14 @@ func (m *NodeManager) Add(node *NodeInfo) {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()
m.nodes[node.ID()] = node m.nodes[node.ID()] = node
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
} }
func (m *NodeManager) Remove(nodeID int64) { func (m *NodeManager) Remove(nodeID int64) {
m.mu.Lock() m.mu.Lock()
defer m.mu.Unlock() defer m.mu.Unlock()
delete(m.nodes, nodeID) delete(m.nodes, nodeID)
metrics.QueryCoordNumQueryNodes.WithLabelValues().Set(float64(len(m.nodes)))
} }
func (m *NodeManager) Get(nodeID int64) *NodeInfo { func (m *NodeManager) Get(nodeID int64) *NodeInfo {
......
...@@ -25,6 +25,7 @@ import ( ...@@ -25,6 +25,7 @@ import (
"go.uber.org/zap" "go.uber.org/zap"
"github.com/milvus-io/milvus/internal/log" "github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/metrics"
"github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/querypb" "github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/querycoordv2/meta" "github.com/milvus-io/milvus/internal/querycoordv2/meta"
...@@ -232,6 +233,7 @@ func (scheduler *taskScheduler) Add(task Task) error { ...@@ -232,6 +233,7 @@ func (scheduler *taskScheduler) Add(task Task) error {
log.Warn("failed to add task", zap.String("task", task.String())) log.Warn("failed to add task", zap.String("task", task.String()))
return nil return nil
} }
metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len()))
log.Info("task added", zap.String("task", task.String())) log.Info("task added", zap.String("task", task.String()))
return nil return nil
} }
...@@ -582,6 +584,7 @@ func (scheduler *taskScheduler) remove(task Task) { ...@@ -582,6 +584,7 @@ func (scheduler *taskScheduler) remove(task Task) {
log = log.With(zap.String("channel", task.Channel())) log = log.With(zap.String("channel", task.Channel()))
} }
metrics.QueryCoordTaskNum.WithLabelValues().Set(float64(scheduler.tasks.Len()))
log.Debug("task removed") log.Debug("task removed")
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册