From e81eb56b92a91787911fcc486d4176427488cce9 Mon Sep 17 00:00:00 2001 From: "yihao.dai" Date: Fri, 9 Jun 2023 11:26:35 +0800 Subject: [PATCH] Add metric of waiting for tSafe latency (#24765) Signed-off-by: bigsheeper --- internal/querynodev2/delegator/delegator.go | 10 ++++++++++ pkg/metrics/querynode_metrics.go | 13 +++++++++++++ 2 files changed, 23 insertions(+) diff --git a/internal/querynodev2/delegator/delegator.go b/internal/querynodev2/delegator/delegator.go index 62447a062..63076d16f 100644 --- a/internal/querynodev2/delegator/delegator.go +++ b/internal/querynodev2/delegator/delegator.go @@ -38,10 +38,12 @@ import ( "github.com/milvus-io/milvus/internal/querynodev2/segments" "github.com/milvus-io/milvus/internal/querynodev2/tsafe" "github.com/milvus-io/milvus/pkg/log" + "github.com/milvus-io/milvus/pkg/metrics" "github.com/milvus-io/milvus/pkg/mq/msgstream" "github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/merr" "github.com/milvus-io/milvus/pkg/util/paramtable" + "github.com/milvus-io/milvus/pkg/util/timerecord" "github.com/milvus-io/milvus/pkg/util/tsoutil" ) @@ -215,11 +217,15 @@ func (sd *shardDelegator) Search(ctx context.Context, req *querypb.SearchRequest } // wait tsafe + waitTr := timerecord.NewTimeRecorder("wait tSafe") err := sd.waitTSafe(ctx, req.Req.GuaranteeTimestamp) if err != nil { log.Warn("delegator search failed to wait tsafe", zap.Error(err)) return nil, err } + metrics.QueryNodeSQLatencyWaitTSafe.WithLabelValues( + fmt.Sprint(paramtable.GetNodeID()), metrics.SearchLabel). + Observe(float64(waitTr.ElapseSpan().Milliseconds())) sealed, growing, version := sd.distribution.GetCurrent(req.GetReq().GetPartitionIDs()...) defer sd.distribution.FinishUsage(version) @@ -270,11 +276,15 @@ func (sd *shardDelegator) Query(ctx context.Context, req *querypb.QueryRequest) } // wait tsafe + waitTr := timerecord.NewTimeRecorder("wait tSafe") err := sd.waitTSafe(ctx, req.Req.GuaranteeTimestamp) if err != nil { log.Warn("delegator query failed to wait tsafe", zap.Error(err)) return nil, err } + metrics.QueryNodeSQLatencyWaitTSafe.WithLabelValues( + fmt.Sprint(paramtable.GetNodeID()), metrics.QueryLabel). + Observe(float64(waitTr.ElapseSpan().Milliseconds())) sealed, growing, version := sd.distribution.GetCurrent(req.GetReq().GetPartitionIDs()...) defer sd.distribution.FinishUsage(version) diff --git a/pkg/metrics/querynode_metrics.go b/pkg/metrics/querynode_metrics.go index b0cf7bc0b..10e9cdfd9 100644 --- a/pkg/metrics/querynode_metrics.go +++ b/pkg/metrics/querynode_metrics.go @@ -128,6 +128,18 @@ var ( requestScope, }) + QueryNodeSQLatencyWaitTSafe = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: milvusNamespace, + Subsystem: typeutil.QueryNodeRole, + Name: "sq_wait_tsafe_latency", + Help: "latency of search or query to wait for tsafe", + Buckets: buckets, + }, []string{ + nodeIDLabelName, + queryTypeLabelName, + }) + QueryNodeSQLatencyInQueue = prometheus.NewHistogramVec( prometheus.HistogramOpts{ Namespace: milvusNamespace, @@ -356,6 +368,7 @@ func RegisterQueryNode(registry *prometheus.Registry) { registry.MustRegister(QueryNodeNumDeltaChannels) registry.MustRegister(QueryNodeSQCount) registry.MustRegister(QueryNodeSQReqLatency) + registry.MustRegister(QueryNodeSQLatencyWaitTSafe) registry.MustRegister(QueryNodeSQLatencyInQueue) registry.MustRegister(QueryNodeSQSegmentLatency) registry.MustRegister(QueryNodeSQSegmentLatencyInCore) -- GitLab