未验证 提交 7b39873a 编写于 作者: Y yah01 提交者: GitHub

Limit the frequency of GetMetrics() logging (#21514)

Signed-off-by: yah01 <yang.cen@zilliz.com>
上级 bf3c0215
......@@ -860,7 +860,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if s.isClosed() {
log.Warn("DataCoord.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errDataCoordIsUnhealthy(paramtable.GetNodeID())))
......@@ -877,7 +877,7 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("DataCoord.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
......@@ -903,10 +903,10 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
}, nil
}
log.Debug("DataCoord.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "DataCoord.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
......@@ -914,9 +914,9 @@ func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest
}
log.RatedWarn(60.0, "DataCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
ComponentName: metricsinfo.ConstructComponentName(typeutil.DataCoordRole, paramtable.GetNodeID()),
......
......@@ -240,7 +240,7 @@ func (node *DataNode) ShowConfigurations(ctx context.Context, req *internalpb.Sh
func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthy() {
log.Warn("DataNode.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errDataNodeIsUnhealthy(paramtable.GetNodeID())))
......@@ -255,7 +255,7 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("DataNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
......@@ -282,8 +282,8 @@ func (node *DataNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRe
return systemInfoMetrics, nil
}
log.Debug("DataNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedWarn(60, "DataNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
......
......@@ -967,7 +967,7 @@ func (i *IndexCoord) ShowConfigurations(ctx context.Context, req *internalpb.Sho
// GetMetrics gets the metrics info of IndexCoord.
func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log.RatedInfo(5, "IndexCoord.GetMetrics", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("req", req.Request))
log.RatedInfo(60, "IndexCoord.GetMetrics", zap.Int64("nodeID", paramtable.GetNodeID()), zap.String("req", req.Request))
if !i.isHealthy() {
log.Warn(msgIndexCoordIsUnhealthy(paramtable.GetNodeID()))
......@@ -984,7 +984,7 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Error("IndexCoord.GetMetrics failed to parse metric type",
zap.Int64("node id", i.session.ServerID),
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.Error(err))
......@@ -997,35 +997,30 @@ func (i *IndexCoord) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReq
}, nil
}
log.Debug("IndexCoord.GetMetrics",
zap.String("metric type", metricType))
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := i.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := i.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
// Miss cache
metrics, err = getSystemInfoMetrics(ctx, req, i)
}
log.Warn("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, i)
log.Debug("IndexCoord.GetMetrics",
zap.Int64("node id", i.session.ServerID),
log.RatedDebug(60, "IndexCoord.GetMetrics",
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.String("metric type", metricType),
zap.String("metricType", metricType),
zap.String("metrics", metrics.Response), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
zap.Error(err),
)
i.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, nil
}
log.Warn("IndexCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node id", i.session.ServerID),
log.RatedWarn(60, "IndexCoord.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", i.session.ServerID),
zap.String("req", req.Request),
zap.String("metric type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{
......
......@@ -230,7 +230,7 @@ func (i *IndexNode) GetJobStats(ctx context.Context, req *indexpb.GetJobStatsReq
func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !commonpbutil.IsHealthyOrStopping(i.stateCode) {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errIndexNodeIsUnhealthy(paramtable.GetNodeID())))
......@@ -246,7 +246,7 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
......@@ -262,8 +262,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
if metricType == metricsinfo.SystemInfoMetrics {
metrics, err := getSystemInfoMetrics(ctx, req, i)
log.Ctx(ctx).Debug("IndexNode.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.Ctx(ctx).RatedDebug(60, "IndexNode.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.Error(err))
......@@ -271,8 +271,8 @@ func (i *IndexNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequ
return metrics, nil
}
log.Ctx(ctx).Warn("IndexNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.Ctx(ctx).RatedWarn(60, "IndexNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
......
......@@ -3220,13 +3220,13 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
log := log.Ctx(ctx)
log.Debug("Proxy.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request))
if !node.checkHealthy() {
log.Warn("Proxy.GetMetrics failed",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errProxyIsUnhealthy(paramtable.GetNodeID())))
......@@ -3242,7 +3242,7 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Warn("Proxy.GetMetrics failed to parse metric type",
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
......@@ -3255,28 +3255,21 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
}, nil
}
log.Debug("Proxy.GetMetrics",
zap.String("metric_type", metricType))
req.Base = commonpbutil.NewMsgBase(
commonpbutil.WithMsgType(commonpb.MsgType_SystemInfo),
commonpbutil.WithMsgID(0),
commonpbutil.WithSourceID(paramtable.GetNodeID()),
)
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := node.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := node.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
metrics, err = getSystemInfoMetrics(ctx, req, node)
}
log.Debug("failed to get system info metrics from cache, recompute instead",
zap.Error(err))
metrics, err := getSystemInfoMetrics(ctx, req, node)
log.Debug("Proxy.GetMetrics",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedDebug(60, "Proxy.GetMetrics",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Any("metrics", metrics), // TODO(dragondriver): necessary? may be very large
zap.Error(err))
......@@ -3285,10 +3278,10 @@ func (node *Proxy) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsReque
return metrics, nil
}
log.Warn("Proxy.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("node_id", paramtable.GetNodeID()),
log.RatedWarn(60, "Proxy.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{
......@@ -3306,7 +3299,7 @@ func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetrics
defer sp.Finish()
log := log.Ctx(ctx).With(
zap.Int64("node_id", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request))
if !node.checkHealthy() {
......@@ -3355,13 +3348,13 @@ func (node *Proxy) GetProxyMetrics(ctx context.Context, req *milvuspb.GetMetrics
}
log.Debug("Proxy.GetProxyMetrics",
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return proxyMetrics, nil
}
log.Warn("Proxy.GetProxyMetrics failed, request metric type is not implemented yet",
zap.String("metric_type", metricType))
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: &commonpb.Status{
......
......@@ -550,7 +550,7 @@ func (s *Server) ShowConfigurations(ctx context.Context, req *internalpb.ShowCon
func (s *Server) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
log := log.Ctx(ctx)
log.Debug("get metrics request received",
log.RatedDebug(60, "get metrics request received",
zap.String("metricType", req.GetRequest()))
if s.status.Load() != commonpb.StateCode_Healthy {
......
......@@ -1208,7 +1208,7 @@ func (node *QueryNode) SyncReplicaSegments(ctx context.Context, req *querypb.Syn
func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.ShowConfigurationsRequest) (*internalpb.ShowConfigurationsResponse, error) {
if !node.isHealthyOrStopping() {
log.Warn("QueryNode.ShowConfigurations failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Pattern),
zap.Error(errQueryNodeIsUnhealthy(paramtable.GetNodeID())))
......@@ -1245,7 +1245,7 @@ func (node *QueryNode) ShowConfigurations(ctx context.Context, req *internalpb.S
func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest) (*milvuspb.GetMetricsResponse, error) {
if !node.isHealthyOrStopping() {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(errQueryNodeIsUnhealthy(paramtable.GetNodeID())))
......@@ -1263,7 +1263,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
metricType, err := metricsinfo.ParseMetricType(req.Request)
if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed to parse metric type",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.Error(err))
......@@ -1279,7 +1279,7 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
queryNodeMetrics, err := getSystemInfoMetrics(ctx, req, node)
if err != nil {
log.Ctx(ctx).Warn("QueryNode.GetMetrics failed",
zap.Int64("nodeId", paramtable.GetNodeID()),
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metricType", metricType),
zap.Error(err))
......@@ -1293,8 +1293,8 @@ func (node *QueryNode) GetMetrics(ctx context.Context, req *milvuspb.GetMetricsR
return queryNodeMetrics, nil
}
log.Ctx(ctx).Debug("QueryNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeId", paramtable.GetNodeID()),
log.Ctx(ctx).RatedDebug(60, "QueryNode.GetMetrics failed, request metric type is not implemented yet",
zap.Int64("nodeID", paramtable.GetNodeID()),
zap.String("req", req.Request),
zap.String("metricType", metricType))
......
......@@ -1479,32 +1479,23 @@ func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (
metricType, err := metricsinfo.ParseMetricType(in.Request)
if err != nil {
log.Warn("ParseMetricType failed", zap.String("role", typeutil.RootCoordRole),
zap.Int64("node_id", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
zap.Int64("nodeID", c.session.ServerID), zap.String("req", in.Request), zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, "ParseMetricType failed: "+err.Error()),
Response: "",
}, nil
}
log.Ctx(ctx).Debug("GetMetrics success",
zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType))
if metricType == metricsinfo.SystemInfoMetrics {
ret, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err == nil && ret != nil {
return ret, nil
metrics, err := c.metricsCacheManager.GetSystemInfoMetrics()
if err != nil {
metrics, err = c.getSystemInfoMetrics(ctx, in)
}
log.Warn("GetSystemInfoMetrics from cache failed",
zap.String("role", typeutil.RootCoordRole),
zap.Error(err))
systemInfoMetrics, err := c.getSystemInfoMetrics(ctx, in)
if err != nil {
log.Warn("GetSystemInfoMetrics failed",
zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType),
zap.String("metricType", metricType),
zap.Error(err))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, fmt.Sprintf("getSystemInfoMetrics failed: %s", err.Error())),
......@@ -1512,12 +1503,12 @@ func (c *Core) GetMetrics(ctx context.Context, in *milvuspb.GetMetricsRequest) (
}, nil
}
c.metricsCacheManager.UpdateSystemInfoMetrics(systemInfoMetrics)
return systemInfoMetrics, err
c.metricsCacheManager.UpdateSystemInfoMetrics(metrics)
return metrics, err
}
log.Warn("GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metric_type", metricType))
log.RatedWarn(60, "GetMetrics failed, metric type not implemented", zap.String("role", typeutil.RootCoordRole),
zap.String("metricType", metricType))
return &milvuspb.GetMetricsResponse{
Status: failStatus(commonpb.ErrorCode_UnexpectedError, metricsinfo.MsgUnimplementedMetric),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册