未验证 提交 ed5e98c3 编写于 作者: sinat_25235033's avatar sinat_25235033 提交者: GitHub

support enable alert threshold auto resolved notice (#1185)

Signed-off-by: sinat_25235033's avatartomsun28 <tomsun28@outlook.com>
上级 cb29cc6d
......@@ -35,6 +35,7 @@ import org.dromara.hertzbeat.alert.util.AlertTemplateUtil;
import org.dromara.hertzbeat.common.entity.manager.Monitor;
import org.dromara.hertzbeat.common.entity.message.CollectRep;
import org.dromara.hertzbeat.common.constants.CommonConstants;
import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent;
import org.dromara.hertzbeat.common.support.event.SystemConfigChangeEvent;
import org.dromara.hertzbeat.common.util.CommonUtil;
import org.dromara.hertzbeat.common.util.ResourceBundleUtil;
......@@ -70,8 +71,12 @@ public class CalculateAlarm {
* key - monitorId+alertDefineId 为普通阈值告警 | The alarm is a common threshold alarm
* key - monitorId 为监控状态可用性可达性告警 | Indicates the monitoring status availability reachability alarm
*/
public Map<String, Alert> triggeredAlertMap;
public Set<Long> unAvailableMonitors;
private final Map<String, Alert> triggeredAlertMap;
/**
* Alerts that have fired and have not yet recovered
* key - monitorId + alertDefineId + (instance)
*/
private final Map<String, Alert> notRecoveredAlertMap;
private final AlerterWorkerPool workerPool;
private final CommonDataQueue dataQueue;
private final AlertDefineService alertDefineService;
......@@ -89,13 +94,13 @@ public class CalculateAlarm {
this.alertService = alertService;
this.bundle = ResourceBundleUtil.getBundle("alerter");
this.triggeredAlertMap = new ConcurrentHashMap<>(128);
this.unAvailableMonitors = Collections.synchronizedSet(new HashSet<>(16));
this.notRecoveredAlertMap = new ConcurrentHashMap<>(128);
// Initialize stateAlertMap
// 初始化stateAlertMap
List<Monitor> monitors = monitorDao.findMonitorsByStatus(CommonConstants.UN_AVAILABLE_CODE);
if (monitors != null) {
for (Monitor monitor : monitors) {
this.unAvailableMonitors.add(monitor.getId());
this.notRecoveredAlertMap.put(monitor.getId() + CommonConstants.AVAILABILITY, Alert.builder().build());
}
}
startCalculate();
......@@ -127,7 +132,7 @@ public class CalculateAlarm {
// First check the metrics group whose scheduling priority is 0: if its collect response status is UN_REACHABLE/UN_CONNECTABLE, the highest-severity alarm must be sent so that the monitor status is changed
// 先判断调度优先级为0的指标组采集响应数据状态 UN_REACHABLE/UN_CONNECTABLE 则需发最高级别告警进行监控状态变更
if (metricsData.getPriority() == 0) {
handlerAvailableMetrics(monitorId, app, metrics, metricsData);
handlerAvailableMetrics(monitorId, app, metricsData);
}
// Query the alarm definitions associated with the indicator set of the monitoring type
// 查出此监控类型下的此指标集合下关联配置的告警定义信息
......@@ -156,6 +161,9 @@ public class CalculateAlarm {
afterThresholdRuleMatch(currentTimeMilli, monitorId, app, metrics, fieldValueMap, define);
// 若此阈值已被触发,则其它数据行的触发忽略
continue;
} else if (define.isRecoverNotice()) {
String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + null;
handleRecoveredAlert(currentTimeMilli, monitorId, app, define, expr, notResolvedAlertKey);
}
} catch (Exception e) {
log.warn(e.getMessage(), e);
......@@ -193,6 +201,9 @@ public class CalculateAlarm {
afterThresholdRuleMatch(currentTimeMilli, monitorId, app, metrics, fieldValueMap, define);
// 若此阈值已被触发,则其它数据行的触发忽略
break;
} else if (define.isRecoverNotice()) {
String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + (!"".equals(instance) ? instance : null);
handleRecoveredAlert(currentTimeMilli, monitorId, app, define, expr, notResolvedAlertKey);
}
} catch (Exception e) {
log.warn(e.getMessage(), e);
......@@ -203,6 +214,26 @@ public class CalculateAlarm {
}
}
/**
 * Sends a recovery notice for a threshold alert that fired earlier and is still
 * recorded as not recovered.
 * <p>
 * The entry stored under {@code notResolvedAlertKey} is removed from
 * {@code notRecoveredAlertMap}; when no entry exists, nothing is sent.
 *
 * @param currentTimeMilli    timestamp used as both first and last alarm time of the notice
 * @param monitorId           id of the monitor being evaluated (not read by this method)
 * @param app                 monitor type (not read by this method)
 * @param define              alert definition whose app/metric/field form the notice target
 * @param expr                threshold expression, appended to the notice content
 * @param notResolvedAlertKey key of the not-recovered alert entry
 */
private void handleRecoveredAlert(long currentTimeMilli, long monitorId, String app, AlertDefine define, String expr, String notResolvedAlertKey) {
    Alert pendingRecovery = notRecoveredAlertMap.remove(notResolvedAlertKey);
    if (pendingRecovery == null) {
        // No alert was recorded under this key, so there is no recovery to announce
        return;
    }
    // The recovery notice carries the same tags as the alert it resolves
    Map<String, String> recoveredTags = pendingRecovery.getTags();
    String recoverContent = this.bundle.getString("alerter.alarm.recover") + " : " + expr;
    String recoverTarget = define.getApp() + "." + define.getMetric() + "." + define.getField();
    Alert.AlertBuilder recoverBuilder = Alert.builder();
    recoverBuilder.status(CommonConstants.ALERT_STATUS_CODE_RESTORED);
    recoverBuilder.priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING);
    recoverBuilder.target(recoverTarget);
    recoverBuilder.content(recoverContent);
    recoverBuilder.tags(recoveredTags);
    recoverBuilder.triggerTimes(1);
    recoverBuilder.firstAlarmTime(currentTimeMilli);
    recoverBuilder.lastAlarmTime(currentTimeMilli);
    alarmCommonReduce.reduceAndSendAlarm(recoverBuilder.build());
}
private void afterThresholdRuleMatch(long currentTimeMilli, long monitorId, String app, String metrics, Map<String, Object> fieldValueMap, AlertDefine define) {
String monitorAlertKey = String.valueOf(monitorId) + define.getId();
Alert triggeredAlert = triggeredAlertMap.get(monitorAlertKey);
......@@ -213,7 +244,9 @@ public class CalculateAlarm {
triggeredAlert.setLastAlarmTime(currentTimeMilli);
int defineTimes = define.getTimes() == null ? 1 : define.getTimes();
if (times >= defineTimes) {
String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + fieldValueMap.get("instance");
triggeredAlertMap.remove(monitorAlertKey);
notRecoveredAlertMap.put(notResolvedAlertKey, triggeredAlert);
alarmCommonReduce.reduceAndSendAlarm(triggeredAlert.clone());
}
} else {
......@@ -244,6 +277,8 @@ public class CalculateAlarm {
.build();
int defineTimes = define.getTimes() == null ? 1 : define.getTimes();
if (1 >= defineTimes) {
String notResolvedAlertKey = String.valueOf(monitorId) + define.getId() + fieldValueMap.get("instance");
notRecoveredAlertMap.put(notResolvedAlertKey, alert);
alarmCommonReduce.reduceAndSendAlarm(alert);
} else {
triggeredAlertMap.put(monitorAlertKey, alert);
......@@ -267,52 +302,93 @@ public class CalculateAlarm {
return match;
}
private void handlerAvailableMetrics(long monitorId, String app, String metrics, CollectRep.MetricsData metricsData) {
private void handlerAvailableMetrics(long monitorId, String app, CollectRep.MetricsData metricsData) {
AlertDefine avaAlertDefine = alertDefineService.getMonitorBindAlertAvaDefine(monitorId, app, CommonConstants.AVAILABILITY);
if (avaAlertDefine == null) {
return;
}
long currentTimeMill = System.currentTimeMillis();
if (metricsData.getCode() != CollectRep.Code.SUCCESS) {
// Collection and abnormal
// 采集异常
if (metricsData.getCode() == CollectRep.Code.UN_AVAILABLE) {
// TODO: the collector is unavailable
// todo 采集器不可用
return;
} else if (metricsData.getCode() == CollectRep.Code.UN_REACHABLE) {
// UN_REACHABLE Peer unreachable (Network layer icmp)
// UN_REACHABLE 对端不可达(网络层icmp)
handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode());
} else if (metricsData.getCode() == CollectRep.Code.UN_CONNECTABLE) {
// UN_CONNECTABLE Peer connection failure (transport layer tcp,udp)
// UN_CONNECTABLE 对端连接失败(传输层tcp,udp)
handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode());
Alert preAlert = triggeredAlertMap.get(String.valueOf(monitorId));
Map<String, String> tags = new HashMap<>(6);
tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId));
tags.put(CommonConstants.TAG_MONITOR_APP, app);
tags.put("metrics", CommonConstants.AVAILABILITY);
tags.put("code", metricsData.getCode().name());
Map<String, Object> valueMap = tags.entrySet().stream()
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
if (avaAlertDefine.getTags() != null && !avaAlertDefine.getTags().isEmpty()) {
for (TagItem tagItem : avaAlertDefine.getTags()) {
valueMap.put(tagItem.getName(), tagItem.getValue());
tags.put(tagItem.getName(), tagItem.getValue());
}
}
if (preAlert == null) {
Alert.AlertBuilder alertBuilder = Alert.builder()
.tags(tags)
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.status(ALERT_STATUS_CODE_PENDING)
.target(CommonConstants.AVAILABILITY)
.content(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap))
.firstAlarmTime(currentTimeMill)
.lastAlarmTime(currentTimeMill)
.triggerTimes(1);
if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) {
String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY;
notRecoveredAlertMap.put(notResolvedAlertKey, alertBuilder.build());
alarmCommonReduce.reduceAndSendAlarm(alertBuilder.build().clone());
} else {
alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build());
} else {
// Other exceptions
// 其他异常
handlerMonitorAvailableAlert(monitorId, app, metricsData.getCode());
int times = preAlert.getTriggerTimes() + 1;
if (preAlert.getStatus() == ALERT_STATUS_CODE_PENDING) {
times = 1;
preAlert.setContent(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap));
preAlert.setTags(tags);
}
preAlert.setTriggerTimes(times);
preAlert.setFirstAlarmTime(currentTimeMill);
preAlert.setLastAlarmTime(currentTimeMill);
int defineTimes = avaAlertDefine.getTimes() == null ? 1 : avaAlertDefine.getTimes();
if (times >= defineTimes) {
preAlert.setStatus(ALERT_STATUS_CODE_PENDING);
String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY;
notRecoveredAlertMap.put(notResolvedAlertKey, preAlert.clone());
alarmCommonReduce.reduceAndSendAlarm(preAlert.clone());
} else {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
}
return;
} else {
// Check whether an availability or unreachable alarm is generated before the association monitoring, and send a clear alarm to clear the monitoring status
} else if (avaAlertDefine.isRecoverNotice()) {
// Check whether an availability or unreachable alarm is generated before the association monitoring
// and send a clear alarm to clear the monitoring status
// 判断关联监控之前是否有可用性或者不可达告警,发送恢复告警进行监控状态恢复
triggeredAlertMap.remove(String.valueOf(monitorId));
boolean isRestartUnavailable = unAvailableMonitors.remove(monitorId);
if (isRestartUnavailable) {
String notResolvedAlertKey = monitorId + CommonConstants.AVAILABILITY;
Alert notResolvedAlert = notRecoveredAlertMap.remove(notResolvedAlertKey);
if (notResolvedAlert != null) {
// Sending an alarm Restore
Map<String, String> tags = new HashMap<>(6);
tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId));
tags.put(CommonConstants.TAG_MONITOR_APP, app);
String content = this.bundle.getString("alerter.availability.resolved");
long currentTimeMilli = System.currentTimeMillis();
if (notResolvedAlert.getTags() != null) {
tags.putAll(notResolvedAlert.getTags());
}
String content = this.bundle.getString("alerter.availability.recover");
Alert resumeAlert = Alert.builder()
.tags(tags)
.target(CommonConstants.AVAILABILITY)
.content(content)
.priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING)
.status(CommonConstants.ALERT_STATUS_CODE_RESTORED)
.firstAlarmTime(currentTimeMilli)
.lastAlarmTime(currentTimeMilli)
.triggerTimes(1)
.build();
.tags(tags)
.target(CommonConstants.AVAILABILITY)
.content(content)
.priority(CommonConstants.ALERT_PRIORITY_CODE_WARNING)
.status(CommonConstants.ALERT_STATUS_CODE_RESTORED)
.firstAlarmTime(currentTimeMill)
.lastAlarmTime(currentTimeMill)
.triggerTimes(1)
.build();
alarmCommonReduce.reduceAndSendAlarm(resumeAlert);
Runnable updateStatusJob = () -> {
// todo update pre all type alarm status
updateAvailabilityAlertStatus(monitorId, resumeAlert);
};
workerPool.executeJob(updateStatusJob);
......@@ -320,69 +396,10 @@ public class CalculateAlarm {
}
}
/**
 * Handles a failed availability collection for a monitor: creates or updates the
 * availability alert kept in triggeredAlertMap and sends it once the trigger count
 * configured on the availability alert definition is reached.
 *
 * @param monitorId id of the monitor whose collection failed
 * @param app       monitor type, used to look up the definition and as a tag value
 * @param code      collect response code, its name is stored in the "code" tag
 */
private void handlerMonitorAvailableAlert(long monitorId, String app, CollectRep.Code code) {
AlertDefine avaAlertDefine = alertDefineService.getMonitorBindAlertAvaDefine(monitorId, app, CommonConstants.AVAILABILITY);
// Without an availability alert definition there is nothing to evaluate
if (avaAlertDefine == null) {
return;
}
// Availability alerts are keyed by the monitor id alone
Alert preAlert = triggeredAlertMap.get(String.valueOf(monitorId));
long currentTimeMill = System.currentTimeMillis();
Map<String, String> tags = new HashMap<>(6);
tags.put(CommonConstants.TAG_MONITOR_ID, String.valueOf(monitorId));
tags.put(CommonConstants.TAG_MONITOR_APP, app);
tags.put("metrics", CommonConstants.AVAILABILITY);
tags.put("code", code.name());
// valueMap starts as a copy of the tags and is the value source for rendering the template
Map<String, Object> valueMap = tags.entrySet()
.stream().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
// Tags configured on the definition are added to both the alert tags and the template values
if (avaAlertDefine.getTags() != null && !avaAlertDefine.getTags().isEmpty()) {
for (TagItem tagItem : avaAlertDefine.getTags()) {
valueMap.put(tagItem.getName(), tagItem.getValue());
tags.put(tagItem.getName(), tagItem.getValue());
}
}
if (preAlert == null) {
// First failure seen for this monitor: start a new alert with trigger count 1
Alert.AlertBuilder alertBuilder = Alert.builder()
.tags(tags)
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.status(ALERT_STATUS_CODE_PENDING)
.target(CommonConstants.AVAILABILITY)
.content(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap))
.firstAlarmTime(currentTimeMill)
.lastAlarmTime(currentTimeMill)
.triggerTimes(1);
if (avaAlertDefine.getTimes() == null || avaAlertDefine.getTimes() <= 1) {
// A single failure is enough: send the alert now and record the monitor as unavailable
alarmCommonReduce.reduceAndSendAlarm(alertBuilder.build().clone());
unAvailableMonitors.add(monitorId);
} else {
// More failures are required before sending, so the stored alert is marked NOT_REACH
alertBuilder.status(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
// The alert is stored in both cases so later failures can update it
triggeredAlertMap.put(String.valueOf(monitorId), alertBuilder.build());
} else {
int times = preAlert.getTriggerTimes() + 1;
// A stored alert in PENDING status restarts its count at 1 and gets fresh content and tags
if (preAlert.getStatus() == ALERT_STATUS_CODE_PENDING) {
times = 1;
preAlert.setContent(AlertTemplateUtil.render(avaAlertDefine.getTemplate(), valueMap));
preAlert.setTags(tags);
}
preAlert.setTriggerTimes(times);
preAlert.setFirstAlarmTime(currentTimeMill);
preAlert.setLastAlarmTime(currentTimeMill);
// A missing trigger count on the definition is treated as 1
int defineTimes = avaAlertDefine.getTimes() == null ? 1 : avaAlertDefine.getTimes();
if (times >= defineTimes) {
// Required count reached: send a copy of the alert and record the monitor as unavailable
preAlert.setStatus(ALERT_STATUS_CODE_PENDING);
alarmCommonReduce.reduceAndSendAlarm(preAlert.clone());
unAvailableMonitors.add(monitorId);
} else {
preAlert.setStatus(CommonConstants.ALERT_STATUS_CODE_NOT_REACH);
}
}
}
/**
 * Sets the status of the availability alerts found for a monitor to solved.
 * The alerts are obtained from queryAvailabilityAlerts and each one is updated
 * through alertService.editAlertStatus.
 *
 * @param monitorId    id of the monitor, passed to queryAvailabilityAlerts
 * @param restoreAlert recovery alert, passed to queryAvailabilityAlerts
 */
private void updateAvailabilityAlertStatus(long monitorId, Alert restoreAlert) {
List<Alert> availabilityAlerts = queryAvailabilityAlerts(monitorId, restoreAlert);
// Processed as a parallel stream, so the alerts are not updated in any guaranteed order
availabilityAlerts.stream().parallel().forEach(alert -> {
log.info("updating alert id:{}",alert.getId());
log.info("updating alert status solved id: {}", alert.getId());
// One status update call per alert, each with a single-element id list
alertService.editAlertStatus(ALERT_STATUS_CODE_SOLVED, List.of(alert.getId()));
});
}
......@@ -413,8 +430,15 @@ public class CalculateAlarm {
}
@EventListener(SystemConfigChangeEvent.class)
public void onEvent(SystemConfigChangeEvent event) {
public void onSystemConfigChangeEvent(SystemConfigChangeEvent event) {
log.info("calculate alarm receive system config change event: {}.", event.getSource());
this.bundle = ResourceBundleUtil.getBundle("alerter");
}
/**
 * Reacts to a {@link MonitorDeletedEvent} by dropping the alert stored in
 * {@code triggeredAlertMap} under the monitor id.
 *
 * @param event event carrying the id of the monitor
 */
@EventListener(MonitorDeletedEvent.class)
public void onMonitorDeletedEvent(MonitorDeletedEvent event) {
    Long deletedMonitorId = event.getMonitorId();
    log.info("calculate alarm receive monitor {} has been deleted.", deletedMonitorId);
    // Entries in triggeredAlertMap for monitor-level alerts are keyed by the id as a string
    String triggeredKey = String.valueOf(deletedMonitorId);
    this.triggeredAlertMap.remove(triggeredKey);
}
}
......@@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
alerter.availability.emergency = Monitoring Availability Emergency Alert
alerter.reachability.emergency = Monitoring Reachability Emergency Alert
alerter.availability.resolved = Availability Alert Resolved, Monitor Status Normal Now
alerter.reachability.resolved = Reachability Alert Resolved, Monitor Status Normal Now
alerter.availability.recover = Availability Alert Resolved, Monitor Status Normal Now
alerter.alarm.recover = Alert Resolved Notice
alerter.notify.title = HertzBeat Alert Notify
alerter.notify.target = Monitor Target
alerter.notify.monitorId = Monitor ID
......@@ -27,4 +25,4 @@ alerter.notify.content = Alert Content
alerter.notify.console = Console Login
alerter.priority.0 = Emergency Alert
alerter.priority.1 = Critical Alert
alerter.priority.2 = Warning Alert
\ No newline at end of file
alerter.priority.2 = Warning Alert
......@@ -13,10 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
alerter.availability.emergency = 监控紧急可用性告警
alerter.reachability.emergency = 监控紧急可达性告警
alerter.availability.resolved = 可用性告警恢复通知, 监控状态已恢复正常
alerter.reachability.resolved = 可达性告警恢复通知, 监控状态已恢复正常
alerter.availability.recover = 可用性告警恢复通知, 监控状态已恢复正常
alerter.alarm.recover = 告警恢复通知
alerter.notify.title = HertzBeat告警通知
alerter.notify.target = 告警目标对象
alerter.notify.monitorId = 所属监控ID
......@@ -27,4 +25,4 @@ alerter.notify.content = 内容详情
alerter.notify.console = 登入控制台
alerter.priority.0 = 紧急告警
alerter.priority.1 = 严重告警
alerter.priority.2 = 警告告警
\ No newline at end of file
alerter.priority.2 = 警告告警
......@@ -105,6 +105,10 @@ public class AlertDefine {
@Schema(title = "告警阈值开关", example = "true", accessMode = READ_WRITE)
private boolean enable = true;
@Schema(title = "Is send alarm recover notice | 是否发送告警恢复通知", example = "false", accessMode = READ_WRITE)
@Column(columnDefinition = "boolean default false")
private boolean recoverNotice = false;
@Schema(title = "告警通知内容模版", example = "linux {monitor_name}: {monitor_id} cpu usage high",
accessMode = READ_WRITE)
......
package org.dromara.hertzbeat.common.support.event;
import org.springframework.context.ApplicationEvent;
/**
 * Application event signalling that a monitor has been deleted; it carries the monitor id.
 * @author tom
 */
public class MonitorDeletedEvent extends ApplicationEvent {
/**
 * Id of the monitor this event refers to
 */
private final Long monitorId;
/**
 * Creates the event.
 *
 * @param source    the event source, handed to the ApplicationEvent constructor
 * @param monitorId id of the monitor this event refers to
 */
public MonitorDeletedEvent(Object source, Long monitorId) {
super(source);
this.monitorId = monitorId;
}
/**
 * @return id of the monitor this event refers to
 */
public Long getMonitorId() {
return monitorId;
}
}
......@@ -32,8 +32,8 @@ import java.util.Map;
/**
* 报警持久化 - 落地到数据库
* Alarm data persistence - landing in the database
* @author <a href="mailto:Musk.Chen@fanruan.com">Musk.Chen</a>
*
* @author <a href="mailto:Musk.Chen@fanruan.com">Musk.Chen</a>
*/
@Component
@RequiredArgsConstructor
......@@ -43,7 +43,7 @@ final class DbAlertStoreHandlerImpl implements AlertStoreHandler {
private final MonitorService monitorService;
private final AlertService alertService;
@Override
public void store(Alert alert) {
Map<String, String> tags = alert.getTags();
......@@ -60,19 +60,17 @@ final class DbAlertStoreHandlerImpl implements AlertStoreHandler {
// 当监控未管理时 忽略静默其告警信息
return;
}
if (monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (CommonConstants.AVAILABILITY.equals(alert.getTarget())) {
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_PENDING && monitor.getStatus() == CommonConstants.AVAILABLE_CODE) {
// Availability Alarm Need to change the monitoring status to unavailable
// 可用性告警 需变更监控状态为不可用
monitorService.updateMonitorStatus(monitor.getId(), CommonConstants.UN_AVAILABLE_CODE);
}
} else {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED) {
} else if (alert.getStatus() == CommonConstants.ALERT_STATUS_CODE_RESTORED && monitor.getStatus() == CommonConstants.UN_AVAILABLE_CODE) {
// If the alarm is restored, the monitoring state needs to be restored
// 若是恢复告警 需对监控状态进行恢复
monitorService.updateMonitorStatus(monitorId, CommonConstants.AVAILABLE_CODE);
}
}
}
} else {
log.debug("store extern alert content: {}.", alert);
}
......
......@@ -41,6 +41,7 @@ public class AvailableAlertDefineInit implements CommandLineRunner {
.preset(true)
.times(1)
.enable(true)
.recoverNotice(true)
.priority(CommonConstants.ALERT_PRIORITY_CODE_EMERGENCY)
.template("${app} monitoring availability alert, code is ${code}")
.build();
......
......@@ -20,7 +20,6 @@ package org.dromara.hertzbeat.manager.service.impl;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.gson.Gson;
import lombok.extern.slf4j.Slf4j;
import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.constants.CommonConstants;
import org.dromara.hertzbeat.common.entity.job.Configmap;
......@@ -33,6 +32,7 @@ import org.dromara.hertzbeat.common.entity.manager.Param;
import org.dromara.hertzbeat.common.entity.manager.ParamDefine;
import org.dromara.hertzbeat.common.entity.manager.Tag;
import org.dromara.hertzbeat.common.entity.message.CollectRep;
import org.dromara.hertzbeat.common.support.event.MonitorDeletedEvent;
import org.dromara.hertzbeat.common.util.*;
import org.dromara.hertzbeat.manager.dao.CollectorDao;
import org.dromara.hertzbeat.manager.dao.CollectorMonitorBindDao;
......@@ -50,6 +50,7 @@ import org.dromara.hertzbeat.manager.support.exception.MonitorDatabaseException;
import org.dromara.hertzbeat.manager.support.exception.MonitorDetectException;
import org.dromara.hertzbeat.manager.support.exception.MonitorMetricsException;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
......@@ -112,9 +113,9 @@ public class MonitorServiceImpl implements MonitorService {
@Autowired
private TagMonitorBindDao tagMonitorBindDao;
@Autowired
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;
private final Map<String, ImExportService> imExportServiceMap = new HashMap<>();
......@@ -545,7 +546,6 @@ public class MonitorServiceImpl implements MonitorService {
if (params != null) {
paramDao.saveAll(params);
}
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitorId));
} catch (Exception e) {
log.error(e.getMessage(), e);
// Repository brushing abnormally cancels the previously delivered task
......@@ -568,7 +568,7 @@ public class MonitorServiceImpl implements MonitorService {
tagMonitorBindDao.deleteTagMonitorBindsByMonitorId(id);
alertDefineBindDao.deleteAlertDefineMonitorBindsByMonitorIdEquals(id);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}
......@@ -586,7 +586,7 @@ public class MonitorServiceImpl implements MonitorService {
// delete tag 删除监控对应的标签
tagService.deleteMonitorSystemTags(monitor);
collectJobScheduling.cancelAsyncCollectJob(monitor.getJobId());
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
}
}
......@@ -674,7 +674,7 @@ public class MonitorServiceImpl implements MonitorService {
// Issue collection tasks 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
applicationContext.publishEvent(new MonitorDeletedEvent(applicationContext, monitor.getId()));
}
monitorDao.saveAll(unManagedMonitors);
}
......@@ -773,7 +773,6 @@ public class MonitorServiceImpl implements MonitorService {
// 下发采集任务
long newJobId = collectJobScheduling.addAsyncCollectJob(appDefine);
monitor.setJobId(newJobId);
calculateAlarm.triggeredAlertMap.remove(String.valueOf(monitor.getId()));
monitorDao.save(monitor);
}
}
......
package org.dromara.hertzbeat.manager.service;
import org.dromara.hertzbeat.alert.calculate.CalculateAlarm;
import org.dromara.hertzbeat.alert.dao.AlertDefineBindDao;
import org.dromara.hertzbeat.common.entity.alerter.Alert;
import org.dromara.hertzbeat.common.entity.job.Job;
......@@ -32,6 +31,7 @@ import org.junit.jupiter.params.provider.CsvSource;
import org.mockito.InjectMocks;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import org.springframework.context.ApplicationContext;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;
import org.springframework.data.jpa.domain.Specification;
......@@ -96,7 +96,7 @@ class MonitorServiceTest {
private CollectorMonitorBindDao collectorMonitorBindDao;
@Mock
private CalculateAlarm calculateAlarm;
private ApplicationContext applicationContext;
@Mock
Map<String, Alert> triggeredAlertMap = spy(new HashMap<>());
......@@ -105,9 +105,7 @@ class MonitorServiceTest {
* 属性无法直接mock,测试执行前-手动赋值
*/
@BeforeEach
public void setUp() {
calculateAlarm.triggeredAlertMap = triggeredAlertMap;
}
public void setUp() {}
@Test
void detectMonitorEmpty() {
......
......@@ -125,21 +125,22 @@ CREATE TABLE hzb_tag_monitor_bind
DROP TABLE IF EXISTS hzb_alert_define ;
CREATE TABLE hzb_alert_define
(
id bigint not null auto_increment comment '告警定义ID',
app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...',
metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...',
field varchar(100) comment '配置告警的指标:usage,cores...',
preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警',
expr varchar(255) comment '告警触发条件表达式',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警',
tags varchar(4000) comment '附加告警标签(status:success,env:prod)',
enable boolean not null default true comment '告警阈值开关',
template varchar(255) not null comment '告警通知模板内容',
creator varchar(100) comment '创建者',
modifier varchar(100) comment '最新修改者',
gmt_create timestamp default current_timestamp comment 'create time',
gmt_update datetime default current_timestamp on update current_timestamp comment 'update time',
id bigint not null auto_increment comment '告警定义ID',
app varchar(100) not null comment '配置告警的监控类型:linux,mysql,jvm...',
metric varchar(100) not null comment '配置告警的指标集合:cpu,memory,info...',
field varchar(100) comment '配置告警的指标:usage,cores...',
preset boolean not null default false comment '是否是全局默认告警,是则所有此类型监控默认关联此告警',
expr varchar(255) comment '告警触发条件表达式',
priority tinyint not null default 0 comment '告警级别 0:高-emergency-紧急告警-红色 1:中-critical-严重告警-橙色 2:低-warning-警告告警-黄色',
times int not null default 1 comment '触发次数,即达到触发阈值次数要求后才算触发告警',
tags varchar(4000) comment '附加告警标签(status:success,env:prod)',
enable boolean not null default true comment '告警阈值开关',
template varchar(255) not null comment '告警通知模板内容',
recover_notice boolean not null default false comment 'Is send alarm recovered notice | 是否发送告警恢复通知',
creator varchar(100) comment '创建者',
modifier varchar(100) comment '最新修改者',
gmt_create timestamp default current_timestamp comment 'create time',
gmt_update datetime default current_timestamp on update current_timestamp comment 'update time',
primary key (id)
) ENGINE = InnoDB DEFAULT CHARSET=utf8mb4;
......
......@@ -13,6 +13,7 @@ export class AlertDefine {
times: number = 3;
tags!: TagItem[];
enable: boolean = true;
recoverNotice: boolean = false;
template!: string;
creator!: string;
modifier!: string;
......
......@@ -449,6 +449,19 @@
<nz-switch [(ngModel)]="define.preset" name="preset" id="preset"></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzFor="recoverNotice" [nzTooltipTitle]="'alert.setting.recover-notice.tip' | i18n">
{{ 'alert.setting.recover-notice' | i18n }}
</nz-form-label>
<nz-form-control nzSpan="12">
<nz-switch
[(ngModel)]="define.recoverNotice"
[ngModelOptions]="{ standalone: true }"
name="recoverNotice"
id="recoverNotice"
></nz-switch>
</nz-form-control>
</nz-form-item>
<nz-form-item>
<nz-form-label nzSpan="7" nzRequired="true" nzFor="enable" [nzTooltipTitle]="'alert.setting.enable.tip' | i18n">
{{ 'alert.setting.enable' | i18n }}
......
......@@ -146,6 +146,8 @@
"alert.setting.default.tip": "Whether this alarm threshold configuration applies to all this type of monitoring globally",
"alert.setting.enable": "Enable Alert",
"alert.setting.enable.tip": "This alarm threshold configuration is enabled or disabled",
"alert.setting.recover-notice": "Recover Notice",
"alert.setting.recover-notice.tip": "Whether to send the corresponding recovered notification when the alarm is resolved under this threshold rule",
"alert.setting.connect": "Alert Associate Monitors",
"alert.setting.connect.left": "No Associate",
"alert.setting.connect.right": "Associated",
......
......@@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否应用于全局所有此类型监控",
"alert.setting.enable": "启用告警",
"alert.setting.enable.tip": "此告警阈值配置开启生效或关闭",
"alert.setting.recover-notice": "恢复通知",
"alert.setting.recover-notice.tip": "是否在此阈值规则下告警恢复时发送对应的恢复通知",
"alert.setting.connect": "告警定义关联监控",
"alert.setting.connect.left": "未关联监控",
"alert.setting.connect.right": "已关联监控",
......
......@@ -146,6 +146,8 @@
"alert.setting.default.tip": "此告警阈值配置是否應用于全局所有此類型監控",
"alert.setting.enable": "啓用告警",
"alert.setting.enable.tip": "此告警阈值配置開啓生效或關閉",
"alert.setting.recover-notice": "恢復通知",
"alert.setting.recover-notice.tip": "是否在此閾值規則下告警恢復時發送對應的恢復通知",
"alert.setting.connect": "告警定義關聯監控",
"alert.setting.connect.left": "未關聯監控",
"alert.setting.connect.right": "已關聯監控",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册