Files
scripts-bash/servers/linux/monitoring/bin/alert-engine.sh
2026-03-16 14:02:00 +01:00

354 lines
9.0 KiB
Bash
Executable File

#!/bin/bash
# Copyright (C) 2026 Cédric Abonnel
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
# Treat any expansion of an unset variable as a fatal error.
set -u
# Script name and resolved path (readlink -f, then realpath, then raw $0 as fallbacks).
# NOTE(review): neither variable is read in this file; presumably the sourced
# library consumes them — confirm.
SCRIPT_NAME="$(basename "$0")"
SCRIPT_PATH="$(readlink -f "$0" 2>/dev/null || realpath "$0" 2>/dev/null || echo "$0")"
# Load the shared monitoring library; exit with status 3 when it cannot be sourced.
# shellcheck source=/opt/monitoring/lib/monitoring-lib.sh
. /opt/monitoring/lib/monitoring-lib.sh || exit 3
# Main configuration first, then the local file (loaded second, so it can
# override the first if load_conf_if_exists sources the files — TODO confirm).
load_conf_if_exists "/opt/monitoring/conf/alert-engine.conf"
load_conf_if_exists "/opt/monitoring/conf/alert-engine.local.conf"
# NOTE(review): presumably prevents two concurrent runs of the engine — confirm in the library.
lock_or_exit "alert-engine"
# NOTE(review): other commands invoked in this file (head, mktemp, curl, cat, mv)
# are not part of this list.
require_cmd awk sed grep date tail stat cut tr
# Event log to scan, byte-offset state file and deduplication store; each path
# can be overridden from the configuration.
LOG_SOURCE="${LOG_FILE:-/var/log/monitoring/events.jsonl}"
STATE_FILE="${ALERT_STATE_FILE:-/var/lib/monitoring/alert-engine.offset}"
DEDUP_FILE="${ALERT_DEDUP_FILE:-/var/lib/monitoring/alert-engine.dedup}"
# Make sure the state directories and files exist before any function reads them.
mkdir -p "$(dirname "$STATE_FILE")" "$(dirname "$DEDUP_FILE")" || fail_internal "Impossible de créer les répertoires d'état"
touch "$STATE_FILE" "$DEDUP_FILE" || fail_internal "Impossible d'initialiser les fichiers d'état"
#######################################
# Extract the raw value of a string field from a single-line JSON object.
# Whitespace around the colon is tolerated and an escaped quote (\") inside
# the value no longer terminates it. The value is returned exactly as it
# appears in the JSON text: escape sequences are NOT decoded.
# When the key occurs several times, the leading greedy ".*" makes the last
# occurrence win.
# Arguments: $1 - key name (interpolated into a regex; pass plain identifiers)
#            $2 - JSON line
# Outputs:   the value on stdout; nothing when the key is absent or its value
#            is not a string
#######################################
json_get() {
  local key="$1"
  local line="$2"
  local field
  # "key" : "value" where value is any run of non-quote/non-backslash
  # characters or backslash-escaped characters.
  field='"'"${key}"'"[[:space:]]*:[[:space:]]*"(([^"\\]|\\.)*)"'
  printf '%s\n' "$line" \
    | sed -n -E "s/.*${field}.*/\1/p" \
    | head -n1
}
#######################################
# Extract an unsigned integer field ("key":123) from a single-line JSON object.
# Arguments: $1 - key name, $2 - JSON line
# Outputs:   the digits on stdout, or nothing when no such numeric field exists
#######################################
json_get_number() {
  local name="$1" json="$2"
  # The here-string feeds the line plus a trailing newline to sed.
  sed -n "s/.*\"${name}\":\([0-9][0-9]*\).*/\1/p" <<< "$json" | head -n1
}
#######################################
# Print the byte offset stored in STATE_FILE.
# Globals: STATE_FILE (read)
# Outputs: the stored value when it consists only of digits, otherwise 0
#          (missing file, empty file or any non-numeric content)
#######################################
get_last_offset() {
  local stored
  stored="$(cat "$STATE_FILE" 2>/dev/null || true)"
  case "$stored" in
    '' | *[!0-9]*)
      printf '0\n'
      ;;
    *)
      printf '%s\n' "$stored"
      ;;
  esac
}
#######################################
# Persist a byte offset, overwriting the previous content of STATE_FILE.
# Globals:   STATE_FILE (written)
# Arguments: $1 - offset to store
#######################################
set_last_offset() {
  local new_offset="$1"
  printf '%s\n' "$new_offset" >"$STATE_FILE"
}
#######################################
# Print the size in bytes of LOG_SOURCE.
# Globals: LOG_SOURCE (read)
# Outputs: the size reported by stat, or 0 when stat fails (e.g. missing file)
#######################################
current_log_size() {
  local bytes
  if bytes="$(stat -c '%s' "$LOG_SOURCE" 2>/dev/null)"; then
    printf '%s\n' "$bytes"
  else
    printf '0\n'
  fi
}
#######################################
# Drop deduplication entries older than the dedup window.
# Entries are "host|timestamp|app|level|event"; a line is kept when it has at
# least two fields and (now - timestamp) <= window.
# The temporary file is created next to DEDUP_FILE so the final rename stays
# on the same filesystem and does not depend on any other variable being set.
# The dedup file is only replaced when the filter succeeded, and the temporary
# file is removed on every failure path.
# Globals: DEDUP_FILE (read, replaced), ALERT_DEDUP_WINDOW (read, default 3600)
# Returns: always 0 (best-effort housekeeping)
#######################################
cleanup_dedup_file() {
  local now window tmp
  now="$(date +%s)"
  window="${ALERT_DEDUP_WINDOW:-3600}"
  tmp="$(mktemp "${DEDUP_FILE}.XXXXXX")" || return 0
  if awk -F'|' -v now="$now" -v window="$window" '
      NF >= 2 && (now - $2) <= window
    ' "$DEDUP_FILE" > "$tmp" 2>/dev/null; then
    mv -f "$tmp" "$DEDUP_FILE" 2>/dev/null || rm -f "$tmp"
  else
    # Keep the existing dedup state rather than replacing it with a partial file.
    rm -f "$tmp"
  fi
  return 0
}
#######################################
# Build the deduplication key for an event.
# Arguments: $1 - host, $2 - app, $3 - level, $4 - event
# Outputs:   "host|app|level|event" on stdout
#######################################
dedup_key() {
  local key="${1}|${2}|${3}|${4}"
  printf '%s\n' "$key"
}
#######################################
# Decide whether an alert may be sent, based on the deduplication store.
# The timestamp of the last matching entry in DEDUP_FILE is compared with the
# current time.
# Globals:   DEDUP_FILE (read), ALERT_DEDUP_WINDOW (read, default 3600)
# Arguments: $1 - key as produced by dedup_key (host|app|level|event)
# Returns:   1 when the last matching entry is younger than the window
#            (suppress), 0 otherwise (notify)
#######################################
should_notify_dedup() {
  local wanted="$1"
  local current span last_seen
  current="$(date +%s)"
  span="${ALERT_DEDUP_WINDOW:-3600}"
  # Stored lines are host|ts|app|level|event: rebuild the key from fields
  # 1, 3, 4, 5 and remember the timestamp of the last line that matches.
  last_seen="$(
    awk -F'|' -v k="$wanted" '
      $1 "|" $3 "|" $4 "|" $5 == k { ts = $2 }
      END { print ts }
    ' "$DEDUP_FILE"
  )"
  # No usable timestamp recorded for this key: nothing to suppress.
  [[ "$last_seen" =~ ^[0-9]+$ ]] || return 0
  if [ $((current - last_seen)) -lt "$span" ]; then
    return 1
  fi
  return 0
}
#######################################
# Append an entry stamped with the current epoch time to the dedup store.
# Globals:   DEDUP_FILE (appended)
# Arguments: $1 - host, $2 - app, $3 - level, $4 - event
# Outputs:   one line "host|timestamp|app|level|event" appended to DEDUP_FILE
#######################################
save_dedup_entry() {
  local stamp
  stamp="$(date +%s)"
  printf '%s|%s|%s|%s|%s\n' "$1" "$stamp" "$2" "$3" "$4" >>"$DEDUP_FILE"
}
#######################################
# Tell whether an event name is listed in ALERT_IGNORE_EVENTS.
# The list is split on whitespace and each entry is compared literally; the
# entries are never subjected to pathname expansion, so a "*" in the list
# cannot match files of the current directory.
# Globals:   ALERT_IGNORE_EVENTS (read, optional whitespace-separated list)
# Arguments: $1 - event name
# Returns:   0 when the event is ignored, 1 otherwise
#######################################
event_is_ignored() {
  local event="$1" ignored
  local -a ignore_list=()
  # read -a splits on IFS without globbing; -d '' makes it consume the whole
  # value (including newlines) and return non-zero at end of input.
  IFS=$' \t\n' read -r -d '' -a ignore_list <<< "${ALERT_IGNORE_EVENTS:-}" || true
  # The ${arr[@]+...} form keeps an empty array safe under `set -u` on older bash.
  for ignored in ${ignore_list[@]+"${ignore_list[@]}"}; do
    [ "$ignored" = "$event" ] && return 0
  done
  return 1
}
#######################################
# Print the comma-separated notification channels for an event.
# A per-event rule (variable ALERT_RULE_<event>) takes precedence over the
# per-level defaults.
# The event name comes from the log, so it is validated before being used
# for indirect expansion: a name with characters outside [A-Za-z0-9_] would
# either abort the expansion (dropping the alert) or, with a crafted array
# subscript such as "x[$(cmd)]", execute code. Such names simply skip the
# rule lookup and fall back to the level defaults.
# Globals:   ALERT_RULE_<event>, ALERT_DEFAULT_CHANNELS_WARNING,
#            ALERT_DEFAULT_CHANNELS_ERROR, ALERT_DEFAULT_CHANNELS_CRITICAL (read)
# Arguments: $1 - level, $2 - event name
# Outputs:   channel list on stdout; an empty line for any other level
#######################################
channels_for_event() {
  local level="$1"
  local event="$2"
  local varname value=""
  # Byte-wise matching so the bracket ranges cannot match locale-specific letters.
  local LC_ALL=C
  if [[ "$event" =~ ^[A-Za-z0-9_]+$ ]]; then
    varname="ALERT_RULE_${event}"
    value="${!varname:-}"
  fi
  if [ -n "$value" ]; then
    printf '%s\n' "$value"
    return 0
  fi
  case "$level" in
    WARNING)
      printf '%s\n' "${ALERT_DEFAULT_CHANNELS_WARNING:-ntfy}"
      ;;
    ERROR)
      printf '%s\n' "${ALERT_DEFAULT_CHANNELS_ERROR:-ntfy,mail}"
      ;;
    CRITICAL)
      printf '%s\n' "${ALERT_DEFAULT_CHANNELS_CRITICAL:-ntfy,mail}"
      ;;
    *)
      printf '\n'
      ;;
  esac
}
#######################################
# Print the ntfy tags associated with a severity level.
# Globals:   NTFY_TAGS_WARNING, NTFY_TAGS_ERROR, NTFY_TAGS_CRITICAL (read, optional)
# Arguments: $1 - level
# Outputs:   tag list on stdout; an empty line for any other level
#######################################
tags_for_level() {
  local level="$1" tags=""
  if [ "$level" = "WARNING" ]; then
    tags="${NTFY_TAGS_WARNING:-warning}"
  elif [ "$level" = "ERROR" ]; then
    tags="${NTFY_TAGS_ERROR:-warning,rotating_light}"
  elif [ "$level" = "CRITICAL" ]; then
    tags="${NTFY_TAGS_CRITICAL:-skull,warning}"
  fi
  printf '%s\n' "$tags"
}
#######################################
# Publish a notification to an ntfy topic with curl.
# The tags passed by the caller are now honoured; previously the fourth
# argument was ignored and every message was tagged "warning".
# Globals:   ALERT_NTFY_ENABLED (read, default "true"), NTFY_SERVER,
#            NTFY_TOPIC (read, required), NTFY_TOKEN (read, optional)
# Arguments: $1 - title, $2 - body, $3 - priority,
#            $4 - comma-separated tags (optional; "warning" when empty or absent)
# Returns:   0 when ntfy is disabled or curl succeeded; 1 when server or
#            topic is missing; otherwise curl's exit status
#######################################
send_ntfy() {
  local title="$1"
  local body="$2"
  local priority="$3"
  local tags="${4:-warning}"
  [ "${ALERT_NTFY_ENABLED:-true}" = "true" ] || return 0
  [ -n "${NTFY_SERVER:-}" ] || return 1
  [ -n "${NTFY_TOPIC:-}" ] || return 1
  local url="${NTFY_SERVER%/}/${NTFY_TOPIC}"
  local curl_args=(
    -fsS
    -X POST
    -H "Title: ${title}"
    -H "Priority: ${priority}"
    -H "Tags: ${tags}"
    -d "$body"
  )
  # Protected topic: authenticate with a bearer token.
  # NOTE(review): the token is passed on curl's command line and is therefore
  # visible in the process list while curl runs.
  if [ -n "${NTFY_TOKEN:-}" ]; then
    curl_args+=(-H "Authorization: Bearer ${NTFY_TOKEN}")
  fi
  curl "${curl_args[@]}" "$url" >/dev/null
}
#######################################
# Send an alert by piping a minimal RFC 822 message to a sendmail-compatible
# binary invoked with -t.
# Globals:   ALERT_MAIL_ENABLED (read, default "true"), DEST (read, required),
#            ALERT_MAIL_BIN (read, default /usr/sbin/sendmail),
#            ALERT_MAIL_SUBJECT_PREFIX (read, default "[monitoring]")
# Arguments: $1 - subject, $2 - body
# Returns:   0 when mail is disabled; 1 when DEST is empty or the binary is
#            not executable; otherwise the mailer's exit status
#######################################
send_mail() {
  local subject="$1" body="$2"
  local mailer="${ALERT_MAIL_BIN:-/usr/sbin/sendmail}"
  if [ "${ALERT_MAIL_ENABLED:-true}" != "true" ]; then
    return 0
  fi
  if [ -z "${DEST:-}" ]; then
    return 1
  fi
  if [ ! -x "$mailer" ]; then
    return 1
  fi
  # Headers, blank separator line, then the body.
  printf 'To: %s\nSubject: %s %s\nContent-Type: text/plain; charset=UTF-8\n\n%s\n' \
    "${DEST}" "${ALERT_MAIL_SUBJECT_PREFIX:-[monitoring]}" "$subject" "$body" \
    | "$mailer" -t
}
#######################################
# Map a severity level to an ntfy priority name.
# Arguments: $1 - level
# Outputs:   "urgent" for CRITICAL, "high" for ERROR, "default" otherwise
#######################################
priority_for_level() {
  local prio='default'
  case "$1" in
    CRITICAL) prio='urgent' ;;
    ERROR) prio='high' ;;
  esac
  printf '%s\n' "$prio"
}
#######################################
# Build the one-line notification title.
# Arguments: $1 - host, $2 - app, $3 - level, $4 - event
# Outputs:   "host [app] level event" on stdout
#######################################
build_title() {
  local title="${1} [${2}] ${3} ${4}"
  printf '%s\n' "$title"
}
#######################################
# Build the multi-line notification body.
# Arguments: $1 - timestamp, $2 - host, $3 - app, $4 - level, $5 - event,
#            $6 - message, $7 - raw log line
# Outputs:   the labelled fields on stdout, one per line, with the message
#            and the raw line each on the line following their label
#######################################
build_body() {
  printf 'Date: %s\n' "$1"
  printf 'Hôte: %s\n' "$2"
  printf 'Script: %s\n' "$3"
  printf 'Niveau: %s\n' "$4"
  printf 'Événement: %s\n' "$5"
  printf 'Message:\n%s\n' "$6"
  printf 'Ligne brute:\n%s\n' "$7"
}
#######################################
# Process one JSONL event: filter it, then notify the configured channels.
# Steps: extract level/event; skip lines without them, skip DEBUG/INFO/NOTICE
# and ignored events; apply deduplication; resolve channels; send to each
# channel; record the event in the dedup store.
# The dedup entry is recorded even when a send fails, as before, so the
# failure events logged here cannot trigger a new alert on every run.
# Changes from the previous version:
#   - the channel array is local (it used to leak as a global);
#   - whitespace inside the channel list is ignored, so "ntfy, mail"
#     reaches both channels instead of silently skipping " mail";
#   - the remaining fields and the tags are only extracted for lines that
#     pass the filters.
# Arguments: $1 - raw log line
# Returns:   0
#######################################
process_line() {
  local line="$1"
  local ts host app level event message channels title body prio ch key tags
  local -a channel_array=()
  level="$(json_get "level" "$line")"
  event="$(json_get "event" "$line")"
  [ -n "$level" ] || return 0
  [ -n "$event" ] || return 0
  case "$level" in
    DEBUG | INFO | NOTICE)
      return 0
      ;;
  esac
  if event_is_ignored "$event"; then
    return 0
  fi
  ts="$(json_get "ts" "$line")"
  host="$(json_get "host" "$line")"
  app="$(json_get "app" "$line")"
  message="$(json_get "message" "$line")"
  key="$(dedup_key "$host" "$app" "$level" "$event")"
  if ! should_notify_dedup "$key"; then
    log_debug "alert_suppressed_dedup" "Alerte supprimée par déduplication" \
      "event=$event" "level=$level" "host=$host" "app=$app"
    return 0
  fi
  channels="$(channels_for_event "$level" "$event")"
  [ -n "$channels" ] || return 0
  tags="$(tags_for_level "$level")"
  title="$(build_title "$host" "$app" "$level" "$event")"
  body="$(build_body "$ts" "$host" "$app" "$level" "$event" "$message" "$line")"
  prio="$(priority_for_level "$level")"
  IFS=',' read -r -a channel_array <<< "$channels"
  for ch in ${channel_array[@]+"${channel_array[@]}"}; do
    # Tolerate "ntfy, mail" style lists.
    ch="${ch//[[:space:]]/}"
    case "$ch" in
      ntfy)
        if send_ntfy "$title" "$body" "$prio" "$tags"; then
          log_info "alert_sent_ntfy" "Notification ntfy envoyée" \
            "event=$event" "level=$level" "host=$host" "app=$app"
        else
          log_error "alert_ntfy_failed" "Échec d'envoi ntfy" \
            "event=$event" "level=$level" "host=$host" "app=$app"
        fi
        ;;
      mail)
        if send_mail "$title" "$body"; then
          log_info "alert_sent_mail" "Mail d'alerte envoyé" \
            "event=$event" "level=$level" "host=$host" "app=$app"
        else
          log_error "alert_mail_failed" "Échec d'envoi mail" \
            "event=$event" "level=$level" "host=$host" "app=$app"
        fi
        ;;
    esac
  done
  save_dedup_entry "$host" "$app" "$level" "$event"
}
#######################################
# Store in the variable named by $2 the length in bytes of the string $1.
# LC_ALL=C is set locally so ${#...} counts bytes rather than characters.
#######################################
_alert_engine_byte_length() {
  local LC_ALL=C
  printf -v "$2" '%s' "${#1}"
}

#######################################
# Process the part of the event log that has not been handled yet.
# The window is the byte range [stored offset, size measured at start]; the
# stored offset is reset to 0 when it exceeds the current size (rotation or
# truncation). The new offset is the old one plus the bytes of the lines
# actually consumed, so:
#   - an unterminated last line (writer still appending) is left for the
#     next run instead of being skipped and later parsed as a fragment;
#   - lines appended while this run is in progress are neither read now nor
#     read twice later.
# Globals: LOG_SOURCE (read); STATE_FILE and DEDUP_FILE through the helpers
# Exits:   0 immediately when the log file does not exist
#######################################
main() {
  local last_offset log_size pending consumed line nbytes
  last_offset="$(get_last_offset)"
  log_size="$(current_log_size)"
  if [ ! -f "$LOG_SOURCE" ]; then
    log_notice "alert_log_missing" "Fichier de log absent, rien à traiter" "file=$LOG_SOURCE"
    exit 0
  fi
  if [ "$last_offset" -gt "$log_size" ]; then
    log_notice "alert_offset_reset" "Offset réinitialisé après rotation ou troncature du log" \
      "old_offset=$last_offset" "new_offset=0"
    last_offset=0
  fi
  cleanup_dedup_file
  pending=$((log_size - last_offset))
  consumed=0
  if [ "$pending" -gt 0 ]; then
    # Process substitution keeps the loop in the current shell so that
    # "consumed" survives it; head -c bounds the read to the snapshot.
    while IFS= read -r line; do
      # read only succeeds on newline-terminated lines: count line + "\n".
      _alert_engine_byte_length "$line" nbytes
      consumed=$((consumed + nbytes + 1))
      [ -n "$line" ] || continue
      process_line "$line"
    done < <(tail -c +$((last_offset + 1)) "$LOG_SOURCE" | head -c "$pending")
  fi
  set_last_offset "$((last_offset + consumed))"
}
# Run the engine, then hand control to the library's exit helper.
# NOTE(review): exit_with_status is not defined in this file; presumably it
# exits with a status derived from what the log_* helpers recorded — confirm.
main
exit_with_status