From d53b5da31a810dd118a8a2c48032b1dbd8c39061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9drix?= Date: Tue, 19 May 2026 21:33:47 +0200 Subject: [PATCH] =?UTF-8?q?v1.6.31=20:=20analyse=20compl=C3=A8te=20des=20l?= =?UTF-8?q?ogs=20+=20d=C3=A9tection=20bots?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AccessLogParser : tous chemins/statuts pour IPs publiques (ipAllPaths, ipAllDays, ipAgents) - Détection bots par patterns (data/bots.json, ~50 patterns initiaux) - Section « Agents détectés » en bas de page admin/stats avec badge 🤖 - Panneau d'édition des patterns bots (formulaire avec CSRF) - Drill-down IP : section « Autres chemins » (hors articles/livres) Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 11 +++ public/assets/js/admin-stats.js | 133 +++++++++++++++++++++++++++----- public/index.php | 45 +++++++++++ public/version.txt | 2 +- src/AccessLogParser.php | 124 ++++++++++++++++++----------- templates/admin_stats.php | 50 +++++++++--- 6 files changed, 288 insertions(+), 77 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d8336c2..6ea23b0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,17 @@ Format : [Keep a Changelog](https://keepachangelog.com/fr/1.0.0/) — versionnag --- +## [1.6.31] - 2026-05-19 + +### Ajouté +- Admin stats : section « Agents détectés » en bas de page — agrège tous les user agents, détecte bots/humains, badge 🤖 pour les bots connus +- Admin stats : panneau d'édition des patterns bots (un par ligne, correspondance insensible à la casse), sauvegardé dans `data/bots.json` +- Admin stats / drill-down IP : section « Autres chemins » (tous chemins/statuts hors articles et livres), triée par volume +- AccessLogParser : analyse tous les chemins et statuts pour les IPs publiques (pas seulement /post/ et /book/ en 200), tracking `ipAllPaths`, `ipAllDays`, `ipAgents` +- `index.php` : action `admin_save_bots` — enregistre les patterns bots avec token CSRF ; initialisation automatique de `data/bots.json` avec ~50 patterns connus (Googlebot, GPTBot, curl, Scrapy…) + +--- + ## [1.6.30] - 2026-05-19 ### Ajouté diff --git a/public/assets/js/admin-stats.js b/public/assets/js/admin-stats.js index a27c4a3..7d7e3ec 100644 --- a/public/assets/js/admin-stats.js +++ b/public/assets/js/admin-stats.js @@ -1,4 +1,4 @@ -/* Admin stats : graphiques, sparklines, accordéon pays/AS/IP */ +/* Admin stats : graphiques, sparklines, accordéon pays/AS/IP, agents */ function esc(s) { return String(s).replace(/&/g, '&').replace(//g, '>').replace(/"/g, '"'); @@ -8,6 +8,20 @@ function trunc(s, n) { return s.length > n ? s.slice(0, n) + '…' : s; } +// Détection de bot par correspondance partielle insensible à la casse +var _botPatterns = (typeof FOLIO_BOT_PATTERNS !== 'undefined') ? FOLIO_BOT_PATTERNS : []; +function isBot(ua) { + if (!ua) { return false; } + var lo = ua.toLowerCase(); + for (var i = 0; i < _botPatterns.length; i++) { + if (lo.indexOf(_botPatterns[i].toLowerCase()) !== -1) { return true; } + } + return false; +} +function botBadge(ua) { + return isBot(ua) ? '🤖 ' : ''; +} + // ── Visiteurs par pays ──────────────────────────────────────────────────────── (function () { var el = document.getElementById('stats-country-container'); @@ -27,7 +41,7 @@ function trunc(s, n) { return String.fromCodePoint(cp[0], cp[1]) + ' '; } - // Index IPs par ASN pour le drill-down + // Index IPs par ASN var ipsByAsn = {}; Object.keys(ipData).forEach(function (ip) { var d = ipData[ip]; @@ -39,7 +53,6 @@ function trunc(s, n) { ipsByAsn[k].sort(function (a, b) { return b.hits - a.hits; }); }); - // Mini sparkline (80x20px polyline) pour chaque IP function ipSparkline(daily) { if (!daily || !daily.length) { return ''; } var W = 80, H = 20, padX = 1, padY = 2; @@ -89,36 +102,44 @@ function trunc(s, n) { var asnKey = n.asn || '__unknown__'; var ips = ipsByAsn[asnKey] || []; - // Lignes IP : adresse + agents à gauche, sparkline, chemins, hits var ipRows = ips.slice(0, 20).map(function (ipInfo) { - // Agents sous l'IP + // Agents sous l'IP avec badge bot var agentsHtml = ''; (ipInfo.agents || []).forEach(function (ua) { agentsHtml += '
' - + esc(trunc(ua, 55)) + '
'; + + botBadge(ua) + esc(trunc(ua, 55)) + ''; }); - // Chemins triés par date desc - var articles = [], books = []; + // Chemins triés : /post/ et /book/ avec ts, reste sans ts + var postBook = [], other = []; Object.keys(ipInfo.paths || {}).forEach(function (path) { var p = ipInfo.paths[path]; var cnt = (p && typeof p === 'object') ? p.n : p; var ts = (p && typeof p === 'object') ? p.ts : 0; - if (path.indexOf('/post/') === 0) { articles.push({ path: path, cnt: cnt, ts: ts }); } - else if (path.indexOf('/book/') === 0) { books.push({ path: path, cnt: cnt, ts: ts }); } + if (ts > 0) { postBook.push({ path: path, cnt: cnt, ts: ts }); } + else { other.push({ path: path, cnt: cnt }); } }); - articles.sort(function (a, b) { return b.ts - a.ts; }); - books.sort(function (a, b) { return b.ts - a.ts; }); + postBook.sort(function (a, b) { return b.ts - a.ts; }); + other.sort(function (a, b) { return b.cnt - a.cnt; }); function pathLine(p, prefix) { - var slug = decodeURIComponent(p.path.replace(prefix, '')); + var raw = p.path.replace(prefix, ''); + var slug = ''; + try { slug = decodeURIComponent(raw); } catch (e) { slug = raw; } return '
' + '' - + esc(trunc(slug, 40)) + '' + + esc(trunc(slug || p.path, 40)) + '' + + ' (' + p.cnt + ')
'; + } + function otherLine(p) { + return '
' + + '' + esc(trunc(p.path, 44)) + '' + ' (' + p.cnt + ')
'; } var pathsHtml = ''; + var articles = postBook.filter(function (p) { return p.path.indexOf('/post/') === 0; }); + var books = postBook.filter(function (p) { return p.path.indexOf('/book/') === 0; }); if (articles.length) { pathsHtml += '
Articles
' + articles.map(function (p) { return pathLine(p, '/post/'); }).join(''); @@ -127,6 +148,10 @@ function trunc(s, n) { pathsHtml += '
Livres
' + books.map(function (p) { return pathLine(p, '/book/'); }).join(''); } + if (other.length) { + pathsHtml += '
Autres chemins
' + + other.map(otherLine).join(''); + } if (!pathsHtml) { pathsHtml = ''; } return '
' @@ -141,9 +166,9 @@ function trunc(s, n) { + '
'; }).join(''); - var hasIps = ips.length > 0; - var toggleAttrs = hasIps ? ' data-bs-toggle="collapse" data-bs-target="#' + asId + '" role="button"' : ''; - var chevron = hasIps ? '' : ''; + var hasIps = ips.length > 0; + var toggleAttrs = hasIps ? ' data-bs-toggle="collapse" data-bs-target="#' + asId + '" role="button"' : ''; + var chevron = hasIps ? '' : ''; return '
' + '
' @@ -184,6 +209,76 @@ function trunc(s, n) { el.innerHTML = html; }()); +// ── Liste consolidée de tous les agents ────────────────────────────────────── +(function () { + var el = document.getElementById('stats-agents-container'); + var badge = document.getElementById('agents-count'); + var ipData = (typeof FOLIO_IP_DATA !== 'undefined') ? FOLIO_IP_DATA : {}; + if (!el) { return; } + + // Agréger toutes les UAs depuis FOLIO_IP_DATA + var uaCounts = {}; + Object.keys(ipData).forEach(function (ip) { + (ipData[ip].agents || []).forEach(function (ua) { + uaCounts[ua] = (uaCounts[ua] || 0) + (ipData[ip].hits || 0); + }); + }); + + var agents = Object.keys(uaCounts).map(function (ua) { + return { ua: ua, hits: uaCounts[ua], bot: isBot(ua) }; + }).sort(function (a, b) { + // Bots d'abord, puis par hits desc + if (a.bot !== b.bot) { return a.bot ? -1 : 1; } + return b.hits - a.hits; + }); + + if (!agents.length) { + el.innerHTML = '

Aucun agent détecté.

'; + return; + } + + var bots = agents.filter(function (a) { return a.bot; }); + var unknown = agents.filter(function (a) { return !a.bot; }); + if (badge) { badge.textContent = '— ' + bots.length + ' bot(s) détecté(s) sur ' + agents.length; } + + function agentRow(a) { + return '' + + '' + + (a.bot ? '🤖' : '?') + '' + + '' + + '' + esc(a.ua) + '' + + '' + + a.hits.toLocaleString('fr-FR') + '' + + ''; + } + + var botsHtml = bots.map(agentRow).join(''); + var unknownHtml = unknown.map(agentRow).join(''); + + var html = '
' + + '' + + '' + + '' + + '' + + '' + + '' + + ''; + + if (botsHtml) { + html += '' + + botsHtml; + } + if (unknownHtml) { + html += '' + + unknownHtml; + } + + html += '
User-AgentReq.
' + + 'Bots connus (' + bots.length + ')
' + + 'Agents non classés (' + unknown.length + ')
'; + el.innerHTML = html; +}()); + // ── Pages les plus visitées (RSS XML + sparklines) ─────────────────────────── (function () { var container = document.getElementById('stats-pages-container'); @@ -303,9 +398,7 @@ function trunc(s, n) { + '' + '' - + dots - + yLabels - + xLabels + + dots + yLabels + xLabels + ''; } diff --git a/public/index.php b/public/index.php index 20a0b73..d469912 100644 --- a/public/index.php +++ b/public/index.php @@ -2768,6 +2768,37 @@ switch ($action) { $adminData['as_groups'] = asGroups(); $adminData['stats_pages_by_day'] = $statsRaw['pages_by_day'] ?? []; $adminData['stats_ip_data'] = $statsRaw['ip_data'] ?? []; + + // Patterns de bots — initialisation si absent + $botsFile = DATA_PATH . '/bots.json'; + if (!file_exists($botsFile)) { + $defaultBots = [ + 'Googlebot','Googlebot-Image','Google-InspectionTool','Google-Extended', + 'bingbot','BingPreview','msnbot', + 'DuckDuckBot','DuckDuckGo-Favicons-Bot', + 'Baiduspider','YandexBot','YandexImages','YandexMetrika', + 'Applebot', + 'facebookexternalhit','facebot', + 'Twitterbot','LinkedInBot','Slackbot','TelegramBot','WhatsApp','Discordbot', + 'PetalBot','Bytespider','SogouSpider','SeznamBot','Exabot', + 'AhrefsBot','SemrushBot','MJ12bot','DotBot','rogerbot','BLEXBot','DataForSeoBot', + 'Screaming Frog SEO Spider', + 'ClaudeBot','GPTBot','Google-Extended','PerplexityBot','cohere-ai','anthropic-ai', + 'meta-externalagent','OAI-SearchBot','Amazonbot', + 'CCBot','ia_archiver','archive.org_bot', + 'NetcraftSurveyAgent', + 'python-requests','python-urllib','Python/', + 'curl/','wget/','Wget/', + 'Go-http-client/1','Java/','Apache-HttpClient','okhttp/', + 'Scrapy','HeadlessChrome','PhantomJS','Puppeteer','Playwright','Selenium', + 'UptimeRobot','Pingdom','StatusCake','Site24x7','GTmetrix', + 'Chrome-Lighthouse','PageSpeed','Zabbix','check_http', + 'libwww-perl','GuzzleHttp','masscan','zgrab','nuclei', + ]; + @mkdir(dirname($botsFile), 0755, true); + @file_put_contents($botsFile, json_encode($defaultBots, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)); + } + $adminData['bot_patterns'] = json_decode((string) file_get_contents($botsFile), true) ?: []; } if ($tab === 'categories') { @@ -3206,6 +3237,20 @@ switch ($action) { header('Location: /admin/stats?' . ($ok ? 'saved=1' : 'error=write')); exit; + case 'admin_save_bots': + requireAuth(); + if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') { + http_response_code(403); + exit; + } + $botsFile = DATA_PATH . '/bots.json'; + $patterns = array_values(array_unique(array_filter( + array_map('trim', explode("\n", (string) ($_POST['bot_patterns'] ?? ''))) + ))); + $ok = @file_put_contents($botsFile, json_encode($patterns, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)) !== false; + header('Location: /admin/stats?' . ($ok ? 'saved=1' : 'error=write')); + exit; + case 'admin_create_role': requireAuth(); if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') { diff --git a/public/version.txt b/public/version.txt index 14781be..599e1a1 100644 --- a/public/version.txt +++ b/public/version.txt @@ -1 +1 @@ -1.6.30 +1.6.31 diff --git a/src/AccessLogParser.php b/src/AccessLogParser.php index 4aaf572..c140236 100644 --- a/src/AccessLogParser.php +++ b/src/AccessLogParser.php @@ -30,7 +30,15 @@ class AccessLogParser } /** - * @return array{pages:array,books:array,ips:array,pages_by_day:array>,ips_by_day:array>,ip_top_paths:array>,ip_agents:array>} + * @return array{ + * pages:array, + * books:array, + * ips:array, + * pages_by_day:array>, + * ips_by_day:array>, + * ip_top_paths:array>, + * ip_agents:array> + * } */ public function stats(): array { @@ -47,15 +55,16 @@ class AccessLogParser $cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400); $pages = []; $books = []; - $ips = []; + $ips = []; // toutes requêtes publiques (tous chemins, tous statuts) $dayPages = []; - $ipDays = []; // [ip => [dayOffset => count]] - $ipPaths = []; // [ip => [path => count]] - $ipPathTs = []; // [ip => [path => last_timestamp]] - $ipAgents = []; // [ip => [ua => count]] + $ipPaths = []; // chemins /post/ et /book/ avec statut 200 (pour les ts) + $ipPathTs = []; + $ipAllPaths = []; // tous chemins, tous statuts + $ipAllDays = []; // tous jours, tous statuts + $ipAgents = []; // tous user-agents par IP foreach ($this->logFiles() as $file) { - $this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents); + $this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents); } arsort($pages); @@ -73,27 +82,30 @@ class AccessLogParser $pagesByDay[$path] = $arr; } - // Per-IP daily counts + top paths + top agents, limité aux 200 IPs les plus actives + // Top 200 IPs par volume total de requêtes $topIpKeys = array_keys(array_slice($ips, 0, 200, true)); $ipsByDay = []; $ipTopPaths = []; $ipTopAgents = []; foreach ($topIpKeys as $ip) { + // Sparkline : activité totale par jour $arr = array_fill(0, $this->days, 0); - foreach ($ipDays[$ip] ?? [] as $offset => $count) { + foreach ($ipAllDays[$ip] ?? [] as $offset => $count) { if ($offset >= 0 && $offset < $this->days) { $arr[$offset] = $count; } } $ipsByDay[$ip] = $arr; - $paths = $ipPaths[$ip] ?? []; - arsort($paths); + // Top 20 chemins tous types confondus + $allPaths = $ipAllPaths[$ip] ?? []; + arsort($allPaths); $ipTopPaths[$ip] = []; - foreach (array_slice($paths, 0, 10, true) as $p => $cnt) { + foreach (array_slice($allPaths, 0, 20, true) as $p => $cnt) { $ipTopPaths[$ip][$p] = ['n' => $cnt, 'ts' => $ipPathTs[$ip][$p] ?? 0]; } + // Top 5 user-agents $agents = $ipAgents[$ip] ?? []; arsort($agents); $ipTopAgents[$ip] = array_keys(array_slice($agents, 0, 5, true)); @@ -163,16 +175,24 @@ class AccessLogParser return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}"); } - private function parseLine(string $line, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void - { + private function parseLine( + string $line, + int $cutoff, + array &$pages, + array &$books, + array &$ips, + array &$dayPages, + array &$ipPaths, + array &$ipPathTs, + array &$ipAllPaths, + array &$ipAllDays, + array &$ipAgents + ): void { if (!preg_match(self::RE, $line, $m)) { return; } [, $ip, $ts, $path, $status, $ua] = $m; - if ($status !== '200') { - return; - } $tsVal = self::parseTimestamp($ts); if ($tsVal < $cutoff) { return; @@ -181,38 +201,54 @@ class AccessLogParser $publicIp = filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) !== false; $dayOffset = (int) floor(($tsVal - $cutoff) / 86400); - if (str_starts_with($path, '/post/') && strlen($path) > 6) { - $pages[$path] = ($pages[$path] ?? 0) + 1; - if ($publicIp) { - $ips[$ip] = ($ips[$ip] ?? 0) + 1; - } - $dayPages[$path][$dayOffset] = ($dayPages[$path][$dayOffset] ?? 0) + 1; - $ipDays[$ip][$dayOffset] = ($ipDays[$ip][$dayOffset] ?? 0) + 1; - $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1; - if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) { - $ipPathTs[$ip][$path] = $tsVal; - } + // Toutes les requêtes publiques : comptage global, chemins, jours, agents + if ($publicIp) { + $ips[$ip] = ($ips[$ip] ?? 0) + 1; + $ipAllPaths[$ip][$path] = ($ipAllPaths[$ip][$path] ?? 0) + 1; + $ipAllDays[$ip][$dayOffset] = ($ipAllDays[$ip][$dayOffset] ?? 0) + 1; if ($ua !== '') { - $ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1; + $ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1; } - } elseif (str_starts_with($path, '/book/') && strlen($path) > 6) { + } + + // Comptage spécifique aux pages de contenu (statut 200, /post/ ou /book/) + if ($status !== '200') { + return; + } + + if (str_starts_with($path, '/post/') && strlen($path) > 6) { + $pages[$path] = ($pages[$path] ?? 0) + 1; + $dayPages[$path][$dayOffset] = ($dayPages[$path][$dayOffset] ?? 0) + 1; + if ($publicIp) { + $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1; + if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) { + $ipPathTs[$ip][$path] = $tsVal; + } + } + } elseif (str_ends_with($path, '/') === false && str_starts_with($path, '/book/') && strlen($path) > 6) { $books[$path] = ($books[$path] ?? 0) + 1; if ($publicIp) { - $ips[$ip] = ($ips[$ip] ?? 0) + 1; - } - $ipDays[$ip][$dayOffset] = ($ipDays[$ip][$dayOffset] ?? 0) + 1; - $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1; - if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) { - $ipPathTs[$ip][$path] = $tsVal; - } - if ($ua !== '') { - $ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1; + $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1; + if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) { + $ipPathTs[$ip][$path] = $tsVal; + } } } } - private function parseFile(array $file, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void - { + private function parseFile( + array $file, + int $cutoff, + array &$pages, + array &$books, + array &$ips, + array &$dayPages, + array &$ipPaths, + array &$ipPathTs, + array &$ipAllPaths, + array &$ipAllDays, + array &$ipAgents + ): void { if ($file['type'] === 'tgz') { try { $phar = new PharData($file['path']); @@ -222,7 +258,7 @@ class AccessLogParser continue; } foreach (explode("\n", $content) as $line) { - $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents); + $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents); } } } catch (\Exception $e) { @@ -235,7 +271,7 @@ class AccessLogParser while (!gzeof($h)) { $line = gzgets($h, 8192); if ($line !== false) { - $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents); + $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents); } } gzclose($h); @@ -245,7 +281,7 @@ class AccessLogParser return; } while (($line = fgets($h)) !== false) { - $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents); + $this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents); } fclose($h); } diff --git a/templates/admin_stats.php b/templates/admin_stats.php index 42ad2fa..36e33f9 100644 --- a/templates/admin_stats.php +++ b/templates/admin_stats.php @@ -1,11 +1,12 @@ @@ -21,12 +22,13 @@ $_ipData = $adminData['stats_ip_data'] ?? [];
-

14 derniers jours · visiteurs uniques · flux RSS XML

+

14 derniers jours · tous les chemins · flux RSS XML

@@ -49,7 +51,6 @@ var FOLIO_IP_DATA =
-
@@ -65,7 +66,7 @@ var FOLIO_IP_DATA = $hits): $rankB++; @@ -94,10 +95,35 @@ var FOLIO_IP_DATA =
-
+ +
+
+ Agents détectés + +
+
+

Chargement…

+
+ + +
+ +
+
+ -