v1.6.31 : analyse complète des logs + détection bots

- AccessLogParser : tous chemins/statuts pour IPs publiques (ipAllPaths, ipAllDays, ipAgents)
- Détection bots par patterns (data/bots.json, ~50 patterns initiaux)
- Section « Agents détectés » en bas de page admin/stats avec badge 🤖
- Panneau d'édition des patterns bots (formulaire avec CSRF)
- Drill-down IP : section « Autres chemins » (hors articles/livres)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 21:33:47 +02:00
parent 68a44d19d1
commit d53b5da31a
6 changed files with 288 additions and 77 deletions
+80 -44
View File
@@ -30,7 +30,15 @@ class AccessLogParser
}
/**
* @return array{pages:array<string,int>,books:array<string,int>,ips:array<string,int>,pages_by_day:array<string,list<int>>,ips_by_day:array<string,list<int>>,ip_top_paths:array<string,array<string,array{n:int,ts:int}>>,ip_agents:array<string,list<string>>}
* @return array{
* pages:array<string,int>,
* books:array<string,int>,
* ips:array<string,int>,
* pages_by_day:array<string,list<int>>,
* ips_by_day:array<string,list<int>>,
* ip_top_paths:array<string,array<string,array{n:int,ts:int}>>,
* ip_agents:array<string,list<string>>
* }
*/
public function stats(): array
{
@@ -47,15 +55,16 @@ class AccessLogParser
$cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400);
$pages = [];
$books = [];
$ips = [];
$ips = []; // toutes requêtes publiques (tous chemins, tous statuts)
$dayPages = [];
$ipDays = []; // [ip => [dayOffset => count]]
$ipPaths = []; // [ip => [path => count]]
$ipPathTs = []; // [ip => [path => last_timestamp]]
$ipAgents = []; // [ip => [ua => count]]
$ipPaths = []; // chemins /post/ et /book/ avec statut 200 (pour les ts)
$ipPathTs = [];
$ipAllPaths = []; // tous chemins, tous statuts
$ipAllDays = []; // tous jours, tous statuts
$ipAgents = []; // tous user-agents par IP
foreach ($this->logFiles() as $file) {
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
}
arsort($pages);
@@ -73,27 +82,30 @@ class AccessLogParser
$pagesByDay[$path] = $arr;
}
// Per-IP daily counts + top paths + top agents, limité aux 200 IPs les plus actives
// Top 200 IPs par volume total de requêtes
$topIpKeys = array_keys(array_slice($ips, 0, 200, true));
$ipsByDay = [];
$ipTopPaths = [];
$ipTopAgents = [];
foreach ($topIpKeys as $ip) {
// Sparkline : activité totale par jour
$arr = array_fill(0, $this->days, 0);
foreach ($ipDays[$ip] ?? [] as $offset => $count) {
foreach ($ipAllDays[$ip] ?? [] as $offset => $count) {
if ($offset >= 0 && $offset < $this->days) {
$arr[$offset] = $count;
}
}
$ipsByDay[$ip] = $arr;
$paths = $ipPaths[$ip] ?? [];
arsort($paths);
// Top 20 chemins tous types confondus
$allPaths = $ipAllPaths[$ip] ?? [];
arsort($allPaths);
$ipTopPaths[$ip] = [];
foreach (array_slice($paths, 0, 10, true) as $p => $cnt) {
foreach (array_slice($allPaths, 0, 20, true) as $p => $cnt) {
$ipTopPaths[$ip][$p] = ['n' => $cnt, 'ts' => $ipPathTs[$ip][$p] ?? 0];
}
// Top 5 user-agents
$agents = $ipAgents[$ip] ?? [];
arsort($agents);
$ipTopAgents[$ip] = array_keys(array_slice($agents, 0, 5, true));
@@ -163,16 +175,24 @@ class AccessLogParser
return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}");
}
private function parseLine(string $line, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void
{
private function parseLine(
string $line,
int $cutoff,
array &$pages,
array &$books,
array &$ips,
array &$dayPages,
array &$ipPaths,
array &$ipPathTs,
array &$ipAllPaths,
array &$ipAllDays,
array &$ipAgents
): void {
if (!preg_match(self::RE, $line, $m)) {
return;
}
[, $ip, $ts, $path, $status, $ua] = $m;
if ($status !== '200') {
return;
}
$tsVal = self::parseTimestamp($ts);
if ($tsVal < $cutoff) {
return;
@@ -181,38 +201,54 @@ class AccessLogParser
$publicIp = filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) !== false;
$dayOffset = (int) floor(($tsVal - $cutoff) / 86400);
if (str_starts_with($path, '/post/') && strlen($path) > 6) {
$pages[$path] = ($pages[$path] ?? 0) + 1;
if ($publicIp) {
$ips[$ip] = ($ips[$ip] ?? 0) + 1;
}
$dayPages[$path][$dayOffset] = ($dayPages[$path][$dayOffset] ?? 0) + 1;
$ipDays[$ip][$dayOffset] = ($ipDays[$ip][$dayOffset] ?? 0) + 1;
$ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
// Toutes les requêtes publiques : comptage global, chemins, jours, agents
if ($publicIp) {
$ips[$ip] = ($ips[$ip] ?? 0) + 1;
$ipAllPaths[$ip][$path] = ($ipAllPaths[$ip][$path] ?? 0) + 1;
$ipAllDays[$ip][$dayOffset] = ($ipAllDays[$ip][$dayOffset] ?? 0) + 1;
if ($ua !== '') {
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
}
} elseif (str_starts_with($path, '/book/') && strlen($path) > 6) {
}
// Comptage spécifique aux pages de contenu (statut 200, /post/ ou /book/)
if ($status !== '200') {
return;
}
if (str_starts_with($path, '/post/') && strlen($path) > 6) {
$pages[$path] = ($pages[$path] ?? 0) + 1;
$dayPages[$path][$dayOffset] = ($dayPages[$path][$dayOffset] ?? 0) + 1;
if ($publicIp) {
$ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
}
} elseif (str_ends_with($path, '/') === false && str_starts_with($path, '/book/') && strlen($path) > 6) {
$books[$path] = ($books[$path] ?? 0) + 1;
if ($publicIp) {
$ips[$ip] = ($ips[$ip] ?? 0) + 1;
}
$ipDays[$ip][$dayOffset] = ($ipDays[$ip][$dayOffset] ?? 0) + 1;
$ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
if ($ua !== '') {
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
$ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
}
}
}
private function parseFile(array $file, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void
{
private function parseFile(
array $file,
int $cutoff,
array &$pages,
array &$books,
array &$ips,
array &$dayPages,
array &$ipPaths,
array &$ipPathTs,
array &$ipAllPaths,
array &$ipAllDays,
array &$ipAgents
): void {
if ($file['type'] === 'tgz') {
try {
$phar = new PharData($file['path']);
@@ -222,7 +258,7 @@ class AccessLogParser
continue;
}
foreach (explode("\n", $content) as $line) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
}
}
} catch (\Exception $e) {
@@ -235,7 +271,7 @@ class AccessLogParser
while (!gzeof($h)) {
$line = gzgets($h, 8192);
if ($line !== false) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
}
}
gzclose($h);
@@ -245,7 +281,7 @@ class AccessLogParser
return;
}
while (($line = fgets($h)) !== false) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
}
fclose($h);
}