v1.6.30 : agents utilisateur dans le drill-down IP

- Drill-down IP : user agents affichés sous l'adresse IP, top 5 par fréquence
- AccessLogParser : regex COMBINED étendue pour capturer le UA (groupe 5)
- Tracking ipAgents [ip => [ua => count]], ip_agents dans le résultat de stats()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 20:16:20 +02:00
parent e3d7e433e0
commit 68a44d19d1
5 changed files with 56 additions and 22 deletions
+26 -13
View File
@@ -13,7 +13,7 @@ class AccessLogParser
private static ?array $memo = null;
// Apache COMBINED : IP - - [timestamp] "METHOD /path HTTP/x" STATUS bytes "ref" "ua"
private const RE = '/^(\S+) \S+ \S+ \[(\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\] "[A-Z-]+ ([^\s"?]+)[^"]*" (\d{3}) /';
private const RE = '/^(\S+) \S+ \S+ \[(\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\] "[A-Z-]+ ([^\s"?]+)[^"]*" (\d{3}) \S+ "[^"]*" "([^"]*)"/u';
public function __construct(
string $logDir = '/var/log/apache2',
@@ -30,7 +30,7 @@ class AccessLogParser
}
/**
* @return array{pages:array<string,int>,books:array<string,int>,ips:array<string,int>,pages_by_day:array<string,list<int>>,ips_by_day:array<string,list<int>>,ip_top_paths:array<string,array<string,array{n:int,ts:int}>>}
* @return array{pages:array<string,int>,books:array<string,int>,ips:array<string,int>,pages_by_day:array<string,list<int>>,ips_by_day:array<string,list<int>>,ip_top_paths:array<string,array<string,array{n:int,ts:int}>>,ip_agents:array<string,list<string>>}
*/
public function stats(): array
{
@@ -52,9 +52,10 @@ class AccessLogParser
$ipDays = []; // [ip => [dayOffset => count]]
$ipPaths = []; // [ip => [path => count]]
$ipPathTs = []; // [ip => [path => last_timestamp]]
$ipAgents = []; // [ip => [ua => count]]
foreach ($this->logFiles() as $file) {
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs);
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
}
arsort($pages);
@@ -72,10 +73,11 @@ class AccessLogParser
$pagesByDay[$path] = $arr;
}
// Per-IP daily counts + top paths, limité aux 200 IPs les plus actives
$topIpKeys = array_keys(array_slice($ips, 0, 200, true));
$ipsByDay = [];
$ipTopPaths = [];
// Per-IP daily counts + top paths + top agents, limité aux 200 IPs les plus actives
$topIpKeys = array_keys(array_slice($ips, 0, 200, true));
$ipsByDay = [];
$ipTopPaths = [];
$ipTopAgents = [];
foreach ($topIpKeys as $ip) {
$arr = array_fill(0, $this->days, 0);
foreach ($ipDays[$ip] ?? [] as $offset => $count) {
@@ -91,6 +93,10 @@ class AccessLogParser
foreach (array_slice($paths, 0, 10, true) as $p => $cnt) {
$ipTopPaths[$ip][$p] = ['n' => $cnt, 'ts' => $ipPathTs[$ip][$p] ?? 0];
}
$agents = $ipAgents[$ip] ?? [];
arsort($agents);
$ipTopAgents[$ip] = array_keys(array_slice($agents, 0, 5, true));
}
$result = [
@@ -100,6 +106,7 @@ class AccessLogParser
'pages_by_day' => $pagesByDay,
'ips_by_day' => $ipsByDay,
'ip_top_paths' => $ipTopPaths,
'ip_agents' => $ipTopAgents,
];
@mkdir(dirname($this->cacheFile), 0755, true);
@file_put_contents($this->cacheFile, json_encode($result, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES));
@@ -156,12 +163,12 @@ class AccessLogParser
return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}");
}
private function parseLine(string $line, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs): void
private function parseLine(string $line, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void
{
if (!preg_match(self::RE, $line, $m)) {
return;
}
[, $ip, $ts, $path, $status] = $m;
[, $ip, $ts, $path, $status, $ua] = $m;
if ($status !== '200') {
return;
@@ -185,6 +192,9 @@ class AccessLogParser
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
if ($ua !== '') {
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
}
} elseif (str_starts_with($path, '/book/') && strlen($path) > 6) {
$books[$path] = ($books[$path] ?? 0) + 1;
if ($publicIp) {
@@ -195,10 +205,13 @@ class AccessLogParser
if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
$ipPathTs[$ip][$path] = $tsVal;
}
if ($ua !== '') {
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
}
}
}
private function parseFile(array $file, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs): void
private function parseFile(array $file, int $cutoff, array &$pages, array &$books, array &$ips, array &$dayPages, array &$ipDays, array &$ipPaths, array &$ipPathTs, array &$ipAgents): void
{
if ($file['type'] === 'tgz') {
try {
@@ -209,7 +222,7 @@ class AccessLogParser
continue;
}
foreach (explode("\n", $content) as $line) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
}
}
} catch (\Exception $e) {
@@ -222,7 +235,7 @@ class AccessLogParser
while (!gzeof($h)) {
$line = gzgets($h, 8192);
if ($line !== false) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
}
}
gzclose($h);
@@ -232,7 +245,7 @@ class AccessLogParser
return;
}
while (($line = fgets($h)) !== false) {
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs);
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipDays, $ipPaths, $ipPathTs, $ipAgents);
}
fclose($h);
}