v1.6.32 : UA en entier + bouton « + bot » + filtrage bots des stats
- Agents détectés : UA affiché sans troncature (drill-down et liste)
- Bouton « + bot » pour ajouter un agent aux patterns via AJAX (CSRF)
- Section Agents alimentée par all_uas (tous UAs publics, bots inclus)
- AccessLogParser : bots exclus des compteurs pages/livres/visiteurs
- Caches stats vidés après chaque modification des patterns

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+69
-33
@@ -9,24 +9,31 @@ class AccessLogParser
|
||||
private string $cacheFile;
|
||||
private int $cacheTtl;
|
||||
private int $days;
|
||||
/** @var list<string> */
|
||||
private array $botPatterns;
|
||||
|
||||
private static ?array $memo = null;
|
||||
|
||||
// Apache COMBINED : IP - - [timestamp] "METHOD /path HTTP/x" STATUS bytes "ref" "ua"
|
||||
private const RE = '/^(\S+) \S+ \S+ \[(\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\] "[A-Z-]+ ([^\s"?]+)[^"]*" (\d{3}) \S+ "[^"]*" "([^"]*)"/u';
|
||||
|
||||
/**
|
||||
* @param list<string> $botPatterns
|
||||
*/
|
||||
public function __construct(
|
||||
string $logDir = '/var/log/apache2',
|
||||
string $pattern = '*-access.log',
|
||||
string $cacheFile = '',
|
||||
int $cacheTtl = 600,
|
||||
int $days = 14
|
||||
string $logDir = '/var/log/apache2',
|
||||
string $pattern = '*-access.log',
|
||||
string $cacheFile = '',
|
||||
int $cacheTtl = 600,
|
||||
int $days = 14,
|
||||
array $botPatterns = []
|
||||
) {
|
||||
$this->logDir = rtrim($logDir, '/');
|
||||
$this->pattern = $pattern;
|
||||
$this->cacheFile = $cacheFile !== '' ? $cacheFile : dirname(__DIR__) . '/_cache/access_stats.json';
|
||||
$this->cacheTtl = $cacheTtl;
|
||||
$this->days = $days;
|
||||
$this->logDir = rtrim($logDir, '/');
|
||||
$this->pattern = $pattern;
|
||||
$this->cacheFile = $cacheFile !== '' ? $cacheFile : dirname(__DIR__) . '/_cache/access_stats.json';
|
||||
$this->cacheTtl = $cacheTtl;
|
||||
$this->days = $days;
|
||||
$this->botPatterns = array_map('strtolower', $botPatterns);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -37,7 +44,8 @@ class AccessLogParser
|
||||
* pages_by_day:array<string,list<int>>,
|
||||
* ips_by_day:array<string,list<int>>,
|
||||
* ip_top_paths:array<string,array<string,array{n:int,ts:int}>>,
|
||||
* ip_agents:array<string,list<string>>
|
||||
* ip_agents:array<string,list<string>>,
|
||||
* all_uas:array<string,int>
|
||||
* }
|
||||
*/
|
||||
public function stats(): array
|
||||
@@ -52,24 +60,26 @@ class AccessLogParser
|
||||
}
|
||||
}
|
||||
|
||||
$cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400);
|
||||
$pages = [];
|
||||
$books = [];
|
||||
$ips = []; // toutes requêtes publiques (tous chemins, tous statuts)
|
||||
$dayPages = [];
|
||||
$ipPaths = []; // chemins /post/ et /book/ avec statut 200 (pour les ts)
|
||||
$ipPathTs = [];
|
||||
$ipAllPaths = []; // tous chemins, tous statuts
|
||||
$ipAllDays = []; // tous jours, tous statuts
|
||||
$ipAgents = []; // tous user-agents par IP
|
||||
$cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400);
|
||||
$pages = [];
|
||||
$books = [];
|
||||
$ips = []; // requêtes publiques non-bot (tous chemins, tous statuts)
|
||||
$dayPages = [];
|
||||
$ipPaths = []; // chemins /post/ et /book/ avec statut 200 (pour les ts)
|
||||
$ipPathTs = [];
|
||||
$ipAllPaths = []; // tous chemins, tous statuts, non-bots
|
||||
$ipAllDays = []; // tous jours, tous statuts, non-bots
|
||||
$ipAgents = []; // user-agents non-bot par IP
|
||||
$allUas = []; // tous UAs publics (bots inclus) pour "Agents détectés"
|
||||
|
||||
foreach ($this->logFiles() as $file) {
|
||||
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
|
||||
$this->parseFile($file, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents, $allUas);
|
||||
}
|
||||
|
||||
arsort($pages);
|
||||
arsort($books);
|
||||
arsort($ips);
|
||||
arsort($allUas);
|
||||
|
||||
$pagesByDay = [];
|
||||
foreach ($dayPages as $path => $byOffset) {
|
||||
@@ -82,7 +92,7 @@ class AccessLogParser
|
||||
$pagesByDay[$path] = $arr;
|
||||
}
|
||||
|
||||
// Top 200 IPs par volume total de requêtes
|
||||
// Top 200 IPs non-bot par volume total de requêtes
|
||||
$topIpKeys = array_keys(array_slice($ips, 0, 200, true));
|
||||
$ipsByDay = [];
|
||||
$ipTopPaths = [];
|
||||
@@ -119,9 +129,11 @@ class AccessLogParser
|
||||
'ips_by_day' => $ipsByDay,
|
||||
'ip_top_paths' => $ipTopPaths,
|
||||
'ip_agents' => $ipTopAgents,
|
||||
'all_uas' => array_slice($allUas, 0, 300, true),
|
||||
];
|
||||
@mkdir(dirname($this->cacheFile), 0755, true);
|
||||
@file_put_contents($this->cacheFile, json_encode($result, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES));
|
||||
|
||||
return self::$memo = $result;
|
||||
}
|
||||
|
||||
@@ -136,6 +148,21 @@ class AccessLogParser
|
||||
&& (time() - filemtime($this->cacheFile)) < $this->cacheTtl;
|
||||
}
|
||||
|
||||
/**
 * Tells whether a user-agent string contains one of the configured bot patterns.
 *
 * The user agent is lower-cased with strtolower() and every non-empty entry
 * of $this->botPatterns is searched inside it as a plain substring; the
 * patterns themselves are compared as stored.
 *
 * @param string $ua Raw user-agent string to classify.
 *
 * @return bool True as soon as one pattern is found; false for an empty user
 *              agent, an empty pattern list, or when no pattern matches.
 */
private function matchesBot(string $ua): bool
{
    if ($ua !== '' && $this->botPatterns !== []) {
        $haystack = strtolower($ua);

        foreach ($this->botPatterns as $pattern) {
            // Empty patterns are ignored: they would match every user agent.
            if ($pattern === '') {
                continue;
            }

            if (str_contains($haystack, $pattern)) {
                return true;
            }
        }
    }

    return false;
}
|
||||
|
||||
/** @return list<array{path:string,type:string}> */
|
||||
private function logFiles(): array
|
||||
{
|
||||
@@ -172,6 +199,7 @@ class AccessLogParser
|
||||
if (!preg_match('/(\d{2})\/(\w{3})\/(\d{4}):(\d{2}:\d{2}:\d{2}) ([+-]\d{4})/', $raw, $m)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}");
|
||||
}
|
||||
|
||||
@@ -186,7 +214,8 @@ class AccessLogParser
|
||||
array &$ipPathTs,
|
||||
array &$ipAllPaths,
|
||||
array &$ipAllDays,
|
||||
array &$ipAgents
|
||||
array &$ipAgents,
|
||||
array &$allUas
|
||||
): void {
|
||||
if (!preg_match(self::RE, $line, $m)) {
|
||||
return;
|
||||
@@ -200,19 +229,25 @@ class AccessLogParser
|
||||
|
||||
$publicIp = filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) !== false;
|
||||
$dayOffset = (int) floor(($tsVal - $cutoff) / 86400);
|
||||
$isBot = $this->matchesBot($ua);
|
||||
|
||||
// Toutes les requêtes publiques : comptage global, chemins, jours, agents
|
||||
if ($publicIp) {
|
||||
// Tous les UAs publics pour la section "Agents détectés" (bots inclus)
|
||||
if ($publicIp && $ua !== '') {
|
||||
$allUas[$ua] = ($allUas[$ua] ?? 0) + 1;
|
||||
}
|
||||
|
||||
// Requêtes publiques non-bot : comptage visiteurs, chemins, jours, agents
|
||||
if ($publicIp && !$isBot) {
|
||||
$ips[$ip] = ($ips[$ip] ?? 0) + 1;
|
||||
$ipAllPaths[$ip][$path] = ($ipAllPaths[$ip][$path] ?? 0) + 1;
|
||||
$ipAllDays[$ip][$dayOffset] = ($ipAllDays[$ip][$dayOffset] ?? 0) + 1;
|
||||
if ($ua !== '') {
|
||||
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
|
||||
$ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Comptage spécifique aux pages de contenu (statut 200, /post/ ou /book/)
|
||||
if ($status !== '200') {
|
||||
// Comptage spécifique aux pages de contenu (statut 200, non-bot)
|
||||
if ($status !== '200' || $isBot) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -247,7 +282,8 @@ class AccessLogParser
|
||||
array &$ipPathTs,
|
||||
array &$ipAllPaths,
|
||||
array &$ipAllDays,
|
||||
array &$ipAgents
|
||||
array &$ipAgents,
|
||||
array &$allUas
|
||||
): void {
|
||||
if ($file['type'] === 'tgz') {
|
||||
try {
|
||||
@@ -258,7 +294,7 @@ class AccessLogParser
|
||||
continue;
|
||||
}
|
||||
foreach (explode("\n", $content) as $line) {
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents, $allUas);
|
||||
}
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
@@ -271,7 +307,7 @@ class AccessLogParser
|
||||
while (!gzeof($h)) {
|
||||
$line = gzgets($h, 8192);
|
||||
if ($line !== false) {
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents, $allUas);
|
||||
}
|
||||
}
|
||||
gzclose($h);
|
||||
@@ -281,7 +317,7 @@ class AccessLogParser
|
||||
return;
|
||||
}
|
||||
while (($line = fgets($h)) !== false) {
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents);
|
||||
$this->parseLine($line, $cutoff, $pages, $books, $ips, $dayPages, $ipPaths, $ipPathTs, $ipAllPaths, $ipAllDays, $ipAgents, $allUas);
|
||||
}
|
||||
fclose($h);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user