*/
    private array $botPatterns;

    /**
     * Article path => set of IPs (ip => true) recorded by parseLine() when the
     * request's day offset is >= days - 7.
     *
     * @var array<string, array<string, true>>
     */
    private array $artIp7 = [];

    /**
     * Article path => set of IPs (ip => true) recorded by parseLine() when the
     * request's day offset is >= days - 14.
     *
     * @var array<string, array<string, true>>
     */
    private array $artIp14 = [];

    /**
     * Article path => set of IPs (ip => true) recorded by parseLine() for the
     * whole analysed window.
     *
     * @var array<string, array<string, true>>
     */
    private array $artIp30 = [];

    /**
     * Per-process memo of the statistics returned by stats().
     *
     * NOTE(review): this is static, so it is shared by every instance of the
     * class regardless of constructor arguments — confirm that is intended.
     */
    private static ?array $memo = null;

    // Apache COMBINED: IP - - [timestamp] "METHOD /path HTTP/x" STATUS bytes "ref" "ua"
    // Captures: 1 = client IP, 2 = timestamp, 3 = path without query string,
    // 4 = status code, 5 = user-agent.
    private const RE = '/^(\S+) \S+ \S+ \[(\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\] "[A-Z-]+ ([^\s"?]+)[^"]*" (\d{3}) \S+ "[^"]*" "([^"]*)"/u';

    /**
     * @param string       $logDir      Directory holding the access logs (trailing slash is stripped).
     * @param string       $pattern     Glob pattern, relative to $logDir, selecting the base log files.
     * @param string       $cacheFile   JSON cache path; '' selects `<parent of this dir>/_cache/access_stats.json`.
     * @param int          $cacheTtl    Cache lifetime in seconds.
     * @param int          $days        Size of the analysed window, in days.
     * @param list<string> $botPatterns Substrings identifying bot user-agents (compared case-insensitively).
     */
    public function __construct(
        string $logDir = '/var/log/apache2',
        string $pattern = '*-access.log',
        string $cacheFile = '',
        int $cacheTtl = 600,
        int $days = 30,
        array $botPatterns = []
    ) {
        $this->logDir = rtrim($logDir, '/');
        $this->pattern = $pattern;
        $this->cacheFile = $cacheFile !== '' ? $cacheFile : dirname(__DIR__) . '/_cache/access_stats.json';
        $this->cacheTtl = $cacheTtl;
        $this->days = $days;
        // Lower-case once here so matchesBot() only has to lower-case the user-agent.
        $this->botPatterns = array_map('strtolower', $botPatterns);
    }

    /**
     * Returns the aggregated access statistics.
     *
     * Resolution order: in-process memo, then the JSON cache file while it is
     * younger than the TTL, then a full parse of the log files (whose result is
     * written back to the cache file on a best-effort basis).
     *
     * A result loaded from the cache file is only checked to be an array; its
     * shape is not validated.
     *
     * @return array{
     *     pages: array<string, int>,
     *     books: array<string, int>,
     *     ips: array<string, int>,
     *     pages_by_day: array<string, list<int>>,
     *     ips_by_day: array<string, list<int>>,
     *     ip_top_paths: array<string, array<string, array{n: int, ts: int}>>,
     *     ip_agents: array<string, list<string>>,
     *     all_uas: array<string, int>,
     *     unique_visitors: array{7: int, 14: int, 30: int},
     *     article_unique_visitors: array<string, array{7: int, 14: int, 30: int}>
     * }
     */
    public function stats(): array
    {
        if (self::$memo !== null) {
            return self::$memo;
        }

        if ($this->cacheValid()) {
            $d = json_decode((string) file_get_contents($this->cacheFile), true);
            if (is_array($d)) {
                return self::$memo = $d;
            }
        }

        // Midnight, `days` days ago; falls back to a plain subtraction if strtotime() fails.
        $cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400);

        $pages = [];
        $books = [];
        $ips = [];        // public non-bot requests (all paths, all statuses)
        $dayPages = [];
        $ipPaths = [];    // /post/ and /book/ paths with status 200 (kept alongside the timestamps)
        $ipPathTs = [];
        $ipAllPaths = []; // all paths, all statuses, non-bots
        $ipAllDays = [];  // all days, all statuses, non-bots
        $ipAgents = [];   // non-bot user-agents per IP
        $allUas = [];     // every public user-agent (bots included) for the "detected agents" section

        foreach ($this->logFiles() as $file) {
            $this->parseFile(
                $file,
                $cutoff,
                $pages,
                $books,
                $ips,
                $dayPages,
                $ipPaths,
                $ipPathTs,
                $ipAllPaths,
                $ipAllDays,
                $ipAgents,
                $allUas
            );
        }

        arsort($pages);
        arsort($books);
        arsort($ips);
        arsort($allUas);

        // NOTE(review): $cutoff is midnight `days` days ago, so a request logged
        // today appears to get offset == days and is dropped by the
        // `< $this->days` guards below — confirm this is intended.
        $pagesByDay = [];
        foreach ($dayPages as $path => $byOffset) {
            $arr = array_fill(0, $this->days, 0);
            foreach ($byOffset as $offset => $count) {
                if ($offset >= 0 && $offset < $this->days) {
                    $arr[$offset] = $count;
                }
            }
            $pagesByDay[$path] = $arr;
        }

        // Top 200 non-bot IPs by total request volume.
        $topIpKeys = array_keys(array_slice($ips, 0, 200, true));

        $ipsByDay = [];
        $ipTopPaths = [];
        $ipTopAgents = [];
        foreach ($topIpKeys as $ip) {
            // Sparkline: total activity per day.
            $arr = array_fill(0, $this->days, 0);
            foreach ($ipAllDays[$ip] ?? [] as $offset => $count) {
                if ($offset >= 0 && $offset < $this->days) {
                    $arr[$offset] = $count;
                }
            }
            $ipsByDay[$ip] = $arr;

            // Top 20 paths, all kinds combined. 'ts' is only known for the
            // content paths tracked in $ipPathTs; it is 0 otherwise.
            $allPaths = $ipAllPaths[$ip] ?? [];
            arsort($allPaths);
            $ipTopPaths[$ip] = [];
            foreach (array_slice($allPaths, 0, 20, true) as $p => $cnt) {
                $ipTopPaths[$ip][$p] = ['n' => $cnt, 'ts' => $ipPathTs[$ip][$p] ?? 0];
            }

            // Top 5 user-agents.
            $agents = $ipAgents[$ip] ?? [];
            arsort($agents);
            $ipTopAgents[$ip] = array_keys(array_slice($agents, 0, 5, true));
        }

        // Unique visitors per period — computed over ALL non-bot IPs (not just the top 200).
        $uniqueVisitors = [7 => 0, 14 => 0, 30 => 0];
        $start7 = $this->days - 7;
        $start14 = $this->days - 14;
        foreach ($ipAllDays as $ipDay) {
            $active7 = $active14 = $active30 = false;
            foreach ($ipDay as $offset => $cnt) {
                if ($cnt <= 0) {
                    continue;
                }
                $active30 = true;
                if ($offset >= $start14) {
                    $active14 = true;
                }
                if ($offset >= $start7) {
                    $active7 = true;
                }
            }
            if ($active7) {
                ++$uniqueVisitors[7];
            }
            if ($active14) {
                ++$uniqueVisitors[14];
            }
            if ($active30) {
                ++$uniqueVisitors[30];
            }
        }

        // Unique visitors per article (public non-bot IPs, /post/ with status 200).
        $articleUv = [];
        foreach ($this->artIp30 as $path => $_artIpSet) {
            $articleUv[$path] = [
                '7' => count($this->artIp7[$path] ?? []),
                '14' => count($this->artIp14[$path] ?? []),
                '30' => count($_artIpSet),
            ];
        }

        $result = [
            'pages' => $pages,
            'books' => $books,
            'ips' => $ips,
            'pages_by_day' => $pagesByDay,
            'ips_by_day' => $ipsByDay,
            'ip_top_paths' => $ipTopPaths,
            'ip_agents' => $ipTopAgents,
            'all_uas' => array_slice($allUas, 0, 300, true),
            'unique_visitors' => $uniqueVisitors,
            'article_unique_visitors' => $articleUv,
        ];

        // Cache write is best-effort. If encoding fails, skip the write entirely:
        // writing the `false` result would leave an empty file that still passes
        // the mtime check in cacheValid() until the TTL expires.
        $json = json_encode($result, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES);
        if ($json !== false) {
            @mkdir(dirname($this->cacheFile), 0755, true);
            @file_put_contents($this->cacheFile, $json);
        }

        return self::$memo = $result;
    }

    /**
     * Tells whether at least one readable log file matches the configured
     * directory and pattern.
     */
    public function isReadable(): bool
    {
        return count($this->logFiles()) > 0;
    }

    /**
     * Tells whether the cache file exists and was modified less than
     * `cacheTtl` seconds ago.
     */
    private function cacheValid(): bool
    {
        return file_exists($this->cacheFile)
            && (time() - filemtime($this->cacheFile)) < $this->cacheTtl;
    }

    /**
     * Case-insensitive substring match of a user-agent against the configured
     * bot patterns. An empty user-agent or an empty pattern list never matches.
     */
    private function matchesBot(string $ua): bool
    {
        if ($ua === '' || $this->botPatterns === []) {
            return false;
        }

        $lo = strtolower($ua);
        foreach ($this->botPatterns as $p) {
            if ($p !== '' && str_contains($lo, $p)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Lists the readable log files to parse: each base file matching the
     * pattern, plus its `<base>.*` siblings modified within the last
     * `days + 1` days.
     *
     * @return list<array{path: string, type: string}> `type` is 'tgz', 'gz' or 'plain'.
     */
    private function logFiles(): array
    {
        $files = [];
        $cutoff = time() - ($this->days + 1) * 86400;

        foreach (glob($this->logDir . '/' . $this->pattern) ?: [] as $base) {
            // Rotated/compressed files that happen to match the pattern are not
            // bases; they are picked up through the `<base>.*` glob below.
            if (str_ends_with($base, '.gz') || preg_match('/\.\d+$/', $base)) {
                continue;
            }

            foreach (array_merge([$base], glob($base . '.*') ?: []) as $path) {
                // The base file is always kept; siblings too old for the window are skipped.
                if ($path !== $base && filemtime($path) < $cutoff) {
                    continue;
                }
                if (!is_readable($path)) {
                    continue;
                }

                // '.tar.gz' must be tested before '.gz'.
                if (str_ends_with($path, '.tar.gz')) {
                    $files[] = ['path' => $path, 'type' => 'tgz'];
                } elseif (str_ends_with($path, '.gz')) {
                    $files[] = ['path' => $path, 'type' => 'gz'];
                } else {
                    $files[] = ['path' => $path, 'type' => 'plain'];
                }
            }
        }

        return $files;
    }

    /**
     * Converts an Apache timestamp such as "15/May/2026:00:41:01 +0200" to a
     * Unix timestamp. Returns 0 when the input does not match that layout or
     * strtotime() cannot parse it.
     */
    private static function parseTimestamp(string $raw): int
    {
        // "15/May/2026:00:41:01 +0200"
        if (!preg_match('/(\d{2})\/(\w{3})\/(\d{4}):(\d{2}:\d{2}:\d{2}) ([+-]\d{4})/', $raw, $m)) {
            return 0;
        }

        return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}");
    }

    /**
     * Parses one access-log line and folds it into the accumulators.
     *
     * Lines that do not match the log format, or that are older than $cutoff,
     * are ignored. Besides the by-reference accumulators, this also updates the
     * $artIp7 / $artIp14 / $artIp30 properties.
     *
     * @param string $line       Raw log line.
     * @param int    $cutoff     Oldest accepted Unix timestamp; also day offset 0.
     * @param array  $pages      /post/ path => hits (status 200, non-bot).
     * @param array  $books      /book/ path => hits (status 200, non-bot).
     * @param array  $ips        Public non-bot IP => total requests.
     * @param array  $dayPages   /post/ path => day offset => hits.
     * @param array  $ipPaths    Public IP => content path => hits (status 200, non-bot).
     * @param array  $ipPathTs   Public IP => content path => latest Unix timestamp.
     * @param array  $ipAllPaths Public non-bot IP => path => hits (any status).
     * @param array  $ipAllDays  Public non-bot IP => day offset => hits (any status).
     * @param array  $ipAgents   Public non-bot IP => user-agent => hits.
     * @param array  $allUas     User-agent => hits, for public IPs, bots included.
     */
    private function parseLine(
        string $line,
        int $cutoff,
        array &$pages,
        array &$books,
        array &$ips,
        array &$dayPages,
        array &$ipPaths,
        array &$ipPathTs,
        array &$ipAllPaths,
        array &$ipAllDays,
        array &$ipAgents,
        array &$allUas
    ): void {
        if (!preg_match(self::RE, $line, $m)) {
            return;
        }
        [, $ip, $ts, $path, $status, $ua] = $m;

        // parseTimestamp() returns 0 on failure, which is rejected here as well.
        $tsVal = self::parseTimestamp($ts);
        if ($tsVal < $cutoff) {
            return;
        }

        // "Public" means a valid IP outside the private and reserved ranges.
        $publicIp = filter_var(
            $ip,
            FILTER_VALIDATE_IP,
            FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
        ) !== false;
        $dayOffset = (int) floor(($tsVal - $cutoff) / 86400);
        $isBot = $this->matchesBot($ua);

        // Every public user-agent, bots included, for the "detected agents" section.
        if ($publicIp && $ua !== '') {
            $allUas[$ua] = ($allUas[$ua] ?? 0) + 1;
        }

        // Public non-bot requests: visitor, path, day and agent counters.
        if ($publicIp && !$isBot) {
            $ips[$ip] = ($ips[$ip] ?? 0) + 1;
            $ipAllPaths[$ip][$path] = ($ipAllPaths[$ip][$path] ?? 0) + 1;
            $ipAllDays[$ip][$dayOffset] = ($ipAllDays[$ip][$dayOffset] ?? 0) + 1;
            if ($ua !== '') {
                $ipAgents[$ip][$ua] = ($ipAgents[$ip][$ua] ?? 0) + 1;
            }
        }

        // Content-page counters only apply to status 200, non-bot requests.
        if ($status !== '200' || $isBot) {
            return;
        }

        if (str_starts_with($path, '/post/') && strlen($path) > 6) {
            // Page hits are counted for every IP, public or not.
            $pages[$path] = ($pages[$path] ?? 0) + 1;
            $dayPages[$path][$dayOffset] = ($dayPages[$path][$dayOffset] ?? 0) + 1;

            if ($publicIp) {
                $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
                if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
                    $ipPathTs[$ip][$path] = $tsVal;
                }

                // Unique visitors per article (public non-bot IPs only).
                $this->artIp30[$path][$ip] = true;
                if ($dayOffset >= $this->days - 14) {
                    $this->artIp14[$path][$ip] = true;
                }
                if ($dayOffset >= $this->days - 7) {
                    $this->artIp7[$path][$ip] = true;
                }
            }
        } elseif (str_ends_with($path, '/') === false && str_starts_with($path, '/book/') && strlen($path) > 6) {
            // Book paths ending in '/' are not counted.
            $books[$path] = ($books[$path] ?? 0) + 1;

            if ($publicIp) {
                $ipPaths[$ip][$path] = ($ipPaths[$ip][$path] ?? 0) + 1;
                if ($tsVal > ($ipPathTs[$ip][$path] ?? 0)) {
                    $ipPathTs[$ip][$path] = $tsVal;
                }
            }
        }
    }

    /**
     * Feeds every line of one log file to parseLine().
     *
     * Handles the three types produced by logFiles(): 'tgz' (tar.gz archive,
     * each top-level entry read in full), 'gz' (gzip stream) and anything else
     * (plain text). Files that cannot be opened are skipped silently.
     *
     * @param array{path: string, type: string} $file Entry as returned by logFiles().
     * @param int   $cutoff     Oldest accepted Unix timestamp.
     * @param array $pages      Accumulator forwarded to parseLine().
     * @param array $books      Accumulator forwarded to parseLine().
     * @param array $ips        Accumulator forwarded to parseLine().
     * @param array $dayPages   Accumulator forwarded to parseLine().
     * @param array $ipPaths    Accumulator forwarded to parseLine().
     * @param array $ipPathTs   Accumulator forwarded to parseLine().
     * @param array $ipAllPaths Accumulator forwarded to parseLine().
     * @param array $ipAllDays  Accumulator forwarded to parseLine().
     * @param array $ipAgents   Accumulator forwarded to parseLine().
     * @param array $allUas     Accumulator forwarded to parseLine().
     */
    private function parseFile(
        array $file,
        int $cutoff,
        array &$pages,
        array &$books,
        array &$ips,
        array &$dayPages,
        array &$ipPaths,
        array &$ipPathTs,
        array &$ipAllPaths,
        array &$ipAllDays,
        array &$ipAgents,
        array &$allUas
    ): void {
        if ($file['type'] === 'tgz') {
            try {
                // Fully qualified so the class resolves whether or not this file is namespaced.
                $phar = new \PharData($file['path']);
                foreach ($phar as $entry) {
                    $content = @file_get_contents('phar://' . $file['path'] . '/' . $entry->getFilename());
                    if ($content === false) {
                        continue;
                    }
                    foreach (explode("\n", $content) as $line) {
                        $this->parseLine(
                            $line,
                            $cutoff,
                            $pages,
                            $books,
                            $ips,
                            $dayPages,
                            $ipPaths,
                            $ipPathTs,
                            $ipAllPaths,
                            $ipAllDays,
                            $ipAgents,
                            $allUas
                        );
                    }
                }
            } catch (\Exception) {
                // Deliberate best-effort: an unreadable or corrupt archive is skipped
                // so that it cannot break the whole statistics run.
            }

            return;
        }

        if ($file['type'] === 'gz') {
            $h = @gzopen($file['path'], 'rb');
            if (!$h) {
                return;
            }
            while (!gzeof($h)) {
                // No length cap: a fixed buffer would split long log lines, and the
                // fragments would fail to match the log format. This keeps the gzip
                // branch consistent with the plain-text fgets() branch below.
                $line = gzgets($h);
                if ($line !== false) {
                    $this->parseLine(
                        $line,
                        $cutoff,
                        $pages,
                        $books,
                        $ips,
                        $dayPages,
                        $ipPaths,
                        $ipPathTs,
                        $ipAllPaths,
                        $ipAllDays,
                        $ipAgents,
                        $allUas
                    );
                }
            }
            gzclose($h);

            return;
        }

        $h = @fopen($file['path'], 'rb');
        if (!$h) {
            return;
        }
        while (($line = fgets($h)) !== false) {
            $this->parseLine(
                $line,
                $cutoff,
                $pages,
                $books,
                $ips,
                $dayPages,
                $ipPaths,
                $ipPathTs,
                $ipAllPaths,
                $ipAllDays,
                $ipAgents,
                $allUas
            );
        }
        fclose($h);
    }
}