40656631ba
- Admin stats : clic sur un réseau AS affiche les IPs avec mini sparkline 14 jours + articles/livres consultés - AccessLogParser : calcul ip_data (daily + top paths) inclus dans le cache stats - Suppression du tableau statique "Répartition par réseau" (fusionné dans accordéon pays) - PHP-CS-Fixer appliqué sur l'ensemble des fichiers modifiés Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
195 lines
6.6 KiB
PHP
195 lines
6.6 KiB
PHP
<?php
|
|
|
|
declare(strict_types=1);
|
|
|
|
/**
 * Reads Apache access logs and reports the most viewed paths under a set of
 * prefixes (by default /post/*) since a given point in time, counting unique
 * visitors (distinct client IPs) and only requests answered with HTTP 200.
 */
class TrendingParser
{
    // Apache COMBINED: IP - - [timestamp] "METHOD /path HTTP/x" STATUS bytes "ref" "ua"
    // Captures: 1 = client IP, 2 = raw timestamp, 3 = path without query string, 4 = status.
    private const RE = '/^(\S+) \S+ \S+ \[(\d{2}\/\w+\/\d{4}:\d{2}:\d{2}:\d{2} [+-]\d{4})\] "[A-Z-]+ ([^\s"?]+)[^"]*" (\d{3}) /';

    /**
     * @param string $logDir  directory that is globbed for log files
     * @param string $pattern glob pattern, appended to "$logDir/", selecting the live log files
     */
    public function __construct(
        private string $logDir,
        private string $pattern,
    ) {
    }

    /**
     * Returns the $limit most viewed paths since $cutoff, sorted by
     * decreasing number of unique visitors.
     *
     * @param int          $cutoff   Unix timestamp; older log entries are ignored
     * @param int          $limit    maximum number of paths returned (negative values are treated as 0)
     * @param list<string> $prefixes e.g. ['/post/'], ['/post/', '/book/']
     *
     * @return array<string, int> path => number of unique visitors
     */
    public function top(int $cutoff, int $limit = 50, array $prefixes = ['/post/']): array
    {
        // array_map() over a single array preserves the (path) keys.
        $counts = array_map(
            static fn (array $ips): int => count($ips),
            $this->collectVisitors($cutoff, $prefixes),
        );
        arsort($counts);

        return array_slice($counts, 0, max(0, $limit), true);
    }

    /**
     * Parses the logs once and returns one ranking per prefix.
     * Cheaper than calling top() several times for the same period.
     *
     * A path is attributed to the first prefix (in $limits key order) it starts with.
     *
     * @param int                $cutoff Unix timestamp; older log entries are ignored
     * @param array<string, int> $limits prefix => maximum number of paths for that prefix
     *
     * @return array<string, array<string, int>> prefix => (path => unique visitors)
     */
    public function topGrouped(int $cutoff, array $limits): array
    {
        $prefixes = array_keys($limits);
        $visitors = $this->collectVisitors($cutoff, $prefixes);

        $result = array_fill_keys($prefixes, []);
        foreach ($visitors as $path => $ips) {
            foreach ($prefixes as $prefix) {
                if (str_starts_with($path, $prefix)) {
                    $result[$prefix][$path] = count($ips);
                    break;
                }
            }
        }

        foreach ($prefixes as $prefix) {
            arsort($result[$prefix]);
            $result[$prefix] = array_slice($result[$prefix], 0, max(0, $limits[$prefix]), true);
        }

        return $result;
    }

    /**
     * Tells whether at least one candidate log file is currently readable.
     */
    public function isReadable(): bool
    {
        return $this->logFiles(time() - 86400) !== [];
    }

    // ── Collection ────────────────────────────────────────────────────────────

    /**
     * Scans every candidate log file and builds the set of visitor IPs per path.
     *
     * @param list<string> $prefixes
     *
     * @return array<string, array<string, true>> path => (ip => true)
     */
    private function collectVisitors(int $cutoff, array $prefixes): array
    {
        $visitors = [];
        foreach ($this->logFiles($cutoff) as $file) {
            $this->parseFile($file, $cutoff, $visitors, $prefixes);
        }

        return $visitors;
    }

    // ── Log files ─────────────────────────────────────────────────────────────

    /**
     * Lists the live log files matching the pattern plus their rotated
     * siblings ("<base>.*") that were modified recently enough to matter.
     *
     * @return list<array{path:string,type:string}> type is 'plain', 'gz' or 'tgz'
     */
    private function logFiles(int $cutoff): array
    {
        $files = [];
        $oldest = $cutoff - 86400; // one day of slack for rotations

        foreach (glob($this->logDir . '/' . $this->pattern) ?: [] as $base) {
            // Rotated files are picked up through their base file below.
            if (str_ends_with($base, '.gz') || preg_match('/\.\d+$/', $base) === 1) {
                continue;
            }

            foreach ([$base, ...(glob($base . '.*') ?: [])] as $path) {
                // Keyed by path so a file matched both as a base and as a
                // rotation of another base is only listed once.
                if (isset($files[$path]) || !is_file($path) || !is_readable($path)) {
                    continue;
                }
                if ($path !== $base) {
                    $mtime = filemtime($path);
                    if ($mtime === false || $mtime < $oldest) {
                        continue;
                    }
                }

                $files[$path] = [
                    'path' => $path,
                    'type' => match (true) {
                        str_ends_with($path, '.tar.gz') => 'tgz',
                        str_ends_with($path, '.gz') => 'gz',
                        default => 'plain',
                    },
                ];
            }
        }

        return array_values($files);
    }

    // ── Parsing ───────────────────────────────────────────────────────────────

    /**
     * Converts an Apache timestamp ("10/Oct/2000:13:55:36 -0700") to a Unix
     * timestamp. Returns 0 when the value cannot be parsed.
     */
    private static function parseTimestamp(string $raw): int
    {
        if (preg_match('/(\d{2})\/(\w{3})\/(\d{4}):(\d{2}:\d{2}:\d{2}) ([+-]\d{4})/', $raw, $m) !== 1) {
            return 0;
        }

        // strtotime() yields false on failure, which the cast turns into 0.
        return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}");
    }

    /**
     * Records the visitor of one log line when the line is a 200 response,
     * not older than $cutoff, for a path strictly below one of the prefixes.
     *
     * @param array<string, array<string, true>> $visitors
     * @param list<string>                       $prefixes
     */
    private function parseLine(string $line, int $cutoff, array &$visitors, array $prefixes): void
    {
        if (preg_match(self::RE, $line, $m) !== 1) {
            return;
        }
        [, $ip, $ts, $path, $status] = $m;

        if ($status !== '200' || self::parseTimestamp($ts) < $cutoff) {
            return;
        }

        foreach ($prefixes as $prefix) {
            // The length check excludes the bare prefix itself (e.g. "/post/").
            if (str_starts_with($path, $prefix) && strlen($path) > strlen($prefix)) {
                $visitors[$path][$ip] = true;
                break;
            }
        }
    }

    /**
     * Feeds every line of one log file (plain, gzip or tar.gz) to parseLine().
     * Unreadable or invalid files are skipped silently: this is best effort.
     *
     * @param array{path:string,type:string}     $file
     * @param array<string, array<string, true>> $visitors
     * @param list<string>                       $prefixes
     */
    private function parseFile(array $file, int $cutoff, array &$visitors, array $prefixes): void
    {
        $path = $file['path'];

        if ($file['type'] === 'tgz') {
            try {
                // Walk the archive recursively so members stored under
                // sub-directories are read too; each member is streamed
                // through its phar:// URL instead of being loaded whole.
                foreach (new \RecursiveIteratorIterator(new \PharData($path)) as $member) {
                    $this->parsePlainStream($member->getPathname(), $cutoff, $visitors, $prefixes);
                }
            } catch (\Exception) {
                // Deliberately ignored: an unreadable archive must not break the stats.
            }

            return;
        }

        if ($file['type'] === 'gz') {
            $handle = @gzopen($path, 'rb');
            if ($handle === false) {
                return;
            }
            try {
                // No length limit (long request lines must stay in one piece)
                // and the loop stops on the first failed read, so a corrupt
                // stream cannot spin forever on a gzeof() that never turns true.
                while (($line = gzgets($handle)) !== false) {
                    $this->parseLine($line, $cutoff, $visitors, $prefixes);
                }
            } finally {
                gzclose($handle);
            }

            return;
        }

        $this->parsePlainStream($path, $cutoff, $visitors, $prefixes);
    }

    /**
     * Reads an uncompressed file or stream URL line by line into parseLine().
     *
     * @param array<string, array<string, true>> $visitors
     * @param list<string>                       $prefixes
     */
    private function parsePlainStream(string $url, int $cutoff, array &$visitors, array $prefixes): void
    {
        $handle = @fopen($url, 'rb');
        if ($handle === false) {
            return;
        }
        try {
            while (($line = fgets($handle)) !== false) {
                $this->parseLine($line, $cutoff, $visitors, $prefixes);
            }
        } finally {
            fclose($handle);
        }
    }
}
|