From 8cab6362a3aea4636a2b5b2c585e1be5cbe4ad7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?C=C3=A9drix?= Date: Fri, 15 May 2026 00:48:34 +0200 Subject: [PATCH] =?UTF-8?q?feat=20:=20onglet=20Statistiques=20=E2=80=94=20?= =?UTF-8?q?pages,=20livres,=20r=C3=A9partition=20AS=20avec=20groupes=20con?= =?UTF-8?q?figurables?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AccessLogParser : parse COMBINED, agrège hits /post/ et /book/, cache 10 min - AsnLookup : batch lookup ip-api.com, cache 30j, agrégation et groupes AS - Onglet Statistiques dans l'admin : top pages, top livres, répartition réseau - Filtrage par groupe AS (badges) + formulaire de configuration des groupes Co-Authored-By: Claude Sonnet 4.6 --- public/index.php | 47 +++++++- src/AccessLogParser.php | 178 ++++++++++++++++++++++++++++ src/AsnLookup.php | 190 ++++++++++++++++++++++++++++++ src/SiteSettings.php | 11 ++ templates/admin_stats.php | 241 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 666 insertions(+), 1 deletion(-) create mode 100644 src/AccessLogParser.php create mode 100644 src/AsnLookup.php create mode 100644 templates/admin_stats.php diff --git a/public/index.php b/public/index.php index 092b3b9..e74687b 100644 --- a/public/index.php +++ b/public/index.php @@ -43,7 +43,7 @@ $action = $_GET['action'] ?? 'list'; $uuid = $_GET['uuid'] ?? ''; $slug = $_GET['slug'] ?? 
''; -$_noindexActions = ['create', 'edit', 'admin', 'categories', 'diff', 'add_files', 'import_image', 'import_image_step2', 'sources', 'profile', 'delete_file', 'delete_external_link', 'rename_category', 'delete_category', 'toggle_private_category', 'admin_save_site', 'not_found', 'add_feed', 'delete_feed', 'add_link', 'delete_link', 'reorder_links', 'react', 'comment', 'verify_comment', 'comment_moderate', 'comment_delete', 'comment_resend', 'create_tag_type', 'delete_tag_type', 'edit_tags', 'book_save', 'book_delete']; +$_noindexActions = ['create', 'edit', 'admin', 'categories', 'diff', 'add_files', 'import_image', 'import_image_step2', 'sources', 'profile', 'delete_file', 'delete_external_link', 'rename_category', 'delete_category', 'toggle_private_category', 'admin_save_site', 'not_found', 'add_feed', 'delete_feed', 'add_link', 'delete_link', 'reorder_links', 'react', 'comment', 'verify_comment', 'comment_moderate', 'comment_delete', 'comment_resend', 'create_tag_type', 'delete_tag_type', 'edit_tags', 'book_save', 'book_delete', 'admin_save_as_groups']; $metaRobots = in_array($action, $_noindexActions, true) ? 'noindex, nofollow' : null; unset($_noindexActions); @@ -2542,6 +2542,27 @@ switch ($action) { $adminData['search_log_readable'] = $parser->isReadable(); } + if ($tab === 'stats') { + if (!isAdmin()) { + http_response_code(403); + exit; + } + require_once BASE_PATH . '/src/AccessLogParser.php'; + require_once BASE_PATH . 
'/src/AsnLookup.php'; + $accessParser = new AccessLogParser('/var/log/apache2', apacheAccessLog()); + $accessStats = $accessParser->stats(); + $adminData['stats_readable'] = $accessParser->isReadable(); + $adminData['stats_pages'] = array_slice($accessStats['pages'], 0, 30, true); + $adminData['stats_books'] = array_slice($accessStats['books'], 0, 20, true); + // Lookup AS pour les top 200 IPs + $topIps = array_slice($accessStats['ips'], 0, 200, true); + $asnMap = (new AsnLookup())->batchLookup(array_keys($topIps)); + $asList = AsnLookup::aggregateByAs($topIps, $asnMap); + $adminData['stats_as'] = $asList; + $adminData['stats_as_groups'] = AsnLookup::applyGroups($asList, asGroups()); + $adminData['as_groups'] = asGroups(); + } + if ($tab === 'categories') { $adminData['cats'] = $articles->getCategories(); $adminData['privateCats'] = $articles->getPrivateCategories(); @@ -2820,6 +2841,30 @@ switch ($action) { header('Location: /admin/searches?' . ($ok ? 'saved=1' : 'error=write')); exit; + case 'admin_save_as_groups': + requireAuth(); + if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') { + http_response_code(403); + exit; + } + $rawLabels = $_POST['as_group_label'] ?? []; + $rawPatterns = $_POST['as_group_patterns'] ?? []; + $groups = []; + foreach ((array) $rawLabels as $i => $label) { + $label = trim((string) $label); + if ($label === '') { + continue; + } + $patterns = array_values(array_filter(array_map( + 'trim', + explode("\n", (string) ($rawPatterns[$i] ?? '')) + ))); + $groups[] = ['label' => $label, 'patterns' => $patterns]; + } + $ok = saveSiteSettings(['as_groups' => $groups]); + header('Location: /admin/stats?' . ($ok ? 
'saved=1' : 'error=write')); + exit; + case 'admin_create_role': requireAuth(); if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') { diff --git a/src/AccessLogParser.php b/src/AccessLogParser.php new file mode 100644 index 0000000..8ff514e --- /dev/null +++ b/src/AccessLogParser.php @@ -0,0 +1,178 @@ +logDir = rtrim($logDir, '/'); + $this->pattern = $pattern; + $this->cacheFile = $cacheFile !== '' ? $cacheFile : dirname(__DIR__) . '/_cache/access_stats.json'; + $this->cacheTtl = $cacheTtl; + $this->days = $days; + } + + /** + * @return array{pages:array,books:array,ips:array} + */ + public function stats(): array + { + if (self::$memo !== null) { + return self::$memo; + } + if ($this->cacheValid()) { + $d = json_decode((string) file_get_contents($this->cacheFile), true); + if (is_array($d)) { + return self::$memo = $d; + } + } + + $cutoff = strtotime("-{$this->days} days midnight") ?: (time() - $this->days * 86400); + $pages = []; + $books = []; + $ips = []; + + foreach ($this->logFiles() as $file) { + $this->parseFile($file, $cutoff, $pages, $books, $ips); + } + + arsort($pages); + arsort($books); + arsort($ips); + + $result = compact('pages', 'books', 'ips'); + @mkdir(dirname($this->cacheFile), 0755, true); + @file_put_contents($this->cacheFile, json_encode($result, JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES)); + return self::$memo = $result; + } + + public function isReadable(): bool + { + return count($this->logFiles()) > 0; + } + + private function cacheValid(): bool + { + return file_exists($this->cacheFile) + && (time() - filemtime($this->cacheFile)) < $this->cacheTtl; + } + + /** @return list */ + private function logFiles(): array + { + $files = []; + $cutoff = time() - ($this->days + 1) * 86400; + + foreach (glob($this->logDir . '/' . $this->pattern) ?: [] as $base) { + if (str_ends_with($base, '.gz') || preg_match('/\.\d+$/', $base)) { + continue; + } + foreach (array_merge([$base], glob($base . 
'.*') ?: []) as $path) { + if ($path !== $base && filemtime($path) < $cutoff) { + continue; + } + if (!is_readable($path)) { + continue; + } + if (str_ends_with($path, '.tar.gz')) { + $files[] = ['path' => $path, 'type' => 'tgz']; + } elseif (str_ends_with($path, '.gz')) { + $files[] = ['path' => $path, 'type' => 'gz']; + } else { + $files[] = ['path' => $path, 'type' => 'plain']; + } + } + } + + return $files; + } + + private static function parseTimestamp(string $raw): int + { + // "15/May/2026:00:41:01 +0200" + if (!preg_match('/(\d{2})\/(\w{3})\/(\d{4}):(\d{2}:\d{2}:\d{2}) ([+-]\d{4})/', $raw, $m)) { + return 0; + } + return (int) strtotime("{$m[1]} {$m[2]} {$m[3]} {$m[4]} {$m[5]}"); + } + + private function parseLine(string $line, int $cutoff, array &$pages, array &$books, array &$ips): void + { + if (!preg_match(self::RE, $line, $m)) { + return; + } + [, $ip, $ts, $path, $status] = $m; + + if ($status !== '200') { + return; + } + if (self::parseTimestamp($ts) < $cutoff) { + return; + } + + if (str_starts_with($path, '/post/') && strlen($path) > 6) { + $pages[$path] = ($pages[$path] ?? 0) + 1; + $ips[$ip] = ($ips[$ip] ?? 0) + 1; + } elseif (str_starts_with($path, '/book/') && strlen($path) > 6) { + $books[$path] = ($books[$path] ?? 0) + 1; + $ips[$ip] = ($ips[$ip] ?? 0) + 1; + } + } + + private function parseFile(array $file, int $cutoff, array &$pages, array &$books, array &$ips): void + { + if ($file['type'] === 'tgz') { + try { + $phar = new PharData($file['path']); + foreach ($phar as $entry) { + $content = @file_get_contents('phar://' . $file['path'] . '/' . 
$entry->getFilename()); + if ($content === false) { + continue; + } + foreach (explode("\n", $content) as $line) { + $this->parseLine($line, $cutoff, $pages, $books, $ips); + } + } + } catch (\Exception $e) { + } + } elseif ($file['type'] === 'gz') { + $h = @gzopen($file['path'], 'rb'); + if (!$h) { + return; + } + while (!gzeof($h)) { + $line = gzgets($h, 8192); + if ($line !== false) { + $this->parseLine($line, $cutoff, $pages, $books, $ips); + } + } + gzclose($h); + } else { + $h = @fopen($file['path'], 'rb'); + if (!$h) { + return; + } + while (($line = fgets($h)) !== false) { + $this->parseLine($line, $cutoff, $pages, $books, $ips); + } + fclose($h); + } + } +} diff --git a/src/AsnLookup.php b/src/AsnLookup.php new file mode 100644 index 0000000..c59f545 --- /dev/null +++ b/src/AsnLookup.php @@ -0,0 +1,190 @@ +cacheDir = $cacheDir !== '' ? $cacheDir : dirname(__DIR__) . '/_cache/asn'; + $this->ttl = $ttl; + } + + /** + * Lookup AS info pour une liste d'IPs. + * IPs privées : retournées avec name='LAN', pas d'appel API. + * + * @param list $ips + * @return array + */ + public function batchLookup(array $ips): array + { + $results = []; + $missing = []; + + foreach (array_unique($ips) as $ip) { + if ($this->isPrivate($ip)) { + $results[$ip] = ['asn' => '', 'name' => 'LAN', 'country' => '']; + continue; + } + $cached = $this->fromCache($ip); + if ($cached !== null) { + $results[$ip] = $cached; + } else { + $missing[] = $ip; + } + } + + foreach (array_chunk($missing, 100) as $chunk) { + foreach ($this->fetchBatch($chunk) as $ip => $info) { + $this->toCache($ip, $info); + $results[$ip] = $info; + } + } + + return $results; + } + + public function isPrivate(string $ip): bool + { + return filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE) === false; + } + + /** + * Agrège les hits par AS depuis un tableau [ip => hits] et les infos AS. + * Retourne [asKey => [asn, name, country, hits]] trié par hits desc. 
+ * + * @param array $ipHits + * @param array $asnMap + * @return list + */ + public static function aggregateByAs(array $ipHits, array $asnMap): array + { + $byAs = []; + foreach ($ipHits as $ip => $hits) { + $info = $asnMap[$ip] ?? ['asn' => '?', 'name' => '?', 'country' => '']; + $key = $info['asn'] !== '' ? $info['asn'] : $info['name']; + if (!isset($byAs[$key])) { + $byAs[$key] = ['asn' => $info['asn'], 'name' => $info['name'], 'country' => $info['country'], 'hits' => 0]; + } + $byAs[$key]['hits'] += $hits; + } + usort($byAs, static fn ($a, $b) => $b['hits'] <=> $a['hits']); + return array_values($byAs); + } + + /** + * Applique les groupes définis par l'admin. + * Chaque groupe : ['label' => string, 'patterns' => [string, ...]] + * Un AS est affecté au premier groupe dont un pattern est contenu dans son nom (case-insensitive). + * + * @param list $asList + * @param list}> $groups + * @return array> + * clés : labels des groupes + 'Autres' + */ + public static function applyGroups(array $asList, array $groups): array + { + $result = []; + foreach ($groups as $g) { + $result[$g['label']] = []; + } + $result['Autres'] = []; + + foreach ($asList as $as) { + $matched = false; + foreach ($groups as $g) { + foreach ($g['patterns'] as $pattern) { + if ($pattern !== '' && mb_stripos($as['name'], $pattern) !== false) { + $result[$g['label']][] = $as; + $matched = true; + break 2; + } + } + } + if (!$matched) { + $result['Autres'][] = $as; + } + } + + return $result; + } + + // ─── Cache ──────────────────────────────────────────────────────────────── + + private function cacheFile(string $ip): string + { + return $this->cacheDir . '/' . md5($ip) . 
'.json'; + } + + /** @return array{asn:string,name:string,country:string}|null Cached AS info for $ip, or null when absent, older than the TTL, or not decodable as an array */ + private function fromCache(string $ip): ?array + { + $f = $this->cacheFile($ip); + if (!file_exists($f) || (time() - filemtime($f)) > $this->ttl) { + return null; + } + $d = json_decode((string) file_get_contents($f), true); + return is_array($d) ? $d : null; + } + + /** @param array{asn:string,name:string,country:string} $data AS info to store as JSON for $ip (best effort: write errors are suppressed) */ + private function toCache(string $ip, array $data): void + { + @mkdir($this->cacheDir, 0755, true); + @file_put_contents($this->cacheFile($ip), json_encode($data)); + } + + // ─── API ip-api.com ─────────────────────────────────────────────────────── + + /** POST the IPs as a JSON array to the ip-api.com batch endpoint; returns [] when the request fails or the response is not a JSON array. + * @param list<string> $ips + * @return array<string, array{asn:string,name:string,country:string}> keyed by the 'query' value of each response row + */ + private function fetchBatch(array $ips): array + { + $body = json_encode($ips); + $context = stream_context_create(['http' => [ + 'method' => 'POST', + 'header' => "Content-Type: application/json\r\nContent-Length: " . strlen((string) $body) . "\r\n", + 'content' => $body, + 'timeout' => 10, + ]]); + + $resp = @file_get_contents( + 'http://ip-api.com/batch?fields=query,as,org,country,countryCode', + false, + $context + ); + + if ($resp === false) { + return []; + } + + $rows = json_decode($resp, true); + if (!is_array($rows)) { + return []; + } + + $results = []; + foreach ($rows as $row) { + $ip = $row['query'] ?? ''; + if ($ip === '') { + continue; + } + $asRaw = $row['as'] ?? ''; + $asn = ''; + if (preg_match('/^AS(\d+)/', $asRaw, $m)) { + $asn = $m[1]; + } + $name = ($row['org'] ?? '') !== '' ? $row['org'] : preg_replace('/^AS\d+\s*/', '', $asRaw); // guard first: a row may lack 'org'; then fall back to the name part of 'as' + $country = $row['countryCode'] ?? ''; + $results[$ip] = ['asn' => $asn, 'name' => (string) $name, 'country' => $country]; + } + + return $results; + } +} diff --git a/src/SiteSettings.php b/src/SiteSettings.php index 856d0ae..13da22a 100644 --- a/src/SiteSettings.php +++ b/src/SiteSettings.php @@ -68,6 +68,13 @@ function apacheAccessLog(): string return (string)($_ENV['APACHE_ACCESS_LOG'] ?? 
getenv('APACHE_ACCESS_LOG') ?: '*-access.log'); } +/** @return list}> */ +function asGroups(): array +{ + $raw = siteSettings()['as_groups'] ?? []; + return is_array($raw) ? $raw : []; +} + function saveSiteSettings(array $data): bool { $current = siteSettings(); @@ -86,6 +93,10 @@ function saveSiteSettings(array $data): bool $current['posts_per_page'] = $val; } } + if (array_key_exists('as_groups', $data) && is_array($data['as_groups'])) { + $current['as_groups'] = $data['as_groups']; + } + return file_put_contents( siteSettingsPath(), json_encode($current, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES) diff --git a/templates/admin_stats.php b/templates/admin_stats.php new file mode 100644 index 0000000..4bad47b --- /dev/null +++ b/templates/admin_stats.php @@ -0,0 +1,241 @@ + + + +
Configuration enregistrée.
+ +
Impossible d'enregistrer : fichier non accessible en écriture.
+ + + +
+ Les logs ne sont pas lisibles. Vérifiez le pattern dans l'onglet Recherches + et que www-data appartient au groupe adm. +
+ + +

14 derniers jours · cache 10 min

+ +
+ + +
+
+
+ Pages les plus visitées + URLs +
+
+ +

Aucune donnée.

+ +
+ + + $hits): + $rankP++; + $slug = rawurldecode(substr($url, 6)); + $pct = round($hits / $maxP * 100); + ?> + + + + + + + +
+ + + +
+
+
+
+
+ +
+
+
+ + +
+
+
+ Livres consultés + livres +
+
+ +

Aucun accès à /book/ dans les logs.

+ +
+ + + $hits): + $rankB++; + $slug = rawurldecode(substr($url, 6)); + $pct = round($hits / $maxB * 100); + ?> + + + + + + + +
+ + + +
+
+
+
+
+ +
+
+
+ +
+ + +
+
+ Répartition par réseau + +
+ Tous + + + + + + Autres +
+ +
+
+ + +

+ +

+ + +
+ + + + + + + + + + + $as): ?> + + + + + + + + +
#RéseauPaysVisites
+ + + AS + +
+
+
+
+
+ +
+
+ + + + +
+
Groupes de réseaux
+
+

Regroupez plusieurs réseaux sous un label. Chaque ligne est un motif cherché dans le nom du réseau (insensible à la casse).

+
+
+ $g): ?> +
+
+ + +
+ +
+ +
+
+ + +
+
+
+
+ + + +