v1.6.32 : UA en entier + bouton « + bot » + filtrage bots des stats

- Agents détectés : UA affiché sans troncature (drill-down et liste)
- Bouton « + bot » pour ajouter un agent aux patterns via AJAX (CSRF)
- Section Agents alimentée par all_uas (tous UAs publics, bots inclus)
- AccessLogParser : bots exclus des compteurs pages/livres/visiteurs
- Caches stats vidés après chaque modification des patterns

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 21:45:10 +02:00
parent d53b5da31a
commit b0f4814bb0
6 changed files with 199 additions and 84 deletions
+71 -32
View File
@@ -2725,6 +2725,38 @@ switch ($action) {
require_once BASE_PATH . '/src/AccessLogParser.php';
require_once BASE_PATH . '/src/AsnLookup.php';
// Bot patterns: seed bots.json with a default list the first time it is
// needed, then load the list for the stats tab.
$botsFile = DATA_PATH . '/bots.json';
if (!file_exists($botsFile)) {
    $defaultBots = [
        'Googlebot', 'Googlebot-Image', 'Google-InspectionTool', 'Google-Extended',
        'bingbot', 'BingPreview', 'msnbot',
        'DuckDuckBot', 'DuckDuckGo-Favicons-Bot',
        'Baiduspider', 'YandexBot', 'YandexImages', 'YandexMetrika',
        'Applebot',
        'facebookexternalhit', 'facebot',
        'Twitterbot', 'LinkedInBot', 'Slackbot', 'TelegramBot', 'WhatsApp', 'Discordbot',
        'PetalBot', 'Bytespider', 'SogouSpider', 'SeznamBot', 'Exabot',
        'AhrefsBot', 'SemrushBot', 'MJ12bot', 'DotBot', 'rogerbot', 'BLEXBot', 'DataForSeoBot',
        'Screaming Frog SEO Spider',
        'ClaudeBot', 'GPTBot', 'PerplexityBot', 'cohere-ai', 'anthropic-ai',
        'meta-externalagent', 'OAI-SearchBot', 'Amazonbot',
        'CCBot', 'ia_archiver', 'archive.org_bot',
        'NetcraftSurveyAgent',
        'python-requests', 'python-urllib', 'Python/',
        'curl/', 'wget/', 'Wget/',
        'Go-http-client/1', 'Java/', 'Apache-HttpClient', 'okhttp/',
        'Scrapy', 'HeadlessChrome', 'PhantomJS', 'Puppeteer', 'Playwright', 'Selenium',
        'UptimeRobot', 'Pingdom', 'StatusCake', 'Site24x7', 'GTmetrix',
        'Chrome-Lighthouse', 'PageSpeed', 'Zabbix', 'check_http',
        'libwww-perl', 'GuzzleHttp', 'masscan', 'zgrab', 'nuclei',
    ];
    // Seeding is best-effort: if the directory or file cannot be created,
    // the guarded load below simply yields an empty pattern list.
    @mkdir(dirname($botsFile), 0755, true);
    @file_put_contents($botsFile, json_encode($defaultBots, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
}
// Load defensively: a missing or unreadable file, invalid JSON, or a JSON
// document that is not an array (e.g. a bare string or number) all fall back
// to an empty list, so $botPatterns is always an array for its consumers.
$botsJson = is_file($botsFile) ? @file_get_contents($botsFile) : false;
$botPatterns = is_string($botsJson) ? json_decode($botsJson, true) : null;
if (!is_array($botPatterns)) {
    $botPatterns = [];
}
$adminData['bot_patterns'] = $botPatterns;
$statsCacheFile = DATA_PATH . '/.stats_cache.json';
$statsRaw = null;
if (file_exists($statsCacheFile) && (time() - filemtime($statsCacheFile)) < 60) {
@@ -2733,7 +2765,7 @@ switch ($action) {
if ($statsRaw === null) {
$cutoff14 = strtotime('-14 days midnight') ?: (time() - 14 * 86400);
$tParser = new TrendingParser('/var/log/apache2', apacheAccessLog());
$accessParser = new AccessLogParser('/var/log/apache2', apacheAccessLog());
$accessParser = new AccessLogParser('/var/log/apache2', apacheAccessLog(), '', 600, 14, $botPatterns);
$accessStats = $accessParser->stats();
$topIps = array_slice($accessStats['ips'], 0, 200, true);
$asnMap = (new AsnLookup())->batchLookup(array_keys($topIps));
@@ -2758,6 +2790,7 @@ switch ($action) {
'as' => AsnLookup::aggregateByAs($topIps, $asnMap),
'pages_by_day' => $accessStats['pages_by_day'] ?? [],
'ip_data' => $ipData,
'all_uas' => $accessStats['all_uas'] ?? [],
];
@file_put_contents($statsCacheFile, json_encode($statsRaw));
}
@@ -2768,37 +2801,7 @@ switch ($action) {
$adminData['as_groups'] = asGroups();
$adminData['stats_pages_by_day'] = $statsRaw['pages_by_day'] ?? [];
$adminData['stats_ip_data'] = $statsRaw['ip_data'] ?? [];
// Patterns de bots — initialisation si absent
$botsFile = DATA_PATH . '/bots.json';
if (!file_exists($botsFile)) {
$defaultBots = [
'Googlebot','Googlebot-Image','Google-InspectionTool','Google-Extended',
'bingbot','BingPreview','msnbot',
'DuckDuckBot','DuckDuckGo-Favicons-Bot',
'Baiduspider','YandexBot','YandexImages','YandexMetrika',
'Applebot',
'facebookexternalhit','facebot',
'Twitterbot','LinkedInBot','Slackbot','TelegramBot','WhatsApp','Discordbot',
'PetalBot','Bytespider','SogouSpider','SeznamBot','Exabot',
'AhrefsBot','SemrushBot','MJ12bot','DotBot','rogerbot','BLEXBot','DataForSeoBot',
'Screaming Frog SEO Spider',
'ClaudeBot','GPTBot','Google-Extended','PerplexityBot','cohere-ai','anthropic-ai',
'meta-externalagent','OAI-SearchBot','Amazonbot',
'CCBot','ia_archiver','archive.org_bot',
'NetcraftSurveyAgent',
'python-requests','python-urllib','Python/',
'curl/','wget/','Wget/',
'Go-http-client/1','Java/','Apache-HttpClient','okhttp/',
'Scrapy','HeadlessChrome','PhantomJS','Puppeteer','Playwright','Selenium',
'UptimeRobot','Pingdom','StatusCake','Site24x7','GTmetrix',
'Chrome-Lighthouse','PageSpeed','Zabbix','check_http',
'libwww-perl','GuzzleHttp','masscan','zgrab','nuclei',
];
@mkdir(dirname($botsFile), 0755, true);
@file_put_contents($botsFile, json_encode($defaultBots, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
}
$adminData['bot_patterns'] = json_decode((string) file_get_contents($botsFile), true) ?: [];
$adminData['stats_all_uas'] = $statsRaw['all_uas'] ?? [];
}
if ($tab === 'categories') {
@@ -3248,9 +3251,45 @@ switch ($action) {
array_map('trim', explode("\n", (string) ($_POST['bot_patterns'] ?? '')))
)));
$ok = @file_put_contents($botsFile, json_encode($patterns, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT)) !== false;
if ($ok) {
@unlink(DATA_PATH . '/.stats_cache.json');
@unlink(BASE_PATH . '/_cache/access_stats.json');
}
header('Location: /admin/stats?' . ($ok ? 'saved=1' : 'error=write'));
exit;
case 'admin_add_bot':
    // AJAX endpoint: append a single user-agent pattern to bots.json and
    // answer with a JSON document ({ok: bool, pattern|error: string}).
    requireAuth();
    if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') {
        http_response_code(403);
        exit;
    }
    // CSRF check. An absent or empty stored token must never validate:
    // otherwise a request that omits the token would match the '' default.
    // hash_equals() makes the comparison timing-safe.
    // NOTE(review): $_session is lowercase, so it is not the $_SESSION
    // superglobal; confirm it is populated earlier in this file.
    $csrf = $_POST['_csrf'] ?? '';
    $expectedCsrf = $_session['csrf'] ?? '';
    if (
        !is_string($csrf)
        || !is_string($expectedCsrf)
        || $expectedCsrf === ''
        || !hash_equals($expectedCsrf, $csrf)
    ) {
        http_response_code(403);
        header('Content-Type: application/json');
        echo json_encode(['ok' => false, 'error' => 'csrf']);
        exit;
    }
    // A non-string value (e.g. pattern[]=x) is treated as empty rather than
    // being cast to the literal string "Array".
    $rawPattern = $_POST['pattern'] ?? '';
    $addPattern = is_string($rawPattern) ? trim($rawPattern) : '';
    if ($addPattern === '') {
        http_response_code(400);
        header('Content-Type: application/json');
        echo json_encode(['ok' => false, 'error' => 'empty']);
        exit;
    }
    $botsFile = DATA_PATH . '/bots.json';
    $botPatterns = is_file($botsFile) ? json_decode((string) file_get_contents($botsFile), true) : [];
    if (!is_array($botPatterns)) {
        // Missing, invalid or non-array JSON: start again from an empty list.
        $botPatterns = [];
    }
    if (!in_array($addPattern, $botPatterns, true)) {
        $botPatterns[] = $addPattern;
        $written = @file_put_contents($botsFile, json_encode($botPatterns, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
        if ($written === false) {
            // Report the failure instead of claiming success; the caches are
            // left untouched because the pattern list did not change.
            http_response_code(500);
            header('Content-Type: application/json');
            echo json_encode(['ok' => false, 'error' => 'write']);
            exit;
        }
    }
    // Drop both stats caches so the next stats request is recomputed with
    // the updated pattern list.
    @unlink(DATA_PATH . '/.stats_cache.json');
    @unlink(BASE_PATH . '/_cache/access_stats.json');
    header('Content-Type: application/json');
    echo json_encode(['ok' => true, 'pattern' => $addPattern]);
    exit;
case 'admin_create_role':
requireAuth();
if (!isAdmin() || $_SERVER['REQUEST_METHOD'] !== 'POST') {