feat : visiteurs uniques, filtre jours, redirect 404→search, edit_tags (v1.6.16)
- SearchLogParser : visiteurs uniques par terme (IPs distinctes) au lieu de hits bruts (#41)
- SearchLogParser : paramètre $days (7/14), cache distinct par période, filtre logFiles par date (#46)
- admin/searches : boutons 7 j / 14 j, label dynamique, colonne « Visiteurs » (#41, #46)
- URL inconnue / slug absent : redirect 302 /search?q=… au lieu de page 404 (#57)
- edit_tags : masquer abbrev/camel si des valeurs connues existent pour le type (#48)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+26
-13
@@ -8,22 +8,25 @@ class SearchLogParser
|
||||
private string $vhostBase;
|
||||
private string $cacheFile;
|
||||
private int $cacheTtl;
|
||||
private int $days;
|
||||
|
||||
public function __construct(
|
||||
string $logDir = '/var/log/apache2',
|
||||
string $vhostBase = '*-access.log',
|
||||
string $cacheFile = '',
|
||||
int $cacheTtl = 600
|
||||
int $cacheTtl = 600,
|
||||
int $days = 14
|
||||
) {
|
||||
$this->logDir = rtrim($logDir, '/');
|
||||
$this->vhostBase = $vhostBase;
|
||||
$this->days = max(1, min(30, $days));
|
||||
$this->cacheFile = $cacheFile !== ''
|
||||
? $cacheFile
|
||||
: dirname(__DIR__) . '/_cache/search_terms.json';
|
||||
: dirname(__DIR__) . '/_cache/search_terms_' . $this->days . 'd.json';
|
||||
$this->cacheTtl = $cacheTtl;
|
||||
}
|
||||
|
||||
/** @return array<string,int> terme => nombre d'occurrences, trié desc */
|
||||
/** @return array<string,int> terme => visiteurs uniques, trié desc */
|
||||
public function topTerms(int $limit = 100): array
|
||||
{
|
||||
if ($this->cacheValid()) {
|
||||
@@ -33,9 +36,14 @@ class SearchLogParser
|
||||
}
|
||||
}
|
||||
|
||||
$counts = [];
|
||||
$visitors = []; // terme => [ip => true]
|
||||
foreach ($this->logFiles() as $file) {
|
||||
$this->parseFile($file, $counts);
|
||||
$this->parseFile($file, $visitors);
|
||||
}
|
||||
|
||||
$counts = [];
|
||||
foreach ($visitors as $term => $ips) {
|
||||
$counts[$term] = count($ips);
|
||||
}
|
||||
arsort($counts);
|
||||
|
||||
@@ -61,6 +69,7 @@ class SearchLogParser
|
||||
{
|
||||
$pattern = $this->logDir . '/' . $this->vhostBase;
|
||||
$files = [];
|
||||
$cutoff = time() - $this->days * 86400;
|
||||
|
||||
// Fichiers correspondant au pattern de base (courants + rotations incluses si glob)
|
||||
$bases = glob($pattern) ?: [];
|
||||
@@ -75,6 +84,9 @@ class SearchLogParser
|
||||
if (!is_readable($path)) {
|
||||
continue;
|
||||
}
|
||||
if (@filemtime($path) < $cutoff) {
|
||||
continue;
|
||||
}
|
||||
if (str_ends_with($path, '.tar.gz')) {
|
||||
$files[] = ['path' => $path, 'type' => 'tgz'];
|
||||
} elseif (str_ends_with($path, '.gz')) {
|
||||
@@ -88,7 +100,7 @@ class SearchLogParser
|
||||
return $files;
|
||||
}
|
||||
|
||||
private function parseFile(array $file, array &$counts): void
|
||||
private function parseFile(array $file, array &$visitors): void
|
||||
{
|
||||
if ($file['type'] === 'tgz') {
|
||||
try {
|
||||
@@ -99,7 +111,7 @@ class SearchLogParser
|
||||
continue;
|
||||
}
|
||||
foreach (explode("\n", $content) as $line) {
|
||||
$this->parseLine($line, $counts);
|
||||
$this->parseLine($line, $visitors);
|
||||
}
|
||||
}
|
||||
} catch (\Exception $e) {
|
||||
@@ -113,7 +125,7 @@ class SearchLogParser
|
||||
while (!gzeof($h)) {
|
||||
$line = gzgets($h, 8192);
|
||||
if ($line !== false) {
|
||||
$this->parseLine($line, $counts);
|
||||
$this->parseLine($line, $visitors);
|
||||
}
|
||||
}
|
||||
gzclose($h);
|
||||
@@ -123,28 +135,29 @@ class SearchLogParser
|
||||
return;
|
||||
}
|
||||
while (($line = fgets($h)) !== false) {
|
||||
$this->parseLine($line, $counts);
|
||||
$this->parseLine($line, $visitors);
|
||||
}
|
||||
fclose($h);
|
||||
}
|
||||
}
|
||||
|
||||
private function parseLine(string $line, array &$counts): void
|
||||
private function parseLine(string $line, array &$visitors): void
|
||||
{
|
||||
if (!str_contains($line, 'GET /search?')) {
|
||||
return;
|
||||
}
|
||||
if (!preg_match('/"GET \/search\?([^"]*) HTTP\//', $line, $m)) {
|
||||
if (!preg_match('/^(\S+) \S+ \S+ \[[^\]]+\] "GET \/search\?([^"]*) HTTP\//', $line, $m)) {
|
||||
return;
|
||||
}
|
||||
|
||||
parse_str($m[1], $params);
|
||||
$ip = $m[1];
|
||||
parse_str($m[2], $params);
|
||||
$q = trim(urldecode($params['q'] ?? ''));
|
||||
|
||||
if ($q === '' || mb_strlen($q) > 200) {
|
||||
return;
|
||||
}
|
||||
$q = mb_strtolower($q);
|
||||
$counts[$q] = ($counts[$q] ?? 0) + 1;
|
||||
$visitors[$q][$ip] = true;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user