logDir = rtrim($logDir, '/');
        $this->vhostBase = $vhostBase;
        $this->cacheFile = $cacheFile !== '' ? $cacheFile : dirname(__DIR__) . '/_cache/search_terms.json';
        $this->cacheTtl = $cacheTtl;
    }

    /**
     * Returns the most frequent search terms found in the access logs.
     *
     * A still-fresh JSON cache is served when available. Otherwise every log
     * file is parsed, the complete tally is written to the cache file, and
     * the first $limit entries are returned.
     *
     * @param int $limit Maximum number of terms to return.
     * @return array Map of lowercased term => occurrence count, sorted by count descending.
     */
    public function topTerms(int $limit = 100): array
    {
        if ($this->cacheValid()) {
            $data = json_decode((string) file_get_contents($this->cacheFile), true);
            if (is_array($data)) {
                return array_slice($data, 0, $limit, true);
            }
        }

        $counts = [];
        foreach ($this->logFiles() as $file) {
            $this->parseFile($file, $counts);
        }
        arsort($counts);

        $cacheDir = dirname($this->cacheFile);
        if (!is_dir($cacheDir)) {
            @mkdir($cacheDir, 0755, true);
        }

        // Query strings come from arbitrary clients and may decode to invalid
        // UTF-8. Without the substitute flag json_encode() returns false and an
        // empty, permanently undecodable cache file would be written.
        $json = json_encode($counts, JSON_UNESCAPED_UNICODE | JSON_INVALID_UTF8_SUBSTITUTE);
        if ($json !== false) {
            file_put_contents($this->cacheFile, $json);
        }

        return array_slice($counts, 0, $limit, true);
    }

    /**
     * Tells whether at least one readable log file matches the configured pattern.
     */
    public function isReadable(): bool
    {
        return $this->logFiles() !== [];
    }

    /**
     * Tells whether the cache file exists and is younger than the configured TTL (seconds).
     */
    private function cacheValid(): bool
    {
        if (!file_exists($this->cacheFile)) {
            return false;
        }

        $mtime = @filemtime($this->cacheFile);

        return $mtime !== false && (time() - $mtime) < $this->cacheTtl;
    }

    /**
     * Lists the readable log files to parse: each base file matching the
     * pattern plus its rotations (.N, .N.gz, .N.tar.gz).
     *
     * @return array List of array{path: string, type: string}; type is one of plain|gz|tgz.
     */
    private function logFiles(): array
    {
        $pattern = $this->logDir . '/' . $this->vhostBase;
        $bases = glob($pattern) ?: [];

        // Keyed by path so a file reachable through two bases is only parsed once.
        $files = [];

        foreach ($bases as $base) {
            // Rotations matched directly by the pattern are skipped here: they
            // are picked up below as candidates of their base file.
            if (str_ends_with($base, '.gz') || preg_match('/\.\d+$/', $base)) {
                continue;
            }

            $candidates = array_merge([$base], glob($base . '.*') ?: []);
            foreach ($candidates as $path) {
                if (isset($files[$path]) || !is_file($path) || !is_readable($path)) {
                    continue;
                }

                $type = match (true) {
                    str_ends_with($path, '.tar.gz') => 'tgz',
                    str_ends_with($path, '.gz') => 'gz',
                    default => 'plain',
                };
                $files[$path] = ['path' => $path, 'type' => $type];
            }
        }

        return array_values($files);
    }

    /**
     * Feeds every line of one log file (plain, gzip or tar.gz) to parseLine().
     *
     * Unreadable files and archives are silently skipped.
     *
     * @param array $file   Entry produced by logFiles(): array{path: string, type: string}.
     * @param array $counts Running tally of term => count, updated in place.
     */
    private function parseFile(array $file, array &$counts): void
    {
        $path = $file['path'];

        if ($file['type'] === 'tgz') {
            try {
                $archive = new \PharData($path);
                // Recursive iteration also reaches entries stored in sub-directories.
                foreach (new \RecursiveIteratorIterator($archive) as $entry) {
                    // Stream each entry instead of loading it whole into memory.
                    $handle = @fopen($entry->getPathname(), 'rb');
                    if (!$handle) {
                        continue;
                    }
                    try {
                        while (($line = fgets($handle)) !== false) {
                            $this->parseLine($line, $counts);
                        }
                    } finally {
                        fclose($handle);
                    }
                }
            } catch (\Exception $e) {
                // Unreadable archive: ignored on purpose.
            }

            return;
        }

        if ($file['type'] === 'gz') {
            $handle = @gzopen($path, 'rb');
            if (!$handle) {
                return;
            }
            try {
                // Loop on the read result rather than gzeof(): on a corrupt
                // stream gzgets() can keep returning false without EOF ever
                // being reached, which would spin forever.
                while (($line = gzgets($handle)) !== false) {
                    $this->parseLine($line, $counts);
                }
            } finally {
                gzclose($handle);
            }

            return;
        }

        $handle = @fopen($path, 'rb');
        if (!$handle) {
            return;
        }
        try {
            while (($line = fgets($handle)) !== false) {
                $this->parseLine($line, $counts);
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Extracts the "q" parameter of a "GET /search?..." request line and
     * increments its tally.
     *
     * Terms are trimmed and lowercased; empty terms, terms longer than 200
     * characters and non-scalar "q" values (e.g. q[]=x) are ignored.
     *
     * @param string $line   One raw access-log line.
     * @param array  $counts Running tally of term => count, updated in place.
     */
    private function parseLine(string $line, array &$counts): void
    {
        // Cheap substring test first so the regex only runs on candidate lines.
        if (!str_contains($line, 'GET /search?')) {
            return;
        }
        if (!preg_match('/"GET \/search\?([^"]*) HTTP\//', $line, $m)) {
            return;
        }

        parse_str($m[1], $params);

        // parse_str() has already URL-decoded the value; decoding a second time
        // would corrupt terms containing a literal "+" or "%".
        $raw = $params['q'] ?? '';
        if (!is_string($raw)) {
            return;
        }

        $q = trim($raw);
        if ($q === '' || mb_strlen($q) > 200) {
            return;
        }

        $q = mb_strtolower($q);
        $counts[$q] = ($counts[$q] ?? 0) + 1;
    }
}