<?php

declare(strict_types=1);

/**
 * Fetches RSS 2.0 / Atom feeds over HTTP and caches the parsed result on disk.
 *
 * Each feed URL maps to one JSON file (named after the MD5 of the URL) inside
 * the cache directory. A cached entry stays valid for a TTL taken, in order of
 * preference, from the RSS <ttl> element, from the Cache-Control max-age of
 * the final HTTP response, or from a one-hour default. The first two sources
 * are clamped to [MIN_TTL, MAX_TTL].
 */
class FeedFetcher
{
    private const MIN_TTL = 900; // 15 min
    private const MAX_TTL = 86400; // 24 h
    private const DEFAULT_TTL = 3600; // 1 h, used when neither the feed nor the server gives a TTL

    /**
     * @param string $cacheDir Directory holding the JSON cache files; created on first write.
     */
    public function __construct(private string $cacheDir)
    {
    }

    /**
     * Returns the feed items, from the cache while it is still valid, otherwise by refetching.
     *
     * @return array{items: array, feed_title: string, fetched_at: int, ttl: int}|null
     *         Null when there is no valid cache entry and the refetch fails.
     */
    public function get(string $url): ?array
    {
        // cacheRead() only returns entries whose shape was verified, so the
        // keys below are guaranteed to exist and to be integers.
        $cached = $this->cacheRead($url);
        if ($cached !== null && time() < $cached['fetched_at'] + $cached['ttl']) {
            return $cached;
        }

        return $this->fetch($url);
    }

    /**
     * Forces a refetch over HTTP and updates the cache.
     *
     * @return array{items: array, feed_title: string, fetched_at: int, ttl: int}|null
     *         Null on transport failure, on an HTTP status outside 200-399, or
     *         when the body is not well-formed XML. The cache is left untouched
     *         in those cases.
     */
    public function fetch(string $url): ?array
    {
        $ch = curl_init($url);
        if ($ch === false) {
            return null;
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_MAXREDIRS => 5,
            CURLOPT_TIMEOUT => 10,
            CURLOPT_USERAGENT => 'varlog/1.0 FeedFetcher (+' . (defined('APP_URL') ? APP_URL : '') . ')',
            // Headers are returned in front of the body so the TTL can be
            // derived from Cache-Control.
            CURLOPT_HEADER => true,
        ]);
        $raw = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $hSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        // curl_close() has been a no-op since PHP 8.0; dropping the reference releases the handle.
        unset($ch);

        if (!is_string($raw) || !is_int($httpCode) || !is_int($hSize) || $httpCode < 200 || $httpCode >= 400) {
            return null;
        }

        // CURLINFO_HEADER_SIZE is the combined size of every header block
        // received (redirect hops included), so this split is correct even
        // after redirects.
        $data = $this->buildResult(substr($raw, $hSize), substr($raw, 0, $hSize));
        if ($data === null) {
            return null;
        }

        $this->cacheWrite($url, $data);

        return $data;
    }

    // ------------------------------------------------------------------ //

    /**
     * Parses a response body into the cacheable feed structure.
     *
     * @param string $body    Raw XML document.
     * @param string $headers Raw HTTP response headers (may contain several blocks).
     *
     * @return array{items: array, feed_title: string, fetched_at: int, ttl: int}|null
     *         Null when the body is not well-formed XML.
     */
    private function buildResult(string $body, string $headers): ?array
    {
        // Silence libxml warnings for this parse only, then restore the
        // caller's setting: the flag is process-wide.
        $previous = libxml_use_internal_errors(true);
        try {
            $xml = simplexml_load_string($body, \SimpleXMLElement::class, LIBXML_NONET);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }
        if ($xml === false) {
            return null;
        }

        // A root element named <feed> means Atom; anything else is treated as RSS.
        $isAtom = ($xml->getName() === 'feed');
        $items = $isAtom ? $this->parseAtom($xml) : $this->parseRss($xml);
        $feedTitle = $isAtom
            ? (string)($xml->title ?? '')
            : (string)($xml->channel->title ?? '');

        return [
            'feed_title' => $feedTitle,
            'fetched_at' => time(),
            'ttl' => $this->resolveTtl($xml, $isAtom, $headers),
            'items' => $items,
        ];
    }

    /**
     * Extracts the <item> elements of an RSS channel, newest first.
     *
     * @return list<array{title: string, url: string, summary: string, date: int, author: string}>
     */
    private function parseRss(\SimpleXMLElement $xml): array
    {
        $items = [];
        foreach ($xml->channel->item ?? [] as $item) {
            $date = (string)($item->pubDate ?? '');
            $items[] = [
                'title' => trim((string)($item->title ?? '')),
                'url' => trim((string)($item->link ?? '')),
                'summary' => $this->cleanSummary((string)($item->description ?? '')),
                // strtotime() returns false on an unparsable date, which the cast turns into 0.
                'date' => $date !== '' ? (int)strtotime($date) : 0,
                'author' => trim((string)($item->author ?? '')),
            ];
        }

        return $this->sortItems($items);
    }

    /**
     * Extracts the <entry> elements of an Atom feed, newest first.
     *
     * @return list<array{title: string, url: string, summary: string, date: int, author: string}>
     */
    private function parseAtom(\SimpleXMLElement $xml): array
    {
        $items = [];
        foreach ($xml->entry ?? [] as $entry) {
            // Use the first link whose rel is "alternate" (the default when
            // rel is absent); other relations such as "self" are skipped.
            $url = '';
            foreach ($entry->link ?? [] as $link) {
                $rel = (string)($link['rel'] ?? 'alternate');
                if ($rel === 'alternate' || $rel === '') {
                    $url = (string)($link['href'] ?? '');
                    break;
                }
            }

            $date = (string)($entry->published ?? $entry->updated ?? '');
            $author = (string)($entry->author->name ?? '');
            $summary = (string)($entry->summary ?? $entry->content ?? '');

            $items[] = [
                'title' => trim((string)($entry->title ?? '')),
                'url' => trim($url),
                'summary' => $this->cleanSummary($summary),
                'date' => $date !== '' ? (int)strtotime($date) : 0,
                'author' => trim($author),
            ];
        }

        return $this->sortItems($items);
    }

    /**
     * Reduces an HTML fragment to a single-line plain-text excerpt.
     *
     * Tags are stripped, whitespace runs collapse to one space, and the result
     * is cut to a display width of 200 with a trailing ellipsis.
     */
    private function cleanSummary(string $html): string
    {
        $text = strip_tags($html);
        // The "u" modifier makes the pattern work on code points; without it
        // \s is byte-based and can split multi-byte UTF-8 sequences under some
        // locales. On invalid UTF-8 preg_replace() returns null and the text
        // is kept unchanged.
        $text = preg_replace('/\s+/u', ' ', $text) ?? $text;

        return mb_strimwidth(trim($text), 0, 200, '…', 'UTF-8');
    }

    /**
     * Orders items by descending 'date' (undated items, date 0, end up last).
     */
    private function sortItems(array $items): array
    {
        usort($items, static fn (array $a, array $b): int => $b['date'] <=> $a['date']);

        return $items;
    }

    /**
     * Decides how long, in seconds, the fetched feed may be served from the cache.
     *
     * @param string $headers Raw response headers; only the last block (the
     *                        final response after redirects) is considered.
     */
    private function resolveTtl(\SimpleXMLElement $xml, bool $isAtom, string $headers): int
    {
        // 1. TTL declared by the RSS feed itself (<ttl>, expressed in minutes).
        if (!$isAtom) {
            $rssTtl = (int)($xml->channel->ttl ?? 0);
            if ($rssTtl > 0) {
                return $this->clampTtl($rssTtl * 60);
            }
        }

        // 2. Cache-Control: max-age from the final HTTP response. The match is
        //    anchored on the Cache-Control line so that other headers carrying
        //    a max-age parameter (Strict-Transport-Security, Set-Cookie) are
        //    not mistaken for a cache lifetime.
        $final = $this->lastHeaderBlock($headers);
        if (preg_match('/^cache-control:[^\r\n]*?\bmax-age\s*=\s*"?(\d+)/mi', $final, $m) === 1) {
            return $this->clampTtl((int)$m[1]);
        }

        // 3. Fallback: one hour.
        return self::DEFAULT_TTL;
    }

    /**
     * Returns the last header block of a raw header dump.
     *
     * When cURL follows redirects it returns one block per response,
     * separated by blank lines; the last one belongs to the final response.
     */
    private function lastHeaderBlock(string $headers): string
    {
        $blocks = preg_split('/\r?\n\r?\n/', trim($headers));
        if ($blocks === false || $blocks === []) {
            return '';
        }

        return (string)end($blocks);
    }

    /**
     * Constrains a TTL to the [MIN_TTL, MAX_TTL] range.
     */
    private function clampTtl(int $seconds): int
    {
        return max(self::MIN_TTL, min(self::MAX_TTL, $seconds));
    }

    // ------------------------------------------------------------------ //

    /**
     * Builds the cache file path for a feed URL (MD5 of the URL, .json suffix).
     */
    private function cachePath(string $url): string
    {
        return $this->cacheDir . '/' . md5($url) . '.json';
    }

    /**
     * Loads the cache entry for a URL.
     *
     * @return array{items: array, feed_title: string, fetched_at: int, ttl: int}|null
     *         Null when the file is missing, unreadable, not valid JSON, or
     *         does not have the expected shape.
     */
    private function cacheRead(string $url): ?array
    {
        $path = $this->cachePath($url);
        if (!is_file($path)) {
            return null;
        }

        $contents = file_get_contents($path);
        if ($contents === false) {
            return null;
        }

        $data = json_decode($contents, true);
        // Reject anything that does not match what cacheWrite() stores, so
        // callers can index the result without guarding every key.
        $valid = is_array($data)
            && is_int($data['fetched_at'] ?? null)
            && is_int($data['ttl'] ?? null)
            && is_string($data['feed_title'] ?? null)
            && is_array($data['items'] ?? null);

        return $valid ? $data : null;
    }

    /**
     * Stores a feed result in the cache (best effort).
     *
     * Caching is skipped when the directory cannot be created or the data
     * cannot be encoded, rather than leaving an empty or partial entry behind.
     */
    private function cacheWrite(string $url, array $data): void
    {
        // The trailing is_dir() tolerates another process creating the
        // directory between the first check and mkdir().
        if (!is_dir($this->cacheDir) && !mkdir($this->cacheDir, 0755, true) && !is_dir($this->cacheDir)) {
            return;
        }

        $json = json_encode(
            $data,
            JSON_UNESCAPED_UNICODE | JSON_UNESCAPED_SLASHES | JSON_INVALID_UTF8_SUBSTITUTE,
        );
        if ($json === false) {
            return;
        }

        file_put_contents($this->cachePath($url), $json);
    }
}