pdo = $connection->getPdo(); }

    /**
     * Download an RSS 2.0 or Atom feed and return its entries as plain arrays.
     *
     * An empty array is returned when the download fails, the body is empty,
     * or the body is not parseable XML. Entries without a title are skipped.
     *
     * @param string $url Feed URL to download.
     *
     * @return list<array{
     *     title: string,
     *     description: string,
     *     url: string,
     *     published_at: string,
     *     categories: list<string>,
     *     source_raw: string
     * }>
     */
    public function fetchEntries(string $url): array
    {
        $context = stream_context_create([
            'http' => [
                'timeout' => 15,
                'user_agent' => 'ScoutIQ/1.0 (Crawler)',
            ],
            // NOTE(review): TLS peer verification is switched off, which allows
            // a man-in-the-middle to inject feed content. Left unchanged here
            // because enabling it may stop feeds with broken certificates from
            // loading; confirm whether that trade-off is still wanted.
            'ssl' => [
                'verify_peer' => false,
                'verify_peer_name' => false,
            ],
        ]);

        // Best-effort fetch: "@" hides the warning raised on network/HTTP
        // failure; the failure itself is reported as an empty result.
        $xml = @file_get_contents($url, false, $context);
        if ($xml === false || $xml === '') {
            return [];
        }

        // Malformed XML is likewise reported as "no entries", not as a warning.
        $feed = @simplexml_load_string($xml);
        if (!$feed) {
            return [];
        }

        // RSS keeps items under <channel><item>; Atom keeps <entry> at the root.
        $items = $feed->channel->item ?? $feed->entry ?? [];

        $entries = [];
        foreach ($items as $item) {
            $title = (string) ($item->title ?? '');
            // Compare against '' rather than empty(): a title of "0" is valid.
            if ($title === '') {
                continue;
            }

            $description = (string) ($item->description ?? $item->summary ?? '');
            $pubDate = (string) ($item->pubDate ?? $item->updated ?? '');

            $link = (string) ($item->link ?? $item->guid ?? '');
            if ($link === '' && isset($item->link)) {
                // Atom carries the URL in the href attribute, not as element
                // text. Prefer rel="alternate" (or no rel, which means the
                // same); otherwise take the first link that has an href.
                $fallbackHref = '';
                foreach ($item->link as $candidate) {
                    $href = (string) ($candidate['href'] ?? '');
                    if ($href === '') {
                        continue;
                    }

                    $rel = (string) ($candidate['rel'] ?? '');
                    if ($rel === '' || $rel === 'alternate') {
                        $link = $href;
                        break;
                    }

                    if ($fallbackHref === '') {
                        $fallbackHref = $href;
                    }
                }

                if ($link === '') {
                    $link = $fallbackHref;
                }
            }

            $categories = [];
            if (isset($item->category)) {
                foreach ($item->category as $category) {
                    $label = (string) $category;
                    if ($label === '') {
                        // Atom categories use a "term" attribute instead of text.
                        $label = (string) ($category['term'] ?? '');
                    }
                    $categories[] = $label;
                }
            }

            // strtotime() returns false for unparseable dates; passing that to
            // date() would yield the Unix epoch, so fall back to "now" exactly
            // as for a missing date.
            $timestamp = $pubDate !== '' ? strtotime($pubDate) : false;
            $publishedAt = $timestamp === false
                ? date('Y-m-d H:i:s')
                : date('Y-m-d H:i:s', $timestamp);

            $entries[] = [
                'title' => $title,
                'description' => strip_tags($description),
                'url' => $link,
                'published_at' => $publishedAt,
                'categories' => $categories,
                // The complete feed body, not only this entry's XML.
                'source_raw' => $xml,
            ];
        }

        return $entries;
    }

    /**
     * Check whether a non-deleted opportunity with the given URL exists.
     *
     * @param string $url Entry URL, compared for exact equality.
     *
     * @return bool True when a row with deleted_at IS NULL has this URL.
     */
    public function entryExists(string $url): bool
    {
        $stmt = $this->pdo->prepare("SELECT id FROM opportunities WHERE url = ? AND deleted_at IS NULL");
        $stmt->execute([$url]);

        // fetch() returns false when there is no row.
        return $stmt->fetch() !== false;
    }

    /**
     * Look up a non-deleted organization by exact domain, then by name substring.
     *
     * The domain lookup is tried first when a domain is given. If it finds
     * nothing, the name is matched as a literal substring using LIKE.
     *
     * @param string      $name   Organization name to search for.
     * @param string|null $domain Optional domain, compared for exact equality.
     *
     * @return int|null Id of the first matching organization, or null if none.
     */
    public function organizationExists(string $name, ?string $domain = null): ?int
    {
        if ($domain !== null && $domain !== '') {
            $stmt = $this->pdo->prepare("SELECT id FROM organizations WHERE domain = ? AND deleted_at IS NULL");
            $stmt->execute([$domain]);

            $id = $stmt->fetchColumn();
            if ($id !== false && $id !== null) {
                return (int) $id;
            }
        }

        // A blank name would build the pattern '%%' and match any organization.
        if (trim($name) === '') {
            return null;
        }

        // Escape LIKE metacharacters so the name is matched literally. "!" is
        // declared as the escape character explicitly because the default
        // escape character differs between database engines.
        $pattern = '%' . strtr($name, ['!' => '!!', '%' => '!%', '_' => '!_']) . '%';

        $stmt = $this->pdo->prepare(
            "SELECT id FROM organizations WHERE name LIKE ? ESCAPE '!' AND deleted_at IS NULL LIMIT 1"
        );
        $stmt->execute([$pattern]);

        $id = $stmt->fetchColumn();

        return ($id !== false && $id !== null) ? (int) $id : null;
    }
}