pdo = $connection->getPdo();
    }

    /**
     * Download an RSS or Atom feed and return its entries as plain arrays.
     *
     * Items are read from `channel/item` (RSS) or, failing that, from `entry`
     * (Atom). Entries whose title is blank are skipped. Every returned entry
     * carries the complete raw feed body under `source_raw`.
     *
     * @param string $url Feed URL to download.
     *
     * @return array<int, array{
     *     title: string,
     *     description: string,
     *     url: string,
     *     published_at: string,
     *     categories: array<int, string>,
     *     source_raw: string
     * }>
     *
     * @throws \Exception When the download fails, when the cURL path receives a
     *                    status other than 200, or when the body is not
     *                    well-formed XML.
     */
    public function fetchEntries(string $url): array
    {
        $xml = $this->downloadFeedBody($url);

        // Collect libxml diagnostics internally instead of silencing warnings
        // with "@", then put the caller's libxml setting back.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            $feed = simplexml_load_string($xml);
        } finally {
            if (!$previousSetting) {
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);
        }

        // Compare with false explicitly: a SimpleXMLElement without children or
        // attributes is falsy, so "!$feed" would reject a well-formed empty root.
        if ($feed === false) {
            throw new \Exception("Invalid XML structure");
        }

        $entries = [];
        $items = $feed->channel->item ?? $feed->entry ?? [];

        foreach ($items as $item) {
            $title = (string)($item->title ?? '');

            // Strict comparison instead of empty(), which would also drop "0".
            if (trim($title) === '') {
                continue;
            }

            $description = (string)($item->description ?? $item->summary ?? $item->content ?? '');
            $pubDate = (string)($item->pubDate ?? $item->published ?? $item->updated ?? '');

            // strtotime() returns false for text it cannot parse; fall back to
            // the current time rather than handing false to date().
            $timestamp = $pubDate !== '' ? strtotime($pubDate) : false;
            if ($timestamp === false) {
                $timestamp = time();
            }

            $entries[] = [
                'title' => $title,
                'description' => strip_tags($description),
                'url' => $this->resolveEntryLink($item),
                'published_at' => date('Y-m-d H:i:s', $timestamp),
                'categories' => $this->collectEntryCategories($item),
                'source_raw' => $xml,
            ];
        }

        return $entries;
    }

    /**
     * Fetch the raw feed body, using cURL when available and an HTTP stream
     * context otherwise.
     *
     * @param string $url Feed URL to download.
     *
     * @return string Raw response body.
     *
     * @throws \Exception On transport failure or, on the cURL path, on any
     *                    status code other than 200.
     */
    private function downloadFeedBody(string $url): string
    {
        if (function_exists('curl_init')) {
            $ch = curl_init();
            if ($ch === false) {
                throw new \Exception("Connection failed via cURL");
            }

            // NOTE(review): certificate verification is switched off here and in
            // the stream fallback below, so HTTPS responses are not authenticated.
            // Left unchanged because existing feeds may rely on it - confirm.
            curl_setopt_array($ch, [
                CURLOPT_URL => $url,
                CURLOPT_RETURNTRANSFER => true,
                CURLOPT_FOLLOWLOCATION => true,
                CURLOPT_USERAGENT => 'ScoutIQ/1.0 (Crawler)',
                CURLOPT_SSL_VERIFYPEER => false,
                CURLOPT_TIMEOUT => 15,
            ]);

            $body = curl_exec($ch);
            $transportError = curl_error($ch);
            $httpCode = (int)curl_getinfo($ch, CURLINFO_HTTP_CODE);
            curl_close($ch);

            // Check the transport result first: when the transfer itself fails
            // the status code is 0, which would otherwise be reported as a
            // misleading "HTTP Error 0".
            if ($body === false) {
                $detail = $transportError !== '' ? ": {$transportError}" : '';
                throw new \Exception("Connection failed via cURL{$detail}");
            }

            if ($httpCode !== 200) {
                throw new \Exception("HTTP Error {$httpCode}");
            }

            return (string)$body;
        }

        $context = stream_context_create([
            'http' => [
                'timeout' => 15,
                'user_agent' => 'ScoutIQ/1.0 (Crawler)',
            ],
            'ssl' => [
                'verify_peer' => false,
                'verify_peer_name' => false,
            ],
        ]);

        // file_get_contents() raises a warning on failure; it is suppressed
        // because the failure is turned into an exception right below.
        $body = @file_get_contents($url, false, $context);

        if ($body === false) {
            // The first response header line is the HTTP status line, if any
            // response was received at all.
            throw new \Exception($http_response_header[0] ?? "Connection failed");
        }

        return $body;
    }

    /**
     * Determine the URL of a single feed item.
     *
     * Uses the text of `link` (or `guid` when there is no `link`). When that
     * text is blank, falls back to the `href` attribute of the item's `link`
     * elements, preferring one whose `rel` is absent or "alternate", and
     * finally to `guid`.
     *
     * @param \SimpleXMLElement $item Feed item or entry node.
     *
     * @return string The resolved URL, or an empty string when none is found.
     */
    private function resolveEntryLink(\SimpleXMLElement $item): string
    {
        $link = (string)($item->link ?? $item->guid ?? '');
        if (trim($link) !== '') {
            return $link;
        }

        $fallbackHref = '';
        foreach ($item->link as $candidate) {
            $href = trim((string)$candidate['href']);
            if ($href === '') {
                continue;
            }

            $rel = (string)$candidate['rel'];
            if ($rel === '' || $rel === 'alternate') {
                return $href;
            }

            if ($fallbackHref === '') {
                $fallbackHref = $href;
            }
        }

        if ($fallbackHref !== '') {
            return $fallbackHref;
        }

        return (string)($item->guid ?? '');
    }

    /**
     * Collect the category labels of a single feed item.
     *
     * Uses the element text and falls back to the `term` attribute when the
     * text is blank. Categories that are blank either way are left out.
     *
     * @param \SimpleXMLElement $item Feed item or entry node.
     *
     * @return array<int, string>
     */
    private function collectEntryCategories(\SimpleXMLElement $item): array
    {
        $categories = [];

        foreach ($item->category as $category) {
            $label = (string)$category;
            if (trim($label) === '') {
                $label = (string)$category['term'];
            }

            if (trim($label) !== '') {
                $categories[] = $label;
            }
        }

        return $categories;
    }

    /**
     * Check whether a non-deleted opportunity with this URL already exists.
     *
     * @param string $url Entry URL to look up.
     *
     * @return bool True when at least one matching row is found.
     */
    public function entryExists(string $url): bool
    {
        $stmt = $this->pdo->prepare(
            "SELECT id FROM opportunities WHERE url = ? AND deleted_at IS NULL LIMIT 1"
        );
        $stmt->execute([$url]);

        // fetchColumn() returns false only when there is no row.
        return $stmt->fetchColumn() !== false;
    }

    /**
     * Look up a non-deleted organization by exact domain or, failing that, by
     * a substring match on its name.
     *
     * @param string      $name   Organization name to search for.
     * @param string|null $domain Optional domain; tried first when given.
     *
     * @return int|null Id of the matching organization, or null when nothing
     *                  matches or the name is blank.
     */
    public function organizationExists(string $name, ?string $domain = null): ?int
    {
        if ($domain !== null && $domain !== '') {
            $stmt = $this->pdo->prepare(
                "SELECT id FROM organizations WHERE domain = ? AND deleted_at IS NULL"
            );
            $stmt->execute([$domain]);
            $id = $stmt->fetchColumn();

            if ($id !== false && $id !== null) {
                return (int)$id;
            }
        }

        // A blank name would produce the pattern "%%", which matches every
        // organization and would return an arbitrary id.
        $needle = trim($name);
        if ($needle === '') {
            return null;
        }

        // Escape LIKE wildcards so "%" and "_" in a name are matched literally.
        // "!" is used as the escape character because a one-character ESCAPE
        // literal is written the same way regardless of backslash handling.
        $escaped = strtr($needle, ['!' => '!!', '%' => '!%', '_' => '!_']);

        // Fuzzy match by name
        $stmt = $this->pdo->prepare(
            "SELECT id FROM organizations WHERE name LIKE ? ESCAPE '!' AND deleted_at IS NULL LIMIT 1"
        );
        $stmt->execute(['%' . $escaped . '%']);
        $id = $stmt->fetchColumn();

        return ($id !== false && $id !== null) ? (int)$id : null;
    }
}