Deploy on 2026-06-05 17:03:37

This commit is contained in:
Hamza-Ayed
2026-06-05 17:03:37 +03:00
parent 89b72a0b42
commit 227fd7c412
3 changed files with 62 additions and 18 deletions

View File

@@ -158,6 +158,25 @@ PROMPT;
$score = 30; $score = 30;
$tags = []; $tags = [];
$isOpportunity = true; // Treat all entries as opportunities by default $isOpportunity = true; // Treat all entries as opportunities by default
$orgName = null;
$country = null;
// Smart Regex rules for extracting organization names from title
if (preg_match('/^([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})\s+(raises|launches|secures|gets|partners|funded|acquires|announces|closes|receives|seeks)\b/i', $title, $matches)) {
$orgName = trim($matches[1]);
} elseif (preg_match('/(backs|invests in|funds|acquires)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
$orgName = trim($matches[2]);
} elseif (preg_match('/(investment in|funding for|launch of)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
$orgName = trim($matches[2]);
}
if ($orgName) {
$orgName = preg_replace('/\b(series|seed|funding|round|raised|million|billion|capital|partners|ventures|inc|ltd|corp|co|llc)\b.*$/i', '', $orgName);
$orgName = trim($orgName, " \t\n\r\0\x0B,.-");
if (strlen($orgName) < 2 || in_array(strtolower($orgName), ['startup', 'founder', 'investor', 'program', 'new', 'why', 'how', 'what', 'who', 'the'])) {
$orgName = null;
}
}
// Keyword patterns // Keyword patterns
if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) { if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) {
@@ -238,8 +257,8 @@ PROMPT;
'tags' => $tags, 'tags' => $tags,
'is_opportunity' => $isOpportunity, 'is_opportunity' => $isOpportunity,
'summary' => substr($description, 0, 200), 'summary' => substr($description, 0, 200),
'organization_name' => null, 'organization_name' => $orgName,
'country' => null, 'country' => $country,
]; ];
} }
} }

View File

@@ -20,25 +20,50 @@ class RssParser
*/ */
public function fetchEntries(string $url): array public function fetchEntries(string $url): array
{ {
$context = stream_context_create([ $xml = false;
'http' => [ if (function_exists('curl_init')) {
'timeout' => 15, $ch = curl_init();
'user_agent' => 'ScoutIQ/1.0 (Crawler)', curl_setopt($ch, CURLOPT_URL, $url);
], curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
'ssl' => [ curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
'verify_peer' => false, curl_setopt($ch, CURLOPT_USERAGENT, 'ScoutIQ/1.0 (Crawler)');
'verify_peer_name' => false, curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
], curl_setopt($ch, CURLOPT_TIMEOUT, 15);
]);
$xml = @file_get_contents($url, false, $context); $xml = curl_exec($ch);
if (!$xml) { $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
return []; curl_close($ch);
if ($httpCode !== 200) {
throw new \Exception("HTTP Error {$httpCode}");
}
if ($xml === false) {
throw new \Exception("Connection failed via cURL");
}
} else {
$context = stream_context_create([
'http' => [
'timeout' => 15,
'user_agent' => 'ScoutIQ/1.0 (Crawler)',
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
],
]);
$xml = @file_get_contents($url, false, $context);
if ($xml === false) {
$status = "Connection failed";
if (isset($http_response_header) && isset($http_response_header[0])) {
$status = $http_response_header[0];
}
throw new \Exception($status);
}
} }
$feed = @simplexml_load_string($xml); $feed = @simplexml_load_string($xml);
if (!$feed) { if (!$feed) {
return []; throw new \Exception("Invalid XML structure");
} }
$entries = []; $entries = [];

View File

@@ -2,11 +2,11 @@
return [ return [
'gemini' => [ 'gemini' => [
'api_key' => ($_ENV['GEMINI_API_KEY'] === 'null' || !$_ENV['GEMINI_API_KEY']) ? null : $_ENV['GEMINI_API_KEY'], 'api_key' => (getenv('GEMINI_API_KEY') ?: ($_SERVER['GEMINI_API_KEY'] ?? ($_ENV['GEMINI_API_KEY'] ?? null))),
'model' => 'gemini-flash-lite-latest', 'model' => 'gemini-flash-lite-latest',
], ],
'jwt' => [ 'jwt' => [
'secret' => $_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=', 'secret' => getenv('JWT_SECRET') ?: ($_SERVER['JWT_SECRET'] ?? ($_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=')),
'algorithm' => 'HS256', 'algorithm' => 'HS256',
'expires_in' => 86400 * 30, // 30 days 'expires_in' => 86400 * 30, // 30 days
], ],