Deploy on 2026-06-05 17:03:37

This commit is contained in:
Hamza-Ayed
2026-06-05 17:03:37 +03:00
parent 89b72a0b42
commit 227fd7c412
3 changed files with 62 additions and 18 deletions

View File

@@ -158,6 +158,25 @@ PROMPT;
$score = 30;
$tags = [];
$isOpportunity = true; // Treat all entries as opportunities by default
$orgName = null;
$country = null;
// Smart Regex rules for extracting organization names from title
if (preg_match('/^([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})\s+(raises|launches|secures|gets|partners|funded|acquires|announces|closes|receives|seeks)\b/i', $title, $matches)) {
$orgName = trim($matches[1]);
} elseif (preg_match('/(backs|invests in|funds|acquires)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
$orgName = trim($matches[2]);
} elseif (preg_match('/(investment in|funding for|launch of)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
$orgName = trim($matches[2]);
}
if ($orgName) {
$orgName = preg_replace('/\b(series|seed|funding|round|raised|million|billion|capital|partners|ventures|inc|ltd|corp|co|llc)\b.*$/i', '', $orgName);
$orgName = trim($orgName, " \t\n\r\0\x0B,.-");
if (strlen($orgName) < 2 || in_array(strtolower($orgName), ['startup', 'founder', 'investor', 'program', 'new', 'why', 'how', 'what', 'who', 'the'])) {
$orgName = null;
}
}
// Keyword patterns
if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) {
@@ -238,8 +257,8 @@ PROMPT;
'tags' => $tags,
'is_opportunity' => $isOpportunity,
'summary' => substr($description, 0, 200),
'organization_name' => null,
'country' => null,
'organization_name' => $orgName,
'country' => $country,
];
}
}

View File

@@ -20,25 +20,50 @@ class RssParser
*/
public function fetchEntries(string $url): array
{
$context = stream_context_create([
'http' => [
'timeout' => 15,
'user_agent' => 'ScoutIQ/1.0 (Crawler)',
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
],
]);
$xml = false;
if (function_exists('curl_init')) {
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_USERAGENT, 'ScoutIQ/1.0 (Crawler)');
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
$xml = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
$xml = @file_get_contents($url, false, $context);
if (!$xml) {
return [];
if ($httpCode !== 200) {
throw new \Exception("HTTP Error {$httpCode}");
}
if ($xml === false) {
throw new \Exception("Connection failed via cURL");
}
} else {
$context = stream_context_create([
'http' => [
'timeout' => 15,
'user_agent' => 'ScoutIQ/1.0 (Crawler)',
],
'ssl' => [
'verify_peer' => false,
'verify_peer_name' => false,
],
]);
$xml = @file_get_contents($url, false, $context);
if ($xml === false) {
$status = "Connection failed";
if (isset($http_response_header) && isset($http_response_header[0])) {
$status = $http_response_header[0];
}
throw new \Exception($status);
}
}
$feed = @simplexml_load_string($xml);
if (!$feed) {
return [];
throw new \Exception("Invalid XML structure");
}
$entries = [];

View File

@@ -2,11 +2,11 @@
return [
'gemini' => [
'api_key' => ($_ENV['GEMINI_API_KEY'] === 'null' || !$_ENV['GEMINI_API_KEY']) ? null : $_ENV['GEMINI_API_KEY'],
'api_key' => (getenv('GEMINI_API_KEY') ?: ($_SERVER['GEMINI_API_KEY'] ?? ($_ENV['GEMINI_API_KEY'] ?? null))),
'model' => 'gemini-flash-lite-latest',
],
'jwt' => [
'secret' => $_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=',
'secret' => getenv('JWT_SECRET') ?: ($_SERVER['JWT_SECRET'] ?? ($_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=')),
'algorithm' => 'HS256',
'expires_in' => 86400 * 30, // 30 days
],