Deploy on 2026-06-05 17:03:37
This commit is contained in:
@@ -158,6 +158,25 @@ PROMPT;
|
||||
$score = 30;
|
||||
$tags = [];
|
||||
$isOpportunity = true; // Treat all entries as opportunities by default
|
||||
$orgName = null;
|
||||
$country = null;
|
||||
|
||||
// Smart Regex rules for extracting organization names from title
|
||||
if (preg_match('/^([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})\s+(raises|launches|secures|gets|partners|funded|acquires|announces|closes|receives|seeks)\b/i', $title, $matches)) {
|
||||
$orgName = trim($matches[1]);
|
||||
} elseif (preg_match('/(backs|invests in|funds|acquires)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
|
||||
$orgName = trim($matches[2]);
|
||||
} elseif (preg_match('/(investment in|funding for|launch of)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
|
||||
$orgName = trim($matches[2]);
|
||||
}
|
||||
|
||||
if ($orgName) {
|
||||
$orgName = preg_replace('/\b(series|seed|funding|round|raised|million|billion|capital|partners|ventures|inc|ltd|corp|co|llc)\b.*$/i', '', $orgName);
|
||||
$orgName = trim($orgName, " \t\n\r\0\x0B,.-");
|
||||
if (strlen($orgName) < 2 || in_array(strtolower($orgName), ['startup', 'founder', 'investor', 'program', 'new', 'why', 'how', 'what', 'who', 'the'])) {
|
||||
$orgName = null;
|
||||
}
|
||||
}
|
||||
|
||||
// Keyword patterns
|
||||
if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) {
|
||||
@@ -238,8 +257,8 @@ PROMPT;
|
||||
'tags' => $tags,
|
||||
'is_opportunity' => $isOpportunity,
|
||||
'summary' => substr($description, 0, 200),
|
||||
'organization_name' => null,
|
||||
'country' => null,
|
||||
'organization_name' => $orgName,
|
||||
'country' => $country,
|
||||
];
|
||||
}
|
||||
}
|
||||
@@ -20,6 +20,27 @@ class RssParser
|
||||
*/
|
||||
public function fetchEntries(string $url): array
|
||||
{
|
||||
$xml = false;
|
||||
if (function_exists('curl_init')) {
|
||||
$ch = curl_init();
|
||||
curl_setopt($ch, CURLOPT_URL, $url);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||
curl_setopt($ch, CURLOPT_USERAGENT, 'ScoutIQ/1.0 (Crawler)');
|
||||
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
|
||||
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
|
||||
|
||||
$xml = curl_exec($ch);
|
||||
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||
curl_close($ch);
|
||||
|
||||
if ($httpCode !== 200) {
|
||||
throw new \Exception("HTTP Error {$httpCode}");
|
||||
}
|
||||
if ($xml === false) {
|
||||
throw new \Exception("Connection failed via cURL");
|
||||
}
|
||||
} else {
|
||||
$context = stream_context_create([
|
||||
'http' => [
|
||||
'timeout' => 15,
|
||||
@@ -30,15 +51,19 @@ class RssParser
|
||||
'verify_peer_name' => false,
|
||||
],
|
||||
]);
|
||||
|
||||
$xml = @file_get_contents($url, false, $context);
|
||||
if (!$xml) {
|
||||
return [];
|
||||
if ($xml === false) {
|
||||
$status = "Connection failed";
|
||||
if (isset($http_response_header) && isset($http_response_header[0])) {
|
||||
$status = $http_response_header[0];
|
||||
}
|
||||
throw new \Exception($status);
|
||||
}
|
||||
}
|
||||
|
||||
$feed = @simplexml_load_string($xml);
|
||||
if (!$feed) {
|
||||
return [];
|
||||
throw new \Exception("Invalid XML structure");
|
||||
}
|
||||
|
||||
$entries = [];
|
||||
|
||||
@@ -2,11 +2,11 @@
|
||||
|
||||
return [
|
||||
'gemini' => [
|
||||
'api_key' => ($_ENV['GEMINI_API_KEY'] === 'null' || !$_ENV['GEMINI_API_KEY']) ? null : $_ENV['GEMINI_API_KEY'],
|
||||
'api_key' => (getenv('GEMINI_API_KEY') ?: ($_SERVER['GEMINI_API_KEY'] ?? ($_ENV['GEMINI_API_KEY'] ?? null))),
|
||||
'model' => 'gemini-flash-lite-latest',
|
||||
],
|
||||
'jwt' => [
|
||||
'secret' => $_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=',
|
||||
'secret' => getenv('JWT_SECRET') ?: ($_SERVER['JWT_SECRET'] ?? ($_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=')),
|
||||
'algorithm' => 'HS256',
|
||||
'expires_in' => 86400 * 30, // 30 days
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user