Deploy on 2026-06-05 17:03:37
This commit is contained in:
@@ -158,6 +158,25 @@ PROMPT;
|
|||||||
$score = 30;
|
$score = 30;
|
||||||
$tags = [];
|
$tags = [];
|
||||||
$isOpportunity = true; // Treat all entries as opportunities by default
|
$isOpportunity = true; // Treat all entries as opportunities by default
|
||||||
|
$orgName = null;
|
||||||
|
$country = null;
|
||||||
|
|
||||||
|
// Smart Regex rules for extracting organization names from title
|
||||||
|
if (preg_match('/^([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})\s+(raises|launches|secures|gets|partners|funded|acquires|announces|closes|receives|seeks)\b/i', $title, $matches)) {
|
||||||
|
$orgName = trim($matches[1]);
|
||||||
|
} elseif (preg_match('/(backs|invests in|funds|acquires)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
|
||||||
|
$orgName = trim($matches[2]);
|
||||||
|
} elseif (preg_match('/(investment in|funding for|launch of)\s+([A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40})/i', $title, $matches)) {
|
||||||
|
$orgName = trim($matches[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($orgName) {
|
||||||
|
$orgName = preg_replace('/\b(series|seed|funding|round|raised|million|billion|capital|partners|ventures|inc|ltd|corp|co|llc)\b.*$/i', '', $orgName);
|
||||||
|
$orgName = trim($orgName, " \t\n\r\0\x0B,.-");
|
||||||
|
if (strlen($orgName) < 2 || in_array(strtolower($orgName), ['startup', 'founder', 'investor', 'program', 'new', 'why', 'how', 'what', 'who', 'the'])) {
|
||||||
|
$orgName = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Keyword patterns
|
// Keyword patterns
|
||||||
if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) {
|
if (preg_match('/\b(grant|funding|award|prize)\b/i', $text)) {
|
||||||
@@ -238,8 +257,8 @@ PROMPT;
|
|||||||
'tags' => $tags,
|
'tags' => $tags,
|
||||||
'is_opportunity' => $isOpportunity,
|
'is_opportunity' => $isOpportunity,
|
||||||
'summary' => substr($description, 0, 200),
|
'summary' => substr($description, 0, 200),
|
||||||
'organization_name' => null,
|
'organization_name' => $orgName,
|
||||||
'country' => null,
|
'country' => $country,
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -20,25 +20,50 @@ class RssParser
|
|||||||
*/
|
*/
|
||||||
public function fetchEntries(string $url): array
|
public function fetchEntries(string $url): array
|
||||||
{
|
{
|
||||||
$context = stream_context_create([
|
$xml = false;
|
||||||
'http' => [
|
if (function_exists('curl_init')) {
|
||||||
'timeout' => 15,
|
$ch = curl_init();
|
||||||
'user_agent' => 'ScoutIQ/1.0 (Crawler)',
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
],
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
'ssl' => [
|
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
|
||||||
'verify_peer' => false,
|
curl_setopt($ch, CURLOPT_USERAGENT, 'ScoutIQ/1.0 (Crawler)');
|
||||||
'verify_peer_name' => false,
|
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
|
||||||
],
|
curl_setopt($ch, CURLOPT_TIMEOUT, 15);
|
||||||
]);
|
|
||||||
|
$xml = curl_exec($ch);
|
||||||
|
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
|
||||||
|
curl_close($ch);
|
||||||
|
|
||||||
$xml = @file_get_contents($url, false, $context);
|
if ($httpCode !== 200) {
|
||||||
if (!$xml) {
|
throw new \Exception("HTTP Error {$httpCode}");
|
||||||
return [];
|
}
|
||||||
|
if ($xml === false) {
|
||||||
|
throw new \Exception("Connection failed via cURL");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
$context = stream_context_create([
|
||||||
|
'http' => [
|
||||||
|
'timeout' => 15,
|
||||||
|
'user_agent' => 'ScoutIQ/1.0 (Crawler)',
|
||||||
|
],
|
||||||
|
'ssl' => [
|
||||||
|
'verify_peer' => false,
|
||||||
|
'verify_peer_name' => false,
|
||||||
|
],
|
||||||
|
]);
|
||||||
|
$xml = @file_get_contents($url, false, $context);
|
||||||
|
if ($xml === false) {
|
||||||
|
$status = "Connection failed";
|
||||||
|
if (isset($http_response_header) && isset($http_response_header[0])) {
|
||||||
|
$status = $http_response_header[0];
|
||||||
|
}
|
||||||
|
throw new \Exception($status);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$feed = @simplexml_load_string($xml);
|
$feed = @simplexml_load_string($xml);
|
||||||
if (!$feed) {
|
if (!$feed) {
|
||||||
return [];
|
throw new \Exception("Invalid XML structure");
|
||||||
}
|
}
|
||||||
|
|
||||||
$entries = [];
|
$entries = [];
|
||||||
|
|||||||
@@ -2,11 +2,11 @@
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
'gemini' => [
|
'gemini' => [
|
||||||
'api_key' => ($_ENV['GEMINI_API_KEY'] === 'null' || !$_ENV['GEMINI_API_KEY']) ? null : $_ENV['GEMINI_API_KEY'],
|
'api_key' => (getenv('GEMINI_API_KEY') ?: ($_SERVER['GEMINI_API_KEY'] ?? ($_ENV['GEMINI_API_KEY'] ?? null))),
|
||||||
'model' => 'gemini-flash-lite-latest',
|
'model' => 'gemini-flash-lite-latest',
|
||||||
],
|
],
|
||||||
'jwt' => [
|
'jwt' => [
|
||||||
'secret' => $_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=',
|
'secret' => getenv('JWT_SECRET') ?: ($_SERVER['JWT_SECRET'] ?? ($_ENV['JWT_SECRET'] ?? 'base64:3uFzGf9o8+D+U0mC4/3K1y4m81Qj7G6qTzS=')),
|
||||||
'algorithm' => 'HS256',
|
'algorithm' => 'HS256',
|
||||||
'expires_in' => 86400 * 30, // 30 days
|
'expires_in' => 86400 * 30, // 30 days
|
||||||
],
|
],
|
||||||
|
|||||||
Reference in New Issue
Block a user