apiKey = $config['gemini']['api_key'] ?? null; $this->model = $config['gemini']['model'] ?? 'gemini-flash-lite-latest'; } /** * Analyze text using Google Gemini AI to classify and extract info. * Returns: type, score, tags, is_opportunity, summary */ public function analyze(string $title, string $description): array { if (!$this->apiKey) { return $this->fallbackAnalysis($title, $description); } $prompt = <<callGemini($prompt); $json = json_decode($response, true); if (json_last_error() === JSON_ERROR_NONE && isset($json['type'])) { return $json; } } catch (Throwable $e) { // Fallback } return $this->fallbackAnalysis($title, $description); } /** * Analyze content for organization/investor extraction. */ public function extractOrganization(string $text): array { if (!$this->apiKey) { return [ 'name' => null, 'type' => null, 'country' => null, 'website' => null, 'description' => substr($text, 0, 500), ]; } $prompt = <<callGemini($prompt); $json = json_decode($response, true); if (json_last_error() === JSON_ERROR_NONE) { return $json; } } catch (Throwable $e) { // Fallback } return [ 'name' => null, 'type' => null, 'country' => null, 'website' => null, 'description' => substr($text, 0, 500), ]; }

    // NOTE(review): everything above this comment is reproduced unchanged.
    // `$prompt = <<callGemini($prompt);` and the `catch` clauses without a
    // matching `try` suggest the heredoc prompts and `try {` openers were lost
    // before this file reached review; restore them from the original source.

    /**
     * Send a prompt to the Gemini generateContent endpoint and return the
     * text of the first candidate.
     *
     * Best-effort by design: any encoding, transport or decoding failure
     * yields the string '{}' so callers can json_decode() the result
     * unconditionally.
     *
     * @param string $prompt Prompt text sent as the single content part.
     *
     * @return string Model output text, or '{}' when no usable text came back.
     */
    private function callGemini(string $prompt): string
    {
        $url = sprintf(
            'https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent?key=%s',
            rawurlencode((string) $this->model),
            rawurlencode((string) $this->apiKey)
        );

        $payload = json_encode([
            'contents' => [
                [
                    'parts' => [
                        ['text' => $prompt],
                    ],
                ],
            ],
            'generationConfig' => [
                'temperature' => 0.2,
                'maxOutputTokens' => 500,
            ],
        ]);

        // json_encode() returns false when the prompt cannot be encoded
        // (for example invalid UTF-8); do not send `false` as a request body.
        if ($payload === false) {
            return '{}';
        }

        $context = stream_context_create([
            'http' => [
                'method' => 'POST',
                'header' => "Content-Type: application/json\r\n",
                'content' => $payload,
                'timeout' => 30,
            ],
            // Certificate and host-name verification must stay enabled: the
            // request carries the API key, so an unverified connection would
            // hand it to anyone able to intercept the traffic.
            'ssl' => [
                'verify_peer' => true,
                'verify_peer_name' => true,
            ],
        ]);

        // '@' is deliberate: a network failure must degrade to the '{}'
        // fallback instead of emitting a warning into the caller's output.
        $response = @file_get_contents($url, false, $context);
        if ($response === false || $response === '') {
            return '{}';
        }

        $data = json_decode($response, true);
        $text = is_array($data)
            ? ($data['candidates'][0]['content']['parts'][0]['text'] ?? null)
            : null;

        // Guard the declared string return type against unexpected payload shapes.
        return is_string($text) ? $text : '{}';
    }

    /**
     * Keyword-based classification used when the AI call is unavailable.
     *
     * Scans the title and description with fixed, case-insensitive patterns.
     * Category rules are applied in order: a later matching rule overrides the
     * type, opportunity type and score set by an earlier one, while tags
     * accumulate across every matching rule.
     *
     * @param string $title       Entry title; also used to guess the organization name.
     * @param string $description Entry description; its first 200 characters become the summary.
     *
     * @return array{type:string,opportunity_type:string,score:int,tags:list<string>,is_opportunity:bool,summary:string,organization_name:?string,country:?string}
     */
    private function fallbackAnalysis(string $title, string $description): array
    {
        // Every pattern below carries the /i flag, so no lowercasing pass is needed.
        $text = $title . ' ' . $description;

        // --- Organization name, guessed from the title -----------------------
        $namePattern = '[A-Z0-9][A-Za-z0-9\s\-\.\&]{2,40}';
        $orgName = null;

        if (preg_match('/^(' . $namePattern . ')\s+(raises|launches|secures|gets|partners|funded|acquires|announces|closes|receives|seeks)\b/i', $title, $matches)) {
            // "<Name> raises ..." — the name leads the headline.
            $orgName = trim($matches[1]);
        } elseif (preg_match('/\b(backs|invests in|funds|acquires)\s+(' . $namePattern . ')/i', $title, $matches)) {
            // "... backs <Name>"; the leading \b keeps "Drawbacks" or "refunds" from matching.
            $orgName = trim($matches[2]);
        } elseif (preg_match('/\b(investment in|funding for|launch of)\s+(' . $namePattern . ')/i', $title, $matches)) {
            $orgName = trim($matches[2]);
        }

        if ($orgName !== null) {
            // Cut the capture at the first funding/legal-suffix word and everything after it.
            $stripped = preg_replace(
                '/\b(series|seed|funding|round|raised|million|billion|capital|partners|ventures|inc|ltd|corp|co|llc)\b.*$/i',
                '',
                $orgName
            );
            // preg_replace() returns null on a PCRE error; keep the raw capture in that case.
            $orgName = trim($stripped ?? $orgName, " \t\n\r\0\x0B,.-");

            // Generic words that the patterns capture but that are not organization names.
            $notNames = ['startup', 'founder', 'investor', 'program', 'new', 'why', 'how', 'what', 'who', 'the'];
            if (strlen($orgName) < 2 || in_array(strtolower($orgName), $notNames, true)) {
                $orgName = null;
            }
        }

        // --- Category rules (order matters: the last matching rule wins) -----
        $categoryRules = [
            ['pattern' => '/\b(grant|funding|award|prize)\b/i', 'type' => 'grant', 'opportunity_type' => 'grant', 'score' => 75],
            ['pattern' => '/\b(competition|contest|challenge|hackathon)\b/i', 'type' => 'competition', 'opportunity_type' => 'competition', 'score' => 65],
            ['pattern' => '/\b(demo day|pitch day|investor day)\b/i', 'type' => 'demo_day', 'opportunity_type' => 'demo_day', 'score' => 60],
            ['pattern' => '/\b(accelerator|incubator|venture studio)\b/i', 'type' => 'investment', 'opportunity_type' => 'accelerator', 'score' => 80],
            ['pattern' => '/\b(vc|venture capital|seed fund|series [a-z])\b/i', 'type' => 'investment', 'opportunity_type' => 'vc_funding', 'score' => 85],
            ['pattern' => '/\b(partnership|collaboration|strategic alliance)\b/i', 'type' => 'partnership', 'opportunity_type' => 'partnership', 'score' => 50],
            ['pattern' => '/\b(conference|summit|meetup|webinar|workshop)\b/i', 'type' => 'event', 'opportunity_type' => 'event', 'score' => 40],
        ];

        $type = 'news';
        $opportunityType = 'other';
        $score = 30;
        $tags = [];

        foreach ($categoryRules as $rule) {
            if (preg_match($rule['pattern'], $text)) {
                $type = $rule['type'];
                $opportunityType = $rule['opportunity_type'];
                $score = $rule['score'];
                // Each rule's tag is identical to its opportunity type.
                $tags[] = $rule['opportunity_type'];
            }
        }

        // --- Industry tags ----------------------------------------------------
        $industryPatterns = [
            'ai' => '/\b(ai|artificial intelligence|machine learning|deep learning)\b/i',
            'fintech' => '/\b(fintech|financial technology|blockchain|crypto)\b/i',
            'saas' => '/\b(saas|software|cloud)\b/i',
            'mobility' => '/\b(mobility|transportation|ev|electric vehicle|logistics)\b/i',
            'healthtech' => '/\b(healthtech|healthcare|biotech|medtech)\b/i',
            'cleantech' => '/\b(climate|cleantech|sustainability|green energy|renewable)\b/i',
        ];

        foreach ($industryPatterns as $tag => $pattern) {
            if (preg_match($pattern, $text)) {
                $tags[] = $tag;
            }
        }

        // Truncate by characters, not bytes, so a multibyte character is never
        // cut in half (which would leave invalid UTF-8 in the summary).
        $summary = function_exists('mb_substr')
            ? mb_substr($description, 0, 200, 'UTF-8')
            : substr($description, 0, 200);

        return [
            'type' => $type,
            'opportunity_type' => $opportunityType,
            'score' => $score,
            // Reindex so the tags always serialize as a JSON list.
            'tags' => array_values(array_unique($tags)),
            // The fallback treats every entry as an opportunity.
            'is_opportunity' => true,
            'summary' => $summary,
            'organization_name' => $orgName,
            // The keyword fallback has no country detection.
            'country' => null,
        ];
    }
}