245 lines
7.8 KiB
PHP
245 lines
7.8 KiB
PHP
<?php
|
|
|
|
namespace App\Services\Crawler;
|
|
|
|
use Throwable;
|
|
|
|
/**
 * Classifies crawled startup/investment content.
 *
 * When a Gemini API key is configured the text is sent to the Gemini
 * `generateContent` endpoint and the JSON answer is validated; otherwise (or
 * when the request or the answer is unusable) a keyword heuristic is used.
 */
class AiAnalyzer
{
    /** Model used when the configuration does not name one. */
    private const DEFAULT_MODEL = 'gemini-1.5-flash-latest';

    /** Base URL of the Gemini REST API models collection. */
    private const API_BASE_URL = 'https://generativelanguage.googleapis.com/v1beta/models/';

    /**
     * Keyword classification rules: [pattern, type, opportunity_type, score].
     *
     * Rules are evaluated in order and a later match overrides the type,
     * opportunity type and score of an earlier one; every match contributes
     * its opportunity type as a tag.
     */
    private const CLASSIFICATION_RULES = [
        ['/\b(grant|funding|award|prize)\b/i', 'grant', 'grant', 75],
        ['/\b(competition|contest|challenge|hackathon)\b/i', 'competition', 'competition', 65],
        ['/\b(demo day|pitch day|investor day)\b/i', 'demo_day', 'demo_day', 60],
        ['/\b(accelerator|incubator|venture studio)\b/i', 'investment', 'accelerator', 80],
        ['/\b(vc|venture capital|seed fund|series [a-z])\b/i', 'investment', 'vc_funding', 85],
        ['/\b(partnership|collaboration|strategic alliance)\b/i', 'partnership', 'partnership', 50],
        ['/\b(conference|summit|meetup|webinar|workshop)\b/i', 'event', 'event', 40],
    ];

    /** Industry tag => pattern that triggers it. */
    private const INDUSTRY_TAGS = [
        'ai' => '/\b(ai|artificial intelligence|machine learning|deep learning)\b/i',
        'fintech' => '/\b(fintech|financial technology|blockchain|crypto)\b/i',
        'saas' => '/\b(saas|software|cloud)\b/i',
        'mobility' => '/\b(mobility|transportation|ev|electric vehicle|logistics)\b/i',
        'healthtech' => '/\b(healthtech|healthcare|biotech|medtech)\b/i',
        'cleantech' => '/\b(climate|cleantech|sustainability|green energy|renewable)\b/i',
    ];

    private ?string $apiKey;

    private string $model;

    /**
     * Reads the Gemini credentials and model name from config/ai.php.
     *
     * A missing or empty API key leaves the analyzer in keyword-only mode; a
     * missing or empty model name selects DEFAULT_MODEL.
     */
    public function __construct()
    {
        $config = require __DIR__ . '/../../../config/ai.php';
        $gemini = is_array($config) && is_array($config['gemini'] ?? null) ? $config['gemini'] : [];

        $apiKey = $gemini['api_key'] ?? null;
        $model = $gemini['model'] ?? null;

        $this->apiKey = is_string($apiKey) && $apiKey !== '' ? $apiKey : null;
        $this->model = is_string($model) && $model !== '' ? $model : self::DEFAULT_MODEL;
    }

    /**
     * Analyze text using Google Gemini AI to classify and extract info.
     *
     * The model answer is only trusted after validation: missing or wrongly
     * typed fields are filled from the keyword analysis, the score is clamped
     * to 0-100 and tags are limited to five strings. Any failure (no API key,
     * transport error, unparsable answer) yields the keyword analysis.
     *
     * @return array{type: string, opportunity_type: string, score: int, tags: array, is_opportunity: bool, summary: string, organization_name: ?string, country: ?string}
     */
    public function analyze(string $title, string $description): array
    {
        $fallback = $this->fallbackAnalysis($title, $description);

        if ($this->apiKey === null) {
            return $fallback;
        }

        $prompt = <<<PROMPT
        You are ScoutIQ, an investor intelligence AI. Analyze the following startup/investment content and return a JSON object with:
        - "type": one of ["grant", "competition", "demo_day", "event", "partnership", "investment", "news", "other"]
        - "opportunity_type": one of ["vc_funding", "accelerator", "incubator", "grant", "competition", "demo_day", "event", "partnership", "other"]
        - "score": integer 0-100 (relevance to startups seeking funding)
        - "tags": array of relevant tags (max 5)
        - "is_opportunity": boolean (true if it's a funding/investment opportunity)
        - "summary": 1-2 sentence summary of what this is
        - "organization_name": extracted organization name if any, or null
        - "country": extracted country if any, or null

        Title: {$title}
        Description: {$description}

        Respond ONLY with valid JSON, no markdown, no code fences.
        PROMPT;

        try {
            $decoded = $this->decodeJsonObject($this->callGemini($prompt));
            if ($decoded !== null && isset($decoded['type'])) {
                return $this->normalizeAnalysis($decoded, $fallback);
            }
        } catch (Throwable $e) {
            // Best effort: record why the AI path failed, then use the keyword result.
            error_log('AiAnalyzer::analyze fell back to keyword analysis: ' . $e->getMessage());
        }

        return $fallback;
    }

    /**
     * Analyze content for organization/investor extraction.
     *
     * The returned array always contains the keys name, type, country, website
     * and description. Without a usable AI answer the first four are null and
     * the description is the start of $text (at most 500 bytes).
     *
     * @return array{name: ?string, type: ?string, country: ?string, website: ?string, description: string}
     */
    public function extractOrganization(string $text): array
    {
        $defaults = [
            'name' => null,
            'type' => null,
            'country' => null,
            'website' => null,
            'description' => $this->truncateUtf8($text, 500),
        ];

        if ($this->apiKey === null) {
            return $defaults;
        }

        $prompt = <<<PROMPT
        Extract organization/investor information from this text. Return JSON:
        - "name": organization name or null
        - "type": one of ["vc", "angel", "accelerator", "incubator", "venture_studio", "partner", "other"] or null
        - "country": country name or null
        - "website": website URL or null
        - "description": brief description max 200 chars

        Text: {$text}

        Respond ONLY with valid JSON.
        PROMPT;

        try {
            $decoded = $this->decodeJsonObject($this->callGemini($prompt));
            if ($decoded !== null) {
                // Merge over the defaults so every documented key is present.
                $result = array_replace($defaults, $decoded);
                if (!is_string($result['description']) || $result['description'] === '') {
                    $result['description'] = $defaults['description'];
                }

                return $result;
            }
        } catch (Throwable $e) {
            // Best effort: record why the AI path failed, then use the defaults.
            error_log('AiAnalyzer::extractOrganization fell back to defaults: ' . $e->getMessage());
        }

        return $defaults;
    }

    /**
     * Call Gemini API.
     *
     * The API key travels in the x-goog-api-key header rather than the query
     * string, and TLS peer verification is left at PHP's secure defaults.
     *
     * @return string Text of the first candidate part.
     *
     * @throws \RuntimeException When the request fails or carries no candidate text.
     * @throws \JsonException When the request payload cannot be encoded.
     */
    private function callGemini(string $prompt): string
    {
        $url = self::API_BASE_URL . rawurlencode($this->model) . ':generateContent';

        // Crawled text may contain invalid UTF-8; substitute instead of failing to encode.
        $payload = json_encode(
            [
                'contents' => [
                    [
                        'parts' => [
                            ['text' => $prompt],
                        ],
                    ],
                ],
                'generationConfig' => [
                    'temperature' => 0.2,
                    'maxOutputTokens' => 500,
                ],
            ],
            JSON_THROW_ON_ERROR | JSON_INVALID_UTF8_SUBSTITUTE
        );

        $context = stream_context_create([
            'http' => [
                'method' => 'POST',
                'header' => "Content-Type: application/json\r\n"
                    . "x-goog-api-key: {$this->apiKey}\r\n",
                'content' => $payload,
                'timeout' => 30,
            ],
        ]);

        // The warning is suppressed because failure is reported through the
        // false return value, which is turned into an exception below.
        $response = @file_get_contents($url, false, $context);
        if ($response === false || $response === '') {
            throw new \RuntimeException('Gemini request failed or returned an empty body.');
        }

        $data = json_decode($response, true);
        $text = is_array($data) ? ($data['candidates'][0]['content']['parts'][0]['text'] ?? null) : null;
        if (!is_string($text)) {
            throw new \RuntimeException('Gemini response contained no candidate text.');
        }

        return $text;
    }

    /**
     * Decodes a model answer that should be a JSON object.
     *
     * Models frequently wrap JSON in Markdown code fences or add a sentence
     * around it, so when the raw text does not decode the outermost {...}
     * span is tried as well.
     *
     * @return array|null Decoded array, or null when no JSON array/object is found.
     */
    private function decodeJsonObject(string $raw): ?array
    {
        $decoded = json_decode(trim($raw), true);

        if (!is_array($decoded)) {
            $start = strpos($raw, '{');
            $end = strrpos($raw, '}');
            if ($start !== false && $end !== false && $end > $start) {
                $decoded = json_decode(substr($raw, $start, $end - $start + 1), true);
            }
        }

        return is_array($decoded) ? $decoded : null;
    }

    /**
     * Validates a decoded AI analysis against the keyword analysis.
     *
     * Extra keys supplied by the model are preserved; documented keys that
     * are missing or have the wrong type take the value from $defaults.
     *
     * @param array $ai       Decoded model answer.
     * @param array $defaults Result of fallbackAnalysis() for the same input.
     */
    private function normalizeAnalysis(array $ai, array $defaults): array
    {
        $result = array_replace($defaults, $ai);

        foreach (['type', 'opportunity_type', 'summary'] as $key) {
            if (!is_string($result[$key]) || $result[$key] === '') {
                $result[$key] = $defaults[$key];
            }
        }

        $result['score'] = is_numeric($result['score'])
            ? max(0, min(100, (int) round((float) $result['score'])))
            : $defaults['score'];

        $result['tags'] = is_array($result['tags'])
            ? array_slice(array_values(array_filter($result['tags'], 'is_string')), 0, 5)
            : $defaults['tags'];

        if (!is_bool($result['is_opportunity'])) {
            $result['is_opportunity'] = $defaults['is_opportunity'];
        }

        return $result;
    }

    /**
     * Returns at most $maxBytes bytes of $text without splitting a UTF-8 character.
     */
    private function truncateUtf8(string $text, int $maxBytes): string
    {
        if (strlen($text) <= $maxBytes) {
            return $text;
        }

        $cut = substr($text, 0, $maxBytes);

        // A byte cut can sever a multi-byte sequence; drop the dangling lead
        // byte (and any continuation bytes after it) when that happened.
        if (preg_match('//u', $cut) !== 1) {
            $cut = (string) preg_replace('/[\xC0-\xFF][\x80-\xBF]*\z/', '', $cut);
        }

        return $cut;
    }

    /**
     * Simple keyword-based fallback when AI is unavailable.
     *
     * Starts from type "news", opportunity type "other" and score 30, then
     * applies CLASSIFICATION_RULES in order (last match wins) and appends the
     * industry tags whose patterns occur in the title or description.
     */
    private function fallbackAnalysis(string $title, string $description): array
    {
        // Every pattern is case-insensitive, so the text needs no lowercasing.
        $text = $title . ' ' . $description;

        $type = 'news';
        $opportunityType = 'other';
        $score = 30;
        $tags = [];

        foreach (self::CLASSIFICATION_RULES as [$pattern, $ruleType, $ruleOpportunity, $ruleScore]) {
            if (preg_match($pattern, $text) === 1) {
                $type = $ruleType;
                $opportunityType = $ruleOpportunity;
                $score = $ruleScore;
                $tags[] = $ruleOpportunity;
            }
        }

        foreach (self::INDUSTRY_TAGS as $tag => $pattern) {
            if (preg_match($pattern, $text) === 1) {
                $tags[] = $tag;
            }
        }

        return [
            'type' => $type,
            'opportunity_type' => $opportunityType,
            'score' => $score,
            // array_values keeps the tags a JSON list after de-duplication.
            'tags' => array_values(array_unique($tags)),
            // Treat all entries as opportunities by default.
            'is_opportunity' => true,
            'summary' => $this->truncateUtf8($description, 200),
            'organization_name' => null,
            'country' => null,
        ];
    }
}