Add complete ScoutIQ system: Crawler (RSS+AI), CRUD Controllers (Organizations, Contacts, Opportunities, Sources), dynamic Views, API routes, CLI collector

This commit is contained in:
Hamza-Ayed
2026-06-05 02:23:32 +03:00
parent d2f323a563
commit bd7984f8e3
20 changed files with 2084 additions and 0 deletions

View File

@@ -0,0 +1,163 @@
<?php
namespace App\Controllers\Admin;
use App\Controllers\Controller;
use App\Core\Request;
use App\Core\Response;
use App\Services\Database\Connection;
use PDO;
use Throwable;
/**
 * Admin CRUD for contacts and their interaction log.
 *
 * Contacts are soft-deleted through the `deleted_at` column; the read
 * queries in this class exclude rows where it is set.
 */
class ContactsController extends Controller
{
    /** Number of contacts shown per listing page. */
    private const PER_PAGE = 20;

    private PDO $pdo;

    public function __construct(Connection $connection)
    {
        parent::__construct();
        $this->pdo = $connection->getPdo();
    }

    /**
     * Paginated contact list, optionally filtered by a free-text search.
     *
     * Reads `search` and `page` from the query string and renders
     * `admin/contacts/index`.
     */
    public function index(Request $request, Response $response): string
    {
        $search = (string)$request->get('search', '');
        $page = max(1, (int)$request->get('page', 1));
        $perPage = self::PER_PAGE;
        $offset = ($page - 1) * $perPage;

        $where = ['c.deleted_at IS NULL'];
        $params = [];
        // Compare with '' instead of relying on truthiness so that a search for "0" is honoured.
        if ($search !== '') {
            $where[] = "(c.name LIKE ? ESCAPE '!' OR c.email LIKE ? ESCAPE '!' OR c.phone LIKE ? ESCAPE '!')";
            $pattern = $this->likePattern($search);
            array_push($params, $pattern, $pattern, $pattern);
        }
        $whereClause = implode(' AND ', $where);

        $stmt = $this->pdo->prepare("SELECT COUNT(*) FROM contacts c WHERE {$whereClause}");
        $stmt->execute($params);
        $total = (int)$stmt->fetchColumn();

        $contacts = $this->fetchPage(
            "SELECT c.*, org.name as org_name,
                    (SELECT COUNT(*) FROM interactions WHERE contact_id = c.id) as interaction_count
             FROM contacts c
             LEFT JOIN organizations org ON org.id = c.organization_id
             WHERE {$whereClause}
             ORDER BY c.updated_at DESC
             LIMIT ? OFFSET ?",
            $params,
            $perPage,
            $offset
        );

        return $this->render('admin/contacts/index', [
            'contacts' => $contacts,
            'total' => $total,
            'page' => $page,
            'perPage' => $perPage,
            'search' => $search,
        ], 'admin');
    }

    /**
     * Detail page for one contact together with its interactions (newest first).
     * Redirects to the list when the contact is missing or soft-deleted.
     */
    public function show(Request $request, Response $response, int $id): string
    {
        $stmt = $this->pdo->prepare(
            "SELECT c.*, org.name as org_name, org.id as org_id
             FROM contacts c
             LEFT JOIN organizations org ON org.id = c.organization_id
             WHERE c.id = ? AND c.deleted_at IS NULL"
        );
        $stmt->execute([$id]);
        $contact = $stmt->fetch();
        if (!$contact) {
            $response->redirect('/admin/contacts');
            return '';
        }

        $stmt = $this->pdo->prepare("SELECT * FROM interactions WHERE contact_id = ? ORDER BY created_at DESC");
        $stmt->execute([$id]);
        $interactions = $stmt->fetchAll();

        return $this->render('admin/contacts/show', [
            'contact' => $contact,
            'interactions' => $interactions,
        ], 'admin');
    }

    /**
     * Empty contact form. An `organization_id` query parameter pre-selects
     * the organization in the form.
     */
    public function create(Request $request, Response $response): string
    {
        return $this->render('admin/contacts/form', [
            'contact' => null,
            'organizations' => $this->organizationOptions(),
            'selectedOrgId' => $request->get('organization_id', ''),
        ], 'admin');
    }

    /**
     * Edit form for an existing contact; redirects to the list when not found.
     */
    public function edit(Request $request, Response $response, int $id): string
    {
        $stmt = $this->pdo->prepare("SELECT * FROM contacts WHERE id = ? AND deleted_at IS NULL");
        $stmt->execute([$id]);
        $contact = $stmt->fetch();
        if (!$contact) {
            $response->redirect('/admin/contacts');
            return '';
        }

        return $this->render('admin/contacts/form', [
            'contact' => $contact,
            'organizations' => $this->organizationOptions(),
            'selectedOrgId' => $contact['organization_id'],
        ], 'admin');
    }

    /**
     * Create a contact, or update one when a non-zero `id` is posted.
     *
     * Empty optional fields are stored as NULL. The outcome is reported
     * through a flash message and a redirect.
     */
    public function store(Request $request, Response $response): void
    {
        // Cast so the value is safe to concatenate into the redirect path below.
        $id = (int)$request->post('id', 0);
        $name = trim((string)$request->post('name', ''));
        $email = $request->post('email', '');
        $phone = $request->post('phone', '');
        $position = $request->post('position', '');
        $organizationId = $request->post('organization_id', '');
        $notes = $request->post('notes', '');

        if ($name === '') {
            $this->session->setFlash('error', 'Error: contact name is required.');
            $response->redirect('/admin/contacts');
            return;
        }

        $fields = [$name, $email ?: null, $phone ?: null, $position ?: null, $organizationId ?: null, $notes];

        try {
            if ($id) {
                // Soft-deleted rows must not be modified through the form.
                $stmt = $this->pdo->prepare(
                    "UPDATE contacts SET name=?, email=?, phone=?, position=?, organization_id=?, notes=?
                     WHERE id=? AND deleted_at IS NULL"
                );
                $stmt->execute(array_merge($fields, [$id]));
            } else {
                $stmt = $this->pdo->prepare(
                    "INSERT INTO contacts (name, email, phone, position, organization_id, notes) VALUES (?, ?, ?, ?, ?, ?)"
                );
                $stmt->execute($fields);
                $id = (int)$this->pdo->lastInsertId();
            }
            $this->session->setFlash('success', 'Contact saved.');
            $response->redirect('/admin/contacts/' . $id);
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error: ' . $e->getMessage());
            $response->redirect('/admin/contacts');
        }
    }

    /**
     * Soft-delete a contact by stamping `deleted_at`.
     */
    public function delete(Request $request, Response $response, int $id): void
    {
        try {
            // The extra condition keeps the original deletion time if the row was already deleted.
            $this->pdo
                ->prepare("UPDATE contacts SET deleted_at = NOW() WHERE id = ? AND deleted_at IS NULL")
                ->execute([$id]);
            $this->session->setFlash('success', 'Contact deleted.');
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error deleting contact.');
        }
        $response->redirect('/admin/contacts');
    }

    /**
     * Append an interaction (posted `type`, default "note", and `notes`) to a
     * contact, then return to that contact's page.
     */
    public function addInteraction(Request $request, Response $response, int $contactId): void
    {
        $type = $request->post('type', 'note');
        $notes = $request->post('notes', '');
        try {
            $stmt = $this->pdo->prepare("INSERT INTO interactions (contact_id, type, notes) VALUES (?, ?, ?)");
            $stmt->execute([$contactId, $type, $notes]);
            $this->session->setFlash('success', 'Interaction logged.');
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error: ' . $e->getMessage());
        }
        $response->redirect('/admin/contacts/' . $contactId);
    }

    /**
     * Id/name pairs of all non-deleted organizations, ordered by name, for the form's select box.
     */
    private function organizationOptions(): array
    {
        return $this->pdo
            ->query("SELECT id, name FROM organizations WHERE deleted_at IS NULL ORDER BY name")
            ->fetchAll();
    }

    /**
     * Build a "contains" LIKE pattern with the wildcards in $term neutralised.
     * The SQL must declare `ESCAPE '!'` for the escaping to take effect.
     */
    private function likePattern(string $term): string
    {
        return '%' . str_replace(['!', '%', '_'], ['!!', '!%', '!_'], $term) . '%';
    }

    /**
     * Run a query whose last two placeholders are `LIMIT ? OFFSET ?`.
     *
     * execute(array) sends every value as a string, and MySQL rejects a quoted
     * LIMIT/OFFSET when prepares are emulated, so both are bound as integers.
     */
    private function fetchPage(string $sql, array $params, int $limit, int $offset): array
    {
        $stmt = $this->pdo->prepare($sql);
        $position = 1;
        foreach ($params as $value) {
            $stmt->bindValue($position++, $value);
        }
        $stmt->bindValue($position++, $limit, PDO::PARAM_INT);
        $stmt->bindValue($position, $offset, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll();
    }
}

View File

@@ -0,0 +1,112 @@
<?php
namespace App\Controllers\Admin;
use App\Controllers\Controller;
use App\Core\Request;
use App\Core\Response;
use App\Services\Database\Connection;
use PDO;
use Throwable;
/**
 * Read-only admin views for opportunities: a filterable list and a detail page.
 */
class OpportunitiesController extends Controller
{
    /** Number of opportunities shown per listing page. */
    private const PER_PAGE = 20;

    /** Opportunity types offered as filter choices. */
    private const TYPES = ['grant', 'competition', 'demo_day', 'event', 'partnership', 'investment', 'other'];

    /** Opportunity statuses offered as filter choices. */
    private const STATUSES = ['active', 'closed', 'expired'];

    private PDO $pdo;

    public function __construct(Connection $connection)
    {
        parent::__construct();
        $this->pdo = $connection->getPdo();
    }

    /**
     * Paginated list filtered by `type`, `status` and free-text `search`,
     * ordered by score and then by creation date. Also supplies per-type
     * counts for the sidebar.
     */
    public function index(Request $request, Response $response): string
    {
        $type = (string)$request->get('type', '');
        $search = (string)$request->get('search', '');
        $status = (string)$request->get('status', '');
        $page = max(1, (int)$request->get('page', 1));
        $perPage = self::PER_PAGE;
        $offset = ($page - 1) * $perPage;

        $where = ['o.deleted_at IS NULL'];
        $params = [];
        if ($type !== '') {
            $where[] = 'o.type = ?';
            $params[] = $type;
        }
        if ($status !== '') {
            $where[] = 'o.status = ?';
            $params[] = $status;
        }
        if ($search !== '') {
            $where[] = "(o.title LIKE ? ESCAPE '!' OR o.description LIKE ? ESCAPE '!')";
            $pattern = $this->likePattern($search);
            array_push($params, $pattern, $pattern);
        }
        $whereClause = implode(' AND ', $where);

        $stmt = $this->pdo->prepare("SELECT COUNT(*) FROM opportunities o WHERE {$whereClause}");
        $stmt->execute($params);
        $total = (int)$stmt->fetchColumn();

        $opportunities = $this->fetchPage(
            "SELECT o.*, org.name as org_name,
                    GROUP_CONCAT(DISTINCT t.name) as tag_names
             FROM opportunities o
             LEFT JOIN organizations org ON org.id = o.organization_id
             LEFT JOIN opportunity_tags ot ON ot.opportunity_id = o.id
             LEFT JOIN tags t ON t.id = ot.tag_id
             WHERE {$whereClause}
             GROUP BY o.id
             ORDER BY o.score DESC, o.created_at DESC
             LIMIT ? OFFSET ?",
            $params,
            $perPage,
            $offset
        );

        // type => count map for the sidebar; deliberately ignores the active filters.
        $typeCounts = $this->pdo->query(
            "SELECT type, COUNT(*) as count FROM opportunities WHERE deleted_at IS NULL GROUP BY type"
        )->fetchAll(PDO::FETCH_KEY_PAIR);

        return $this->render('admin/opportunities/index', [
            'opportunities' => $opportunities,
            'total' => $total,
            'page' => $page,
            'perPage' => $perPage,
            'type' => $type,
            'status' => $status,
            'search' => $search,
            'typeCounts' => $typeCounts,
            'types' => self::TYPES,
            'statuses' => self::STATUSES,
        ], 'admin');
    }

    /**
     * Detail page for one opportunity with its organization, tags and
     * non-deleted applications. Redirects to the list when not found.
     */
    public function show(Request $request, Response $response, int $id): string
    {
        $stmt = $this->pdo->prepare(
            "SELECT o.*, org.name as org_name, org.type as org_type, org.website_url as org_website,
                    GROUP_CONCAT(DISTINCT t.name) as tag_names
             FROM opportunities o
             LEFT JOIN organizations org ON org.id = o.organization_id
             LEFT JOIN opportunity_tags ot ON ot.opportunity_id = o.id
             LEFT JOIN tags t ON t.id = ot.tag_id
             WHERE o.id = ? AND o.deleted_at IS NULL
             GROUP BY o.id"
        );
        $stmt->execute([$id]);
        $opportunity = $stmt->fetch();
        if (!$opportunity) {
            $response->redirect('/admin/opportunities');
            return '';
        }

        $stmt = $this->pdo->prepare(
            "SELECT * FROM applications WHERE opportunity_id = ? AND deleted_at IS NULL ORDER BY created_at DESC"
        );
        $stmt->execute([$id]);
        $applications = $stmt->fetchAll();

        return $this->render('admin/opportunities/show', [
            'opportunity' => $opportunity,
            'applications' => $applications,
        ], 'admin');
    }

    /**
     * Build a "contains" LIKE pattern with the wildcards in $term neutralised.
     * The SQL must declare `ESCAPE '!'` for the escaping to take effect.
     */
    private function likePattern(string $term): string
    {
        return '%' . str_replace(['!', '%', '_'], ['!!', '!%', '!_'], $term) . '%';
    }

    /**
     * Run a query whose last two placeholders are `LIMIT ? OFFSET ?`.
     *
     * execute(array) sends every value as a string, and MySQL rejects a quoted
     * LIMIT/OFFSET when prepares are emulated, so both are bound as integers.
     */
    private function fetchPage(string $sql, array $params, int $limit, int $offset): array
    {
        $stmt = $this->pdo->prepare($sql);
        $position = 1;
        foreach ($params as $value) {
            $stmt->bindValue($position++, $value);
        }
        $stmt->bindValue($position++, $limit, PDO::PARAM_INT);
        $stmt->bindValue($position, $offset, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll();
    }
}

View File

@@ -0,0 +1,219 @@
<?php
namespace App\Controllers\Admin;
use App\Controllers\Controller;
use App\Core\Request;
use App\Core\Response;
use App\Services\Database\Connection;
use PDO;
use Throwable;
/**
 * Admin CRUD for organizations (investors, accelerators, partners, ...).
 *
 * Organizations are soft-deleted through the `deleted_at` column.
 */
class OrganizationsController extends Controller
{
    /** Number of organizations shown per listing page. */
    private const PER_PAGE = 20;

    /** Organization types offered by the filter and the form. */
    private const TYPES = ['vc', 'angel', 'accelerator', 'incubator', 'venture_studio', 'partner'];

    /** CRM pipeline statuses offered by the form. */
    private const CRM_STATUSES = [
        'New', 'Researching', 'Contacted', 'Follow Up', 'Meeting Scheduled', 'Interested', 'Rejected', 'Invested',
    ];

    private PDO $pdo;

    public function __construct(Connection $connection)
    {
        parent::__construct();
        $this->pdo = $connection->getPdo();
    }

    /**
     * List organizations, filtered by `type` and free-text `search`, paginated.
     */
    public function index(Request $request, Response $response): string
    {
        $type = (string)$request->get('type', '');
        $search = (string)$request->get('search', '');
        $page = max(1, (int)$request->get('page', 1));
        $perPage = self::PER_PAGE;
        $offset = ($page - 1) * $perPage;

        $where = ['o.deleted_at IS NULL'];
        $params = [];
        if ($type !== '') {
            $where[] = 'o.type = ?';
            $params[] = $type;
        }
        if ($search !== '') {
            $where[] = "(o.name LIKE ? ESCAPE '!' OR o.domain LIKE ? ESCAPE '!' OR o.description LIKE ? ESCAPE '!')";
            $pattern = $this->likePattern($search);
            array_push($params, $pattern, $pattern, $pattern);
        }
        $whereClause = implode(' AND ', $where);

        $stmt = $this->pdo->prepare("SELECT COUNT(*) FROM organizations o WHERE {$whereClause}");
        $stmt->execute($params);
        $total = (int)$stmt->fetchColumn();

        $organizations = $this->fetchPage(
            "SELECT o.*,
                    (SELECT COUNT(*) FROM opportunities WHERE organization_id = o.id AND deleted_at IS NULL) as opportunities_count
             FROM organizations o
             WHERE {$whereClause}
             ORDER BY o.updated_at DESC
             LIMIT ? OFFSET ?",
            $params,
            $perPage,
            $offset
        );

        return $this->render('admin/organizations/index', [
            'organizations' => $organizations,
            'total' => $total,
            'page' => $page,
            'perPage' => $perPage,
            'type' => $type,
            'search' => $search,
            'types' => self::TYPES,
            'statuses' => self::CRM_STATUSES,
        ], 'admin');
    }

    /**
     * Show a single organization with its opportunities, contacts and the
     * latest activity-log rows that mention its name.
     */
    public function show(Request $request, Response $response, int $id): string
    {
        $org = $this->findOrganization($id);
        if (!$org) {
            $response->redirect('/admin/organizations');
            return '';
        }

        $stmt = $this->pdo->prepare(
            "SELECT o.*, GROUP_CONCAT(t.name) as tag_names
             FROM opportunities o
             LEFT JOIN opportunity_tags ot ON ot.opportunity_id = o.id
             LEFT JOIN tags t ON t.id = ot.tag_id
             WHERE o.organization_id = ? AND o.deleted_at IS NULL
             GROUP BY o.id
             ORDER BY o.score DESC"
        );
        $stmt->execute([$id]);
        $opportunities = $stmt->fetchAll();

        $stmt = $this->pdo->prepare(
            "SELECT c.*,
                    (SELECT COUNT(*) FROM interactions WHERE contact_id = c.id) as interaction_count
             FROM contacts c
             WHERE c.organization_id = ? AND c.deleted_at IS NULL
             ORDER BY c.created_at DESC"
        );
        $stmt->execute([$id]);
        $contacts = $stmt->fetchAll();

        // Activity rows are matched by organization name inside the description;
        // the name is escaped so a '%' or '_' in it is matched literally.
        $stmt = $this->pdo->prepare(
            "SELECT * FROM activity_logs WHERE description LIKE ? ESCAPE '!' ORDER BY created_at DESC LIMIT 20"
        );
        $stmt->execute([$this->likePattern((string)$org['name'])]);
        $activities = $stmt->fetchAll();

        return $this->render('admin/organizations/show', [
            'org' => $org,
            'opportunities' => $opportunities,
            'contacts' => $contacts,
            'activities' => $activities,
        ], 'admin');
    }

    /**
     * Show the empty create form.
     */
    public function create(Request $request, Response $response): string
    {
        return $this->render('admin/organizations/form', [
            'org' => null,
            'types' => self::TYPES,
            'statuses' => self::CRM_STATUSES,
        ], 'admin');
    }

    /**
     * Show the edit form; redirects to the list when the organization is not found.
     */
    public function edit(Request $request, Response $response, int $id): string
    {
        $org = $this->findOrganization($id);
        if (!$org) {
            $response->redirect('/admin/organizations');
            return '';
        }

        return $this->render('admin/organizations/form', [
            'org' => $org,
            'types' => self::TYPES,
            'statuses' => self::CRM_STATUSES,
        ], 'admin');
    }

    /**
     * Save organization (create, or update when a non-zero `id` is posted).
     *
     * Empty optional fields are stored as NULL. On failure the user is sent
     * back to the organization page (update) or the create form (insert).
     */
    public function store(Request $request, Response $response): void
    {
        // Cast so the value is safe to concatenate into the redirect path below.
        $id = (int)$request->post('id', 0);
        $name = trim((string)$request->post('name', ''));
        $domain = $request->post('domain', '');
        $type = $request->post('type', 'partner');
        $country = $request->post('country', '');
        $city = $request->post('city', '');
        $websiteUrl = $request->post('website_url', '');
        $description = $request->post('description', '');
        $crmStatus = $request->post('crm_status', 'New');
        $fundingStage = $request->post('funding_stage', '');

        $returnPath = '/admin/organizations' . ($id ? '/' . $id : '/create');

        if ($name === '') {
            $this->session->setFlash('error', 'Error saving organization: name is required.');
            $response->redirect($returnPath);
            return;
        }

        $fields = [
            $name,
            $domain ?: null,
            $type,
            $country ?: null,
            $city ?: null,
            $websiteUrl ?: null,
            $description,
            $crmStatus,
            $fundingStage ?: null,
        ];

        try {
            if ($id) {
                $stmt = $this->pdo->prepare(
                    "UPDATE organizations SET name=?, domain=?, type=?, country=?, city=?, website_url=?,
                     description=?, crm_status=?, funding_stage=? WHERE id=?"
                );
                $stmt->execute(array_merge($fields, [$id]));
                $this->session->setFlash('success', 'Organization updated successfully.');
            } else {
                $stmt = $this->pdo->prepare(
                    "INSERT INTO organizations (name, domain, type, country, city, website_url, description, crm_status, funding_stage)
                     VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)"
                );
                $stmt->execute($fields);
                $id = (int)$this->pdo->lastInsertId();
                $this->session->setFlash('success', 'Organization created successfully.');
            }
            $response->redirect('/admin/organizations/' . $id);
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error saving organization: ' . $e->getMessage());
            $response->redirect($returnPath);
        }
    }

    /**
     * Delete organization (soft delete by stamping `deleted_at`).
     */
    public function delete(Request $request, Response $response, int $id): void
    {
        try {
            $stmt = $this->pdo->prepare("UPDATE organizations SET deleted_at = NOW() WHERE id = ?");
            $stmt->execute([$id]);
            $this->session->setFlash('success', 'Organization deleted.');
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error deleting organization.');
        }
        $response->redirect('/admin/organizations');
    }

    /**
     * Load one non-deleted organization row; returns false when there is none.
     *
     * @return array|false
     */
    private function findOrganization(int $id)
    {
        $stmt = $this->pdo->prepare("SELECT * FROM organizations WHERE id = ? AND deleted_at IS NULL");
        $stmt->execute([$id]);

        return $stmt->fetch();
    }

    /**
     * Build a "contains" LIKE pattern with the wildcards in $term neutralised.
     * The SQL must declare `ESCAPE '!'` for the escaping to take effect.
     */
    private function likePattern(string $term): string
    {
        return '%' . str_replace(['!', '%', '_'], ['!!', '!%', '!_'], $term) . '%';
    }

    /**
     * Run a query whose last two placeholders are `LIMIT ? OFFSET ?`.
     *
     * execute(array) sends every value as a string, and MySQL rejects a quoted
     * LIMIT/OFFSET when prepares are emulated, so both are bound as integers.
     */
    private function fetchPage(string $sql, array $params, int $limit, int $offset): array
    {
        $stmt = $this->pdo->prepare($sql);
        $position = 1;
        foreach ($params as $value) {
            $stmt->bindValue($position++, $value);
        }
        $stmt->bindValue($position++, $limit, PDO::PARAM_INT);
        $stmt->bindValue($position, $offset, PDO::PARAM_INT);
        $stmt->execute();

        return $stmt->fetchAll();
    }
}

View File

@@ -0,0 +1,93 @@
<?php
namespace App\Controllers\Admin;
use App\Controllers\Controller;
use App\Core\Request;
use App\Core\Response;
use App\Services\Database\Connection;
use App\Services\Crawler\Collector;
use PDO;
use Throwable;
/**
 * Admin management of crawl sources, plus a manual "run now" action that
 * hands a single source to the Collector.
 */
class SourcesController extends Controller
{
    /** URL schemes a source may use; the crawler fetches the URL server-side. */
    private const ALLOWED_SCHEMES = ['http', 'https'];

    private PDO $pdo;
    private Collector $collector;

    public function __construct(Connection $connection, Collector $collector)
    {
        parent::__construct();
        $this->pdo = $connection->getPdo();
        $this->collector = $collector;
    }

    /**
     * List every source (active and inactive), ordered by status then name.
     */
    public function index(Request $request, Response $response): string
    {
        // The previous extra call to Collector::getActiveSources() produced a
        // result that was never used; one query covers all statuses.
        $allSources = $this->pdo->query("SELECT * FROM sources ORDER BY status, name")->fetchAll();

        return $this->render('admin/sources/index', ['sources' => $allSources], 'admin');
    }

    /**
     * Empty source form.
     */
    public function create(Request $request, Response $response): string
    {
        return $this->render('admin/sources/form', ['source' => null], 'admin');
    }

    /**
     * Edit form for an existing source; redirects to the list when not found.
     */
    public function edit(Request $request, Response $response, int $id): string
    {
        $source = $this->findSource($id);
        if (!$source) {
            $response->redirect('/admin/sources');
            return '';
        }

        return $this->render('admin/sources/form', ['source' => $source], 'admin');
    }

    /**
     * Create a source, or update one when a non-zero `id` is posted.
     *
     * The URL must be an absolute http(s) URL: it is later fetched by the
     * crawler, so other schemes (file://, php://, ...) are rejected here.
     */
    public function store(Request $request, Response $response): void
    {
        $id = (int)$request->post('id', 0);
        $name = trim((string)$request->post('name', ''));
        $url = trim((string)$request->post('url', ''));
        $type = $request->post('type', 'rss');
        $status = $request->post('status', 'active');

        $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
        $host = (string)parse_url($url, PHP_URL_HOST);
        if ($name === '' || $host === '' || !in_array($scheme, self::ALLOWED_SCHEMES, true)) {
            $this->session->setFlash('error', 'Error: a name and a valid http(s) URL are required.');
            $response->redirect('/admin/sources');
            return;
        }

        try {
            if ($id) {
                $stmt = $this->pdo->prepare("UPDATE sources SET name=?, url=?, type=?, status=? WHERE id=?");
                $stmt->execute([$name, $url, $type, $status, $id]);
            } else {
                $stmt = $this->pdo->prepare("INSERT INTO sources (name, url, type, status) VALUES (?, ?, ?, ?)");
                $stmt->execute([$name, $url, $type, $status]);
            }
            $this->session->setFlash('success', 'Source saved.');
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Error: ' . $e->getMessage());
        }
        $response->redirect('/admin/sources');
    }

    /**
     * Permanently delete a source (this table is hard-deleted, not soft-deleted).
     */
    public function delete(Request $request, Response $response, int $id): void
    {
        try {
            $this->pdo->prepare("DELETE FROM sources WHERE id = ?")->execute([$id]);
            $this->session->setFlash('success', 'Source deleted.');
        } catch (Throwable $e) {
            // A database error (for example a constraint violation) should not escape the action.
            $this->session->setFlash('error', 'Error deleting source.');
        }
        $response->redirect('/admin/sources');
    }

    /**
     * Run the collector for one source and report the counts it returns.
     */
    public function run(Request $request, Response $response, int $id): void
    {
        $source = $this->findSource($id);
        if (!$source) {
            $this->session->setFlash('error', 'Source not found.');
            $response->redirect('/admin/sources');
            return;
        }

        try {
            $result = $this->collector->collectSource($source);
            $this->session->setFlash('success', "Collected {$result['entries_found']} entries, {$result['opportunities']} new opportunities.");
        } catch (Throwable $e) {
            $this->session->setFlash('error', 'Collection error: ' . $e->getMessage());
        }
        $response->redirect('/admin/sources');
    }

    /**
     * Load one source row; returns false when there is none.
     *
     * @return array|false
     */
    private function findSource(int $id)
    {
        $stmt = $this->pdo->prepare("SELECT * FROM sources WHERE id = ?");
        $stmt->execute([$id]);

        return $stmt->fetch();
    }
}

View File

@@ -0,0 +1,245 @@
<?php
namespace App\Services\Crawler;
use Throwable;
/**
 * Classifies crawled content with Google Gemini, falling back to keyword
 * rules when no API key is configured or the API call does not yield usable JSON.
 */
class AiAnalyzer
{
    /**
     * Keyword rules for the fallback classifier, applied in order; when several
     * match, the last match decides type, opportunity type and score.
     * Each rule: [pattern, type, opportunity_type, score, marks as opportunity, tag].
     */
    private const CLASSIFICATION_RULES = [
        ['/\b(grant|funding|award|prize)\b/i', 'grant', 'grant', 75, true, 'grant'],
        ['/\b(competition|contest|challenge|hackathon)\b/i', 'competition', 'competition', 65, true, 'competition'],
        ['/\b(demo day|pitch day|investor day)\b/i', 'demo_day', 'demo_day', 60, true, 'demo_day'],
        ['/\b(accelerator|incubator|venture studio)\b/i', 'investment', 'accelerator', 80, true, 'accelerator'],
        ['/\b(vc|venture capital|seed fund|series [a-z])\b/i', 'investment', 'vc_funding', 85, true, 'vc_funding'],
        ['/\b(partnership|collaboration|strategic alliance)\b/i', 'partnership', 'partnership', 50, true, 'partnership'],
        ['/\b(conference|summit|meetup|webinar|workshop)\b/i', 'event', 'event', 40, false, 'event'],
    ];

    /** Industry tag => pattern; every matching tag is added. */
    private const INDUSTRY_PATTERNS = [
        'ai' => '/\b(ai|artificial intelligence|machine learning|deep learning)\b/i',
        'fintech' => '/\b(fintech|financial technology|blockchain|crypto)\b/i',
        'saas' => '/\b(saas|software|cloud)\b/i',
        'mobility' => '/\b(mobility|transportation|ev|electric vehicle|logistics)\b/i',
        'healthtech' => '/\b(healthtech|healthcare|biotech|medtech)\b/i',
        'cleantech' => '/\b(climate|cleantech|sustainability|green energy|renewable)\b/i',
    ];

    private ?string $apiKey;
    private string $model;

    public function __construct()
    {
        $config = require __DIR__ . '/../../../config/ai.php';
        $this->apiKey = $config['gemini']['api_key'] ?? null;
        $this->model = $config['gemini']['model'] ?? 'gemini-1.5-flash-latest';
    }

    /**
     * Analyze text using Google Gemini AI to classify and extract info.
     *
     * @return array Keys requested from the model: type, opportunity_type, score,
     *               tags, is_opportunity, summary, organization_name, country.
     *               `score` is always an int in 0..100 and `tags` a list of at
     *               most five non-empty strings; other model-supplied values are
     *               passed through unchanged.
     */
    public function analyze(string $title, string $description): array
    {
        if (!$this->apiKey) {
            return $this->fallbackAnalysis($title, $description);
        }

        $prompt = <<<PROMPT
        You are ScoutIQ, an investor intelligence AI. Analyze the following startup/investment content and return a JSON object with:
        - "type": one of ["grant", "competition", "demo_day", "event", "partnership", "investment", "news", "other"]
        - "opportunity_type": one of ["vc_funding", "accelerator", "incubator", "grant", "competition", "demo_day", "event", "partnership", "other"]
        - "score": integer 0-100 (relevance to startups seeking funding)
        - "tags": array of relevant tags (max 5)
        - "is_opportunity": boolean (true if it's a funding/investment opportunity)
        - "summary": 1-2 sentence summary of what this is
        - "organization_name": extracted organization name if any, or null
        - "country": extracted country if any, or null
        Title: {$title}
        Description: {$description}
        Respond ONLY with valid JSON, no markdown, no code fences.
        PROMPT;

        try {
            $decoded = json_decode($this->extractJson($this->callGemini($prompt)), true);
            if (is_array($decoded) && isset($decoded['type']) && is_string($decoded['type'])) {
                return $this->normalizeAnalysis($decoded);
            }
        } catch (Throwable $e) {
            // Any failure in the AI path degrades to the keyword classifier below.
        }

        return $this->fallbackAnalysis($title, $description);
    }

    /**
     * Analyze content for organization/investor extraction.
     *
     * @return array Keys: name, type, country, website, description. Keys the
     *               model omits are filled from the no-AI default (nulls plus
     *               the first 500 characters of $text as description).
     */
    public function extractOrganization(string $text): array
    {
        $default = [
            'name' => null,
            'type' => null,
            'country' => null,
            'website' => null,
            'description' => $this->truncate($text, 500),
        ];

        if (!$this->apiKey) {
            return $default;
        }

        $prompt = <<<PROMPT
        Extract organization/investor information from this text. Return JSON:
        - "name": organization name or null
        - "type": one of ["vc", "angel", "accelerator", "incubator", "venture_studio", "partner", "other"] or null
        - "country": country name or null
        - "website": website URL or null
        - "description": brief description max 200 chars
        Text: {$text}
        Respond ONLY with valid JSON.
        PROMPT;

        try {
            $decoded = json_decode($this->extractJson($this->callGemini($prompt)), true);
            if (is_array($decoded)) {
                // Array union keeps the model's values and only fills missing keys.
                return $decoded + $default;
            }
        } catch (Throwable $e) {
            // Fall through to the default below.
        }

        return $default;
    }

    /**
     * Call Gemini API and return the text of the first candidate.
     *
     * Returns '{}' when the request fails or the response has no candidate
     * text, so callers always receive something json_decode can handle.
     */
    private function callGemini(string $prompt): string
    {
        $url = "https://generativelanguage.googleapis.com/v1beta/models/{$this->model}:generateContent";

        // Substitute invalid UTF-8 instead of letting json_encode() return false.
        $payload = json_encode([
            'contents' => [
                [
                    'parts' => [
                        ['text' => $prompt],
                    ],
                ],
            ],
            'generationConfig' => [
                'temperature' => 0.2,
                'maxOutputTokens' => 500,
            ],
        ], JSON_INVALID_UTF8_SUBSTITUTE);

        // The key goes in a header rather than the query string so it does not
        // end up in URLs that get logged. No 'ssl' overrides are set: the
        // default peer verification must stay on while a secret is transmitted.
        $context = stream_context_create([
            'http' => [
                'method' => 'POST',
                'header' => "Content-Type: application/json\r\nx-goog-api-key: {$this->apiKey}\r\n",
                'content' => $payload,
                'timeout' => 30,
            ],
        ]);

        // '@' keeps a network failure from emitting a warning; it is reported
        // to the caller through the '{}' return value instead.
        $response = @file_get_contents($url, false, $context);
        if ($response === false || $response === '') {
            return '{}';
        }

        $data = json_decode($response, true);

        return $data['candidates'][0]['content']['parts'][0]['text'] ?? '{}';
    }

    /**
     * Strip a surrounding Markdown code fence (``` or ```json) from a model
     * reply so the JSON inside it can be decoded.
     */
    private function extractJson(string $text): string
    {
        $text = trim($text);
        if (preg_match('/^```(?:json)?\s*(.*?)\s*```$/is', $text, $match) === 1) {
            return $match[1];
        }

        return $text;
    }

    /**
     * Coerce the model-supplied fields that downstream code iterates or does
     * arithmetic on: `score` becomes an int in 0..100 (10 when not numeric) and
     * `tags` a de-duplicated list of at most five non-empty strings.
     */
    private function normalizeAnalysis(array $analysis): array
    {
        $score = $analysis['score'] ?? 10;
        $analysis['score'] = is_numeric($score) ? max(0, min(100, (int)$score)) : 10;

        $tags = [];
        if (is_array($analysis['tags'] ?? null)) {
            foreach ($analysis['tags'] as $tag) {
                if (is_string($tag) && trim($tag) !== '') {
                    $tags[] = trim($tag);
                }
            }
        }
        $analysis['tags'] = array_slice(array_values(array_unique($tags)), 0, 5);

        return $analysis;
    }

    /**
     * Cut $text to at most $limit characters without splitting a multibyte
     * character. A byte-wise substr() can leave invalid UTF-8, which later
     * makes json_encode() fail on the analysis.
     */
    private function truncate(string $text, int $limit): string
    {
        if (function_exists('mb_substr')) {
            return mb_substr($text, 0, $limit, 'UTF-8');
        }
        if (preg_match('/^.{0,' . $limit . '}/us', $text, $match) === 1) {
            return $match[0];
        }

        return substr($text, 0, $limit);
    }

    /**
     * Simple keyword-based fallback when AI is unavailable.
     *
     * Defaults to type "news", score 10 and not an opportunity.
     */
    private function fallbackAnalysis(string $title, string $description): array
    {
        $text = strtolower($title . ' ' . $description);
        $type = 'news';
        $opportunityType = 'other';
        $score = 10;
        $tags = [];
        $isOpportunity = false;

        foreach (self::CLASSIFICATION_RULES as [$pattern, $ruleType, $ruleOpportunityType, $ruleScore, $marksOpportunity, $tag]) {
            if (preg_match($pattern, $text) !== 1) {
                continue;
            }
            $type = $ruleType;
            $opportunityType = $ruleOpportunityType;
            $score = $ruleScore;
            $isOpportunity = $isOpportunity || $marksOpportunity;
            $tags[] = $tag;
        }

        foreach (self::INDUSTRY_PATTERNS as $tag => $pattern) {
            if (preg_match($pattern, $text) === 1) {
                $tags[] = $tag;
            }
        }

        return [
            'type' => $type,
            'opportunity_type' => $opportunityType,
            'score' => $score,
            // array_values keeps this a list so it encodes as a JSON array.
            'tags' => array_values(array_unique($tags)),
            'is_opportunity' => $isOpportunity,
            'summary' => $this->truncate($description, 200),
            'organization_name' => null,
            'country' => null,
        ];
    }
}

View File

@@ -0,0 +1,249 @@
<?php
namespace App\Services\Crawler;
use App\Services\Database\Connection;
use App\Services\Database\ActivityLogger;
use PDO;
use Throwable;
/**
 * Pulls entries from configured sources, classifies them with AiAnalyzer and
 * stores the results as opportunities (plus organizations and tags).
 */
class Collector
{
    private PDO $pdo;
    private RssParser $rssParser;
    private AiAnalyzer $aiAnalyzer;
    private ActivityLogger $logger;

    public function __construct(
        Connection $connection,
        RssParser $rssParser,
        AiAnalyzer $aiAnalyzer,
        ActivityLogger $logger
    ) {
        $this->pdo = $connection->getPdo();
        $this->rssParser = $rssParser;
        $this->aiAnalyzer = $aiAnalyzer;
        $this->logger = $logger;
    }

    /**
     * Collect from all active sources.
     *
     * A failure in one source is recorded in `details` and counted in
     * `errors`; the remaining sources are still processed. The totals are
     * written to the activity log at the end.
     *
     * @return array Keys: total_sources, processed, errors, new_opportunities,
     *               new_organizations, details (one entry per source).
     */
    public function collectAll(): array
    {
        $results = [
            'total_sources' => 0,
            'processed' => 0,
            'errors' => 0,
            'new_opportunities' => 0,
            'new_organizations' => 0,
            'details' => [],
        ];

        foreach ($this->getActiveSources() as $source) {
            $results['total_sources']++;
            try {
                $result = $this->collectSource($source);
                $results['processed']++;
                $results['new_opportunities'] += $result['opportunities'];
                $results['new_organizations'] += $result['organizations'];
                $results['details'][] = [
                    'source' => $source['name'],
                    'type' => $source['type'],
                    'status' => 'success',
                    'entries_found' => $result['entries_found'],
                    'new_opportunities' => $result['opportunities'],
                    'new_organizations' => $result['organizations'],
                ];
            } catch (Throwable $e) {
                $results['errors']++;
                $results['details'][] = [
                    'source' => $source['name'],
                    'type' => $source['type'],
                    'status' => 'error',
                    'error' => $e->getMessage(),
                ];
            }
        }

        $this->logger->log(null, 'collector_run', 'Collector completed: ' . json_encode([
            'total_sources' => $results['total_sources'],
            'processed' => $results['processed'],
            'errors' => $results['errors'],
            'new_opportunities' => $results['new_opportunities'],
            'new_organizations' => $results['new_organizations'],
        ]));

        return $results;
    }

    /**
     * Collect from a single source.
     *
     * Only sources of type "rss" are fetched; any other type yields zero counts.
     *
     * @param array $source Row from the `sources` table (uses type, url, id, name).
     * @return array Keys: entries_found, opportunities, organizations.
     */
    public function collectSource(array $source): array
    {
        $result = [
            'entries_found' => 0,
            'opportunities' => 0,
            'organizations' => 0,
        ];

        if ($source['type'] !== 'rss') {
            return $result;
        }

        $entries = $this->rssParser->fetchEntries($source['url']);
        $result['entries_found'] = count($entries);
        foreach ($entries as $entry) {
            $this->processEntry($entry, $source, $result);
        }

        return $result;
    }

    /**
     * Process a single entry: analyze, save organization, save opportunity.
     *
     * NOTE(review): the analysis' `is_opportunity` flag is not consulted, so
     * every new entry is stored as an opportunity — confirm that is intended.
     *
     * @param array $result Running counters of collectSource(), updated in place.
     */
    private function processEntry(array $entry, array $source, array &$result): void
    {
        // The URL is the de-duplication key. An entry without one would either
        // be stored again on every run or block every other URL-less entry.
        $url = (string)($entry['url'] ?? '');
        if ($url === '' || $this->rssParser->entryExists($url)) {
            return;
        }

        $analysis = $this->aiAnalyzer->analyze($entry['title'], $entry['description']);

        $orgId = null;
        $orgName = $analysis['organization_name'] ?? null;
        if (is_string($orgName) && trim($orgName) !== '') {
            $orgId = $this->rssParser->organizationExists($orgName);
            if (!$orgId) {
                // Unknown organization: ask the AI for details and create it.
                $orgData = $this->aiAnalyzer->extractOrganization($entry['title'] . ' ' . $entry['description']);
                if (!empty($orgData['name'])) {
                    $orgId = $this->createOrganization($orgData);
                    if ($orgId) {
                        $result['organizations']++;
                    }
                }
            }
        }

        // Count the opportunity only when the row was actually written.
        if ($this->createOpportunity($entry, $analysis, $orgId, $source)) {
            $result['opportunities']++;
        }
    }

    /**
     * Create an organization record with CRM status "New".
     *
     * @return int|null New row id, or null when the insert failed.
     */
    private function createOrganization(array $data): ?int
    {
        try {
            $stmt = $this->pdo->prepare(
                "INSERT INTO organizations (name, description, type, country, website_url, crm_status)
                 VALUES (?, ?, ?, ?, ?, 'New')"
            );
            $stmt->execute([
                $data['name'],
                $data['description'] ?? '',
                $data['type'] ?? 'partner',
                $data['country'] ?? null,
                $data['website'] ?? null,
            ]);

            return (int)$this->pdo->lastInsertId();
        } catch (Throwable $e) {
            error_log('[Collector] Could not create organization: ' . $e->getMessage());
            return null;
        }
    }

    /**
     * Create an opportunity record and link its tags.
     *
     * Tagging is best-effort: a failure while linking tags is logged and does
     * not undo the opportunity.
     *
     * @return bool True when the opportunity row was inserted.
     */
    private function createOpportunity(array $entry, array $analysis, ?int $orgId, array $source): bool
    {
        try {
            // The score comes from the AI and may not be an integer.
            $score = min(100, max(0, (int)($analysis['score'] ?? 10)));

            $stmt = $this->pdo->prepare(
                "INSERT INTO opportunities (title, description, type, organization_id, url, status, score, raw_data)
                 VALUES (?, ?, ?, ?, ?, 'active', ?, ?)"
            );
            $stmt->execute([
                $entry['title'],
                $analysis['summary'] ?? $entry['description'],
                $analysis['opportunity_type'] ?? $analysis['type'] ?? 'other',
                $orgId,
                $entry['url'],
                $score,
                // Substitute invalid UTF-8 rather than storing `false` for the whole payload.
                json_encode([
                    'source_id' => $source['id'] ?? null,
                    'source_name' => $source['name'] ?? '',
                    'published_at' => $entry['published_at'],
                    'categories' => $entry['categories'] ?? [],
                    'analysis' => $analysis,
                ], JSON_INVALID_UTF8_SUBSTITUTE),
            ]);
            $opportunityId = (int)$this->pdo->lastInsertId();
        } catch (Throwable $e) {
            error_log('[Collector] Could not save opportunity: ' . $e->getMessage());
            return false;
        }

        $tags = $analysis['tags'] ?? [];
        if (!is_array($tags) || $tags === []) {
            return true;
        }

        try {
            $link = $this->pdo->prepare(
                "INSERT IGNORE INTO opportunity_tags (opportunity_id, tag_id) VALUES (?, ?)"
            );
            foreach ($tags as $tagName) {
                if (!is_string($tagName)) {
                    continue;
                }
                $tagId = $this->getOrCreateTag($tagName);
                if ($tagId) {
                    $link->execute([$opportunityId, $tagId]);
                }
            }
        } catch (Throwable $e) {
            error_log('[Collector] Could not tag opportunity ' . $opportunityId . ': ' . $e->getMessage());
        }

        return true;
    }

    /**
     * Get or create a tag, looked up by its slug.
     *
     * @return int|null Tag id, or null when the name has no usable slug or the insert failed.
     */
    private function getOrCreateTag(string $name): ?int
    {
        $slug = self::slugify($name);
        if ($slug === '') {
            // Without this guard every slug-less name would share one '' tag.
            return null;
        }

        $stmt = $this->pdo->prepare("SELECT id FROM tags WHERE slug = ?");
        $stmt->execute([$slug]);
        $id = $stmt->fetchColumn();
        if ($id) {
            return (int)$id;
        }

        try {
            $stmt = $this->pdo->prepare("INSERT INTO tags (name, slug) VALUES (?, ?)");
            $stmt->execute([$name, $slug]);

            return (int)$this->pdo->lastInsertId();
        } catch (Throwable $e) {
            return null;
        }
    }

    /**
     * Lower-case $name and collapse every run of characters outside a-z0-9
     * into a single hyphen, trimming hyphens at both ends.
     *
     * Lower-casing must happen before the replacement: the character class
     * only contains lower-case letters, so upper-case letters would otherwise
     * be turned into hyphens ("FinTech" -> "in-ech").
     */
    private static function slugify(string $name): string
    {
        $slug = preg_replace('/[^a-z0-9]+/', '-', strtolower($name));

        return trim((string)$slug, '-');
    }

    /**
     * Get all active sources, each with a comma-separated `categories` column.
     */
    public function getActiveSources(): array
    {
        $stmt = $this->pdo->query(
            "SELECT s.*, GROUP_CONCAT(sc.category) as categories
             FROM sources s
             LEFT JOIN source_categories sc ON sc.source_id = s.id
             WHERE s.status = 'active'
             GROUP BY s.id"
        );

        return $stmt->fetchAll() ?: [];
    }
}

View File

@@ -0,0 +1,105 @@
<?php
namespace App\Services\Crawler;
use App\Services\Database\Connection;
use PDO;
use Throwable;
/**
 * Downloads and parses RSS 2.0 / Atom feeds, and provides the lookups the
 * collector uses to avoid storing duplicates.
 */
class RssParser
{
    /** Only plain web URLs may be fetched; this rules out file://, php:// and other stream wrappers. */
    private const ALLOWED_SCHEMES = ['http', 'https'];

    private PDO $pdo;

    public function __construct(Connection $connection)
    {
        $this->pdo = $connection->getPdo();
    }

    /**
     * Parse an RSS feed URL and return entries.
     *
     * Returns an empty array when the URL is not http(s), the download fails
     * or the body is not well-formed XML.
     *
     * @return array List of entries; see parseEntries() for the shape.
     */
    public function fetchEntries(string $url): array
    {
        $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
        if (!in_array($scheme, self::ALLOWED_SCHEMES, true)) {
            return [];
        }

        // No 'ssl' overrides: certificate verification stays at its secure
        // default so feed content cannot be injected by a man in the middle.
        $context = stream_context_create([
            'http' => [
                'timeout' => 15,
                'user_agent' => 'ScoutIQ/1.0 (Crawler)',
            ],
        ]);

        // '@' keeps a failed download from emitting a warning; failure is
        // reported to the caller as an empty result.
        $xml = @file_get_contents($url, false, $context);
        if ($xml === false || $xml === '') {
            return [];
        }

        return $this->parseEntries($xml);
    }

    /**
     * Parse feed XML (RSS `channel/item` or Atom `entry`) into entries.
     *
     * Items without a title are skipped. Each entry has the keys: title,
     * description (tags stripped), url, published_at ("Y-m-d H:i:s", current
     * time when the feed gives no parseable date), categories (list of
     * strings) and source_raw (the full feed XML).
     */
    public function parseEntries(string $xml): array
    {
        $previous = libxml_use_internal_errors(true);
        // LIBXML_NONET: never fetch external resources while parsing a remote document.
        $feed = simplexml_load_string($xml, \SimpleXMLElement::class, LIBXML_NONET);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
        if ($feed === false) {
            return [];
        }

        $entries = [];
        $items = $feed->channel->item ?? $feed->entry ?? [];
        foreach ($items as $item) {
            $title = (string)($item->title ?? '');
            if ($title === '') {
                continue;
            }

            $description = (string)($item->description ?? $item->summary ?? $item->content ?? '');
            $pubDate = (string)($item->pubDate ?? $item->published ?? $item->updated ?? '');
            // strtotime() returns false for unparseable input; date() would turn that into the 1970 epoch.
            $timestamp = $pubDate !== '' ? strtotime($pubDate) : false;

            $categories = [];
            if (isset($item->category)) {
                foreach ($item->category as $category) {
                    $categories[] = (string)$category;
                }
            }

            $entries[] = [
                'title' => $title,
                'description' => strip_tags($description),
                'url' => $this->resolveLink($item),
                'published_at' => date('Y-m-d H:i:s', $timestamp !== false ? $timestamp : time()),
                'categories' => $categories,
                'source_raw' => $xml,
            ];
        }

        return $entries;
    }

    /**
     * Find the item's URL.
     *
     * RSS puts the URL in the text of <link>; Atom puts it in the href
     * attribute of an otherwise empty <link>, preferably the one with
     * rel="alternate" (or no rel). Falls back to <guid> / <id>.
     */
    private function resolveLink(\SimpleXMLElement $item): string
    {
        $firstHref = '';
        foreach ($item->link as $link) {
            $text = (string)$link;
            if (trim($text) !== '') {
                return $text;
            }

            $href = trim((string)($link['href'] ?? ''));
            if ($href === '') {
                continue;
            }
            $rel = (string)($link['rel'] ?? '');
            if ($rel === '' || $rel === 'alternate') {
                return $href;
            }
            if ($firstHref === '') {
                $firstHref = $href;
            }
        }

        if ($firstHref !== '') {
            return $firstHref;
        }

        return (string)($item->guid ?? $item->id ?? '');
    }

    /**
     * Check if entry URL already exists in opportunities.
     *
     * NOTE(review): soft-deleted opportunities are ignored here, so an
     * opportunity an admin deleted is stored again on the next crawl —
     * confirm whether that is intended.
     */
    public function entryExists(string $url): bool
    {
        $stmt = $this->pdo->prepare("SELECT id FROM opportunities WHERE url = ? AND deleted_at IS NULL");
        $stmt->execute([$url]);

        return (bool)$stmt->fetch();
    }

    /**
     * Check if organization already exists by domain or name.
     *
     * An exact domain match is tried first (when a domain is given), then a
     * "contains" match on the name.
     *
     * @return int|null Id of the matching organization, or null.
     */
    public function organizationExists(string $name, ?string $domain = null): ?int
    {
        if ($domain) {
            $stmt = $this->pdo->prepare("SELECT id FROM organizations WHERE domain = ? AND deleted_at IS NULL");
            $stmt->execute([$domain]);
            $id = $stmt->fetchColumn();
            if ($id) {
                return (int)$id;
            }
        }

        // An empty name would produce the pattern '%%', which matches every organization.
        if (trim($name) === '') {
            return null;
        }

        // Fuzzy match by name; '%' and '_' in the name are escaped so they match literally.
        $pattern = '%' . str_replace(['!', '%', '_'], ['!!', '!%', '!_'], $name) . '%';
        $stmt = $this->pdo->prepare(
            "SELECT id FROM organizations WHERE name LIKE ? ESCAPE '!' AND deleted_at IS NULL LIMIT 1"
        );
        $stmt->execute([$pattern]);
        $id = $stmt->fetchColumn();

        return $id ? (int)$id : null;
    }
}