Add complete ScoutIQ system: Crawler (RSS+AI), CRUD Controllers (Organizations, Contacts, Opportunities, Sources), dynamic Views, API routes, CLI collector
This commit is contained in:
105
app/Services/Crawler/RssParser.php
Normal file
105
app/Services/Crawler/RssParser.php
Normal file
@@ -0,0 +1,105 @@
|
||||
<?php
|
||||
|
||||
namespace App\Services\Crawler;
|
||||
|
||||
use App\Services\Database\Connection;
|
||||
use PDO;
|
||||
use Throwable;
|
||||
|
||||
/**
 * Downloads RSS 2.0 / RSS 1.0 (RDF) / Atom feeds, flattens their items into
 * plain arrays and offers duplicate checks against the `opportunities` and
 * `organizations` tables.
 */
class RssParser
{
    /** Layout of the `published_at` value returned by fetchEntries(). */
    private const DATE_FORMAT = 'Y-m-d H:i:s';

    private PDO $pdo;

    public function __construct(Connection $connection)
    {
        $this->pdo = $connection->getPdo();
    }

    /**
     * Parse a feed URL and return its entries.
     *
     * Items without a title are skipped. An unreachable URL, an empty
     * response or malformed XML all yield an empty array (best effort: one
     * broken source must not abort a whole crawl).
     *
     * NOTE(review): `source_raw` carries the XML of the WHOLE feed on every
     * entry, exactly as before; confirm with the callers whether the single
     * item would be enough before persisting it per entry.
     *
     * @param string $url Location of the feed.
     *
     * @return array<int, array{
     *     title: string,
     *     description: string,
     *     url: string,
     *     published_at: string,
     *     categories: array<int, string>,
     *     source_raw: string
     * }>
     */
    public function fetchEntries(string $url): array
    {
        $xml = $this->download($url);
        if ($xml === null) {
            return [];
        }

        $feed = $this->parseXml($xml);
        if ($feed === null) {
            return [];
        }

        // RSS 2.0 nests items in <channel>, RSS 1.0 (RDF) puts them next to
        // <channel>, Atom uses <entry> directly below the root element.
        $items = $feed->channel->item ?? $feed->item ?? $feed->entry ?? [];

        $entries = [];
        foreach ($items as $item) {
            $title = trim((string) ($item->title ?? ''));
            if ($title === '') {
                // Strict comparison on purpose: empty() would also drop "0".
                continue;
            }

            $description = (string) ($item->description ?? $item->summary ?? $item->content ?? '');
            // Prefer the publication date; Atom's mandatory <updated> is the fallback.
            $rawDate = (string) ($item->pubDate ?? $item->published ?? $item->updated ?? '');

            $entries[] = [
                'title' => $title,
                'description' => strip_tags($description),
                'url' => $this->extractLink($item),
                'published_at' => $this->normaliseDate($rawDate),
                'categories' => $this->extractCategories($item),
                'source_raw' => $xml,
            ];
        }

        return $entries;
    }

    /**
     * Check if entry URL already exists in opportunities (soft-deleted rows
     * are ignored).
     */
    public function entryExists(string $url): bool
    {
        $stmt = $this->pdo->prepare(
            'SELECT id FROM opportunities WHERE url = ? AND deleted_at IS NULL LIMIT 1'
        );
        $stmt->execute([$url]);

        return $stmt->fetchColumn() !== false;
    }

    /**
     * Check if organization already exists by domain or name.
     *
     * The domain, when given, is compared exactly and wins over the name.
     * Otherwise the name is matched as a literal substring: `%` and `_`
     * inside the name are escaped so they cannot act as wildcards, and a
     * blank name never matches (the pattern `%%` would match every row).
     *
     * @return int|null Id of a matching organization, or null when none is found.
     */
    public function organizationExists(string $name, ?string $domain = null): ?int
    {
        if ($domain !== null && $domain !== '') {
            $stmt = $this->pdo->prepare(
                'SELECT id FROM organizations WHERE domain = ? AND deleted_at IS NULL LIMIT 1'
            );
            $stmt->execute([$domain]);
            $id = $stmt->fetchColumn();
            if ($id !== false && $id !== null) {
                return (int) $id;
            }
        }

        $needle = trim($name);
        if ($needle === '') {
            return null;
        }

        // "!" is declared as the escape character in the query below; a
        // backslash is avoided because its handling inside SQL string
        // literals differs between database engines.
        $escaped = strtr($needle, ['!' => '!!', '%' => '!%', '_' => '!_']);

        $stmt = $this->pdo->prepare(
            "SELECT id FROM organizations WHERE name LIKE ? ESCAPE '!' AND deleted_at IS NULL LIMIT 1"
        );
        $stmt->execute(['%' . $escaped . '%']);
        $id = $stmt->fetchColumn();

        return ($id !== false && $id !== null) ? (int) $id : null;
    }

    /**
     * Fetch the raw body behind $url, or null when nothing usable came back.
     */
    private function download(string $url): ?string
    {
        $context = stream_context_create([
            'http' => [
                'timeout' => 15,
                'user_agent' => 'ScoutIQ/1.0 (Crawler)',
            ],
            'ssl' => [
                // Certificate checks stay ON: without them anyone on the
                // network path could inject forged feed content.
                'verify_peer' => true,
                'verify_peer_name' => true,
            ],
        ]);

        // file_get_contents() reports failures as warnings; an unreachable
        // feed is an expected condition here, so the warning is silenced and
        // the return value is inspected instead.
        $body = @file_get_contents($url, false, $context);

        return ($body === false || $body === '') ? null : $body;
    }

    /**
     * Turn an XML string into a SimpleXMLElement, or null when it is not
     * well-formed. libxml diagnostics are collected internally and discarded
     * rather than emitted as PHP warnings; the previous libxml setting is
     * restored afterwards.
     */
    private function parseXml(string $xml): ?\SimpleXMLElement
    {
        $previous = libxml_use_internal_errors(true);
        try {
            // LIBXML_NONET: never let the parser reach out to the network.
            $feed = simplexml_load_string($xml, \SimpleXMLElement::class, LIBXML_NONET);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        return $feed instanceof \SimpleXMLElement ? $feed : null;
    }

    /**
     * Resolve the URL of a feed item.
     *
     * RSS stores it as the text of <link>; Atom stores it in the `href`
     * attribute and may list several links, of which the one with
     * rel="alternate" (or no rel at all) is preferred. <guid> / <id> are the
     * last resort.
     */
    private function extractLink(\SimpleXMLElement $item): string
    {
        $fallback = '';

        foreach ($item->link as $link) {
            $text = trim((string) $link);
            if ($text !== '') {
                return $text;
            }

            $href = trim((string) $link['href']);
            if ($href === '') {
                continue;
            }

            $rel = (string) $link['rel'];
            if ($rel === '' || $rel === 'alternate') {
                return $href;
            }

            if ($fallback === '') {
                $fallback = $href;
            }
        }

        if ($fallback !== '') {
            return $fallback;
        }

        return trim((string) ($item->guid ?? $item->id ?? ''));
    }

    /**
     * Collect the category labels of a feed item: element text for RSS, the
     * `term` attribute for Atom. Blank labels are left out.
     *
     * @return array<int, string>
     */
    private function extractCategories(\SimpleXMLElement $item): array
    {
        $labels = [];

        foreach ($item->category as $category) {
            $label = trim((string) $category);
            if ($label === '') {
                $label = trim((string) $category['term']);
            }
            if ($label !== '') {
                $labels[] = $label;
            }
        }

        return $labels;
    }

    /**
     * Convert a feed date into DATE_FORMAT. A missing or unparsable date
     * falls back to the current time instead of the 1970 epoch that a failed
     * strtotime() would otherwise produce.
     */
    private function normaliseDate(string $raw): string
    {
        $raw = trim($raw);
        $timestamp = $raw === '' ? false : strtotime($raw);

        return date(self::DATE_FORMAT, $timestamp === false ? time() : $timestamp);
    }
}
|
||||
Reference in New Issue
Block a user