nabeh/backend/app/Services/GeminiService.php

<?php

namespace App\Services;

/**
 * Static helpers for the Gemini generateContent API and the ElevenLabs
 * text-to-speech API.
 *
 * All network methods are best-effort: on any transport, HTTP or encoding
 * failure they write a line to error_log() and return null instead of throwing.
 */
class GeminiService
{
    public const DEFAULT_MODEL = 'gemini-3.1-flash-lite';

    /** Base URL of the Gemini model endpoints; the model name and ":generateContent" are appended. */
    private const GEMINI_MODELS_URL = 'https://generativelanguage.googleapis.com/v1beta/models/';

    /** Base URL of the ElevenLabs text-to-speech endpoint; the voice id is appended. */
    private const ELEVENLABS_TTS_URL = 'https://api.elevenlabs.io/v1/text-to-speech/';

    /** Voice id returned by getElevenLabsVoiceId() when nothing usable is configured. */
    private const FALLBACK_ELEVENLABS_VOICE_ID = 'EXAVITQu4vr4xnSDxMaL';

    /** Gemini speech models tried in order by generateAudioResponse(). */
    private const TTS_MODELS = [
        'gemini-3.1-flash-tts-preview',
        'gemini-2.5-flash-preview-tts',
    ];

    /**
     * Get a random Gemini API key from a comma-separated list of keys.
     *
     * @param string|null $configuredKey Comma-separated keys; when empty the
     *                                   GEMINI_API_KEY environment variable is used.
     * @return string One of the keys, or '' when no key is available.
     */
    public static function getGeminiApiKey(?string $configuredKey = null): string
    {
        return self::pickRandomEntry($configuredKey, 'GEMINI_API_KEY') ?? '';
    }

    /**
     * Get a random ElevenLabs API key from a comma-separated list of keys.
     *
     * @param string|null $configuredKey Comma-separated keys; when empty the
     *                                   ELEVENLABS_API_KEY environment variable is used.
     * @return string|null One of the keys, or null when no key is available.
     */
    public static function getElevenLabsApiKey(?string $configuredKey = null): ?string
    {
        return self::pickRandomEntry($configuredKey, 'ELEVENLABS_API_KEY');
    }

    /**
     * Get a random ElevenLabs voice id from a comma-separated list of voice ids.
     *
     * @param string|null $configuredVoiceId Comma-separated voice ids; when empty the
     *                                       ELEVENLABS_VOICE_ID environment variable is used.
     * @return string One of the voice ids, or the built-in fallback voice id.
     */
    public static function getElevenLabsVoiceId(?string $configuredVoiceId = null): string
    {
        return self::pickRandomEntry($configuredVoiceId, 'ELEVENLABS_VOICE_ID')
            ?? self::FALLBACK_ELEVENLABS_VOICE_ID;
    }

    /**
     * Call Gemini API to generate a text response to a text message.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt Text sent as the system instruction.
     * @param string $userMessage  Text sent as the single user turn.
     * @return string|null Text of the first part of the first candidate, or null on failure.
     */
    public static function generateResponse(string $apiKey, string $systemPrompt, string $userMessage): ?string
    {
        return self::generateText(
            $apiKey,
            [['text' => $userMessage]],
            $systemPrompt,
            15,
            '[Gemini API Error]'
        );
    }

    /**
     * Call Gemini API with audio inline data to generate a chatbot system prompt.
     *
     * @param string $apiKey      Gemini API key.
     * @param string $audioBase64 Base64-encoded audio.
     * @param string $mimeType    Audio MIME type; parameters such as ";codecs=..." are stripped.
     * @return string|null Generated prompt text, or null on failure.
     */
    public static function generatePromptFromAudio(string $apiKey, string $audioBase64, string $mimeType): ?string
    {
        $instruction = "أنت خبير محترف في هندسة التعليمات (Prompt Engineering). استمع جيداً للتسجيل الصوتي المرفق الذي يصف متجراً أو مشروعاً تجارياً ومتطلبات خدمة العملاء، واستخرج التفاصيل المهمة (اسم المتجر، الخدمات، اللهجة المطلوبة، ساعات العمل، سياسات الشحن والاستبدال، والأسئلة الشائعة). ثم قم بصياغة تعليمة نظام (System Instruction Prompt) مفصلة ومنظمة وعالية الجودة باللغة العربية لروبوت خدمة العملاء المعتمد على الذكاء الاصطناعي. يجب أن ترشد التعليمة الروبوت بكيفية التصرف والرد بنبرة مناسبة. أرجع فقط تعليمة النظام الناتجة مباشرة بدون أي نصوص تمهيدية أو تنسيقات markdown أو علامات اقتباس برمجية.";

        return self::generateText(
            $apiKey,
            [
                self::inlineDataPart($mimeType, $audioBase64),
                ['text' => $instruction],
            ],
            null,
            35, // audio analysis is slower than plain text
            '[Gemini Audio API Error]'
        );
    }

    /**
     * Call Gemini API with audio inline data and a system instruction to generate a text reply.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt Text sent as the system instruction.
     * @param string $audioBase64  Base64-encoded audio.
     * @param string $mimeType     Audio MIME type; parameters such as ";codecs=..." are stripped.
     * @return string|null Reply text, or null on failure.
     */
    public static function generateResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType): ?string
    {
        return self::generateText(
            $apiKey,
            [
                self::inlineDataPart($mimeType, $audioBase64),
                ['text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة باللغة المناسبة بناءً على الإرشادات المحددة."],
            ],
            $systemPrompt,
            35, // audio analysis is slower than plain text
            '[Gemini Audio Response Error]'
        );
    }

    /**
     * Transcribe an incoming audio voice note to Arabic text using DEFAULT_MODEL.
     *
     * @param string $apiKey      Gemini API key.
     * @param string $audioBase64 Base64-encoded audio.
     * @param string $mimeType    Audio MIME type; parameters such as ";codecs=..." are stripped.
     * @return string|null Trimmed transcription; '' when the API answered 200 without
     *                     any text; null when the request itself failed.
     */
    public static function transcribeAudio(string $apiKey, string $audioBase64, string $mimeType): ?string
    {
        $body = self::buildRequestBody(
            [
                self::inlineDataPart($mimeType, $audioBase64),
                ['text' => "Transcribe the following audio message to Arabic text. Output only the transcription, no translation, no commentary, no markdown, and no code blocks."],
            ],
            null
        );

        $data = self::callGemini(self::DEFAULT_MODEL, $apiKey, $body, 30, '[Gemini Audio Transcription Error]');
        if ($data === null) {
            return null;
        }

        // A successful call without text yields '' (not null), as callers may rely on it.
        return trim(self::firstText($data) ?? '');
    }

    /**
     * Call Gemini API with image inline data and a system instruction to generate a text reply.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt Text sent as the system instruction.
     * @param string $imageBase64  Base64-encoded image.
     * @param string $mimeType     Image MIME type; parameters after ";" are stripped.
     * @return string|null Reply text, or null on failure.
     */
    public static function generateResponseFromImage(string $apiKey, string $systemPrompt, string $imageBase64, string $mimeType): ?string
    {
        return self::generateText(
            $apiKey,
            [
                self::inlineDataPart($mimeType, $imageBase64),
                ['text' => "حلل الصورة المرفقة وأجب عليها باللغة المناسبة بناءً على الإرشادات المحددة."],
            ],
            $systemPrompt,
            35, // image analysis is slower than plain text
            '[Gemini Image Response Error]'
        );
    }

    /**
     * Call Gemini API with image inline data and a custom prompt to extract structured OCR data.
     *
     * @param string $apiKey      Gemini API key.
     * @param string $prompt      Instruction text, sent before the image part.
     * @param string $imageBase64 Base64-encoded image.
     * @param string $mimeType    Image MIME type; parameters after ";" are stripped.
     * @return string|null Model text with "```json" / "```" fence markers removed and
     *                     trimmed, or null on failure.
     */
    public static function generateOcrFromImage(string $apiKey, string $prompt, string $imageBase64, string $mimeType): ?string
    {
        $textRaw = self::generateText(
            $apiKey,
            [
                ['text' => $prompt],
                self::inlineDataPart($mimeType, $imageBase64),
            ],
            null,
            35, // image analysis is slower than plain text
            '[Gemini OCR Image Response Error]'
        );

        if ($textRaw === null || $textRaw === '') {
            return $textRaw;
        }

        // Clean markdown block if present; fall back to the raw text if PCRE fails.
        $cleaned = preg_replace('/```json|```/', '', $textRaw);

        return trim($cleaned ?? $textRaw);
    }

    /**
     * Call ElevenLabs API to generate a native audio response from text.
     *
     * @param string $elApiKey ElevenLabs API key (sent in the xi-api-key header).
     * @param string $text     Text to synthesise.
     * @param string $voiceId  ElevenLabs voice id.
     * @return array|null ['audio' => base64 of the response body, 'mimeType' => 'audio/mpeg'],
     *                    or null on failure.
     */
    public static function generateAudioResponseWithElevenLabs(string $elApiKey, string $text, string $voiceId): ?array
    {
        $audio = self::postJson(
            self::ELEVENLABS_TTS_URL . rawurlencode($voiceId),
            ['text' => $text],
            ['xi-api-key: ' . $elApiKey],
            30,
            '[ElevenLabs API Error]'
        );

        if ($audio === null) {
            return null;
        }

        return [
            'audio' => base64_encode($audio),
            'mimeType' => 'audio/mpeg',
        ];
    }

    /**
     * Call ElevenLabs or Gemini API to generate a native audio (speech) response from text.
     *
     * ElevenLabs is used when $elApiKey is non-empty; if it fails, or no key was
     * given, the Gemini speech models are tried in order.
     *
     * @param string      $apiKey       Gemini API key.
     * @param string      $systemPrompt Optional instruction, sent to Gemini as a text part
     *                                  prefixed with "System instruction: ".
     * @param string      $userMessage  Text to convert to speech.
     * @param string      $voiceName    Gemini prebuilt voice name.
     * @param string|null $elApiKey     ElevenLabs API key; enables the ElevenLabs path.
     * @param string|null $elVoiceId    ElevenLabs voice id; a default is used when empty.
     * @return array|null ['audio' => base64 string, 'mimeType' => string], or null when
     *                    every provider failed.
     */
    public static function generateAudioResponse(
        string $apiKey,
        string $systemPrompt,
        string $userMessage,
        string $voiceName = 'Puck',
        ?string $elApiKey = null,
        ?string $elVoiceId = null
    ): ?array {
        // Use ElevenLabs if the API Key is provided
        if (!empty($elApiKey)) {
            $voiceId = !empty($elVoiceId) ? $elVoiceId : 'pNInz6obpgDQGcFmaJgB'; // Default to Adam
            $audioData = self::generateAudioResponseWithElevenLabs($elApiKey, $userMessage, $voiceId);
            if ($audioData) {
                return $audioData;
            }
            error_log("[TTS Service] ElevenLabs failed, falling back to Gemini TTS.");
        }

        // Gemini fallback. The request body is identical for every model, so build it once.
        // NOTE(review): the system prompt is sent as ordinary text content, not as
        // systemInstruction; confirm the speech models do not read it aloud.
        $parts = [];
        if (!empty($systemPrompt)) {
            $parts[] = ['text' => "System instruction: " . $systemPrompt];
        }
        $parts[] = ['text' => $userMessage];

        $body = self::buildRequestBody($parts, null);
        $body['generationConfig'] = [
            'responseModalities' => ['AUDIO'],
            'speechConfig' => [
                'voiceConfig' => [
                    'prebuiltVoiceConfig' => [
                        'voiceName' => $voiceName,
                    ],
                ],
            ],
        ];

        foreach (self::TTS_MODELS as $model) {
            $data = self::callGemini(
                $model,
                $apiKey,
                $body,
                30,
                '[Gemini Audio API Error] Model ' . $model . ' failed with'
            );

            $inline = $data['candidates'][0]['content']['parts'][0]['inlineData'] ?? null;
            if (is_array($inline) && isset($inline['data'])) {
                return [
                    'audio' => $inline['data'],
                    'mimeType' => $inline['mimeType'] ?? 'audio/mp4',
                ];
            }
        }

        return null;
    }

    /**
     * Call Gemini API with audio inline data to generate a native audio response.
     *
     * @param string      $apiKey       Gemini API key.
     * @param string      $systemPrompt System instruction for the text reply; also forwarded
     *                                  to generateAudioResponse().
     * @param string      $audioBase64  Base64-encoded incoming audio.
     * @param string      $mimeType     MIME type of the incoming audio.
     * @param string      $voiceName    Gemini prebuilt voice name.
     * @param string|null $elApiKey     ElevenLabs API key; enables the ElevenLabs path.
     * @param string|null $elVoiceId    ElevenLabs voice id.
     * @return array|null ['audio' => base64 string, 'mimeType' => string], or null on failure.
     */
    public static function generateAudioResponseFromAudio(
        string $apiKey,
        string $systemPrompt,
        string $audioBase64,
        string $mimeType,
        string $voiceName = 'Puck',
        ?string $elApiKey = null,
        ?string $elVoiceId = null
    ): ?array {
        // Step 1: Use DEFAULT_MODEL to understand the audio message and generate a text reply
        $replyText = self::generateResponseFromAudio($apiKey, $systemPrompt, $audioBase64, $mimeType);
        if (empty($replyText)) {
            error_log("[Gemini Audio-to-Audio Error] Could not generate text response from audio.");
            return null;
        }

        // Step 2: Use ElevenLabs or Gemini TTS to convert the text response into a native audio voice note
        return self::generateAudioResponse($apiKey, $systemPrompt, $replyText, $voiceName, $elApiKey, $elVoiceId);
    }

    /**
     * Pick one random non-empty entry from a comma-separated list.
     *
     * @param string|null $configured Explicit list; wins over the environment when non-empty.
     * @param string      $envName    Environment variable consulted when $configured is empty.
     * @return string|null A trimmed entry, or null when the chosen source has no usable entry.
     */
    private static function pickRandomEntry(?string $configured, string $envName): ?string
    {
        $source = !empty($configured) ? $configured : getenv($envName);
        if (empty($source)) {
            return null;
        }

        $entries = array_values(array_filter(array_map('trim', explode(',', $source))));
        if ($entries === []) {
            return null;
        }

        return $entries[array_rand($entries)];
    }

    /**
     * Build an inlineData request part, dropping MIME parameters such as ";codecs=opus".
     */
    private static function inlineDataPart(string $mimeType, string $base64Data): array
    {
        return [
            'inlineData' => [
                'mimeType' => trim(explode(';', $mimeType, 2)[0]),
                'data' => $base64Data,
            ],
        ];
    }

    /**
     * Build a generateContent body with one user turn and an optional system instruction.
     *
     * @param array       $parts        Parts of the user turn, in the order they are sent.
     * @param string|null $systemPrompt When not null, added as systemInstruction (even if '').
     */
    private static function buildRequestBody(array $parts, ?string $systemPrompt): array
    {
        $body = [
            'contents' => [
                [
                    'role' => 'user',
                    'parts' => $parts,
                ],
            ],
        ];

        if ($systemPrompt !== null) {
            $body['systemInstruction'] = [
                'parts' => [
                    ['text' => $systemPrompt],
                ],
            ];
        }

        return $body;
    }

    /**
     * Run a generateContent call on DEFAULT_MODEL and return the first text part.
     *
     * @return string|null Text of the first part, or null on failure or when absent.
     */
    private static function generateText(string $apiKey, array $parts, ?string $systemPrompt, int $timeout, string $logContext): ?string
    {
        $data = self::callGemini(
            self::DEFAULT_MODEL,
            $apiKey,
            self::buildRequestBody($parts, $systemPrompt),
            $timeout,
            $logContext
        );

        return $data === null ? null : self::firstText($data);
    }

    /**
     * POST a generateContent request for the given model and decode the JSON reply.
     *
     * The key travels in the x-goog-api-key header rather than the query string so
     * it is not embedded in the request URL.
     *
     * @return array|null Decoded response ([] when the 200 body is not a JSON
     *                    object/array), or null when the request failed.
     */
    private static function callGemini(string $model, string $apiKey, array $body, int $timeout, string $logContext): ?array
    {
        $raw = self::postJson(
            self::GEMINI_MODELS_URL . $model . ':generateContent',
            $body,
            ['x-goog-api-key: ' . $apiKey],
            $timeout,
            $logContext
        );

        if ($raw === null) {
            return null;
        }

        $decoded = json_decode($raw, true);

        return is_array($decoded) ? $decoded : [];
    }

    /**
     * Extract the text of the first part of the first candidate, if any.
     */
    private static function firstText(array $data): ?string
    {
        $text = $data['candidates'][0]['content']['parts'][0]['text'] ?? null;

        return is_string($text) ? $text : null;
    }

    /**
     * POST a JSON body and return the raw response body on HTTP 200.
     *
     * @param string   $url          Target URL.
     * @param array    $body         Data to JSON-encode as the request body.
     * @param string[] $extraHeaders Header lines added after Content-Type.
     * @param int      $timeout      Total request timeout in seconds.
     * @param string   $logContext   Prefix for error_log lines, e.g. "[Gemini API Error]".
     * @return string|null Response body, or null after logging the failure.
     */
    private static function postJson(string $url, array $body, array $extraHeaders, int $timeout, string $logContext): ?string
    {
        $payload = json_encode($body);
        if ($payload === false) {
            // Typically invalid UTF-8 in user-supplied text; nothing valid to send.
            error_log($logContext . ' request not sent: ' . json_last_error_msg());
            return null;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            error_log($logContext . ' request not sent: curl_init failed');
            return null;
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $payload,
            CURLOPT_HTTPHEADER => array_merge(['Content-Type: application/json'], $extraHeaders),
            CURLOPT_TIMEOUT => $timeout,
        ]);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        // Capture the transport error before the handle is released.
        $transportError = $response === false ? curl_error($ch) : '';
        curl_close($ch);

        if ($response === false || $httpCode !== 200) {
            $message = $logContext . ' HTTP ' . $httpCode . ' | Response: ' . ($response === false ? '' : $response);
            if ($transportError !== '') {
                $message .= ' | cURL error: ' . $transportError;
            }
            error_log($message);
            return null;
        }

        return $response;
    }
}