nabeh/backend/app/Services/GeminiService.php

<?php

namespace App\Services;

/**
 * Thin static client around the Gemini `generateContent` REST endpoint and the
 * ElevenLabs text-to-speech endpoint.
 *
 * Every public method returns null on failure (transport error, non-200 HTTP
 * status, undecodable or unexpected response) and writes a diagnostic line via
 * error_log(); none of them throws.
 */
class GeminiService
{
    /** Base URL of the Gemini v1beta model endpoints. */
    private const GEMINI_MODELS_URL = 'https://generativelanguage.googleapis.com/v1beta/models/';

    /** Model used for every text / audio / image understanding request. */
    private const GEMINI_CHAT_MODEL = 'gemini-flash-lite-latest';

    /** Gemini TTS models, tried in this order until one yields audio. */
    private const GEMINI_TTS_MODELS = [
        'gemini-3.1-flash-tts-preview',
        'gemini-2.5-flash-preview-tts',
    ];

    /** Base URL of the ElevenLabs text-to-speech endpoint (voice id is appended). */
    private const ELEVENLABS_TTS_URL = 'https://api.elevenlabs.io/v1/text-to-speech/';

    /** ElevenLabs voice used when the caller supplies none ("Adam"). */
    private const ELEVENLABS_DEFAULT_VOICE_ID = 'pNInz6obpgDQGcFmaJgB';

    /** Instruction sent with a recording that describes a business, asking for a chatbot system prompt. */
    private const PROMPT_FROM_AUDIO_INSTRUCTION = "أنت خبير محترف في هندسة التعليمات (Prompt Engineering). استمع جيداً للتسجيل الصوتي المرفق الذي يصف متجراً أو مشروعاً تجارياً ومتطلبات خدمة العملاء، واستخرج التفاصيل المهمة (اسم المتجر، الخدمات، اللهجة المطلوبة، ساعات العمل، سياسات الشحن والاستبدال، والأسئلة الشائعة). ثم قم بصياغة تعليمة نظام (System Instruction Prompt) مفصلة ومنظمة وعالية الجودة باللغة العربية لروبوت خدمة العملاء المعتمد على الذكاء الاصطناعي. يجب أن ترشد التعليمة الروبوت بكيفية التصرف والرد بنبرة مناسبة. أرجع فقط تعليمة النظام الناتجة مباشرة بدون أي نصوص تمهيدية أو تنسيقات markdown أو علامات اقتباس برمجية.";

    /** Instruction sent alongside a customer's voice message. */
    private const REPLY_TO_AUDIO_INSTRUCTION = "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة باللغة المناسبة بناءً على الإرشادات المحددة.";

    /** Instruction sent alongside a customer's image. */
    private const REPLY_TO_IMAGE_INSTRUCTION = "حلل الصورة المرفقة وأجب عليها باللغة المناسبة بناءً على الإرشادات المحددة.";

    /**
     * Generate a text reply to a text message.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt System instruction; omitted from the request when empty.
     * @param string $userMessage  The user's message.
     *
     * @return string|null Text of the first part of the first candidate, or null on failure.
     */
    public static function generateResponse(string $apiKey, string $systemPrompt, string $userMessage): ?string
    {
        $payload = self::withSystemInstruction(
            [
                'contents' => [
                    [
                        'role' => 'user',
                        'parts' => [
                            ['text' => $userMessage],
                        ],
                    ],
                ],
            ],
            $systemPrompt
        );

        return self::extractText(
            self::callGemini($apiKey, self::GEMINI_CHAT_MODEL, $payload, 15, '[Gemini API Error]')
        );
    }

    /**
     * Turn a spoken description of a business into a chatbot system prompt.
     *
     * @param string $apiKey      Gemini API key.
     * @param string $audioBase64 Base64-encoded audio.
     * @param string $mimeType    Audio MIME type; any ";codecs=..." suffix is stripped.
     *
     * @return string|null Generated system prompt, or null on failure.
     */
    public static function generatePromptFromAudio(string $apiKey, string $audioBase64, string $mimeType): ?string
    {
        $payload = self::buildMediaPayload($audioBase64, $mimeType, self::PROMPT_FROM_AUDIO_INSTRUCTION);

        return self::extractText(
            // 35 seconds timeout for audio analysis
            self::callGemini($apiKey, self::GEMINI_CHAT_MODEL, $payload, 35, '[Gemini Audio API Error]')
        );
    }

    /**
     * Generate a text reply to a voice message.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt System instruction; omitted from the request when empty.
     * @param string $audioBase64  Base64-encoded audio.
     * @param string $mimeType     Audio MIME type; any ";codecs=..." suffix is stripped.
     *
     * @return string|null Reply text, or null on failure.
     */
    public static function generateResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType): ?string
    {
        $payload = self::withSystemInstruction(
            self::buildMediaPayload($audioBase64, $mimeType, self::REPLY_TO_AUDIO_INSTRUCTION),
            $systemPrompt
        );

        return self::extractText(
            // 35 seconds timeout for audio analysis
            self::callGemini($apiKey, self::GEMINI_CHAT_MODEL, $payload, 35, '[Gemini Audio Response Error]')
        );
    }

    /**
     * Generate a text reply to an image.
     *
     * @param string $apiKey       Gemini API key.
     * @param string $systemPrompt System instruction; omitted from the request when empty.
     * @param string $imageBase64  Base64-encoded image.
     * @param string $mimeType     Image MIME type; any ";..." parameter suffix is stripped.
     *
     * @return string|null Reply text, or null on failure.
     */
    public static function generateResponseFromImage(string $apiKey, string $systemPrompt, string $imageBase64, string $mimeType): ?string
    {
        $payload = self::withSystemInstruction(
            self::buildMediaPayload($imageBase64, $mimeType, self::REPLY_TO_IMAGE_INSTRUCTION),
            $systemPrompt
        );

        return self::extractText(
            // 35 seconds timeout for image analysis
            self::callGemini($apiKey, self::GEMINI_CHAT_MODEL, $payload, 35, '[Gemini Image Response Error]')
        );
    }

    /**
     * Convert text to speech with ElevenLabs.
     *
     * @param string $elApiKey ElevenLabs API key (sent in the "xi-api-key" header).
     * @param string $text     Text to speak.
     * @param string $voiceId  ElevenLabs voice id; URL-encoded before being placed in the path.
     *
     * @return array{audio: string, mimeType: string}|null Base64-encoded audio tagged
     *         "audio/mpeg", or null on failure or when the response body is empty.
     */
    public static function generateAudioResponseWithElevenLabs(string $elApiKey, string $text, string $voiceId): ?array
    {
        $audioBytes = self::postJson(
            self::ELEVENLABS_TTS_URL . rawurlencode($voiceId),
            ['text' => $text],
            ['xi-api-key: ' . $elApiKey],
            30,
            '[ElevenLabs API Error]'
        );

        if ($audioBytes === null) {
            return null;
        }

        if ($audioBytes === '') {
            // A 200 with no body is unusable; report it so the caller can fall back.
            error_log('[ElevenLabs API Error] Empty audio body returned.');
            return null;
        }

        return [
            'audio' => base64_encode($audioBytes),
            'mimeType' => 'audio/mpeg',
        ];
    }

    /**
     * Convert text to speech, preferring ElevenLabs and falling back to Gemini TTS.
     *
     * ElevenLabs is attempted only when $elApiKey is non-empty. If it is skipped or
     * fails, each Gemini TTS model is tried in order until one returns audio.
     *
     * @param string      $apiKey       Gemini API key.
     * @param string      $systemPrompt Optional instruction, prepended as a text part for Gemini TTS only.
     * @param string      $userMessage  Text to speak.
     * @param string      $voiceName    Gemini prebuilt voice name.
     * @param string|null $elApiKey     ElevenLabs API key, or null/empty to skip ElevenLabs.
     * @param string|null $elVoiceId    ElevenLabs voice id, or null/empty for the default voice.
     *
     * @return array{audio: string, mimeType: string}|null Base64 audio plus MIME type, or null if every provider failed.
     */
    public static function generateAudioResponse(
        string $apiKey,
        string $systemPrompt,
        string $userMessage,
        string $voiceName = 'Puck',
        ?string $elApiKey = null,
        ?string $elVoiceId = null
    ): ?array {
        // Use ElevenLabs if the API Key is provided
        if ($elApiKey !== null && $elApiKey !== '') {
            $voiceId = ($elVoiceId !== null && $elVoiceId !== '')
                ? $elVoiceId
                : self::ELEVENLABS_DEFAULT_VOICE_ID;

            $audioData = self::generateAudioResponseWithElevenLabs($elApiKey, $userMessage, $voiceId);
            if ($audioData !== null) {
                return $audioData;
            }
            error_log("[TTS Service] ElevenLabs failed, falling back to Gemini TTS.");
        }

        // Gemini fallback. The payload does not depend on the model, so build it once.
        // NOTE(review): the system prompt is sent to the TTS model as ordinary text, so it
        // may end up being spoken as part of the audio — confirm this is intended.
        $parts = [];
        if ($systemPrompt !== '') {
            $parts[] = ['text' => "System instruction: " . $systemPrompt];
        }
        $parts[] = ['text' => $userMessage];

        $payload = [
            'contents' => [
                [
                    'role' => 'user',
                    'parts' => $parts,
                ],
            ],
            'generationConfig' => [
                'responseModalities' => ['AUDIO'],
                'speechConfig' => [
                    'voiceConfig' => [
                        'prebuiltVoiceConfig' => [
                            'voiceName' => $voiceName,
                        ],
                    ],
                ],
            ],
        ];

        foreach (self::GEMINI_TTS_MODELS as $model) {
            $data = self::callGemini(
                $apiKey,
                $model,
                $payload,
                30,
                '[Gemini Audio API Error] Model ' . $model . ' failed with'
            );
            if ($data === null) {
                continue;
            }

            $audio = self::extractInlineAudio($data);
            if ($audio !== null) {
                return $audio;
            }

            error_log('[Gemini Audio API Error] Model ' . $model . ' returned no audio data.');
        }

        return null;
    }

    /**
     * Answer a voice message with a voice message.
     *
     * Step 1 asks the chat model for a text reply to the audio; step 2 converts that
     * reply to speech via generateAudioResponse().
     *
     * @param string      $apiKey       Gemini API key.
     * @param string      $systemPrompt System instruction, passed to both steps.
     * @param string      $audioBase64  Base64-encoded input audio.
     * @param string      $mimeType     Input audio MIME type.
     * @param string      $voiceName    Gemini prebuilt voice name.
     * @param string|null $elApiKey     ElevenLabs API key, or null/empty to skip ElevenLabs.
     * @param string|null $elVoiceId    ElevenLabs voice id, or null/empty for the default voice.
     *
     * @return array{audio: string, mimeType: string}|null Base64 audio plus MIME type, or null on failure.
     */
    public static function generateAudioResponseFromAudio(
        string $apiKey,
        string $systemPrompt,
        string $audioBase64,
        string $mimeType,
        string $voiceName = 'Puck',
        ?string $elApiKey = null,
        ?string $elVoiceId = null
    ): ?array {
        // Step 1: understand the audio message and generate a text reply.
        $replyText = self::generateResponseFromAudio($apiKey, $systemPrompt, $audioBase64, $mimeType);

        // Explicit checks rather than empty(): a reply of "0" is a valid reply.
        if ($replyText === null || $replyText === '') {
            error_log("[Gemini Audio-to-Audio Error] Could not generate text response from audio.");
            return null;
        }

        // Step 2: convert the text reply into a voice note (ElevenLabs or Gemini TTS).
        return self::generateAudioResponse($apiKey, $systemPrompt, $replyText, $voiceName, $elApiKey, $elVoiceId);
    }

    /**
     * Build a single-turn request containing one inline media part followed by a text instruction.
     *
     * @param string $base64Data  Base64-encoded media.
     * @param string $mimeType    Raw MIME type; normalized before use.
     * @param string $instruction Text part placed after the media.
     *
     * @return array<string, mixed> Request payload without a system instruction.
     */
    private static function buildMediaPayload(string $base64Data, string $mimeType, string $instruction): array
    {
        return [
            'contents' => [
                [
                    'role' => 'user',
                    'parts' => [
                        [
                            'inlineData' => [
                                'mimeType' => self::normalizeMimeType($mimeType),
                                'data' => $base64Data,
                            ],
                        ],
                        ['text' => $instruction],
                    ],
                ],
            ],
        ];
    }

    /**
     * Attach a system instruction to a payload, skipping it when the prompt is empty.
     *
     * @param array<string, mixed> $payload      Request payload.
     * @param string               $systemPrompt System instruction text.
     *
     * @return array<string, mixed> The payload, with "systemInstruction" added when applicable.
     */
    private static function withSystemInstruction(array $payload, string $systemPrompt): array
    {
        if ($systemPrompt !== '') {
            $payload['systemInstruction'] = [
                'parts' => [
                    ['text' => $systemPrompt],
                ],
            ];
        }

        return $payload;
    }

    /**
     * Strip parameters such as ";codecs=opus" (and surrounding whitespace) from a MIME type.
     */
    private static function normalizeMimeType(string $mimeType): string
    {
        return trim(explode(';', $mimeType)[0]);
    }

    /**
     * Call `generateContent` on a Gemini model and decode the JSON response.
     *
     * The API key travels in the "x-goog-api-key" header rather than the query
     * string so that it does not appear in URLs recorded by proxies or logs.
     *
     * @param string               $apiKey    Gemini API key.
     * @param string               $model     Model name.
     * @param array<string, mixed> $payload   Request payload.
     * @param int                  $timeout   Total request timeout in seconds.
     * @param string               $logPrefix Prefix for error_log() lines.
     *
     * @return array<string, mixed>|null Decoded response, or null on any failure.
     */
    private static function callGemini(string $apiKey, string $model, array $payload, int $timeout, string $logPrefix): ?array
    {
        $raw = self::postJson(
            self::GEMINI_MODELS_URL . $model . ':generateContent',
            $payload,
            ['x-goog-api-key: ' . $apiKey],
            $timeout,
            $logPrefix
        );
        if ($raw === null) {
            return null;
        }

        $decoded = json_decode($raw, true);
        if (!is_array($decoded)) {
            error_log($logPrefix . ' undecodable JSON response: ' . json_last_error_msg());
            return null;
        }

        return $decoded;
    }

    /**
     * Return the text of the first part of the first candidate, if present.
     *
     * @param array<string, mixed>|null $data Decoded Gemini response, or null.
     */
    private static function extractText(?array $data): ?string
    {
        $text = $data['candidates'][0]['content']['parts'][0]['text'] ?? null;

        return is_string($text) ? $text : null;
    }

    /**
     * Return the first inline audio part of the first candidate, if present.
     *
     * @param array<string, mixed> $data Decoded Gemini response.
     *
     * @return array{audio: string, mimeType: string}|null
     */
    private static function extractInlineAudio(array $data): ?array
    {
        $parts = $data['candidates'][0]['content']['parts'] ?? null;
        if (!is_array($parts)) {
            return null;
        }

        foreach ($parts as $part) {
            $encoded = $part['inlineData']['data'] ?? null;
            if (is_string($encoded) && $encoded !== '') {
                return [
                    'audio' => $encoded,
                    // NOTE(review): fallback kept from the previous implementation — confirm
                    // it matches what the TTS models actually return.
                    'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4',
                ];
            }
        }

        return null;
    }

    /**
     * POST a JSON body and return the raw response body on HTTP 200.
     *
     * Failures are logged as "<logPrefix> <detail>" and reported as null:
     * payload that cannot be JSON-encoded (e.g. invalid UTF-8), cURL
     * initialisation or transport errors, and any non-200 status.
     *
     * @param string               $url          Target URL.
     * @param array<string, mixed> $payload      Data to JSON-encode as the request body.
     * @param string[]             $extraHeaders Headers added after "Content-Type: application/json".
     * @param int                  $timeout      Total request timeout in seconds.
     * @param string               $logPrefix    Prefix for error_log() lines.
     *
     * @return string|null Response body, or null on failure.
     */
    private static function postJson(string $url, array $payload, array $extraHeaders, int $timeout, string $logPrefix): ?string
    {
        $body = json_encode($payload);
        if ($body === false) {
            // Never send a request with a broken body; report why encoding failed.
            error_log($logPrefix . ' unencodable request payload: ' . json_last_error_msg());
            return null;
        }

        $ch = curl_init($url);
        if ($ch === false) {
            error_log($logPrefix . ' cURL initialisation failure.');
            return null;
        }

        curl_setopt_array($ch, [
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_POST => true,
            CURLOPT_POSTFIELDS => $body,
            CURLOPT_HTTPHEADER => array_merge(['Content-Type: application/json'], $extraHeaders),
            CURLOPT_TIMEOUT => $timeout,
        ]);

        $response = curl_exec($ch);
        $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        $transportError = curl_error($ch);
        curl_close($ch);

        if ($response === false) {
            // No HTTP exchange completed (DNS, TLS, timeout, ...): the cURL message is the only clue.
            error_log($logPrefix . ' cURL error: ' . $transportError);
            return null;
        }

        if ($httpCode !== 200) {
            error_log($logPrefix . ' HTTP ' . $httpCode . ' | Response: ' . $response);
            return null;
        }

        return $response;
    }
}