From f339488517af561ff9d524ec86c89217cbe7e09c Mon Sep 17 00:00:00 2001 From: Hamza-Ayed Date: Fri, 22 May 2026 15:22:58 +0300 Subject: [PATCH] Update test_audio to try candidate models --- backend/public/test_audio.php | 120 +++++++++++++++++++++++++++------- 1 file changed, 95 insertions(+), 25 deletions(-) diff --git a/backend/public/test_audio.php b/backend/public/test_audio.php index 28d180e..aa09477 100644 --- a/backend/public/test_audio.php +++ b/backend/public/test_audio.php @@ -33,13 +33,27 @@ if (empty($apiKey)) { echo "✅ [Database] Found active Gemini rule for Company ID: {$rule['company_id']}\n"; echo "ℹ️ [Config] Gemini API Key starts with: " . substr($apiKey, 0, 6) . "...\n"; -// 2. Test Gemini API Audio Output (Puck Voice, gemini-flash-lite-latest) +// 2. Test Gemini API Audio Output (Puck Voice, multiple models) echo "\n--- Testing Gemini Audio Response ---\n"; $systemPrompt = "You are a friendly customer service assistant. Speak in a warm, welcoming tone."; $testMsg = "مرحبا، هل متجركم مفتوح اليوم؟"; - // Test via cURL directly to dump exact error - $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=' . $apiKey; +$candidateModels = [ + 'gemini-2.5-flash-preview-tts', + 'gemini-3.1-flash-tts-preview', + 'gemini-2.0-flash', + 'gemini-2.5-flash', + 'gemini-3.5-flash', + 'gemini-flash-latest' +]; + +$successfulModel = null; +$audioResponse = null; + +foreach ($candidateModels as $model) { + echo "Testing model: models/{$model} ... "; + + $url = 'https://generativelanguage.googleapis.com/v1beta/models/' . $model . ':generateContent?key=' . $apiKey; $payload = json_encode([ 'contents' => [[ 'role' => 'user', @@ -65,15 +79,15 @@ $testMsg = "مرحبا، هل متجركم مفتوح اليوم؟"; curl_setopt($ch, CURLOPT_POST, true); curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']); - curl_setopt($ch, CURLOPT_TIMEOUT, 30); + curl_setopt($ch, CURLOPT_TIMEOUT, 15); $response = curl_exec($ch); $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); curl_close($ch); if ($httpCode !== 200) { - echo "❌ [Gemini API Error] HTTP Status: " . $httpCode . "\n"; - echo "Response Payload: " . $response . "\n"; - $audioResponse = null; + $errorData = json_decode($response, true); + $errMsg = $errorData['error']['message'] ?? 'Unknown error'; + echo "❌ Failed (HTTP {$httpCode}: {$errMsg})\n"; } else { $data = json_decode($response, true); $part = $data['candidates'][0]['content']['parts'][0] ?? null; @@ -82,33 +96,89 @@ $testMsg = "مرحبا، هل متجركم مفتوح اليوم؟"; 'audio' => $part['inlineData']['data'], 'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4' ]; + $successfulModel = $model; + echo "✅ SUCCESS!\n"; + break; // Stop at first successful model } else { - echo "❌ [Gemini API Error] Response did not contain inlineData. Response: " . $response . "\n"; - $audioResponse = null; + echo "❌ Failed (No inlineData in response)\n"; } } +} - if ($audioResponse && !empty($audioResponse['audio'])) { - echo "✅ [Gemini] Successfully generated voice note!\n"; - echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse['mimeType'] . "\n"; - echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse['audio']) . " base64 chars\n"; - - // 3. Test Audio-to-Audio conversion - echo "\n--- Testing Audio-to-Audio (Speech-to-Speech) ---\n"; - $startTime = microtime(true); - $audioResponse2 = GeminiService::generateAudioResponseFromAudio($apiKey, $systemPrompt, $audioResponse['audio'], $audioResponse['mimeType'], 'Puck'); - $elapsedTime2 = round(microtime(true) - $startTime, 2); - - if ($audioResponse2 && !empty($audioResponse2['audio'])) { +if ($successfulModel && $audioResponse && !empty($audioResponse['audio'])) { + echo "\n✅ [Gemini] Successfully generated voice note using model: models/{$successfulModel}\n"; + echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse['mimeType'] . "\n"; + echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse['audio']) . " base64 chars\n"; + + // 3. Test Audio-to-Audio conversion using the successful model + echo "\n--- Testing Audio-to-Audio (Speech-to-Speech) ---\n"; + $startTime = microtime(true); + + // We temporarily override the model inside GeminiService for this test, but since GeminiService is not updated yet, + // we'll run a direct curl call for testing audio-to-audio: + $url = 'https://generativelanguage.googleapis.com/v1beta/models/' . $successfulModel . ':generateContent?key=' . $apiKey; + $payload2 = json_encode([ + 'contents' => [ + [ + 'role' => 'user', + 'parts' => [ + [ + 'inlineData' => [ + 'mimeType' => $audioResponse['mimeType'], + 'data' => $audioResponse['audio'] + ] + ], + [ + 'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة بصوتك بناءً على الإرشادات المحددة." + ] + ] + ] + ], + 'systemInstruction' => [ + 'parts' => [['text' => $systemPrompt]] + ], + 'generationConfig' => [ + 'responseModalities' => ['AUDIO'], + 'speechConfig' => [ + 'voiceConfig' => [ + 'prebuiltVoiceConfig' => [ + 'voiceName' => 'Puck' + ] + ] + ] + ] + ]); + + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $payload2); + curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']); + curl_setopt($ch, CURLOPT_TIMEOUT, 30); + $response2 = curl_exec($ch); + $httpCode2 = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + + $elapsedTime2 = round(microtime(true) - $startTime, 2); + + if ($httpCode2 === 200) { + $data2 = json_decode($response2, true); + $part2 = $data2['candidates'][0]['content']['parts'][0] ?? null; + if ($part2 && isset($part2['inlineData'])) { echo "✅ [Gemini] Successfully generated Audio-to-Audio response in {$elapsedTime2} seconds!\n"; - echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse2['mimeType'] . "\n"; - echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse2['audio']) . " base64 chars\n"; + echo "ℹ️ [Gemini] Audio MimeType: " . ($part2['inlineData']['mimeType'] ?? 'audio/mp4') . "\n"; + echo "ℹ️ [Gemini] Audio Size: " . strlen($part2['inlineData']['data']) . " base64 chars\n"; } else { - echo "❌ [Gemini] Audio-to-Audio generation failed.\n"; + echo "❌ [Gemini] Audio-to-Audio response did not contain inlineData.\n"; } } else { - echo "❌ [Gemini] Audio response generation failed.\n"; + $errorData2 = json_decode($response2, true); + $errMsg2 = $errorData2['error']['message'] ?? 'Unknown error'; + echo "❌ [Gemini] Audio-to-Audio generation failed. HTTP {$httpCode2}: {$errMsg2}\n"; } +} else { + echo "\n❌ [Gemini] All model trials for audio response generation failed.\n"; +} // 4. Check Node.js WhatsApp Gateway Status echo "\n--- Checking Node.js Gateway Status ---\n";