Implement 2-step audio-to-audio process

2026-05-22 15:29:30 +03:00
parent ffd8c6f2a5
commit f1d57e2763
2 changed files with 22 additions and 133 deletions
--- a/backend/public/test_audio.php
+++ b/backend/public/test_audio.php
@@ -135,71 +135,19 @@ if ($successfulModel && $audioResponse && !empty($audioResponse['audio'])) {
    echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse['mimeType'] . "\n";
    echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse['audio']) . " base64 chars\n";
    
-    // 3. Test Audio-to-Audio conversion using the successful model
-    echo "\n--- Testing Audio-to-Audio (Speech-to-Speech) ---\n";
+    // 3. Test Audio-to-Audio (speech-to-speech) conversion via GeminiService::generateAudioResponseFromAudio()
+    echo "\n--- Testing Audio-to-Audio (Speech-to-Speech) via GeminiService ---\n";
    $startTime = microtime(true);
    
-    // We temporarily override the model inside GeminiService for this test, but since GeminiService is not updated yet,
-    // we'll run a direct curl call for testing audio-to-audio:
-    $url = 'https://generativelanguage.googleapis.com/v1beta/models/' . $successfulModel . ':generateContent?key=' . $apiKey;
-    $payload2 = json_encode([
-        'contents' => [
-            [
-                'role' => 'user',
-                'parts' => [
-                    [
-                        'inlineData' => [
-                            'mimeType' => $audioResponse['mimeType'],
-                            'data' => $audioResponse['audio']
-                        ]
-                    ],
-                    [
-                        'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة بصوتك بناءً على الإرشادات المحددة."
-                    ]
-                ]
-            ]
-        ],
-        'systemInstruction' => [
-            'parts' => [['text' => $systemPrompt]]
-        ],
-        'generationConfig' => [
-            'responseModalities' => ['AUDIO'],
-            'speechConfig' => [
-                'voiceConfig' => [
-                    'prebuiltVoiceConfig' => [
-                        'voiceName' => 'Puck'
-                    ]
-                ]
-            ]
-        ]
-    ]);
-    
-    $ch = curl_init($url);
-    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-    curl_setopt($ch, CURLOPT_POST, true);
-    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload2);
-    curl_setopt($ch, CURLOPT_HTTPHEADER, ['Content-Type: application/json']);
-    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
-    $response2 = curl_exec($ch);
-    $httpCode2 = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    curl_close($ch);
-    
+    $audioResponse2 = GeminiService::generateAudioResponseFromAudio($apiKey, $systemPrompt, $audioResponse['audio'], $audioResponse['mimeType'], 'Puck');
    $elapsedTime2 = round(microtime(true) - $startTime, 2);
    
-    if ($httpCode2 === 200) {
-        $data2 = json_decode($response2, true);
-        $part2 = $data2['candidates'][0]['content']['parts'][0] ?? null;
-        if ($part2 && isset($part2['inlineData'])) {
-            echo "✅ [Gemini] Successfully generated Audio-to-Audio response in {$elapsedTime2} seconds!\n";
-            echo "ℹ️ [Gemini] Audio MimeType: " . ($part2['inlineData']['mimeType'] ?? 'audio/mp4') . "\n";
-            echo "ℹ️ [Gemini] Audio Size: " . strlen($part2['inlineData']['data']) . " base64 chars\n";
-        } else {
-            echo "❌ [Gemini] Audio-to-Audio response did not contain inlineData.\n";
-        }
+    if ($audioResponse2 && !empty($audioResponse2['audio'])) {
+        echo "✅ [Gemini] Successfully generated Audio-to-Audio response in {$elapsedTime2} seconds!\n";
+        echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse2['mimeType'] . "\n";
+        echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse2['audio']) . " base64 chars\n";
    } else {
-        $errorData2 = json_decode($response2, true);
-        $errMsg2 = $errorData2['error']['message'] ?? 'Unknown error';
-        echo "❌ [Gemini] Audio-to-Audio generation failed. HTTP {$httpCode2}: {$errMsg2}\n";
+        echo "❌ [Gemini] Audio-to-Audio generation failed.\n";
    }
 } else {
    echo "\n❌ [Gemini] All model trials for audio response generation failed.\n";