Implement Gemini audio voice note replies

This commit is contained in:
Hamza-Ayed
2026-05-22 15:10:13 +03:00
parent 395c8ee8eb
commit 2e7cd11f85
5 changed files with 264 additions and 13 deletions

View File

@@ -212,4 +212,140 @@ class GeminiService
$data = json_decode($response, true);
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
}
/**
* Call Gemini API to generate a native audio (speech) response from text
*/
public static function generateAudioResponse(string $apiKey, string $systemPrompt, string $userMessage, string $voiceName = 'Puck'): ?array
{
$url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . $apiKey;
$payload = json_encode([
'contents' => [
[
'role' => 'user',
'parts' => [
['text' => $userMessage]
]
]
],
'systemInstruction' => [
'parts' => [
['text' => $systemPrompt]
]
],
'generationConfig' => [
'responseModalities' => ['AUDIO'],
'speechConfig' => [
'voiceConfig' => [
'prebuiltVoiceConfig' => [
'voiceName' => $voiceName
]
]
]
]
]);
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Content-Type: application/json'
]);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
if ($httpCode !== 200) {
error_log("[Gemini Audio API Error] HTTP " . $httpCode . " | Response: " . $response);
return null;
}
$data = json_decode($response, true);
$part = $data['candidates'][0]['content']['parts'][0] ?? null;
if ($part && isset($part['inlineData'])) {
return [
'audio' => $part['inlineData']['data'],
'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4'
];
}
return null;
}
/**
* Call Gemini API with audio inline data to generate a native audio response
*/
public static function generateAudioResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType, string $voiceName = 'Puck'): ?array
{
$url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . $apiKey;
if (strpos($mimeType, ';') !== false) {
$mimeType = trim(explode(';', $mimeType)[0]);
}
$payload = json_encode([
'contents' => [
[
'role' => 'user',
'parts' => [
[
'inlineData' => [
'mimeType' => $mimeType,
'data' => $audioBase64
]
],
[
'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة بصوتك بناءً على الإرشادات المحددة."
]
]
]
],
'systemInstruction' => [
'parts' => [
['text' => $systemPrompt]
]
],
'generationConfig' => [
'responseModalities' => ['AUDIO'],
'speechConfig' => [
'voiceConfig' => [
'prebuiltVoiceConfig' => [
'voiceName' => $voiceName
]
]
]
]
]);
$ch = curl_init($url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
curl_setopt($ch, CURLOPT_HTTPHEADER, [
'Content-Type: application/json'
]);
curl_setopt($ch, CURLOPT_TIMEOUT, 45); // 45 seconds timeout for audio-to-audio generation
$response = curl_exec($ch);
$httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
curl_close($ch);
if ($httpCode !== 200) {
error_log("[Gemini Audio-to-Audio Response Error] HTTP " . $httpCode . " | Response: " . $response);
return null;
}
$data = json_decode($response, true);
$part = $data['candidates'][0]['content']['parts'][0] ?? null;
if ($part && isset($part['inlineData'])) {
return [
'audio' => $part['inlineData']['data'],
'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4'
];
}
return null;
}
}