Implement Gemini audio voice note replies
This commit is contained in:
@@ -318,6 +318,7 @@ class WhatsAppController extends BaseController
|
||||
}
|
||||
|
||||
$replyText = null;
|
||||
$replyAudio = null;
|
||||
|
||||
if ($rule['trigger_type'] === 'keyword') {
|
||||
if (empty($incomingText)) {
|
||||
@@ -372,7 +373,15 @@ class WhatsAppController extends BaseController
|
||||
if (strpos($mimeType, ';') !== false) {
|
||||
$mimeType = trim(explode(';', $mimeType)[0]);
|
||||
}
|
||||
$replyText = \App\Services\GeminiService::generateResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
||||
// Try generating native audio response first
|
||||
$audioResponse = \App\Services\GeminiService::generateAudioResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
||||
if ($audioResponse && !empty($audioResponse['audio'])) {
|
||||
$replyAudio = $audioResponse['audio'];
|
||||
$replyText = '[صوت من الذكاء الاصطناعي]';
|
||||
} else {
|
||||
// Fallback to text output from audio
|
||||
$replyText = \App\Services\GeminiService::generateResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
||||
}
|
||||
} elseif ($hasImage) {
|
||||
$mimeType = $msgData['imageMimeType'];
|
||||
if (strpos($mimeType, ';') !== false) {
|
||||
@@ -388,9 +397,9 @@ class WhatsAppController extends BaseController
|
||||
}
|
||||
}
|
||||
|
||||
if (!empty($replyText)) {
|
||||
if (!empty($replyText) || !empty($replyAudio)) {
|
||||
// Check if the reply contains [PAYMENT_RECEIPT: { ... }] tag from Gemini
|
||||
if (preg_match('/\[PAYMENT_RECEIPT:\s*(\{.*?\})\]/s', $replyText, $matches)) {
|
||||
if (!empty($replyText) && preg_match('/\[PAYMENT_RECEIPT:\s*(\{.*?\})\]/s', $replyText, $matches)) {
|
||||
$jsonStr = $matches[1];
|
||||
// Strip the tag from the final reply sent to user
|
||||
$replyText = trim(str_replace($matches[0], '', $replyText));
|
||||
@@ -410,11 +419,16 @@ class WhatsAppController extends BaseController
|
||||
$sendUrl = $gatewayUrl . '/api/messages/send';
|
||||
}
|
||||
|
||||
$payload = json_encode([
|
||||
$payloadData = [
|
||||
'session_key' => $session['session_key'],
|
||||
'phone' => $msgData['phone'],
|
||||
'message' => $replyText
|
||||
]);
|
||||
'phone' => $msgData['phone']
|
||||
];
|
||||
if (!empty($replyAudio)) {
|
||||
$payloadData['audio'] = $replyAudio;
|
||||
} else {
|
||||
$payloadData['message'] = $replyText;
|
||||
}
|
||||
$payload = json_encode($payloadData);
|
||||
|
||||
$ch = curl_init($sendUrl);
|
||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||
@@ -450,7 +464,7 @@ class WhatsAppController extends BaseController
|
||||
'session_id' => $session['id'],
|
||||
'contact_phone' => $msgData['phone'],
|
||||
'direction' => 'outbound',
|
||||
'message_type' => 'text',
|
||||
'message_type' => !empty($replyAudio) ? 'audio' : 'text',
|
||||
'message_body' => $replyText,
|
||||
'whatsapp_message_id' => $waMsgId,
|
||||
'status' => $status,
|
||||
|
||||
@@ -212,4 +212,140 @@ class GeminiService
|
||||
$data = json_decode($response, true);
|
||||
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
|
||||
}
|
||||
|
||||
/**
 * Call the Gemini API to generate a native audio (speech) response from text.
 *
 * Sends $userMessage as the single user turn, $systemPrompt as the system
 * instruction, and asks for an AUDIO response spoken with the given prebuilt
 * voice.
 *
 * NOTE(review): confirm that the "gemini-flash-lite-latest" model alias
 * accepts responseModalities = ['AUDIO'] on generateContent; if it does not,
 * this method will always log an HTTP error and return null.
 *
 * @param string $apiKey       Gemini API key, sent as the "key" query parameter.
 * @param string $systemPrompt Text placed in systemInstruction.
 * @param string $userMessage  Text the model should answer.
 * @param string $voiceName    Prebuilt voice name placed in speechConfig.
 *
 * @return array{audio: string, mimeType: string}|null The inline audio payload
 *         exactly as returned by the API (presumably base64 - verify against
 *         the consumer) and its MIME type, or null on any failure (empty
 *         input, encoding error, transport error, non-200 status, or a
 *         response without inline audio).
 */
public static function generateAudioResponse(string $apiKey, string $systemPrompt, string $userMessage, string $voiceName = 'Puck'): ?array
{
    // Nothing to synthesize; skip the network round-trip.
    if ($userMessage === '') {
        return null;
    }

    // rawurlencode keeps the query string well-formed even if the key contains reserved characters.
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . rawurlencode($apiKey);

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    ['text' => $userMessage],
                ],
            ],
        ],
        'systemInstruction' => [
            'parts' => [
                ['text' => $systemPrompt],
            ],
        ],
        'generationConfig' => [
            'responseModalities' => ['AUDIO'],
            'speechConfig' => [
                'voiceConfig' => [
                    'prebuiltVoiceConfig' => [
                        'voiceName' => $voiceName,
                    ],
                ],
            ],
        ],
    ]);

    // json_encode returns false on e.g. invalid UTF-8; never POST that.
    if ($payload === false) {
        error_log("[Gemini Audio API Error] Failed to encode request payload: " . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log("[Gemini Audio API Error] Failed to initialise cURL");
        return null;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
    ]);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $response = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, timeout, TLS...): there is no HTTP body to report.
    if ($response === false) {
        error_log("[Gemini Audio API Error] cURL failure: " . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini Audio API Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $parts = $data['candidates'][0]['content']['parts'] ?? null;
    if (!is_array($parts)) {
        return null;
    }

    // The audio is not guaranteed to be the first part; take the first part that carries inline data.
    foreach ($parts as $part) {
        $inlineData = is_array($part) ? ($part['inlineData'] ?? null) : null;
        if (is_array($inlineData) && !empty($inlineData['data'])) {
            return [
                'audio' => $inlineData['data'],
                'mimeType' => $inlineData['mimeType'] ?? 'audio/mp4',
            ];
        }
    }

    return null;
}
|
||||
|
||||
/**
 * Call the Gemini API with inline audio to generate a native audio response.
 *
 * Sends the supplied audio as an inlineData part followed by a fixed Arabic
 * instruction asking the model to listen and answer by voice, with
 * $systemPrompt as the system instruction and an AUDIO response modality.
 *
 * NOTE(review): confirm that the "gemini-flash-lite-latest" model alias
 * accepts audio input together with responseModalities = ['AUDIO'] on
 * generateContent; if it does not, this method will always log an HTTP error
 * and return null, and callers will rely on their text fallback.
 *
 * @param string $apiKey       Gemini API key, sent as the "key" query parameter.
 * @param string $systemPrompt Text placed in systemInstruction.
 * @param string $audioBase64  Audio payload placed verbatim in inlineData.data.
 * @param string $mimeType     MIME type of the audio; any ";parameters" suffix is stripped.
 * @param string $voiceName    Prebuilt voice name placed in speechConfig.
 *
 * @return array{audio: string, mimeType: string}|null The inline audio payload
 *         exactly as returned by the API (presumably base64 - verify against
 *         the consumer) and its MIME type, or null on any failure (empty
 *         input, encoding error, transport error, non-200 status, or a
 *         response without inline audio).
 */
public static function generateAudioResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType, string $voiceName = 'Puck'): ?array
{
    // No audio to answer; skip the network round-trip.
    if ($audioBase64 === '') {
        return null;
    }

    // rawurlencode keeps the query string well-formed even if the key contains reserved characters.
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . rawurlencode($apiKey);

    // Drop parameters such as "; codecs=opus" so only the bare type/subtype is sent.
    if (strpos($mimeType, ';') !== false) {
        $mimeType = trim(explode(';', $mimeType)[0]);
    }

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    [
                        'inlineData' => [
                            'mimeType' => $mimeType,
                            'data' => $audioBase64,
                        ],
                    ],
                    [
                        'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة بصوتك بناءً على الإرشادات المحددة.",
                    ],
                ],
            ],
        ],
        'systemInstruction' => [
            'parts' => [
                ['text' => $systemPrompt],
            ],
        ],
        'generationConfig' => [
            'responseModalities' => ['AUDIO'],
            'speechConfig' => [
                'voiceConfig' => [
                    'prebuiltVoiceConfig' => [
                        'voiceName' => $voiceName,
                    ],
                ],
            ],
        ],
    ]);

    // json_encode returns false on e.g. invalid UTF-8; never POST that.
    if ($payload === false) {
        error_log("[Gemini Audio-to-Audio Response Error] Failed to encode request payload: " . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log("[Gemini Audio-to-Audio Response Error] Failed to initialise cURL");
        return null;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
    ]);
    curl_setopt($ch, CURLOPT_TIMEOUT, 45); // 45 seconds timeout for audio-to-audio generation

    $response = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, timeout, TLS...): there is no HTTP body to report.
    if ($response === false) {
        error_log("[Gemini Audio-to-Audio Response Error] cURL failure: " . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini Audio-to-Audio Response Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $parts = $data['candidates'][0]['content']['parts'] ?? null;
    if (!is_array($parts)) {
        return null;
    }

    // The audio is not guaranteed to be the first part; take the first part that carries inline data.
    foreach ($parts as $part) {
        $inlineData = is_array($part) ? ($part['inlineData'] ?? null) : null;
        if (is_array($inlineData) && !empty($inlineData['data'])) {
            return [
                'audio' => $inlineData['data'],
                'mimeType' => $inlineData['mimeType'] ?? 'audio/mp4',
            ];
        }
    }

    return null;
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user