Implement Gemini audio voice note replies
This commit is contained in:
@@ -318,6 +318,7 @@ class WhatsAppController extends BaseController
|
|||||||
}
|
}
|
||||||
|
|
||||||
$replyText = null;
|
$replyText = null;
|
||||||
|
$replyAudio = null;
|
||||||
|
|
||||||
if ($rule['trigger_type'] === 'keyword') {
|
if ($rule['trigger_type'] === 'keyword') {
|
||||||
if (empty($incomingText)) {
|
if (empty($incomingText)) {
|
||||||
@@ -372,7 +373,15 @@ class WhatsAppController extends BaseController
|
|||||||
if (strpos($mimeType, ';') !== false) {
|
if (strpos($mimeType, ';') !== false) {
|
||||||
$mimeType = trim(explode(';', $mimeType)[0]);
|
$mimeType = trim(explode(';', $mimeType)[0]);
|
||||||
}
|
}
|
||||||
$replyText = \App\Services\GeminiService::generateResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
// Try generating native audio response first
|
||||||
|
$audioResponse = \App\Services\GeminiService::generateAudioResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
||||||
|
if ($audioResponse && !empty($audioResponse['audio'])) {
|
||||||
|
$replyAudio = $audioResponse['audio'];
|
||||||
|
$replyText = '[صوت من الذكاء الاصطناعي]';
|
||||||
|
} else {
|
||||||
|
// Fallback to text output from audio
|
||||||
|
$replyText = \App\Services\GeminiService::generateResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType);
|
||||||
|
}
|
||||||
} elseif ($hasImage) {
|
} elseif ($hasImage) {
|
||||||
$mimeType = $msgData['imageMimeType'];
|
$mimeType = $msgData['imageMimeType'];
|
||||||
if (strpos($mimeType, ';') !== false) {
|
if (strpos($mimeType, ';') !== false) {
|
||||||
@@ -388,9 +397,9 @@ class WhatsAppController extends BaseController
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($replyText)) {
|
if (!empty($replyText) || !empty($replyAudio)) {
|
||||||
// Check if the reply contains [PAYMENT_RECEIPT: { ... }] tag from Gemini
|
// Check if the reply contains [PAYMENT_RECEIPT: { ... }] tag from Gemini
|
||||||
if (preg_match('/\[PAYMENT_RECEIPT:\s*(\{.*?\})\]/s', $replyText, $matches)) {
|
if (!empty($replyText) && preg_match('/\[PAYMENT_RECEIPT:\s*(\{.*?\})\]/s', $replyText, $matches)) {
|
||||||
$jsonStr = $matches[1];
|
$jsonStr = $matches[1];
|
||||||
// Strip the tag from the final reply sent to user
|
// Strip the tag from the final reply sent to user
|
||||||
$replyText = trim(str_replace($matches[0], '', $replyText));
|
$replyText = trim(str_replace($matches[0], '', $replyText));
|
||||||
@@ -410,11 +419,16 @@ class WhatsAppController extends BaseController
|
|||||||
$sendUrl = $gatewayUrl . '/api/messages/send';
|
$sendUrl = $gatewayUrl . '/api/messages/send';
|
||||||
}
|
}
|
||||||
|
|
||||||
$payload = json_encode([
|
$payloadData = [
|
||||||
'session_key' => $session['session_key'],
|
'session_key' => $session['session_key'],
|
||||||
'phone' => $msgData['phone'],
|
'phone' => $msgData['phone']
|
||||||
'message' => $replyText
|
];
|
||||||
]);
|
if (!empty($replyAudio)) {
|
||||||
|
$payloadData['audio'] = $replyAudio;
|
||||||
|
} else {
|
||||||
|
$payloadData['message'] = $replyText;
|
||||||
|
}
|
||||||
|
$payload = json_encode($payloadData);
|
||||||
|
|
||||||
$ch = curl_init($sendUrl);
|
$ch = curl_init($sendUrl);
|
||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
|
||||||
@@ -450,7 +464,7 @@ class WhatsAppController extends BaseController
|
|||||||
'session_id' => $session['id'],
|
'session_id' => $session['id'],
|
||||||
'contact_phone' => $msgData['phone'],
|
'contact_phone' => $msgData['phone'],
|
||||||
'direction' => 'outbound',
|
'direction' => 'outbound',
|
||||||
'message_type' => 'text',
|
'message_type' => !empty($replyAudio) ? 'audio' : 'text',
|
||||||
'message_body' => $replyText,
|
'message_body' => $replyText,
|
||||||
'whatsapp_message_id' => $waMsgId,
|
'whatsapp_message_id' => $waMsgId,
|
||||||
'status' => $status,
|
'status' => $status,
|
||||||
|
|||||||
@@ -212,4 +212,140 @@ class GeminiService
|
|||||||
$data = json_decode($response, true);
|
$data = json_decode($response, true);
|
||||||
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
|
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Ask the Gemini `generateContent` endpoint for a spoken (audio) reply to a text message.
 *
 * The request carries $userMessage as the single user turn, $systemPrompt as the
 * system instruction, and asks for the AUDIO response modality using the given
 * prebuilt voice.
 *
 * NOTE(review): confirm that the `gemini-flash-lite-latest` model accepts
 * `responseModalities: ['AUDIO']`, and which audio encoding it actually returns;
 * the 'audio/mp4' value below is only a fallback used when the API omits a MIME type.
 *
 * @param string $apiKey       Gemini API key, sent as the `key` query parameter.
 * @param string $systemPrompt Text placed in `systemInstruction`.
 * @param string $userMessage  Text of the user turn to answer.
 * @param string $voiceName    Prebuilt voice name; defaults to 'Puck'.
 *
 * @return array|null ['audio' => inline data string as returned by the API,
 *                     'mimeType' => reported MIME type or 'audio/mp4'],
 *                    or null when the request fails or no audio part is present.
 */
public static function generateAudioResponse(string $apiKey, string $systemPrompt, string $userMessage, string $voiceName = 'Puck'): ?array
{
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . $apiKey;

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    ['text' => $userMessage],
                ],
            ],
        ],
        'systemInstruction' => [
            'parts' => [
                ['text' => $systemPrompt],
            ],
        ],
        'generationConfig' => [
            'responseModalities' => ['AUDIO'],
            'speechConfig' => [
                'voiceConfig' => [
                    'prebuiltVoiceConfig' => [
                        'voiceName' => $voiceName,
                    ],
                ],
            ],
        ],
    ]);

    // json_encode() returns false on e.g. malformed UTF-8 input; never POST that.
    if ($payload === false) {
        error_log('[Gemini Audio API Error] Failed to encode request payload: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log('[Gemini Audio API Error] Unable to initialise cURL handle');
        return null;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
    ]);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Must be read before the handle is closed.
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, TLS, timeout): there is no HTTP body to report.
    if ($response === false) {
        error_log('[Gemini Audio API Error] cURL failure: ' . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini Audio API Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $parts = $data['candidates'][0]['content']['parts'] ?? [];
    if (!is_array($parts)) {
        return null;
    }

    // The audio is not guaranteed to be the first part, so scan for the first
    // part that actually carries inline data.
    foreach ($parts as $part) {
        $audio = $part['inlineData']['data'] ?? null;
        if (is_string($audio) && $audio !== '') {
            return [
                'audio' => $audio,
                'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4',
            ];
        }
    }

    return null;
}
|
||||||
|
|
||||||
|
/**
 * Ask the Gemini `generateContent` endpoint for a spoken (audio) reply to an audio message.
 *
 * The user turn contains the supplied audio as inline data plus a fixed Arabic
 * instruction (roughly: "listen to the attached recording and answer it directly
 * with your voice, following the given guidelines"). $systemPrompt is sent as the
 * system instruction and the AUDIO response modality is requested with the given
 * prebuilt voice.
 *
 * NOTE(review): confirm that the `gemini-flash-lite-latest` model accepts
 * `responseModalities: ['AUDIO']`, and which audio encoding it actually returns;
 * the 'audio/mp4' value below is only a fallback used when the API omits a MIME type.
 *
 * @param string $apiKey       Gemini API key, sent as the `key` query parameter.
 * @param string $systemPrompt Text placed in `systemInstruction`.
 * @param string $audioBase64  Audio payload forwarded as `inlineData.data`
 *                             (presumably base64-encoded, per the parameter name).
 * @param string $mimeType     MIME type of the audio; any `;parameter` suffix is stripped.
 * @param string $voiceName    Prebuilt voice name; defaults to 'Puck'.
 *
 * @return array|null ['audio' => inline data string as returned by the API,
 *                     'mimeType' => reported MIME type or 'audio/mp4'],
 *                    or null when the request fails or no audio part is present.
 */
public static function generateAudioResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType, string $voiceName = 'Puck'): ?array
{
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . $apiKey;

    // Keep only the bare media type, e.g. "audio/ogg; codecs=opus" -> "audio/ogg".
    if (strpos($mimeType, ';') !== false) {
        $mimeType = trim(explode(';', $mimeType)[0]);
    }

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    [
                        'inlineData' => [
                            'mimeType' => $mimeType,
                            'data' => $audioBase64,
                        ],
                    ],
                    [
                        'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة بصوتك بناءً على الإرشادات المحددة.",
                    ],
                ],
            ],
        ],
        'systemInstruction' => [
            'parts' => [
                ['text' => $systemPrompt],
            ],
        ],
        'generationConfig' => [
            'responseModalities' => ['AUDIO'],
            'speechConfig' => [
                'voiceConfig' => [
                    'prebuiltVoiceConfig' => [
                        'voiceName' => $voiceName,
                    ],
                ],
            ],
        ],
    ]);

    // json_encode() returns false on e.g. malformed UTF-8 input; never POST that.
    if ($payload === false) {
        error_log('[Gemini Audio-to-Audio Response Error] Failed to encode request payload: ' . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log('[Gemini Audio-to-Audio Response Error] Unable to initialise cURL handle');
        return null;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $payload);
    curl_setopt($ch, CURLOPT_HTTPHEADER, [
        'Content-Type: application/json',
    ]);
    // Audio-to-audio generation gets a longer budget than the text-to-audio call.
    curl_setopt($ch, CURLOPT_TIMEOUT, 45);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Must be read before the handle is closed.
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, TLS, timeout): there is no HTTP body to report.
    if ($response === false) {
        error_log('[Gemini Audio-to-Audio Response Error] cURL failure: ' . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini Audio-to-Audio Response Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $parts = $data['candidates'][0]['content']['parts'] ?? [];
    if (!is_array($parts)) {
        return null;
    }

    // The audio is not guaranteed to be the first part, so scan for the first
    // part that actually carries inline data.
    foreach ($parts as $part) {
        $audio = $part['inlineData']['data'] ?? null;
        if (is_string($audio) && $audio !== '') {
            return [
                'audio' => $audio,
                'mimeType' => $part['inlineData']['mimeType'] ?? 'audio/mp4',
            ];
        }
    }

    return null;
}
|
||||||
}
|
}
|
||||||
|
|||||||
88
backend/public/test_audio.php
Normal file
88
backend/public/test_audio.php
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
<?php
// Diagnostics script for the chatbot audio-reply pipeline. It prints a plain-text
// report covering: the active Gemini chatbot rule and API key, a text-to-audio
// Gemini call, an audio-to-audio Gemini call, and the WhatsApp gateway health check.
//
// NOTE(review): the access token is hard-coded in a file under the public web
// root. Consider moving it to configuration, or removing this script from
// production deployments.

// Token gate: reject non-string input (e.g. ?token[]=x) and compare in constant time.
$providedToken = $_GET['token'] ?? '';
if (!is_string($providedToken) || !hash_equals('nabeh_test_audio_1298', $providedToken)) {
    http_response_code(403);
    die('Unauthorized access');
}

require_once dirname(__DIR__) . '/app/bootstrap.php';

use App\Services\GeminiService;
use App\Models\ChatbotRule;
use App\Core\Database;

header('Content-Type: text/plain; charset=utf-8');

echo "=== Starting Chatbot Audio Reply Diagnostics ===\n\n";

// 1. Fetch Chatbot Rules and check for Gemini Key
$rules = Database::select("SELECT * FROM chatbot_rules WHERE trigger_type = 'gemini_ai' AND is_active = 1 LIMIT 1");
if (empty($rules)) {
    echo "❌ [Database] No active chatbot rule found with 'gemini_ai' trigger type.\n";
    exit(1);
}

$rule = $rules[0];
// Prefer the key stored on the rule; fall back to the environment when it is absent or empty.
$apiKey = ($rule['gemini_api_key'] ?? '') ?: getenv('GEMINI_API_KEY');

if (empty($apiKey)) {
    echo "❌ [Config] Gemini API Key is empty. Please set gemini_api_key in the active chatbot rule or GEMINI_API_KEY in the .env file.\n";
    exit(1);
}

echo "✅ [Database] Found active Gemini rule for Company ID: {$rule['company_id']}\n";
echo "ℹ️ [Config] Gemini API Key starts with: " . substr($apiKey, 0, 6) . "...\n";

// 2. Test Gemini API Audio Output (Puck Voice, gemini-flash-lite-latest)
echo "\n--- Testing Gemini Audio Response ---\n";
$systemPrompt = "You are a friendly customer service assistant. Speak in a warm, welcoming tone.";
$testMsg = "مرحبا، هل متجركم مفتوح اليوم؟";

echo "Sending text prompt: '$testMsg'\n";
$startTime = microtime(true);
$audioResponse = GeminiService::generateAudioResponse($apiKey, $systemPrompt, $testMsg, 'Puck');
$elapsedTime = round(microtime(true) - $startTime, 2);

if ($audioResponse && !empty($audioResponse['audio'])) {
    echo "✅ [Gemini] Successfully generated voice note in {$elapsedTime} seconds!\n";
    echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse['mimeType'] . "\n";
    echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse['audio']) . " base64 chars\n";

    // 3. Test Audio-to-Audio conversion, feeding the audio generated in step 2 back in
    echo "\n--- Testing Audio-to-Audio (Speech-to-Speech) ---\n";
    $startTime = microtime(true);
    $audioResponse2 = GeminiService::generateAudioResponseFromAudio($apiKey, $systemPrompt, $audioResponse['audio'], $audioResponse['mimeType'], 'Puck');
    $elapsedTime2 = round(microtime(true) - $startTime, 2);

    if ($audioResponse2 && !empty($audioResponse2['audio'])) {
        echo "✅ [Gemini] Successfully generated Audio-to-Audio response in {$elapsedTime2} seconds!\n";
        echo "ℹ️ [Gemini] Audio MimeType: " . $audioResponse2['mimeType'] . "\n";
        echo "ℹ️ [Gemini] Audio Size: " . strlen($audioResponse2['audio']) . " base64 chars\n";
    } else {
        // Failure details are written by GeminiService via error_log(), not to this output.
        echo "❌ [Gemini] Audio-to-Audio generation failed. Please check the logs.\n";
    }
} else {
    echo "❌ [Gemini] Audio response generation failed. Please check the logs.\n";
}

// 4. Check Node.js WhatsApp Gateway Status
echo "\n--- Checking Node.js Gateway Status ---\n";
$gatewayUrl = rtrim(getenv('WHATSAPP_GATEWAY_URL') ?: 'http://localhost:3722', '/');
echo "Gateway URL: {$gatewayUrl}\n";

$ch = curl_init($gatewayUrl . '/health');
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_TIMEOUT, 5);
$healthResponse = curl_exec($ch);
$healthHttpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
$healthError = curl_error($ch);
curl_close($ch);

if ($healthHttpCode === 200) {
    echo "✅ [Gateway] Gateway is ONLINE and healthy.\n";
    echo "ℹ️ [Gateway] Response: {$healthResponse}\n";
} else {
    echo "❌ [Gateway] Gateway is OFFLINE or returning error. HTTP Code: {$healthHttpCode}. Error: {$healthError}\n";
}

echo "\n=== Diagnostics Complete ===\n";
|
||||||
@@ -288,7 +288,7 @@ async function disconnectSession(session_key) {
|
|||||||
/**
|
/**
|
||||||
* Send a message using an active session
|
* Send a message using an active session
|
||||||
*/
|
*/
|
||||||
async function sendMessage(session_key, phone, message, mediaUrl = null) {
|
async function sendMessage(session_key, phone, message, mediaUrl = null, audioBase64 = null) {
|
||||||
const sock = sessions.get(session_key);
|
const sock = sessions.get(session_key);
|
||||||
if (!sock) {
|
if (!sock) {
|
||||||
throw new Error(`Session ${session_key} is not active or connected`);
|
throw new Error(`Session ${session_key} is not active or connected`);
|
||||||
@@ -296,6 +296,15 @@ async function sendMessage(session_key, phone, message, mediaUrl = null) {
|
|||||||
|
|
||||||
let jid = phone.includes('@') ? phone : `${phone}@s.whatsapp.net`;
|
let jid = phone.includes('@') ? phone : `${phone}@s.whatsapp.net`;
|
||||||
|
|
||||||
|
if (audioBase64) {
|
||||||
|
const buffer = Buffer.from(audioBase64, 'base64');
|
||||||
|
return await sock.sendMessage(jid, {
|
||||||
|
audio: buffer,
|
||||||
|
mimetype: 'audio/mp4',
|
||||||
|
ptt: true
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (mediaUrl) {
|
if (mediaUrl) {
|
||||||
const ext = mediaUrl.split('.').pop().toLowerCase();
|
const ext = mediaUrl.split('.').pop().toLowerCase();
|
||||||
if (['jpg', 'jpeg', 'png', 'webp'].includes(ext)) {
|
if (['jpg', 'jpeg', 'png', 'webp'].includes(ext)) {
|
||||||
|
|||||||
@@ -83,14 +83,18 @@ app.get('/api/sessions/active', (req, res) => {
|
|||||||
|
|
||||||
// Send outbound message
|
// Send outbound message
|
||||||
app.post('/api/messages/send', async (req, res) => {
|
app.post('/api/messages/send', async (req, res) => {
|
||||||
const { session_key, phone, message, media_url } = req.body;
|
const { session_key, phone, message, media_url, audio } = req.body;
|
||||||
|
|
||||||
if (!session_key || !phone || !message) {
|
if (!session_key || !phone) {
|
||||||
return res.status(400).json({ error: 'Missing session_key, phone, or message' });
|
return res.status(400).json({ error: 'Missing session_key or phone' });
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!message && !audio) {
|
||||||
|
return res.status(400).json({ error: 'Missing message or audio' });
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = await sendMessage(session_key, phone, message, media_url);
|
const result = await sendMessage(session_key, phone, message, media_url, audio);
|
||||||
res.json({ status: 'success', data: result });
|
res.json({ status: 'success', data: result });
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
console.error(`Error sending message via ${session_key} to ${phone}:`, err);
|
console.error(`Error sending message via ${session_key} to ${phone}:`, err);
|
||||||
|
|||||||
Reference in New Issue
Block a user