From da7d3571e0045f8e367962da44f786a90c405a30 Mon Sep 17 00:00:00 2001 From: Hamza-Ayed Date: Fri, 22 May 2026 01:13:50 +0300 Subject: [PATCH] Deploy: 2026-05-22 01:13:50 --- .../app/Controllers/WhatsAppController.php | 24 ++++++-- backend/app/Services/GeminiService.php | 58 +++++++++++++++++++ whatsapp-gateway/baileys-client.js | 39 +++++++++++-- 3 files changed, 112 insertions(+), 9 deletions(-) diff --git a/backend/app/Controllers/WhatsAppController.php b/backend/app/Controllers/WhatsAppController.php index 008bf8e..0d7ecd5 100644 --- a/backend/app/Controllers/WhatsAppController.php +++ b/backend/app/Controllers/WhatsAppController.php @@ -218,13 +218,14 @@ class WhatsAppController extends BaseController } // 2. Log the incoming message in history log + $isAudioMsg = !empty($msgData['audio']) && !empty($msgData['mimeType']); \App\Models\MessageLog::logMessage([ 'company_id' => $session['company_id'], 'session_id' => $session['id'], 'contact_phone' => $msgData['phone'], 'direction' => 'inbound', - 'message_type' => 'text', - 'message_body' => $msgData['body'], + 'message_type' => $isAudioMsg ? 'audio' : 'text', + 'message_body' => $isAudioMsg ? ($msgData['body'] ?: '[Voice Note]') : $msgData['body'], 'whatsapp_message_id' => $msgData['id'], 'status' => 'read' ]); @@ -287,14 +288,19 @@ class WhatsAppController extends BaseController return; } - $incomingText = trim($msgData['body']); - if (empty($incomingText)) { + $incomingText = isset($msgData['body']) ? trim($msgData['body']) : ''; + $hasAudio = !empty($msgData['audio']) && !empty($msgData['mimeType']); + + if (empty($incomingText) && !$hasAudio) { return; } $replyText = null; if ($rule['trigger_type'] === 'keyword') { + if (empty($incomingText)) { + return; + } $keywords = array_filter(array_map('trim', explode(',', $rule['keyword']))); $matched = false; foreach ($keywords as $kw) { @@ -317,7 +323,15 @@ class WhatsAppController extends BaseController // Enforce language matching rule dynamically $systemPrompt .= "\n\nIMPORTANT LANGUAGE RULE: Detect the language of the incoming message. If the incoming message is in English, you MUST reply in English. If the incoming message is in Arabic, you MUST reply in Arabic. Override any default language instruction to match the user's language."; - $replyText = \App\Services\GeminiService::generateResponse($apiKey, $systemPrompt, $incomingText); + if ($hasAudio) { + $mimeType = $msgData['mimeType']; + if (strpos($mimeType, ';') !== false) { + $mimeType = trim(explode(';', $mimeType)[0]); + } + $replyText = \App\Services\GeminiService::generateResponseFromAudio($apiKey, $systemPrompt, $msgData['audio'], $mimeType); + } else { + $replyText = \App\Services\GeminiService::generateResponse($apiKey, $systemPrompt, $incomingText); + } } if (!empty($replyText)) { diff --git a/backend/app/Services/GeminiService.php b/backend/app/Services/GeminiService.php index 8d7086f..65757a2 100644 --- a/backend/app/Services/GeminiService.php +++ b/backend/app/Services/GeminiService.php @@ -96,4 +96,62 @@ class GeminiService $data = json_decode($response, true); return $data['candidates'][0]['content']['parts'][0]['text'] ?? null; } + + /** + * Call Gemini API with audio inline data and system instruction to generate a response text + */ + public static function generateResponseFromAudio(string $apiKey, string $systemPrompt, string $audioBase64, string $mimeType): ?string + { + $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-lite-latest:generateContent?key=' . $apiKey; + + // Clean mimeType if it contains codec info + if (strpos($mimeType, ';') !== false) { + $mimeType = trim(explode(';', $mimeType)[0]); + } + + $payload = json_encode([ + 'contents' => [ + [ + 'role' => 'user', + 'parts' => [ + [ + 'inlineData' => [ + 'mimeType' => $mimeType, + 'data' => $audioBase64 + ] + ], + [ + 'text' => "استمع إلى التسجيل الصوتي المرفق وأجب عليه مباشرة باللغة المناسبة بناءً على الإرشادات المحددة." + ] + ] + ] + ], + 'systemInstruction' => [ + 'parts' => [ + ['text' => $systemPrompt] + ] + ] + ]); + + $ch = curl_init($url); + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $payload); + curl_setopt($ch, CURLOPT_HTTPHEADER, [ + 'Content-Type: application/json' + ]); + curl_setopt($ch, CURLOPT_TIMEOUT, 35); // 35 seconds timeout for audio analysis + + $response = curl_exec($ch); + $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE); + curl_close($ch); + + if ($httpCode !== 200) { + error_log("[Gemini Audio Response Error] HTTP " . $httpCode . " | Response: " . $response); + return null; + } + + $data = json_decode($response, true); + return $data['candidates'][0]['content']['parts'][0]['text'] ?? null; + } } diff --git a/whatsapp-gateway/baileys-client.js b/whatsapp-gateway/baileys-client.js index e9ccb17..d27878f 100644 --- a/whatsapp-gateway/baileys-client.js +++ b/whatsapp-gateway/baileys-client.js @@ -1,6 +1,6 @@ const baileys = require('@whiskeysockets/baileys'); const makeWASocket = baileys.default || baileys.makeWASocket || baileys; -const { useMultiFileAuthState, DisconnectReason, fetchLatestBaileysVersion } = baileys; +const { useMultiFileAuthState, DisconnectReason, fetchLatestBaileysVersion, downloadMediaMessage } = baileys; const pino = require('pino'); const axios = require('axios'); const fs = require('fs'); @@ -103,8 +103,33 @@ async function startSession(session_key, webhook_url) { msg.message?.imageMessage?.caption || msg.message?.videoMessage?.caption || ''; - // For now, only process messages that have text content - if (!body) continue; + const isAudio = !!msg.message?.audioMessage; + + // Only process messages that have text content OR are audio messages + if (!body && !isAudio) continue; + + let audioBase64 = null; + let audioMimeType = null; + + if (isAudio) { + try { + console.log(`[Baileys] Downloading audio message for ${remoteJid}`); + const buffer = await downloadMediaMessage( + msg, + 'buffer', + {}, + { + logger: pino({ level: 'silent' }), + rekey: true + } + ); + audioBase64 = buffer.toString('base64'); + audioMimeType = msg.message.audioMessage.mimetype || 'audio/ogg'; + } catch (e) { + console.error('[Baileys] Failed to download audio message:', e.message); + continue; // Skip if audio download fails to prevent empty processing + } + } // Extract sender phone number (handle LID privacy scheme) let senderPhone = ''; @@ -120,7 +145,11 @@ async function startSession(session_key, webhook_url) { const senderName = msg.pushName || ''; - console.log(`[Message] Received from ${senderPhone} (JID: ${remoteJid}): ${body}`); + if (isAudio) { + console.log(`[Message] Received audio voice note from ${senderPhone} (JID: ${remoteJid})`); + } else { + console.log(`[Message] Received from ${senderPhone} (JID: ${remoteJid}): ${body}`); + } await sendWebhook(webhook_url, { session_key, @@ -130,6 +159,8 @@ async function startSession(session_key, webhook_url) { phone: senderPhone, name: senderName, body: body, + audio: audioBase64, + mimeType: audioMimeType, timestamp: msg.messageTimestamp } });