Deploy: 2026-05-22 21:52:51

This commit is contained in:
Hamza-Ayed
2026-05-22 21:52:51 +03:00
parent 5269789b51
commit 8acca92bba
12 changed files with 1938 additions and 285 deletions

View File

@@ -155,6 +155,59 @@ class GeminiService
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
}
/**
 * Transcribe an incoming audio voice note to Arabic text using gemini-2.0-flash-lite.
 *
 * @param string $apiKey      Gemini API key, sent as the `key` query parameter.
 * @param string $audioBase64 Base64-encoded audio, sent to the model as inline data.
 * @param string $mimeType    Audio MIME type; anything after the first ';' (e.g. codec info) is dropped.
 *
 * @return string|null Trimmed transcription ('' when the response carries no text part),
 *                     or null when the request could not be built or sent, or the API
 *                     answered with anything other than HTTP 200.
 */
public static function transcribeAudio(string $apiKey, string $audioBase64, string $mimeType): ?string
{
    // Without a key or audio the request can only fail, so skip the network round-trip.
    if ($apiKey === '' || $audioBase64 === '') {
        error_log("[Gemini Audio Transcription Error] Missing API key or audio data");
        return null;
    }

    // Encode the key so unexpected characters cannot corrupt the query string.
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite:generateContent?key='
        . rawurlencode($apiKey);

    // Keep only the bare media type, e.g. "audio/ogg; codecs=opus" -> "audio/ogg".
    $mimeType = trim(explode(';', $mimeType, 2)[0]);

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    [
                        'inlineData' => [
                            'mimeType' => $mimeType,
                            'data' => $audioBase64,
                        ],
                    ],
                    [
                        'text' => "Transcribe the following audio message to Arabic text. Output only the transcription, no translation, no commentary, no markdown, and no code blocks.",
                    ],
                ],
            ],
        ],
    ]);
    // json_encode returns false (e.g. on invalid UTF-8); never post that as the request body.
    if ($payload === false) {
        error_log("[Gemini Audio Transcription Error] Payload encoding failed: " . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log("[Gemini Audio Transcription Error] Unable to initialise cURL");
        return null;
    }
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_TIMEOUT => 30,
    ]);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Read the transport error before the handle is closed.
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, timeout, TLS, ...): there is no body to log, so log the cURL error.
    if ($response === false) {
        error_log("[Gemini Audio Transcription Error] cURL failure: " . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini Audio Transcription Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $text = $data['candidates'][0]['content']['parts'][0]['text'] ?? '';

    // Guard against a malformed response where the text node is not a string.
    return is_string($text) ? trim($text) : '';
}
/**
* Call Gemini API with image inline data and system instruction to generate a response text
*/
@@ -213,6 +266,64 @@ class GeminiService
return $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
}
/**
 * Call Gemini API (gemini-2.0-flash-lite) with image inline data and a custom prompt
 * to extract structured OCR data.
 *
 * @param string $apiKey      Gemini API key, sent as the `key` query parameter.
 * @param string $prompt      Instruction text sent as the first part of the user message.
 * @param string $imageBase64 Base64-encoded image, sent to the model as inline data.
 * @param string $mimeType    Image MIME type; anything after the first ';' is dropped.
 *
 * @return string|null The model's text with Markdown code fences removed and whitespace trimmed,
 *                     or null when the request could not be built or sent, the API answered
 *                     with anything other than HTTP 200, or the response carries no text part.
 */
public static function generateOcrFromImage(string $apiKey, string $prompt, string $imageBase64, string $mimeType): ?string
{
    // Without a key or image the request can only fail, so skip the network round-trip.
    if ($apiKey === '' || $imageBase64 === '') {
        error_log("[Gemini OCR Image Response Error] Missing API key or image data");
        return null;
    }

    // Encode the key so unexpected characters cannot corrupt the query string.
    $url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite:generateContent?key='
        . rawurlencode($apiKey);

    // Keep only the bare media type, e.g. "image/jpeg; charset=binary" -> "image/jpeg".
    $mimeType = trim(explode(';', $mimeType, 2)[0]);

    $payload = json_encode([
        'contents' => [
            [
                'role' => 'user',
                'parts' => [
                    [
                        'text' => $prompt,
                    ],
                    [
                        'inlineData' => [
                            'mimeType' => $mimeType,
                            'data' => $imageBase64,
                        ],
                    ],
                ],
            ],
        ],
    ]);
    // The prompt is caller-supplied; invalid UTF-8 makes json_encode return false.
    if ($payload === false) {
        error_log("[Gemini OCR Image Response Error] Payload encoding failed: " . json_last_error_msg());
        return null;
    }

    $ch = curl_init($url);
    if ($ch === false) {
        error_log("[Gemini OCR Image Response Error] Unable to initialise cURL");
        return null;
    }
    curl_setopt_array($ch, [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_POST => true,
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
        CURLOPT_TIMEOUT => 35, // 35 seconds timeout for image analysis
    ]);

    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    // Read the transport error before the handle is closed.
    $curlError = curl_error($ch);
    curl_close($ch);

    // Transport-level failure (DNS, timeout, TLS, ...): there is no body to log, so log the cURL error.
    if ($response === false) {
        error_log("[Gemini OCR Image Response Error] cURL failure: " . $curlError);
        return null;
    }

    if ($httpCode !== 200) {
        error_log("[Gemini OCR Image Response Error] HTTP " . $httpCode . " | Response: " . $response);
        return null;
    }

    $data = json_decode($response, true);
    $textRaw = $data['candidates'][0]['content']['parts'][0]['text'] ?? null;
    if (!is_string($textRaw)) {
        return null;
    }

    // Strip Markdown fences ("```json", "```JSON", "```") so callers receive the bare payload.
    // Fall back to the raw text if PCRE reports an error and returns null.
    $cleaned = preg_replace('/```(?:json)?/i', '', $textRaw) ?? $textRaw;

    return trim($cleaned);
}
/**
* Call ElevenLabs API to generate a native audio response from text
*/