/**
 * ════════════════════════════════════════════════════════════
 * Musadaq — Gemini AI Extraction Service
 * ════════════════════════════════════════════════════════════
 * Extracts data from invoice images/files using Gemini.
 * Turns the unstructured content into JSON that is intended to
 * conform to the UBL 2.1 standard.
 * ════════════════════════════════════════════════════════════
 */
import { Injectable, Logger, InternalServerErrorException } from '@nestjs/common';
import { ConfigService } from '@nestjs/config';
import { GenerativeModel, GoogleGenerativeAI } from '@google/generative-ai';
import * as fs from 'fs';
import * as path from 'path';

/**
 * Instruction text sent to the model together with the invoice file.
 * This is runtime text: it is kept verbatim on purpose, so do not reflow it.
 */
const EXTRACTION_PROMPT = ` You are a Jordanian tax expert. Extract all details from this file (image or PDF). The file may contain ONE or MULTIPLE distinct invoices. Extract EACH invoice separately. 
The output MUST be a strict JSON object with this schema: { "invoices": [ { "invoice_number": "string", "invoice_date": "YYYY-MM-DD", "invoice_type": "cash" | "credit", "invoice_category": "standard" | "simplified", "supplier_name": "string", "supplier_tin": "string (10 digits)", "buyer_name": "string (optional)", "buyer_tin": "string (optional)", "subtotal": number (before discount and tax), "discount_total": number (total discount), "tax_amount": number (total tax), "grand_total": number (final amount), "currency_code": "JOD", "lines": [ { "line_number": number, "description": "string", "quantity": number, "unit_price": number, "discount": number, "tax_rate": number (e.g. 0.16 for 16%), "line_total": number (quantity * unit_price - discount) } ] } ] } JoFotara Specific Rules: - invoice_category: "simplified" if the buyer is a regular person (no TIN), "standard" if B2B (buyer has TIN). - Prices in Jordan (JOD) often have 3 decimal places (e.g. 2.800). - Standard VAT Rate: 0.16 (16%) - The formula MUST hold: Subtotal - Discount + Tax = Grand Total. - If multiple invoices are in the PDF, ensure the "invoices" array contains all of them. Return ONLY the JSON. No markdown formatting. `;

/** MIME type reported to Gemini for each recognised (lower-cased) file extension. */
const MIME_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map([
  ['.pdf', 'application/pdf'],
  ['.png', 'image/png'],
  ['.webp', 'image/webp'],
]);

/** MIME type used for every extension that is not listed above. */
const DEFAULT_MIME_TYPE = 'image/jpeg';

/**
 * NestJS service that sends an invoice file to a Gemini model and returns the
 * invoices the model extracted from it.
 *
 * Configuration is read once, in the constructor:
 * - `GEMINI_API_KEY` (required; construction throws when it is missing)
 * - `GEMINI_MODEL` (optional; defaults to `gemini-1.5-flash`)
 */
@Injectable()
export class GeminiExtractorService {
  private readonly logger = new Logger(GeminiExtractorService.name);
  private readonly genAI: GoogleGenerativeAI;
  private readonly model: GenerativeModel;

  constructor(private configService: ConfigService) {
    const apiKey = this.configService.getOrThrow<string>('GEMINI_API_KEY');
    this.genAI = new GoogleGenerativeAI(apiKey);
    this.model = this.genAI.getGenerativeModel({
      model: this.configService.get<string>('GEMINI_MODEL', 'gemini-1.5-flash'),
    });
  }

  /**
   * Extracts invoice data from an invoice file (supports multiple invoices in
   * a single file).
   *
   * @param filePath Path of the file, relative to `storageRoot`. Its extension
   *   selects the MIME type sent to the model.
   * @param storageRoot Directory that `filePath` is resolved against. The
   *   resolved path must stay inside this directory.
   * @returns The `invoices` array from the model's JSON answer, or an empty
   *   array when the answer has no `invoices` field. Items are not validated
   *   against the schema described in the prompt.
   * @throws InternalServerErrorException On any failure (path outside the
   *   storage root, unreadable file, model call error, malformed JSON). The
   *   underlying cause is logged, not exposed to the caller.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- model output is unvalidated JSON; kept loose for existing callers
  async extractInvoiceData(filePath: string, storageRoot: string): Promise<any[]> {
    try {
      const fullPath = this.resolveInsideRoot(storageRoot, filePath);
      // Async read: a synchronous read here would block the event loop for
      // every other request while a large PDF is loaded.
      const fileData = await fs.promises.readFile(fullPath);

      const result = await this.model.generateContent([
        EXTRACTION_PROMPT,
        {
          inlineData: {
            data: fileData.toString('base64'),
            mimeType: this.detectMimeType(filePath),
          },
        },
      ]);

      return this.parseInvoices(result.response.text());
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      const stack = error instanceof Error ? error.stack : undefined;
      this.logger.error(`AI Extraction failed: ${message}`, stack);
      throw new InternalServerErrorException('AI Extraction failed');
    }
  }

  /** Joins `filePath` onto `storageRoot` and rejects results that leave the root (e.g. via `../`). */
  private resolveInsideRoot(storageRoot: string, filePath: string): string {
    const root = path.resolve(storageRoot);
    const fullPath = path.resolve(path.join(storageRoot, filePath));
    const relative = path.relative(root, fullPath);
    const escapesRoot =
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative); // different drive on Windows
    if (escapesRoot) {
      throw new Error('Invoice file path escapes the storage root');
    }
    return fullPath;
  }

  /** Maps the file extension to the MIME type sent to Gemini; unknown extensions are sent as JPEG. */
  private detectMimeType(filePath: string): string {
    const extension = path.extname(filePath).toLowerCase();
    return MIME_TYPE_BY_EXTENSION.get(extension) ?? DEFAULT_MIME_TYPE;
  }

  /** Strips optional markdown fences from the model's answer and returns its `invoices` array. */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- see extractInvoiceData
  private parseInvoices(responseText: string): any[] {
    // The prompt asks for bare JSON, but the model may still wrap it in a
    // ```json fence; remove the fence markers before parsing.
    const cleanedJson = responseText.replace(/```json|```/g, '').trim();
    const data: unknown = JSON.parse(cleanedJson);

    if (typeof data !== 'object' || data === null) {
      throw new Error('Gemini response is not a JSON object');
    }

    const invoices = (data as { invoices?: unknown }).invoices;
    if (!invoices) {
      return [];
    }
    if (!Array.isArray(invoices)) {
      throw new Error('Gemini response field "invoices" is not an array');
    }
    return invoices;
  }
}