Files
musadaq-saas/scripts/generate_jsonl.php
2026-05-09 17:09:49 +03:00

59 lines
1.7 KiB
PHP

<?php
/**
* JSONL Generator for Gemini Batch API
* This script scans a directory of images and generates a .jsonl file
* ready to be uploaded for Gemini Batch Processing.
*/
require_once __DIR__ . '/../app/bootstrap/init.php';

// Directory scanned for invoice files, and the JSONL file that is (re)created on each run.
$sourceDir = __DIR__ . '/../storage/batch_input';
$outputFile = __DIR__ . '/../storage/batch_requests.jsonl';

// Model name and extraction prompt are supplied by the project's AI configuration class.
$model = \App\Core\AIConfig::MODEL_NAME;
$prompt = \App\Core\AIConfig::getExtractionPrompt();

// Diagnostics go to stderr so they never mix with the normal progress output on stdout.
// The php://stderr stream is used instead of the STDERR constant, which only exists under the CLI SAPI.
$warn = function ($message) {
    file_put_contents('php://stderr', $message . "\n");
};

if (!is_dir($sourceDir)) {
    // Re-check is_dir() after a failed mkdir() in case the directory appeared in the meantime.
    if (!mkdir($sourceDir, 0755, true) && !is_dir($sourceDir)) {
        $warn("Error: could not create input directory: $sourceDir");
        exit(1);
    }
    die("Please put your invoice images in: $sourceDir and run again.\n");
}

// Collect input files with one glob() call per extension instead of a single GLOB_BRACE
// pattern: GLOB_BRACE is not available on every platform. The extensions are listed in the
// same order as the previous brace pattern so the file order (and thus custom_id numbering)
// is intended to stay the same.
$files = [];
foreach (['jpg', 'jpeg', 'png', 'pdf'] as $extension) {
    $matches = glob($sourceDir . '/*.' . $extension);
    if ($matches === false) {
        // glob() reports errors by returning false; treat that as "no matches".
        continue;
    }
    foreach ($matches as $match) {
        $files[] = $match;
    }
}

if (count($files) === 0) {
    // Bail out before opening the output so an existing JSONL file is not truncated to nothing.
    $warn("Error: no .jpg, .jpeg, .png or .pdf files found in: $sourceDir");
    exit(1);
}

$handle = fopen($outputFile, 'w');
if ($handle === false) {
    $warn("Error: could not open output file for writing: $outputFile");
    exit(1);
}

echo "Generating JSONL for " . count($files) . " files...\n";

$skipped = 0;
foreach ($files as $index => $filePath) {
    $contents = file_get_contents($filePath);
    if ($contents === false) {
        $warn("Warning: skipping unreadable file: $filePath");
        $skipped++;
        continue;
    }

    $mimeType = mime_content_type($filePath);
    if ($mimeType === false) {
        $warn("Warning: skipping file with undetectable MIME type: $filePath");
        $skipped++;
        continue;
    }

    // Build the request object for this line.
    // custom_id is derived from the file's position in $files, so a skipped file leaves a gap
    // rather than shifting the ids of the files that follow it.
    $request = [
        "custom_id" => "inv_" . ($index + 1),
        "method" => "POST",
        "url" => "/v1/models/{$model}:generateContent",
        "body" => [
            "contents" => [
                [
                    "parts" => [
                        ["text" => $prompt],
                        [
                            "inline_data" => [
                                "mime_type" => $mimeType,
                                "data" => base64_encode($contents),
                            ],
                        ],
                    ],
                ],
            ],
            "generationConfig" => [
                "response_mime_type" => "application/json",
            ],
        ],
    ];

    // JSON_UNESCAPED_SLASHES keeps the base64 payload compact ("/" instead of "\/");
    // the resulting JSON is equivalent.
    $line = json_encode($request, JSON_UNESCAPED_SLASHES);
    if ($line === false) {
        // Never write a blank or partial line: each line of the JSONL file must be one valid JSON document.
        $warn("Warning: skipping $filePath, JSON encoding failed: " . json_last_error_msg());
        $skipped++;
        continue;
    }

    $line .= "\n";
    $written = fwrite($handle, $line);
    if ($written === false || $written < strlen($line)) {
        // A failed or short write (e.g. disk full) would leave a corrupt file; stop immediately.
        fclose($handle);
        $warn("Error: failed to write to output file (it may be incomplete): $outputFile");
        exit(1);
    }
}

fclose($handle);

if ($skipped > 0) {
    $warn("Warning: $skipped of " . count($files) . " files were skipped.");
}

echo "Done! File saved to: $outputFile\n";