Update: 2026-06-13 00:57:17
This commit is contained in:
153
scratch/align_and_generate_driver_translations.py
Normal file
153
scratch/align_and_generate_driver_translations.py
Normal file
@@ -0,0 +1,153 @@
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
# One backslash followed by any single character inside a Dart string literal.
_DART_ESCAPE_RE = re.compile(r'\\(.)')

# Escapes that stand for a control character; any other escaped character
# (quote, backslash, dollar sign, ...) simply stands for itself.
_DART_CONTROL_ESCAPES = {
    'n': '\n',
    'r': '\r',
    't': '\t',
    'b': '\b',
    'f': '\f',
    'v': '\v',
}


def _unescape_dart_string(raw):
    """Decode the backslash escapes of a (non-raw) Dart string literal body.

    The text is decoded in a single left-to-right pass so that an escaped
    backslash is never re-interpreted as the start of another escape.
    ``\\uXXXX`` / ``\\xXX`` sequences are deliberately left untouched.
    """
    def _decode(match):
        char = match.group(1)
        if char in 'ux':
            # Unicode / hex escapes are kept verbatim rather than decoded.
            return match.group(0)
        return _DART_CONTROL_ESCAPES.get(char, char)

    return _DART_ESCAPE_RE.sub(_decode, raw)


def parse_dart_map(filepath):
    """Parse single-line ``"key": "value"`` entries from a Dart map file.

    Args:
        filepath: Path of the Dart source file to read (UTF-8).

    Returns:
        A dict mapping each unescaped key to its unescaped value. An empty
        dict is returned when ``filepath`` does not exist. When a key occurs
        more than once, the last occurrence wins.
    """
    translations = {}
    if not os.path.exists(filepath):
        return translations
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Matches entries of the form "key": "value" or 'key': 'value' that sit
    # on one line; the closing quote must equal the opening quote.
    pattern = re.compile(r'^\s*([\'"])(.*?)\1\s*:\s*([\'"])(.*?)\3\s*,?\s*$', re.MULTILINE)
    for _, raw_key, _, raw_val in pattern.findall(content):
        # Undo every escape the Dart writer may have produced (\\, \$, \n,
        # \r, \t and quotes) so a parse -> write round trip is stable and
        # keys compare equal to their plain-text form.
        translations[_unescape_dart_string(raw_key)] = _unescape_dart_string(raw_val)

    return translations
|
||||
|
||||
# Characters that need a backslash escape inside a double-quoted Dart string.
_DART_ESCAPE_TABLE = str.maketrans({
    '\\': '\\\\',
    '"': '\\"',
    '$': '\\$',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
})


def to_dart_string(s):
    """Escape ``s`` for embedding between double quotes in Dart source.

    Backslash, double quote and dollar sign are prefixed with a backslash;
    newline, carriage return and tab become ``\\n``, ``\\r`` and ``\\t``.
    All other characters are passed through unchanged.
    """
    return s.translate(_DART_ESCAPE_TABLE)
|
||||
|
||||
# Arabic dialect maps as they currently exist on disk.
ar_eg = parse_dart_map("siro_driver/lib/controller/local/ar_eg.dart")
ar_jo = parse_dart_map("siro_driver/lib/controller/local/ar_jo.dart")
ar_sy = parse_dart_map("siro_driver/lib/controller/local/ar_sy.dart")

# Translation data: Syrian strings collected so far plus the list of keys
# that still have to appear in every locale.
with open("siro_driver_translations_data.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
existing_syrian = json_data.get('existing_syrian', {})
missing_keys = json_data.get('missing_keys', [])

# Per-language maps for the non-Arabic locales.
with open("scratch/legacy_extracted_languages.json", "r", encoding="utf-8") as f:
    legacy_extracted = json.load(f)

# The master key list is the sorted union of every key seen in any source.
master_keys = sorted(
    set().union(ar_sy, ar_jo, ar_eg, existing_syrian, missing_keys)
)
print(f"Master keys count: {len(master_keys)}")
|
||||
|
||||
# Locales filled from the legacy extraction; English is the fallback for them.
non_arabic_langs = ['de', 'el', 'es', 'fa', 'fr', 'hi', 'it', 'ru', 'tr', 'ur', 'zh']

# One (initially empty) key -> translation map per output locale.
aligned_maps = {
    locale: {}
    for locale in ['ar-EG', 'ar-JO', 'ar-SY', 'en', *non_arabic_langs]
}

# Look each legacy language map up once instead of once per key.
legacy_by_lang = {lang: legacy_extracted.get(lang, {}) for lang in non_arabic_langs}

for key in master_keys:
    # English: the key is its own translation.
    aligned_maps['en'][key] = key

    # Syrian: own file first, then the JSON data, then the other dialects,
    # and finally the key itself. Empty values count as missing.
    val_sy = (
        ar_sy.get(key)
        or existing_syrian.get(key)
        or ar_jo.get(key)
        or ar_eg.get(key)
        or key
    )
    aligned_maps['ar-SY'][key] = val_sy

    # Jordanian: own file, otherwise whatever Syrian resolved to.
    val_jo = ar_jo.get(key) or val_sy
    aligned_maps['ar-JO'][key] = val_jo

    # Egyptian: own file, otherwise the Jordanian (or Syrian) result.
    val_eg = ar_eg.get(key) or val_jo
    aligned_maps['ar-EG'][key] = val_eg

    # Everything else: legacy translation, otherwise the English key.
    for lang in non_arabic_langs:
        aligned_maps[lang][key] = legacy_by_lang[lang].get(key) or key
|
||||
# Directory that receives every generated locale file.
output_dir = "siro_driver/lib/controller/local"


def write_dart_file(filename, map_name, data_map):
    """Write ``data_map`` as a Dart ``Map<String, String>`` declaration.

    Args:
        filename: File name created (or overwritten) inside ``output_dir``.
        map_name: Name of the Dart variable holding the map.
        data_map: Mapping of keys to translated strings; keys and values are
            escaped with ``to_dart_string`` and written in iteration order.
    """
    filepath = os.path.join(output_dir, filename)
    with open(filepath, 'w', encoding='utf-8') as f:
        # Emits: final Map<String, String> map_name = { ... };
        f.write(f"final Map<String, String> {map_name} = {{\n")
        f.writelines(
            f' "{to_dart_string(k)}": "{to_dart_string(v)}",\n'
            for k, v in data_map.items()
        )
        f.write("};\n")
    print(f"Wrote {filepath} with {len(data_map)} keys.")
|
||||
|
||||
# (Dart map / file stem, key into aligned_maps): Arabic dialects first,
# then English, then the remaining languages.
dart_targets = [
    ("ar_eg", 'ar-EG'),
    ("ar_jo", 'ar-JO'),
    ("ar_sy", 'ar-SY'),
    ("en", 'en'),
]
dart_targets += [
    (code, code)
    for code in ['de', 'el', 'es', 'fa', 'fr', 'hi', 'it', 'ru', 'tr', 'ur', 'zh']
]

for map_name, locale in dart_targets:
    write_dart_file(f"{map_name}.dart", map_name, aligned_maps[locale])

print("Alignment and file writing complete.")
|
||||
17
scratch/analyze_driver_translations.py
Normal file
17
scratch/analyze_driver_translations.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import re
|
||||
|
||||
def parse_translations_file(filepath):
    """List the quoted identifiers that open a nested map in ``filepath``.

    An identifier is reported when it consists only of ASCII letters and
    hyphens, is wrapped in single or double quotes, and is followed by
    ``: {`` (e.g. ``"en": {`` or ``'fr': {``).

    Returns:
        The identifiers in the order they appear in the file.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        source = f.read()
    return re.findall(r'[\'"]([a-zA-Z\-]+)[\'"]\s*:\s*\{', source)
|
||||
|
||||
# Report the map identifiers found in each legacy translation source.
for heading, legacy_path in (
    ("Legacy translations.dart languages:", "scratch/legacy_translations.dart"),
    ("\nLegacy driver_translations.dart languages:", "scratch/legacy_driver_translations.dart"),
):
    print(heading)
    print(parse_translations_file(legacy_path))
|
||||
60
scratch/extract_legacy_languages.py
Normal file
60
scratch/extract_legacy_languages.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import re
|
||||
import json
|
||||
|
||||
# Single-line entry of the form "key": "value" or 'key': 'value'.
_KV_PATTERN = re.compile(r'^\s*([\'"])(.*?)\1\s*:\s*([\'"])(.*?)\3\s*,?\s*$', re.MULTILINE)

# One backslash followed by any single character inside a Dart string literal.
_DART_ESCAPE_RE = re.compile(r'\\(.)')

# Escapes that stand for a control character; any other escaped character
# (quote, backslash, dollar sign, ...) simply stands for itself.
_DART_CONTROL_ESCAPES = {
    'n': '\n',
    'r': '\r',
    't': '\t',
    'b': '\b',
    'f': '\f',
    'v': '\v',
}


def _unescape_dart_string(raw):
    """Decode backslash escapes in one pass; ``\\u``/``\\x`` escapes are kept verbatim."""
    def _decode(match):
        char = match.group(1)
        if char in 'ux':
            return match.group(0)
        return _DART_CONTROL_ESCAPES.get(char, char)

    return _DART_ESCAPE_RE.sub(_decode, raw)


def _find_map_end(content, start_idx):
    """Return the index of the brace closing the map whose body starts at ``start_idx``.

    Braces inside quoted strings, ``//`` line comments and ``/* */`` block
    comments are ignored. ``len(content)`` is returned when the map is never
    closed. Raw (``r'...'``) and triple-quoted strings are not special-cased.
    """
    depth = 1
    quote = None
    idx = start_idx
    end = len(content)
    while idx < end:
        char = content[idx]
        if quote is not None:
            if char == '\\':
                # Skip the escaped character so \" or \' cannot end the string.
                idx += 2
                continue
            # A newline also ends the string, so one stray quote cannot
            # swallow the rest of the file.
            if char == quote or char == '\n':
                quote = None
        elif char in '\'"':
            quote = char
        elif content.startswith('//', idx):
            newline = content.find('\n', idx)
            if newline == -1:
                return end
            idx = newline
        elif content.startswith('/*', idx):
            close = content.find('*/', idx + 2)
            if close == -1:
                return end
            idx = close + 1
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return idx
        idx += 1
    return end


def extract_language_maps(filepath):
    """Extract the per-language translation maps from a Dart source file.

    For each language code in a fixed list, the first ``"lang": {`` (or
    ``'lang': {``) occurrence is located, the text up to the matching closing
    brace is taken as that language's block, and every single-line
    ``"key": "value"`` entry in the block is collected.

    Args:
        filepath: Path of the Dart file to read (UTF-8).

    Returns:
        A dict mapping language code to a dict of unescaped key -> unescaped
        value. Languages whose map start is not found are reported on stdout
        and omitted from the result.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    languages = ['tr', 'fr', 'de', 'es', 'fa', 'el', 'ur', 'hi', 'ru', 'it', 'zh']
    extracted = {}

    for lang in languages:
        # Find: "lang": { or 'lang': {
        match = re.search(r'[\'"]' + re.escape(lang) + r'[\'"]\s*:\s*\{', content)
        if not match:
            print(f"Language {lang} map start not found!")
            continue

        start_idx = match.end()
        block = content[start_idx:_find_map_end(content, start_idx)]

        lang_map = {
            _unescape_dart_string(raw_key): _unescape_dart_string(raw_val)
            for _, raw_key, _, raw_val in _KV_PATTERN.findall(block)
        }

        extracted[lang] = lang_map
        print(f"Extracted {lang}: {len(lang_map)} keys")

    return extracted
|
||||
|
||||
# Extract the legacy maps and persist them as human-readable UTF-8 JSON.
extracted = extract_language_maps("scratch/legacy_translations.dart")
serialized = json.dumps(extracted, ensure_ascii=False, indent=2)
with open("scratch/legacy_extracted_languages.json", "w", encoding="utf-8") as f:
    f.write(serialized)
print("Saved legacy extracted languages to JSON.")
|
||||
42
scratch/inspect_keys.py
Normal file
42
scratch/inspect_keys.py
Normal file
@@ -0,0 +1,42 @@
|
||||
import json
|
||||
import re
|
||||
import os
|
||||
|
||||
# One backslash followed by any single character inside a Dart string literal.
_DART_ESCAPE_RE = re.compile(r'\\(.)')

# Escapes that stand for a control character; any other escaped character
# (quote, backslash, dollar sign, ...) simply stands for itself.
_DART_CONTROL_ESCAPES = {
    'n': '\n',
    'r': '\r',
    't': '\t',
    'b': '\b',
    'f': '\f',
    'v': '\v',
}


def _unescape_dart_string(raw):
    """Decode backslash escapes in one pass; ``\\u``/``\\x`` escapes are kept verbatim."""
    def _decode(match):
        char = match.group(1)
        if char in 'ux':
            return match.group(0)
        return _DART_CONTROL_ESCAPES.get(char, char)

    return _DART_ESCAPE_RE.sub(_decode, raw)


def parse_dart_map(filepath):
    """Parse single-line ``"key": "value"`` entries from a Dart map file.

    Args:
        filepath: Path of the Dart source file to read (UTF-8).

    Returns:
        A dict mapping each unescaped key to its unescaped value; an empty
        dict when ``filepath`` does not exist. Later duplicates of a key
        overwrite earlier ones.
    """
    translations = {}
    if not os.path.exists(filepath):
        return translations
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    pattern = re.compile(r'^\s*([\'"])(.*?)\1\s*:\s*([\'"])(.*?)\3\s*,?\s*$', re.MULTILINE)
    for _, raw_key, _, raw_val in pattern.findall(content):
        # Decode all escapes (not only quotes) so keys read from Dart source
        # compare equal to the plain-text keys loaded from JSON.
        translations[_unescape_dart_string(raw_key)] = _unescape_dart_string(raw_val)

    return translations
|
||||
|
||||
# Current Arabic dialect maps.
ar_eg = parse_dart_map("siro_driver/lib/controller/local/ar_eg.dart")
ar_jo = parse_dart_map("siro_driver/lib/controller/local/ar_jo.dart")
ar_sy = parse_dart_map("siro_driver/lib/controller/local/ar_sy.dart")

with open("siro_driver_translations_data.json", "r", encoding="utf-8") as f:
    json_data = json.load(f)
missing_keys_list = json_data.get('missing_keys', [])

# Key sets of every source, used for the union and the pairwise differences.
ar_sy_keys = set(ar_sy)
ar_jo_keys = set(ar_jo)
ar_eg_keys = set(ar_eg)
missing_keys_set = set(missing_keys_list)

all_keys = ar_sy_keys | ar_jo_keys | ar_eg_keys | missing_keys_set
print(f"Total unique keys in union: {len(all_keys)}")

eg_only = ar_eg_keys - ar_sy_keys
sy_not_jo = ar_sy_keys - ar_jo_keys
jo_not_sy = ar_jo_keys - ar_sy_keys
print(f"Keys in ar_eg not in ar_sy: {eg_only}")
print(f"Keys in ar_sy not in ar_jo: {sy_not_jo}")
print(f"Keys in ar_jo not in ar_sy: {jo_not_sy}")
|
||||
14072
scratch/legacy_extracted_languages.json
Normal file
14072
scratch/legacy_extracted_languages.json
Normal file
File diff suppressed because it is too large
Load Diff
1
scratch/test_simple.py
Normal file
1
scratch/test_simple.py
Normal file
@@ -0,0 +1 @@
|
||||
# Prints a fixed greeting to standard output.
print("Hello from python script")
|
||||
81
scratch/verify_driver_translations.py
Normal file
81
scratch/verify_driver_translations.py
Normal file
@@ -0,0 +1,81 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
# File stems (without ".dart") of every locale map that gets verified.
languages = [
    'ar_eg', 'ar_jo', 'ar_sy',
    'en',
    'de', 'el', 'es', 'fa', 'fr', 'hi', 'it', 'ru', 'tr', 'ur', 'zh',
]
# Directory that holds the locale files.
local_dir = "siro_driver/lib/controller/local"
|
||||
|
||||
def parse_dart_map(filepath):
    """Read the single-line ``"key": "value"`` entries of a Dart map file.

    Keys and values are returned exactly as written in the source (escape
    sequences are NOT decoded) so the caller can inspect the escaping.

    Returns:
        A dict of raw key -> raw value, or ``None`` (after printing an error)
        when ``filepath`` does not exist. Later duplicates overwrite earlier
        ones.
    """
    if not os.path.exists(filepath):
        print(f"Error: file {filepath} does not exist!")
        return None

    with open(filepath, 'r', encoding='utf-8') as f:
        source = f.read()

    # Entry shape: "key": "value" (either quote style, optional trailing comma).
    entry_re = re.compile(r'^\s*([\'"])(.*?)\1\s*:\s*([\'"])(.*?)\3\s*,?\s*$', re.MULTILINE)
    return {entry.group(2): entry.group(4) for entry in entry_re.finditer(source)}
|
||||
|
||||
# Number of entries every locale file is required to contain.
expected_entry_count = 2660

# lang -> parsed raw map, filled only for files that exist.
parsed_data = {}
all_valid = True

for lang in languages:
    filepath = os.path.join(local_dir, f"{lang}.dart")
    trans = parse_dart_map(filepath)
    if trans is None:
        # Missing file: parse_dart_map has already reported it.
        all_valid = False
        continue
    parsed_data[lang] = trans
    print(f"Verified {lang}.dart: parsed {len(trans)} entries.")
    if len(trans) != expected_entry_count:
        print(f" ERROR: Expected {expected_entry_count} entries, got {len(trans)}")
        all_valid = False

if not all_valid:
    print("Verification FAILED on basic counts.")
    # raise SystemExit rather than calling exit(): the exit() helper is
    # injected by the site module for interactive use and may be absent.
    raise SystemExit(1)
|
||||
|
||||
# Every locale must expose exactly the keys of the first (reference) locale.
ref_lang = languages[0]
ref_keys = set(parsed_data[ref_lang])

for lang in languages[1:]:
    keys = set(parsed_data[lang])
    if keys == ref_keys:
        continue
    only_in_ref = ref_keys - keys
    only_in_lang = keys - ref_keys
    print(f"ERROR: Key sets differ between {ref_lang} and {lang}!")
    print(f" Keys in {ref_lang} not in {lang}: {len(only_in_ref)}")
    print(f" Keys in {lang} not in {ref_lang}: {len(only_in_lang)}")
    all_valid = False
|
||||
|
||||
# Check for escaping errors: in the generated double-quoted Dart strings a
# '$' starts string interpolation unless it is escaped with a backslash.
#
# A '$' is unescaped when it is preceded by an EVEN number of backslashes
# (zero included): in "\\$" the two backslashes form one escaped backslash
# and the dollar sign is still bare. The lookbehind anchors the match at the
# start of the backslash run so the pairs are counted from its beginning.
dollar_pattern = re.compile(r'(?<!\\)(?:\\\\)*\$')

for lang in languages:
    filepath = os.path.join(local_dir, f"{lang}.dart")
    with open(filepath, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            # Only lines containing ':' are inspected (map entries).
            if ":" not in line:
                continue
            if dollar_pattern.search(line):
                print(f"ERROR: Unescaped dollar sign in {filepath} line {line_no}:")
                print(f" {line.strip()}")
                all_valid = False
|
||||
|
||||
# Final verdict: a non-zero exit status signals failure to the caller.
if not all_valid:
    print("\nVerification FAILED.")
    # raise SystemExit rather than calling exit(): the exit() helper is
    # injected by the site module for interactive use and may be absent.
    raise SystemExit(1)

print("\nSUCCESS: All files verified! They have identical keys (2660 keys) and correct escaping.")
|
||||
Reference in New Issue
Block a user