mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-04-23 19:16:06 +02:00
362a6e2ceb
Former-commit-id: 8ed321f2ba
113 lines
4.6 KiB
Python
113 lines
4.6 KiB
Python
import zipfile
|
|
import xml.etree.ElementTree as ET
|
|
import re
|
|
import csv
|
|
import os
|
|
|
|
xlsx_path = r"f:\Codebase\Oracle\live-risk-dashboard\TheAirTraffic Database.xlsx"
|
|
output_path = r"f:\Codebase\Oracle\live-risk-dashboard\backend\xlsx_analysis.txt"
|
|
|
|
def parse_xlsx_sheet(z, shared_strings, sheet_num):
|
|
ns = {'s': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
|
|
sheet_file = f'xl/worksheets/sheet{sheet_num}.xml'
|
|
if sheet_file not in z.namelist():
|
|
return []
|
|
ws_xml = z.read(sheet_file)
|
|
ws_root = ET.fromstring(ws_xml)
|
|
rows = []
|
|
for row in ws_root.findall('.//s:sheetData/s:row', ns):
|
|
cells = {}
|
|
for cell in row.findall('s:c', ns):
|
|
cell_ref = cell.get('r', '')
|
|
cell_type = cell.get('t', '')
|
|
val_elem = cell.find('s:v', ns)
|
|
val = val_elem.text if val_elem is not None else ''
|
|
if cell_type == 's' and val:
|
|
val = shared_strings[int(val)]
|
|
col = re.match(r'([A-Z]+)', cell_ref).group(1) if re.match(r'([A-Z]+)', cell_ref) else ''
|
|
cells[col] = val
|
|
rows.append(cells)
|
|
return rows
|
|
|
|
with open(output_path, 'w', encoding='utf-8') as out:
|
|
with zipfile.ZipFile(xlsx_path, 'r') as z:
|
|
shared_strings = []
|
|
if 'xl/sharedStrings.xml' in z.namelist():
|
|
ss_xml = z.read('xl/sharedStrings.xml')
|
|
root = ET.fromstring(ss_xml)
|
|
ns = {'s': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'}
|
|
for si in root.findall('.//s:si', ns):
|
|
texts = si.findall('.//s:t', ns)
|
|
val = ''.join(t.text or '' for t in texts)
|
|
shared_strings.append(val)
|
|
|
|
all_entries = []
|
|
for sheet_idx in range(1, 5):
|
|
rows = parse_xlsx_sheet(z, shared_strings, sheet_idx)
|
|
if not rows:
|
|
continue
|
|
|
|
out.write(f"\n=== SHEET {sheet_idx}: {len(rows)} rows ===\n")
|
|
# Print first 5 rows
|
|
for i in range(min(5, len(rows))):
|
|
for col in sorted(rows[i].keys(), key=lambda x: (len(x), x)):
|
|
val = rows[i][col]
|
|
if val:
|
|
out.write(f" Row{i} {col}: '{val[:80]}'\n")
|
|
out.write("\n")
|
|
|
|
for r in rows[1:]:
|
|
for col, val in r.items():
|
|
val = str(val).strip()
|
|
n_regs = re.findall(r'N\d{1,5}[A-Z]{0,2}', val)
|
|
owner = r.get('B', r.get('A', '')).strip()
|
|
aircraft_type = r.get('C', r.get('D', '')).strip()
|
|
for reg in n_regs:
|
|
all_entries.append({
|
|
'registration': reg.upper(),
|
|
'owner': owner,
|
|
'type': aircraft_type,
|
|
'sheet': sheet_idx
|
|
})
|
|
|
|
unique_regs = set(e['registration'] for e in all_entries)
|
|
out.write(f"\nTOTAL ENTRIES: {len(all_entries)}\n")
|
|
out.write(f"UNIQUE REGISTRATIONS: {len(unique_regs)}\n")
|
|
|
|
csv_path = r"f:\Codebase\Oracle\live-risk-dashboard\PLANEALERTLIST\plane-alert-db-main\plane-alert-db.csv"
|
|
existing = {}
|
|
with open(csv_path, 'r', encoding='utf-8') as f:
|
|
reader = csv.DictReader(f)
|
|
for row in reader:
|
|
icao = row.get('$ICAO', '').strip().upper()
|
|
reg = row.get('$Registration', '').strip().upper()
|
|
if reg:
|
|
existing[reg] = {
|
|
'icao': icao,
|
|
'category': row.get('Category', ''),
|
|
'operator': row.get('$Operator', ''),
|
|
}
|
|
|
|
already_in = unique_regs & set(existing.keys())
|
|
missing = unique_regs - set(existing.keys())
|
|
out.write(f"\nplane-alert-db: {len(existing)} registrations\n")
|
|
out.write(f"Already covered: {len(already_in)}\n")
|
|
out.write(f"MISSING: {len(missing)}\n")
|
|
|
|
out.write(f"\n--- ALREADY TRACKED ---\n")
|
|
seen = set()
|
|
for e in all_entries:
|
|
if e['registration'] in already_in and e['registration'] not in seen:
|
|
info = existing[e['registration']]
|
|
out.write(f" {e['owner'][:40]:40s} {e['registration']:10s} DB_CAT: {info['category'][:25]:25s} DB_OP: {info['operator'][:40]}\n")
|
|
seen.add(e['registration'])
|
|
|
|
out.write(f"\n--- MISSING (NEED TO ADD) ---\n")
|
|
seen = set()
|
|
for e in all_entries:
|
|
if e['registration'] in missing and e['registration'] not in seen:
|
|
out.write(f" {e['owner'][:40]:40s} {e['registration']:10s} TYPE: {e['type'][:30]}\n")
|
|
seen.add(e['registration'])
|
|
|
|
print(f"Analysis written to {output_path}")
|