mirror of
https://github.com/BigBodyCobain/Shadowbroker.git
synced 2026-06-25 15:30:07 +02:00
a0c79c2044
Import oligarchs, royals, and curated celebrities from plane-alert-db while excluding joke tag labels from tracked_names. Sync plane_alert_db.json metadata, add import scripts, and map oligarch/royal/celebrity colors in the legend. Co-authored-by: Cursor <cursoragent@cursor.com>
224 lines
8.2 KiB
Python
224 lines
8.2 KiB
Python
#!/usr/bin/env python3
|
|
"""Extract plane-alert-db entries missing from tracked_names.json."""
|
|
from __future__ import annotations
|
|
|
|
import csv
|
|
import json
|
|
import re
|
|
from pathlib import Path
|
|
|
|
# Parent of the directory that contains this script.
_SCRIPT_PATH = Path(__file__).resolve()
ROOT = _SCRIPT_PATH.parents[1]

# Existing tracked-names database that extracted rows are compared against.
SB = ROOT.joinpath("backend", "data", "tracked_names.json")

# Directory the plane-alert-db CSV files are read from.
PAD = Path.home().joinpath("Downloads", "plane-alert-db-main", "plane-alert-db-main")
|
|
|
|
# plane-alert-db categories whose rows are always imported into tracked_names.
IMPORT_CATS = {
    "Dictator Alert",
    "Don't you know who I am?",
    "Football",
    "Head of State",
    "Oligarch",
    "Royal Aircraft",
}

# Categories imported only when the row passes the looks-like-a-person heuristic.
PERSON_CATS = {
    "As Seen on TV",
    "Bizjets",
    "Vanity Plate",
}
|
|
|
|
# Skip obvious corps / generic operators.
#
# The keyword group is closed with ``(?!\w)`` rather than ``\b``: for the
# word-only alternatives the two are equivalent, but ``\b`` after the literal
# dot of ``co\.`` only matches when a word character follows, so "Acme Co."
# (dot followed by a space or end of string) was never recognised.
CORP_RE = re.compile(
    r"\b(inc|llc|ltd|corp|company|co\.|group|holdings|university|air force|"
    r"airlines|aviation|services|systems|international|global|partners|"
    r"foundation|bank|pharma|laboratories|transportation|motors|enterprises)(?!\w)",
    re.I,
)
|
|
|
|
# Whole-word keywords (matched case-insensitively) that are treated as a hint
# that a row describes a person rather than a company.
_CELEB_HINT_PATTERN = (
    r"\b(actor|actress|singer|rapper|musician|celebrity|nfl|nba|f1|formula|"
    r"royal|prince|princess|king|queen|duke|sheik|sultan|oligarch|billionaire|"
    r"mogul|tycoon|founder|ceo|president|senator|governor|judge|athlete|"
    r"footballer|golfer|tennis|director|producer|host|comedian|model|"
    r"influencer|youtuber|podcast|chef|author|writer|artist|designer)\b"
)
CELEB_HINTS = re.compile(_CELEB_HINT_PATTERN, flags=re.IGNORECASE)
|
|
|
|
# Lower-case names and surnames that mark a row as belonging to a known
# person. Kept in alphabetical order; only membership matters.
KNOWN_PERSON_NAMES = {
    # a
    "abramov", "abramovich", "ackman", "adani", "alekperov", "ambani", "andretti",
    # b
    "beckham", "benioff", "beyonce", "bezos", "biden", "blavatnik", "brady",
    "branson", "buffett", "bush",
    # c
    "chamath", "charlie munger", "clinton", "clooney", "combs", "cruise",
    # d
    "dalio", "david tepper", "deripaska", "dicaprio", "diddy", "djokovic", "drake",
    # e
    "ecclestone", "elon musk", "elton john", "eric schmidt",
    # f
    "federer", "fridman",
    # g
    "gates", "gisele", "gwyneth",
    # h
    "hamilton", "hanks", "hendrick", "howard marks", "howard stern",
    # i
    "icahn",
    # j
    "jake paul", "jay z", "jay-z", "jensen huang", "jolie", "jones", "jordan",
    "judge judy",
    # k
    "kanye", "ken griffin", "kerimov", "kim kardashian", "kraft", "kroenke",
    # l
    "larry ellison", "larry page", "lebron", "lisin", "logan paul", "lowry", "lucas",
    # m
    "mahomes", "marc andreessen", "mark cuban", "mayweather", "mbappe", "mcgregor",
    "medvedchuk", "messi", "michael dell", "mikhelson", "mordashov", "moss",
    "mrbeast", "musk",
    # n
    "nadal", "naval", "nicklaus",
    # o
    "obama", "oprah", "osaka",
    # p
    "paul", "paul allen", "penske", "peter thiel", "pewdiepie", "pitt", "potanin",
    # r
    "ramaswamy", "ray dalio", "reid hoffman", "richard branson", "rick hendrick",
    "rihanna", "romney", "ronaldo", "rybolovlev",
    # s
    "satya nadella", "schumacher", "sean combs", "serena", "sergey brin",
    "sharapova", "sheindlin", "snyder", "soros", "spielberg", "steve ballmer",
    "steven cohen", "sundar pichai",
    # t
    "taylor swift", "thiel", "tim cook", "tinkov", "tom brady", "trump",
    # v
    "vekselberg", "venus", "verstappen",
    # w
    "west", "williams", "woods",
    # z
    "zuckerberg",
}
|
|
|
|
|
|
def norm_reg(s: str) -> str:
    """Return *s* trimmed of surrounding whitespace and upper-cased.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    if not s:
        return ""
    trimmed = s.strip()
    return trimmed.upper()
|
|
|
|
|
|
def norm_name(s: str) -> str:
    """Return *s* trimmed, with every run of whitespace collapsed to one space.

    Falsy input (``None`` or an empty string) yields ``""``.
    """
    trimmed = s.strip() if s else ""
    return re.sub(r"\s+", " ", trimmed)
|
|
|
|
|
|
def looks_like_person(operator: str, tag1: str, tag2: str, tag3: str) -> bool:
    """Heuristically decide whether a row describes a person, not a company.

    The operator and the three tags are joined into one lower-cased text and
    checked in this order:

    1. Any entry of ``KNOWN_PERSON_NAMES`` appears as a whole word or phrase
       -> person. This wins even over a corporate keyword, so a row such as
       "Falcon Landing LLC" tagged "Elon Musk" is still accepted.
    2. ``CORP_RE`` matches -> not a person.
    3. ``CELEB_HINTS`` matches -> person.
    4. The operator alone is two to four title-cased words -> person
       (weak signal).

    Args:
        operator: Operator name from the row.
        tag1: First tag from the row.
        tag2: Second tag from the row.
        tag3: Third tag from the row.

    Returns:
        ``True`` when the row looks like it belongs to a person.
    """
    blob = " ".join([operator, tag1, tag2, tag3]).strip()
    if len(blob) < 3:
        return False
    low = blob.lower()

    # Match names as whole words/phrases: plain substring containment also
    # fires on unrelated words ("west" in "southwest", "pitt" in "pittsburgh").
    # Evaluated once; the result drives both the override and the accept step.
    mentions_known_person = any(
        re.search(rf"(?<!\w){re.escape(name)}(?!\w)", low)
        for name in KNOWN_PERSON_NAMES
    )
    if mentions_known_person:
        return True

    if CORP_RE.search(low):
        return False

    if CELEB_HINTS.search(low):
        return True

    # Two to four title-cased words and no corporate keyword (already ruled
    # out above) is a weak person signal.
    words = operator.split()
    return 2 <= len(words) <= 4 and operator == operator.title()
|
|
|
|
|
|
def sb_category_for(cat: str, operator: str) -> str:
    """Map a plane-alert-db category and display name to a tracked_names category.

    Rules are applied in order; the first that matches wins:

    * "Oligarch" or "Dictator Alert"                     -> "Oligarch"
    * "Royal Aircraft", or "royal" anywhere in the name  -> "Royal"
    * "Football"                                         -> "Sports"
    * "Head of State"                                    -> "Government"
    * a sports keyword anywhere in the name              -> "Sports"
    * anything else                                      -> "Celebrity"

    Args:
        cat: Source category of the row.
        operator: Name the row is filed under; matched case-insensitively.

    Returns:
        The tracked_names category label.
    """
    name = operator.lower()

    if cat == "Oligarch" or cat == "Dictator Alert":
        return "Oligarch"

    # The "royal" name check deliberately runs before the remaining
    # category-based rules, exactly as in the rule order above.
    if cat == "Royal Aircraft" or "royal" in name:
        return "Royal"

    by_category = {"Football": "Sports", "Head of State": "Government"}
    if cat in by_category:
        return by_category[cat]

    sports_keywords = ("nfl", "nba", "mlb", "football", "basketball", "soccer", "f1", "formula")
    for keyword in sports_keywords:
        if keyword in name:
            return "Sports"

    return "Celebrity"
|
|
|
|
|
|
def row_get(row: dict[str, str], *keys: str) -> str:
    """Return the first non-blank value found under *keys*, stripped.

    Keys are tried in the order given. A key that is missing, or whose value
    is falsy or consists only of whitespace, is skipped so that a later
    fallback key can still supply the value.

    Args:
        row: Mapping of column name to cell value.
        *keys: Candidate column names, most preferred first.

    Returns:
        The stripped value of the first usable key, or ``""`` if none has one.
    """
    for key in keys:
        value = row.get(key)
        if not value:
            continue
        text = str(value).strip()
        # A whitespace-only cell is truthy but carries no data; keep looking
        # instead of returning "" and hiding a populated fallback column.
        if text:
            return text
    return ""
|
|
|
|
|
|
def _choose_display_name(cat: str, op_display: str, tags: tuple[str, ...]) -> str:
    """Pick the name a row is filed under, preferring a person tag over the operator."""
    display = op_display
    for tag in tags:
        if not tag:
            continue
        tag_low = tag.lower()
        if any(h in tag_low for h in KNOWN_PERSON_NAMES):
            # A tag that names a known person wins outright.
            return tag
        short_label = (
            len(tag.split()) <= 4 and tag[0].isupper() and "llc" not in tag_low
        )
        if (
            short_label
            and cat == "Don't you know who I am?"
            and tag not in {"Bizjet", "Pusher Prop"}
        ):
            # Tentative choice only: keep scanning, because a later tag may
            # still name a known person (or be another qualifying label).
            display = tag
    return display


def main() -> None:
    """Extract plane-alert-db rows whose registration is not yet tracked.

    Steps:

    1. Load ``SB`` and collect every normalised registration found under its
       ``"details"`` mapping.
    2. Read each plane-alert-db CSV that exists under ``PAD``. A row is kept
       when its category is in ``IMPORT_CATS``, or is in ``PERSON_CATS`` and
       ``looks_like_person`` accepts it. Rows without a registration, repeated
       (registration, category) pairs, and registrations already tracked are
       skipped.
    3. File each kept registration under a display name: into ``merge`` when
       that name already exists in ``SB``'s details, otherwise into
       ``additions`` together with a category from ``sb_category_for``.
    4. Print a summary and write both mappings as JSON to
       ``ROOT / "scripts" / "plane_alert_additions.json"``.

    Missing CSV files are skipped; if none exists at all a warning is printed
    to stderr and the (empty) result is still written.
    """
    import sys  # local import: only needed for the missing-input warning

    with SB.open(encoding="utf-8") as f:
        sb = json.load(f)
    details = sb.get("details", {})

    sb_regs: set[str] = set()
    for info in details.values():
        for reg in info.get("registrations", []):
            r = norm_reg(reg)
            if r:
                sb_regs.add(r)

    additions: dict[str, dict] = {}
    merge: dict[str, list[str]] = {}

    csv_paths = [
        PAD / "plane-alert-db.csv",
        PAD / "plane-alert-civ.csv",
        PAD / "plane-alert-gov.csv",
        PAD / "plane-alert-mil.csv",
    ]

    seen: set[tuple[str, str]] = set()
    person_hits = 0
    found_any_csv = False

    for path in csv_paths:
        if not path.exists():
            continue
        found_any_csv = True
        # newline="" lets the csv module handle line endings itself, including
        # newlines embedded in quoted fields.
        with path.open(encoding="utf-8", errors="replace", newline="") as f:
            for row in csv.DictReader(f):
                cat = row_get(row, "Category")
                reg = norm_reg(row_get(row, "$Registration", "Registration"))
                if not reg:
                    continue
                if (reg, cat) in seen:
                    continue
                seen.add((reg, cat))

                op_display = norm_name(row_get(row, "$Operator", "Operator"))
                tag1 = row_get(row, "$Tag 1", "Tag 1")
                tag2 = row_get(row, "#Tag 2", "$#Tag 2")
                tag3 = row_get(row, "#Tag 3", "$#Tag 3")

                include = cat in IMPORT_CATS
                if not include and cat in PERSON_CATS:
                    if looks_like_person(op_display, tag1, tag2, tag3):
                        include = True
                        person_hits += 1

                if not include:
                    continue
                if reg in sb_regs:
                    continue

                # Prefer tag person name over shell company
                key = _choose_display_name(cat, op_display, (tag1, tag2, tag3))
                if not key:
                    # No operator and no usable tag: there is nothing to file
                    # the registration under, so do not create a "" entry.
                    continue

                if key in details:
                    merged_regs = merge.setdefault(key, [])
                    if reg not in merged_regs:
                        merged_regs.append(reg)
                else:
                    entry = additions.setdefault(
                        key,
                        {"category": sb_category_for(cat, key), "registrations": []},
                    )
                    if reg not in entry["registrations"]:
                        entry["registrations"].append(reg)

    if not found_any_csv:
        print(
            f"warning: no plane-alert-db CSV files found under {PAD}",
            file=sys.stderr,
        )

    print(f"New named entries: {len(additions)}")
    print(f"Merge into existing: {len(merge)}")
    print(f"Person-heuristic hits (ASTV/Bizjets): {person_hits}")
    print()

    by_cat: dict[str, list[tuple[str, list[str]]]] = {}
    for name, info in sorted(additions.items()):
        by_cat.setdefault(info["category"], []).append((name, info["registrations"]))

    for cat in sorted(by_cat):
        items = by_cat[cat]
        print(f"## {cat} ({len(items)})")
        # Only the first 40 names per category are listed on the console.
        for name, regs in items[:40]:
            print(f" {name}: {', '.join(regs)}")
        if len(items) > 40:
            print(f" ... +{len(items)-40} more")
        print()

    out = ROOT / "scripts" / "plane_alert_additions.json"
    out.write_text(
        json.dumps({"additions": additions, "merge": merge}, indent=2),
        encoding="utf-8",
    )
    print(f"Wrote {out}")
|
|
|
|
|
|
# Run the extraction only when this file is executed as a script.
if __name__ == "__main__":
    main()
|