mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-06-08 22:23:56 +02:00
modify src/get_historical_faa.py
This commit is contained in:
+29
-17
@@ -9,33 +9,44 @@ import subprocess, re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
def run(*args: str) -> str:
|
REPO = "/Users/jonahgoode/Documents/PlaneQuery/Other-Code/scrape-faa-releasable-aircraft"
|
||||||
return subprocess.check_output(args, text=True).strip()
|
OUT_ROOT = Path("data/faa_releasable_historical")
|
||||||
|
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Get commits that touched any MASTER-*.txt, oldest -> newest
|
def run_git(*args: str, text: bool = True) -> str:
|
||||||
log = run("git", "log", "--reverse", "--format=%H %cs", "--", ".")
|
return subprocess.check_output(
|
||||||
# If you want to restrict to only commits that touched the master parts, use:
|
["git", "-C", REPO, *args],
|
||||||
# log = run("git", "log", "--reverse", "--format=%H %cs", "--", "MASTER-1.txt")
|
text=text
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
# Commits (oldest -> newest), restricted to master parts
|
||||||
|
log = run_git(
|
||||||
|
"log",
|
||||||
|
"--reverse",
|
||||||
|
"--format=%H %cs",
|
||||||
|
"--",
|
||||||
|
"MASTER-1.txt"
|
||||||
|
)
|
||||||
|
|
||||||
lines = [ln for ln in log.splitlines() if ln.strip()]
|
lines = [ln for ln in log.splitlines() if ln.strip()]
|
||||||
if not lines:
|
if not lines:
|
||||||
raise SystemExit("No commits found.")
|
raise SystemExit("No commits found.")
|
||||||
|
|
||||||
# Map date -> last commit SHA on that date (Ordered by history)
|
# Map date -> last commit SHA on that date (only Feb 2024)
|
||||||
date_to_sha = OrderedDict()
|
date_to_sha = OrderedDict()
|
||||||
for ln in lines:
|
for ln in lines:
|
||||||
sha, date = ln.split()
|
sha, date = ln.split()
|
||||||
# keep last SHA per day
|
if date.startswith("2024-02"):
|
||||||
date_to_sha[date] = sha
|
date_to_sha[date] = sha
|
||||||
|
|
||||||
out_root = Path("out_master_by_day")
|
if not date_to_sha:
|
||||||
out_root.mkdir(exist_ok=True)
|
raise SystemExit("No February 2024 commit-days found.")
|
||||||
|
|
||||||
master_re = re.compile(r"^MASTER-(\d+)\.txt$")
|
master_re = re.compile(r"^MASTER-(\d+)\.txt$")
|
||||||
|
|
||||||
for date, sha in date_to_sha.items():
|
for date, sha in date_to_sha.items():
|
||||||
# list files at this commit, filter MASTER-*.txt in repo root
|
names = run_git("ls-tree", "--name-only", sha).splitlines()
|
||||||
names = run("git", "ls-tree", "--name-only", sha).splitlines()
|
|
||||||
parts = []
|
parts = []
|
||||||
for n in names:
|
for n in names:
|
||||||
m = master_re.match(n)
|
m = master_re.match(n)
|
||||||
@@ -44,20 +55,21 @@ for date, sha in date_to_sha.items():
|
|||||||
parts.sort()
|
parts.sort()
|
||||||
|
|
||||||
if not parts:
|
if not parts:
|
||||||
# no master parts in that commit/day; skip
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
day_dir = out_root / date
|
day_dir = OUT_ROOT / date
|
||||||
day_dir.mkdir(parents=True, exist_ok=True)
|
day_dir.mkdir(parents=True, exist_ok=True)
|
||||||
out_path = day_dir / "Master.txt"
|
out_path = day_dir / "Master.txt"
|
||||||
|
|
||||||
with out_path.open("wb") as w:
|
with out_path.open("wb") as w:
|
||||||
for _, fname in parts:
|
for _, fname in parts:
|
||||||
data = subprocess.check_output(["git", "show", f"{sha}:{fname}"])
|
data = subprocess.check_output(
|
||||||
|
["git", "-C", REPO, "show", f"{sha}:{fname}"]
|
||||||
|
)
|
||||||
w.write(data)
|
w.write(data)
|
||||||
if data and not data.endswith(b"\n"):
|
if data and not data.endswith(b"\n"):
|
||||||
w.write(b"\n")
|
w.write(b"\n")
|
||||||
|
|
||||||
print(f"{date} {sha[:7]} -> {out_path} ({len(parts)} parts)")
|
print(f"{date} {sha[:7]} -> {out_path} ({len(parts)} parts)")
|
||||||
|
|
||||||
print(f"\nDone. Output root: {out_root.resolve()}")
|
print(f"\nDone. Output root: {OUT_ROOT.resolve()}")
|
||||||
|
|||||||
Reference in New Issue
Block a user