fix: update repository path and improve cloning logic in FAA data retrieval

This commit is contained in:
ggman12
2026-02-01 19:01:42 -05:00
parent 60869bc178
commit aedd7b3df5
+17 -5
View File
@@ -14,23 +14,35 @@ from derive_from_faa_master_txt import convert_faa_master_txt_to_csv
import zipfile import zipfile
import pandas as pd import pandas as pd
REPO = "/Users/jonahgoode/Documents/PlaneQuery/Other-Code/scrape-faa-releasable-aircraft" # Clone repository if it doesn't exist
REPO = Path("data/scrape-faa-releasable-aircraft")
if not REPO.exists():
print(f"Cloning repository to {REPO}...")
subprocess.run([
"git", "clone",
"https://github.com/simonw/scrape-faa-releasable-aircraft",
str(REPO)
], check=True)
print("Repository cloned successfully.")
else:
print(f"Repository already exists at {REPO}")
OUT_ROOT = Path("data/faa_releasable_historical") OUT_ROOT = Path("data/faa_releasable_historical")
OUT_ROOT.mkdir(parents=True, exist_ok=True) OUT_ROOT.mkdir(parents=True, exist_ok=True)
def run_git_text(*args: str) -> str: def run_git_text(*args: str) -> str:
return subprocess.check_output(["git", "-C", REPO, *args], text=True).strip() return subprocess.check_output(["git", "-C", str(REPO), *args], text=True).strip()
def run_git_bytes(*args: str) -> bytes: def run_git_bytes(*args: str) -> bytes:
return subprocess.check_output(["git", "-C", REPO, *args]) return subprocess.check_output(["git", "-C", str(REPO), *args])
# All commits inside the --since/--until window below (oldest -> newest) # All commits inside the --since/--until window below (oldest -> newest)
log = run_git_text( log = run_git_text(
"log", "log",
"--reverse", "--reverse",
"--format=%H %cs", "--format=%H %cs",
"--since=2024-01-01", "--since=2024-06-01",
"--until=2024-08-08", "--until=2024-06-08",
) )
lines = [ln for ln in log.splitlines() if ln.strip()] lines = [ln for ln in log.splitlines() if ln.strip()]
if not lines: if not lines: