mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-24 03:56:24 +02:00
Add download_date column; change the git log date range (days)
This commit is contained in:
@@ -3,11 +3,15 @@ import zipfile
|
||||
import pandas as pd
|
||||
from faa_aircraft_registry import read
|
||||
|
||||
def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path) -> None:
|
||||
def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = None):
|
||||
with zipfile.ZipFile(zip_path) as z:
|
||||
registrations = read(z)
|
||||
|
||||
df = pd.DataFrame(registrations['master'].values())
|
||||
|
||||
if date is not None:
|
||||
df.insert(0, "download_date", date)
|
||||
|
||||
col = "transponder_code_hex"
|
||||
df = df[[col] + [c for c in df.columns if c != col]]
|
||||
df = df.rename(columns={"transponder_code_hex": "icao"})
|
||||
@@ -20,4 +24,5 @@ def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path) -> None:
|
||||
engine = pd.json_normalize(df["engine"].where(df["engine"].notna(), {})).add_prefix("engine_")
|
||||
df = df.drop(columns="engine").join(engine)
|
||||
df = df.sort_values(by=["icao"])
|
||||
df.to_csv(csv_path, index=False)
|
||||
df.to_csv(csv_path, index=False)
|
||||
return df
|
||||
@@ -12,6 +12,7 @@ from pathlib import Path
|
||||
from collections import OrderedDict
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_csv
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
|
||||
REPO = "/Users/jonahgoode/Documents/PlaneQuery/Other-Code/scrape-faa-releasable-aircraft"
|
||||
OUT_ROOT = Path("data/faa_releasable_historical")
|
||||
@@ -28,8 +29,8 @@ log = run_git_text(
|
||||
"log",
|
||||
"--reverse",
|
||||
"--format=%H %cs",
|
||||
"--since=2024-02-01",
|
||||
"--until=2024-02-29",
|
||||
"--since=2024-01-25",
|
||||
"--until=2024-02-05",
|
||||
)
|
||||
lines = [ln for ln in log.splitlines() if ln.strip()]
|
||||
if not lines:
|
||||
@@ -43,8 +44,13 @@ for ln in lines:
|
||||
|
||||
OTHER_FILES = ["ACFTREF.txt", "DEALER.txt", "DOCINDEX.txt", "ENGINE.txt", "RESERVED.txt"]
|
||||
master_re = re.compile(r"^MASTER-(\d+)\.txt$")
|
||||
|
||||
df_base = pd.DataFrame()
|
||||
start_date = None
|
||||
end_date = None
|
||||
for date, sha in date_to_sha.items():
|
||||
if start_date is None:
|
||||
start_date = date
|
||||
end_date = date
|
||||
day_dir = OUT_ROOT / date
|
||||
day_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
@@ -77,11 +83,26 @@ for date, sha in date_to_sha.items():
|
||||
for p in day_dir.iterdir():
|
||||
z.write(p, arcname=p.name)
|
||||
|
||||
print(f"{date} {sha[:7]} -> {day_dir} (master parts: {len(parts)})")
|
||||
# 4) Convert ZIP -> CSV
|
||||
out_csv = day_dir / f"ReleasableAircraft_{date}.csv"
|
||||
convert_faa_master_txt_to_csv(zip_path, out_csv)
|
||||
df_new = convert_faa_master_txt_to_csv(zip_path, out_csv, date)
|
||||
if df_base.empty:
|
||||
df_base = df_new
|
||||
print(df_base["unique_regulatory_id"].size, "total unique_regulatory_id entries so far")
|
||||
# Delete all files in the day directory
|
||||
for p in day_dir.iterdir():
|
||||
p.unlink()
|
||||
day_dir.rmdir()
|
||||
continue
|
||||
key = "unique_regulatory_id"
|
||||
df_to_add = df_new[~df_new[key].isin(df_base[key])]
|
||||
df_base = pd.concat([df_base, df_to_add], ignore_index=True)
|
||||
print(df_base[key].size, "total unique_regulatory_id entries so far")
|
||||
|
||||
# Delete all files in the day directory
|
||||
for p in day_dir.iterdir():
|
||||
p.unlink()
|
||||
day_dir.rmdir()
|
||||
|
||||
print(f"{date} {sha[:7]} -> {day_dir} (master parts: {len(parts)})")
|
||||
|
||||
print(f"\nDone. Output root: {OUT_ROOT.resolve()}")
|
||||
df_base.to_csv(OUT_ROOT / f"MASTER_{start_date}_{end_date}.csv", index=False)
|
||||
|
||||
Reference in New Issue
Block a user