Works: appending to a CSV that already exists.

This commit is contained in:
ggman12
2026-02-01 20:33:22 -05:00
parent 2c7c1a713a
commit c2e174f22c
3 changed files with 14 additions and 13 deletions
+2 -5
View File
@@ -3,14 +3,13 @@ import zipfile
import pandas as pd
from faa_aircraft_registry import read
def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = None):
def convert_faa_master_txt_to_df(zip_path: Path, date: str):
with zipfile.ZipFile(zip_path) as z:
registrations = read(z)
df = pd.DataFrame(registrations['master'].values())
if date is not None:
df.insert(0, "download_date", date)
df.insert(0, "download_date", date)
registrant = pd.json_normalize(df["registrant"]).add_prefix("registrant_")
df = df.drop(columns="registrant").join(registrant)
@@ -45,8 +44,6 @@ def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = No
reg_idx = cols.index("registration_number")
cols.insert(reg_idx + 1, "planequery_airframe_id")
df = df[cols]
df.to_csv(csv_path, index=False)
return df
+3 -3
View File
@@ -11,7 +11,7 @@ import subprocess, re
from pathlib import Path
import shutil
from collections import OrderedDict
from derive_from_faa_master_txt import convert_faa_master_txt_to_csv, concat_faa_historical_df
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
import zipfile
import pandas as pd
import argparse
@@ -106,7 +106,7 @@ for date, sha in date_to_sha.items():
print(f"{date} {sha[:7]} -> {day_dir} (master parts: {len(parts)})")
# 4) Convert ZIP -> CSV
out_csv = day_dir / f"ReleasableAircraft_{date}.csv"
df_new = convert_faa_master_txt_to_csv(zip_path, out_csv, date)
df_new = convert_faa_master_txt_to_df(zip_path, out_csv, date)
if df_base.empty:
df_base = df_new
print(len(df_base), "total entries so far")
@@ -119,5 +119,5 @@ for date, sha in date_to_sha.items():
print(len(df_base), "total entries so far")
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
df_base.to_csv(OUT_ROOT / f"MASTER_{start_date}_{end_date}.csv", index=False)
df_base.to_csv(OUT_ROOT / f"planequery_aircraft_{start_date}_{end_date}.csv", index=False)
# TODO: get average number of new rows per day.
+9 -5
View File
@@ -1,4 +1,3 @@
import zipfile
from pathlib import Path
from datetime import datetime, timezone
date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
@@ -6,10 +5,8 @@ date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
out_dir = Path("data/faa_releasable")
out_dir.mkdir(parents=True, exist_ok=True)
zip_name = f"ReleasableAircraft_{date_str}.zip"
csv_name = f"Master_{date_str}.csv"
zip_path = out_dir / zip_name
csv_path = out_dir / csv_name
# URL and paths
url = "https://registry.faa.gov/database/ReleasableAircraft.zip"
@@ -25,5 +22,12 @@ with urlopen(req, timeout=120) as r:
body = r.read()
zip_path.write_bytes(body)
from derive_from_faa_master_txt import convert_faa_master_txt_to_csv
convert_faa_master_txt_to_csv(zip_path, csv_path)
OUT_ROOT = Path("data/planequery_aircraft")
OUT_ROOT.mkdir(parents=True, exist_ok=True)
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
from get_latest_planequery_aircraft_release import get_latest_aircraft_csv_df
df_new = convert_faa_master_txt_to_df(zip_path, date_str)
df_base, start_date_str = get_latest_aircraft_csv_df()
df_base = concat_faa_historical_df(df_base, df_new)
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
df_base.to_csv(OUT_ROOT / f"planequery_aircraft_{start_date_str}_{date_str}.csv", index=False)