mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 19:46:09 +02:00
Works: appending to a CSV that already exists.
This commit is contained in:
@@ -3,14 +3,13 @@ import zipfile
|
||||
import pandas as pd
|
||||
from faa_aircraft_registry import read
|
||||
|
||||
def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = None):
|
||||
def convert_faa_master_txt_to_df(zip_path: Path, date: str):
|
||||
with zipfile.ZipFile(zip_path) as z:
|
||||
registrations = read(z)
|
||||
|
||||
df = pd.DataFrame(registrations['master'].values())
|
||||
|
||||
if date is not None:
|
||||
df.insert(0, "download_date", date)
|
||||
df.insert(0, "download_date", date)
|
||||
|
||||
registrant = pd.json_normalize(df["registrant"]).add_prefix("registrant_")
|
||||
df = df.drop(columns="registrant").join(registrant)
|
||||
@@ -45,8 +44,6 @@ def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = No
|
||||
reg_idx = cols.index("registration_number")
|
||||
cols.insert(reg_idx + 1, "planequery_airframe_id")
|
||||
df = df[cols]
|
||||
|
||||
df.to_csv(csv_path, index=False)
|
||||
return df
|
||||
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ import subprocess, re
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from collections import OrderedDict
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_csv, concat_faa_historical_df
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
|
||||
import zipfile
|
||||
import pandas as pd
|
||||
import argparse
|
||||
@@ -106,7 +106,7 @@ for date, sha in date_to_sha.items():
|
||||
print(f"{date} {sha[:7]} -> {day_dir} (master parts: {len(parts)})")
|
||||
# 4) Convert ZIP -> CSV
|
||||
out_csv = day_dir / f"ReleasableAircraft_{date}.csv"
|
||||
df_new = convert_faa_master_txt_to_csv(zip_path, out_csv, date)
|
||||
df_new = convert_faa_master_txt_to_df(zip_path, out_csv, date)
|
||||
if df_base.empty:
|
||||
df_base = df_new
|
||||
print(len(df_base), "total entries so far")
|
||||
@@ -119,5 +119,5 @@ for date, sha in date_to_sha.items():
|
||||
print(len(df_base), "total entries so far")
|
||||
|
||||
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
|
||||
df_base.to_csv(OUT_ROOT / f"MASTER_{start_date}_{end_date}.csv", index=False)
|
||||
df_base.to_csv(OUT_ROOT / f"planequery_aircraft_{start_date}_{end_date}.csv", index=False)
|
||||
# TODO: get average number of new rows per day.
|
||||
|
||||
+9
-5
@@ -1,4 +1,3 @@
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
@@ -6,10 +5,8 @@ date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
out_dir = Path("data/faa_releasable")
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
zip_name = f"ReleasableAircraft_{date_str}.zip"
|
||||
csv_name = f"Master_{date_str}.csv"
|
||||
|
||||
zip_path = out_dir / zip_name
|
||||
csv_path = out_dir / csv_name
|
||||
|
||||
# URL and paths
|
||||
url = "https://registry.faa.gov/database/ReleasableAircraft.zip"
|
||||
@@ -25,5 +22,12 @@ with urlopen(req, timeout=120) as r:
|
||||
body = r.read()
|
||||
zip_path.write_bytes(body)
|
||||
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_csv
|
||||
convert_faa_master_txt_to_csv(zip_path, csv_path)
|
||||
OUT_ROOT = Path("data/planequery_aircraft")
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
|
||||
from get_latest_planequery_aircraft_release import get_latest_aircraft_csv_df
|
||||
df_new = convert_faa_master_txt_to_df(zip_path, date_str)
|
||||
df_base, start_date_str = get_latest_aircraft_csv_df()
|
||||
df_base = concat_faa_historical_df(df_base, df_new)
|
||||
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
|
||||
df_base.to_csv(OUT_ROOT / f"planequery_aircraft_{start_date_str}_{date_str}.csv", index=False)
|
||||
Reference in New Issue
Block a user