mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-06-08 22:23:56 +02:00
feat: add download date and restructure columns in FAA master conversion
This commit is contained in:
@@ -12,17 +12,54 @@ def convert_faa_master_txt_to_csv(zip_path: Path, csv_path: Path, date: str = No
|
|||||||
if date is not None:
|
if date is not None:
|
||||||
df.insert(0, "download_date", date)
|
df.insert(0, "download_date", date)
|
||||||
|
|
||||||
col = "transponder_code_hex"
|
|
||||||
df = df[[col] + [c for c in df.columns if c != col]]
|
|
||||||
df = df.rename(columns={"transponder_code_hex": "icao"})
|
|
||||||
registrant = pd.json_normalize(df["registrant"]).add_prefix("registrant_")
|
registrant = pd.json_normalize(df["registrant"]).add_prefix("registrant_")
|
||||||
df = df.drop(columns="registrant").join(registrant)
|
df = df.drop(columns="registrant").join(registrant)
|
||||||
|
|
||||||
|
# Move transponder_code_hex to second column (after registration_number)
|
||||||
|
cols = df.columns.tolist()
|
||||||
|
cols.remove("transponder_code_hex")
|
||||||
|
cols.insert(1, "transponder_code_hex")
|
||||||
|
df = df[cols]
|
||||||
|
|
||||||
df = df.rename(columns={"aircraft_type": "aircraft_type_2"})
|
df = df.rename(columns={"aircraft_type": "aircraft_type_2"})
|
||||||
aircraft = pd.json_normalize(df["aircraft"].where(df["aircraft"].notna(), {})).add_prefix("aircraft_")
|
aircraft = pd.json_normalize(df["aircraft"].where(df["aircraft"].notna(), {})).add_prefix("aircraft_")
|
||||||
df = df.drop(columns="aircraft").join(aircraft)
|
df = df.drop(columns="aircraft").join(aircraft)
|
||||||
df = df.rename(columns={"engine_type": "engine_type_2"})
|
df = df.rename(columns={"engine_type": "engine_type_2"})
|
||||||
engine = pd.json_normalize(df["engine"].where(df["engine"].notna(), {})).add_prefix("engine_")
|
engine = pd.json_normalize(df["engine"].where(df["engine"].notna(), {})).add_prefix("engine_")
|
||||||
df = df.drop(columns="engine").join(engine)
|
df = df.drop(columns="engine").join(engine)
|
||||||
df = df.sort_values(by=["icao"])
|
certification = pd.json_normalize(df["certification"].where(df["certification"].notna(), {})).add_prefix("certificate_")
|
||||||
|
df = df.drop(columns="certification").join(certification)
|
||||||
|
|
||||||
|
# Create planequery_airframe_id
|
||||||
|
df["planequery_airframe_id"] = (
|
||||||
|
normalize(df["aircraft_manufacturer"])
|
||||||
|
+ "|"
|
||||||
|
+ normalize(df["aircraft_model"])
|
||||||
|
+ "|"
|
||||||
|
+ normalize(df["serial_number"])
|
||||||
|
)
|
||||||
|
|
||||||
|
# Move planequery_airframe_id to come after registration_number
|
||||||
|
cols = df.columns.tolist()
|
||||||
|
cols.remove("planequery_airframe_id")
|
||||||
|
reg_idx = cols.index("registration_number")
|
||||||
|
cols.insert(reg_idx + 1, "planequery_airframe_id")
|
||||||
|
df = df[cols]
|
||||||
|
|
||||||
df.to_csv(csv_path, index=False)
|
df.to_csv(csv_path, index=False)
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def normalize(s: pd.Series) -> pd.Series:
    """Return an upper-cased, punctuation-free key form of each value in *s*.

    Missing values become the empty string. Every other value is converted
    to ``str``, upper-cased, and stripped of every character that is neither
    a regex word character (``\\w``: letters, digits, underscore) nor a
    hyphen.

    Args:
        s: Series of raw values; may contain missing entries.

    Returns:
        A Series with the same index as *s* holding the normalized strings.
    """
    return (
        s.fillna("")
        .astype(str)
        .str.upper()
        # Remove characters that cause false mismatches. Whitespace is not a
        # word character, so this single pass also drops leading, trailing
        # and internal whitespace; separate strip/collapse steps would have
        # no effect on the result.
        .str.replace(r"[^\w\-]", "", regex=True)
    )
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user