mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-24 03:56:24 +02:00
Handle ADSB when ADSB.lol has not released any data for day. Just rerelease latest adsb
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
from pathlib import Path
|
||||
import polars as pl
|
||||
import argparse
|
||||
|
||||
import os
|
||||
OUTPUT_DIR = Path("./data/output")
|
||||
CORRECT_ORDER_OF_COLUMNS = ["time", "icao", "r", "t", "dbFlags", "ownOp", "year", "desc", "aircraft_category"]
|
||||
|
||||
@@ -13,26 +13,25 @@ def main():
|
||||
|
||||
compressed_dir = OUTPUT_DIR / "compressed"
|
||||
date_dir = compressed_dir / args.date
|
||||
if not date_dir.is_dir():
|
||||
raise FileNotFoundError(f"No date folder found: {date_dir}")
|
||||
|
||||
parquet_files = sorted(date_dir.glob("*.parquet"))
|
||||
if not parquet_files:
|
||||
raise FileNotFoundError(f"No parquet files found in {date_dir}")
|
||||
df = None
|
||||
if parquet_files: # TODO: This logic could be updated slightly.
|
||||
print(f"No parquet files found in {date_dir}")
|
||||
|
||||
frames = [pl.read_parquet(p) for p in parquet_files]
|
||||
df = pl.concat(frames, how="vertical", rechunk=True)
|
||||
frames = [pl.read_parquet(p) for p in parquet_files]
|
||||
df = pl.concat(frames, how="vertical", rechunk=True)
|
||||
|
||||
df = df.sort(["time", "icao"])
|
||||
df = df.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
|
||||
output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
|
||||
print(f"Writing combined parquet to {output_path} with {df.height} rows")
|
||||
df.write_parquet(output_path)
|
||||
df = df.sort(["time", "icao"])
|
||||
df = df.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
|
||||
output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
|
||||
print(f"Writing combined parquet to {output_path} with {df.height} rows")
|
||||
df.write_parquet(output_path)
|
||||
|
||||
csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
|
||||
print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
|
||||
df.write_csv(csv_output_path, compression="gzip")
|
||||
csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
|
||||
print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
|
||||
df.write_csv(csv_output_path, compression="gzip")
|
||||
|
||||
if args.concat_with_latest_csv:
|
||||
print("Loading latest CSV from GitHub releases to concatenate with...")
|
||||
@@ -46,9 +45,10 @@ def main():
|
||||
csv_end_dt = datetime.strptime(csv_end_date, "%Y-%m-%d")
|
||||
args_dt = datetime.strptime(args.date, "%Y-%m-%d")
|
||||
|
||||
if csv_end_dt >= args_dt:
|
||||
if df is None or csv_end_dt >= args_dt:
|
||||
print(f"Latest CSV already includes data through {args.date} (end_date={csv_end_date} is exclusive)")
|
||||
print("Writing latest CSV directly without concatenation to avoid duplicates")
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{csv_end_date}.csv.gz"
|
||||
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
df_latest_csv.write_csv(final_csv_output_path, compression="gzip")
|
||||
|
||||
Reference in New Issue
Block a user