Files
OpenAirframes/src/adsb/main.py
T

78 lines
2.9 KiB
Python

"""
Main pipeline for processing ADS-B data from adsb.lol.
Usage:
python -m src.adsb.main --date 2026-01-01
python -m src.adsb.main --start_date 2026-01-01 --end_date 2026-01-03
"""
import argparse
import subprocess
import sys
from datetime import datetime, timedelta
import polars as pl
from src.adsb.download_and_list_icaos import NUMBER_PARTS
def main():
parser = argparse.ArgumentParser(description="Process ADS-B data for a single day or date range")
parser.add_argument("--date", type=str, help="Single date in YYYY-MM-DD format")
parser.add_argument("--start_date", type=str, help="Start date (inclusive, YYYY-MM-DD)")
parser.add_argument("--end_date", type=str, help="End date (exclusive, YYYY-MM-DD)")
parser.add_argument("--concat_with_latest_csv", action="store_true", help="Also concatenate with latest CSV from GitHub releases")
args = parser.parse_args()
if args.date and (args.start_date or args.end_date):
raise SystemExit("Use --date or --start_date/--end_date, not both.")
if args.date:
start_date = datetime.strptime(args.date, "%Y-%m-%d")
end_date = start_date + timedelta(days=1)
else:
if not args.start_date or not args.end_date:
raise SystemExit("Provide --start_date and --end_date, or use --date.")
start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
current = start_date
while current < end_date:
date_str = current.strftime("%Y-%m-%d")
print(f"Processing day: {date_str}")
# Download and split
subprocess.run([sys.executable, "-m", "src.adsb.download_and_list_icaos", "--date", date_str], check=True)
# Process parts
for part_id in range(NUMBER_PARTS):
subprocess.run([sys.executable, "-m", "src.adsb.process_icao_chunk", "--part-id", str(part_id), "--date", date_str], check=True)
# Concatenate
concat_cmd = [sys.executable, "-m", "src.adsb.concat_parquet_to_final", "--date", date_str]
if args.concat_with_latest_csv:
concat_cmd.append("--concat_with_latest_csv")
subprocess.run(concat_cmd, check=True)
current += timedelta(days=1)
if end_date - start_date > timedelta(days=1):
dates = []
cur = start_date
while cur < end_date:
dates.append(cur.strftime("%Y-%m-%d"))
cur += timedelta(days=1)
csv_files = [
f"data/outputs/openairframes_adsb_{d}_{d}.csv"
for d in dates
]
frames = [pl.read_csv(p) for p in csv_files]
df = pl.concat(frames, how="vertical", rechunk=True)
output_path = f"data/outputs/openairframes_adsb_{start_date.strftime('%Y-%m-%d')}_{end_date.strftime('%Y-%m-%d')}.csv"
df.write_csv(output_path)
print(f"Wrote combined CSV: {output_path}")
print("Done")
if __name__ == "__main__":
main()