From 1b15e43669a873c26997faabc05d5dedfcfa6471 Mon Sep 17 00:00:00 2001 From: ggman12 Date: Sat, 14 Feb 2026 22:22:14 -0500 Subject: [PATCH] use .csv.gz --- .github/workflows/openairframes-daily-release.yaml | 4 ++-- src/adsb/combine_chunks_to_csv.py | 4 ++-- src/adsb/compress_adsb_to_aircraft_data.py | 2 +- src/create_daily_adsb_release.py | 2 +- src/get_latest_release.py | 4 ++-- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/openairframes-daily-release.yaml b/.github/workflows/openairframes-daily-release.yaml index 0486f31..1a03762 100644 --- a/.github/workflows/openairframes-daily-release.yaml +++ b/.github/workflows/openairframes-daily-release.yaml @@ -227,7 +227,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: adsb-release - path: data/openairframes/openairframes_adsb_*.csv + path: data/openairframes/openairframes_adsb_*.csv.gz retention-days: 1 build-community: @@ -388,7 +388,7 @@ jobs: # Find files from artifacts using find (handles nested structures) CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1) - CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv" -type f 2>/dev/null | head -1) + CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1) CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1) ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1) JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1) diff --git a/src/adsb/combine_chunks_to_csv.py b/src/adsb/combine_chunks_to_csv.py index b5afca3..e9c53bb 100644 --- a/src/adsb/combine_chunks_to_csv.py +++ b/src/adsb/combine_chunks_to_csv.py @@ -176,7 +176,7 @@ def main(): if args.start_date and args.end_date: # Historical mode output_id = f"{args.start_date}_{args.end_date}" - output_filename = f"openairframes_adsb_{args.start_date}_{args.end_date}.csv" + output_filename = f"openairframes_adsb_{args.start_date}_{args.end_date}.csv.gz" print(f"Combining chunks for date range: {args.start_date} to {args.end_date}") else: # Daily mode - use same date for start and end @@ -187,7 +187,7 @@ def main(): date_str = target_day.strftime("%Y-%m-%d") output_id = date_str - output_filename = f"openairframes_adsb_{date_str}_{date_str}.csv" + output_filename = f"openairframes_adsb_{date_str}_{date_str}.csv.gz" print(f"Combining chunks for {date_str}") chunks_dir = args.chunks_dir diff --git a/src/adsb/compress_adsb_to_aircraft_data.py b/src/adsb/compress_adsb_to_aircraft_data.py index 0938883..42cc0f4 100644 --- a/src/adsb/compress_adsb_to_aircraft_data.py +++ b/src/adsb/compress_adsb_to_aircraft_data.py @@ -264,7 +264,7 @@ def get_latest_aircraft_adsb_csv_df(): if df[col].dtype == pl.Utf8: df = df.with_columns(pl.col(col).fill_null("")) - # Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv + # Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz] match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path)) if not match: raise ValueError(f"Could not extract date from filename: {csv_path.name}") diff --git a/src/create_daily_adsb_release.py b/src/create_daily_adsb_release.py index 0a5137e..887ea97 100644 --- a/src/create_daily_adsb_release.py +++ b/src/create_daily_adsb_release.py @@ -77,7 +77,7 @@ if __name__ == '__main__': OUT_ROOT = Path("data/openairframes") OUT_ROOT.mkdir(parents=True, exist_ok=True) - output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv" + output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv.gz" df_combined.write_csv(output_file) print(f"Saved: {output_file}") diff --git a/src/get_latest_release.py b/src/get_latest_release.py index b29b82a..74832d7 100644 --- a/src/get_latest_release.py +++ b/src/get_latest_release.py @@ -165,7 +165,7 @@ def download_latest_aircraft_adsb_csv( Path to the downloaded file """ assets = get_latest_release_assets(repo, github_token=github_token) - asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv$") + asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$") saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token) print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}") return saved_to @@ -176,7 +176,7 @@ def get_latest_aircraft_adsb_csv_df(): import pandas as pd df = pd.read_csv(csv_path) df = df.fillna("") - # Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv + # Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz] match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path)) if not match: raise ValueError(f"Could not extract date from filename: {csv_path.name}")