From 55c464aad769b1ee3b0d014854a72a8fefb974d5 Mon Sep 17 00:00:00 2001 From: ggman12 Date: Tue, 24 Feb 2026 02:12:55 -0500 Subject: [PATCH] Fix adsb asset retrieval to be more fault tolerant. Fix download issue for 2024-07-03 --- .../workflows/adsb-to-aircraft-for-day.yaml | 2 +- .../openairframes-daily-release.yaml | 6 +- scripts/run_historical_adsb_action.py | 2 +- src/get_latest_release.py | 66 ++++++++++++++----- 4 files changed, 57 insertions(+), 19 deletions(-) diff --git a/.github/workflows/adsb-to-aircraft-for-day.yaml b/.github/workflows/adsb-to-aircraft-for-day.yaml index a41494d..f3fabba 100644 --- a/.github/workflows/adsb-to-aircraft-for-day.yaml +++ b/.github/workflows/adsb-to-aircraft-for-day.yaml @@ -177,6 +177,6 @@ jobs: uses: actions/upload-artifact@v4 with: name: openairframes_adsb-${{ inputs.date }} - path: data/output/openairframes_adsb_${{ inputs.date }}* + path: data/output/openairframes_adsb_* retention-days: 30 if-no-files-found: error diff --git a/.github/workflows/openairframes-daily-release.yaml b/.github/workflows/openairframes-daily-release.yaml index e8ee045..9fbae9a 100644 --- a/.github/workflows/openairframes-daily-release.yaml +++ b/.github/workflows/openairframes-daily-release.yaml @@ -266,7 +266,11 @@ jobs: # Find files from artifacts using find (handles nested structures) CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1) - CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1) + # Prefer concatenated file (with date range) over single-day file + CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*_*.csv.gz" -type f 2>/dev/null | head -1) + if [ -z "$CSV_FILE_ADSB" ]; then + CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1) + fi CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1) ZIP_FILE=$(find artifacts/faa 
-name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1) JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1) diff --git a/scripts/run_historical_adsb_action.py b/scripts/run_historical_adsb_action.py index f728529..7c6b658 100644 --- a/scripts/run_historical_adsb_action.py +++ b/scripts/run_historical_adsb_action.py @@ -194,7 +194,7 @@ def main(): if triggered_runs and not args.dry_run: import json timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') - runs_file = f"./triggered_runs_{timestamp}.json" + runs_file = f"./output/triggered_runs_{timestamp}.json" with open(runs_file, 'w') as f: json.dump({ 'start_date': args.start_date, diff --git a/src/get_latest_release.py b/src/get_latest_release.py index 3283b06..72d9302 100644 --- a/src/get_latest_release.py +++ b/src/get_latest_release.py @@ -27,6 +27,33 @@ def _http_get_json(url: str, headers: dict[str, str]) -> dict: return json.loads(data.decode("utf-8")) +def get_releases(repo: str = REPO, github_token: Optional[str] = None, per_page: int = 30) -> list[dict]: + """Get a list of releases from the repository.""" + url = f"https://api.github.com/repos/{repo}/releases?per_page={per_page}" + headers = { + "Accept": "application/vnd.github+json", + "User-Agent": "openairframes-downloader/1.0", + } + if github_token: + headers["Authorization"] = f"Bearer {github_token}" + + return _http_get_json(url, headers=headers) + + +def get_release_assets_from_release_data(release_data: dict) -> list[ReleaseAsset]: + """Extract assets from a release data dictionary.""" + assets = [] + for a in release_data.get("assets", []): + assets.append( + ReleaseAsset( + name=a["name"], + download_url=a["browser_download_url"], + size=int(a.get("size", 0)), + ) + ) + return assets + + def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]: url = f"https://api.github.com/repos/{repo}/releases/latest" headers = { @@ -37,16 +64,7 @@ 
def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = No headers["Authorization"] = f"Bearer {github_token}" payload = _http_get_json(url, headers=headers) - assets = [] - for a in payload.get("assets", []): - assets.append( - ReleaseAsset( - name=a["name"], - download_url=a["browser_download_url"], - size=int(a.get("size", 0)), - ) - ) - return assets + return get_release_assets_from_release_data(payload) def pick_asset( @@ -155,7 +173,8 @@ def download_latest_aircraft_adsb_csv( repo: str = REPO, ) -> Path: """ - Download the latest openairframes_adsb_*.csv file from the latest GitHub release. + Download the latest openairframes_adsb_*.csv file from GitHub releases. + If the latest release doesn't have the file, searches previous releases. Args: output_dir: Directory to save the downloaded file (default: "downloads") @@ -166,11 +185,25 @@ def download_latest_aircraft_adsb_csv( Path to the downloaded file """ output_dir = Path(output_dir) - assets = get_latest_release_assets(repo, github_token=github_token) - asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$") - saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token) - print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}") - return saved_to + + # Get multiple releases + releases = get_releases(repo, github_token=github_token, per_page=30) + + # Try each release until we find one with the matching asset + for release in releases: + assets = get_release_assets_from_release_data(release) + try: + asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$") + saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token) + print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}") + return saved_to + except FileNotFoundError: + # This release doesn't have the matching asset, try the next one + continue + + raise FileNotFoundError( + f"No release in the last 30 releases has an 
asset matching 'openairframes_adsb_.*\\.csv(\\.gz)?$'" + ) import polars as pl def get_latest_aircraft_adsb_csv_df(): @@ -212,3 +245,4 @@ def get_latest_aircraft_adsb_csv_df(): if __name__ == "__main__": download_latest_aircraft_csv() + download_latest_aircraft_adsb_csv() \ No newline at end of file