From 97284c69a916a4af9f3cd6f4b9354f72aca92df3 Mon Sep 17 00:00:00 2001 From: ggman12 Date: Wed, 18 Feb 2026 20:52:54 -0500 Subject: [PATCH] verify download assets --- src/adsb/download_adsb_data_to_parquet.py | 43 ++++++++++++++++++++--- src/adsb/download_and_list_icaos.py | 3 +- 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/src/adsb/download_adsb_data_to_parquet.py b/src/adsb/download_adsb_data_to_parquet.py index 4dfe3eb..30b0089 100644 --- a/src/adsb/download_adsb_data_to_parquet.py +++ b/src/adsb/download_adsb_data_to_parquet.py @@ -129,13 +129,32 @@ def fetch_releases(version_date: str) -> list: return releases -def download_asset(asset_url: str, file_path: str) -> bool: - """Download a single release asset.""" +def download_asset(asset_url: str, file_path: str, expected_size: int | None = None) -> bool: + """Download a single release asset with size verification. + + Args: + asset_url: URL to download from + file_path: Local path to save to + expected_size: Expected file size in bytes (for verification) + + Returns: + True if download succeeded and size matches (if provided), False otherwise + """ os.makedirs(os.path.dirname(file_path) or OUTPUT_DIR, exist_ok=True) + # Check if file exists and has correct size if os.path.exists(file_path): - print(f"[SKIP] {file_path} already downloaded.") - return True + if expected_size is not None: + actual_size = os.path.getsize(file_path) + if actual_size == expected_size: + print(f"[SKIP] {file_path} already downloaded and verified ({actual_size} bytes).") + return True + else: + print(f"[WARN] {file_path} exists but size mismatch (expected {expected_size}, got {actual_size}). 
Re-downloading.") + os.remove(file_path) + else: + print(f"[SKIP] {file_path} already downloaded.") + return True max_retries = 2 retry_delay = 30 @@ -153,7 +172,21 @@ def download_asset(asset_url: str, file_path: str) -> bool: if not chunk: break file.write(chunk) - print(f"Saved {file_path}") + + # Verify file size if expected_size was provided + if expected_size is not None: + actual_size = os.path.getsize(file_path) + if actual_size != expected_size: + print(f"[ERROR] Size mismatch for {file_path}: expected {expected_size} bytes, got {actual_size} bytes") + os.remove(file_path) + if attempt < max_retries: + print(f"Waiting {retry_delay} seconds before retry") + time.sleep(retry_delay) + continue + return False + print(f"Saved {file_path} ({actual_size} bytes, verified)") + else: + print(f"Saved {file_path}") return True else: print(f"Failed to download {asset_url}: {response.status} {response.msg}") diff --git a/src/adsb/download_and_list_icaos.py b/src/adsb/download_and_list_icaos.py index fb12377..2296fe9 100644 --- a/src/adsb/download_and_list_icaos.py +++ b/src/adsb/download_and_list_icaos.py @@ -77,8 +77,9 @@ def download_and_extract(version_date: str) -> str | None: for asset in use_assets: asset_name = asset["name"] asset_url = asset["browser_download_url"] + asset_size = asset.get("size") # Get expected file size file_path = os.path.join(OUTPUT_DIR, asset_name) - if download_asset(asset_url, file_path): + if download_asset(asset_url, file_path, expected_size=asset_size): downloaded_files.append(file_path) if not downloaded_files: