mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 19:46:09 +02:00
Merge pull request #31 from PlaneQuery/develop
Develop to Main: Fix adsb asset retrieval to be more fault tolerant. Fix download issue
This commit is contained in:
@@ -49,11 +49,38 @@ jobs:
|
||||
python -m src.adsb.download_and_list_icaos --date "$DATE"
|
||||
ls -lah data/output/adsb_archives/"$DATE" || true
|
||||
|
||||
- name: Upload archives
|
||||
- name: Upload archive part 0
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: adsb-archives-${{ inputs.date }}
|
||||
path: data/output/adsb_archives/${{ inputs.date }}
|
||||
name: adsb-archive-${{ inputs.date }}-part-0
|
||||
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_0.tar.gz
|
||||
retention-days: 1
|
||||
compression-level: 0
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Upload archive part 1
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: adsb-archive-${{ inputs.date }}-part-1
|
||||
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_1.tar.gz
|
||||
retention-days: 1
|
||||
compression-level: 0
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Upload archive part 2
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: adsb-archive-${{ inputs.date }}-part-2
|
||||
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_2.tar.gz
|
||||
retention-days: 1
|
||||
compression-level: 0
|
||||
if-no-files-found: error
|
||||
|
||||
- name: Upload archive part 3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: adsb-archive-${{ inputs.date }}-part-3
|
||||
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_3.tar.gz
|
||||
retention-days: 1
|
||||
compression-level: 0
|
||||
if-no-files-found: error
|
||||
@@ -79,12 +106,22 @@ jobs:
|
||||
python -m pip install --upgrade pip
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Download archives
|
||||
- name: Download archive part
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: adsb-archives-${{ inputs.date }}
|
||||
name: adsb-archive-${{ inputs.date }}-part-${{ matrix.part_id }}
|
||||
path: data/output/adsb_archives/${{ inputs.date }}
|
||||
|
||||
- name: Verify archive
|
||||
run: |
|
||||
FILE="data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_${{ matrix.part_id }}.tar.gz"
|
||||
ls -lah data/output/adsb_archives/${{ inputs.date }}/
|
||||
if [ ! -f "$FILE" ]; then
|
||||
echo "::error::Archive not found: $FILE"
|
||||
exit 1
|
||||
fi
|
||||
echo "Verified: $(du -h "$FILE")"
|
||||
|
||||
- name: Process part
|
||||
env:
|
||||
DATE: ${{ inputs.date }}
|
||||
@@ -140,6 +177,6 @@ jobs:
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: openairframes_adsb-${{ inputs.date }}
|
||||
path: data/output/openairframes_adsb_${{ inputs.date }}*
|
||||
path: data/output/openairframes_adsb_*
|
||||
retention-days: 30
|
||||
if-no-files-found: error
|
||||
|
||||
@@ -266,7 +266,11 @@ jobs:
|
||||
|
||||
# Find files from artifacts using find (handles nested structures)
|
||||
CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
|
||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
|
||||
# Prefer concatenated file (with date range) over single-day file
|
||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*_*.csv.gz" -type f 2>/dev/null | head -1)
|
||||
if [ -z "$CSV_FILE_ADSB" ]; then
|
||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
|
||||
fi
|
||||
CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
|
||||
ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
|
||||
JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
|
||||
|
||||
@@ -194,7 +194,7 @@ def main():
|
||||
if triggered_runs and not args.dry_run:
|
||||
import json
|
||||
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
|
||||
runs_file = f"./triggered_runs_{timestamp}.json"
|
||||
runs_file = f"./output/triggered_runs_{timestamp}.json"
|
||||
with open(runs_file, 'w') as f:
|
||||
json.dump({
|
||||
'start_date': args.start_date,
|
||||
|
||||
@@ -123,7 +123,16 @@ def main():
|
||||
print(f"Processing part {args.part_id} for {args.date}")
|
||||
|
||||
# Get specific archive file for this part
|
||||
archive_path = os.path.join(OUTPUT_DIR, "adsb_archives", args.date, f"{args.date}_part_{args.part_id}.tar.gz")
|
||||
archive_dir = os.path.join(OUTPUT_DIR, "adsb_archives", args.date)
|
||||
archive_path = os.path.join(archive_dir, f"{args.date}_part_{args.part_id}.tar.gz")
|
||||
|
||||
if not os.path.isfile(archive_path):
|
||||
print(f"ERROR: Archive not found: {archive_path}")
|
||||
if os.path.isdir(archive_dir):
|
||||
print(f"Files in {archive_dir}: {os.listdir(archive_dir)}")
|
||||
else:
|
||||
print(f"Directory does not exist: {archive_dir}")
|
||||
sys.exit(1)
|
||||
|
||||
# Extract and collect trace files
|
||||
trace_map = build_trace_file_map(archive_path)
|
||||
|
||||
+50
-16
@@ -27,6 +27,33 @@ def _http_get_json(url: str, headers: dict[str, str]) -> dict:
|
||||
return json.loads(data.decode("utf-8"))
|
||||
|
||||
|
||||
def get_releases(repo: str = REPO, github_token: Optional[str] = None, per_page: int = 30) -> list[dict]:
    """Fetch the release list of a repository from the GitHub REST API.

    Args:
        repo: Repository identifier interpolated into the API URL path.
        github_token: Optional token; when truthy it is sent as a Bearer
            ``Authorization`` header.
        per_page: Value passed as the ``per_page`` query parameter.

    Returns:
        The decoded JSON payload produced by ``_http_get_json``.
    """
    # The Authorization entry is only present when a token was supplied.
    auth_header = {"Authorization": f"Bearer {github_token}"} if github_token else {}
    request_headers = {
        "Accept": "application/vnd.github+json",
        "User-Agent": "openairframes-downloader/1.0",
        **auth_header,
    }
    endpoint = f"https://api.github.com/repos/{repo}/releases?per_page={per_page}"
    return _http_get_json(endpoint, headers=request_headers)
|
||||
|
||||
|
||||
def get_release_assets_from_release_data(release_data: dict) -> list[ReleaseAsset]:
    """Extract assets from a release data dictionary.

    Args:
        release_data: One release object, read as a mapping with an optional
            "assets" list. Each entry must carry "name" and
            "browser_download_url"; "size" is optional.

    Returns:
        One ReleaseAsset per entry, in the order given. The list is empty when
        "assets" is missing, null, or empty.

    Raises:
        KeyError: If an entry lacks "name" or "browser_download_url".
    """
    # `or []` / `or 0` also cover an explicit null value, which
    # `.get(key, default)` would pass through and crash on.
    return [
        ReleaseAsset(
            name=entry["name"],
            download_url=entry["browser_download_url"],
            size=int(entry.get("size") or 0),
        )
        for entry in release_data.get("assets") or []
    ]
|
||||
|
||||
|
||||
def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]:
|
||||
url = f"https://api.github.com/repos/{repo}/releases/latest"
|
||||
headers = {
|
||||
@@ -37,16 +64,7 @@ def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = No
|
||||
headers["Authorization"] = f"Bearer {github_token}"
|
||||
|
||||
payload = _http_get_json(url, headers=headers)
|
||||
assets = []
|
||||
for a in payload.get("assets", []):
|
||||
assets.append(
|
||||
ReleaseAsset(
|
||||
name=a["name"],
|
||||
download_url=a["browser_download_url"],
|
||||
size=int(a.get("size", 0)),
|
||||
)
|
||||
)
|
||||
return assets
|
||||
return get_release_assets_from_release_data(payload)
|
||||
|
||||
|
||||
def pick_asset(
|
||||
@@ -155,7 +173,8 @@ def download_latest_aircraft_adsb_csv(
|
||||
repo: str = REPO,
|
||||
) -> Path:
|
||||
"""
|
||||
Download the latest openairframes_adsb_*.csv file from the latest GitHub release.
|
||||
Download the latest openairframes_adsb_*.csv file from GitHub releases.
|
||||
If the latest release doesn't have the file, searches previous releases.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save the downloaded file (default: "downloads")
|
||||
@@ -166,11 +185,25 @@ def download_latest_aircraft_adsb_csv(
|
||||
Path to the downloaded file
|
||||
"""
|
||||
output_dir = Path(output_dir)
|
||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
|
||||
# Get multiple releases
|
||||
releases = get_releases(repo, github_token=github_token, per_page=30)
|
||||
|
||||
# Try each release until we find one with the matching asset
|
||||
for release in releases:
|
||||
assets = get_release_assets_from_release_data(release)
|
||||
try:
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
except FileNotFoundError:
|
||||
# This release doesn't have the matching asset, try the next one
|
||||
continue
|
||||
|
||||
raise FileNotFoundError(
|
||||
f"No release in the last 30 releases has an asset matching 'openairframes_adsb_.*\\.csv(\\.gz)?$'"
|
||||
)
|
||||
|
||||
import polars as pl
|
||||
def get_latest_aircraft_adsb_csv_df():
|
||||
@@ -212,3 +245,4 @@ def get_latest_aircraft_adsb_csv_df():
|
||||
|
||||
if __name__ == "__main__":
|
||||
download_latest_aircraft_csv()
|
||||
download_latest_aircraft_adsb_csv()
|
||||
Reference in New Issue
Block a user