Compare commits

...

7 Commits

Author SHA1 Message Date
JG 2bb0a5eac3 Merge pull request #33 from PlaneQuery/develop
Develop to Main: Handle ADSB when ADSB.lol has not released any data for day. Just rerelease latest adsb
2026-02-26 15:32:59 -05:00
ggman12 b54f33aa56 Handle ADSB when ADSB.lol has not released any data for day. Just rerelease latest adsb 2026-02-26 15:31:47 -05:00
JG 2dda3d341c Merge pull request #32 from PlaneQuery/develop
Develop to Main: Fix Community Submission export. Fix CSV concatenation logic to prevent duplicates when there is no new ADSB.lol data.
2026-02-24 15:37:54 -05:00
ggman12 b0526f0a95 Fix Community Submission export. Fix CSV concatenation logic to prevent duplicates when there is no new ADSB.lol data. 2026-02-24 15:36:10 -05:00
JG 4b6a043a9d Merge pull request #31 from PlaneQuery/develop
Develop to Main Fix adsb asset retrival to be more fault tolerant. Fix download issue
2026-02-24 02:17:08 -05:00
ggman12 55c464aad7 Fix adsb asset retrival to be more fault tolerant. Fix download issue for 2024-07-03 2026-02-24 02:12:55 -05:00
ggman12 aa509e8560 attempt to fix download issue for 2024-07-03 2026-02-19 17:51:49 -05:00
7 changed files with 212 additions and 61 deletions
@@ -49,11 +49,38 @@ jobs:
python -m src.adsb.download_and_list_icaos --date "$DATE" python -m src.adsb.download_and_list_icaos --date "$DATE"
ls -lah data/output/adsb_archives/"$DATE" || true ls -lah data/output/adsb_archives/"$DATE" || true
- name: Upload archives - name: Upload archive part 0
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: adsb-archives-${{ inputs.date }} name: adsb-archive-${{ inputs.date }}-part-0
path: data/output/adsb_archives/${{ inputs.date }} path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_0.tar.gz
retention-days: 1
compression-level: 0
if-no-files-found: error
- name: Upload archive part 1
uses: actions/upload-artifact@v4
with:
name: adsb-archive-${{ inputs.date }}-part-1
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_1.tar.gz
retention-days: 1
compression-level: 0
if-no-files-found: error
- name: Upload archive part 2
uses: actions/upload-artifact@v4
with:
name: adsb-archive-${{ inputs.date }}-part-2
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_2.tar.gz
retention-days: 1
compression-level: 0
if-no-files-found: error
- name: Upload archive part 3
uses: actions/upload-artifact@v4
with:
name: adsb-archive-${{ inputs.date }}-part-3
path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_3.tar.gz
retention-days: 1 retention-days: 1
compression-level: 0 compression-level: 0
if-no-files-found: error if-no-files-found: error
@@ -79,12 +106,22 @@ jobs:
python -m pip install --upgrade pip python -m pip install --upgrade pip
pip install -r requirements.txt pip install -r requirements.txt
- name: Download archives - name: Download archive part
uses: actions/download-artifact@v4 uses: actions/download-artifact@v4
with: with:
name: adsb-archives-${{ inputs.date }} name: adsb-archive-${{ inputs.date }}-part-${{ matrix.part_id }}
path: data/output/adsb_archives/${{ inputs.date }} path: data/output/adsb_archives/${{ inputs.date }}
- name: Verify archive
run: |
FILE="data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_${{ matrix.part_id }}.tar.gz"
ls -lah data/output/adsb_archives/${{ inputs.date }}/
if [ ! -f "$FILE" ]; then
echo "::error::Archive not found: $FILE"
exit 1
fi
echo "Verified: $(du -h "$FILE")"
- name: Process part - name: Process part
env: env:
DATE: ${{ inputs.date }} DATE: ${{ inputs.date }}
@@ -140,6 +177,6 @@ jobs:
uses: actions/upload-artifact@v4 uses: actions/upload-artifact@v4
with: with:
name: openairframes_adsb-${{ inputs.date }} name: openairframes_adsb-${{ inputs.date }}
path: data/output/openairframes_adsb_${{ inputs.date }}* path: data/output/openairframes_adsb_*
retention-days: 30 retention-days: 30
if-no-files-found: error if-no-files-found: error
@@ -101,6 +101,51 @@ jobs:
date: ${{ needs.resolve-dates.outputs.adsb_date }} date: ${{ needs.resolve-dates.outputs.adsb_date }}
concat_with_latest_csv: true concat_with_latest_csv: true
adsb-reduce:
needs: [resolve-dates, adsb-to-aircraft]
if: always() && github.event_name != 'schedule' && needs.adsb-to-aircraft.result == 'failure'
runs-on: ubuntu-24.04-arm
steps:
- name: Checkout
uses: actions/checkout@v6
- name: Setup Python
uses: actions/setup-python@v6
with:
python-version: '3.12'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
- name: Download compressed outputs
uses: actions/download-artifact@v4
with:
pattern: adsb-compressed-${{ needs.resolve-dates.outputs.adsb_date }}-part-*
path: data/output/compressed/${{ needs.resolve-dates.outputs.adsb_date }}
merge-multiple: true
- name: Concatenate final outputs
env:
DATE: ${{ needs.resolve-dates.outputs.adsb_date }}
CONCAT_WITH_LATEST_CSV: true
run: |
EXTRA=""
if [ "$CONCAT_WITH_LATEST_CSV" = "true" ]; then
EXTRA="--concat_with_latest_csv"
fi
python -m src.adsb.concat_parquet_to_final --date "$DATE" $EXTRA
ls -lah data/output/ || true
- name: Upload final artifacts
uses: actions/upload-artifact@v4
with:
name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}
path: data/output/openairframes_adsb_*
retention-days: 30
if-no-files-found: error
build-community: build-community:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.event_name != 'schedule' if: github.event_name != 'schedule'
@@ -188,13 +233,13 @@ jobs:
create-release: create-release:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: [resolve-dates, build-faa, adsb-to-aircraft, build-community, build-adsbexchange-json, build-mictronics-db] needs: [resolve-dates, build-faa, adsb-to-aircraft, adsb-reduce, build-community, build-adsbexchange-json, build-mictronics-db]
if: github.event_name != 'schedule' && !cancelled() if: github.event_name != 'schedule' && !cancelled()
steps: steps:
- name: Check adsb-to-aircraft status - name: Check ADS-B workflow status
if: needs.adsb-to-aircraft.result != 'success' if: needs.adsb-to-aircraft.result != 'success' && needs.adsb-reduce.result != 'success'
run: | run: |
echo "WARNING: adsb-to-aircraft result was '${{ needs.adsb-to-aircraft.result }}', will continue without ADS-B artifacts" echo "WARNING: ADS-B workflow failed (adsb-to-aircraft='${{ needs.adsb-to-aircraft.result }}', adsb-reduce='${{ needs.adsb-reduce.result }}'), will continue without ADS-B artifacts"
- name: Checkout for gh CLI - name: Checkout for gh CLI
uses: actions/checkout@v4 uses: actions/checkout@v4
@@ -211,7 +256,7 @@ jobs:
- name: Download ADS-B artifacts - name: Download ADS-B artifacts
uses: actions/download-artifact@v5 uses: actions/download-artifact@v5
if: needs.adsb-to-aircraft.result == 'success' if: needs.adsb-to-aircraft.result == 'success' || needs.adsb-reduce.result == 'success'
continue-on-error: true continue-on-error: true
with: with:
name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }} name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}
@@ -266,7 +311,11 @@ jobs:
# Find files from artifacts using find (handles nested structures) # Find files from artifacts using find (handles nested structures)
CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1) CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1) # Prefer concatenated file (with date range) over single-day file
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*_*.csv.gz" -type f 2>/dev/null | head -1)
if [ -z "$CSV_FILE_ADSB" ]; then
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
fi
CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1) CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1) ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1) JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
+1 -1
View File
@@ -194,7 +194,7 @@ def main():
if triggered_runs and not args.dry_run: if triggered_runs and not args.dry_run:
import json import json
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
runs_file = f"./triggered_runs_{timestamp}.json" runs_file = f"./output/triggered_runs_{timestamp}.json"
with open(runs_file, 'w') as f: with open(runs_file, 'w') as f:
json.dump({ json.dump({
'start_date': args.start_date, 'start_date': args.start_date,
+41 -24
View File
@@ -1,7 +1,7 @@
from pathlib import Path from pathlib import Path
import polars as pl import polars as pl
import argparse import argparse
import os
OUTPUT_DIR = Path("./data/output") OUTPUT_DIR = Path("./data/output")
CORRECT_ORDER_OF_COLUMNS = ["time", "icao", "r", "t", "dbFlags", "ownOp", "year", "desc", "aircraft_category"] CORRECT_ORDER_OF_COLUMNS = ["time", "icao", "r", "t", "dbFlags", "ownOp", "year", "desc", "aircraft_category"]
@@ -13,38 +13,55 @@ def main():
compressed_dir = OUTPUT_DIR / "compressed" compressed_dir = OUTPUT_DIR / "compressed"
date_dir = compressed_dir / args.date date_dir = compressed_dir / args.date
if not date_dir.is_dir():
raise FileNotFoundError(f"No date folder found: {date_dir}")
parquet_files = sorted(date_dir.glob("*.parquet")) parquet_files = sorted(date_dir.glob("*.parquet"))
if not parquet_files: df = None
raise FileNotFoundError(f"No parquet files found in {date_dir}") if parquet_files: # TODO: This logic could be updated slightly.
print(f"No parquet files found in {date_dir}")
frames = [pl.read_parquet(p) for p in parquet_files] frames = [pl.read_parquet(p) for p in parquet_files]
df = pl.concat(frames, how="vertical", rechunk=True) df = pl.concat(frames, how="vertical", rechunk=True)
df = df.sort(["time", "icao"]) df = df.sort(["time", "icao"])
df = df.select(CORRECT_ORDER_OF_COLUMNS) df = df.select(CORRECT_ORDER_OF_COLUMNS)
output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet" output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
print(f"Writing combined parquet to {output_path} with {df.height} rows") print(f"Writing combined parquet to {output_path} with {df.height} rows")
df.write_parquet(output_path) df.write_parquet(output_path)
csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz" csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows") print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
df.write_csv(csv_output_path, compression="gzip") df.write_csv(csv_output_path, compression="gzip")
if args.concat_with_latest_csv: if args.concat_with_latest_csv:
print("Loading latest CSV from GitHub releases to concatenate with...") print("Loading latest CSV from GitHub releases to concatenate with...")
from src.get_latest_release import get_latest_aircraft_adsb_csv_df from src.get_latest_release import get_latest_aircraft_adsb_csv_df
df_latest_csv, csv_date = get_latest_aircraft_adsb_csv_df() from datetime import datetime
# Ensure column order matches before concatenating
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS) df_latest_csv, csv_start_date, csv_end_date = get_latest_aircraft_adsb_csv_df()
from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
df_final = concat_compressed_dfs(df_latest_csv, df) # Compare dates: end_date is exclusive, so if csv_end_date > args.date,
df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS) # the latest CSV already includes this day's data
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_date}_{args.date}.csv.gz" csv_end_dt = datetime.strptime(csv_end_date, "%Y-%m-%d")
df_final.write_csv(final_csv_output_path, compression="gzip") args_dt = datetime.strptime(args.date, "%Y-%m-%d")
if df is None or csv_end_dt >= args_dt:
print(f"Latest CSV already includes data through {args.date} (end_date={csv_end_date} is exclusive)")
print("Writing latest CSV directly without concatenation to avoid duplicates")
os.makedirs(OUTPUT_DIR, exist_ok=True)
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{csv_end_date}.csv.gz"
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
df_latest_csv.write_csv(final_csv_output_path, compression="gzip")
else:
print(f"Concatenating latest CSV (through {csv_end_date}) with new data ({args.date})")
# Ensure column order matches before concatenating
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
df_final = concat_compressed_dfs(df_latest_csv, df)
df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS)
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{args.date}.csv.gz"
df_final.write_csv(final_csv_output_path, compression="gzip")
print(f"Final CSV written to {final_csv_output_path}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()
+10 -1
View File
@@ -123,7 +123,16 @@ def main():
print(f"Processing part {args.part_id} for {args.date}") print(f"Processing part {args.part_id} for {args.date}")
# Get specific archive file for this part # Get specific archive file for this part
archive_path = os.path.join(OUTPUT_DIR, "adsb_archives", args.date, f"{args.date}_part_{args.part_id}.tar.gz") archive_dir = os.path.join(OUTPUT_DIR, "adsb_archives", args.date)
archive_path = os.path.join(archive_dir, f"{args.date}_part_{args.part_id}.tar.gz")
if not os.path.isfile(archive_path):
print(f"ERROR: Archive not found: {archive_path}")
if os.path.isdir(archive_dir):
print(f"Files in {archive_dir}: {os.listdir(archive_dir)}")
else:
print(f"Directory does not exist: {archive_dir}")
sys.exit(1)
# Extract and collect trace files # Extract and collect trace files
trace_map = build_trace_file_map(archive_path) trace_map = build_trace_file_map(archive_path)
@@ -24,7 +24,7 @@ def read_all_submissions(community_dir: Path) -> list[dict]:
"""Read all JSON submissions from the community directory.""" """Read all JSON submissions from the community directory."""
all_submissions = [] all_submissions = []
for json_file in sorted(community_dir.glob("*.json")): for json_file in sorted(community_dir.glob("**/*.json")):
try: try:
with open(json_file) as f: with open(json_file) as f:
data = json.load(f) data = json.load(f)
+61 -22
View File
@@ -27,6 +27,33 @@ def _http_get_json(url: str, headers: dict[str, str]) -> dict:
return json.loads(data.decode("utf-8")) return json.loads(data.decode("utf-8"))
def get_releases(repo: str = REPO, github_token: Optional[str] = None, per_page: int = 30) -> list[dict]:
"""Get a list of releases from the repository."""
url = f"https://api.github.com/repos/{repo}/releases?per_page={per_page}"
headers = {
"Accept": "application/vnd.github+json",
"User-Agent": "openairframes-downloader/1.0",
}
if github_token:
headers["Authorization"] = f"Bearer {github_token}"
return _http_get_json(url, headers=headers)
def get_release_assets_from_release_data(release_data: dict) -> list[ReleaseAsset]:
"""Extract assets from a release data dictionary."""
assets = []
for a in release_data.get("assets", []):
assets.append(
ReleaseAsset(
name=a["name"],
download_url=a["browser_download_url"],
size=int(a.get("size", 0)),
)
)
return assets
def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]: def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]:
url = f"https://api.github.com/repos/{repo}/releases/latest" url = f"https://api.github.com/repos/{repo}/releases/latest"
headers = { headers = {
@@ -37,16 +64,7 @@ def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = No
headers["Authorization"] = f"Bearer {github_token}" headers["Authorization"] = f"Bearer {github_token}"
payload = _http_get_json(url, headers=headers) payload = _http_get_json(url, headers=headers)
assets = [] return get_release_assets_from_release_data(payload)
for a in payload.get("assets", []):
assets.append(
ReleaseAsset(
name=a["name"],
download_url=a["browser_download_url"],
size=int(a.get("size", 0)),
)
)
return assets
def pick_asset( def pick_asset(
@@ -155,7 +173,8 @@ def download_latest_aircraft_adsb_csv(
repo: str = REPO, repo: str = REPO,
) -> Path: ) -> Path:
""" """
Download the latest openairframes_adsb_*.csv file from the latest GitHub release. Download the latest openairframes_adsb_*.csv file from GitHub releases.
If the latest release doesn't have the file, searches previous releases.
Args: Args:
output_dir: Directory to save the downloaded file (default: "downloads") output_dir: Directory to save the downloaded file (default: "downloads")
@@ -166,15 +185,33 @@ def download_latest_aircraft_adsb_csv(
Path to the downloaded file Path to the downloaded file
""" """
output_dir = Path(output_dir) output_dir = Path(output_dir)
assets = get_latest_release_assets(repo, github_token=github_token)
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$") # Get multiple releases
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token) releases = get_releases(repo, github_token=github_token, per_page=30)
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
return saved_to # Try each release until we find one with the matching asset
for release in releases:
assets = get_release_assets_from_release_data(release)
try:
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
return saved_to
except FileNotFoundError:
# This release doesn't have the matching asset, try the next one
continue
raise FileNotFoundError(
f"No release in the last 30 releases has an asset matching 'openairframes_adsb_.*\\.csv(\\.gz)?$'"
)
import polars as pl import polars as pl
def get_latest_aircraft_adsb_csv_df(): def get_latest_aircraft_adsb_csv_df():
"""Download and load the latest ADS-B CSV from GitHub releases.""" """Download and load the latest ADS-B CSV from GitHub releases.
Returns:
tuple: (df, start_date, end_date) where dates are in YYYY-MM-DD format
"""
import re import re
csv_path = download_latest_aircraft_adsb_csv() csv_path = download_latest_aircraft_adsb_csv()
@@ -198,17 +235,19 @@ def get_latest_aircraft_adsb_csv_df():
if df[col].dtype == pl.Utf8: if df[col].dtype == pl.Utf8:
df = df.with_columns(pl.col(col).fill_null("")) df = df.with_columns(pl.col(col).fill_null(""))
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz] # Extract start and end dates from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path)) match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv", str(csv_path))
if not match: if not match:
raise ValueError(f"Could not extract date from filename: {csv_path.name}") raise ValueError(f"Could not extract dates from filename: {csv_path.name}")
date_str = match.group(1) start_date = match.group(1)
end_date = match.group(2)
print(df.columns) print(df.columns)
print(df.dtypes) print(df.dtypes)
return df, date_str return df, start_date, end_date
if __name__ == "__main__": if __name__ == "__main__":
download_latest_aircraft_csv() download_latest_aircraft_csv()
download_latest_aircraft_adsb_csv()