Mirror of https://github.com/PlaneQuery/OpenAirframes.git (synced 2026-05-03 16:25:08 +02:00)
Compare commits
11 Commits
- 2bb0a5eac3
- b54f33aa56
- 2dda3d341c
- b0526f0a95
- 4b6a043a9d
- 55c464aad7
- aa509e8560
- 82d11d8d24
- 76a217ad14
- ec2d1a1291
- 97284c69a9
@@ -49,11 +49,38 @@ jobs:
           python -m src.adsb.download_and_list_icaos --date "$DATE"
           ls -lah data/output/adsb_archives/"$DATE" || true
 
-      - name: Upload archives
+      - name: Upload archive part 0
         uses: actions/upload-artifact@v4
         with:
-          name: adsb-archives-${{ inputs.date }}
-          path: data/output/adsb_archives/${{ inputs.date }}
+          name: adsb-archive-${{ inputs.date }}-part-0
+          path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_0.tar.gz
           retention-days: 1
           compression-level: 0
           if-no-files-found: error
+
+      - name: Upload archive part 1
+        uses: actions/upload-artifact@v4
+        with:
+          name: adsb-archive-${{ inputs.date }}-part-1
+          path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_1.tar.gz
+          retention-days: 1
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload archive part 2
+        uses: actions/upload-artifact@v4
+        with:
+          name: adsb-archive-${{ inputs.date }}-part-2
+          path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_2.tar.gz
+          retention-days: 1
+          compression-level: 0
+          if-no-files-found: error
+
+      - name: Upload archive part 3
+        uses: actions/upload-artifact@v4
+        with:
+          name: adsb-archive-${{ inputs.date }}-part-3
+          path: data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_3.tar.gz
+          retention-days: 1
+          compression-level: 0
+          if-no-files-found: error
@@ -79,12 +106,22 @@ jobs:
           python -m pip install --upgrade pip
           pip install -r requirements.txt
 
-      - name: Download archives
+      - name: Download archive part
        uses: actions/download-artifact@v4
         with:
-          name: adsb-archives-${{ inputs.date }}
+          name: adsb-archive-${{ inputs.date }}-part-${{ matrix.part_id }}
           path: data/output/adsb_archives/${{ inputs.date }}
+
+      - name: Verify archive
+        run: |
+          FILE="data/output/adsb_archives/${{ inputs.date }}/${{ inputs.date }}_part_${{ matrix.part_id }}.tar.gz"
+          ls -lah data/output/adsb_archives/${{ inputs.date }}/
+          if [ ! -f "$FILE" ]; then
+            echo "::error::Archive not found: $FILE"
+            exit 1
+          fi
+          echo "Verified: $(du -h "$FILE")"
 
       - name: Process part
         env:
           DATE: ${{ inputs.date }}
@@ -140,6 +177,6 @@ jobs:
         uses: actions/upload-artifact@v4
         with:
           name: openairframes_adsb-${{ inputs.date }}
-          path: data/output/openairframes_adsb_${{ inputs.date }}*
+          path: data/output/openairframes_adsb_*
           retention-days: 30
           if-no-files-found: error
@@ -101,6 +101,51 @@ jobs:
       date: ${{ needs.resolve-dates.outputs.adsb_date }}
       concat_with_latest_csv: true
 
+  adsb-reduce:
+    needs: [resolve-dates, adsb-to-aircraft]
+    if: always() && github.event_name != 'schedule' && needs.adsb-to-aircraft.result == 'failure'
+    runs-on: ubuntu-24.04-arm
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v6
+
+      - name: Setup Python
+        uses: actions/setup-python@v6
+        with:
+          python-version: '3.12'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt
+
+      - name: Download compressed outputs
+        uses: actions/download-artifact@v4
+        with:
+          pattern: adsb-compressed-${{ needs.resolve-dates.outputs.adsb_date }}-part-*
+          path: data/output/compressed/${{ needs.resolve-dates.outputs.adsb_date }}
+          merge-multiple: true
+
+      - name: Concatenate final outputs
+        env:
+          DATE: ${{ needs.resolve-dates.outputs.adsb_date }}
+          CONCAT_WITH_LATEST_CSV: true
+        run: |
+          EXTRA=""
+          if [ "$CONCAT_WITH_LATEST_CSV" = "true" ]; then
+            EXTRA="--concat_with_latest_csv"
+          fi
+          python -m src.adsb.concat_parquet_to_final --date "$DATE" $EXTRA
+          ls -lah data/output/ || true
+
+      - name: Upload final artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}
+          path: data/output/openairframes_adsb_*
+          retention-days: 30
+          if-no-files-found: error
+
   build-community:
     runs-on: ubuntu-latest
     if: github.event_name != 'schedule'
@@ -188,13 +233,13 @@ jobs:
 
   create-release:
     runs-on: ubuntu-latest
-    needs: [resolve-dates, build-faa, adsb-to-aircraft, build-community, build-adsbexchange-json, build-mictronics-db]
+    needs: [resolve-dates, build-faa, adsb-to-aircraft, adsb-reduce, build-community, build-adsbexchange-json, build-mictronics-db]
     if: github.event_name != 'schedule' && !cancelled()
     steps:
-      - name: Check adsb-to-aircraft status
-        if: needs.adsb-to-aircraft.result != 'success'
+      - name: Check ADS-B workflow status
+        if: needs.adsb-to-aircraft.result != 'success' && needs.adsb-reduce.result != 'success'
         run: |
-          echo "WARNING: adsb-to-aircraft result was '${{ needs.adsb-to-aircraft.result }}', will continue without ADS-B artifacts"
+          echo "WARNING: ADS-B workflow failed (adsb-to-aircraft='${{ needs.adsb-to-aircraft.result }}', adsb-reduce='${{ needs.adsb-reduce.result }}'), will continue without ADS-B artifacts"
 
       - name: Checkout for gh CLI
         uses: actions/checkout@v4
@@ -211,7 +256,7 @@ jobs:
 
       - name: Download ADS-B artifacts
         uses: actions/download-artifact@v5
-        if: needs.adsb-to-aircraft.result == 'success'
+        if: needs.adsb-to-aircraft.result == 'success' || needs.adsb-reduce.result == 'success'
         continue-on-error: true
         with:
           name: openairframes_adsb-${{ needs.resolve-dates.outputs.adsb_date }}
@@ -266,7 +311,11 @@ jobs:
 
           # Find files from artifacts using find (handles nested structures)
           CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
-          CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
+          # Prefer concatenated file (with date range) over single-day file
+          CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*_*.csv.gz" -type f 2>/dev/null | head -1)
+          if [ -z "$CSV_FILE_ADSB" ]; then
+            CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
+          fi
           CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
           ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
           JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
@@ -23,6 +23,12 @@ gh run list \
   "repos/$REPO/actions/runs/$run_id/artifacts" \
   --jq '.artifacts[] | select(.name | test("^openairframes_adsb-[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}-[0-9]{2}-[0-9]{2}$")) | .name' | while read -r artifact_name; do
 
+  # Check if artifact directory already exists and has files
+  if [ -d "downloads/adsb_artifacts/$artifact_name" ] && [ -n "$(ls -A "downloads/adsb_artifacts/$artifact_name" 2>/dev/null)" ]; then
+    echo "  Skipping (already exists): $artifact_name"
+    continue
+  fi
+
   echo "  Downloading: $artifact_name"
   gh run download "$run_id" \
     --repo "$REPO" \
@@ -194,7 +194,7 @@ def main():
     if triggered_runs and not args.dry_run:
         import json
         timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
-        runs_file = f"./triggered_runs_{timestamp}.json"
+        runs_file = f"./output/triggered_runs_{timestamp}.json"
         with open(runs_file, 'w') as f:
             json.dump({
                 'start_date': args.start_date,
@@ -1,7 +1,7 @@
 from pathlib import Path
 import polars as pl
 import argparse
-
+import os
 OUTPUT_DIR = Path("./data/output")
 CORRECT_ORDER_OF_COLUMNS = ["time", "icao", "r", "t", "dbFlags", "ownOp", "year", "desc", "aircraft_category"]
 
@@ -13,38 +13,55 @@ def main():
 
     compressed_dir = OUTPUT_DIR / "compressed"
     date_dir = compressed_dir / args.date
     if not date_dir.is_dir():
         raise FileNotFoundError(f"No date folder found: {date_dir}")
 
     parquet_files = sorted(date_dir.glob("*.parquet"))
-    if not parquet_files:
-        raise FileNotFoundError(f"No parquet files found in {date_dir}")
-
-    frames = [pl.read_parquet(p) for p in parquet_files]
-    df = pl.concat(frames, how="vertical", rechunk=True)
-
-    df = df.sort(["time", "icao"])
-    df = df.select(CORRECT_ORDER_OF_COLUMNS)
-
-    output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
-    print(f"Writing combined parquet to {output_path} with {df.height} rows")
-    df.write_parquet(output_path)
-
-    csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
-    print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
-    df.write_csv(csv_output_path, compression="gzip")
+    df = None
+    if parquet_files:  # TODO: This logic could be updated slightly.
+        frames = [pl.read_parquet(p) for p in parquet_files]
+        df = pl.concat(frames, how="vertical", rechunk=True)
+
+        df = df.sort(["time", "icao"])
+        df = df.select(CORRECT_ORDER_OF_COLUMNS)
+
+        output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.parquet"
+        print(f"Writing combined parquet to {output_path} with {df.height} rows")
+        df.write_parquet(output_path)
+
+        csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{args.date}.csv.gz"
+        print(f"Writing combined csv.gz to {csv_output_path} with {df.height} rows")
+        df.write_csv(csv_output_path, compression="gzip")
+    else:
+        print(f"No parquet files found in {date_dir}")
 
     if args.concat_with_latest_csv:
         print("Loading latest CSV from GitHub releases to concatenate with...")
         from src.get_latest_release import get_latest_aircraft_adsb_csv_df
-        df_latest_csv, csv_date = get_latest_aircraft_adsb_csv_df()
-        # Ensure column order matches before concatenating
-        df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
-        from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
-        df_final = concat_compressed_dfs(df_latest_csv, df)
-        df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS)
-        final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_date}_{args.date}.csv.gz"
-        df_final.write_csv(final_csv_output_path, compression="gzip")
+        from datetime import datetime
+
+        df_latest_csv, csv_start_date, csv_end_date = get_latest_aircraft_adsb_csv_df()
+
+        # Compare dates: end_date is exclusive, so if csv_end_date > args.date,
+        # the latest CSV already includes this day's data
+        csv_end_dt = datetime.strptime(csv_end_date, "%Y-%m-%d")
+        args_dt = datetime.strptime(args.date, "%Y-%m-%d")
+
+        if df is None or csv_end_dt >= args_dt:
+            print(f"Latest CSV already includes data through {args.date} (end_date={csv_end_date} is exclusive)")
+            print("Writing latest CSV directly without concatenation to avoid duplicates")
+            os.makedirs(OUTPUT_DIR, exist_ok=True)
+            final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{csv_end_date}.csv.gz"
+            df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
+            df_latest_csv.write_csv(final_csv_output_path, compression="gzip")
+        else:
+            print(f"Concatenating latest CSV (through {csv_end_date}) with new data ({args.date})")
+            # Ensure column order matches before concatenating
+            df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
+            from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
+            df_final = concat_compressed_dfs(df_latest_csv, df)
+            df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS)
+            final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{args.date}.csv.gz"
+            df_final.write_csv(final_csv_output_path, compression="gzip")
+        print(f"Final CSV written to {final_csv_output_path}")
 
 
 if __name__ == "__main__":
     main()
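The exclusive end-date comparison is the heart of the new dedup logic. A minimal standalone sketch of the rule, using made-up dates:

```python
from datetime import datetime

# Hypothetical example: the latest release covers [2025-01-01, 2025-03-01),
# end date exclusive, and this run is processing 2025-02-15.
csv_end_dt = datetime.strptime("2025-03-01", "%Y-%m-%d")
args_dt = datetime.strptime("2025-02-15", "%Y-%m-%d")

# csv_end_dt >= args_dt, so the released CSV already contains this day's
# data; concatenating would duplicate rows, so the script re-publishes
# the existing CSV instead.
print(csv_end_dt >= args_dt)  # True
```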
@@ -129,13 +129,32 @@ def fetch_releases(version_date: str) -> list:
     return releases
 
 
-def download_asset(asset_url: str, file_path: str) -> bool:
-    """Download a single release asset."""
+def download_asset(asset_url: str, file_path: str, expected_size: int | None = None) -> bool:
+    """Download a single release asset with size verification.
+
+    Args:
+        asset_url: URL to download from
+        file_path: Local path to save to
+        expected_size: Expected file size in bytes (for verification)
+
+    Returns:
+        True if download succeeded and size matches (if provided), False otherwise
+    """
     os.makedirs(os.path.dirname(file_path) or OUTPUT_DIR, exist_ok=True)
 
+    # Check if file exists and has correct size
     if os.path.exists(file_path):
-        print(f"[SKIP] {file_path} already downloaded.")
-        return True
+        if expected_size is not None:
+            actual_size = os.path.getsize(file_path)
+            if actual_size == expected_size:
+                print(f"[SKIP] {file_path} already downloaded and verified ({actual_size} bytes).")
+                return True
+            else:
+                print(f"[WARN] {file_path} exists but size mismatch (expected {expected_size}, got {actual_size}). Re-downloading.")
+                os.remove(file_path)
+        else:
+            print(f"[SKIP] {file_path} already downloaded.")
+            return True
 
     max_retries = 2
     retry_delay = 30
@@ -153,7 +172,21 @@ def download_asset(asset_url: str, file_path: str) -> bool:
                     if not chunk:
                         break
                     file.write(chunk)
-            print(f"Saved {file_path}")
+
+            # Verify file size if expected_size was provided
+            if expected_size is not None:
+                actual_size = os.path.getsize(file_path)
+                if actual_size != expected_size:
+                    print(f"[ERROR] Size mismatch for {file_path}: expected {expected_size} bytes, got {actual_size} bytes")
+                    os.remove(file_path)
+                    if attempt < max_retries:
+                        print(f"Waiting {retry_delay} seconds before retry")
+                        time.sleep(retry_delay)
+                        continue
+                    return False
+                print(f"Saved {file_path} ({actual_size} bytes, verified)")
+            else:
+                print(f"Saved {file_path}")
             return True
         else:
             print(f"Failed to download {asset_url}: {response.status} {response.msg}")
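A hedged usage sketch of the new `download_asset` signature; the URL, destination path, and size below are invented for illustration and assume this module's context:

```python
# Hypothetical asset values; real ones come from the GitHub release API.
url = "https://github.com/example/repo/releases/download/v1/2026-05-01_part_0.tar.gz"
dest = "data/output/2026-05-01_part_0.tar.gz"

ok = download_asset(url, dest, expected_size=1_234_567)
if not ok:
    # A size mismatch after the final retry (or an HTTP failure) returns False.
    raise SystemExit(f"download failed or size mismatch: {dest}")
```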
@@ -227,7 +260,6 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
             stdin=cat_proc.stdout,
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
-            check=True
         )
         cat_proc.stdout.close()
         cat_stderr = cat_proc.stderr.read().decode() if cat_proc.stderr else ""
@@ -236,6 +268,24 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
         if cat_stderr:
             print(f"cat stderr: {cat_stderr}")
 
+        tar_stderr = result.stderr.decode() if result.stderr else ""
+        if result.returncode != 0:
+            # GNU tar exits non-zero for format issues that BSD tar silently
+            # tolerates (e.g. trailing junk after the last valid entry).
+            # Check whether files were actually extracted before giving up.
+            extracted_items = os.listdir(extract_dir)
+            if extracted_items:
+                print(f"[WARN] tar exited {result.returncode} but extracted "
+                      f"{len(extracted_items)} items — treating as success")
+                if tar_stderr:
+                    print(f"tar stderr: {tar_stderr}")
+            else:
+                print(f"Failed to extract split archive (tar exit {result.returncode})")
+                if tar_stderr:
+                    print(f"tar stderr: {tar_stderr}")
+                shutil.rmtree(extract_dir, ignore_errors=True)
+                return False
+
         print(f"Successfully extracted archive to {extract_dir}")
 
         # Delete tar files immediately after extraction
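For context, a sketch of the `cat | tar` pipeline these hunks adjust. The part names and tar flags are assumptions, since the diff only shows the subprocess wiring:

```python
import subprocess

# Hypothetical split-archive parts; the real list comes from file_paths.
parts = ["day.tar.gz.part0", "day.tar.gz.part1"]
extract_dir = "extracted"

# Equivalent of: cat part0 part1 ... | tar -xzf - -C extract_dir
cat_proc = subprocess.Popen(["cat", *parts], stdout=subprocess.PIPE)
result = subprocess.run(
    ["tar", "-xzf", "-", "-C", extract_dir],
    stdin=cat_proc.stdout,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    # Note: no check=True — a non-zero tar exit is inspected rather than
    # raised, because GNU tar can exit non-zero after extracting everything.
)
cat_proc.stdout.close()
print(result.returncode)
```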
@@ -252,11 +302,9 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
         print(f"Disk space after tar deletion: {free_gb:.1f}GB free")
 
         return True
-    except subprocess.CalledProcessError as e:
-        stderr_output = e.stderr.decode() if e.stderr else ""
+    except Exception as e:
         print(f"Failed to extract split archive: {e}")
-        if stderr_output:
-            print(f"tar stderr: {stderr_output}")
         shutil.rmtree(extract_dir, ignore_errors=True)
         return False
 
@@ -77,8 +77,9 @@ def download_and_extract(version_date: str) -> str | None:
     for asset in use_assets:
         asset_name = asset["name"]
         asset_url = asset["browser_download_url"]
+        asset_size = asset.get("size")  # Get expected file size
         file_path = os.path.join(OUTPUT_DIR, asset_name)
-        if download_asset(asset_url, file_path):
+        if download_asset(asset_url, file_path, expected_size=asset_size):
             downloaded_files.append(file_path)
 
     if not downloaded_files:
@@ -123,7 +123,16 @@ def main():
     print(f"Processing part {args.part_id} for {args.date}")
 
     # Get specific archive file for this part
-    archive_path = os.path.join(OUTPUT_DIR, "adsb_archives", args.date, f"{args.date}_part_{args.part_id}.tar.gz")
+    archive_dir = os.path.join(OUTPUT_DIR, "adsb_archives", args.date)
+    archive_path = os.path.join(archive_dir, f"{args.date}_part_{args.part_id}.tar.gz")
+
+    if not os.path.isfile(archive_path):
+        print(f"ERROR: Archive not found: {archive_path}")
+        if os.path.isdir(archive_dir):
+            print(f"Files in {archive_dir}: {os.listdir(archive_dir)}")
+        else:
+            print(f"Directory does not exist: {archive_dir}")
+        sys.exit(1)
 
     # Extract and collect trace files
     trace_map = build_trace_file_map(archive_path)
@@ -246,6 +246,20 @@ def process_submission(
     if schema_updated:
         schema_note = f"\n**Schema Updated:** Added new tags: `{', '.join(new_tags)}`\n"
 
+    # Truncate JSON preview to stay under GitHub's 65536 char body limit
+    max_json_preview = 50000
+    if len(content_json) > max_json_preview:
+        # Show first few entries as a preview
+        preview_entries = submissions[:10]
+        preview_json = json.dumps(preview_entries, indent=2, sort_keys=True)
+        json_section = (
+            f"### Submissions (showing 10 of {len(submissions)})\n"
+            f"```json\n{preview_json}\n```\n\n"
+            f"*Full submission ({len(submissions)} entries, {len(content_json):,} chars) is in the committed file.*"
+        )
+    else:
+        json_section = f"### Submissions\n```json\n{content_json}\n```"
+
     pr_body = f"""## Community Submission
 
 Adds {len(submissions)} submission(s) from @{author_username}.
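The 50,000-character threshold leaves headroom under GitHub's 65,536-character body limit, since the rest of the PR body and the fence markup also count. A self-contained sketch with fabricated submissions:

```python
import json

# Fabricated entries, just to exceed the preview threshold.
submissions = [{"icao": f"{i:06x}", "r": "N12345"} for i in range(5000)]
content_json = json.dumps(submissions, indent=2, sort_keys=True)

max_json_preview = 50000
assert len(content_json) > max_json_preview
preview_json = json.dumps(submissions[:10], indent=2, sort_keys=True)
# The PR body then embeds preview_json in a fenced block instead of the
# full dump, keeping the body well under the limit.
print(f"showing 10 of {len(submissions)} entries "
      f"({len(content_json):,} chars in the committed file)")
```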
@@ -257,10 +271,7 @@ Closes #{issue_number}
 
 ---
 
-### Submissions
-```json
-{content_json}
-```"""
+{json_section}"""
 
     pr = create_pull_request(
         title=f"Community submission: {filename}",
@@ -24,7 +24,7 @@ def read_all_submissions(community_dir: Path) -> list[dict]:
     """Read all JSON submissions from the community directory."""
     all_submissions = []
 
-    for json_file in sorted(community_dir.glob("*.json")):
+    for json_file in sorted(community_dir.glob("**/*.json")):
         try:
             with open(json_file) as f:
                 data = json.load(f)
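The switch from `*.json` to `**/*.json` makes the glob recursive, so submissions filed in subdirectories are picked up too. A small sketch, assuming a hypothetical layout:

```python
from pathlib import Path

community_dir = Path("data/community")  # hypothetical layout
flat = sorted(community_dir.glob("*.json"))          # top-level files only
recursive = sorted(community_dir.glob("**/*.json"))  # also walks subdirectories
print(len(flat), len(recursive))
```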
@@ -27,6 +27,33 @@ def _http_get_json(url: str, headers: dict[str, str]) -> dict:
     return json.loads(data.decode("utf-8"))
 
 
+def get_releases(repo: str = REPO, github_token: Optional[str] = None, per_page: int = 30) -> list[dict]:
+    """Get a list of releases from the repository."""
+    url = f"https://api.github.com/repos/{repo}/releases?per_page={per_page}"
+    headers = {
+        "Accept": "application/vnd.github+json",
+        "User-Agent": "openairframes-downloader/1.0",
+    }
+    if github_token:
+        headers["Authorization"] = f"Bearer {github_token}"
+
+    return _http_get_json(url, headers=headers)
+
+
+def get_release_assets_from_release_data(release_data: dict) -> list[ReleaseAsset]:
+    """Extract assets from a release data dictionary."""
+    assets = []
+    for a in release_data.get("assets", []):
+        assets.append(
+            ReleaseAsset(
+                name=a["name"],
+                download_url=a["browser_download_url"],
+                size=int(a.get("size", 0)),
+            )
+        )
+    return assets
+
+
 def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]:
     url = f"https://api.github.com/repos/{repo}/releases/latest"
     headers = {
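A short usage sketch of the two new helpers together, assuming this module's context (the `REPO` default and the `ReleaseAsset` dataclass) and network access:

```python
# List asset names and sizes across the five most recent releases.
for release in get_releases(per_page=5):
    for asset in get_release_assets_from_release_data(release):
        print(release.get("tag_name"), asset.name, asset.size)
```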
@@ -37,16 +64,7 @@ def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]:
         headers["Authorization"] = f"Bearer {github_token}"
 
     payload = _http_get_json(url, headers=headers)
-    assets = []
-    for a in payload.get("assets", []):
-        assets.append(
-            ReleaseAsset(
-                name=a["name"],
-                download_url=a["browser_download_url"],
-                size=int(a.get("size", 0)),
-            )
-        )
-    return assets
+    return get_release_assets_from_release_data(payload)
 
 
 def pick_asset(
@@ -155,7 +173,8 @@ def download_latest_aircraft_adsb_csv(
     repo: str = REPO,
 ) -> Path:
     """
-    Download the latest openairframes_adsb_*.csv file from the latest GitHub release.
+    Download the latest openairframes_adsb_*.csv file from GitHub releases.
+    If the latest release doesn't have the file, searches previous releases.
 
     Args:
         output_dir: Directory to save the downloaded file (default: "downloads")
@@ -166,15 +185,33 @@
         Path to the downloaded file
     """
     output_dir = Path(output_dir)
-    assets = get_latest_release_assets(repo, github_token=github_token)
-    asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
-    saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
-    print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
-    return saved_to
+
+    # Get multiple releases
+    releases = get_releases(repo, github_token=github_token, per_page=30)
+
+    # Try each release until we find one with the matching asset
+    for release in releases:
+        assets = get_release_assets_from_release_data(release)
+        try:
+            asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
+            saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
+            print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
+            return saved_to
+        except FileNotFoundError:
+            # This release doesn't have the matching asset, try the next one
+            continue
+
+    raise FileNotFoundError(
+        f"No release in the last 30 releases has an asset matching 'openairframes_adsb_.*\\.csv(\\.gz)?$'"
+    )
 
 
 import polars as pl
 def get_latest_aircraft_adsb_csv_df():
-    """Download and load the latest ADS-B CSV from GitHub releases."""
+    """Download and load the latest ADS-B CSV from GitHub releases.
+
+    Returns:
+        tuple: (df, start_date, end_date) where dates are in YYYY-MM-DD format
+    """
+    import re
 
     csv_path = download_latest_aircraft_adsb_csv()
@@ -198,17 +235,19 @@ def get_latest_aircraft_adsb_csv_df():
         if df[col].dtype == pl.Utf8:
             df = df.with_columns(pl.col(col).fill_null(""))
 
-    # Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
-    match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
+    # Extract start and end dates from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
+    match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv", str(csv_path))
     if not match:
-        raise ValueError(f"Could not extract date from filename: {csv_path.name}")
+        raise ValueError(f"Could not extract dates from filename: {csv_path.name}")
 
-    date_str = match.group(1)
+    start_date = match.group(1)
+    end_date = match.group(2)
     print(df.columns)
     print(df.dtypes)
-    return df, date_str
+    return df, start_date, end_date
 
 
 if __name__ == "__main__":
-    download_latest_aircraft_csv()
+    download_latest_aircraft_adsb_csv()
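The date parsing above hinges on that filename regex. A quick self-check with a made-up filename:

```python
import re

path = "downloads/openairframes_adsb_2025-01-01_2025-03-01.csv.gz"  # example name
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv", path)
assert match is not None
start_date, end_date = match.group(1), match.group(2)
print(start_date, end_date)  # 2025-01-01 2025-03-01
```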