mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 19:46:09 +02:00
rename from planequery to openairframes
This commit is contained in:
@@ -220,11 +220,11 @@ jobs:
|
||||
END_DATE: ${{ needs.generate-matrix.outputs.global_end }}
|
||||
run: |
|
||||
python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base --stream
|
||||
ls -lah data/planequery_aircraft/
|
||||
ls -lah data/openairframes/
|
||||
|
||||
- name: Upload final artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: planequery_aircraft_adsb-${{ needs.generate-matrix.outputs.global_start }}-${{ needs.generate-matrix.outputs.global_end }}
|
||||
path: data/planequery_aircraft/*.csv
|
||||
name: openairframes_adsb-${{ needs.generate-matrix.outputs.global_start }}-${{ needs.generate-matrix.outputs.global_end }}
|
||||
path: data/openairframes/*.csv
|
||||
retention-days: 30
|
||||
|
||||
+15
-15
@@ -1,4 +1,4 @@
|
||||
name: planequery-aircraft Daily Release
|
||||
name: OpenAirframes Daily Release
|
||||
|
||||
on:
|
||||
schedule:
|
||||
@@ -22,7 +22,7 @@ jobs:
|
||||
await github.rest.actions.createWorkflowDispatch({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
workflow_id: 'planequery-aircraft-daily-release.yaml',
|
||||
workflow_id: 'openairframes-daily-release.yaml',
|
||||
ref: 'main'
|
||||
});
|
||||
|
||||
@@ -33,7 +33,7 @@ jobs:
|
||||
await github.rest.actions.createWorkflowDispatch({
|
||||
owner: context.repo.owner,
|
||||
repo: context.repo.repo,
|
||||
workflow_id: 'planequery-aircraft-daily-release.yaml',
|
||||
workflow_id: 'openairframes-daily-release.yaml',
|
||||
ref: 'develop'
|
||||
});
|
||||
|
||||
@@ -58,16 +58,16 @@ jobs:
|
||||
|
||||
- name: Run FAA release script
|
||||
run: |
|
||||
python src/create_daily_planequery_aircraft_faa_release.py
|
||||
python src/create_daily_faa_release.py
|
||||
ls -lah data/faa_releasable
|
||||
ls -lah data/planequery_aircraft
|
||||
ls -lah data/openairframes
|
||||
|
||||
- name: Upload FAA artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: faa-release
|
||||
path: |
|
||||
data/planequery_aircraft/planequery_aircraft_faa_*.csv
|
||||
data/openairframes/openairframes_faa_*.csv
|
||||
data/faa_releasable/ReleasableAircraft_*.zip
|
||||
retention-days: 1
|
||||
|
||||
@@ -214,13 +214,13 @@ jobs:
|
||||
mkdir -p data/output/adsb_chunks
|
||||
ls -lah data/output/adsb_chunks/ || echo "Directory empty or does not exist"
|
||||
python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks
|
||||
ls -lah data/planequery_aircraft/
|
||||
ls -lah data/openairframes/
|
||||
|
||||
- name: Upload ADS-B artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: adsb-release
|
||||
path: data/planequery_aircraft/planequery_aircraft_adsb_*.csv
|
||||
path: data/openairframes/openairframes_adsb_*.csv
|
||||
retention-days: 1
|
||||
|
||||
build-community:
|
||||
@@ -245,13 +245,13 @@ jobs:
|
||||
- name: Run Community release script
|
||||
run: |
|
||||
python -m src.contributions.create_daily_community_release
|
||||
ls -lah data/planequery_aircraft
|
||||
ls -lah data/openairframes
|
||||
|
||||
- name: Upload Community artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: community-release
|
||||
path: data/planequery_aircraft/planequery_aircraft_community_*.csv
|
||||
path: data/openairframes/openairframes_community_*.csv
|
||||
retention-days: 1
|
||||
|
||||
create-release:
|
||||
@@ -297,14 +297,14 @@ jobs:
|
||||
elif [ "$BRANCH_NAME" = "develop" ]; then
|
||||
BRANCH_SUFFIX="-develop"
|
||||
fi
|
||||
TAG="planequery-aircraft-${DATE}${BRANCH_SUFFIX}"
|
||||
TAG="openairframes-${DATE}${BRANCH_SUFFIX}"
|
||||
|
||||
# Find files from artifacts using find (handles nested structures)
|
||||
CSV_FILE_FAA=$(find artifacts/faa -name "planequery_aircraft_faa_*.csv" | head -1)
|
||||
CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" | head -1)
|
||||
CSV_BASENAME_FAA=$(basename "$CSV_FILE_FAA")
|
||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "planequery_aircraft_adsb_*.csv" | head -1)
|
||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv" | head -1)
|
||||
CSV_BASENAME_ADSB=$(basename "$CSV_FILE_ADSB")
|
||||
CSV_FILE_COMMUNITY=$(find artifacts/community -name "planequery_aircraft_community_*.csv" 2>/dev/null | head -1 || echo "")
|
||||
CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" 2>/dev/null | head -1 || echo "")
|
||||
CSV_BASENAME_COMMUNITY=$(basename "$CSV_FILE_COMMUNITY" 2>/dev/null || echo "")
|
||||
ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" | head -1)
|
||||
ZIP_BASENAME=$(basename "$ZIP_FILE")
|
||||
@@ -319,7 +319,7 @@ jobs:
|
||||
echo "csv_basename_community=$CSV_BASENAME_COMMUNITY" >> "$GITHUB_OUTPUT"
|
||||
echo "zip_file=$ZIP_FILE" >> "$GITHUB_OUTPUT"
|
||||
echo "zip_basename=$ZIP_BASENAME" >> "$GITHUB_OUTPUT"
|
||||
echo "name=planequery-aircraft snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
|
||||
echo "name=OpenAirframes snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Checkout for gh CLI
|
||||
uses: actions/checkout@v4
|
||||
@@ -0,0 +1,171 @@
|
||||
name: Process Historical FAA Data
|
||||
|
||||
on:
|
||||
workflow_dispatch: # Manual trigger
|
||||
|
||||
jobs:
|
||||
generate-matrix:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||
steps:
|
||||
- name: Generate date ranges
|
||||
id: set-matrix
|
||||
run: |
|
||||
python3 << 'EOF'
|
||||
import json
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
start = datetime(2023, 8, 16)
|
||||
end = datetime(2026, 1, 1)
|
||||
|
||||
ranges = []
|
||||
current = start
|
||||
|
||||
# Process in 4-day chunks
|
||||
while current < end:
|
||||
chunk_end = current + timedelta(days=4)
|
||||
# Don't go past the end date
|
||||
if chunk_end > end:
|
||||
chunk_end = end
|
||||
|
||||
ranges.append({
|
||||
"since": current.strftime("%Y-%m-%d"),
|
||||
"until": chunk_end.strftime("%Y-%m-%d")
|
||||
})
|
||||
|
||||
current = chunk_end
|
||||
|
||||
print(f"::set-output name=matrix::{json.dumps(ranges)}")
|
||||
EOF
|
||||
|
||||
clone-faa-repo:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Cache FAA repository
|
||||
id: cache-faa-repo
|
||||
uses: actions/cache@v4
|
||||
with:
|
||||
path: data/scrape-faa-releasable-aircraft
|
||||
key: faa-repo-v1
|
||||
|
||||
- name: Clone FAA repository
|
||||
if: steps.cache-faa-repo.outputs.cache-hit != 'true'
|
||||
run: |
|
||||
mkdir -p data
|
||||
git clone https://github.com/simonw/scrape-faa-releasable-aircraft data/scrape-faa-releasable-aircraft
|
||||
echo "Repository cloned successfully"
|
||||
|
||||
process-chunk:
|
||||
needs: [generate-matrix, clone-faa-repo]
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
max-parallel: 5 # Process 5 chunks at a time
|
||||
matrix:
|
||||
range: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Restore FAA repository cache
|
||||
uses: actions/cache/restore@v4
|
||||
with:
|
||||
path: data/scrape-faa-releasable-aircraft
|
||||
key: faa-repo-v1
|
||||
fail-on-cache-miss: true
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Process chunk ${{ matrix.range.since }} to ${{ matrix.range.until }}
|
||||
run: |
|
||||
python src/get_historical_faa.py "${{ matrix.range.since }}" "${{ matrix.range.until }}"
|
||||
|
||||
- name: Upload CSV artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: csv-${{ matrix.range.since }}-to-${{ matrix.range.until }}
|
||||
path: data/faa_releasable_historical/*.csv
|
||||
retention-days: 1
|
||||
|
||||
create-release:
|
||||
needs: process-chunk
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Download all artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifacts
|
||||
|
||||
- name: Prepare release files
|
||||
run: |
|
||||
mkdir -p release-files
|
||||
find artifacts -name "*.csv" -exec cp {} release-files/ \;
|
||||
ls -lh release-files/
|
||||
|
||||
- name: Create Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
tag_name: historical-faa-${{ github.run_number }}
|
||||
name: Historical FAA Data Release ${{ github.run_number }}
|
||||
body: |
|
||||
Automated release of historical FAA aircraft data
|
||||
Processing period: 2023-08-16 to 2026-01-01
|
||||
Generated: ${{ github.event.repository.updated_at }}
|
||||
files: release-files/*.csv
|
||||
draft: false
|
||||
prerelease: false
|
||||
|
||||
concatenate-and-release:
|
||||
needs: process-chunk
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Set up Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
pip install -r requirements.txt
|
||||
|
||||
- name: Download all artifacts
|
||||
uses: actions/download-artifact@v4
|
||||
with:
|
||||
path: artifacts
|
||||
|
||||
- name: Prepare CSVs for concatenation
|
||||
run: |
|
||||
mkdir -p data/faa_releasable_historical
|
||||
find artifacts -name "*.csv" -exec cp {} data/faa_releasable_historical/ \;
|
||||
ls -lh data/faa_releasable_historical/
|
||||
|
||||
- name: Concatenate all CSVs
|
||||
run: |
|
||||
python scripts/concat_csvs.py
|
||||
|
||||
- name: Create Combined Release
|
||||
uses: softprops/action-gh-release@v1
|
||||
with:
|
||||
tag_name: historical-faa-combined-${{ github.run_number }}
|
||||
name: Historical FAA Data Combined Release ${{ github.run_number }}
|
||||
body: |
|
||||
Combined historical FAA aircraft data (all chunks concatenated)
|
||||
Processing period: 2023-08-16 to 2026-01-01
|
||||
Generated: ${{ github.event.repository.updated_at }}
|
||||
files: data/openairframes/*.csv
|
||||
draft: false
|
||||
prerelease: false
|
||||
@@ -1,6 +1,6 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 PlaneQuery
|
||||
Copyright (c) 2026 OpenAirframes
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
|
||||
+1
-1
@@ -23,7 +23,7 @@ class AdsbProcessingStack(Stack):
|
||||
# --- S3 bucket for intermediate and final results ---
|
||||
bucket = s3.Bucket(
|
||||
self, "ResultsBucket",
|
||||
bucket_name="planequery-aircraft-dev",
|
||||
bucket_name="openairframes-dev",
|
||||
removal_policy=RemovalPolicy.DESTROY,
|
||||
auto_delete_objects=True,
|
||||
lifecycle_rules=[
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
||||
"title": "PlaneQuery Aircraft Community Submission (v1)",
|
||||
"title": "OpenAirframes Community Submission (v1)",
|
||||
"type": "object",
|
||||
"additionalProperties": false,
|
||||
"properties": {
|
||||
|
||||
@@ -27,7 +27,7 @@ from src.adsb.compress_adsb_to_aircraft_data import compress_multi_icao_df, COLU
|
||||
|
||||
|
||||
DEFAULT_CHUNK_DIR = os.path.join(OUTPUT_DIR, "adsb_chunks")
|
||||
FINAL_OUTPUT_DIR = "./data/planequery_aircraft"
|
||||
FINAL_OUTPUT_DIR = "./data/openairframes"
|
||||
os.makedirs(FINAL_OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
|
||||
@@ -85,12 +85,12 @@ def combine_compressed_chunks(compressed_dfs: list[pl.DataFrame]) -> pl.DataFram
|
||||
|
||||
def download_and_merge_base_release(compressed_df: pl.DataFrame) -> pl.DataFrame:
|
||||
"""Download base release and merge with new data."""
|
||||
from src.get_latest_planequery_aircraft_release import download_latest_aircraft_adsb_csv
|
||||
from src.get_latest_release import download_latest_aircraft_adsb_csv
|
||||
|
||||
print("Downloading base ADS-B release...")
|
||||
try:
|
||||
base_path = download_latest_aircraft_adsb_csv(
|
||||
output_dir="./data/planequery_aircraft_base"
|
||||
output_dir="./data/openairframes_base"
|
||||
)
|
||||
print(f"Download returned: {base_path}")
|
||||
|
||||
@@ -176,7 +176,7 @@ def main():
|
||||
if args.start_date and args.end_date:
|
||||
# Historical mode
|
||||
output_id = f"{args.start_date}_{args.end_date}"
|
||||
output_filename = f"planequery_aircraft_adsb_{args.start_date}_{args.end_date}.csv"
|
||||
output_filename = f"openairframes_adsb_{args.start_date}_{args.end_date}.csv"
|
||||
print(f"Combining chunks for date range: {args.start_date} to {args.end_date}")
|
||||
else:
|
||||
# Daily mode - use same date for start and end
|
||||
@@ -187,7 +187,7 @@ def main():
|
||||
|
||||
date_str = target_day.strftime("%Y-%m-%d")
|
||||
output_id = date_str
|
||||
output_filename = f"planequery_aircraft_adsb_{date_str}_{date_str}.csv"
|
||||
output_filename = f"openairframes_adsb_{date_str}_{date_str}.csv"
|
||||
print(f"Combining chunks for {date_str}")
|
||||
|
||||
chunks_dir = args.chunks_dir
|
||||
|
||||
@@ -253,7 +253,7 @@ def concat_compressed_dfs(df_base, df_new):
|
||||
|
||||
def get_latest_aircraft_adsb_csv_df():
|
||||
"""Download and load the latest ADS-B CSV from GitHub releases."""
|
||||
from get_latest_planequery_aircraft_release import download_latest_aircraft_adsb_csv
|
||||
from get_latest_release import download_latest_aircraft_adsb_csv
|
||||
import re
|
||||
|
||||
csv_path = download_latest_aircraft_adsb_csv()
|
||||
@@ -264,8 +264,8 @@ def get_latest_aircraft_adsb_csv_df():
|
||||
if df[col].dtype == pl.Utf8:
|
||||
df = df.with_columns(pl.col(col).fill_null(""))
|
||||
|
||||
# Extract start date from filename pattern: planequery_aircraft_adsb_{start_date}_{end_date}.csv
|
||||
match = re.search(r"planequery_aircraft_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
|
||||
|
||||
+2
-2
@@ -76,8 +76,8 @@ def main():
|
||||
print(f"After dedup: {df_accumulated.height} rows")
|
||||
|
||||
# Write and upload final result
|
||||
output_name = f"planequery_aircraft_adsb_{global_start}_{global_end}.csv.gz"
|
||||
csv_output = Path(f"/tmp/planequery_aircraft_adsb_{global_start}_{global_end}.csv")
|
||||
output_name = f"openairframes_adsb_{global_start}_{global_end}.csv.gz"
|
||||
csv_output = Path(f"/tmp/openairframes_adsb_{global_start}_{global_end}.csv")
|
||||
gz_output = Path(f"/tmp/{output_name}")
|
||||
|
||||
df_accumulated.write_csv(csv_output)
|
||||
|
||||
@@ -17,7 +17,7 @@ import pandas as pd
|
||||
|
||||
|
||||
COMMUNITY_DIR = Path(__file__).parent.parent.parent / "community"
|
||||
OUT_ROOT = Path("data/planequery_aircraft")
|
||||
OUT_ROOT = Path("data/openairframes")
|
||||
|
||||
|
||||
def read_all_submissions(community_dir: Path) -> list[dict]:
|
||||
@@ -127,7 +127,7 @@ def main():
|
||||
|
||||
# Output
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
output_file = OUT_ROOT / f"planequery_aircraft_community_{start_date_str}_{date_str}.csv"
|
||||
output_file = OUT_ROOT / f"openairframes_community_{start_date_str}_{date_str}.csv"
|
||||
|
||||
df.to_csv(output_file, index=False)
|
||||
|
||||
|
||||
@@ -111,7 +111,7 @@ def download_github_attachment(url: str) -> str | None:
|
||||
import urllib.error
|
||||
|
||||
try:
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "PlaneQuery-Bot"})
|
||||
req = urllib.request.Request(url, headers={"User-Agent": "OpenAirframes-Bot"})
|
||||
with urllib.request.urlopen(req, timeout=30) as response:
|
||||
return response.read().decode("utf-8")
|
||||
except (urllib.error.URLError, urllib.error.HTTPError, UnicodeDecodeError) as e:
|
||||
|
||||
+2
-2
@@ -74,10 +74,10 @@ if __name__ == '__main__':
|
||||
)
|
||||
|
||||
# Save the result
|
||||
OUT_ROOT = Path("data/planequery_aircraft")
|
||||
OUT_ROOT = Path("data/openairframes")
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
output_file = OUT_ROOT / f"planequery_aircraft_adsb_{start_date_str}_{date_str}.csv"
|
||||
output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv"
|
||||
df_combined.write_csv(output_file)
|
||||
|
||||
print(f"Saved: {output_file}")
|
||||
+3
-3
@@ -22,12 +22,12 @@ if not zip_path.exists():
|
||||
body = r.read()
|
||||
zip_path.write_bytes(body)
|
||||
|
||||
OUT_ROOT = Path("data/planequery_aircraft")
|
||||
OUT_ROOT = Path("data/openairframes")
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
|
||||
from get_latest_planequery_aircraft_release import get_latest_aircraft_faa_csv_df
|
||||
from get_latest_release import get_latest_aircraft_faa_csv_df
|
||||
df_new = convert_faa_master_txt_to_df(zip_path, date_str)
|
||||
df_base, start_date_str = get_latest_aircraft_faa_csv_df()
|
||||
df_base = concat_faa_historical_df(df_base, df_new)
|
||||
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
|
||||
df_base.to_csv(OUT_ROOT / f"planequery_aircraft_faa_{start_date_str}_{date_str}.csv", index=False)
|
||||
df_base.to_csv(OUT_ROOT / f"openairframes_faa_{start_date_str}_{date_str}.csv", index=False)
|
||||
@@ -0,0 +1,84 @@
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone, timedelta
|
||||
import sys
|
||||
|
||||
import polars as pl
|
||||
|
||||
# Add adsb directory to path
|
||||
sys.path.insert(0, str(Path(__file__).parent / "adsb")) # TODO: Fix this hacky path manipulation
|
||||
|
||||
from adsb.compress_adsb_to_aircraft_data import (
|
||||
load_historical_for_day,
|
||||
concat_compressed_dfs,
|
||||
get_latest_aircraft_adsb_csv_df,
|
||||
)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Get yesterday's date (data for the previous day)
|
||||
day = datetime.now(timezone.utc) - timedelta(days=1)
|
||||
|
||||
# Find a day with complete data
|
||||
max_attempts = 2 # Don't look back more than a week
|
||||
for attempt in range(max_attempts):
|
||||
date_str = day.strftime("%Y-%m-%d")
|
||||
print(f"Processing ADS-B data for {date_str}")
|
||||
|
||||
print("Loading new ADS-B data...")
|
||||
df_new = load_historical_for_day(day)
|
||||
if df_new.height == 0:
|
||||
day = day - timedelta(days=1)
|
||||
continue
|
||||
max_time = df_new['time'].max()
|
||||
if max_time is not None:
|
||||
# Handle timezone
|
||||
max_time_dt = max_time
|
||||
if hasattr(max_time_dt, 'replace'):
|
||||
max_time_dt = max_time_dt.replace(tzinfo=timezone.utc)
|
||||
|
||||
end_of_day = day.replace(hour=23, minute=59, second=59, tzinfo=timezone.utc) - timedelta(minutes=5)
|
||||
|
||||
# Convert polars datetime to python datetime if needed
|
||||
if isinstance(max_time_dt, datetime):
|
||||
if max_time_dt.replace(tzinfo=timezone.utc) >= end_of_day:
|
||||
break
|
||||
else:
|
||||
# Polars returns python datetime already
|
||||
if max_time >= day.replace(hour=23, minute=54, second=59):
|
||||
break
|
||||
|
||||
print(f"WARNING: Latest data time is {max_time}, which is more than 5 minutes before end of day.")
|
||||
day = day - timedelta(days=1)
|
||||
else:
|
||||
raise RuntimeError(f"Could not find complete data in the last {max_attempts} days")
|
||||
|
||||
try:
|
||||
# Get the latest release data
|
||||
print("Downloading latest ADS-B release...")
|
||||
df_base, start_date_str = get_latest_aircraft_adsb_csv_df()
|
||||
# Combine with historical data
|
||||
print("Combining with historical data...")
|
||||
df_combined = concat_compressed_dfs(df_base, df_new)
|
||||
except Exception as e:
|
||||
print(f"Error downloading latest ADS-B release: {e}")
|
||||
df_combined = df_new
|
||||
start_date_str = date_str
|
||||
|
||||
# Sort by time for consistent ordering
|
||||
df_combined = df_combined.sort('time')
|
||||
|
||||
# Convert any list columns to strings for CSV compatibility
|
||||
for col in df_combined.columns:
|
||||
if df_combined[col].dtype == pl.List:
|
||||
df_combined = df_combined.with_columns(
|
||||
pl.col(col).list.join(",").alias(col)
|
||||
)
|
||||
|
||||
# Save the result
|
||||
OUT_ROOT = Path("data/openairframes")
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv"
|
||||
df_combined.write_csv(output_file)
|
||||
|
||||
print(f"Saved: {output_file}")
|
||||
print(f"Total aircraft: {df_combined.height}")
|
||||
@@ -0,0 +1,33 @@
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timezone
|
||||
date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")
|
||||
|
||||
out_dir = Path("data/faa_releasable")
|
||||
out_dir.mkdir(parents=True, exist_ok=True)
|
||||
zip_name = f"ReleasableAircraft_{date_str}.zip"
|
||||
|
||||
zip_path = out_dir / zip_name
|
||||
if not zip_path.exists():
|
||||
# URL and paths
|
||||
url = "https://registry.faa.gov/database/ReleasableAircraft.zip"
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
req = Request(
|
||||
url,
|
||||
headers={"User-Agent": "Mozilla/5.0"},
|
||||
method="GET",
|
||||
)
|
||||
|
||||
with urlopen(req, timeout=120) as r:
|
||||
body = r.read()
|
||||
zip_path.write_bytes(body)
|
||||
|
||||
OUT_ROOT = Path("data/openairframes")
|
||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||
from derive_from_faa_master_txt import convert_faa_master_txt_to_df, concat_faa_historical_df
|
||||
from get_latest_openairframes_release import get_latest_aircraft_faa_csv_df
|
||||
df_new = convert_faa_master_txt_to_df(zip_path, date_str)
|
||||
df_base, start_date_str = get_latest_aircraft_faa_csv_df()
|
||||
df_base = concat_faa_historical_df(df_base, df_new)
|
||||
assert df_base['download_date'].is_monotonic_increasing, "download_date is not monotonic increasing"
|
||||
df_base.to_csv(OUT_ROOT / f"openairframes_faa_{start_date_str}_{date_str}.csv", index=False)
|
||||
+14
-14
@@ -9,7 +9,7 @@ import urllib.error
|
||||
import json
|
||||
|
||||
|
||||
REPO = "PlaneQuery/planequery-aircraft"
|
||||
REPO = "PlaneQuery/openairframes"
|
||||
LATEST_RELEASE_URL = f"https://api.github.com/repos/{REPO}/releases/latest"
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = No
|
||||
url = f"https://api.github.com/repos/{repo}/releases/latest"
|
||||
headers = {
|
||||
"Accept": "application/vnd.github+json",
|
||||
"User-Agent": "planequery-aircraft-downloader/1.0",
|
||||
"User-Agent": "openairframes-downloader/1.0",
|
||||
}
|
||||
if github_token:
|
||||
headers["Authorization"] = f"Bearer {github_token}"
|
||||
@@ -80,7 +80,7 @@ def download_asset(asset: ReleaseAsset, out_path: Path, github_token: Optional[s
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
headers = {
|
||||
"User-Agent": "planequery-aircraft-downloader/1.0",
|
||||
"User-Agent": "openairframes-downloader/1.0",
|
||||
"Accept": "application/octet-stream",
|
||||
}
|
||||
if github_token:
|
||||
@@ -109,7 +109,7 @@ def download_latest_aircraft_csv(
|
||||
repo: str = REPO,
|
||||
) -> Path:
|
||||
"""
|
||||
Download the latest planequery_aircraft_faa_*.csv file from the latest GitHub release.
|
||||
Download the latest openairframes_faa_*.csv file from the latest GitHub release.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save the downloaded file (default: "downloads")
|
||||
@@ -121,10 +121,10 @@ def download_latest_aircraft_csv(
|
||||
"""
|
||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||
try:
|
||||
asset = pick_asset(assets, name_regex=r"^planequery_aircraft_faa_.*\.csv$")
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_faa_.*\.csv$")
|
||||
except FileNotFoundError:
|
||||
# Fallback to old naming pattern
|
||||
asset = pick_asset(assets, name_regex=r"^planequery_aircraft_\d{4}-\d{2}-\d{2}_.*\.csv$")
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_\d{4}-\d{2}-\d{2}_.*\.csv$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
@@ -136,11 +136,11 @@ def get_latest_aircraft_faa_csv_df():
|
||||
'unique_regulatory_id': str,
|
||||
'registrant_county': str})
|
||||
df = df.fillna("")
|
||||
# Extract start date from filename pattern: planequery_aircraft_faa_{start_date}_{end_date}.csv
|
||||
match = re.search(r"planequery_aircraft_faa_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
# Extract start date from filename pattern: openairframes_faa_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_faa_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
# Fallback to old naming pattern: planequery_aircraft_{start_date}_{end_date}.csv
|
||||
match = re.search(r"planequery_aircraft_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
# Fallback to old naming pattern: openairframes_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
|
||||
@@ -154,7 +154,7 @@ def download_latest_aircraft_adsb_csv(
|
||||
repo: str = REPO,
|
||||
) -> Path:
|
||||
"""
|
||||
Download the latest planequery_aircraft_adsb_*.csv file from the latest GitHub release.
|
||||
Download the latest openairframes_adsb_*.csv file from the latest GitHub release.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save the downloaded file (default: "downloads")
|
||||
@@ -165,7 +165,7 @@ def download_latest_aircraft_adsb_csv(
|
||||
Path to the downloaded file
|
||||
"""
|
||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||
asset = pick_asset(assets, name_regex=r"^planequery_aircraft_adsb_.*\.csv$")
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
@@ -176,8 +176,8 @@ def get_latest_aircraft_adsb_csv_df():
|
||||
import pandas as pd
|
||||
df = pd.read_csv(csv_path)
|
||||
df = df.fillna("")
|
||||
# Extract start date from filename pattern: planequery_aircraft_adsb_{start_date}_{end_date}.csv
|
||||
match = re.search(r"planequery_aircraft_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
|
||||
@@ -0,0 +1,189 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Iterable, Optional
|
||||
import re
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import json
|
||||
|
||||
|
||||
REPO = "PlaneQuery/openairframes"
|
||||
LATEST_RELEASE_URL = f"https://api.github.com/repos/{REPO}/releases/latest"
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ReleaseAsset:
|
||||
name: str
|
||||
download_url: str
|
||||
size: int # bytes
|
||||
|
||||
|
||||
def _http_get_json(url: str, headers: dict[str, str]) -> dict:
|
||||
req = urllib.request.Request(url, headers=headers, method="GET")
|
||||
with urllib.request.urlopen(req, timeout=120) as resp:
|
||||
data = resp.read()
|
||||
return json.loads(data.decode("utf-8"))
|
||||
|
||||
|
||||
def get_latest_release_assets(repo: str = REPO, github_token: Optional[str] = None) -> list[ReleaseAsset]:
|
||||
url = f"https://api.github.com/repos/{repo}/releases/latest"
|
||||
headers = {
|
||||
"Accept": "application/vnd.github+json",
|
||||
"User-Agent": "openairframes-downloader/1.0",
|
||||
}
|
||||
if github_token:
|
||||
headers["Authorization"] = f"Bearer {github_token}"
|
||||
|
||||
payload = _http_get_json(url, headers=headers)
|
||||
assets = []
|
||||
for a in payload.get("assets", []):
|
||||
assets.append(
|
||||
ReleaseAsset(
|
||||
name=a["name"],
|
||||
download_url=a["browser_download_url"],
|
||||
size=int(a.get("size", 0)),
|
||||
)
|
||||
)
|
||||
return assets
|
||||
|
||||
|
||||
def pick_asset(
|
||||
assets: Iterable[ReleaseAsset],
|
||||
*,
|
||||
exact_name: Optional[str] = None,
|
||||
name_regex: Optional[str] = None,
|
||||
) -> ReleaseAsset:
|
||||
assets = list(assets)
|
||||
|
||||
if exact_name:
|
||||
for a in assets:
|
||||
if a.name == exact_name:
|
||||
return a
|
||||
raise FileNotFoundError(f"No asset exactly named {exact_name!r}. Available: {[a.name for a in assets]}")
|
||||
|
||||
if name_regex:
|
||||
rx = re.compile(name_regex)
|
||||
matches = [a for a in assets if rx.search(a.name)]
|
||||
if not matches:
|
||||
raise FileNotFoundError(f"No asset matched regex {name_regex!r}. Available: {[a.name for a in assets]}")
|
||||
if len(matches) > 1:
|
||||
raise FileExistsError(f"Regex {name_regex!r} matched multiple assets: {[m.name for m in matches]}")
|
||||
return matches[0]
|
||||
|
||||
raise ValueError("Provide either exact_name=... or name_regex=...")
|
||||
|
||||
|
||||
def download_asset(asset: ReleaseAsset, out_path: Path, github_token: Optional[str] = None) -> Path:
|
||||
out_path = Path(out_path)
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
headers = {
|
||||
"User-Agent": "openairframes-downloader/1.0",
|
||||
"Accept": "application/octet-stream",
|
||||
}
|
||||
if github_token:
|
||||
headers["Authorization"] = f"Bearer {github_token}"
|
||||
|
||||
req = urllib.request.Request(asset.download_url, headers=headers, method="GET")
|
||||
|
||||
try:
|
||||
with urllib.request.urlopen(req, timeout=300) as resp, out_path.open("wb") as f:
|
||||
# Stream download
|
||||
while True:
|
||||
chunk = resp.read(1024 * 1024) # 1 MiB
|
||||
if not chunk:
|
||||
break
|
||||
f.write(chunk)
|
||||
except urllib.error.HTTPError as e:
|
||||
body = e.read().decode("utf-8", errors="replace") if hasattr(e, "read") else ""
|
||||
raise RuntimeError(f"HTTPError {e.code} downloading {asset.name}: {body[:500]}") from e
|
||||
|
||||
return out_path
|
||||
|
||||
|
||||
def download_latest_aircraft_csv(
|
||||
output_dir: Path = Path("downloads"),
|
||||
github_token: Optional[str] = None,
|
||||
repo: str = REPO,
|
||||
) -> Path:
|
||||
"""
|
||||
Download the latest openairframes_faa_*.csv file from the latest GitHub release.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save the downloaded file (default: "downloads")
|
||||
github_token: Optional GitHub token for authentication
|
||||
repo: GitHub repository in format "owner/repo" (default: REPO)
|
||||
|
||||
Returns:
|
||||
Path to the downloaded file
|
||||
"""
|
||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||
try:
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_faa_.*\.csv$")
|
||||
except FileNotFoundError:
|
||||
# Fallback to old naming pattern
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_\d{4}-\d{2}-\d{2}_.*\.csv$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
|
||||
def get_latest_aircraft_faa_csv_df():
|
||||
csv_path = download_latest_aircraft_csv()
|
||||
import pandas as pd
|
||||
df = pd.read_csv(csv_path, dtype={'transponder_code': str,
|
||||
'unique_regulatory_id': str,
|
||||
'registrant_county': str})
|
||||
df = df.fillna("")
|
||||
# Extract start date from filename pattern: openairframes_faa_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_faa_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
# Fallback to old naming pattern: openairframes_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
|
||||
date_str = match.group(1)
|
||||
return df, date_str
|
||||
|
||||
|
||||
def download_latest_aircraft_adsb_csv(
|
||||
output_dir: Path = Path("downloads"),
|
||||
github_token: Optional[str] = None,
|
||||
repo: str = REPO,
|
||||
) -> Path:
|
||||
"""
|
||||
Download the latest openairframes_adsb_*.csv file from the latest GitHub release.
|
||||
|
||||
Args:
|
||||
output_dir: Directory to save the downloaded file (default: "downloads")
|
||||
github_token: Optional GitHub token for authentication
|
||||
repo: GitHub repository in format "owner/repo" (default: REPO)
|
||||
|
||||
Returns:
|
||||
Path to the downloaded file
|
||||
"""
|
||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv$")
|
||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||
return saved_to
|
||||
|
||||
|
||||
def get_latest_aircraft_adsb_csv_df():
|
||||
csv_path = download_latest_aircraft_adsb_csv()
|
||||
import pandas as pd
|
||||
df = pd.read_csv(csv_path)
|
||||
df = df.fillna("")
|
||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv
|
||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
|
||||
date_str = match.group(1)
|
||||
return df, date_str
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
download_latest_aircraft_csv()
|
||||
Reference in New Issue
Block a user