Daily ADSB and Historical updates. Update readme.md

ggman12
2026-02-13 11:49:18 -05:00
parent 4015a5fcf1
commit d216ea9329
32 changed files with 1489 additions and 1744 deletions
+49
@@ -0,0 +1,49 @@
#!/usr/bin/env python3
import gzip
import re
from pathlib import Path

import polars as pl

# Find all CSV.gz files in the downloaded artifacts
artifacts_dir = Path("downloads/adsb_artifacts")
files = sorted(artifacts_dir.glob("*/openairframes_adsb_*.csv.gz"))
if not files:
    raise SystemExit("No CSV.gz files found in downloads/adsb_artifacts/")
print(f"Found {len(files)} files to concatenate")


# Extract dates from filenames to determine range
def extract_dates(path: Path) -> tuple[str, str] | tuple[None, None]:
    """Extract start and end dates from filename"""
    m = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv\.gz", path.name)
    if m:
        return m.group(1), m.group(2)
    return None, None


# Collect all dates
all_dates = []
for f in files:
    start, end = extract_dates(f)
    if start and end:
        all_dates.extend([start, end])
        print(f" {f.name}: {start} to {end}")

if not all_dates:
    raise SystemExit("Could not extract dates from filenames")

# Find earliest and latest dates
earliest = min(all_dates)
latest = max(all_dates)
print(f"\nDate range: {earliest} to {latest}")

# Read and concatenate all files (polars decompresses .gz CSVs on read)
print("\nReading and concatenating files...")
frames = [pl.read_csv(f) for f in files]
df = pl.concat(frames, how="vertical", rechunk=True)

# Write output; DataFrame.write_csv has no compression option, so write
# through a gzip file object instead
output_path = Path("downloads") / f"openairframes_adsb_{earliest}_{latest}.csv.gz"
output_path.parent.mkdir(parents=True, exist_ok=True)
with gzip.open(output_path, "wb") as out:
    df.write_csv(out)
print(f"\nWrote {output_path} with {df.height:,} rows")
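As a quick standalone sanity check of the filename parsing above (the names below are made up but follow the expected pattern), note that ISO-8601 date strings compare correctly as plain strings, which is why min()/max() over the collected dates yields the true range:

import re

# Hypothetical artifact filenames matching the expected pattern
names = [
    "openairframes_adsb_2026-01-01_2026-01-31.csv.gz",
    "openairframes_adsb_2026-02-01_2026-02-13.csv.gz",
]
pattern = r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv\.gz"
dates = [d for n in names for d in re.search(pattern, n).groups()]
# ISO-8601 strings sort lexicographically, so string min/max give the true range
assert min(dates) == "2026-01-01"
assert max(dates) == "2026-02-13"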
+34
@@ -0,0 +1,34 @@
#!/bin/bash

# Create download directory
mkdir -p downloads/adsb_artifacts

# Repository from the workflow comment
REPO="ggman12/OpenAirframes"

# Get last 15 runs of the workflow and download matching artifacts
gh run list \
  --repo "$REPO" \
  --workflow adsb-to-aircraft-multiple-day-run.yaml \
  --limit 15 \
  --json databaseId \
  --jq '.[].databaseId' | while read -r run_id; do
  echo "Checking run ID: $run_id"

  # List artifacts for this run using the API
  # Match pattern: openairframes_adsb-YYYY-MM-DD-YYYY-MM-DD (with second date)
  gh api \
    --paginate \
    "repos/$REPO/actions/runs/$run_id/artifacts" \
    --jq '.artifacts[] | select(.name | test("^openairframes_adsb-[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}-[0-9]{2}-[0-9]{2}$")) | .name' | while read -r artifact_name; do
    echo "  Downloading: $artifact_name"
    gh run download "$run_id" \
      --repo "$REPO" \
      --name "$artifact_name" \
      --dir "downloads/adsb_artifacts/$artifact_name"
  done
done

echo "Download complete! Files saved to downloads/adsb_artifacts/"
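For reference, the jq test() filter above only keeps artifact names carrying both dates; a rough Python equivalent of the same check (artifact names here are illustrative):

import re

# Same pattern as the jq filter: openairframes_adsb-YYYY-MM-DD-YYYY-MM-DD
artifact_re = re.compile(r"^openairframes_adsb-\d{4}-\d{2}-\d{2}-\d{4}-\d{2}-\d{2}$")
assert artifact_re.match("openairframes_adsb-2026-01-01-2026-01-31")
# Names without the second date are skipped
assert not artifact_re.match("openairframes_adsb-2026-01-01")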
scripts/download_and_concat_runs.py
+182
@@ -0,0 +1,182 @@
#!/usr/bin/env python3
"""
Download and concatenate artifacts from a specific set of workflow runs.

Usage:
    python scripts/download_and_concat_runs.py triggered_runs_20260216_123456.json
"""
import argparse
import gzip
import json
import os
import subprocess
import sys
from datetime import datetime, timedelta


def download_run_artifact(run_id, repo, output_dir):
    """Download the artifact from a specific workflow run."""
    print(f" Downloading artifacts from run {run_id}...")
    cmd = [
        'gh', 'run', 'download', str(run_id),
        '--repo', repo,
        '--pattern', 'openairframes_adsb-*',
        '--dir', output_dir
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        print(" ✓ Downloaded")
        return True
    if "no artifacts" in result.stderr.lower():
        print(" ⚠ No artifacts found (workflow may still be running)")
    else:
        print(f" ✗ Failed: {result.stderr}")
    return False


def find_csv_files(download_dir):
    """Find all CSV.gz files in the download directory."""
    csv_files = []
    for root, dirs, files in os.walk(download_dir):
        for file in files:
            if file.endswith('.csv.gz'):
                csv_files.append(os.path.join(root, file))
    return sorted(csv_files)


def concatenate_csv_files(csv_files, output_file):
    """Concatenate CSV files in order, keeping only the first file's header."""
    print(f"\nConcatenating {len(csv_files)} CSV files...")
    with gzip.open(output_file, 'wt') as outf:
        header_written = False
        for i, csv_file in enumerate(csv_files, 1):
            print(f" [{i}/{len(csv_files)}] Processing {os.path.basename(csv_file)}")
            with gzip.open(csv_file, 'rt') as inf:
                for line_no, line in enumerate(inf):
                    # Skip the header line of every file after the first
                    if line_no == 0 and header_written:
                        continue
                    outf.write(line)
            header_written = True
    print(f"\n✓ Concatenated CSV saved to: {output_file}")
    # Show file size
    size_mb = os.path.getsize(output_file) / (1024 * 1024)
    print(f" Size: {size_mb:.1f} MB")


def main():
    parser = argparse.ArgumentParser(
        description='Download and concatenate artifacts from workflow runs'
    )
    parser.add_argument(
        'runs_file',
        help='JSON file containing run IDs (from run_historical_adsb_action.py)'
    )
    parser.add_argument(
        '--output-dir',
        default='./downloads/historical_concat',
        help='Directory for downloads (default: ./downloads/historical_concat)'
    )
    parser.add_argument(
        '--wait',
        action='store_true',
        help='Wait for workflows to complete before downloading'
    )
    args = parser.parse_args()

    # Load run IDs
    if not os.path.exists(args.runs_file):
        print(f"Error: File not found: {args.runs_file}")
        sys.exit(1)
    with open(args.runs_file, 'r') as f:
        data = json.load(f)
    runs = data['runs']
    start_date = data['start_date']
    end_date = data['end_date']
    # The runs file records the repo the workflows were triggered in
    repo = data.get('repo', 'ggman12/OpenAirframes')

    print("=" * 60)
    print("Download and Concatenate Historical Artifacts")
    print("=" * 60)
    print(f"Date range: {start_date} to {end_date}")
    print(f"Workflow runs: {len(runs)}")
    print(f"Output directory: {args.output_dir}")
    print("=" * 60)

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Wait for workflows to complete if requested
    if args.wait:
        print("\nWaiting for workflows to complete...")
        for run_info in runs:
            run_id = run_info['run_id']
            print(f" Checking run {run_id}...")
            subprocess.run(['gh', 'run', 'watch', str(run_id), '--repo', repo])

    # Download artifacts
    print("\nDownloading artifacts...")
    successful_downloads = 0
    for i, run_info in enumerate(runs, 1):
        run_id = run_info['run_id']
        print(f"\n[{i}/{len(runs)}] Run {run_id} ({run_info['start']} to {run_info['end']})")
        if download_run_artifact(run_id, repo, args.output_dir):
            successful_downloads += 1

    print(f"\n\nDownload Summary: {successful_downloads}/{len(runs)} artifacts downloaded")
    if successful_downloads == 0:
        print("\nNo artifacts downloaded. Workflows may still be running.")
        print("Use --wait to wait for completion, or try again later.")
        sys.exit(1)

    # Find all CSV files
    csv_files = find_csv_files(args.output_dir)
    if not csv_files:
        print("\nError: No CSV files found in download directory")
        sys.exit(1)
    print(f"\nFound {len(csv_files)} CSV file(s):")
    for csv_file in csv_files:
        print(f" - {os.path.basename(csv_file)}")

    # Concatenate.
    # Calculate the actual end date for the filename (end_date - 1 day, since
    # end_date is exclusive)
    end_dt = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=1)
    actual_end = end_dt.strftime('%Y-%m-%d')
    output_file = os.path.join(
        args.output_dir,
        f"openairframes_adsb_{start_date}_{actual_end}.csv.gz"
    )
    concatenate_csv_files(csv_files, output_file)

    print("\n" + "=" * 60)
    print("Done!")
    print("=" * 60)


if __name__ == '__main__':
    main()
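For context, this is the shape of the triggered_runs_*.json file the script consumes, as written by run_historical_adsb_action.py (run IDs and dates here are illustrative):

example_runs_file = {
    "start_date": "2025-01-01",
    "end_date": "2025-03-01",  # exclusive, per the trigger script
    "repo": "ggman12/OpenAirframes",
    "branch": "main",
    "runs": [
        {"run_id": "12345678901", "start": "2025-01-01", "end": "2025-02-01"},
        {"run_id": "12345678902", "start": "2025-02-01", "end": "2025-03-01"},
    ],
}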
scripts/run_historical_adsb_action.py
+215
@@ -0,0 +1,215 @@
#!/usr/bin/env python3
"""
Script to trigger adsb-to-aircraft-multiple-day-run workflow runs in monthly chunks.

Usage:
    python scripts/run_historical_adsb_action.py --start-date 2025-01-01 --end-date 2025-06-01
"""
import argparse
import json
import subprocess
import sys
import time
from calendar import monthrange
from datetime import datetime, timedelta


def generate_monthly_chunks(start_date_str, end_date_str):
    """Generate date ranges in monthly chunks from start to end date.

    End dates are exclusive (e.g., to process Jan 1-31, end_date should be Feb 1).
    """
    start_date = datetime.strptime(start_date_str, '%Y-%m-%d')
    end_date = datetime.strptime(end_date_str, '%Y-%m-%d')
    chunks = []
    current = start_date
    while current < end_date:
        # Get the first day of the next month (exclusive end)
        _, days_in_month = monthrange(current.year, current.month)
        month_end = current.replace(day=days_in_month)
        next_month_start = month_end + timedelta(days=1)
        # Don't go past the global end date
        chunk_end = min(next_month_start, end_date)
        chunks.append({
            'start': current.strftime('%Y-%m-%d'),
            'end': chunk_end.strftime('%Y-%m-%d')
        })
        # Move to the first day of the next month
        if next_month_start >= end_date:
            break
        current = next_month_start
    return chunks


def trigger_workflow(start_date, end_date, repo='ggman12/OpenAirframes', branch='main', dry_run=False):
    """Trigger the adsb-to-aircraft-multiple-day-run workflow via the GitHub CLI."""
    cmd = [
        'gh', 'workflow', 'run', 'adsb-to-aircraft-multiple-day-run.yaml',
        '--repo', repo,
        '--ref', branch,
        '-f', f'start_date={start_date}',
        '-f', f'end_date={end_date}'
    ]
    if dry_run:
        print(f"[DRY RUN] Would run: {' '.join(cmd)}")
        return True, None
    print(f"Triggering workflow: {start_date} to {end_date} (on {branch})")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print(f"✗ Failed to trigger workflow for {start_date} to {end_date}")
        print(f"Error: {result.stderr}")
        return False, None
    print(f"✓ Successfully triggered workflow for {start_date} to {end_date}")
    # `gh workflow run` does not report the new run ID, so wait a moment for it
    # to appear, then grab the most recent run (which should be the one we just
    # triggered).
    time.sleep(2)
    list_cmd = [
        'gh', 'run', 'list',
        '--repo', repo,
        '--workflow', 'adsb-to-aircraft-multiple-day-run.yaml',
        '--branch', branch,
        '--limit', '1',
        '--json', 'databaseId',
        '--jq', '.[0].databaseId'
    ]
    list_result = subprocess.run(list_cmd, capture_output=True, text=True)
    run_id = list_result.stdout.strip() if list_result.returncode == 0 else None
    return True, run_id


def main():
    parser = argparse.ArgumentParser(
        description='Trigger adsb-to-aircraft-multiple-day-run workflow runs in monthly chunks'
    )
    parser.add_argument(
        '--start-date', '--start_date',
        dest='start_date',
        required=True,
        help='Start date in YYYY-MM-DD format (inclusive)'
    )
    parser.add_argument(
        '--end-date', '--end_date',
        dest='end_date',
        required=True,
        help='End date in YYYY-MM-DD format (exclusive)'
    )
    parser.add_argument(
        '--repo',
        type=str,
        default='ggman12/OpenAirframes',
        help='GitHub repository (default: ggman12/OpenAirframes)'
    )
    parser.add_argument(
        '--branch',
        type=str,
        default='main',
        help='Branch to run the workflow on (default: main)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Print commands without executing them'
    )
    parser.add_argument(
        '--delay',
        type=int,
        default=5,
        help='Delay in seconds between workflow triggers (default: 5)'
    )
    args = parser.parse_args()

    # Validate dates (end_date is exclusive, so it must be strictly after start_date)
    try:
        start = datetime.strptime(args.start_date, '%Y-%m-%d')
        end = datetime.strptime(args.end_date, '%Y-%m-%d')
        if start >= end:
            print("Error: start_date must be before end_date (end_date is exclusive)")
            sys.exit(1)
    except ValueError as e:
        print(f"Error: Invalid date format - {e}")
        sys.exit(1)

    # Generate monthly chunks
    chunks = generate_monthly_chunks(args.start_date, args.end_date)
    print(f"\nGenerating {len(chunks)} monthly workflow runs on branch '{args.branch}' (repo: {args.repo}):")
    for i, chunk in enumerate(chunks, 1):
        print(f" {i}. {chunk['start']} to {chunk['end']}")

    if not args.dry_run:
        response = input(f"\nProceed with triggering {len(chunks)} workflows on '{args.branch}'? [y/N]: ")
        if response.lower() != 'y':
            print("Cancelled.")
            sys.exit(0)
        print()

    # Trigger workflows
    success_count = 0
    triggered_runs = []
    for i, chunk in enumerate(chunks, 1):
        print(f"\n[{i}/{len(chunks)}] ", end='')
        success, run_id = trigger_workflow(
            chunk['start'],
            chunk['end'],
            repo=args.repo,
            branch=args.branch,
            dry_run=args.dry_run
        )
        if success:
            success_count += 1
            if run_id:
                triggered_runs.append({
                    'run_id': run_id,
                    'start': chunk['start'],
                    'end': chunk['end']
                })
        # Add delay between triggers (except after the last one)
        if i < len(chunks) and not args.dry_run:
            time.sleep(args.delay)

    print(f"\n\nSummary: {success_count}/{len(chunks)} workflows triggered successfully")

    # Save triggered run IDs to a file
    if triggered_runs and not args.dry_run:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        runs_file = f"./triggered_runs_{timestamp}.json"
        with open(runs_file, 'w') as f:
            json.dump({
                'start_date': args.start_date,
                'end_date': args.end_date,
                'repo': args.repo,
                'branch': args.branch,
                'runs': triggered_runs
            }, f, indent=2)
        print(f"\nRun IDs saved to: {runs_file}")
        print("\nTo download and concatenate these artifacts, run:")
        print(f" python scripts/download_and_concat_runs.py {runs_file}")

    if success_count < len(chunks):
        sys.exit(1)


if __name__ == '__main__':
    main()
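To illustrate the chunking, here is what generate_monthly_chunks returns for a mid-month start, traced by hand from the function above (the import path is hypothetical and assumes the script is importable):

from run_historical_adsb_action import generate_monthly_chunks  # hypothetical import

chunks = generate_monthly_chunks("2025-01-15", "2025-03-10")
# Each chunk ends on the first day of the next month (exclusive), and the
# final chunk is clipped to the requested end date:
# [{'start': '2025-01-15', 'end': '2025-02-01'},
#  {'start': '2025-02-01', 'end': '2025-03-01'},
#  {'start': '2025-03-01', 'end': '2025-03-10'}]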
scripts/run_main_isolated.py
+82
@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
Run src.adsb.main from an isolated snapshot of src/ so edits in the main
working tree won't affect subprocess imports during the run.

Usage:
    python scripts/run_main_isolated.py 2026-01-01
    python scripts/run_main_isolated.py --start_date 2026-01-01 --end_date 2026-01-03
"""
import argparse
import shutil
import subprocess
import sys
from datetime import datetime, timezone
from pathlib import Path


def run(
    cmd: list[str],
    *,
    cwd: Path | None = None,
    check: bool = True,
) -> subprocess.CompletedProcess:
    print(f"\n>>> {' '.join(cmd)}")
    return subprocess.run(cmd, cwd=cwd, check=check)


def main() -> int:
    parser = argparse.ArgumentParser(description="Run src.adsb.main from an isolated snapshot of src/")
    parser.add_argument("date", nargs="?", help="Single date to process (YYYY-MM-DD)")
    parser.add_argument("--start_date", help="Start date (inclusive, YYYY-MM-DD)")
    parser.add_argument("--end_date", help="End date (exclusive, YYYY-MM-DD)")
    parser.add_argument("--concat_with_latest_csv", action="store_true",
                        help="Also concatenate with latest CSV from GitHub releases")
    args = parser.parse_args()

    if args.date and (args.start_date or args.end_date):
        raise SystemExit("Use a single date or --start_date/--end_date, not both.")
    if args.date:
        datetime.strptime(args.date, "%Y-%m-%d")  # validate format
        main_args = ["--date", args.date]
    else:
        if not args.start_date or not args.end_date:
            raise SystemExit("Provide --start_date and --end_date, or a single date.")
        datetime.strptime(args.start_date, "%Y-%m-%d")
        datetime.strptime(args.end_date, "%Y-%m-%d")
        main_args = ["--start_date", args.start_date, "--end_date", args.end_date]
    if args.concat_with_latest_csv:
        main_args.append("--concat_with_latest_csv")

    repo_root = Path(__file__).resolve().parents[1]
    snapshots_root = repo_root / ".snapshots"
    snapshots_root.mkdir(exist_ok=True)
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    snapshot_root = snapshots_root / f"run_{timestamp}"
    snapshot_src = snapshot_root / "src"

    exit_code = 0
    try:
        # Copy src/ so the subprocess imports the snapshot, not the live tree
        shutil.copytree(repo_root / "src", snapshot_src)
        runner = (
            "import sys, runpy; "
            f"sys.path.insert(0, {repr(str(snapshot_root))}); "
            f"sys.argv = ['src.adsb.main'] + {main_args!r}; "
            "runpy.run_module('src.adsb.main', run_name='__main__')"
        )
        cmd = [sys.executable, "-c", runner]
        run(cmd, cwd=repo_root)
    except subprocess.CalledProcessError as exc:
        exit_code = exc.returncode
    finally:
        shutil.rmtree(snapshot_root, ignore_errors=True)
    return exit_code


if __name__ == "__main__":
    raise SystemExit(main())
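For a single-date invocation such as `python scripts/run_main_isolated.py 2026-01-01`, the generated -c runner is equivalent to the following (the snapshot path shown is illustrative):

import runpy
import sys

# The snapshot directory is prepended to sys.path so `src.adsb.main` resolves
# to the copied tree rather than the live working tree
sys.path.insert(0, "/path/to/repo/.snapshots/run_20260101_120000")
sys.argv = ["src.adsb.main", "--date", "2026-01-01"]
runpy.run_module("src.adsb.main", run_name="__main__")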