mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 19:46:09 +02:00
Daily ADSB and Historical updates. Update readme.md
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
#!/usr/bin/env python3
|
||||
import re
|
||||
from pathlib import Path
|
||||
import polars as pl
|
||||
|
||||
# Discover the per-run compressed CSV artifacts downloaded earlier.
artifacts_dir = Path("downloads/adsb_artifacts")
pattern = "*/openairframes_adsb_*.csv.gz"
files = sorted(artifacts_dir.glob(pattern))

if not files:
    raise SystemExit("No CSV.gz files found in downloads/adsb_artifacts/")

print(f"Found {len(files)} files to concatenate")
|
||||
# Extract dates from filenames to determine range
|
||||
def extract_dates(path: Path) -> tuple[str, str]:
|
||||
"""Extract start and end dates from filename"""
|
||||
m = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv\.gz", path.name)
|
||||
if m:
|
||||
return m.group(1), m.group(2)
|
||||
return None, None
|
||||
|
||||
# Collect all dates
# Gather every start/end date that could be parsed from the filenames.
all_dates = []
for f in files:
    start, end = extract_dates(f)
    if start and end:
        all_dates.extend([start, end])
        print(f" {f.name}: {start} to {end}")

if not all_dates:
    raise SystemExit("Could not extract dates from filenames")

# Find earliest and latest dates
# ISO-8601 date strings sort lexicographically, so min/max on the strings
# is chronological — no datetime parsing needed.
earliest = min(all_dates)
latest = max(all_dates)
print(f"\nDate range: {earliest} to {latest}")

# Read and concatenate all files
print("\nReading and concatenating files...")
frames = [pl.read_csv(f) for f in files]
df = pl.concat(frames, how="vertical", rechunk=True)

# Write output
output_path = Path("downloads") / f"openairframes_adsb_{earliest}_{latest}.csv.gz"
output_path.parent.mkdir(parents=True, exist_ok=True)
# NOTE(review): polars `DataFrame.write_csv` may not accept a `compression`
# keyword — verify against the installed polars version; gzip output may
# need to be written via a gzip file object instead.
df.write_csv(output_path, compression="gzip")

print(f"\nWrote {output_path} with {df.height:,} rows")
||||
@@ -0,0 +1,34 @@
|
||||
#!/bin/bash
# Download openairframes ADSB artifacts from recent workflow runs via gh CLI.

# Create download directory
mkdir -p downloads/adsb_artifacts

# Repository from the workflow comment
REPO="ggman12/OpenAirframes"

# Get last 15 runs of the workflow and download matching artifacts
# NOTE(review): both while-read loops run in pipeline subshells, so any
# variables set inside them do not survive the loop (harmless here).
gh run list \
  --repo "$REPO" \
  --workflow adsb-to-aircraft-multiple-day-run.yaml \
  --limit 15 \
  --json databaseId \
  --jq '.[].databaseId' | while read -r run_id; do

  echo "Checking run ID: $run_id"

  # List artifacts for this run using the API
  # Match pattern: openairframes_adsb-YYYY-MM-DD-YYYY-MM-DD (with second date)
  gh api \
    --paginate \
    "repos/$REPO/actions/runs/$run_id/artifacts" \
    --jq '.artifacts[] | select(.name | test("^openairframes_adsb-[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}-[0-9]{2}-[0-9]{2}$")) | .name' | while read -r artifact_name; do

    echo " Downloading: $artifact_name"
    # Each artifact goes into its own subdirectory named after the artifact.
    gh run download "$run_id" \
      --repo "$REPO" \
      --name "$artifact_name" \
      --dir "downloads/adsb_artifacts/$artifact_name"
  done
done

echo "Download complete! Files saved to downloads/adsb_artifacts/"
@@ -0,0 +1,182 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Download and concatenate artifacts from a specific set of workflow runs.
|
||||
|
||||
Usage:
|
||||
python scripts/download_and_concat_runs.py triggered_runs_20260216_123456.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def download_run_artifact(run_id, output_dir):
    """Download ADSB artifacts from one workflow run via the GitHub CLI.

    Args:
        run_id: Workflow run identifier passed to ``gh run download``.
        output_dir: Directory the artifacts are extracted into.

    Returns:
        True if ``gh`` exited successfully; False otherwise (including the
        "no artifacts yet" case, which is reported but treated as non-fatal).
    """
    print(f" Downloading artifacts from run {run_id}...")

    # NOTE(review): no --repo flag, so gh resolves the repository from the
    # current working directory — confirm this script always runs in a clone.
    cmd = [
        'gh', 'run', 'download', str(run_id),
        '--pattern', 'openairframes_adsb-*',
        '--dir', output_dir
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode == 0:
        print(" ✓ Downloaded")
        return True

    # Distinguish "artifacts not published yet" from a genuine failure.
    if "no artifacts" in result.stderr.lower():
        print(" ⚠ No artifacts found (workflow may still be running)")
    else:
        print(f" ✗ Failed: {result.stderr}")
    return False
|
||||
|
||||
|
||||
def find_csv_files(download_dir):
    """Return every ``*.csv.gz`` path under *download_dir*, sorted."""
    matches = [
        os.path.join(parent, name)
        for parent, _dirs, names in os.walk(download_dir)
        for name in names
        if name.endswith('.csv.gz')
    ]
    return sorted(matches)
|
||||
|
||||
|
||||
def concatenate_csv_files(csv_files, output_file):
    """Concatenate gzipped CSV files in order, keeping a single header.

    The first file is copied verbatim; for every subsequent file the first
    line is skipped (assumed to be an identical header — TODO confirm all
    artifacts share one schema).

    Args:
        csv_files: Ordered list of ``.csv.gz`` paths to merge.
        output_file: Path of the gzipped CSV to write.
    """
    import gzip

    print(f"\nConcatenating {len(csv_files)} CSV files...")

    with gzip.open(output_file, 'wt') as outf:
        for i, csv_file in enumerate(csv_files, 1):
            print(f" [{i}/{len(csv_files)}] Processing {os.path.basename(csv_file)}")

            with gzip.open(csv_file, 'rt') as inf:
                if i > 1:
                    # Drop the duplicate header on every file after the first.
                    next(inf, None)
                # Stream line-by-line instead of readlines(): avoids holding
                # an entire decompressed file in memory at once.
                outf.writelines(inf)

    print(f"\n✓ Concatenated CSV saved to: {output_file}")

    # Show file size
    size_mb = os.path.getsize(output_file) / (1024 * 1024)
    print(f" Size: {size_mb:.1f} MB")
|
||||
|
||||
|
||||
def main():
    """CLI entry point: download artifacts for the recorded runs, then merge them."""
    parser = argparse.ArgumentParser(
        description='Download and concatenate artifacts from workflow runs'
    )
    parser.add_argument(
        'runs_file',
        help='JSON file containing run IDs (from run_historical_adsb_action.py)'
    )
    parser.add_argument(
        '--output-dir',
        default='./downloads/historical_concat',
        help='Directory for downloads (default: ./downloads/historical_concat)'
    )
    parser.add_argument(
        '--wait',
        action='store_true',
        help='Wait for workflows to complete before downloading'
    )

    args = parser.parse_args()

    # Load run IDs
    if not os.path.exists(args.runs_file):
        print(f"Error: File not found: {args.runs_file}")
        sys.exit(1)

    with open(args.runs_file, 'r') as f:
        data = json.load(f)

    # Expected schema (written by run_historical_adsb_action.py):
    # {"start_date": ..., "end_date": ..., "runs": [{"run_id", "start", "end"}, ...]}
    runs = data['runs']
    start_date = data['start_date']
    end_date = data['end_date']

    print("=" * 60)
    print("Download and Concatenate Historical Artifacts")
    print("=" * 60)
    print(f"Date range: {start_date} to {end_date}")
    print(f"Workflow runs: {len(runs)}")
    print(f"Output directory: {args.output_dir}")
    print("=" * 60)

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    # Wait for workflows to complete if requested
    if args.wait:
        print("\nWaiting for workflows to complete...")
        for run_info in runs:
            run_id = run_info['run_id']
            print(f" Checking run {run_id}...")

            # `gh run watch` blocks until the run finishes.
            cmd = ['gh', 'run', 'watch', str(run_id)]
            subprocess.run(cmd)

    # Download artifacts
    print("\nDownloading artifacts...")
    successful_downloads = 0

    for i, run_info in enumerate(runs, 1):
        run_id = run_info['run_id']
        print(f"\n[{i}/{len(runs)}] Run {run_id} ({run_info['start']} to {run_info['end']})")

        if download_run_artifact(run_id, args.output_dir):
            successful_downloads += 1

    print(f"\n\nDownload Summary: {successful_downloads}/{len(runs)} artifacts downloaded")

    if successful_downloads == 0:
        print("\nNo artifacts downloaded. Workflows may still be running.")
        print("Use --wait to wait for completion, or try again later.")
        sys.exit(1)

    # Find all CSV files
    csv_files = find_csv_files(args.output_dir)

    if not csv_files:
        print("\nError: No CSV files found in download directory")
        sys.exit(1)

    print(f"\nFound {len(csv_files)} CSV file(s):")
    for csv_file in csv_files:
        print(f" - {os.path.basename(csv_file)}")

    # Concatenate
    # Calculate actual end date for filename (end_date - 1 day since it's exclusive)
    from datetime import datetime, timedelta
    end_dt = datetime.strptime(end_date, '%Y-%m-%d') - timedelta(days=1)
    actual_end = end_dt.strftime('%Y-%m-%d')

    output_file = os.path.join(
        args.output_dir,
        f"openairframes_adsb_{start_date}_{actual_end}.csv.gz"
    )

    concatenate_csv_files(csv_files, output_file)

    print("\n" + "=" * 60)
    print("Done!")
    print("=" * 60)


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,215 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to trigger adsb-to-aircraft-multiple-day-run workflow runs in monthly chunks.
|
||||
|
||||
Usage:
|
||||
python scripts/run_historical_adsb_action.py --start-date 2025-01-01 --end-date 2025-06-01
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from calendar import monthrange
|
||||
|
||||
|
||||
def generate_monthly_chunks(start_date_str, end_date_str):
    """Split [start, end) into calendar-month-aligned date ranges.

    End dates are exclusive (e.g., to process Jan 1-31, end_date should be Feb 1).
    Returns a list of ``{'start': 'YYYY-MM-DD', 'end': 'YYYY-MM-DD'}`` dicts.
    """
    window_start = datetime.strptime(start_date_str, '%Y-%m-%d')
    window_end = datetime.strptime(end_date_str, '%Y-%m-%d')

    chunks = []
    cursor = window_start

    while cursor < window_end:
        # First day of the month after `cursor` — the exclusive chunk end.
        rollover = datetime(
            cursor.year + cursor.month // 12,
            cursor.month % 12 + 1,
            1,
        )

        chunks.append({
            'start': cursor.strftime('%Y-%m-%d'),
            # Clamp the final chunk to the global exclusive end date.
            'end': min(rollover, window_end).strftime('%Y-%m-%d'),
        })

        if rollover >= window_end:
            break
        cursor = rollover

    return chunks
|
||||
|
||||
|
||||
def trigger_workflow(start_date, end_date, repo='ggman12/OpenAirframes', branch='main', dry_run=False):
    """Trigger the adsb-to-aircraft-multiple-day-run workflow via GitHub CLI.

    Returns a ``(success, run_id)`` tuple; ``run_id`` is None on failure,
    in dry-run mode, or when the freshly created run cannot be looked up.
    """
    cmd = [
        'gh', 'workflow', 'run', 'adsb-to-aircraft-multiple-day-run.yaml',
        '--repo', repo,
        '--ref', branch,
        '-f', f'start_date={start_date}',
        '-f', f'end_date={end_date}'
    ]

    if dry_run:
        print(f"[DRY RUN] Would run: {' '.join(cmd)}")
        return True, None

    print(f"Triggering workflow: {start_date} to {end_date} (on {branch})")
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        print(f"✗ Failed to trigger workflow for {start_date} to {end_date}")
        print(f"Error: {result.stderr}")
        return False, None

    print(f"✓ Successfully triggered workflow for {start_date} to {end_date}")

    # Give GitHub a moment to register the run before querying for its ID.
    import time
    time.sleep(2)

    # Assume the newest run on this branch is the one just triggered
    # (racy if multiple triggers land at the same time).
    list_cmd = [
        'gh', 'run', 'list',
        '--repo', repo,
        '--workflow', 'adsb-to-aircraft-multiple-day-run.yaml',
        '--branch', branch,
        '--limit', '1',
        '--json', 'databaseId',
        '--jq', '.[0].databaseId'
    ]
    list_result = subprocess.run(list_cmd, capture_output=True, text=True)
    run_id = list_result.stdout.strip() if list_result.returncode == 0 else None

    return True, run_id
|
||||
|
||||
|
||||
def main():
    """CLI entry point: chunk a date range by month and trigger one workflow per chunk."""
    parser = argparse.ArgumentParser(
        description='Trigger adsb-to-aircraft-multiple-day-run workflow runs in monthly chunks'
    )
    parser.add_argument(
        '--start-date', '--start_date',
        dest='start_date',
        required=True,
        help='Start date in YYYY-MM-DD format (inclusive)'
    )
    parser.add_argument(
        '--end-date', '--end_date',
        dest='end_date',
        required=True,
        help='End date in YYYY-MM-DD format (exclusive)'
    )
    parser.add_argument(
        '--repo',
        type=str,
        default='ggman12/OpenAirframes',
        help='GitHub repository (default: ggman12/OpenAirframes)'
    )
    parser.add_argument(
        '--branch',
        type=str,
        default='main',
        help='Branch to run the workflow on (default: main)'
    )
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='Print commands without executing them'
    )
    parser.add_argument(
        '--delay',
        type=int,
        default=5,
        help='Delay in seconds between workflow triggers (default: 5)'
    )

    args = parser.parse_args()

    # Validate dates
    try:
        start = datetime.strptime(args.start_date, '%Y-%m-%d')
        end = datetime.strptime(args.end_date, '%Y-%m-%d')
        if start > end:
            print("Error: start_date must be before or equal to end_date")
            sys.exit(1)
    except ValueError as e:
        print(f"Error: Invalid date format - {e}")
        sys.exit(1)

    # Generate monthly chunks
    chunks = generate_monthly_chunks(args.start_date, args.end_date)

    print(f"\nGenerating {len(chunks)} monthly workflow runs on branch '{args.branch}' (repo: {args.repo}):")
    for i, chunk in enumerate(chunks, 1):
        print(f" {i}. {chunk['start']} to {chunk['end']}")

    # Interactive confirmation before touching the remote repository.
    if not args.dry_run:
        response = input(f"\nProceed with triggering {len(chunks)} workflows on '{args.branch}'? [y/N]: ")
        if response.lower() != 'y':
            print("Cancelled.")
            sys.exit(0)

        print()

    # Trigger workflows
    import time
    success_count = 0
    triggered_runs = []

    for i, chunk in enumerate(chunks, 1):
        print(f"\n[{i}/{len(chunks)}] ", end='')

        success, run_id = trigger_workflow(
            chunk['start'],
            chunk['end'],
            repo=args.repo,
            branch=args.branch,
            dry_run=args.dry_run
        )

        if success:
            success_count += 1
            if run_id:
                triggered_runs.append({
                    'run_id': run_id,
                    'start': chunk['start'],
                    'end': chunk['end']
                })

        # Add delay between triggers (except for last one)
        if i < len(chunks) and not args.dry_run:
            time.sleep(args.delay)

    print(f"\n\nSummary: {success_count}/{len(chunks)} workflows triggered successfully")

    # Save triggered run IDs to a file
    # (consumed later by download_and_concat_runs.py)
    if triggered_runs and not args.dry_run:
        import json
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        runs_file = f"./triggered_runs_{timestamp}.json"
        with open(runs_file, 'w') as f:
            json.dump({
                'start_date': args.start_date,
                'end_date': args.end_date,
                'repo': args.repo,
                'branch': args.branch,
                'runs': triggered_runs
            }, f, indent=2)
        print(f"\nRun IDs saved to: {runs_file}")
        print(f"\nTo download and concatenate these artifacts, run:")
        print(f" python scripts/download_and_concat_runs.py {runs_file}")

    # Non-zero exit when any trigger failed, for scripted callers.
    if success_count < len(chunks):
        sys.exit(1)


if __name__ == '__main__':
    main()
|
||||
@@ -0,0 +1,82 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Run src.adsb.main in an isolated git worktree so edits in the main
|
||||
working tree won't affect subprocess imports during the run.
|
||||
|
||||
Usage:
|
||||
python scripts/run_main_isolated.py 2026-01-01
|
||||
python scripts/run_main_isolated.py --start_date 2026-01-01 --end_date 2026-01-03
|
||||
"""
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def run(
|
||||
cmd: list[str],
|
||||
*,
|
||||
cwd: Path | None = None,
|
||||
check: bool = True,
|
||||
) -> subprocess.CompletedProcess:
|
||||
print(f"\n>>> {' '.join(cmd)}")
|
||||
return subprocess.run(cmd, cwd=cwd, check=check)
|
||||
|
||||
|
||||
def main() -> int:
    """Parse args, snapshot ./src, and run src.adsb.main from the snapshot.

    Returns the subprocess exit code (0 on success).
    """
    parser = argparse.ArgumentParser(description="Run src.adsb.main in an isolated worktree")
    parser.add_argument("date", nargs="?", help="Single date to process (YYYY-MM-DD)")
    parser.add_argument("--start_date", help="Start date (inclusive, YYYY-MM-DD)")
    parser.add_argument("--end_date", help="End date (exclusive, YYYY-MM-DD)")
    parser.add_argument("--concat_with_latest_csv", action="store_true", help="Also concatenate with latest CSV from GitHub releases")
    args = parser.parse_args()

    if args.date and (args.start_date or args.end_date):
        raise SystemExit("Use a single date or --start_date/--end_date, not both.")

    if args.date:
        # strptime is used purely for validation; it raises on a malformed date.
        datetime.strptime(args.date, "%Y-%m-%d")
        main_args = ["--date", args.date]
    else:
        if not args.start_date or not args.end_date:
            raise SystemExit("Provide --start_date and --end_date, or a single date.")
        datetime.strptime(args.start_date, "%Y-%m-%d")
        datetime.strptime(args.end_date, "%Y-%m-%d")
        main_args = ["--start_date", args.start_date, "--end_date", args.end_date]

    if args.concat_with_latest_csv:
        main_args.append("--concat_with_latest_csv")

    # Repo root is one level above this script's directory.
    repo_root = Path(__file__).resolve().parents[1]
    snapshots_root = repo_root / ".snapshots"
    snapshots_root.mkdir(exist_ok=True)

    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    snapshot_root = snapshots_root / f"run_{timestamp}"
    snapshot_src = snapshot_root / "src"

    exit_code = 0
    try:
        # Copy src/ so edits to the working tree cannot change what the
        # subprocess imports mid-run.
        shutil.copytree(repo_root / "src", snapshot_src)

        # One-liner: put the snapshot first on sys.path, fake argv, and
        # execute src.adsb.main as __main__ via runpy.
        runner = (
            "import sys, runpy; "
            f"sys.path.insert(0, {repr(str(snapshot_root))}); "
            f"sys.argv = ['src.adsb.main'] + {main_args!r}; "
            "runpy.run_module('src.adsb.main', run_name='__main__')"
        )
        cmd = [sys.executable, "-c", runner]
        run(cmd, cwd=repo_root)
    except subprocess.CalledProcessError as exc:
        exit_code = exc.returncode
    finally:
        # Best-effort cleanup of the snapshot, even on failure.
        shutil.rmtree(snapshot_root, ignore_errors=True)

    return exit_code


if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user