update histoircal

split large file into chuncks
delete aws
2026-06-28 15:29:57 +02:00 · 2026-02-13 00:12:18 -05:00 · 2026-02-12 20:22:36 -05:00 · 2026-02-12 20:13:40 -05:00 · 2026-02-12 19:32:34 -05:00 · 2026-02-12 19:09:35 -05:00
11 changed files with 44 additions and 502 deletions
@@ -8,8 +8,8 @@ body:
  - type: markdown
    attributes:
      value: |
-        Submit **one object** or an **array of objects** that matches the community submission [schema](https://github.com/PlaneQuery/OpenAirframes/blob/main/schemas/community_submission.v1.schema.json). Reuse existing tags from the schema when possible.
+        Submit **one object** or an **array of objects** that matches the community submission schema.
-        
+
        **Rules (enforced on review/automation):**
        - Each object must include **at least one** of:
          - `registration_number`
@@ -27,7 +27,7 @@ body:
        ```json
        {
            "registration_number": "N12345",
-            "tags": {"owner": "John Doe", "photo": "https://example.com/photo.jpg"},
+            "tags": {"owner": "John Doe"},
            "start_date": "2025-01-01"
        }
        ```
@@ -77,5 +77,6 @@ body:
    id: notes
    attributes:
      label: Notes (optional)
      description: Any context, sources, or links that help validate your submission.
    validations:
      required: false
@@ -95,27 +95,20 @@ jobs:
            # Verify tar integrity
            tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
-            # Record tar size and checksum for verification after reassembly
+            # Create checksum of the FULL tar before splitting (for verification after reassembly)
-            echo "=== Recording tar metadata ==="
+            echo "=== Creating checksum of full tar ==="
-            ORIGINAL_SIZE=$(stat --format=%s extracted_data.tar)
+            sha256sum extracted_data.tar > full_tar.sha256
-            ORIGINAL_SHA=$(sha256sum extracted_data.tar | awk '{print $1}')
+            cat full_tar.sha256
            echo "Size: $ORIGINAL_SIZE"
            echo "SHA256: $ORIGINAL_SHA"
            # Split into 500MB chunks to avoid artifact upload issues
            echo "=== Splitting tar into 500MB chunks ==="
            mkdir -p tar_chunks
            split -b 500M extracted_data.tar tar_chunks/extracted_data.tar.part_
            rm extracted_data.tar
-            
+            mv full_tar.sha256 tar_chunks/
            # Write metadata file (plain text so artifact upload won't skip it)
            echo "$ORIGINAL_SHA  extracted_data.tar" > tar_chunks/checksum.txt
            echo "$ORIGINAL_SIZE" >> tar_chunks/checksum.txt
            echo "=== Chunks created ==="
            ls -lah tar_chunks/
            echo "=== Checksum file ==="
            cat tar_chunks/checksum.txt
          else
            echo "ERROR: No extracted directories found, cannot create tar"
            exit 1
@@ -186,30 +179,19 @@ jobs:
            echo "=== Reassembled tar file info ==="
            ls -lah extracted_data.tar
-            # Verify integrity
+            # Verify checksum of reassembled tar matches original
-            echo "=== Verifying reassembled tar ==="
+            echo "=== Verifying reassembled tar checksum ==="
-            if [ -f tar_chunks/checksum.txt ]; then
+            echo "Original checksum:"
-              EXPECTED_SHA=$(head -1 tar_chunks/checksum.txt | awk '{print $1}')
+            cat tar_chunks/full_tar.sha256
-              EXPECTED_SIZE=$(sed -n '2p' tar_chunks/checksum.txt)
+            echo "Reassembled checksum:"
-              ACTUAL_SHA=$(sha256sum extracted_data.tar | awk '{print $1}')
+            sha256sum extracted_data.tar
-              ACTUAL_SIZE=$(stat --format=%s extracted_data.tar)
+            sha256sum -c tar_chunks/full_tar.sha256 || { echo "ERROR: Reassembled tar checksum mismatch - data corrupted during transfer"; exit 1; }
-              echo "Expected: SHA=$EXPECTED_SHA Size=$EXPECTED_SIZE"
+            echo "Checksum verified - data integrity confirmed"
              echo "Actual:   SHA=$ACTUAL_SHA Size=$ACTUAL_SIZE"
              if [ "$EXPECTED_SHA" != "$ACTUAL_SHA" ] || [ "$EXPECTED_SIZE" != "$ACTUAL_SIZE" ]; then
                echo "ERROR: Reassembled tar does not match original - data corrupted during transfer"
                exit 1
              fi
              echo "Checksum and size verified"
            else
              echo "WARNING: No checksum file found, falling back to tar integrity check"
              tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Tar file is corrupted"; exit 1; }
              echo "Tar integrity check passed"
            fi
            rm -rf tar_chunks
            echo "=== Extracting ==="
-            tar -xf extracted_data.tar
+            tar -xvf extracted_data.tar
            rm extracted_data.tar
            echo "has_data=true" >> "$GITHUB_OUTPUT"
            echo "=== Contents of data/output ==="
@@ -261,64 +261,10 @@ jobs:
          path: data/openairframes/openairframes_community_*.csv
          retention-days: 1
  # Runs the ADS-B Exchange release script and uploads the resulting
  # basic-ac-db_*.json.gz as the `adsbexchange-json` artifact.
  # Not run for `schedule` events.
  build-adsbexchange-json:
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule'
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"
      - name: Run ADS-B Exchange JSON release script
        env:
          # Hand the optional input to the shell as data instead of splicing
          # the expression into the script text (avoids script injection and
          # unquoted word splitting).
          INPUT_DATE: ${{ inputs.date }}
        run: |
          if [ -n "$INPUT_DATE" ]; then
            python -m src.contributions.create_daily_adsbexchange_release --date "$INPUT_DATE"
          else
            python -m src.contributions.create_daily_adsbexchange_release
          fi
          ls -lah data/openairframes
      - name: Upload ADS-B Exchange JSON artifact
        uses: actions/upload-artifact@v4
        with:
          name: adsbexchange-json
          path: data/openairframes/basic-ac-db_*.json.gz
          retention-days: 1
  # Runs the Mictronics DB release script and uploads any mictronics-db_*.zip
  # it produced as the `mictronics-db` artifact. Not run for `schedule` events.
  # Best-effort: the script step may fail and the upload tolerates no files.
  build-mictronics-db:
    runs-on: ubuntu-latest
    if: github.event_name != 'schedule'
    steps:
      - name: Checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - name: Setup Python
        uses: actions/setup-python@v6
        with:
          python-version: "3.14"
      - name: Run Mictronics DB release script
        continue-on-error: true
        env:
          # Hand the optional input to the shell as data instead of splicing
          # the expression into the script text (avoids script injection and
          # unquoted word splitting).
          INPUT_DATE: ${{ inputs.date }}
        run: |
          if [ -n "$INPUT_DATE" ]; then
            python -m src.contributions.create_daily_microtonics_release --date "$INPUT_DATE"
          else
            python -m src.contributions.create_daily_microtonics_release
          fi
          ls -lah data/openairframes
      - name: Upload Mictronics DB artifact
        uses: actions/upload-artifact@v4
        with:
          name: mictronics-db
          path: data/openairframes/mictronics-db_*.zip
          retention-days: 1
          if-no-files-found: ignore
  create-release:
    runs-on: ubuntu-latest
-    needs: [build-faa, adsb-reduce, build-community, build-adsbexchange-json, build-mictronics-db]
+    needs: [build-faa, adsb-reduce, build-community]
-    if: github.event_name != 'schedule' && !failure() && !cancelled()
+    if: github.event_name != 'schedule'
    steps:
      - name: Checkout for gh CLI
        uses: actions/checkout@v4
@@ -345,19 +291,6 @@ jobs:
          name: community-release
          path: artifacts/community
      - name: Download ADS-B Exchange JSON artifact
        uses: actions/download-artifact@v4
        with:
          name: adsbexchange-json
          path: artifacts/adsbexchange
      - name: Download Mictronics DB artifact
        uses: actions/download-artifact@v4
        continue-on-error: true
        with:
          name: mictronics-db
          path: artifacts/mictronics
      - name: Debug artifact structure
        run: |
          echo "=== Full artifacts tree ==="
@@ -368,10 +301,6 @@ jobs:
          find artifacts/adsb -type f 2>/dev/null || echo "No files found in artifacts/adsb"
          echo "=== Community artifacts ==="
          find artifacts/community -type f 2>/dev/null || echo "No files found in artifacts/community"
          echo "=== ADS-B Exchange JSON artifacts ==="
          find artifacts/adsbexchange -type f 2>/dev/null || echo "No files found in artifacts/adsbexchange"
          echo "=== Mictronics DB artifacts ==="
          find artifacts/mictronics -type f 2>/dev/null || echo "No files found in artifacts/mictronics"
      - name: Prepare release metadata
        id: meta
@@ -391,8 +320,6 @@ jobs:
          CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv" -type f 2>/dev/null | head -1)
          CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
          ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
          JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
          ZIP_FILE_MICTRONICS=$(find artifacts/mictronics -name "mictronics-db_*.zip" -type f 2>/dev/null | head -1)
          # Validate required files exist
          MISSING_FILES=""
@@ -405,24 +332,12 @@ jobs:
          if [ -z "$ZIP_FILE" ] || [ ! -f "$ZIP_FILE" ]; then
            MISSING_FILES="$MISSING_FILES FAA_ZIP"
          fi
          if [ -z "$JSON_FILE_ADSBX" ] || [ ! -f "$JSON_FILE_ADSBX" ]; then
            MISSING_FILES="$MISSING_FILES ADSBX_JSON"
          fi
          # Optional files - warn but don't fail
          OPTIONAL_MISSING=""
          if [ -z "$ZIP_FILE_MICTRONICS" ] || [ ! -f "$ZIP_FILE_MICTRONICS" ]; then
            OPTIONAL_MISSING="$OPTIONAL_MISSING MICTRONICS_ZIP"
            ZIP_FILE_MICTRONICS=""
          fi
          if [ -n "$MISSING_FILES" ]; then
            echo "ERROR: Missing required release files:$MISSING_FILES"
            echo "FAA CSV: $CSV_FILE_FAA"
            echo "ADSB CSV: $CSV_FILE_ADSB"
            echo "ZIP: $ZIP_FILE"
            echo "ADSBX JSON: $JSON_FILE_ADSBX"
            echo "MICTRONICS ZIP: $ZIP_FILE_MICTRONICS"
            exit 1
          fi
@@ -431,15 +346,6 @@ jobs:
          CSV_BASENAME_ADSB=$(basename "$CSV_FILE_ADSB")
          CSV_BASENAME_COMMUNITY=$(basename "$CSV_FILE_COMMUNITY" 2>/dev/null || echo "")
          ZIP_BASENAME=$(basename "$ZIP_FILE")
          JSON_BASENAME_ADSBX=$(basename "$JSON_FILE_ADSBX")
          ZIP_BASENAME_MICTRONICS=""
          if [ -n "$ZIP_FILE_MICTRONICS" ]; then
            ZIP_BASENAME_MICTRONICS=$(basename "$ZIP_FILE_MICTRONICS")
          fi
          if [ -n "$OPTIONAL_MISSING" ]; then
            echo "WARNING: Optional files missing:$OPTIONAL_MISSING (will continue without them)"
          fi
          echo "date=$DATE" >> "$GITHUB_OUTPUT"
          echo "tag=$TAG" >> "$GITHUB_OUTPUT"
@@ -451,10 +357,6 @@ jobs:
          echo "csv_basename_community=$CSV_BASENAME_COMMUNITY" >> "$GITHUB_OUTPUT"
          echo "zip_file=$ZIP_FILE" >> "$GITHUB_OUTPUT"
          echo "zip_basename=$ZIP_BASENAME" >> "$GITHUB_OUTPUT"
          echo "json_file_adsbx=$JSON_FILE_ADSBX" >> "$GITHUB_OUTPUT"
          echo "json_basename_adsbx=$JSON_BASENAME_ADSBX" >> "$GITHUB_OUTPUT"
          echo "zip_file_mictronics=$ZIP_FILE_MICTRONICS" >> "$GITHUB_OUTPUT"
          echo "zip_basename_mictronics=$ZIP_BASENAME_MICTRONICS" >> "$GITHUB_OUTPUT"
          echo "name=OpenAirframes snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
          echo "Found files:"
@@ -462,8 +364,6 @@ jobs:
          echo "  ADSB CSV: $CSV_FILE_ADSB"
          echo "  Community CSV: $CSV_FILE_COMMUNITY"
          echo "  ZIP: $ZIP_FILE"
          echo "  ADSBX JSON: $JSON_FILE_ADSBX"
          echo "  MICTRONICS ZIP: $ZIP_FILE_MICTRONICS"
      - name: Delete existing release if exists
        run: |
@@ -477,7 +377,7 @@ jobs:
        with:
          tag_name: ${{ steps.meta.outputs.tag }}
          name: ${{ steps.meta.outputs.name }}
-          fail_on_unmatched_files: false
+          fail_on_unmatched_files: true
          body: |
            Automated daily snapshot generated at 06:00 UTC for ${{ steps.meta.outputs.date }}.
@@ -486,14 +386,10 @@ jobs:
            - ${{ steps.meta.outputs.csv_basename_adsb }}
            - ${{ steps.meta.outputs.csv_basename_community }}
            - ${{ steps.meta.outputs.zip_basename }}
            - ${{ steps.meta.outputs.json_basename_adsbx }}
            ${{ steps.meta.outputs.zip_basename_mictronics && format('- {0}', steps.meta.outputs.zip_basename_mictronics) || '' }}
          files: |
            ${{ steps.meta.outputs.csv_file_faa }}
            ${{ steps.meta.outputs.csv_file_adsb }}
            ${{ steps.meta.outputs.csv_file_community }}
            ${{ steps.meta.outputs.zip_file }}
            ${{ steps.meta.outputs.json_file_adsbx }}
            ${{ steps.meta.outputs.zip_file_mictronics }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -48,52 +48,29 @@ jobs:
            git fetch origin "$branch_name"
            git checkout "$branch_name"
            # Merge main into PR branch
            git config user.name "github-actions[bot]"
            git config user.email "github-actions[bot]@users.noreply.github.com"
-            # Get the community submission file(s) and schema from this branch
+            if git merge origin/main -m "Merge main to update schema"; then
-            community_files=$(git diff --name-only origin/main...HEAD -- 'community/' 'schemas/')
+              # Regenerate schema for this PR's submission (adds any new tags)
-            
+              python -m src.contributions.regenerate_pr_schema || true
-            if [ -z "$community_files" ]; then
+              
-              echo "  No community/schema files found in PR #$pr_number, skipping"
+              # If there are changes, commit and push
-              git checkout main
+              if [ -n "$(git status --porcelain schemas/)" ]; then
-              continue
+                git add schemas/
-            fi
+                git commit -m "Update schema with new tags"
-            
+                git push origin "$branch_name"
-            echo "  Files to preserve: $community_files"
+                echo "  Updated PR #$pr_number with schema changes"
-            
+              else
-            # Save the community files content
+                git push origin "$branch_name"
-            mkdir -p /tmp/pr_files
+                echo "  Merged main into PR #$pr_number"
            for file in $community_files; do
              if [ -f "$file" ]; then
                mkdir -p "/tmp/pr_files/$(dirname "$file")"
                cp "$file" "/tmp/pr_files/$file"
              fi
            done
            # Reset branch to main (clean slate)
            git reset --hard origin/main
            # Restore the community files
            for file in $community_files; do
              if [ -f "/tmp/pr_files/$file" ]; then
                mkdir -p "$(dirname "$file")"
                cp "/tmp/pr_files/$file" "$file"
              fi
            done
            rm -rf /tmp/pr_files
            # Regenerate schema with current main + this submission's tags
            python -m src.contributions.regenerate_pr_schema || true
            # Stage and commit all changes
            git add community/ schemas/
            if ! git diff --cached --quiet; then
              git commit -m "Community submission (rebased on main)"
              git push --force origin "$branch_name"
              echo "  Rebased PR #$pr_number onto main"
            else
-              echo "  No changes needed for PR #$pr_number"
+              echo "  Merge conflict in PR #$pr_number, adding comment"
              gh pr comment "$pr_number" --body $'⚠️ **Merge Conflict**\n\nAnother community submission was merged and this PR has conflicts.\n\nA maintainer may need to:\n1. Close this PR\n2. Remove the `approved` label from the original issue\n3. Re-add the `approved` label to regenerate the PR'
              git merge --abort
              fi
            fi
            git checkout main
@@ -1,50 +1 @@
-# OpenAirframes.org
+Downloads [`https://registry.faa.gov/database/ReleasableAircraft.zip`](https://registry.faa.gov/database/ReleasableAircraft.zip). Creates a daily GitHub Release at 06:00 UTC containing the unaltered `ReleasableAircraft.zip` and a derived CSV file with all data from the FAA database since 2023-08-16. The FAA database updates daily at 05:30 UTC.
 OpenAirframes.org is an open-source, community-driven airframes database.
 The data includes:
 - Registration information from Civil Aviation Authorities (FAA)
 - Airline data (e.g., Air France)
 - Community contributions such as ownership details, military aircraft info, photos, and more
 ---
 ## For Users
 A daily release is created at **06:00 UTC** and includes:
 - **openairframes_community.csv**  
  All community submissions
 - **openairframes_faa.csv**  
  All [FAA registration data](https://www.faa.gov/licenses_certificates/aircraft_certification/aircraft_registry/releasable_aircraft_download) from 2023-08-16 to present (~260 MB)
 - **openairframes_adsb.csv**  
  Airframe information derived from ADS-B messages on the [ADSB.lol](https://www.adsb.lol/) network, from 2026-02-12 to present (will be from 2024-01-01 soon). The airframe information originates from [mictronics aircraft database](https://www.mictronics.de/aircraft-database/) (~5 MB).
 - **ReleasableAircraft_{date}.zip**  
  A daily snapshot of the FAA database, which updates at **05:30 UTC**
 ---
 ## For Contributors
 Submit data via a [GitHub Issue](https://github.com/PlaneQuery/OpenAirframes/issues/new?template=community_submission.yaml) with your preferred attribution. Once approved, it will appear in the daily release. A leaderboard will be available in the future.
 All data is valuable. Examples include:
 - Celebrity ownership (with citations)
 - Photos
 - Internet capability
 - Military aircraft information
 - Unique facts (e.g., an airframe that crashed, performs aerobatics, etc.)
 Please try to follow the submission formatting guidelines. If you are struggling with them, that is fine—submit your data anyway and it will be formatted for you.
 ---
 ## For Developers
 All code, compute (GitHub Actions), and storage (releases) are in this GitHub repository. Improvements are welcome. Potential features include:
 - Web UI for data
 - Web UI for contributors
 - Additional export formats in the daily release
 - Data fusion from multiple sources in the daily release
 - Automated airframe data connectors, including (but not limited to) civil aviation authorities and airline APIs
@@ -82,8 +82,8 @@ def fetch_releases(version_date: str) -> list:
    if version_date == "v2024.12.31":
        year = "2025"
    BASE_URL = f"https://api.github.com/repos/adsblol/globe_history_{year}/releases"
-    # Match both normal and tmp releases
+    # Match exact release name, exclude tmp releases
-    PATTERN = rf"^{re.escape(version_date)}-planes-readsb-prod-\d+(tmp)?$"
+    PATTERN = rf"^{re.escape(version_date)}-planes-readsb-prod-\d+$"
    releases = []
    page = 1
@@ -582,12 +582,6 @@ def process_version_date(version_date: str, keep_folders: bool = False):
            print(f"No releases found for {vd}.")
            return None
        # Prefer non-tmp releases; only use tmp if no normal releases exist
        normal_releases = [r for r in releases if "tmp" not in r["tag_name"]]
        tmp_releases = [r for r in releases if "tmp" in r["tag_name"]]
        releases = normal_releases if normal_releases else tmp_releases
        print(f"Using {'normal' if normal_releases else 'tmp'} releases ({len(releases)} found)")
        downloaded_files = []
        for release in releases:
            tag_name = release["tag_name"]
@@ -59,12 +59,6 @@ def download_and_extract(version_date: str) -> str | None:
            print(f"No releases found for {version_date}")
            return None
        # Prefer non-tmp releases; only use tmp if no normal releases exist
        normal_releases = [r for r in releases if "tmp" not in r["tag_name"]]
        tmp_releases = [r for r in releases if "tmp" in r["tag_name"]]
        releases = normal_releases if normal_releases else tmp_releases
        print(f"Using {'normal' if normal_releases else 'tmp'} releases ({len(releases)} found)")
        downloaded_files = []
        for release in releases:
            tag_name = release["tag_name"]
@@ -1,155 +0,0 @@
 #!/usr/bin/env python3
 """
 Run the full ADS-B processing pipeline locally.
 Downloads adsb.lol data, processes trace files, and outputs openairframes_adsb CSV.
 Usage:
    # Single day (yesterday by default)
    python -m src.adsb.run_local
    # Single day (specific date)
    python -m src.adsb.run_local 2024-01-15
    # Date range (inclusive)
    python -m src.adsb.run_local 2024-01-01 2024-01-07
 """
 import argparse
 import os
 import subprocess
 import sys
 from datetime import datetime, timedelta
 def run_cmd(cmd: list[str], description: str) -> None:
    """Echo ``cmd``, execute it, and terminate the process if it fails.
    Args:
        cmd: Argument vector handed to ``subprocess.run`` (no shell involved).
        description: Human-readable label used in the failure message.
    Raises:
        SystemExit: With the child's exit code when that code is non-zero.
    """
    command_line = " ".join(cmd)
    print(f"\n>>> {command_line}")
    exit_code = subprocess.run(cmd).returncode
    if exit_code == 0:
        return
    print(f"ERROR: {description} failed with exit code {exit_code}")
    sys.exit(exit_code)
 def main():
    """Run the local ADS-B pipeline: download/extract, process chunks, combine to CSV.
    Parses the optional positional ``start_date`` / ``end_date`` (YYYY-MM-DD) plus
    ``--chunks`` and ``--skip-base``, then runs the three ``src.adsb`` stage modules
    as subprocesses through ``run_cmd``, which exits the process on the first
    non-zero exit code. Finally reports the expected output CSV if it exists.
    """
    # Local import: only the "yesterday" default needs it, and the module-level
    # import line only brings in ``datetime`` and ``timedelta``.
    from datetime import timezone
    parser = argparse.ArgumentParser(
        description="Run full ADS-B processing pipeline locally",
        usage="python -m src.adsb.run_local [start_date] [end_date]"
    )
    parser.add_argument(
        "start_date",
        nargs="?",
        help="Start date (YYYY-MM-DD). Default: yesterday"
    )
    parser.add_argument(
        "end_date",
        nargs="?",
        help="End date (YYYY-MM-DD, inclusive). If omitted, processes single day"
    )
    parser.add_argument(
        "--chunks",
        type=int,
        default=4,
        help="Number of parallel chunks (default: 4)"
    )
    parser.add_argument(
        "--skip-base",
        action="store_true",
        help="Skip downloading and merging with base release"
    )
    args = parser.parse_args()
    # Determine dates
    if args.start_date:
        start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
    else:
        # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
        start_date = datetime.now(timezone.utc) - timedelta(days=1)
    end_date = None
    if args.end_date:
        end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
    start_str = start_date.strftime("%Y-%m-%d")
    end_str = end_date.strftime("%Y-%m-%d") if end_date else None
    # Every stage takes the same date selector, so build it once.
    if end_str:
        date_args = ["--start-date", start_str, "--end-date", end_str]
    else:
        date_args = ["--date", start_str]
    # Run the stages with the interpreter executing this script rather than
    # whatever "python" happens to resolve to on PATH (it may be absent or a
    # different environment).
    python_exe = sys.executable or "python"
    print("=" * 60)
    print("ADS-B Processing Pipeline")
    print("=" * 60)
    if end_str:
        print(f"Date range: {start_str} to {end_str}")
    else:
        print(f"Date: {start_str}")
    print(f"Chunks: {args.chunks}")
    print("=" * 60)
    # Step 1: Download and extract
    print("\n" + "=" * 60)
    print("Step 1: Download and Extract")
    print("=" * 60)
    cmd = [python_exe, "-m", "src.adsb.download_and_list_icaos", *date_args]
    run_cmd(cmd, "Download and extract")
    # Step 2: Process chunks (sequentially, one subprocess per chunk id)
    print("\n" + "=" * 60)
    print("Step 2: Process Chunks")
    print("=" * 60)
    for chunk_id in range(args.chunks):
        print(f"\n--- Chunk {chunk_id + 1}/{args.chunks} ---")
        cmd = [python_exe, "-m", "src.adsb.process_icao_chunk",
               "--chunk-id", str(chunk_id),
               "--total-chunks", str(args.chunks),
               *date_args]
        run_cmd(cmd, f"Process chunk {chunk_id}")
    # Step 3: Combine chunks to CSV
    print("\n" + "=" * 60)
    print("Step 3: Combine to CSV")
    print("=" * 60)
    chunks_dir = "./data/output/adsb_chunks"
    cmd = [python_exe, "-m", "src.adsb.combine_chunks_to_csv",
           "--chunks-dir", chunks_dir,
           *date_args]
    if args.skip_base:
        cmd.append("--skip-base")
    run_cmd(cmd, "Combine chunks")
    print("\n" + "=" * 60)
    print("Done!")
    print("=" * 60)
    # Show output; a single-day run uses the start date for both ends of the name.
    output_dir = "./data/openairframes"
    output_file = f"openairframes_adsb_{start_str}_{end_str or start_str}.csv"
    output_path = os.path.join(output_dir, output_file)
    if os.path.exists(output_path):
        size_mb = os.path.getsize(output_path) / (1024 * 1024)
        print(f"Output: {output_path}")
        print(f"Size: {size_mb:.1f} MB")
 if __name__ == "__main__":
    main()
@@ -1,40 +0,0 @@
 #!/usr/bin/env python3
 """
 Download ADS-B Exchange basic-ac-db.json.gz.
 Usage:
    python -m src.contributions.create_daily_adsbexchange_release [--date YYYY-MM-DD]
 """
 from __future__ import annotations
 import argparse
 import shutil
 from datetime import datetime, timezone
 from pathlib import Path
 from urllib.request import Request, urlopen
 URL = "https://downloads.adsbexchange.com/downloads/basic-ac-db.json.gz"
 OUT_ROOT = Path("data/openairframes")
 def main() -> None:
    """Download ``URL`` into ``OUT_ROOT`` as ``basic-ac-db_<date>.json.gz``.
    ``--date`` only selects the date used in the output file name; it defaults
    to today's UTC date. The body is streamed to a ``.part`` sibling and renamed
    once the transfer has completed, so an interrupted download never leaves a
    truncated file under the final name.
    """
    parser = argparse.ArgumentParser(description="Create daily ADS-B Exchange JSON release")
    parser.add_argument("--date", type=str, help="Date to process (YYYY-MM-DD format, default: today UTC)")
    args = parser.parse_args()
    date_str = args.date or datetime.now(timezone.utc).strftime("%Y-%m-%d")
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    gz_path = OUT_ROOT / f"basic-ac-db_{date_str}.json.gz"
    tmp_path = gz_path.with_name(gz_path.name + ".part")
    print(f"Downloading {URL}...")
    req = Request(URL, headers={"User-Agent": "openairframes-downloader/1.0"}, method="GET")
    try:
        with urlopen(req, timeout=300) as r, tmp_path.open("wb") as f:
            shutil.copyfileobj(r, f)
        tmp_path.replace(gz_path)
    finally:
        # No-op after a successful rename; removes the partial file on failure.
        tmp_path.unlink(missing_ok=True)
    print(f"Wrote: {gz_path}")
 if __name__ == "__main__":
    main()
@@ -1,55 +0,0 @@
 #!/usr/bin/env python3
 """
 Download Mictronics aircraft database zip.
 Usage:
    python -m src.contributions.create_daily_microtonics_release [--date YYYY-MM-DD]
 """
 from __future__ import annotations
 import argparse
 import shutil
 import sys
 import time
 from datetime import datetime, timezone
 from pathlib import Path
 from urllib.error import URLError
 from urllib.request import Request, urlopen
 URL = "https://www.mictronics.de/aircraft-database/indexedDB_old.php"
 OUT_ROOT = Path("data/openairframes")
 MAX_RETRIES = 3
 RETRY_DELAY = 30  # seconds
 def main() -> None:
    """Download ``URL`` into ``OUT_ROOT`` as ``mictronics-db_<date>.zip``, with retries.
    ``--date`` only selects the date used in the output file name; it defaults
    to today's UTC date. Up to ``MAX_RETRIES`` attempts are made, sleeping
    ``RETRY_DELAY`` seconds between them; after the last failure the process
    exits with status 1. Each attempt streams to a ``.part`` sibling that is
    renamed only after a complete transfer, so a failed attempt never leaves a
    truncated ``.zip`` behind for later steps to pick up.
    """
    parser = argparse.ArgumentParser(description="Create daily Mictronics database release")
    parser.add_argument("--date", type=str, help="Date to process (YYYY-MM-DD format, default: today UTC)")
    args = parser.parse_args()
    date_str = args.date or datetime.now(timezone.utc).strftime("%Y-%m-%d")
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    zip_path = OUT_ROOT / f"mictronics-db_{date_str}.zip"
    tmp_path = zip_path.with_name(zip_path.name + ".part")
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            print(f"Downloading {URL} (attempt {attempt}/{MAX_RETRIES})...")
            req = Request(URL, headers={"User-Agent": "Mozilla/5.0 (compatible; openairframes-downloader/1.0)"}, method="GET")
            with urlopen(req, timeout=120) as r, tmp_path.open("wb") as f:
                shutil.copyfileobj(r, f)
            # Publish under the final name only once the whole body was written.
            tmp_path.replace(zip_path)
            print(f"Wrote: {zip_path}")
            return
        except (URLError, TimeoutError) as e:
            # Drop whatever was written before the failure.
            tmp_path.unlink(missing_ok=True)
            print(f"Attempt {attempt} failed: {e}")
            if attempt < MAX_RETRIES:
                print(f"Retrying in {RETRY_DELAY} seconds...")
                time.sleep(RETRY_DELAY)
            else:
                print("All retries exhausted. Mictronics download failed.")
                sys.exit(1)
 if __name__ == "__main__":
    main()
@@ -47,9 +47,6 @@ def convert_faa_master_txt_to_df(zip_path: Path, date: str):
    # Convert all NaN to empty strings
    df = df.fillna("")
    # The FAA parser can produce the literal string "None" for missing values;
    # replace those so they match the empty-string convention used everywhere else.
    df = df.replace("None", "")
    return df
@@ -87,8 +84,8 @@ def concat_faa_historical_df(df_base, df_new):
            # Convert to string
            val_str = str(val).strip()
-            # Handle empty strings and null-like literals
+            # Handle empty strings
-            if val_str == "" or val_str == "nan" or val_str == "None":
+            if val_str == "" or val_str == "nan":
                return ""
            # Check if it looks like a list representation (starts with [ )
Author	SHA1	Message	Date
ggman12	8999a943a9	update histoircal	2026-02-13 00:12:18 -05:00
ggman12	74625b9bc9	split large file into chuncks	2026-02-12 20:22:36 -05:00
ggman12	f2728d6156	delete aws	2026-02-12 20:13:40 -05:00
ggman12	5ed10ec42e	update	2026-02-12 19:32:34 -05:00
ggman12	3b8a14a4b9	add ability for custom run input date	2026-02-12 19:09:35 -05:00
ggman12	e5f124428f	use github token for adsb.lol downlaods	2026-02-12 19:03:23 -05:00
ggman12	d5039fb766	update to fix files	2026-02-12 19:01:02 -05:00
ggman12	50267f3c57	make faa work with no new data	2026-02-12 17:26:48 -05:00
ggman12	dd323f6e55	delete old files	2026-02-12 17:25:50 -05:00
ggman12	0e8b21daf9	rename from planequery to openairframes	2026-02-12 17:24:08 -05:00
ggman12	3960e6936c	use start_date_end_date for adsb naming	2026-02-12 17:13:06 -05:00
ggman12	48623ef79e	delete existign release	2026-02-12 17:12:09 -05:00
ggman12	5affe8937c	rename to openairframes	2026-02-12 17:09:07 -05:00
ggman12	d0254146f3	update release to fix not grabbing FAA file	2026-02-12 16:42:47 -05:00
ggman12	1699ad6d8a	rename file	2026-02-12 16:12:03 -05:00
ggman12	2a6892c347	fix download	2026-02-12 16:08:08 -05:00
ggman12	47ccecb9ba	set fail-fast to true	2026-02-12 16:07:42 -05:00
ggman12	2826dfd450	remove notebook	2026-02-12 16:07:28 -05:00
ggman12	fecf9ff0ea	format properly	2026-02-12 16:01:14 -05:00
ggman12	7e0a396fc7	only modify key parts of schemas/community_submission.v1.schema.json schema. Lowest diffs	2026-02-12 15:55:44 -05:00
ggman12	b0503bb3b2	fix: should update schema now	2026-02-12 15:46:11 -05:00
ggman12	0b89138daf	modify existing json schema instead of creating a new file every time	2026-02-12 15:40:01 -05:00
ggman12	4b756cdaef	fix syntax error	2026-02-12 15:32:37 -05:00
ggman12	9acffe1e56	handle multiple PRs with schema changes	2026-02-12 15:31:53 -05:00
ggman12	1694fe0b46	allow fileupload in submission	2026-02-12 15:26:45 -05:00
ggman12	c6d9e59d01	update template	2026-02-12 13:29:45 -05:00
ggman12	dd6cd7b6fd	update schema with optional start_date and end_date scope	2026-02-12 13:28:43 -05:00
ggman12	f543b671f8	updating schema	2026-02-12 13:22:56 -05:00
ggman12	efb4cbb953	update example	2026-02-12 13:22:43 -05:00
ggman12	5578133a99	update schema to be uppercase only	2026-02-12 12:36:50 -05:00
ggman12	eace7d5a63	update folder	2026-02-12 12:34:27 -05:00
ggman12	82f47b662c	make blank username work	2026-02-12 12:32:41 -05:00
ggman12	787796c3ab	update approve_submission	2026-02-12 12:26:54 -05:00
ggman12	61aae586ee	fix approve	2026-02-12 12:18:28 -05:00
ggman12	5abfa6b226	update submission validation	2026-02-12 12:15:04 -05:00
ggman12	a743b74ae5	Merge branch 'develop'	2026-02-12 12:10:24 -05:00
ggman12	53a020ab73	add jsonschema to requirements.txt	2026-02-12 12:09:03 -05:00
ggman12	2de41c9883	update historical. To check tar and fail fast if any maps fail	2026-02-12 12:01:13 -05:00
ggman12	bccc634158	remove existing release	2026-02-12 11:50:45 -05:00
ggman12	43b07942b0	add needed permissions	2026-02-12 11:42:49 -05:00
ggman12	2c9e994a12	add debug for FAA	2026-02-12 11:06:38 -05:00
ggman12	99b680476a	delete parquet chunck after load to not use so much space for big historical run	2026-02-12 10:52:42 -05:00