OpenAirframes 1.0

2026-04-23 19:46:09 +02:00 · 2026-02-12 10:52:42 -05:00
parent f9e04337ae
commit 4015a5fcf1
33 changed files with 1212 additions and 1138 deletions
@@ -38,9 +38,10 @@ jobs:
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
+          ISSUE_BODY: ${{ github.event.issue.body }}  # untrusted issue text: handed over as an env var instead of being spliced into the script
        run: |
          python -m src.contributions.approve_submission \
            --issue-number ${{ github.event.issue.number }} \
-            --issue-body "${{ github.event.issue.body }}" \
+            --issue-body "$ISSUE_BODY" \
            --author "${{ steps.author.outputs.username }}" \
            --author-id ${{ steps.author.outputs.user_id }}
@@ -48,7 +48,7 @@ jobs:
      matrix:
        chunk: ${{ fromJson(needs.generate-matrix.outputs.chunks) }}
      max-parallel: 3
-      fail-fast: false
+      fail-fast: true  # the first failing chunk cancels the remaining matrix jobs
    steps:
      - name: Checkout
        uses: actions/checkout@v4
@@ -74,21 +74,51 @@ jobs:
        env:
          START_DATE: ${{ matrix.chunk.start_date }}
          END_DATE: ${{ matrix.chunk.end_date }}
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): presumably read by the download script for authenticated requests - confirm
        run: |
          python -m src.adsb.download_and_list_icaos --start-date "$START_DATE" --end-date "$END_DATE"
          ls -lah data/output/

-      - name: Create tar of extracted data
+      - name: Create tar of extracted data and split into chunks
        run: |
          cd data/output
-          tar -cf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "Some files may not exist"
-          ls -lah extracted_data.tar || echo "No tar created"
+          echo "=== Disk space before tar ==="
+          df -h .
+          echo "=== Files to tar ==="
+          ls -lah *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "No files found"
+          
+          # Build the tar only when at least one extracted archive exists; otherwise fail the step
+          if ls *-planes-readsb-prod-0.tar_0 1>/dev/null 2>&1; then
+            tar -cvf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt  # NOTE(review): tar exits non-zero if no icao_manifest_*.txt matches - confirm manifests always exist
+            echo "=== Tar file created ==="
+            ls -lah extracted_data.tar
+            # Verify the tar is readable end to end by listing its members
+            tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
+            
+            # Create checksum of the FULL tar before splitting (for verification after reassembly)
+            echo "=== Creating checksum of full tar ==="
+            sha256sum extracted_data.tar > full_tar.sha256
+            cat full_tar.sha256
+            
+            # Split into 500MB chunks to avoid artifact upload issues
+            echo "=== Splitting tar into 500MB chunks ==="
+            mkdir -p tar_chunks
+            split -b 500M extracted_data.tar tar_chunks/extracted_data.tar.part_
+            rm extracted_data.tar  # only the chunks are uploaded, so the full tar can go
+            mv full_tar.sha256 tar_chunks/  # ship the checksum in the same artifact as the chunks
+            
+            echo "=== Chunks created ==="
+            ls -lah tar_chunks/
+          else
+            echo "ERROR: No extracted directories found, cannot create tar"
+            exit 1
+          fi

-      - name: Upload extracted data
+      - name: Upload extracted data chunks
        uses: actions/upload-artifact@v4
        with:
          name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
-          path: data/output/extracted_data.tar
+          path: data/output/tar_chunks/  # directory holding the part_* files plus full_tar.sha256
          retention-days: 1
          compression-level: 0
          if-no-files-found: warn
@@ -97,7 +127,7 @@ jobs:
    needs: [generate-matrix, adsb-extract]
    runs-on: ubuntu-24.04-arm
    strategy:
-      fail-fast: false
+      fail-fast: true  # one failing (chunk, icao_chunk) combination cancels the rest of the matrix
      matrix:
        chunk: ${{ fromJson(needs.generate-matrix.outputs.chunks) }}
        icao_chunk: [0, 1, 2, 3]
@@ -126,21 +156,48 @@ jobs:
        uses: actions/download-artifact@v4
        with:
          name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
-          path: data/output/
-        continue-on-error: true
+          path: data/output/tar_chunks/  # continue-on-error was dropped, so a failed download now fails the job

-      - name: Extract tar
+      - name: Reassemble and extract tar
        id: extract
        run: |
          cd data/output
-          if [ -f extracted_data.tar ]; then
-            tar -xf extracted_data.tar
+          if [ -d tar_chunks ] && ls tar_chunks/extracted_data.tar.part_* 1>/dev/null 2>&1; then
+            echo "=== Chunk files info ==="
+            ls -lah tar_chunks/
+            
+            cd tar_chunks
+            
+            # Rebuild the tar by appending the two-character-suffix parts in sorted name order
+            echo "=== Reassembling tar file ==="
+            ls -1 extracted_data.tar.part_?? | sort | while read part; do
+              echo "Appending $part..."
+              cat "$part" >> ../extracted_data.tar
+            done
+            cd ..
+            
+            echo "=== Reassembled tar file info ==="
+            ls -lah extracted_data.tar
+            
+            # Verify checksum of reassembled tar matches the one recorded before splitting
+            echo "=== Verifying reassembled tar checksum ==="
+            echo "Original checksum:"
+            cat tar_chunks/full_tar.sha256
+            echo "Reassembled checksum:"
+            sha256sum extracted_data.tar
+            sha256sum -c tar_chunks/full_tar.sha256 || { echo "ERROR: Reassembled tar checksum mismatch - data corrupted during transfer"; exit 1; }
+            echo "Checksum verified - data integrity confirmed"
+            
+            rm -rf tar_chunks  # parts are no longer needed once the checksum has matched
+            
+            echo "=== Extracting ==="
+            tar -xvf extracted_data.tar
            rm extracted_data.tar
            echo "has_data=true" >> "$GITHUB_OUTPUT"
            echo "=== Contents of data/output ==="
            ls -lah
          else
-            echo "No extracted_data.tar found"
+            echo "No tar chunks found"
            echo "has_data=false" >> "$GITHUB_OUTPUT"
          fi

@@ -188,22 +245,24 @@ jobs:

      - name: Debug downloaded files
        run: |
+          echo "=== Disk space before processing ==="
+          df -h
          echo "=== Listing data/output/adsb_chunks/ ==="
-          find data/output/adsb_chunks/ -type f 2>/dev/null | head -50 || echo "No files found"
-          echo "=== Looking for parquet files ==="
-          find . -name "*.parquet" 2>/dev/null | head -20 || echo "No parquet files found"
+          find data/output/adsb_chunks/ -type f 2>/dev/null | wc -l  # prints only the number of files
+          echo "=== Total parquet size ==="
+          du -sh data/output/adsb_chunks/ || echo "No chunks dir"  # size of the whole directory, not only *.parquet files

      - name: Combine chunks to CSV
        env:
          START_DATE: ${{ needs.generate-matrix.outputs.global_start }}
          END_DATE: ${{ needs.generate-matrix.outputs.global_end }}
        run: |
-          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base
-          ls -lah data/planequery_aircraft/
+          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base --stream  # NOTE(review): --stream presumably limits memory use - confirm in combine_chunks_to_csv
+          ls -lah data/openairframes/  # output directory renamed from data/planequery_aircraft/

      - name: Upload final artifact
        uses: actions/upload-artifact@v4
        with:
-          name: planequery_aircraft_adsb-${{ needs.generate-matrix.outputs.global_start }}-${{ needs.generate-matrix.outputs.global_end }}
-          path: data/planequery_aircraft/*.csv
+          name: openairframes_adsb-${{ needs.generate-matrix.outputs.global_start }}-${{ needs.generate-matrix.outputs.global_end }}
+          path: data/openairframes/*.csv  # every CSV in the directory listed by the previous step
          retention-days: 30
@@ -1,10 +1,15 @@
-name: planequery-aircraft Daily Release
+name: OpenAirframes Daily Release

 on:
  schedule:
    # 6:00pm UTC every day - runs on default branch, triggers both
    - cron: "0 06 * * *"
  workflow_dispatch:
+    inputs:
+      date:
+        description: 'Date to process (YYYY-MM-DD format, default: yesterday)'
+        required: false
+        type: string

 permissions:
  contents: write
@@ -22,7 +27,7 @@ jobs:
            await github.rest.actions.createWorkflowDispatch({
              owner: context.repo.owner,
              repo: context.repo.repo,
-              workflow_id: 'planequery-aircraft-daily-release.yaml',
+              workflow_id: 'openairframes-daily-release.yaml', // renamed from planequery-aircraft-daily-release.yaml
              ref: 'main'
            });
      
@@ -33,7 +38,7 @@ jobs:
            await github.rest.actions.createWorkflowDispatch({
              owner: context.repo.owner,
              repo: context.repo.repo,
-              workflow_id: 'planequery-aircraft-daily-release.yaml',
+              workflow_id: 'openairframes-daily-release.yaml', // same workflow file, dispatched on the develop ref
              ref: 'develop'
            });

@@ -58,16 +63,16 @@ jobs:

      - name: Run FAA release script
        run: |
-          python src/create_daily_planequery_aircraft_release.py
+          python src/create_daily_faa_release.py ${{ inputs.date && format('--date {0}', inputs.date) || '' }}  # adds "--date <value>" only when the date input is set. NOTE(review): the value lands in the script unquoted; consider passing it through env
          ls -lah data/faa_releasable
-          ls -lah data/planequery_aircraft
+          ls -lah data/openairframes  # output directory renamed from data/planequery_aircraft

      - name: Upload FAA artifacts
        uses: actions/upload-artifact@v4
        with:
          name: faa-release
          path: |
-            data/planequery_aircraft/planequery_aircraft_faa_*.csv
+            data/openairframes/openairframes_faa_*.csv
            data/faa_releasable/ReleasableAircraft_*.zip
          retention-days: 1

@@ -93,8 +98,10 @@ jobs:
          pip install -r requirements.txt

      - name: Download and extract ADS-B data
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # NOTE(review): presumably consumed by the download script - confirm
        run: |
-          python -m src.adsb.download_and_list_icaos
+          python -m src.adsb.download_and_list_icaos ${{ inputs.date && format('--date {0}', inputs.date) || '' }}  # adds "--date <value>" only when the date input is set
          ls -lah data/output/
      - name: Check manifest exists
@@ -164,7 +171,7 @@ jobs:

      - name: Process chunk ${{ matrix.chunk }}
        run: |
-          python -m src.adsb.process_icao_chunk --chunk-id ${{ matrix.chunk }} --total-chunks 4
+          python -m src.adsb.process_icao_chunk --chunk-id ${{ matrix.chunk }} --total-chunks 4 ${{ inputs.date && format('--date {0}', inputs.date) || '' }}  # adds "--date <value>" only when the date input is set
          mkdir -p data/output/adsb_chunks
          ls -lah data/output/adsb_chunks/ || echo "No chunks created"

@@ -213,14 +220,14 @@ jobs:
        run: |
          mkdir -p data/output/adsb_chunks
          ls -lah data/output/adsb_chunks/ || echo "Directory empty or does not exist"
-          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks
-          ls -lah data/planequery_aircraft/
+          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks ${{ inputs.date && format('--date {0}', inputs.date) || '' }}  # adds "--date <value>" only when the date input is set
+          ls -lah data/openairframes/  # output directory renamed from data/planequery_aircraft/

      - name: Upload ADS-B artifacts
        uses: actions/upload-artifact@v4
        with:
          name: adsb-release
-          path: data/planequery_aircraft/planequery_aircraft_adsb_*.csv
+          path: data/openairframes/openairframes_adsb_*.csv  # the create-release job later searches for this file name pattern
          retention-days: 1


  build-community:
@@ -245,13 +252,13 @@ jobs:
      - name: Run Community release script
        run: |
          python -m src.contributions.create_daily_community_release
-          ls -lah data/planequery_aircraft
+          ls -lah data/openairframes  # output directory renamed from data/planequery_aircraft

      - name: Upload Community artifacts
        uses: actions/upload-artifact@v4
        with:
          name: community-release
-          path: data/planequery_aircraft/planequery_aircraft_community_*.csv
+          path: data/openairframes/openairframes_community_*.csv  # the create-release job later searches for this file name pattern
          retention-days: 1


  create-release:
@@ -259,6 +266,13 @@ jobs:
    needs: [build-faa, adsb-reduce, build-community]
    if: github.event_name != 'schedule'
    steps:
+      - name: Checkout for gh CLI  # NOTE(review): presumably gives gh a repository context for the later release-delete step - confirm
+        uses: actions/checkout@v4
+        with:
+          sparse-checkout: |
+            .github
+          sparse-checkout-cone-mode: false  # entries above are treated as patterns rather than cone directories
+
      - name: Download FAA artifacts
        uses: actions/download-artifact@v4
        with:
@@ -277,6 +291,17 @@ jobs:
          name: community-release
          path: artifacts/community

+      - name: Debug artifact structure  # diagnostic listing only; every command falls back to echo
+        run: |
+          echo "=== Full artifacts tree ==="
+          find artifacts -type f 2>/dev/null || echo "No files found in artifacts"  # the fallback fires when find errors (e.g. missing directory), not when it is merely empty
+          echo "=== FAA artifacts ==="
+          find artifacts/faa -type f 2>/dev/null || echo "No files found in artifacts/faa"
+          echo "=== ADS-B artifacts ==="
+          find artifacts/adsb -type f 2>/dev/null || echo "No files found in artifacts/adsb"
+          echo "=== Community artifacts ==="
+          find artifacts/community -type f 2>/dev/null || echo "No files found in artifacts/community"
+
      - name: Prepare release metadata
        id: meta
        run: |
@@ -288,16 +313,38 @@ jobs:
          elif [ "$BRANCH_NAME" = "develop" ]; then
            BRANCH_SUFFIX="-develop"
          fi
-          TAG="planequery-aircraft-${DATE}${BRANCH_SUFFIX}"
+          TAG="openairframes-${DATE}${BRANCH_SUFFIX}"
          
-          # Find files from artifacts
-          CSV_FILE_FAA=$(ls artifacts/faa/data/planequery_aircraft/planequery_aircraft_faa_*.csv | head -1)
+          # Locate release files anywhere under each artifact directory; head -1 keeps only the first match
+          CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
+          CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv" -type f 2>/dev/null | head -1)
+          CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
+          ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
+          
+          # Validate required files exist (the community CSV is not part of this check)
+          MISSING_FILES=""
+          if [ -z "$CSV_FILE_FAA" ] || [ ! -f "$CSV_FILE_FAA" ]; then
+            MISSING_FILES="$MISSING_FILES FAA_CSV"
+          fi
+          if [ -z "$CSV_FILE_ADSB" ] || [ ! -f "$CSV_FILE_ADSB" ]; then
+            MISSING_FILES="$MISSING_FILES ADSB_CSV"
+          fi
+          if [ -z "$ZIP_FILE" ] || [ ! -f "$ZIP_FILE" ]; then
+            MISSING_FILES="$MISSING_FILES FAA_ZIP"
+          fi
+          
+          if [ -n "$MISSING_FILES" ]; then
+            echo "ERROR: Missing required release files:$MISSING_FILES"
+            echo "FAA CSV: $CSV_FILE_FAA"
+            echo "ADSB CSV: $CSV_FILE_ADSB"
+            echo "ZIP: $ZIP_FILE"
+            exit 1
+          fi
+          
+          # Get basenames for display
          CSV_BASENAME_FAA=$(basename "$CSV_FILE_FAA")
-          CSV_FILE_ADSB=$(ls artifacts/adsb/planequery_aircraft_adsb_*.csv | head -1)
          CSV_BASENAME_ADSB=$(basename "$CSV_FILE_ADSB")
-          CSV_FILE_COMMUNITY=$(ls artifacts/community/planequery_aircraft_community_*.csv 2>/dev/null | head -1 || echo "")
          CSV_BASENAME_COMMUNITY=$(basename "$CSV_FILE_COMMUNITY" 2>/dev/null || echo "")
-          ZIP_FILE=$(ls artifacts/faa/data/faa_releasable/ReleasableAircraft_*.zip | head -1)
          ZIP_BASENAME=$(basename "$ZIP_FILE")
          
          echo "date=$DATE" >> "$GITHUB_OUTPUT"
@@ -310,13 +357,27 @@ jobs:
          echo "csv_basename_community=$CSV_BASENAME_COMMUNITY" >> "$GITHUB_OUTPUT"
          echo "zip_file=$ZIP_FILE" >> "$GITHUB_OUTPUT"
          echo "zip_basename=$ZIP_BASENAME" >> "$GITHUB_OUTPUT"
-          echo "name=planequery-aircraft snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
+          echo "name=OpenAirframes snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
+          
+          echo "Found files:"
+          echo "  FAA CSV: $CSV_FILE_FAA"
+          echo "  ADSB CSV: $CSV_FILE_ADSB"
+          echo "  Community CSV: $CSV_FILE_COMMUNITY"
+          echo "  ZIP: $ZIP_FILE"
+
+      - name: Delete existing release if exists
+        run: |
+          echo "Attempting to delete release: ${{ steps.meta.outputs.tag }}"
+          gh release delete "${{ steps.meta.outputs.tag }}" --yes --cleanup-tag || echo "No existing release to delete"  # best-effort: any gh failure is reported with this same message
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}  # token the gh CLI authenticates with

      - name: Create GitHub Release and upload assets
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.meta.outputs.tag }}
          name: ${{ steps.meta.outputs.name }}
+          fail_on_unmatched_files: true  # fail the step when a listed asset pattern matches nothing
          body: |
            Automated daily snapshot generated at 06:00 UTC for ${{ steps.meta.outputs.date }}.

@@ -0,0 +1,171 @@
+name: Process Historical FAA Data
+
+on:
+  workflow_dispatch:  # Manual trigger
+
+jobs:
+  generate-matrix:
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}  # JSON list of {"since", "until"} date ranges
+    steps:
+      - name: Generate date ranges
+        id: set-matrix
+        run: |
+          python3 << 'EOF' >> "$GITHUB_OUTPUT"
+          import json
+          from datetime import datetime, timedelta
+          
+          start = datetime(2023, 8, 16)
+          end = datetime(2026, 1, 1)
+          
+          ranges = []
+          current = start
+          
+          # Process in 4-day chunks; each chunk's "until" is the next chunk's "since"
+          while current < end:
+            chunk_end = current + timedelta(days=4)
+            # Don't go past the end date
+            if chunk_end > end:
+              chunk_end = end
+            
+            ranges.append({
+              "since": current.strftime("%Y-%m-%d"),
+              "until": chunk_end.strftime("%Y-%m-%d")
+            })
+            
+            current = chunk_end
+          # stdout of this heredoc is appended to $GITHUB_OUTPUT (replaces the deprecated ::set-output command)
+          print(f"matrix={json.dumps(ranges)}")
+          EOF
+
+
+  clone-faa-repo:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Cache FAA repository
+        id: cache-faa-repo
+        uses: actions/cache@v4
+        with:
+          path: data/scrape-faa-releasable-aircraft
+          key: faa-repo-v1  # static key: once a cache exists the clone below is skipped until the key is bumped
+          
+      - name: Clone FAA repository
+        if: steps.cache-faa-repo.outputs.cache-hit != 'true'  # clone only on a cache miss
+        run: |
+          mkdir -p data
+          git clone https://github.com/simonw/scrape-faa-releasable-aircraft data/scrape-faa-releasable-aircraft
+          echo "Repository cloned successfully"
+
+  process-chunk:
+    needs: [generate-matrix, clone-faa-repo]
+    runs-on: ubuntu-latest
+    strategy:
+      max-parallel: 5  # Process 5 chunks at a time
+      matrix:
+        range: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}  # one job per {"since", "until"} entry
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      
+      - name: Restore FAA repository cache
+        uses: actions/cache/restore@v4
+        with:
+          path: data/scrape-faa-releasable-aircraft
+          key: faa-repo-v1  # must match the key used by the clone-faa-repo job
+          fail-on-cache-miss: true  # fail the job rather than continue without the cloned repository
+      
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+      
+      - name: Process chunk ${{ matrix.range.since }} to ${{ matrix.range.until }}
+        run: |
+          python src/get_historical_faa.py "${{ matrix.range.since }}" "${{ matrix.range.until }}"  # NOTE(review): "until" equals the next chunk's "since" - confirm the script treats it as exclusive
+      
+      - name: Upload CSV artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: csv-${{ matrix.range.since }}-to-${{ matrix.range.until }}  # unique artifact name per date range
+          path: data/faa_releasable_historical/*.csv
+          retention-days: 1
+
+  create-release:
+    needs: process-chunk
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # required to create the release and upload assets
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts  # no artifact name given, so every artifact of the run is downloaded
+      
+      - name: Prepare release files
+        run: |
+          mkdir -p release-files
+          find artifacts -name "*.csv" -exec cp {} release-files/ \;
+          ls -lh release-files/
+      
+      - name: Create Release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: historical-faa-${{ github.run_number }}
+          name: Historical FAA Data Release ${{ github.run_number }}
+          body: |  # NOTE(review): "Generated" below prints the repository's updated_at value, not the run time
+            Automated release of historical FAA aircraft data
+            Processing period: 2023-08-16 to 2026-01-01
+            Generated: ${{ github.event.repository.updated_at }}
+          files: release-files/*.csv
+          draft: false
+          prerelease: false
+
+  concatenate-and-release:
+    needs: process-chunk
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write  # required to create the release and upload assets
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.12'
+      
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+      
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: artifacts  # no artifact name given, so every artifact of the run is downloaded
+      
+      - name: Prepare CSVs for concatenation
+        run: |
+          mkdir -p data/faa_releasable_historical
+          find artifacts -name "*.csv" -exec cp {} data/faa_releasable_historical/ \;
+          ls -lh data/faa_releasable_historical/
+      
+      - name: Concatenate all CSVs
+        run: |
+          python scripts/concat_csvs.py
+      
+      - name: Create Combined Release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: historical-faa-combined-${{ github.run_number }}
+          name: Historical FAA Data Combined Release ${{ github.run_number }}
+          body: |  # NOTE(review): "Generated" below prints the repository's updated_at value, not the run time
+            Combined historical FAA aircraft data (all chunks concatenated)
+            Processing period: 2023-08-16 to 2026-01-01
+            Generated: ${{ github.event.repository.updated_at }}
+          files: data/openairframes/*.csv  # NOTE(review): assumes concat_csvs.py writes its output here - confirm
+          draft: false
+          prerelease: false
@@ -0,0 +1,77 @@
+name: Update Community PRs After Merge
+
+on:
+  push:
+    branches: [main]
+    paths:  # run only when community data or the submission schema changes on main
+      - 'community/**'
+      - 'schemas/community_submission.v1.schema.json'
+
+permissions:
+  contents: write  # the job pushes merge commits to PR branches
+  pull-requests: write  # the job comments on PRs that hit merge conflicts
+
+jobs:
+  update-open-prs:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # full history, needed to merge origin/main into PR branches
+          token: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Setup Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: pip install jsonschema
+
+      - name: Find and update open community PRs
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Get list of open community PRs as "<number> <branch>" lines (gh pr list returns at most 30 by default)
+          prs=$(gh pr list --label community --state open --json number,headRefName --jq '.[] | "\(.number) \(.headRefName)"')
+          
+          if [ -z "$prs" ]; then
+            echo "No open community PRs found"
+            exit 0
+          fi
+          
+          echo "$prs" | while read -r pr_number branch_name; do
+            echo "Processing PR #$pr_number (branch: $branch_name)"
+            
+            # Checkout PR branch
+            git fetch origin "$branch_name"
+            git checkout "$branch_name"
+            
+            # Merge main into PR branch, committing as the Actions bot
+            git config user.name "github-actions[bot]"
+            git config user.email "github-actions[bot]@users.noreply.github.com"
+            
+            if git merge origin/main -m "Merge main to update schema"; then
+              # Regenerate schema for this PR's submission (adds any new tags); failures are tolerated
+              python -m src.contributions.regenerate_pr_schema || true
+              
+              # If there are changes under schemas/, commit them; push in either case
+              if [ -n "$(git status --porcelain schemas/)" ]; then
+                git add schemas/
+                git commit -m "Update schema with new tags"
+                git push origin "$branch_name"
+                echo "  Updated PR #$pr_number with schema changes"
+              else
+                git push origin "$branch_name"
+                echo "  Merged main into PR #$pr_number"
+              fi
+            else
+              echo "  Merge conflict in PR #$pr_number, adding comment"
+              gh pr comment "$pr_number" --body $'⚠️ **Merge Conflict**\n\nAnother community submission was merged and this PR has conflicts.\n\nA maintainer may need to:\n1. Close this PR\n2. Remove the `approved` label from the original issue\n3. Re-add the `approved` label to regenerate the PR'
+              git merge --abort
+            fi
+            
+            # Return to main before processing the next PR
+            git checkout main
+          done
@@ -4,6 +4,9 @@ on:
  issues:
    types: [opened, edited]

+permissions:
+  issues: write  # NOTE(review): presumably lets the validation job comment on or label the issue - confirm
+
 jobs:
  validate:
    if: contains(github.event.issue.labels.*.name, 'submission')
@@ -20,11 +23,24 @@ jobs:
      - name: Install dependencies
        run: pip install jsonschema

+      - name: Debug issue body
+        env:
+          ISSUE_BODY: ${{ github.event.issue.body }}  # untrusted text: passed via env so it is never parsed as shell
+        run: |
+          echo "=== Issue Body ==="
+          printf '%s\n' "$ISSUE_BODY"
+
+      - name: Save issue body to file
+        env:
+          ISSUE_BODY: ${{ github.event.issue.body }}  # untrusted text: passed via env so it is never parsed as shell
+        run: |
+          printf '%s\n' "$ISSUE_BODY" > /tmp/issue_body.txt
+
      - name: Validate submission
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          python -m src.contributions.validate_submission \
-            --issue-body "${{ github.event.issue.body }}" \
+            --issue-body-file /tmp/issue_body.txt \
            --issue-number ${{ github.event.issue.number }}