From 99b680476adb429c252bb1daf3c881ed38a832f2 Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 10:52:42 -0500
Subject: [PATCH 1/6] delete parquet chunk after load to not use so much space
 for big historical run

---
 .github/workflows/historical-adsb.yaml | 10 ++++++----
 src/adsb/combine_chunks_to_csv.py      | 21 ++++++++++++++++++---
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/historical-adsb.yaml b/.github/workflows/historical-adsb.yaml
index 0bb99a1..456d500 100644
--- a/.github/workflows/historical-adsb.yaml
+++ b/.github/workflows/historical-adsb.yaml
@@ -188,17 +188,19 @@ jobs:
 
       - name: Debug downloaded files
         run: |
+          echo "=== Disk space before processing ==="
+          df -h
           echo "=== Listing data/output/adsb_chunks/ ==="
-          find data/output/adsb_chunks/ -type f 2>/dev/null | head -50 || echo "No files found"
-          echo "=== Looking for parquet files ==="
-          find . -name "*.parquet" 2>/dev/null | head -20 || echo "No parquet files found"
+          find data/output/adsb_chunks/ -type f 2>/dev/null | wc -l
+          echo "=== Total parquet size ==="
+          du -sh data/output/adsb_chunks/ || echo "No chunks dir"
 
       - name: Combine chunks to CSV
         env:
           START_DATE: ${{ needs.generate-matrix.outputs.global_start }}
           END_DATE: ${{ needs.generate-matrix.outputs.global_end }}
         run: |
-          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base
+          python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base --stream
           ls -lah data/planequery_aircraft/
 
       - name: Upload final artifact
diff --git a/src/adsb/combine_chunks_to_csv.py b/src/adsb/combine_chunks_to_csv.py
index 2fe8b4e..9b6eaab 100644
--- a/src/adsb/combine_chunks_to_csv.py
+++ b/src/adsb/combine_chunks_to_csv.py
@@ -36,8 +36,13 @@ def get_target_day() -> datetime:
     return datetime.utcnow() - timedelta(days=1)
 
 
-def process_single_chunk(chunk_path: str) -> pl.DataFrame:
-    """Load and compress a single chunk parquet file."""
+def process_single_chunk(chunk_path: str, delete_after_load: bool = False) -> pl.DataFrame:
+    """Load and compress a single chunk parquet file.
+    
+    Args:
+        chunk_path: Path to parquet file
+        delete_after_load: If True, delete the parquet file after loading to free disk space
+    """
     print(f"Processing {os.path.basename(chunk_path)}... | {get_resource_usage()}")
     
     # Load chunk - only columns we need
@@ -45,6 +50,14 @@ def process_single_chunk(chunk_path: str) -> pl.DataFrame:
     df = pl.read_parquet(chunk_path, columns=needed_columns)
     print(f"  Loaded {len(df)} rows")
     
+    # Delete file immediately after loading to free disk space
+    if delete_after_load:
+        try:
+            os.remove(chunk_path)
+            print(f"  Deleted {chunk_path} to free disk space")
+        except Exception as e:
+            print(f"  Warning: Failed to delete {chunk_path}: {e}")
+    
     # Compress to aircraft records (one per ICAO) using shared function
     compressed = compress_multi_icao_df(df, verbose=True)
     print(f"  Compressed to {len(compressed)} aircraft records")
@@ -156,6 +169,7 @@ def main():
     parser.add_argument("--chunks-dir", type=str, default=DEFAULT_CHUNK_DIR, help="Directory containing chunk parquet files")
     parser.add_argument("--skip-base", action="store_true", help="Skip downloading and merging base release")
     parser.add_argument("--keep-chunks", action="store_true", help="Keep chunk files after merging")
+    parser.add_argument("--stream", action="store_true", help="Delete parquet files immediately after loading to save disk space")
     args = parser.parse_args()
     
     # Determine output ID and filename based on mode
@@ -190,9 +204,10 @@ def main():
     print(f"Found {len(chunk_files)} chunk files")
     
     # Process each chunk separately to save memory
+    # With --stream, delete parquet files immediately after loading to save disk space
     compressed_chunks = []
     for chunk_path in chunk_files:
-        compressed = process_single_chunk(chunk_path)
+        compressed = process_single_chunk(chunk_path, delete_after_load=args.stream)
         compressed_chunks.append(compressed)
         gc.collect()
     

From 2c9e994a12dbd7e9188d6bdd6b60721bc394713a Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 11:06:38 -0500
Subject: [PATCH 2/6] add debug for FAA

---
 .github/workflows/planequery-aircraft-daily-release.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/planequery-aircraft-daily-release.yaml b/.github/workflows/planequery-aircraft-daily-release.yaml
index 00838cb..8dc1c96 100644
--- a/.github/workflows/planequery-aircraft-daily-release.yaml
+++ b/.github/workflows/planequery-aircraft-daily-release.yaml
@@ -277,6 +277,15 @@ jobs:
           name: community-release
           path: artifacts/community
 
+      - name: Debug artifact structure
+        run: |
+          echo "=== FAA artifacts ==="
+          find artifacts/faa -type f 2>/dev/null || echo "No files found in artifacts/faa"
+          echo "=== ADS-B artifacts ==="
+          find artifacts/adsb -type f 2>/dev/null || echo "No files found in artifacts/adsb"
+          echo "=== Community artifacts ==="
+          find artifacts/community -type f 2>/dev/null || echo "No files found in artifacts/community"
+
       - name: Prepare release metadata
         id: meta
         run: |

From 43b07942b05b06986ea52e688f37061f891bcf69 Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 11:42:49 -0500
Subject: [PATCH 3/6] add needed permissions

---
 .github/workflows/validate-community-submission.yaml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.github/workflows/validate-community-submission.yaml b/.github/workflows/validate-community-submission.yaml
index e217401..dbb1a34 100644
--- a/.github/workflows/validate-community-submission.yaml
+++ b/.github/workflows/validate-community-submission.yaml
@@ -4,6 +4,9 @@ on:
   issues:
     types: [opened, edited]
 
+permissions:
+  issues: write
+
 jobs:
   validate:
     if: contains(github.event.issue.labels.*.name, 'submission')
@@ -20,6 +23,13 @@ jobs:
       - name: Install dependencies
         run: pip install jsonschema
 
+      - name: Debug issue body
+        env:  # issue text is untrusted: hand it over via env, never expand it inside the script
+          ISSUE_BODY: ${{ github.event.issue.body }}
+        run: |
+          echo "=== Issue Body ==="
+          printf '%s\n' "$ISSUE_BODY"
+
       - name: Validate submission
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

From bccc634158026d6f4c48f7f31a9d3103ef2c3cd4 Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 11:50:45 -0500
Subject: [PATCH 4/6] remove existing release

---
 .github/workflows/planequery-aircraft-daily-release.yaml | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/workflows/planequery-aircraft-daily-release.yaml b/.github/workflows/planequery-aircraft-daily-release.yaml
index 8dc1c96..ac62132 100644
--- a/.github/workflows/planequery-aircraft-daily-release.yaml
+++ b/.github/workflows/planequery-aircraft-daily-release.yaml
@@ -321,6 +321,13 @@ jobs:
           echo "zip_basename=$ZIP_BASENAME" >> "$GITHUB_OUTPUT"
           echo "name=planequery-aircraft snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
 
+      - name: Delete existing release if exists
+        run: |
+          gh release delete "${{ steps.meta.outputs.tag }}" --yes 2>/dev/null || true
+          git push --delete origin "refs/tags/${{ steps.meta.outputs.tag }}" 2>/dev/null || true
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
       - name: Create GitHub Release and upload assets
         uses: softprops/action-gh-release@v2
         with:

From 2de41c98835fd0e97845f0626dc4b98c0df9e811 Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 12:01:13 -0500
Subject: [PATCH 5/6] update historical workflow: check tar integrity and fail
 fast if any maps fail

---
 .github/workflows/historical-adsb.yaml | 27 ++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/historical-adsb.yaml b/.github/workflows/historical-adsb.yaml
index 456d500..596833a 100644
--- a/.github/workflows/historical-adsb.yaml
+++ b/.github/workflows/historical-adsb.yaml
@@ -81,8 +81,22 @@ jobs:
       - name: Create tar of extracted data
         run: |
           cd data/output
-          tar -cf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "Some files may not exist"
-          ls -lah extracted_data.tar || echo "No tar created"
+          echo "=== Disk space before tar ==="
+          df -h .
+          echo "=== Files to tar ==="
+          ls -lah *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "No files found"
+          
+          # Create tar with explicit error checking
+          if ls *-planes-readsb-prod-0.tar_0 1>/dev/null 2>&1; then
+            tar -cvf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt
+            echo "=== Tar file created ==="
+            ls -lah extracted_data.tar
+            # Verify tar integrity
+            tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
+          else
+            echo "ERROR: No extracted directories found, cannot create tar"
+            exit 1
+          fi
 
       - name: Upload extracted data
         uses: actions/upload-artifact@v4
@@ -97,7 +111,7 @@ jobs:
     needs: [generate-matrix, adsb-extract]
     runs-on: ubuntu-24.04-arm
     strategy:
-      fail-fast: false
+      fail-fast: true
       matrix:
         chunk: ${{ fromJson(needs.generate-matrix.outputs.chunks) }}
         icao_chunk: [0, 1, 2, 3]
@@ -134,7 +148,12 @@ jobs:
         run: |
           cd data/output
           if [ -f extracted_data.tar ]; then
-            tar -xf extracted_data.tar
+            echo "=== Tar file info ==="
+            ls -lah extracted_data.tar
+            echo "=== Verifying tar integrity ==="
+            tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Tar file is corrupted"; exit 1; }
+            echo "=== Extracting ==="
+            tar -xvf extracted_data.tar
             rm extracted_data.tar
             echo "has_data=true" >> "$GITHUB_OUTPUT"
             echo "=== Contents of data/output ==="

From 53a020ab7329deb8bbce0f4f32d792f2dfd02395 Mon Sep 17 00:00:00 2001
From: ggman12 <goodejonah@gmail.com>
Date: Thu, 12 Feb 2026 12:09:03 -0500
Subject: [PATCH 6/6] add jsonschema to requirements.txt

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 6a4ec9a..5d93f27 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pandas==3.0.0
 pyarrow==23.0.0
 orjson==3.11.7
 polars==1.38.1
+jsonschema==4.26.0
\ No newline at end of file