From 74625b9bc99312d028100a40f9f2ccecd54f5f66 Mon Sep 17 00:00:00 2001
From: ggman12
Date: Thu, 12 Feb 2026 20:22:36 -0500
Subject: [PATCH] split large file into chunks

---
Reviewer notes (text between the "---" line and the diff is ignored by git am):

* Producer step: the tar is split into 1G pieces named
  extracted_data.tar.part_XX, each with a part_XX.sha256 sidecar, and the
  tar_chunks/ directory is uploaded as the artifact.
* Consumer step: sidecars are verified with sha256sum -c, the pieces are
  concatenated back into extracted_data.tar, and the tar is listed and
  extracted as before.
* Fix relative to the original commit: reassembly used
  "cat extracted_data.tar.part_*", whose glob also matches the .sha256
  sidecars, so checksum text ended up inside the reassembled archive. The
  loop now skips *.sha256. Only the content of added lines changed, so hunk
  line counts are unchanged; the index blob ids still refer to the original
  commit.
* Line breaks and indentation were reconstructed from a whitespace-collapsed
  copy of this patch. Confirm against the target file, or apply with
  "git apply --ignore-whitespace".

 .github/workflows/historical-adsb.yaml | 52 +++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/historical-adsb.yaml b/.github/workflows/historical-adsb.yaml
index f18b296..b06938f 100644
--- a/.github/workflows/historical-adsb.yaml
+++ b/.github/workflows/historical-adsb.yaml
@@ -79,7 +79,7 @@ jobs:
           python -m src.adsb.download_and_list_icaos --start-date "$START_DATE" --end-date "$END_DATE"
           ls -lah data/output/
 
-      - name: Create tar of extracted data
+      - name: Create tar of extracted data and split into chunks
         run: |
           cd data/output
           echo "=== Disk space before tar ==="
@@ -94,16 +94,30 @@ jobs:
             ls -lah extracted_data.tar
             # Verify tar integrity
             tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
+
+            # Split into 1GB chunks to avoid artifact upload issues
+            echo "=== Splitting tar into 1GB chunks ==="
+            mkdir -p tar_chunks
+            split -b 1G extracted_data.tar tar_chunks/extracted_data.tar.part_
+            rm extracted_data.tar
+
+            # Create checksums for each chunk
+            cd tar_chunks
+            for f in extracted_data.tar.part_*; do
+              sha256sum "$f" > "$f.sha256"
+            done
+            echo "=== Chunks created ==="
+            ls -lah
           else
             echo "ERROR: No extracted directories found, cannot create tar"
             exit 1
           fi
 
-      - name: Upload extracted data
+      - name: Upload extracted data chunks
         uses: actions/upload-artifact@v4
         with:
           name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
-          path: data/output/extracted_data.tar
+          path: data/output/tar_chunks/
           retention-days: 1
           compression-level: 0
           if-no-files-found: warn
@@ -141,18 +155,36 @@ jobs:
         uses: actions/download-artifact@v4
         with:
           name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
-          path: data/output/
-        continue-on-error: true
+          path: data/output/tar_chunks/
 
-      - name: Extract tar
+      - name: Reassemble and extract tar
         id: extract
         run: |
           cd data/output
-          if [ -f extracted_data.tar ]; then
-            echo "=== Tar file info ==="
+          if [ -d tar_chunks ] && ls tar_chunks/extracted_data.tar.part_* 1>/dev/null 2>&1; then
+            echo "=== Chunk files info ==="
+            ls -lah tar_chunks/
+
+            # Verify checksums
+            echo "=== Verifying chunk checksums ==="
+            cd tar_chunks
+            for f in extracted_data.tar.part_*.sha256; do
+              sha256sum -c "$f" || { echo "ERROR: Checksum verification failed for $f"; exit 1; }
+            done
+            echo "All checksums verified"
+
+            # Reassemble tar from the data chunks only; part_* also matches the .sha256 sidecars, so skip those
+            echo "=== Reassembling tar file ==="
+            for part in extracted_data.tar.part_*; do [[ "$part" == *.sha256 ]] || cat -- "$part"; done > ../extracted_data.tar
+            cd ..
+            rm -rf tar_chunks
+
+            echo "=== Reassembled tar file info ==="
             ls -lah extracted_data.tar
+
             echo "=== Verifying tar integrity ==="
-            tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Tar file is corrupted"; exit 1; }
+            tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Reassembled tar file is corrupted"; exit 1; }
+
             echo "=== Extracting ==="
             tar -xvf extracted_data.tar
             rm extracted_data.tar
@@ -160,7 +192,7 @@ jobs:
             echo "=== Contents of data/output ==="
             ls -lah
           else
-            echo "No extracted_data.tar found"
+            echo "No tar chunks found"
             echo "has_data=false" >> "$GITHUB_OUTPUT"
           fi
 