mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-06-08 14:13:57 +02:00
split large file into chunks
This commit is contained in:
@@ -79,7 +79,7 @@ jobs:
|
|||||||
python -m src.adsb.download_and_list_icaos --start-date "$START_DATE" --end-date "$END_DATE"
|
python -m src.adsb.download_and_list_icaos --start-date "$START_DATE" --end-date "$END_DATE"
|
||||||
ls -lah data/output/
|
ls -lah data/output/
|
||||||
|
|
||||||
- name: Create tar of extracted data
|
- name: Create tar of extracted data and split into chunks
|
||||||
run: |
|
run: |
|
||||||
cd data/output
|
cd data/output
|
||||||
echo "=== Disk space before tar ==="
|
echo "=== Disk space before tar ==="
|
||||||
@@ -94,16 +94,30 @@ jobs:
|
|||||||
ls -lah extracted_data.tar
|
ls -lah extracted_data.tar
|
||||||
# Verify tar integrity
|
# Verify tar integrity
|
||||||
tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
|
tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
|
||||||
|
|
||||||
|
# Split into 1GB chunks to avoid artifact upload issues
|
||||||
|
echo "=== Splitting tar into 1GB chunks ==="
|
||||||
|
mkdir -p tar_chunks
|
||||||
|
split -b 1G extracted_data.tar tar_chunks/extracted_data.tar.part_
|
||||||
|
rm extracted_data.tar
|
||||||
|
|
||||||
|
# Create checksums for each chunk
|
||||||
|
cd tar_chunks
|
||||||
|
for f in extracted_data.tar.part_*; do
|
||||||
|
sha256sum "$f" > "$f.sha256"
|
||||||
|
done
|
||||||
|
echo "=== Chunks created ==="
|
||||||
|
ls -lah
|
||||||
else
|
else
|
||||||
echo "ERROR: No extracted directories found, cannot create tar"
|
echo "ERROR: No extracted directories found, cannot create tar"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Upload extracted data
|
- name: Upload extracted data chunks
|
||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
|
name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
|
||||||
path: data/output/extracted_data.tar
|
path: data/output/tar_chunks/
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
compression-level: 0
|
compression-level: 0
|
||||||
if-no-files-found: warn
|
if-no-files-found: warn
|
||||||
@@ -141,18 +155,36 @@ jobs:
|
|||||||
uses: actions/download-artifact@v4
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
|
name: adsb-extracted-${{ matrix.chunk.start_date }}-${{ matrix.chunk.end_date }}
|
||||||
path: data/output/
|
path: data/output/tar_chunks/
|
||||||
continue-on-error: true
|
|
||||||
|
|
||||||
- name: Extract tar
|
- name: Reassemble and extract tar
|
||||||
id: extract
|
id: extract
|
||||||
run: |
|
run: |
|
||||||
cd data/output
|
cd data/output
|
||||||
if [ -f extracted_data.tar ]; then
|
if [ -d tar_chunks ] && ls tar_chunks/extracted_data.tar.part_* 1>/dev/null 2>&1; then
|
||||||
echo "=== Tar file info ==="
|
echo "=== Chunk files info ==="
|
||||||
|
ls -lah tar_chunks/
|
||||||
|
|
||||||
|
# Verify checksums
|
||||||
|
echo "=== Verifying chunk checksums ==="
|
||||||
|
cd tar_chunks
|
||||||
|
for f in extracted_data.tar.part_*.sha256; do
|
||||||
|
sha256sum -c "$f" || { echo "ERROR: Checksum verification failed for $f"; exit 1; }
|
||||||
|
done
|
||||||
|
echo "All checksums verified"
|
||||||
|
|
||||||
|
# Reassemble tar
|
||||||
|
echo "=== Reassembling tar file ==="
|
||||||
|
cat extracted_data.tar.part_* > ../extracted_data.tar
|
||||||
|
cd ..
|
||||||
|
rm -rf tar_chunks
|
||||||
|
|
||||||
|
echo "=== Reassembled tar file info ==="
|
||||||
ls -lah extracted_data.tar
|
ls -lah extracted_data.tar
|
||||||
|
|
||||||
echo "=== Verifying tar integrity ==="
|
echo "=== Verifying tar integrity ==="
|
||||||
tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Tar file is corrupted"; exit 1; }
|
tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Reassembled tar file is corrupted"; exit 1; }
|
||||||
|
|
||||||
echo "=== Extracting ==="
|
echo "=== Extracting ==="
|
||||||
tar -xvf extracted_data.tar
|
tar -xvf extracted_data.tar
|
||||||
rm extracted_data.tar
|
rm extracted_data.tar
|
||||||
@@ -160,7 +192,7 @@ jobs:
|
|||||||
echo "=== Contents of data/output ==="
|
echo "=== Contents of data/output ==="
|
||||||
ls -lah
|
ls -lah
|
||||||
else
|
else
|
||||||
echo "No extracted_data.tar found"
|
echo "No tar chunks found"
|
||||||
echo "has_data=false" >> "$GITHUB_OUTPUT"
|
echo "has_data=false" >> "$GITHUB_OUTPUT"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user