# Manually triggered pipeline that back-fills historical FAA data:
#   1. generate-matrix          - builds the list of date ranges to process
#   2. clone-faa-repo           - caches a clone of the upstream FAA scrape repo
#   3. process-chunk            - runs the extraction script once per date range
#   4. create-release           - publishes every per-chunk CSV as release assets
#   5. concatenate-and-release  - concatenates the CSVs and publishes the result
name: Process Historical FAA Data

on:
  # Manual trigger
  workflow_dispatch:

jobs:
  # Emits a JSON array of {"since": ..., "until": ...} objects that the
  # process-chunk job consumes as its build matrix.
  generate-matrix:
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      - name: Generate date ranges
        id: set-matrix
        # NOTE: the start/end dates below are repeated verbatim in the release
        # bodies of both release jobs; keep them in sync when changing them.
        run: |
          python3 << 'EOF'
          import json
          import os
          from datetime import datetime, timedelta

          start = datetime(2023, 8, 16)
          end = datetime(2026, 1, 1)

          ranges = []
          current = start

          # Process in 4-day chunks
          while current < end:
              chunk_end = current + timedelta(days=4)
              # Don't go past the end date
              if chunk_end > end:
                  chunk_end = end

              ranges.append({
                  "since": current.strftime("%Y-%m-%d"),
                  "until": chunk_end.strftime("%Y-%m-%d")
              })

              # The next chunk starts exactly where this one ended.
              current = chunk_end

          # Publish the step output through the GITHUB_OUTPUT file; the
          # "::set-output" workflow command is deprecated. json.dumps emits a
          # single line, so the simple name=value form is sufficient.
          with open(os.environ["GITHUB_OUTPUT"], "a", encoding="utf-8") as fh:
              fh.write(f"matrix={json.dumps(ranges)}\n")
          EOF

  # Ensures a clone of the upstream FAA scrape repository exists in the
  # Actions cache so the matrix jobs can restore it instead of cloning again.
  clone-faa-repo:
    runs-on: ubuntu-latest
    steps:
      # The key is static: once a cache entry exists it is reused as-is.
      # Change the key (e.g. faa-repo-v2) to force a fresh clone.
      - name: Cache FAA repository
        id: cache-faa-repo
        uses: actions/cache@v4
        with:
          path: data/scrape-faa-releasable-aircraft
          key: faa-repo-v1

      # Only clone when the cache step above did not restore anything.
      - name: Clone FAA repository
        if: steps.cache-faa-repo.outputs.cache-hit != 'true'
        run: |
          mkdir -p data
          git clone https://github.com/simonw/scrape-faa-releasable-aircraft data/scrape-faa-releasable-aircraft
          echo "Repository cloned successfully"

  # Runs the extraction script once per date range produced by generate-matrix.
  process-chunk:
    needs: [generate-matrix, clone-faa-repo]
    runs-on: ubuntu-latest
    strategy:
      # Process 5 chunks at a time
      max-parallel: 5
      matrix:
        range: ${{ fromJson(needs.generate-matrix.outputs.matrix) }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      # Restore-only: the job fails if the clone-faa-repo cache is missing.
      - name: Restore FAA repository cache
        uses: actions/cache/restore@v4
        with:
          path: data/scrape-faa-releasable-aircraft
          key: faa-repo-v1
          fail-on-cache-miss: true

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Process chunk ${{ matrix.range.since }} to ${{ matrix.range.until }}
        run: |
          python src/get_historical_faa.py "${{ matrix.range.since }}" "${{ matrix.range.until }}"

      # One artifact per chunk, named after its date range; kept for one day
      # because the release jobs below consume it within the same run.
      - name: Upload CSV artifact
        uses: actions/upload-artifact@v4
        with:
          name: csv-${{ matrix.range.since }}-to-${{ matrix.range.until }}
          path: data/faa_releasable_historical/*.csv
          retention-days: 1

  # Publishes every per-chunk CSV as an asset of a single GitHub release.
  create-release:
    needs: process-chunk
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      # Copy every CSV found anywhere under artifacts/ into one flat directory.
      # NOTE(review): CSVs sharing a file name would overwrite each other
      # here - confirm the processing script emits unique names per chunk.
      - name: Prepare release files
        run: |
          mkdir -p release-files
          find artifacts -name "*.csv" -exec cp {} release-files/ \;
          ls -lh release-files/

      # Capture the actual generation time for the release notes
      # (github.event.repository.updated_at is the repository's last-updated
      # time, not the time this release was produced).
      - name: Record generation time
        id: timestamp
        run: |
          echo "generated=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_OUTPUT"

      - name: Create Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: historical-faa-${{ github.run_number }}
          name: Historical FAA Data Release ${{ github.run_number }}
          body: |
            Automated release of historical FAA aircraft data
            Processing period: 2023-08-16 to 2026-01-01
            Generated: ${{ steps.timestamp.outputs.generated }}
          files: release-files/*.csv
          draft: false
          prerelease: false

  # Concatenates the per-chunk CSVs and publishes the combined output as a
  # separate GitHub release.
  concatenate-and-release:
    needs: process-chunk
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: Install dependencies
        run: |
          pip install -r requirements.txt

      - name: Download all artifacts
        uses: actions/download-artifact@v4
        with:
          path: artifacts

      # Gather every downloaded CSV into data/faa_releasable_historical/
      # before running the concatenation script.
      - name: Prepare CSVs for concatenation
        run: |
          mkdir -p data/faa_releasable_historical
          find artifacts -name "*.csv" -exec cp {} data/faa_releasable_historical/ \;
          ls -lh data/faa_releasable_historical/

      # NOTE(review): presumably writes its result under data/openairframes/,
      # which is what the release step below uploads - confirm in the script.
      - name: Concatenate all CSVs
        run: |
          python scripts/concat_csvs.py

      # Capture the actual generation time for the release notes
      # (github.event.repository.updated_at is the repository's last-updated
      # time, not the time this release was produced).
      - name: Record generation time
        id: timestamp
        run: |
          echo "generated=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_OUTPUT"

      - name: Create Combined Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: historical-faa-combined-${{ github.run_number }}
          name: Historical FAA Data Combined Release ${{ github.run_number }}
          body: |
            Combined historical FAA aircraft data (all chunks concatenated)
            Processing period: 2023-08-16 to 2026-01-01
            Generated: ${{ steps.timestamp.outputs.generated }}
          files: data/openairframes/*.csv
          draft: false
          prerelease: false