mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-05-03 16:25:08 +02:00
Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 1db92d2ffd | | |
| | 61aae586ee | | |
| | 5abfa6b226 | | |
| | a743b74ae5 | | |
| | 53a020ab73 | | |
| | 2de41c9883 | | |
| | bccc634158 | | |
| | 43b07942b0 | | |
| | 2c9e994a12 | | |
| | 99b680476a | | |
@@ -38,9 +38,10 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
ISSUE_BODY: ${{ github.event.issue.body }}
|
||||
run: |
|
||||
python -m src.contributions.approve_submission \
|
||||
--issue-number ${{ github.event.issue.number }} \
|
||||
--issue-body "${{ github.event.issue.body }}" \
|
||||
--issue-body "$ISSUE_BODY" \
|
||||
--author "${{ steps.author.outputs.username }}" \
|
||||
--author-id ${{ steps.author.outputs.user_id }}
|
||||
|
||||
@@ -81,8 +81,22 @@ jobs:
|
||||
- name: Create tar of extracted data
|
||||
run: |
|
||||
cd data/output
|
||||
tar -cf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "Some files may not exist"
|
||||
ls -lah extracted_data.tar || echo "No tar created"
|
||||
echo "=== Disk space before tar ==="
|
||||
df -h .
|
||||
echo "=== Files to tar ==="
|
||||
ls -lah *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt 2>/dev/null || echo "No files found"
|
||||
|
||||
# Create tar with explicit error checking
|
||||
if ls *-planes-readsb-prod-0.tar_0 1>/dev/null 2>&1; then
|
||||
tar -cvf extracted_data.tar *-planes-readsb-prod-0.tar_0 icao_manifest_*.txt
|
||||
echo "=== Tar file created ==="
|
||||
ls -lah extracted_data.tar
|
||||
# Verify tar integrity
|
||||
tar -tf extracted_data.tar > /dev/null && echo "Tar integrity check passed" || { echo "Tar integrity check FAILED"; exit 1; }
|
||||
else
|
||||
echo "ERROR: No extracted directories found, cannot create tar"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Upload extracted data
|
||||
uses: actions/upload-artifact@v4
|
||||
@@ -97,7 +111,7 @@ jobs:
|
||||
needs: [generate-matrix, adsb-extract]
|
||||
runs-on: ubuntu-24.04-arm
|
||||
strategy:
|
||||
fail-fast: false
|
||||
fail-fast: true
|
||||
matrix:
|
||||
chunk: ${{ fromJson(needs.generate-matrix.outputs.chunks) }}
|
||||
icao_chunk: [0, 1, 2, 3]
|
||||
@@ -134,7 +148,12 @@ jobs:
|
||||
run: |
|
||||
cd data/output
|
||||
if [ -f extracted_data.tar ]; then
|
||||
tar -xf extracted_data.tar
|
||||
echo "=== Tar file info ==="
|
||||
ls -lah extracted_data.tar
|
||||
echo "=== Verifying tar integrity ==="
|
||||
tar -tf extracted_data.tar > /dev/null || { echo "ERROR: Tar file is corrupted"; exit 1; }
|
||||
echo "=== Extracting ==="
|
||||
tar -xvf extracted_data.tar
|
||||
rm extracted_data.tar
|
||||
echo "has_data=true" >> "$GITHUB_OUTPUT"
|
||||
echo "=== Contents of data/output ==="
|
||||
@@ -188,17 +207,19 @@ jobs:
|
||||
|
||||
- name: Debug downloaded files
|
||||
run: |
|
||||
echo "=== Disk space before processing ==="
|
||||
df -h
|
||||
echo "=== Listing data/output/adsb_chunks/ ==="
|
||||
find data/output/adsb_chunks/ -type f 2>/dev/null | head -50 || echo "No files found"
|
||||
echo "=== Looking for parquet files ==="
|
||||
find . -name "*.parquet" 2>/dev/null | head -20 || echo "No parquet files found"
|
||||
find data/output/adsb_chunks/ -type f 2>/dev/null | wc -l
|
||||
echo "=== Total parquet size ==="
|
||||
du -sh data/output/adsb_chunks/ || echo "No chunks dir"
|
||||
|
||||
- name: Combine chunks to CSV
|
||||
env:
|
||||
START_DATE: ${{ needs.generate-matrix.outputs.global_start }}
|
||||
END_DATE: ${{ needs.generate-matrix.outputs.global_end }}
|
||||
run: |
|
||||
python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base
|
||||
python -m src.adsb.combine_chunks_to_csv --chunks-dir data/output/adsb_chunks --start-date "$START_DATE" --end-date "$END_DATE" --skip-base --stream
|
||||
ls -lah data/planequery_aircraft/
|
||||
|
||||
- name: Upload final artifact
|
||||
|
||||
@@ -277,6 +277,15 @@ jobs:
|
||||
name: community-release
|
||||
path: artifacts/community
|
||||
|
||||
- name: Debug artifact structure
|
||||
run: |
|
||||
echo "=== FAA artifacts ==="
|
||||
find artifacts/faa -type f 2>/dev/null || echo "No files found in artifacts/faa"
|
||||
echo "=== ADS-B artifacts ==="
|
||||
find artifacts/adsb -type f 2>/dev/null || echo "No files found in artifacts/adsb"
|
||||
echo "=== Community artifacts ==="
|
||||
find artifacts/community -type f 2>/dev/null || echo "No files found in artifacts/community"
|
||||
|
||||
- name: Prepare release metadata
|
||||
id: meta
|
||||
run: |
|
||||
@@ -312,6 +321,13 @@ jobs:
|
||||
echo "zip_basename=$ZIP_BASENAME" >> "$GITHUB_OUTPUT"
|
||||
echo "name=planequery-aircraft snapshot ($DATE)${BRANCH_SUFFIX}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
- name: Delete existing release if exists
|
||||
run: |
|
||||
gh release delete "${{ steps.meta.outputs.tag }}" --yes 2>/dev/null || true
|
||||
git push --delete origin "refs/tags/${{ steps.meta.outputs.tag }}" 2>/dev/null || true
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Create GitHub Release and upload assets
|
||||
uses: softprops/action-gh-release@v2
|
||||
with:
|
||||
|
||||
@@ -4,6 +4,9 @@ on:
|
||||
issues:
|
||||
types: [opened, edited]
|
||||
|
||||
permissions:
|
||||
issues: write
|
||||
|
||||
jobs:
|
||||
validate:
|
||||
if: contains(github.event.issue.labels.*.name, 'submission')
|
||||
@@ -20,11 +23,24 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: pip install jsonschema
|
||||
|
||||
- name: Debug issue body
|
||||
run: |
|
||||
echo "=== Issue Body ==="
|
||||
cat << 'ISSUE_BODY_EOF'
|
||||
${{ github.event.issue.body }}
|
||||
ISSUE_BODY_EOF
|
||||
|
||||
- name: Save issue body to file
|
||||
run: |
|
||||
cat << 'ISSUE_BODY_EOF' > /tmp/issue_body.txt
|
||||
${{ github.event.issue.body }}
|
||||
ISSUE_BODY_EOF
|
||||
|
||||
- name: Validate submission
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
GITHUB_REPOSITORY: ${{ github.repository }}
|
||||
run: |
|
||||
python -m src.contributions.validate_submission \
|
||||
--issue-body "${{ github.event.issue.body }}" \
|
||||
--issue-body-file /tmp/issue_body.txt \
|
||||
--issue-number ${{ github.event.issue.number }}
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
[
|
||||
{
|
||||
"contributor_name": "@ggman12",
|
||||
"contributor_uuid": "2981c3ee-8712-5f96-84bf-732eda515a3f",
|
||||
"creation_timestamp": "2026-02-12T17:28:52.823922+00:00",
|
||||
"registration_number": "N12345",
|
||||
"tags": {
|
||||
"internet": "starlink"
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -3,3 +3,4 @@ pandas==3.0.0
|
||||
pyarrow==23.0.0
|
||||
orjson==3.11.7
|
||||
polars==1.38.1
|
||||
jsonschema==4.26.0
|
||||
@@ -36,8 +36,13 @@ def get_target_day() -> datetime:
|
||||
return datetime.utcnow() - timedelta(days=1)
|
||||
|
||||
|
||||
def process_single_chunk(chunk_path: str) -> pl.DataFrame:
|
||||
"""Load and compress a single chunk parquet file."""
|
||||
def process_single_chunk(chunk_path: str, delete_after_load: bool = False) -> pl.DataFrame:
|
||||
"""Load and compress a single chunk parquet file.
|
||||
|
||||
Args:
|
||||
chunk_path: Path to parquet file
|
||||
delete_after_load: If True, delete the parquet file after loading to free disk space
|
||||
"""
|
||||
print(f"Processing {os.path.basename(chunk_path)}... | {get_resource_usage()}")
|
||||
|
||||
# Load chunk - only columns we need
|
||||
@@ -45,6 +50,14 @@ def process_single_chunk(chunk_path: str) -> pl.DataFrame:
|
||||
df = pl.read_parquet(chunk_path, columns=needed_columns)
|
||||
print(f" Loaded {len(df)} rows")
|
||||
|
||||
# Delete file immediately after loading to free disk space
|
||||
if delete_after_load:
|
||||
try:
|
||||
os.remove(chunk_path)
|
||||
print(f" Deleted {chunk_path} to free disk space")
|
||||
except Exception as e:
|
||||
print(f" Warning: Failed to delete {chunk_path}: {e}")
|
||||
|
||||
# Compress to aircraft records (one per ICAO) using shared function
|
||||
compressed = compress_multi_icao_df(df, verbose=True)
|
||||
print(f" Compressed to {len(compressed)} aircraft records")
|
||||
@@ -156,6 +169,7 @@ def main():
|
||||
parser.add_argument("--chunks-dir", type=str, default=DEFAULT_CHUNK_DIR, help="Directory containing chunk parquet files")
|
||||
parser.add_argument("--skip-base", action="store_true", help="Skip downloading and merging base release")
|
||||
parser.add_argument("--keep-chunks", action="store_true", help="Keep chunk files after merging")
|
||||
parser.add_argument("--stream", action="store_true", help="Delete parquet files immediately after loading to save disk space")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Determine output ID and filename based on mode
|
||||
@@ -190,9 +204,10 @@ def main():
|
||||
print(f"Found {len(chunk_files)} chunk files")
|
||||
|
||||
# Process each chunk separately to save memory
|
||||
# With --stream, delete parquet files immediately after loading to save disk space
|
||||
compressed_chunks = []
|
||||
for chunk_path in chunk_files:
|
||||
compressed = process_single_chunk(chunk_path)
|
||||
compressed = process_single_chunk(chunk_path, delete_after_load=args.stream)
|
||||
compressed_chunks.append(compressed)
|
||||
gc.collect()
|
||||
|
||||
|
||||
@@ -54,7 +54,9 @@ def extract_json_from_issue_body(body: str) -> str | None:
|
||||
"""
|
||||
Extract JSON from GitHub issue body.
|
||||
|
||||
Looks for JSON in the 'Submission JSON' section wrapped in code blocks.
|
||||
Looks for JSON in the 'Submission JSON' section, either:
|
||||
- Wrapped in code blocks (```json ... ``` or ``` ... ```)
|
||||
- Or raw JSON after the header
|
||||
|
||||
Args:
|
||||
body: The issue body text
|
||||
@@ -62,13 +64,28 @@ def extract_json_from_issue_body(body: str) -> str | None:
|
||||
Returns:
|
||||
Extracted JSON string or None if not found
|
||||
"""
|
||||
# Match JSON in "### Submission JSON" section
|
||||
pattern = r"### Submission JSON\s*\n\s*```(?:json)?\s*\n([\s\S]*?)\n\s*```"
|
||||
match = re.search(pattern, body)
|
||||
|
||||
# Try: JSON in code blocks after "### Submission JSON"
|
||||
pattern_codeblock = r"### Submission JSON\s*\n\s*```(?:json)?\s*\n([\s\S]*?)\n\s*```"
|
||||
match = re.search(pattern_codeblock, body)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
# Try: Raw JSON after "### Submission JSON" until next section or end
|
||||
pattern_raw = r"### Submission JSON\s*\n\s*([\[{][\s\S]*?[\]}])(?=\n###|\n\n###|$)"
|
||||
match = re.search(pattern_raw, body)
|
||||
if match:
|
||||
return match.group(1).strip()
|
||||
|
||||
# Try: Any JSON object/array in the body (fallback)
|
||||
pattern_any = r"([\[{][\s\S]*?[\]}])"
|
||||
for match in re.finditer(pattern_any, body):
|
||||
candidate = match.group(1).strip()
|
||||
# Validate it looks like JSON
|
||||
if candidate.startswith('{') and candidate.endswith('}'):
|
||||
return candidate
|
||||
if candidate.startswith('[') and candidate.endswith(']'):
|
||||
return candidate
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ submissions when issues are opened or edited.
|
||||
|
||||
Usage:
|
||||
python -m src.contributions.validate_submission --issue-body "..."
|
||||
python -m src.contributions.validate_submission --issue-body-file /path/to/body.txt
|
||||
python -m src.contributions.validate_submission --file submission.json
|
||||
echo '{"registration_number": "N12345"}' | python -m src.contributions.validate_submission --stdin
|
||||
|
||||
@@ -106,6 +107,7 @@ def main():
|
||||
parser = argparse.ArgumentParser(description="Validate community submission JSON")
|
||||
source_group = parser.add_mutually_exclusive_group(required=True)
|
||||
source_group.add_argument("--issue-body", help="Issue body text containing JSON")
|
||||
source_group.add_argument("--issue-body-file", help="File containing issue body text")
|
||||
source_group.add_argument("--file", help="JSON file to validate")
|
||||
source_group.add_argument("--stdin", action="store_true", help="Read JSON from stdin")
|
||||
|
||||
@@ -125,6 +127,20 @@ def main():
|
||||
"Please ensure your JSON is in the 'Submission JSON' field wrapped in code blocks."
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.issue_body_file:
|
||||
with open(args.issue_body_file) as f:
|
||||
issue_body = f.read()
|
||||
json_str = extract_json_from_issue_body(issue_body)
|
||||
if not json_str:
|
||||
print("❌ Could not extract JSON from issue body", file=sys.stderr)
|
||||
print(f"Issue body:\n{issue_body}", file=sys.stderr)
|
||||
if args.issue_number:
|
||||
add_issue_comment(
|
||||
args.issue_number,
|
||||
"❌ **Validation Failed**\n\nCould not extract JSON from submission. "
|
||||
"Please ensure your JSON is in the 'Submission JSON' field."
|
||||
)
|
||||
sys.exit(1)
|
||||
elif args.file:
|
||||
with open(args.file) as f:
|
||||
json_str = f.read()
|
||||
|
||||
Reference in New Issue
Block a user