Compare commits

...

5 Commits

Author SHA1 Message Date
ggman12 82d11d8d24 try less strict tar extract for 2025-10-15 and other days that fail 2026-02-19 00:20:03 -05:00
ggman12 76a217ad14 src/contributions/approve_submission.py handle big json files 2026-02-18 23:18:19 -05:00
ggman12 ec2d1a1291 update download.sh 2026-02-18 23:18:19 -05:00
ggman12 97284c69a9 verify download assets 2026-02-18 23:18:19 -05:00
JG 892ffa78af Merge pull request #28 from PlaneQuery/community-submission-27
Community submission: ggman12_2026-02-18_5ddbb8bd.json
2026-02-18 17:18:49 -05:00
4 changed files with 81 additions and 15 deletions
+6
View File
@@ -23,6 +23,12 @@ gh run list \
"repos/$REPO/actions/runs/$run_id/artifacts" \
--jq '.artifacts[] | select(.name | test("^openairframes_adsb-[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{4}-[0-9]{2}-[0-9]{2}$")) | .name' | while read -r artifact_name; do
# Check if artifact directory already exists and has files
if [ -d "downloads/adsb_artifacts/$artifact_name" ] && [ -n "$(ls -A "downloads/adsb_artifacts/$artifact_name" 2>/dev/null)" ]; then
echo " Skipping (already exists): $artifact_name"
continue
fi
echo " Downloading: $artifact_name"
gh run download "$run_id" \
--repo "$REPO" \
+58 -10
View File
@@ -129,13 +129,32 @@ def fetch_releases(version_date: str) -> list:
return releases
def download_asset(asset_url: str, file_path: str) -> bool:
"""Download a single release asset."""
def download_asset(asset_url: str, file_path: str, expected_size: int | None = None) -> bool:
"""Download a single release asset with size verification.
Args:
asset_url: URL to download from
file_path: Local path to save to
expected_size: Expected file size in bytes (for verification)
Returns:
True if download succeeded and size matches (if provided), False otherwise
"""
os.makedirs(os.path.dirname(file_path) or OUTPUT_DIR, exist_ok=True)
# Check if file exists and has correct size
if os.path.exists(file_path):
print(f"[SKIP] {file_path} already downloaded.")
return True
if expected_size is not None:
actual_size = os.path.getsize(file_path)
if actual_size == expected_size:
print(f"[SKIP] {file_path} already downloaded and verified ({actual_size} bytes).")
return True
else:
print(f"[WARN] {file_path} exists but size mismatch (expected {expected_size}, got {actual_size}). Re-downloading.")
os.remove(file_path)
else:
print(f"[SKIP] {file_path} already downloaded.")
return True
max_retries = 2
retry_delay = 30
@@ -153,7 +172,21 @@ def download_asset(asset_url: str, file_path: str) -> bool:
if not chunk:
break
file.write(chunk)
print(f"Saved {file_path}")
# Verify file size if expected_size was provided
if expected_size is not None:
actual_size = os.path.getsize(file_path)
if actual_size != expected_size:
print(f"[ERROR] Size mismatch for {file_path}: expected {expected_size} bytes, got {actual_size} bytes")
os.remove(file_path)
if attempt < max_retries:
print(f"Waiting {retry_delay} seconds before retry")
time.sleep(retry_delay)
continue
return False
print(f"Saved {file_path} ({actual_size} bytes, verified)")
else:
print(f"Saved {file_path}")
return True
else:
print(f"Failed to download {asset_url}: {response.status} {response.msg}")
@@ -227,7 +260,6 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
stdin=cat_proc.stdout,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
check=True
)
cat_proc.stdout.close()
cat_stderr = cat_proc.stderr.read().decode() if cat_proc.stderr else ""
@@ -236,6 +268,24 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
if cat_stderr:
print(f"cat stderr: {cat_stderr}")
tar_stderr = result.stderr.decode() if result.stderr else ""
if result.returncode != 0:
# GNU tar exits non-zero for format issues that BSD tar silently
# tolerates (e.g. trailing junk after the last valid entry).
# Check whether files were actually extracted before giving up.
extracted_items = os.listdir(extract_dir)
if extracted_items:
print(f"[WARN] tar exited {result.returncode} but extracted "
f"{len(extracted_items)} items — treating as success")
if tar_stderr:
print(f"tar stderr: {tar_stderr}")
else:
print(f"Failed to extract split archive (tar exit {result.returncode})")
if tar_stderr:
print(f"tar stderr: {tar_stderr}")
shutil.rmtree(extract_dir, ignore_errors=True)
return False
print(f"Successfully extracted archive to {extract_dir}")
# Delete tar files immediately after extraction
@@ -252,11 +302,9 @@ def extract_split_archive(file_paths: list, extract_dir: str) -> bool:
print(f"Disk space after tar deletion: {free_gb:.1f}GB free")
return True
except subprocess.CalledProcessError as e:
stderr_output = e.stderr.decode() if e.stderr else ""
except Exception as e:
print(f"Failed to extract split archive: {e}")
if stderr_output:
print(f"tar stderr: {stderr_output}")
shutil.rmtree(extract_dir, ignore_errors=True)
return False
+2 -1
View File
@@ -77,8 +77,9 @@ def download_and_extract(version_date: str) -> str | None:
for asset in use_assets:
asset_name = asset["name"]
asset_url = asset["browser_download_url"]
asset_size = asset.get("size") # Get expected file size
file_path = os.path.join(OUTPUT_DIR, asset_name)
if download_asset(asset_url, file_path):
if download_asset(asset_url, file_path, expected_size=asset_size):
downloaded_files.append(file_path)
if not downloaded_files:
+15 -4
View File
@@ -246,6 +246,20 @@ def process_submission(
if schema_updated:
schema_note = f"\n**Schema Updated:** Added new tags: `{', '.join(new_tags)}`\n"
# Truncate JSON preview to stay under GitHub's 65536 char body limit
max_json_preview = 50000
if len(content_json) > max_json_preview:
# Show first few entries as a preview
preview_entries = submissions[:10]
preview_json = json.dumps(preview_entries, indent=2, sort_keys=True)
json_section = (
f"### Submissions (showing 10 of {len(submissions)})\n"
f"```json\n{preview_json}\n```\n\n"
f"*Full submission ({len(submissions)} entries, {len(content_json):,} chars) is in the committed file.*"
)
else:
json_section = f"### Submissions\n```json\n{content_json}\n```"
pr_body = f"""## Community Submission
Adds {len(submissions)} submission(s) from @{author_username}.
@@ -257,10 +271,7 @@ Closes #{issue_number}
---
### Submissions
```json
{content_json}
```"""
{json_section}"""
pr = create_pull_request(
title=f"Community submission: {filename}",