mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-29 22:37:51 +02:00
FEATURE: Add contributions framework. Fix and improve the daily ADS-B release using GitHub Actions for map-reduce.
src/contributions/__init__.py
@@ -0,0 +1 @@
"""Community contributions processing module."""
src/contributions/approve_submission.py
@@ -0,0 +1,249 @@
#!/usr/bin/env python3
"""
Approve a community submission and create a PR.

This script is called by the GitHub Actions workflow when the 'approved'
label is added to a validated submission issue.

Usage:
    python -m src.contributions.approve_submission --issue-number 123 --issue-body "..." --author "username" --author-id 12345

Environment variables:
    GITHUB_TOKEN: GitHub API token with repo write permissions
    GITHUB_REPOSITORY: owner/repo
"""
import argparse
import base64
import json
import os
import sys
import urllib.request
import urllib.error
from datetime import datetime, timezone

from .schema import extract_json_from_issue_body, extract_contributor_name_from_issue_body, parse_and_validate
from .contributor import (
    generate_contributor_uuid,
    generate_submission_filename,
    compute_content_hash,
)


def github_api_request(
    method: str,
    endpoint: str,
    data: dict | None = None,
    accept: str = "application/vnd.github.v3+json"
) -> dict:
    """Make a GitHub API request."""
    token = os.environ.get("GITHUB_TOKEN")
    repo = os.environ.get("GITHUB_REPOSITORY")

    if not token or not repo:
        raise EnvironmentError("GITHUB_TOKEN and GITHUB_REPOSITORY must be set")

    url = f"https://api.github.com/repos/{repo}{endpoint}"
    headers = {
        "Authorization": f"token {token}",
        "Accept": accept,
        "Content-Type": "application/json",
    }

    body = json.dumps(data).encode() if data else None
    req = urllib.request.Request(url, data=body, headers=headers, method=method)

    try:
        with urllib.request.urlopen(req) as response:
            raw = response.read()
            # Some endpoints (e.g. DELETE /git/refs/...) return 204 No Content;
            # json.loads would fail on an empty body, so return an empty dict instead.
            return json.loads(raw) if raw else {}
    except urllib.error.HTTPError as e:
        error_body = e.read().decode() if e.fp else ""
        print(f"GitHub API error: {e.code} {e.reason}: {error_body}", file=sys.stderr)
        raise


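# Illustrative usage note (not in the original file; both endpoints are exercised
# later in this module): github_api_request("GET", "/git/ref/heads/main") fetches
# the main branch ref, and github_api_request("POST", "/pulls", {...}) opens a PR.

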
def add_issue_comment(issue_number: int, body: str) -> None:
    """Add a comment to a GitHub issue."""
    github_api_request("POST", f"/issues/{issue_number}/comments", {"body": body})


def get_default_branch_sha() -> str:
    """Get the SHA of the default branch (main)."""
    ref = github_api_request("GET", "/git/ref/heads/main")
    return ref["object"]["sha"]


def create_branch(branch_name: str, sha: str) -> None:
    """Create a new branch from a SHA."""
    try:
        github_api_request("POST", "/git/refs", {
            "ref": f"refs/heads/{branch_name}",
            "sha": sha,
        })
    except urllib.error.HTTPError as e:
        if e.code == 422:  # Branch exists
            # Delete and recreate
            try:
                github_api_request("DELETE", f"/git/refs/heads/{branch_name}")
            except urllib.error.HTTPError:
                pass
            github_api_request("POST", "/git/refs", {
                "ref": f"refs/heads/{branch_name}",
                "sha": sha,
            })
        else:
            raise


def create_or_update_file(path: str, content: str, message: str, branch: str) -> None:
    """Create or update a file in the repository."""
    content_b64 = base64.b64encode(content.encode()).decode()
    github_api_request("PUT", f"/contents/{path}", {
        "message": message,
        "content": content_b64,
        "branch": branch,
    })


def create_pull_request(title: str, head: str, base: str, body: str) -> dict:
    """Create a pull request."""
    return github_api_request("POST", "/pulls", {
        "title": title,
        "head": head,
        "base": base,
        "body": body,
    })


def add_labels_to_issue(issue_number: int, labels: list[str]) -> None:
    """Add labels to an issue or PR."""
    github_api_request("POST", f"/issues/{issue_number}/labels", {"labels": labels})


def process_submission(
    issue_number: int,
    issue_body: str,
    author_username: str,
    author_id: int,
) -> bool:
    """
    Process an approved submission and create a PR.

    Args:
        issue_number: The GitHub issue number
        issue_body: The issue body text
        author_username: The GitHub username of the issue author
        author_id: The numeric GitHub user ID

    Returns:
        True if successful, False otherwise
    """
    # Extract and validate JSON
    json_str = extract_json_from_issue_body(issue_body)
    if not json_str:
        add_issue_comment(issue_number, "❌ Could not extract JSON from submission.")
        return False

    data, errors = parse_and_validate(json_str)
    if errors:
        error_list = "\n".join(f"- {e}" for e in errors)
        add_issue_comment(issue_number, f"❌ **Validation Failed**\n\n{error_list}")
        return False

    # Normalize to list
    submissions = data if isinstance(data, list) else [data]

    # Generate contributor UUID from GitHub ID
    contributor_uuid = generate_contributor_uuid(author_id)

    # Extract contributor name from issue form (or default to GitHub username)
    contributor_name = extract_contributor_name_from_issue_body(issue_body)
    if not contributor_name:
        contributor_name = f"@{author_username}"

    # Add metadata to each submission
    now = datetime.now(timezone.utc)
    date_str = now.strftime("%Y-%m-%d")
    timestamp_str = now.isoformat()

    for submission in submissions:
        submission["contributor_uuid"] = contributor_uuid
        submission["contributor_name"] = contributor_name
        submission["creation_timestamp"] = timestamp_str

    # Generate unique filename
    content_json = json.dumps(submissions, indent=2, sort_keys=True)
    content_hash = compute_content_hash(content_json)
    filename = generate_submission_filename(author_username, date_str, content_hash)
    file_path = f"community/{filename}"

    # Create branch
    branch_name = f"community-submission-{issue_number}"
    default_sha = get_default_branch_sha()
    create_branch(branch_name, default_sha)

    # Create file
    commit_message = f"Add community submission from @{author_username} (closes #{issue_number})"
    create_or_update_file(file_path, content_json, commit_message, branch_name)

    # Create PR
    pr_body = f"""## Community Submission

Adds {len(submissions)} submission(s) from @{author_username}.

**File:** `{file_path}`
**Contributor UUID:** `{contributor_uuid}`

Closes #{issue_number}

---

### Submissions
```json
{content_json}
```"""

    pr = create_pull_request(
        title=f"Community submission: {filename}",
        head=branch_name,
        base="main",
        body=pr_body,
    )

    # Add labels to PR
    add_labels_to_issue(pr["number"], ["community", "auto-generated"])

    # Comment on original issue
    add_issue_comment(
        issue_number,
        f"✅ **Submission Approved**\n\n"
        f"PR #{pr['number']} has been created to add your submission.\n\n"
        f"**File:** `{file_path}`\n"
        f"**Your Contributor UUID:** `{contributor_uuid}`\n\n"
        f"The PR will be merged by a maintainer."
    )

    print(f"Created PR #{pr['number']} for submission")
    return True


def main():
    parser = argparse.ArgumentParser(description="Approve community submission and create PR")
    parser.add_argument("--issue-number", type=int, required=True, help="GitHub issue number")
    parser.add_argument("--issue-body", required=True, help="Issue body text")
    parser.add_argument("--author", required=True, help="Issue author username")
    parser.add_argument("--author-id", type=int, required=True, help="Issue author numeric ID")

    args = parser.parse_args()

    success = process_submission(
        issue_number=args.issue_number,
        issue_body=args.issue_body,
        author_username=args.author,
        author_id=args.author_id,
    )

    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()
src/contributions/contributor.py
@@ -0,0 +1,86 @@
"""Contributor identification utilities."""
import hashlib
import uuid


# DNS namespace UUID for generating UUIDv5
DNS_NAMESPACE = uuid.UUID('6ba7b810-9dad-11d1-80b4-00c04fd430c8')


def generate_contributor_uuid(github_user_id: int) -> str:
    """
    Generate a deterministic UUID v5 from a GitHub user ID.

    This ensures the same GitHub account always gets the same contributor UUID.

    Args:
        github_user_id: The numeric GitHub user ID

    Returns:
        UUID string in standard format
    """
    name = f"github:{github_user_id}"
    return str(uuid.uuid5(DNS_NAMESPACE, name))


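# Illustrative note (not part of the original code): DNS_NAMESPACE above equals
# uuid.NAMESPACE_DNS, so for a hypothetical GitHub user ID 12345 the call
# generate_contributor_uuid(12345) is equivalent to
# str(uuid.uuid5(uuid.NAMESPACE_DNS, "github:12345")) and yields the same UUID
# on every run, which is what keeps contributor IDs stable across submissions.

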
def sanitize_username(username: str, max_length: int = 20) -> str:
    """
    Sanitize a GitHub username for use in filenames.

    Args:
        username: GitHub username
        max_length: Maximum length of sanitized name

    Returns:
        Lowercase alphanumeric string with underscores
    """
    sanitized = ""
    for char in username.lower():
        if char.isalnum():
            sanitized += char
        else:
            sanitized += "_"

    # Collapse multiple underscores
    while "__" in sanitized:
        sanitized = sanitized.replace("__", "_")

    return sanitized.strip("_")[:max_length]


def generate_submission_filename(
    username: str,
    date_str: str,
    content_hash: str,
    extension: str = ".json"
) -> str:
    """
    Generate a unique filename for a community submission.

    Format: {sanitized_username}_{date}_{short_hash}.json

    Args:
        username: GitHub username
        date_str: Date in YYYY-MM-DD format
        content_hash: Hash of the submission content (will be truncated to 8 chars)
        extension: File extension (default: .json)

    Returns:
        Unique filename string
    """
    sanitized_name = sanitize_username(username)
    short_hash = content_hash[:8]
    return f"{sanitized_name}_{date_str}_{short_hash}{extension}"


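# Illustrative example (hypothetical values, not from the repository): a user
# "Jane-Doe" submitting on 2026-04-29 with a content hash beginning "a1b2c3d4"
# would get the filename "jane_doe_2026-04-29_a1b2c3d4.json".

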
def compute_content_hash(content: str) -> str:
    """
    Compute SHA256 hash of content.

    Args:
        content: String content to hash

    Returns:
        Hex digest of SHA256 hash
    """
    return hashlib.sha256(content.encode()).hexdigest()
src/contributions/create_daily_community_release.py
@@ -0,0 +1,141 @@
#!/usr/bin/env python3
"""
Generate a daily CSV of all community contributions.

Reads all JSON files from the community/ directory and outputs a sorted CSV
with creation_timestamp as the first column and contributor_uuid as the last column.

Usage:
    python -m src.contributions.create_daily_community_release
"""
from datetime import datetime, timezone
from pathlib import Path
import json
import sys

import pandas as pd


COMMUNITY_DIR = Path(__file__).parent.parent.parent / "community"
OUT_ROOT = Path("data/planequery_aircraft")


def read_all_submissions(community_dir: Path) -> list[dict]:
    """Read all JSON submissions from the community directory."""
    all_submissions = []

    for json_file in sorted(community_dir.glob("*.json")):
        try:
            with open(json_file) as f:
                data = json.load(f)

            # Normalize to list
            submissions = data if isinstance(data, list) else [data]
            all_submissions.extend(submissions)

        except (json.JSONDecodeError, OSError) as e:
            print(f"Warning: Failed to read {json_file}: {e}", file=sys.stderr)

    return all_submissions


def submissions_to_dataframe(submissions: list[dict]) -> pd.DataFrame:
    """
    Convert submissions to a DataFrame with proper column ordering.

    Column order:
    - creation_timestamp (first)
    - transponder_code_hex
    - registration_number
    - planequery_airframe_id
    - contributor_name
    - [other columns alphabetically]
    - contributor_uuid (last)
    """
    if not submissions:
        return pd.DataFrame()

    df = pd.DataFrame(submissions)

    # Ensure required columns exist
    required_cols = [
        "creation_timestamp",
        "transponder_code_hex",
        "registration_number",
        "planequery_airframe_id",
        "contributor_name",
        "contributor_uuid",
    ]
    for col in required_cols:
        if col not in df.columns:
            df[col] = None

    # Sort by creation_timestamp ascending
    df = df.sort_values("creation_timestamp", ascending=True, na_position="last")

    # Reorder columns: specific order first, contributor_uuid last
    first_cols = [
        "creation_timestamp",
        "transponder_code_hex",
        "registration_number",
        "planequery_airframe_id",
        "contributor_name",
    ]
    last_cols = ["contributor_uuid"]

    middle_cols = sorted([
        col for col in df.columns
        if col not in first_cols and col not in last_cols
    ])

    ordered_cols = first_cols + middle_cols + last_cols
    df = df[ordered_cols]

    return df.reset_index(drop=True)


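# Illustrative example (hypothetical input): if the submissions also carry a
# "tags" field, the CSV header comes out as
#   creation_timestamp, transponder_code_hex, registration_number,
#   planequery_airframe_id, contributor_name, tags, contributor_uuid
# i.e. the fixed columns first, extra columns alphabetically, contributor_uuid last.

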
def main():
    """Generate the daily community contributions CSV."""
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    print(f"Reading community submissions from {COMMUNITY_DIR}")
    submissions = read_all_submissions(COMMUNITY_DIR)

    if not submissions:
        print("No community submissions found.")
        # Still create an empty CSV with headers
        df = pd.DataFrame(columns=[
            "creation_timestamp",
            "transponder_code_hex",
            "registration_number",
            "planequery_airframe_id",
            "contributor_name",
            "tags",
            "contributor_uuid",
        ])
    else:
        print(f"Found {len(submissions)} total submissions")
        df = submissions_to_dataframe(submissions)

    # Determine date range for filename
    if not df.empty and df["creation_timestamp"].notna().any():
        # Get earliest timestamp for start date
        earliest = pd.to_datetime(df["creation_timestamp"]).min()
        start_date_str = earliest.strftime("%Y-%m-%d")
    else:
        start_date_str = date_str

    # Output
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    output_file = OUT_ROOT / f"planequery_aircraft_community_{start_date_str}_{date_str}.csv"

    df.to_csv(output_file, index=False)

    print(f"Saved: {output_file}")
    print(f"Total contributions: {len(df)}")

    return output_file


if __name__ == "__main__":
    main()
src/contributions/read_community_data.py
@@ -0,0 +1,115 @@
#!/usr/bin/env python3
"""
Read and aggregate all community submission data.

Usage:
    python -m src.contributions.read_community_data
    python -m src.contributions.read_community_data --output merged.json
"""
import argparse
import json
import sys
from pathlib import Path


COMMUNITY_DIR = Path(__file__).parent.parent.parent / "community"


def read_all_submissions(community_dir: Path | None = None) -> list[dict]:
    """
    Read all JSON submissions from the community directory.

    Args:
        community_dir: Path to community directory. Uses default if None.

    Returns:
        List of all submission dictionaries
    """
    if community_dir is None:
        community_dir = COMMUNITY_DIR

    all_submissions = []

    for json_file in sorted(community_dir.glob("*.json")):
        try:
            with open(json_file) as f:
                data = json.load(f)

            # Normalize to list
            submissions = data if isinstance(data, list) else [data]

            # Add source file metadata
            for submission in submissions:
                submission["_source_file"] = json_file.name

            all_submissions.extend(submissions)

        except (json.JSONDecodeError, OSError) as e:
            print(f"Warning: Failed to read {json_file}: {e}", file=sys.stderr)

    return all_submissions


def group_by_identifier(submissions: list[dict]) -> dict[str, list[dict]]:
    """
    Group submissions by their identifier (registration, transponder, or airframe ID).

    Returns:
        Dict mapping identifier to list of submissions for that identifier
    """
    grouped = {}

    for submission in submissions:
        # Determine identifier
        if "registration_number" in submission:
            key = f"reg:{submission['registration_number']}"
        elif "transponder_code_hex" in submission:
            key = f"icao:{submission['transponder_code_hex']}"
        elif "planequery_airframe_id" in submission:
            key = f"id:{submission['planequery_airframe_id']}"
        else:
            key = "_unknown"

        if key not in grouped:
            grouped[key] = []
        grouped[key].append(submission)

    return grouped


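# Illustrative example (hypothetical records): a submission containing
# {"registration_number": "N12345"} is grouped under the key "reg:N12345", one with
# only a transponder code under "icao:<hex>", one with only an airframe ID under
# "id:<id>", and submissions carrying none of the three identifiers under "_unknown".

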
def main():
    parser = argparse.ArgumentParser(description="Read community submission data")
    parser.add_argument("--output", "-o", help="Output file (default: stdout)")
    parser.add_argument("--group", action="store_true", help="Group by identifier")
    parser.add_argument("--stats", action="store_true", help="Print statistics only")

    args = parser.parse_args()

    submissions = read_all_submissions()

    if args.stats:
        grouped = group_by_identifier(submissions)
        contributors = set(s.get("contributor_uuid", "unknown") for s in submissions)

        print(f"Total submissions: {len(submissions)}")
        print(f"Unique identifiers: {len(grouped)}")
        print(f"Unique contributors: {len(contributors)}")
        return

    if args.group:
        result = group_by_identifier(submissions)
    else:
        result = submissions

    output = json.dumps(result, indent=2)

    if args.output:
        with open(args.output, "w") as f:
            f.write(output)
        print(f"Wrote {len(submissions)} submissions to {args.output}")
    else:
        print(output)


if __name__ == "__main__":
    main()
src/contributions/schema.py
@@ -0,0 +1,117 @@
"""Schema validation for community submissions."""
import json
import re
from pathlib import Path
from typing import Any

try:
    from jsonschema import Draft202012Validator
except ImportError:
    Draft202012Validator = None


SCHEMA_PATH = Path(__file__).parent.parent.parent / "schemas" / "community_submission.v1.schema.json"


def load_schema() -> dict:
    """Load the community submission schema."""
    with open(SCHEMA_PATH) as f:
        return json.load(f)


def validate_submission(data: dict | list, schema: dict | None = None) -> list[str]:
    """
    Validate submission(s) against schema.

    Args:
        data: Single submission dict or list of submissions
        schema: Optional schema dict. If None, loads from default path.

    Returns:
        List of error messages. Empty list means validation passed.
    """
    if Draft202012Validator is None:
        raise ImportError("jsonschema is required: pip install jsonschema")

    if schema is None:
        schema = load_schema()

    submissions = data if isinstance(data, list) else [data]
    errors = []

    validator = Draft202012Validator(schema)

    for i, submission in enumerate(submissions):
        prefix = f"[{i}] " if len(submissions) > 1 else ""
        for error in validator.iter_errors(submission):
            path = ".".join(str(p) for p in error.path) if error.path else "(root)"
            errors.append(f"{prefix}{path}: {error.message}")

    return errors


def extract_json_from_issue_body(body: str) -> str | None:
    """
    Extract JSON from GitHub issue body.

    Looks for JSON in the 'Submission JSON' section wrapped in code blocks.

    Args:
        body: The issue body text

    Returns:
        Extracted JSON string or None if not found
    """
    # Match JSON in "### Submission JSON" section
    pattern = r"### Submission JSON\s*\n\s*```(?:json)?\s*\n([\s\S]*?)\n\s*```"
    match = re.search(pattern, body)

    if match:
        return match.group(1).strip()

    return None


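# Illustrative example (hypothetical issue body): the pattern above matches a section like
#   ### Submission JSON
#   ```json
#   {"registration_number": "N12345"}
#   ```
# and group(1) captures the text between the code fences.

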
def extract_contributor_name_from_issue_body(body: str) -> str | None:
    """
    Extract contributor name from GitHub issue body.

    Looks for the 'Contributor Name' field in the issue form.

    Args:
        body: The issue body text

    Returns:
        Contributor name string or None if not found/empty
    """
    # Match "### Contributor Name" section
    pattern = r"### Contributor Name\s*\n\s*(.+?)(?=\n###|\n\n|$)"
    match = re.search(pattern, body)

    if match:
        name = match.group(1).strip()
        # GitHub issue forms show "_No response_" for empty optional fields
        if name and name != "_No response_":
            return name

    return None


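# Illustrative example (hypothetical issue body): a form section
#   ### Contributor Name
#   Jane Doe
# yields "Jane Doe", while an untouched optional field rendered as "_No response_"
# yields None, so the caller falls back to the GitHub username.

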
def parse_and_validate(json_str: str, schema: dict | None = None) -> tuple[list | dict | None, list[str]]:
    """
    Parse JSON string and validate against schema.

    Args:
        json_str: JSON string to parse
        schema: Optional schema dict

    Returns:
        Tuple of (parsed data or None, list of errors)
    """
    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        return None, [f"Invalid JSON: {e}"]

    errors = validate_submission(data, schema)
    return data, errors
src/contributions/validate_submission.py
@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
Validate a community submission from a GitHub issue.

This script is called by the GitHub Actions workflow to validate
submissions when issues are opened or edited.

Usage:
    python -m src.contributions.validate_submission --issue-body "..."
    python -m src.contributions.validate_submission --file submission.json
    echo '{"registration_number": "N12345"}' | python -m src.contributions.validate_submission --stdin

Environment variables (for GitHub Actions):
    GITHUB_TOKEN: GitHub API token
    GITHUB_REPOSITORY: owner/repo
    ISSUE_NUMBER: Issue number to comment on
"""
import argparse
import json
import os
import sys
import urllib.request
import urllib.error

from .schema import extract_json_from_issue_body, parse_and_validate, load_schema


def github_api_request(method: str, endpoint: str, data: dict | None = None) -> dict:
    """Make a GitHub API request."""
    token = os.environ.get("GITHUB_TOKEN")
    repo = os.environ.get("GITHUB_REPOSITORY")

    if not token or not repo:
        raise EnvironmentError("GITHUB_TOKEN and GITHUB_REPOSITORY must be set")

    url = f"https://api.github.com/repos/{repo}{endpoint}"
    headers = {
        "Authorization": f"token {token}",
        "Accept": "application/vnd.github.v3+json",
        "Content-Type": "application/json",
    }

    body = json.dumps(data).encode() if data else None
    req = urllib.request.Request(url, data=body, headers=headers, method=method)

    with urllib.request.urlopen(req) as response:
        return json.loads(response.read())


def add_issue_comment(issue_number: int, body: str) -> None:
    """Add a comment to a GitHub issue."""
    github_api_request("POST", f"/issues/{issue_number}/comments", {"body": body})


def add_issue_label(issue_number: int, label: str) -> None:
    """Add a label to a GitHub issue."""
    github_api_request("POST", f"/issues/{issue_number}/labels", {"labels": [label]})


def remove_issue_label(issue_number: int, label: str) -> None:
    """Remove a label from a GitHub issue."""
    try:
        github_api_request("DELETE", f"/issues/{issue_number}/labels/{label}")
    except urllib.error.HTTPError:
        pass  # Label might not exist


def validate_and_report(json_str: str, issue_number: int | None = None) -> bool:
    """
    Validate JSON and optionally report to GitHub issue.

    Args:
        json_str: JSON string to validate
        issue_number: Optional issue number to comment on

    Returns:
        True if validation passed, False otherwise
    """
    data, errors = parse_and_validate(json_str)

    if errors:
        error_list = "\n".join(f"- {e}" for e in errors)
        message = f"❌ **Validation Failed**\n\n{error_list}\n\nPlease fix the errors and edit your submission."

        print(message, file=sys.stderr)

        if issue_number:
            add_issue_comment(issue_number, message)
            remove_issue_label(issue_number, "validated")

        return False

    count = len(data) if isinstance(data, list) else 1
    message = f"✅ **Validation Passed**\n\n{count} submission(s) validated successfully against the schema.\n\nA maintainer can approve this submission by adding the `approved` label."

    print(message)

    if issue_number:
        add_issue_comment(issue_number, message)
        add_issue_label(issue_number, "validated")

    return True


def main():
    parser = argparse.ArgumentParser(description="Validate community submission JSON")
    source_group = parser.add_mutually_exclusive_group(required=True)
    source_group.add_argument("--issue-body", help="Issue body text containing JSON")
    source_group.add_argument("--file", help="JSON file to validate")
    source_group.add_argument("--stdin", action="store_true", help="Read JSON from stdin")

    parser.add_argument("--issue-number", type=int, help="GitHub issue number to comment on")

    args = parser.parse_args()

    # Get JSON string
    if args.issue_body:
        json_str = extract_json_from_issue_body(args.issue_body)
        if not json_str:
            print("❌ Could not extract JSON from issue body", file=sys.stderr)
            if args.issue_number:
                add_issue_comment(
                    args.issue_number,
                    "❌ **Validation Failed**\n\nCould not extract JSON from submission. "
                    "Please ensure your JSON is in the 'Submission JSON' field wrapped in code blocks."
                )
            sys.exit(1)
    elif args.file:
        with open(args.file) as f:
            json_str = f.read()
    else:  # stdin
        json_str = sys.stdin.read()

    # Validate
    success = validate_and_report(json_str, args.issue_number)
    sys.exit(0 if success else 1)


if __name__ == "__main__":
    main()