From f543b671f8fc183b6e332917cece9b8fac532b24 Mon Sep 17 00:00:00 2001 From: ggman12 Date: Thu, 12 Feb 2026 13:22:56 -0500 Subject: [PATCH] updating schema --- src/contributions/approve_submission.py | 54 +++++++- src/contributions/read_community_data.py | 49 ++++++- src/contributions/schema.py | 55 +++++++- src/contributions/update_schema.py | 169 +++++++++++++++++++++++ src/contributions/validate_submission.py | 62 +++++++++ 5 files changed, 379 insertions(+), 10 deletions(-) create mode 100644 src/contributions/update_schema.py diff --git a/src/contributions/approve_submission.py b/src/contributions/approve_submission.py index e6a8ed8..9436afe 100644 --- a/src/contributions/approve_submission.py +++ b/src/contributions/approve_submission.py @@ -21,12 +21,14 @@ import urllib.request import urllib.error from datetime import datetime, timezone -from .schema import extract_json_from_issue_body, extract_contributor_name_from_issue_body, parse_and_validate +from .schema import extract_json_from_issue_body, extract_contributor_name_from_issue_body, parse_and_validate, get_latest_schema_version, load_schema from .contributor import ( generate_contributor_uuid, generate_submission_filename, compute_content_hash, ) +from .update_schema import generate_new_schema, check_for_new_tags, get_existing_tag_definitions +from .read_community_data import build_tag_type_registry def github_api_request( @@ -148,13 +150,13 @@ def process_submission( return False data, errors = parse_and_validate(json_str) - if errors: - error_list = "\n".join(f"- {e}" for e in errors) + if errors or data is None: + error_list = "\n".join(f"- {e}" for e in errors) if errors else "Unknown error" add_issue_comment(issue_number, f"❌ **Validation Failed**\n\n{error_list}") return False # Normalize to list - submissions = data if isinstance(data, list) else [data] + submissions: list[dict] = data if isinstance(data, list) else [data] # Generate contributor UUID from GitHub ID contributor_uuid = 
generate_contributor_uuid(author_id) @@ -188,14 +190,56 @@ def process_submission( commit_message = f"Add community submission from @{author_username} (closes #{issue_number})" create_or_update_file(file_path, content_json, commit_message, branch_name) + # Update schema with any new tags (creates new version if needed) + schema_updated = False + new_version = None + new_tags = [] + try: + # Build tag registry from new submissions + tag_registry = build_tag_type_registry(submissions) + + # Get current schema and merge existing tags + current_version = get_latest_schema_version() + current_schema = load_schema(current_version) + existing_tags = get_existing_tag_definitions(current_schema) + + # Merge existing tags into registry + for tag_name, tag_def in existing_tags.items(): + if tag_name not in tag_registry: + tag_type = tag_def.get("type", "string") + tag_registry[tag_name] = tag_type + + # Check for new tags + new_tags = check_for_new_tags(tag_registry, current_schema) + + if new_tags: + # Generate new schema version + new_version = current_version + 1 + new_schema = generate_new_schema(current_schema, tag_registry, new_version) + schema_json = json.dumps(new_schema, indent=2) + "\n" + + create_or_update_file( + f"schemas/community_submission.v{new_version}.schema.json", + schema_json, + f"Create schema v{new_version} with new tags: {', '.join(new_tags)}", + branch_name + ) + schema_updated = True + except Exception as e: + print(f"Warning: Could not update schema: {e}", file=sys.stderr) + # Create PR + schema_note = "" + if schema_updated: + schema_note = f"\n**Schema Updated:** Created v{new_version} with new tags: `{', '.join(new_tags)}`\n" + pr_body = f"""## Community Submission Adds {len(submissions)} submission(s) from @{author_username}. 
def get_python_type_name(value) -> str:
    """
    Get a normalized, JSON-style type name for a decoded JSON value.

    Args:
        value: Any value, typically one produced by ``json.load``.

    Returns:
        One of "null", "boolean", "integer", "number", "string", "array" or
        "object". Any other Python type falls back to its class name.
    """
    if value is None:
        return "null"
    # bool is a subclass of int, so it must be tested before int.
    if isinstance(value, bool):
        return "boolean"
    if isinstance(value, int):
        return "integer"
    if isinstance(value, float):
        return "number"
    if isinstance(value, str):
        return "string"
    if isinstance(value, list):
        return "array"
    if isinstance(value, dict):
        return "object"
    return type(value).__name__


def build_tag_type_registry(submissions: list[dict]) -> dict[str, str]:
    """
    Build a registry of tag names to their expected types from submissions.

    The first concrete type seen for a tag wins. A ``null`` value carries no
    type information, so it is only recorded provisionally: a later non-null
    value for the same tag replaces it. Without this, a single ``null`` would
    pin the tag to type "null" and every real value would be rejected later.

    Args:
        submissions: List of submission dictionaries. Entries that are not
            dicts, or whose "tags" value is not a dict, are ignored.

    Returns:
        Dict mapping tag name to expected type
        (e.g., {"internet": "string", "year_built": "integer"})
    """
    tag_types: dict[str, str] = {}

    for submission in submissions:
        if not isinstance(submission, dict):
            continue

        tags = submission.get("tags", {})
        if not isinstance(tags, dict):
            continue

        for key, value in tags.items():
            inferred_type = get_python_type_name(value)

            if key not in tag_types:
                tag_types[key] = inferred_type
            elif tag_types[key] == "null" and inferred_type != "null":
                # Upgrade the provisional "null" to the first real type.
                tag_types[key] = inferred_type
            # Any other conflict: keep the first type (it's already in use).

    return tag_types
+ + Args: + submissions: List of existing submission dictionaries + + Returns: + Dict mapping tag name to expected type (e.g., {"internet": "string", "year_built": "integer"}) + """ + tag_types = {} + + for submission in submissions: + tags = submission.get("tags", {}) + if not isinstance(tags, dict): + continue + + for key, value in tags.items(): + inferred_type = get_python_type_name(value) + + if key not in tag_types: + tag_types[key] = inferred_type + # If there's a conflict, keep the first type (it's already in use) + + return tag_types + + def group_by_identifier(submissions: list[dict]) -> dict[str, list[dict]]: """ Group submissions by their identifier (registration, transponder, or airframe ID). diff --git a/src/contributions/schema.py b/src/contributions/schema.py index 8d9994b..be43819 100644 --- a/src/contributions/schema.py +++ b/src/contributions/schema.py @@ -10,12 +10,59 @@ except ImportError: Draft202012Validator = None -SCHEMA_PATH = Path(__file__).parent.parent.parent / "schemas" / "community_submission.v1.schema.json" +SCHEMAS_DIR = Path(__file__).parent.parent.parent / "schemas" + +# For backwards compatibility +SCHEMA_PATH = SCHEMAS_DIR / "community_submission.v1.schema.json" -def load_schema() -> dict: - """Load the community submission schema.""" - with open(SCHEMA_PATH) as f: +def get_latest_schema_version() -> int: + """ + Find the latest schema version number. + + Returns: + Latest version number (e.g., 1, 2, 3) + """ + import re + pattern = re.compile(r"community_submission\.v(\d+)\.schema\.json$") + max_version = 0 + + for path in SCHEMAS_DIR.glob("community_submission.v*.schema.json"): + match = pattern.search(path.name) + if match: + version = int(match.group(1)) + max_version = max(max_version, version) + + return max_version + + +def get_schema_path(version: int | None = None) -> Path: + """ + Get path to a specific schema version, or latest if version is None. 
+ + Args: + version: Schema version number, or None for latest + + Returns: + Path to schema file + """ + if version is None: + version = get_latest_schema_version() + return SCHEMAS_DIR / f"community_submission.v{version}.schema.json" + + +def load_schema(version: int | None = None) -> dict: + """ + Load the community submission schema. + + Args: + version: Schema version to load. If None, loads the latest version. + + Returns: + Schema dict + """ + schema_path = get_schema_path(version) + with open(schema_path) as f: return json.load(f) diff --git a/src/contributions/update_schema.py b/src/contributions/update_schema.py new file mode 100644 index 0000000..c1610f6 --- /dev/null +++ b/src/contributions/update_schema.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python3 +""" +Update the schema with tag type definitions from existing submissions. + +This script reads all community submissions and generates a new schema version +that includes explicit type definitions for all known tags. + +When new tags are introduced, a new schema version is created (e.g., v1 -> v2 -> v3). 
def get_existing_tag_definitions(schema: dict) -> dict[str, dict]:
    """
    Extract existing tag property definitions from schema.

    Args:
        schema: A schema dict; the definitions are read from
            ``schema["properties"]["tags"]["properties"]``.

    Returns:
        Dict mapping tag name to its JSON Schema definition. Empty when any
        level of that path is missing or is not an object.
    """
    node = schema
    for key in ("properties", "tags", "properties"):
        if not isinstance(node, dict):
            return {}
        node = node.get(key, {})
    return node if isinstance(node, dict) else {}


def type_name_to_json_schema(type_name: str) -> dict:
    """
    Convert a type name to a JSON Schema type definition.

    Args:
        type_name: A name such as "string", "integer", "number", "boolean",
            "null", "array" or "object".

    Returns:
        A fresh JSON Schema fragment for that type. Unknown names fall back
        to a reference to the generic ``#/$defs/tagValue`` definition.
    """
    type_map = {
        "string": {"type": "string"},
        "integer": {"type": "integer"},
        "number": {"type": "number"},
        "boolean": {"type": "boolean"},
        "null": {"type": "null"},
        "array": {"type": "array", "items": {"$ref": "#/$defs/tagScalar"}},
        "object": {"type": "object", "additionalProperties": {"$ref": "#/$defs/tagScalar"}},
    }
    return type_map.get(type_name, {"$ref": "#/$defs/tagValue"})


def generate_new_schema(base_schema: dict, tag_registry: dict[str, str], new_version: int) -> dict:
    """
    Generate a new schema version with explicit tag definitions.

    Tag definitions already present in ``base_schema`` are carried over
    unchanged, so a new version never loses or rewrites a tag that an earlier
    version defined. Tags from ``tag_registry`` that the base schema does not
    define yet are added with a definition derived from their type name.

    Args:
        base_schema: The current schema to base the new one on (not mutated)
        tag_registry: Dict mapping tag name to type name
        new_version: The new version number

    Returns:
        Complete new schema dict
    """
    import copy

    schema = copy.deepcopy(base_schema)

    # Update title with new version
    schema["title"] = f"PlaneQuery Aircraft Community Submission (v{new_version})"

    # Start from what the base schema already defines, then add new tags only.
    tag_properties = dict(get_existing_tag_definitions(schema))
    for tag_name, type_name in tag_registry.items():
        if tag_name not in tag_properties:
            tag_properties[tag_name] = type_name_to_json_schema(type_name)

    # A base schema without "properties" would otherwise raise KeyError here.
    schema.setdefault("properties", {})["tags"] = {
        "type": "object",
        "description": "Community-defined tags. New tags can be added, but must use consistent types.",
        "propertyNames": {
            "type": "string",
            "pattern": "^[a-z][a-z0-9_]{0,63}$"
        },
        # Sorted by tag name so generated schema files diff cleanly.
        "properties": dict(sorted(tag_properties.items())),
        # Still allow additional properties for new tags
        "additionalProperties": {"$ref": "#/$defs/tagValue"}
    }

    return schema
def check_for_new_tags(tag_registry: dict[str, str], current_schema: dict) -> list[str]:
    """
    Report the registry tags that the schema does not define yet.

    Args:
        tag_registry: Dict mapping tag name to type name
        current_schema: Schema whose tag definitions are compared against

    Returns:
        List of new tag names, in registry order
    """
    known = get_existing_tag_definitions(current_schema)
    unseen = []
    for name in tag_registry:
        if name not in known:
            unseen.append(name)
    return unseen


def create_new_schema_version(
    tag_registry: dict[str, str],
    check_only: bool = False
) -> tuple[int | None, list[str]]:
    """
    Write the next schema version when the registry introduces new tags.

    Args:
        tag_registry: Dict mapping tag name to type name
        check_only: If True, report the pending version without writing it

    Returns:
        Tuple of (new_version or None if no update, list_of_new_tags)
    """
    latest = get_latest_schema_version()
    latest_schema = load_schema(latest)

    added = check_for_new_tags(tag_registry, latest_schema)
    if not added:
        return None, []

    next_version = latest + 1
    if not check_only:
        # Only a real run generates the schema and touches the filesystem.
        upgraded = generate_new_schema(latest_schema, tag_registry, next_version)
        target = get_schema_path(next_version)
        with open(target, "w") as out:
            json.dump(upgraded, out, indent=2)
            out.write("\n")

    return next_version, added
def update_schema_from_submissions(check_only: bool = False) -> tuple[int | None, list[str]]:
    """
    Derive the tag registry from all submissions and version the schema.

    Args:
        check_only: If True, only check if update is needed without writing

    Returns:
        Tuple of (new_version or None if no update, list_of_new_tags)
    """
    registry = build_tag_type_registry(read_all_submissions())
    return create_new_schema_version(registry, check_only)


def main():
    """
    Command-line entry point.

    With ``--check`` the process exits with status 1 when a schema update is
    pending and 0 when the schema is current; without it, a new schema
    version is written if needed and the outcome is printed.
    """
    cli = argparse.ArgumentParser(description="Update schema with tag definitions")
    cli.add_argument("--check", action="store_true", help="Check if update needed without writing")
    options = cli.parse_args()

    version, added = update_schema_from_submissions(check_only=options.check)
    tag_list = ", ".join(added)

    if options.check:
        if not version:
            print(f"Schema is up to date (v{get_latest_schema_version()})")
            sys.exit(0)
        else:
            print(f"Schema update needed -> v{version}. New tags: {tag_list}")
            sys.exit(1)
    elif not version:
        print(f"No update needed (v{get_latest_schema_version()})")
    else:
        print(f"Created {get_schema_path(version)}")
        print(f"Added tags: {tag_list}")


if __name__ == "__main__":
    main()
def validate_tag_consistency(data: dict | list, tag_registry: dict[str, str]) -> list[str]:
    """
    Check that tag types in new submissions match existing tag types.

    Tags that are not in the registry are new and are always accepted. An
    integer value is accepted for a tag registered as "number", because every
    JSON integer is also a valid JSON number (and ``{"type": "number"}`` in
    the generated schema accepts it).

    Args:
        data: Single submission dict or list of submissions
        tag_registry: Dict mapping tag name to expected type

    Returns:
        List of error messages. Empty list means validation passed.
    """
    errors: list[str] = []
    submissions = data if isinstance(data, list) else [data]
    # Only index the messages when there is more than one submission.
    multiple = len(submissions) > 1

    for i, submission in enumerate(submissions):
        if not isinstance(submission, dict):
            continue

        prefix = f"[{i}] " if multiple else ""
        tags = submission.get("tags", {})

        if not isinstance(tags, dict):
            continue

        for key, value in tags.items():
            if key not in tag_registry:
                continue

            expected_type = tag_registry[key]
            actual_type = get_python_type_name(value)

            if actual_type == expected_type:
                continue
            if expected_type == "number" and actual_type == "integer":
                # e.g. 2 submitted for a tag first seen as 1.5
                continue

            errors.append(
                f"{prefix}tags.{key}: expected type '{expected_type}', got '{actual_type}'"
            )

    return errors