Files
OpenAirframes/src/contributions/update_schema.py
T

155 lines
4.8 KiB
Python

#!/usr/bin/env python3
"""
Update the schema with tag type definitions from existing submissions.
This script reads all community submissions and generates a new schema version
that includes explicit type definitions for all known tags.
When new tags are introduced, a new schema version is created (e.g., v1 -> v2 -> v3).
Usage:
python -m src.contributions.update_schema
python -m src.contributions.update_schema --check # Check if update needed
"""
import argparse
import json
import sys
from pathlib import Path
from .read_community_data import read_all_submissions, build_tag_type_registry
from .schema import SCHEMAS_DIR, get_latest_schema_version, get_schema_path, load_schema
def get_existing_tag_definitions(schema: dict) -> dict[str, dict]:
"""Extract existing tag property definitions from schema."""
tags_props = schema.get("properties", {}).get("tags", {}).get("properties", {})
return tags_props
def type_name_to_json_schema(type_name: str) -> dict:
"""Convert a type name to a JSON Schema type definition."""
type_map = {
"string": {"type": "string"},
"integer": {"type": "integer"},
"number": {"type": "number"},
"boolean": {"type": "boolean"},
"null": {"type": "null"},
"array": {"type": "array", "items": {"$ref": "#/$defs/tagScalar"}},
"object": {"type": "object", "additionalProperties": {"$ref": "#/$defs/tagScalar"}},
}
return type_map.get(type_name, {"$ref": "#/$defs/tagValue"})
def generate_updated_schema(base_schema: dict, tag_registry: dict[str, str]) -> dict:
"""
Generate an updated schema with explicit tag definitions.
Args:
base_schema: The current schema to update
tag_registry: Dict mapping tag name to type name
Returns:
Updated schema dict
"""
schema = json.loads(json.dumps(base_schema)) # Deep copy
# Build tag properties with explicit types
tag_properties = {}
for tag_name, type_name in sorted(tag_registry.items()):
tag_properties[tag_name] = type_name_to_json_schema(type_name)
# Only add/update the properties key within tags, preserve everything else
if "properties" in schema and "tags" in schema["properties"]:
schema["properties"]["tags"]["properties"] = tag_properties
return schema
def check_for_new_tags(tag_registry: dict[str, str], current_schema: dict) -> list[str]:
"""
Check which tags in the registry are not yet defined in the schema.
Returns:
List of new tag names
"""
existing_tags = get_existing_tag_definitions(current_schema)
return [tag for tag in tag_registry if tag not in existing_tags]
def update_schema_file(
tag_registry: dict[str, str],
check_only: bool = False
) -> tuple[bool, list[str]]:
"""
Update the v1 schema file with new tag definitions.
Args:
tag_registry: Dict mapping tag name to type name
check_only: If True, only check if update is needed without writing
Returns:
Tuple of (was_updated, list_of_new_tags)
"""
current_schema = load_schema()
# Find new tags
new_tags = check_for_new_tags(tag_registry, current_schema)
if not new_tags:
return False, []
if check_only:
return True, new_tags
# Generate and write updated schema (in place)
updated_schema = generate_updated_schema(current_schema, tag_registry)
schema_path = get_schema_path()
with open(schema_path, "w") as f:
json.dump(updated_schema, f, indent=2)
f.write("\n")
return True, new_tags
def update_schema_from_submissions(check_only: bool = False) -> tuple[bool, list[str]]:
"""
Read all submissions and update the schema if needed.
Args:
check_only: If True, only check if update is needed without writing
Returns:
Tuple of (was_updated, list_of_new_tags)
"""
submissions = read_all_submissions()
tag_registry = build_tag_type_registry(submissions)
return update_schema_file(tag_registry, check_only)
def main():
parser = argparse.ArgumentParser(description="Update schema with tag definitions")
parser.add_argument("--check", action="store_true", help="Check if update needed without writing")
args = parser.parse_args()
was_updated, new_tags = update_schema_from_submissions(check_only=args.check)
if args.check:
if was_updated:
print(f"Schema update needed. New tags: {', '.join(new_tags)}")
sys.exit(1)
else:
print("Schema is up to date")
sys.exit(0)
else:
if was_updated:
print(f"Updated {get_schema_path()}")
print(f"Added tags: {', '.join(new_tags)}")
else:
print("No update needed")
if __name__ == "__main__":
main()