# Mirror of https://github.com/PlaneQuery/OpenAirframes.git
# Synced 2026-05-05 17:15:29 +02:00 · 142 lines · 4.1 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Generate a daily CSV of all community contributions.
|
|
|
|
Reads all JSON files from the community/ directory and outputs a sorted CSV
|
|
with creation_timestamp as the first column and contributor_uuid as the last column.
|
|
|
|
Usage:
|
|
python -m src.contributions.create_daily_community_release
|
|
"""
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
import json
|
|
import sys
|
|
|
|
import pandas as pd
|
|
|
|
|
|
# Directory scanned for community submission JSON files, resolved relative to
# this module's location: <directory of this file>/../../community.
COMMUNITY_DIR = Path(__file__).parent.parent.parent / "community"
|
|
# Output directory for the generated CSV. This is a relative path, so it is
# resolved against the current working directory at run time.
OUT_ROOT = Path("data/planequery_aircraft")
|
|
|
|
|
|
def read_all_submissions(community_dir: Path) -> list[dict]:
    """Read all JSON submissions found directly inside ``community_dir``.

    Files matching ``*.json`` are visited in sorted path order. A file may
    hold either a single JSON object or a list of objects; both forms are
    flattened into one list. Files that cannot be opened, decoded or parsed
    are skipped with a warning on stderr, and so are entries that are not
    JSON objects (they could not be turned into table rows later).

    Args:
        community_dir: Directory searched (non-recursively) for JSON files.

    Returns:
        The submission dicts, in file order and then in-file order.
    """
    all_submissions: list[dict] = []

    for json_file in sorted(community_dir.glob("*.json")):
        try:
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(json_file, encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
            print(f"Warning: Failed to read {json_file}: {e}", file=sys.stderr)
            continue

        # Normalize to list
        entries = data if isinstance(data, list) else [data]
        for entry in entries:
            if isinstance(entry, dict):
                all_submissions.append(entry)
            else:
                print(
                    f"Warning: Skipping non-object entry in {json_file}: {entry!r}",
                    file=sys.stderr,
                )

    return all_submissions
|
|
|
|
|
|
def submissions_to_dataframe(submissions: list[dict]) -> pd.DataFrame:
    """Convert submissions to a DataFrame with a fixed column ordering.

    Column order:
    - creation_timestamp (first)
    - transponder_code_hex
    - registration_number
    - planequery_airframe_id
    - contributor_name
    - [other columns alphabetically]
    - contributor_uuid (last)

    Any of the named columns missing from the input is added and filled with
    ``None``. Rows are sorted by ``creation_timestamp`` ascending with missing
    timestamps last; rows sharing a timestamp keep their input order.

    Args:
        submissions: Submission dicts, one per output row.

    Returns:
        The ordered, sorted DataFrame with a fresh 0..n-1 index, or an empty
        DataFrame without columns when ``submissions`` is empty.
    """
    if not submissions:
        return pd.DataFrame()

    # Single source of truth for both the required columns and their order.
    first_cols = [
        "creation_timestamp",
        "transponder_code_hex",
        "registration_number",
        "planequery_airframe_id",
        "contributor_name",
    ]
    last_cols = ["contributor_uuid"]

    df = pd.DataFrame(submissions)

    # Ensure required columns exist
    for col in first_cols + last_cols:
        if col not in df.columns:
            df[col] = None

    # Mergesort is stable, so ties keep the order in which they were read and
    # the generated file is reproducible from the same inputs.
    df = df.sort_values(
        "creation_timestamp",
        ascending=True,
        na_position="last",
        kind="mergesort",
    )

    # Everything not pinned to the front or back goes in between, alphabetically.
    pinned = set(first_cols) | set(last_cols)
    middle_cols = sorted(col for col in df.columns if col not in pinned)

    return df[first_cols + middle_cols + last_cols].reset_index(drop=True)
|
|
|
|
|
|
def main() -> Path:
    """Generate the daily community contributions CSV.

    Reads every submission from ``COMMUNITY_DIR``, builds the ordered table
    and writes it to
    ``OUT_ROOT / planequery_aircraft_community_<start>_<today>.csv``, where
    ``<today>`` is the current UTC date and ``<start>`` is the date of the
    earliest parseable ``creation_timestamp`` (falling back to ``<today>``
    when there is none). A header-only CSV is still written when no
    submissions are found.

    Returns:
        Path of the CSV file that was written.
    """
    date_str = datetime.now(timezone.utc).strftime("%Y-%m-%d")

    print(f"Reading community submissions from {COMMUNITY_DIR}")
    submissions = read_all_submissions(COMMUNITY_DIR)

    if not submissions:
        print("No community submissions found.")
        # Still create an empty CSV with headers
        df = pd.DataFrame(columns=[
            "creation_timestamp",
            "transponder_code_hex",
            "registration_number",
            "planequery_airframe_id",
            "contributor_name",
            "tags",
            "contributor_uuid",
        ])
    else:
        print(f"Found {len(submissions)} total submissions")
        df = submissions_to_dataframe(submissions)

    # Determine date range for filename
    start_date_str = date_str
    if not df.empty:
        # Coerce unparseable values to NaT instead of aborting the release,
        # and normalise to UTC so the start date uses the same clock as
        # date_str and mixed naive/aware timestamps do not raise.
        timestamps = pd.to_datetime(
            df["creation_timestamp"], errors="coerce", utc=True
        )
        earliest = timestamps.min()
        # min() is NaT when no timestamp could be parsed; keep the fallback.
        if not pd.isna(earliest):
            start_date_str = earliest.strftime("%Y-%m-%d")

    # Output
    OUT_ROOT.mkdir(parents=True, exist_ok=True)
    output_file = OUT_ROOT / f"planequery_aircraft_community_{start_date_str}_{date_str}.csv"

    df.to_csv(output_file, index=False)

    print(f"Saved: {output_file}")
    print(f"Total contributions: {len(df)}")

    return output_file
|
|
|
|
|
|
# Run the generator only when executed as a script or via ``python -m``,
# not when this module is imported.
if __name__ == "__main__":
    main()
|