mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 11:36:35 +02:00
Fix Community Submission export. Fix CSV concatenation logic to prevent duplicates when there is no new ADSB.lol data.
This commit is contained in:
@@ -37,14 +37,31 @@ def main():
|
||||
if args.concat_with_latest_csv:
|
||||
print("Loading latest CSV from GitHub releases to concatenate with...")
|
||||
from src.get_latest_release import get_latest_aircraft_adsb_csv_df
|
||||
df_latest_csv, csv_date = get_latest_aircraft_adsb_csv_df()
|
||||
# Ensure column order matches before concatenating
|
||||
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
|
||||
df_final = concat_compressed_dfs(df_latest_csv, df)
|
||||
df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_date}_{args.date}.csv.gz"
|
||||
df_final.write_csv(final_csv_output_path, compression="gzip")
|
||||
from datetime import datetime
|
||||
|
||||
df_latest_csv, csv_start_date, csv_end_date = get_latest_aircraft_adsb_csv_df()
|
||||
|
||||
# Compare dates: the check below treats end_date as inclusive (>=), so if csv_end_date >= args.date,
|
||||
# the latest CSV already includes this day's data
|
||||
csv_end_dt = datetime.strptime(csv_end_date, "%Y-%m-%d")
|
||||
args_dt = datetime.strptime(args.date, "%Y-%m-%d")
|
||||
|
||||
if csv_end_dt >= args_dt:
|
||||
print(f"Latest CSV already includes data through {args.date} (end_date={csv_end_date} is exclusive)")
|
||||
print("Writing latest CSV directly without concatenation to avoid duplicates")
|
||||
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{csv_end_date}.csv.gz"
|
||||
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
df_latest_csv.write_csv(final_csv_output_path, compression="gzip")
|
||||
else:
|
||||
print(f"Concatenating latest CSV (through {csv_end_date}) with new data ({args.date})")
|
||||
# Ensure column order matches before concatenating
|
||||
df_latest_csv = df_latest_csv.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
from src.adsb.compress_adsb_to_aircraft_data import concat_compressed_dfs
|
||||
df_final = concat_compressed_dfs(df_latest_csv, df)
|
||||
df_final = df_final.select(CORRECT_ORDER_OF_COLUMNS)
|
||||
final_csv_output_path = OUTPUT_DIR / f"openairframes_adsb_{csv_start_date}_{args.date}.csv.gz"
|
||||
df_final.write_csv(final_csv_output_path, compression="gzip")
|
||||
print(f"Final CSV written to {final_csv_output_path}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -24,7 +24,7 @@ def read_all_submissions(community_dir: Path) -> list[dict]:
|
||||
"""Read all JSON submissions from the community directory."""
|
||||
all_submissions = []
|
||||
|
||||
for json_file in sorted(community_dir.glob("*.json")):
|
||||
for json_file in sorted(community_dir.glob("**/*.json")):
|
||||
try:
|
||||
with open(json_file) as f:
|
||||
data = json.load(f)
|
||||
|
||||
@@ -207,7 +207,11 @@ def download_latest_aircraft_adsb_csv(
|
||||
|
||||
import polars as pl
|
||||
def get_latest_aircraft_adsb_csv_df():
|
||||
"""Download and load the latest ADS-B CSV from GitHub releases."""
|
||||
"""Download and load the latest ADS-B CSV from GitHub releases.
|
||||
|
||||
Returns:
|
||||
tuple: (df, start_date, end_date) where dates are in YYYY-MM-DD format
|
||||
"""
|
||||
import re
|
||||
|
||||
csv_path = download_latest_aircraft_adsb_csv()
|
||||
@@ -231,15 +235,16 @@ def get_latest_aircraft_adsb_csv_df():
|
||||
if df[col].dtype == pl.Utf8:
|
||||
df = df.with_columns(pl.col(col).fill_null(""))
|
||||
|
||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
|
||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||
# Extract start and end dates from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
|
||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_(\d{4}-\d{2}-\d{2})\.csv", str(csv_path))
|
||||
if not match:
|
||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||
raise ValueError(f"Could not extract dates from filename: {csv_path.name}")
|
||||
|
||||
date_str = match.group(1)
|
||||
start_date = match.group(1)
|
||||
end_date = match.group(2)
|
||||
print(df.columns)
|
||||
print(df.dtypes)
|
||||
return df, date_str
|
||||
return df, start_date, end_date
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user