mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-23 11:36:35 +02:00
slight update for compress by day
This commit is contained in:
@@ -126,7 +126,8 @@ def download_and_merge_base_release(compressed_df: pl.DataFrame) -> tuple[pl.Dat
|
||||
# Reorder columns to match
|
||||
compressed_df = compressed_df.select(base_df.columns)
|
||||
|
||||
- # Concat and deduplicate by icao (keep new data - it comes last)
|
||||
+ # Concat base (old days) with new data (new days)
|
||||
+ # No deduplication needed since they represent different UTC days
|
||||
combined = pl.concat([base_df, compressed_df])
|
||||
print(f"After concat: {len(combined)} records")
|
||||
|
||||
|
||||
@@ -123,6 +123,10 @@ def compress_multi_icao_df(df: pl.DataFrame, verbose: bool = True) -> pl.DataFra
|
||||
# partition_by with as_dict=True returns tuple keys: (date, icao)
|
||||
date_val, icao = group_key
|
||||
compressed = compress_df_polars(group_df, str(icao))
|
||||
+ # Set time to start of UTC day for consistent deduplication
|
||||
+ compressed = compressed.with_columns(
|
||||
+ pl.lit(date_val).cast(pl.Date).cast(pl.Datetime).alias('time')
|
||||
+ )
|
||||
compressed_dfs.append(compressed)
|
||||
|
||||
if compressed_dfs:
|
||||
|
||||
Reference in New Issue
Block a user