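Slight update for compress-by-day: set `time` to the start of each UTC day and clarify the merge comment (no deduplication needed across days)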

This commit is contained in:
ggman12
2026-02-15 20:32:33 -05:00
parent be33fd2eaf
commit 9964ce576b
2 changed files with 6 additions and 1 deletions
+2 -1
View File
@@ -126,7 +126,8 @@ def download_and_merge_base_release(compressed_df: pl.DataFrame) -> tuple[pl.Dat
# Reorder columns to match
compressed_df = compressed_df.select(base_df.columns)
# Concat and deduplicate by icao (keep new data - it comes last)
# Concat base (old days) with new data (new days)
# No deduplication needed since they represent different UTC days
combined = pl.concat([base_df, compressed_df])
print(f"After concat: {len(combined)} records")
@@ -123,6 +123,10 @@ def compress_multi_icao_df(df: pl.DataFrame, verbose: bool = True) -> pl.DataFra
# partition_by with as_dict=True returns tuple keys: (date, icao)
date_val, icao = group_key
compressed = compress_df_polars(group_df, str(icao))
# Set time to start of UTC day for consistent deduplication
compressed = compressed.with_columns(
pl.lit(date_val).cast(pl.Date).cast(pl.Datetime).alias('time')
)
compressed_dfs.append(compressed)
if compressed_dfs: