import tarfile
from contextlib import ExitStack
from pathlib import Path


def split_folders_into_gzip_archives(
    extract_dir: Path,
    tar_output_dir: Path,
    icaos: list[str],
    parts: int = 4,
) -> list[str]:
    """Pack the entries of ``extract_dir / "traces"`` into ``parts`` tar.gz files.

    The entries directly under the ``traces`` directory are sorted and dealt
    out round-robin, so entry ``k`` (0-based, in sorted order) lands in
    archive ``k % parts``. Each entry is stored under its own base name, so
    the archives do not contain the ``traces`` prefix.

    Args:
        extract_dir: Directory that contains a ``traces`` sub-directory.
        tar_output_dir: Directory the archives are written to. It is created
            if missing. Archives are named
            ``<tar_output_dir.name>_part_<n>.tar.gz`` with ``n`` starting at 1.
        icaos: Accepted for interface compatibility; not used by this function.
        parts: Number of archives to produce. Must be at least 1.

    Returns:
        The archive paths as strings, ordered by part number. All ``parts``
        archives are created even when some of them receive no entries.

    Raises:
        ValueError: If ``parts`` is smaller than 1.
        FileNotFoundError: If ``extract_dir / "traces"`` does not exist.
    """
    if parts < 1:
        raise ValueError(f"parts must be at least 1, got {parts}")

    extract_dir = Path(extract_dir)
    tar_output_dir = Path(tar_output_dir)

    # List the input first so a missing traces directory fails before any
    # output is created.
    traces_dir = extract_dir / "traces"
    buckets = sorted(traces_dir.iterdir())

    tar_output_dir.mkdir(parents=True, exist_ok=True)
    tar_paths = [
        tar_output_dir / f"{tar_output_dir.name}_part_{i + 1}.tar.gz"
        for i in range(parts)
    ]

    # ExitStack closes every archive even if adding an entry raises; without
    # the close the tar end-of-archive blocks and gzip trailer are not
    # reliably flushed to disk.
    with ExitStack() as stack:
        tars = [
            stack.enter_context(tarfile.open(tar_path, "w:gz"))
            for tar_path in tar_paths
        ]
        for idx, bucket_path in enumerate(buckets):
            tars[idx % parts].add(bucket_path, arcname=bucket_path.name)

    return [str(tar_path) for tar_path in tar_paths]