change to use path

This commit is contained in:
ggman12
2026-02-17 16:16:35 -05:00
parent 94cf50ac3a
commit db98b3021a
2 changed files with 4 additions and 3 deletions
+2 -1
View File
@@ -20,13 +20,14 @@ from datetime import datetime
import orjson
import pyarrow as pa
import pyarrow.parquet as pq
from pathlib import Path
# ============================================================================
# Configuration
# ============================================================================
OUTPUT_DIR = "./data/output"
OUTPUT_DIR = Path("./data/output")
os.makedirs(OUTPUT_DIR, exist_ok=True)
PARQUET_DIR = os.path.join(OUTPUT_DIR, "parquet_output")
+2 -2
View File
@@ -144,12 +144,12 @@ def main():
df_compressed = compress_parquet_part(args.part_id, args.date)
# Write parquet
df_compressed_output = Path(OUTPUT_DIR) / "compressed" / f"part_{args.part_id}_{args.date}.parquet"
df_compressed_output = OUTPUT_DIR / "compressed" / f"part_{args.part_id}_{args.date}.parquet"
os.makedirs(df_compressed_output.parent, exist_ok=True)
df_compressed.write_parquet(df_compressed_output, compression='snappy')
# Write CSV
csv_output = Path(OUTPUT_DIR) / "compressed" / f"part_{args.part_id}_{args.date}.csv"
csv_output = OUTPUT_DIR / "compressed" / f"part_{args.part_id}_{args.date}.csv"
df_compressed.write_csv(csv_output)
print(df_compressed)