mirror of
https://github.com/PlaneQuery/OpenAirframes.git
synced 2026-04-24 12:06:31 +02:00
change to use path
This commit is contained in:
@@ -20,13 +20,14 @@ from datetime import datetime
|
||||
import orjson
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# Configuration
|
||||
# ============================================================================
|
||||
|
||||
OUTPUT_DIR = "./data/output"
|
||||
OUTPUT_DIR = Path("./data/output")
|
||||
os.makedirs(OUTPUT_DIR, exist_ok=True)
|
||||
|
||||
PARQUET_DIR = os.path.join(OUTPUT_DIR, "parquet_output")
|
||||
|
||||
@@ -144,12 +144,12 @@ def main():
|
||||
df_compressed = compress_parquet_part(args.part_id, args.date)
|
||||
|
||||
# Write parquet
|
||||
df_compressed_output = Path(OUTPUT_DIR) / "compressed" / f"part_{args.part_id}_{args.date}.parquet"
|
||||
df_compressed_output = OUTPUT_DIR / "compressed" / f"part_{args.part_id}_{args.date}.parquet"
|
||||
os.makedirs(df_compressed_output.parent, exist_ok=True)
|
||||
df_compressed.write_parquet(df_compressed_output, compression='snappy')
|
||||
|
||||
# Write CSV
|
||||
csv_output = Path(OUTPUT_DIR) / "compressed" / f"part_{args.part_id}_{args.date}.csv"
|
||||
csv_output = OUTPUT_DIR / "compressed" / f"part_{args.part_id}_{args.date}.csv"
|
||||
df_compressed.write_csv(csv_output)
|
||||
|
||||
print(df_compressed)
|
||||
|
||||
Reference in New Issue
Block a user