Mirror of https://github.com/PlaneQuery/OpenAirframes.git (synced 2026-06-08 06:03:55 +02:00)
use .csv.gz
This commit is contained in:
@@ -227,7 +227,7 @@ jobs:
|
|||||||
uses: actions/upload-artifact@v4
|
uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: adsb-release
|
name: adsb-release
|
||||||
path: data/openairframes/openairframes_adsb_*.csv
|
path: data/openairframes/openairframes_adsb_*.csv.gz
|
||||||
retention-days: 1
|
retention-days: 1
|
||||||
|
|
||||||
build-community:
|
build-community:
|
||||||
@@ -388,7 +388,7 @@ jobs:
|
|||||||
|
|
||||||
# Find files from artifacts using find (handles nested structures)
|
# Find files from artifacts using find (handles nested structures)
|
||||||
CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
|
CSV_FILE_FAA=$(find artifacts/faa -name "openairframes_faa_*.csv" -type f 2>/dev/null | head -1)
|
||||||
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv" -type f 2>/dev/null | head -1)
|
CSV_FILE_ADSB=$(find artifacts/adsb -name "openairframes_adsb_*.csv.gz" -type f 2>/dev/null | head -1)
|
||||||
CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
|
CSV_FILE_COMMUNITY=$(find artifacts/community -name "openairframes_community_*.csv" -type f 2>/dev/null | head -1)
|
||||||
ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
|
ZIP_FILE=$(find artifacts/faa -name "ReleasableAircraft_*.zip" -type f 2>/dev/null | head -1)
|
||||||
JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
|
JSON_FILE_ADSBX=$(find artifacts/adsbexchange -name "basic-ac-db_*.json.gz" -type f 2>/dev/null | head -1)
|
||||||
|
|||||||
@@ -176,7 +176,7 @@ def main():
|
|||||||
if args.start_date and args.end_date:
|
if args.start_date and args.end_date:
|
||||||
# Historical mode
|
# Historical mode
|
||||||
output_id = f"{args.start_date}_{args.end_date}"
|
output_id = f"{args.start_date}_{args.end_date}"
|
||||||
output_filename = f"openairframes_adsb_{args.start_date}_{args.end_date}.csv"
|
output_filename = f"openairframes_adsb_{args.start_date}_{args.end_date}.csv.gz"
|
||||||
print(f"Combining chunks for date range: {args.start_date} to {args.end_date}")
|
print(f"Combining chunks for date range: {args.start_date} to {args.end_date}")
|
||||||
else:
|
else:
|
||||||
# Daily mode - use same date for start and end
|
# Daily mode - use same date for start and end
|
||||||
@@ -187,7 +187,7 @@ def main():
|
|||||||
|
|
||||||
date_str = target_day.strftime("%Y-%m-%d")
|
date_str = target_day.strftime("%Y-%m-%d")
|
||||||
output_id = date_str
|
output_id = date_str
|
||||||
output_filename = f"openairframes_adsb_{date_str}_{date_str}.csv"
|
output_filename = f"openairframes_adsb_{date_str}_{date_str}.csv.gz"
|
||||||
print(f"Combining chunks for {date_str}")
|
print(f"Combining chunks for {date_str}")
|
||||||
|
|
||||||
chunks_dir = args.chunks_dir
|
chunks_dir = args.chunks_dir
|
||||||
|
|||||||
@@ -264,7 +264,7 @@ def get_latest_aircraft_adsb_csv_df():
|
|||||||
if df[col].dtype == pl.Utf8:
|
if df[col].dtype == pl.Utf8:
|
||||||
df = df.with_columns(pl.col(col).fill_null(""))
|
df = df.with_columns(pl.col(col).fill_null(""))
|
||||||
|
|
||||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv
|
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
|
||||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||||
if not match:
|
if not match:
|
||||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ if __name__ == '__main__':
|
|||||||
OUT_ROOT = Path("data/openairframes")
|
OUT_ROOT = Path("data/openairframes")
|
||||||
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
OUT_ROOT.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv"
|
output_file = OUT_ROOT / f"openairframes_adsb_{start_date_str}_{date_str}.csv.gz"
|
||||||
df_combined.write_csv(output_file)
|
df_combined.write_csv(output_file)
|
||||||
|
|
||||||
print(f"Saved: {output_file}")
|
print(f"Saved: {output_file}")
|
||||||
|
|||||||
@@ -165,7 +165,7 @@ def download_latest_aircraft_adsb_csv(
|
|||||||
Path to the downloaded file
|
Path to the downloaded file
|
||||||
"""
|
"""
|
||||||
assets = get_latest_release_assets(repo, github_token=github_token)
|
assets = get_latest_release_assets(repo, github_token=github_token)
|
||||||
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv$")
|
asset = pick_asset(assets, name_regex=r"^openairframes_adsb_.*\.csv(\.gz)?$")
|
||||||
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
saved_to = download_asset(asset, output_dir / asset.name, github_token=github_token)
|
||||||
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
print(f"Downloaded: {asset.name} ({asset.size} bytes) -> {saved_to}")
|
||||||
return saved_to
|
return saved_to
|
||||||
@@ -176,7 +176,7 @@ def get_latest_aircraft_adsb_csv_df():
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
df = pd.read_csv(csv_path)
|
df = pd.read_csv(csv_path)
|
||||||
df = df.fillna("")
|
df = df.fillna("")
|
||||||
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv
|
# Extract start date from filename pattern: openairframes_adsb_{start_date}_{end_date}.csv[.gz]
|
||||||
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
match = re.search(r"openairframes_adsb_(\d{4}-\d{2}-\d{2})_", str(csv_path))
|
||||||
if not match:
|
if not match:
|
||||||
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
raise ValueError(f"Could not extract date from filename: {csv_path.name}")
|
||||||
|
|||||||
Reference in New Issue
Block a user