mirror of
https://github.com/0xMarcio/cve.git
synced 2026-02-12 18:42:46 +00:00
300 lines
9.7 KiB
Python
300 lines
9.7 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
from pathlib import Path
|
|
from typing import Dict, Iterable, List, Set, Tuple
|
|
|
|
from utils import (
|
|
API_DIR,
|
|
DATA_DIR,
|
|
DOCS_DIR,
|
|
SNAPSHOT_DIR,
|
|
load_json,
|
|
save_json,
|
|
today_str,
|
|
ensure_dirs,
|
|
load_poc_index,
|
|
slugify,
|
|
stable_unique,
|
|
)
|
|
|
|
# Default input files, both resolved relative to the shared data directory.
EPSS_PATH: Path = DATA_DIR / "epss.json"
KEV_PATH: Path = DATA_DIR / "kev.json"

# Default selection parameters (each can be overridden from the command line).
DEFAULT_HIGH_EPSS_THRESHOLD: float = 0.5  # minimum EPSS score for the high-EPSS list
DEFAULT_HIGH_EPSS_LIMIT: int = 250  # maximum rows kept in the high-EPSS list
DEFAULT_TOP_KEV: int = 75  # number of KEV rows surfaced in the top list
|
|
|
|
|
|
def load_inputs(kev_path: Path, epss_path: Path) -> Tuple[Dict, Dict]:
    """Load the KEV and EPSS JSON documents.

    Each file is read through ``load_json`` with an empty-dict default; any
    falsy result is also replaced by an empty dict.

    Returns:
        A ``(kev_data, epss_data)`` tuple.
    """
    documents = []
    for source in (kev_path, epss_path):
        loaded = load_json(source, default={})
        documents.append(loaded if loaded else {})
    kev_data, epss_data = documents
    return kev_data, epss_data
|
|
|
|
|
|
def enrich_kev(kev_items: List[Dict], epss_lookup: Dict[str, Dict], poc_index: Dict[str, Dict]) -> List[Dict]:
    """Normalise KEV entries and attach EPSS scores and PoC counts.

    Every field is read from its snake_case key first and, where one exists,
    from a camelCase alternative (``cveID``, ``vendorProject``, ``dateAdded``,
    ``dueDate``, ``shortDescription``, ``requiredAction``). Entries without a
    CVE id are dropped. CVE ids are upper-cased before being used as lookup
    keys.

    Returns:
        The normalised rows ordered by descending EPSS percentile (a missing
        percentile counts as 0), with ties broken by CVE id.
    """

    def _rank(record: Dict) -> Tuple[float, str]:
        # Negate the percentile so an ascending sort puts the highest first.
        return (-float(record.get("percentile") or 0), record["cve"])

    records: List[Dict] = []
    for item in kev_items:
        raw_id = item.get("cve") or item.get("cveID") or ""
        if raw_id:
            cve_id = raw_id.upper()
            scores = epss_lookup.get(cve_id, {})
            poc_total = len(poc_index.get(cve_id, {}).get("poc", []))

            record: Dict = {"cve": cve_id}
            record["vendor"] = item.get("vendor") or item.get("vendorProject", "")
            record["product"] = item.get("product", "")
            record["date_added"] = item.get("date_added") or item.get("dateAdded")
            record["due_date"] = item.get("due_date") or item.get("dueDate")
            record["short_description"] = item.get("short_description") or item.get("shortDescription", "")
            record["required_action"] = item.get("required_action") or item.get("requiredAction", "")
            record["notes"] = item.get("notes", "")
            record["epss"] = scores.get("epss")
            record["percentile"] = scores.get("percentile")
            record["poc_count"] = poc_total
            records.append(record)

    return sorted(records, key=_rank)
|
|
|
|
|
|
def build_epss_lookup(epss_items: List[Dict]) -> Dict[str, Dict]:
    """Index EPSS rows by upper-cased CVE id.

    Rows whose ``cve`` value is missing or empty are ignored. When several
    rows share an id, the last one wins.
    """
    lookup: Dict[str, Dict] = {}
    for record in epss_items:
        identifier = record.get("cve")
        if identifier:
            lookup[identifier.upper()] = record
    return lookup
|
|
|
|
|
|
def build_high_epss_not_in_kev(
    epss_items: List[Dict],
    kev_set: Set[str],
    poc_index: Dict[str, Dict],
    *,
    threshold: float,
    limit: int,
) -> List[Dict]:
    """Collect EPSS rows at or above ``threshold`` that are not KEV entries.

    Rows are taken in the order they appear in ``epss_items`` and collection
    stops as soon as ``limit`` rows have been gathered.
    NOTE(review): this only yields the *highest* scores if ``epss_items`` is
    already sorted by descending EPSS - confirm against the producer.

    Args:
        epss_items: EPSS rows; each is expected to carry ``cve``, ``epss`` and
            ``percentile`` keys.
        kev_set: Upper-cased CVE ids to exclude.
        poc_index: Mapping of CVE id to a dict whose ``poc`` list is counted.
        threshold: Minimum EPSS score (inclusive) a row needs to be kept.
        limit: Maximum number of rows returned; zero or less yields no rows.

    Returns:
        A list of ``{"cve", "epss", "percentile", "poc_count"}`` dicts. The
        ``epss`` and ``percentile`` values are passed through unchanged.
    """
    selected: List[Dict] = []
    # Without this guard the first matching row would be appended before the
    # limit check runs, so ``limit=0`` would still return one row.
    if limit <= 0:
        return selected

    for row in epss_items:
        # ``or ""`` also covers rows where the key exists with a None value.
        cve = (row.get("cve") or "").upper()
        if not cve or cve in kev_set:
            continue
        # Coerce so that scores stored as strings compare numerically.
        if float(row.get("epss") or 0.0) < threshold:
            continue
        selected.append(
            {
                "cve": cve,
                "epss": row.get("epss"),
                "percentile": row.get("percentile"),
                "poc_count": len(poc_index.get(cve, {}).get("poc") or []),
            }
        )
        if len(selected) >= limit:
            break
    return selected
|
|
|
|
|
|
def build_cve_details(
    kev_enriched: Iterable[Dict],
    high_epss: Iterable[Dict],
    poc_index: Dict[str, Dict],
) -> Dict[str, Dict]:
    """Assemble one detail record per CVE seen in the KEV or high-EPSS rows.

    A record starts from the PoC index (``desc`` and ``poc``) with all other
    fields set to ``None``. KEV rows then fill in the ``kev`` sub-dict plus
    ``epss``, ``percentile``, ``vendor`` and ``product``. High-EPSS rows only
    supply ``epss``/``percentile`` when the record has no EPSS value yet.

    Returns:
        Mapping of CVE id to its detail record, in first-seen order.
    """
    kev_fields = ("date_added", "due_date", "short_description", "required_action", "notes")
    top_level_fields = ("epss", "percentile", "vendor", "product")
    catalog: Dict[str, Dict] = {}

    def _detail_for(cve_id: str) -> Dict:
        # Return the existing record, creating a blank one on first use.
        record = catalog.get(cve_id)
        if record is None:
            source = poc_index.get(cve_id, {})
            links = source.get("poc", [])
            record = {
                "cve": cve_id,
                "description": source.get("desc", ""),
                "poc_links": links,
                "poc_count": len(links),
                "kev": None,
                "epss": None,
                "percentile": None,
                "vendor": None,
                "product": None,
            }
            catalog[cve_id] = record
        return record

    for kev_row in kev_enriched:
        record = _detail_for(kev_row["cve"])
        record["kev"] = {name: kev_row.get(name) for name in kev_fields}
        for name in top_level_fields:
            record[name] = kev_row.get(name)

    for epss_row in high_epss:
        record = _detail_for(epss_row["cve"])
        if record.get("epss") is not None:
            # A value already set from a KEV row takes precedence.
            continue
        record["epss"] = epss_row.get("epss")
        record["percentile"] = epss_row.get("percentile")

    return catalog
|
|
|
|
|
|
def build_vendor_map(details: Dict[str, Dict]) -> Dict[str, Dict]:
    """Group CVE ids by vendor slug.

    Detail records without a vendor are skipped. Each group keeps the vendor
    name from the first record that produced its slug, together with a sorted
    list of CVE ids.

    Returns:
        Mapping of slug to ``{"vendor": name, "cves": [...]}``, ordered by
        slug.
    """
    grouped: Dict[str, Dict] = {}
    for record in details.values():
        name = record.get("vendor")
        if name:
            key = slugify(name)
            if key not in grouped:
                grouped[key] = {"vendor": name, "cves": []}
            grouped[key]["cves"].append(record["cve"])

    for bucket in grouped.values():
        bucket["cves"].sort()

    ordered: Dict[str, Dict] = {}
    for key in sorted(grouped):
        ordered[key] = grouped[key]
    return ordered
|
|
|
|
|
|
def truncate_description(text: str, limit: int = 220) -> str:
    """Collapse whitespace in ``text`` and shorten it to at most ``limit`` characters.

    Runs of whitespace (including newlines) become single spaces. Text longer
    than ``limit`` is cut and finished with ``"..."``; the ellipsis counts
    towards the limit. When ``limit`` is too small to hold the ellipsis, the
    text is hard-cut instead, so the result never exceeds ``limit``.

    Args:
        text: Source text; any falsy value (``None``, ``""``) yields ``""``.
        limit: Maximum length of the returned string.

    Returns:
        The normalised, possibly shortened string.
    """
    if not text:
        return ""
    collapsed = " ".join(text.split())
    if len(collapsed) <= limit:
        return collapsed

    ellipsis = "..."
    if limit < len(ellipsis):
        # ``limit - 3`` would be negative here and slice from the end of the
        # string, producing output longer than ``limit``.
        return collapsed[: max(limit, 0)]
    # Strip so the ellipsis never follows a dangling space.
    return collapsed[: limit - len(ellipsis)].rstrip() + ellipsis
|
|
|
|
|
|
def build_joined(
    kev_data: Dict,
    epss_data: Dict,
    poc_index: Dict[str, Dict],
    *,
    top_kev: int = DEFAULT_TOP_KEV,
    high_epss_threshold: float = DEFAULT_HIGH_EPSS_THRESHOLD,
    high_epss_limit: int = DEFAULT_HIGH_EPSS_LIMIT,
    extra_cves: Iterable[str] | None = None,
) -> Dict:
    """Join KEV, EPSS and PoC data into the payload used for the API files.

    Args:
        kev_data: KEV document; its rows are read from the ``items`` key.
        epss_data: EPSS document; its rows are read from the ``items`` key.
        poc_index: Mapping of CVE id to a dict with ``desc`` and ``poc``.
        top_kev: Number of enriched KEV rows kept for the top list.
        high_epss_threshold: Minimum EPSS score for the high-EPSS list.
        high_epss_limit: Maximum number of rows in the high-EPSS list.
        extra_cves: Additional CVE ids that should get a detail record even if
            they are in neither list. Blank ids are ignored.

    Returns:
        A dict with the keys ``joined`` (generation date plus both top lists),
        ``kev_enriched``, ``epss_items``, ``details`` and ``vendors``.
    """
    kev_items = kev_data.get("items") or []
    epss_items = epss_data.get("items") or []

    epss_lookup = build_epss_lookup(epss_items)
    kev_enriched = enrich_kev(kev_items, epss_lookup, poc_index)
    kev_top = kev_enriched[:top_kev]

    kev_set = {row["cve"] for row in kev_enriched}
    high_epss = build_high_epss_not_in_kev(
        epss_items,
        kev_set,
        poc_index,
        threshold=high_epss_threshold,
        limit=high_epss_limit,
    )

    details = build_cve_details(kev_top, high_epss, poc_index)

    if extra_cves:
        # Skip blank ids: they would otherwise become a detail record keyed "".
        requested = {cve.strip().upper() for cve in extra_cves if cve and cve.strip()}
        kev_lookup = {row["cve"]: row for row in kev_enriched}
        # ``epss_lookup`` from above is reused; rebuilding it would only
        # repeat a full pass over ``epss_items``.
        for cve in sorted(requested):
            if cve in details:
                continue
            poc_entry = poc_index.get(cve, {})
            epss_row = epss_lookup.get(cve, {})
            kev_row = kev_lookup.get(cve)
            detail = {
                "cve": cve,
                "description": poc_entry.get("desc", ""),
                "poc_links": poc_entry.get("poc", []),
                "poc_count": len(poc_entry.get("poc", [])),
                "kev": None,
                "epss": epss_row.get("epss"),
                "percentile": epss_row.get("percentile"),
                "vendor": None,
                "product": None,
            }
            if kev_row:
                detail["kev"] = {
                    "date_added": kev_row.get("date_added"),
                    "due_date": kev_row.get("due_date"),
                    "short_description": kev_row.get("short_description"),
                    "required_action": kev_row.get("required_action"),
                    "notes": kev_row.get("notes"),
                }
                detail["vendor"] = kev_row.get("vendor")
                detail["product"] = kev_row.get("product")
            details[cve] = detail

    vendors = build_vendor_map(details)

    # Add a display summary to each listed row. ``kev_top`` is a slice of
    # ``kev_enriched`` and holds the same dict objects, so those rows gain the
    # ``summary`` key in ``kev_enriched`` as well.
    for collection in (kev_top, high_epss):
        for row in collection:
            desc = poc_index.get(row["cve"], {}).get("desc") or ""
            row["summary"] = truncate_description(desc)

    joined = {
        "generated": today_str(),
        "kev_top": kev_top,
        "high_epss": high_epss,
    }

    return {
        "joined": joined,
        "kev_enriched": kev_enriched,
        "epss_items": epss_items,
        "details": details,
        "vendors": vendors,
    }
|
|
|
|
|
|
def write_api_outputs(payload: Dict, *, api_dir: Path = API_DIR) -> None:
    """Write the joined payload as JSON files under ``api_dir``.

    Files written: ``kev.json`` (all enriched KEV rows), ``epss_top.json``
    (the high-EPSS list), ``joined_top.json`` (the ``joined`` dict as is) and
    one ``cve/<CVE-ID>.json`` per detail record. The target directories and
    the snapshot directory are created through ``ensure_dirs`` first.
    """
    cve_dir = api_dir / "cve"
    ensure_dirs(api_dir, cve_dir, SNAPSHOT_DIR)

    joined = payload["joined"]
    stamp = joined["generated"]

    kev_document = {"generated": stamp, "items": payload["kev_enriched"]}
    save_json(api_dir / "kev.json", kev_document)

    epss_document = {"generated": stamp, "items": joined["high_epss"]}
    save_json(api_dir / "epss_top.json", epss_document)

    save_json(api_dir / "joined_top.json", joined)

    for cve_id, record in payload["details"].items():
        save_json(cve_dir / f"{cve_id}.json", record)
|
|
|
|
|
|
def main(argv: List[str] | None = None) -> int:
    """Command-line entry point: join KEV, EPSS and PoC data and write the API files.

    Args:
        argv: Argument list to parse instead of the process arguments. The
            default ``None`` makes ``argparse`` read ``sys.argv[1:]``, so
            existing ``main()`` callers behave as before.

    Returns:
        The process exit code; always ``0`` when no exception is raised.
    """
    parser = argparse.ArgumentParser(description="Join KEV and EPSS with PoC data")
    parser.add_argument("--kev", type=Path, default=KEV_PATH, help="Path to KEV JSON")
    parser.add_argument("--epss", type=Path, default=EPSS_PATH, help="Path to EPSS JSON")
    parser.add_argument(
        "--top-kev",
        type=int,
        default=DEFAULT_TOP_KEV,
        help="How many KEV rows to surface on top list",
    )
    parser.add_argument(
        "--high-epss-threshold",
        type=float,
        default=DEFAULT_HIGH_EPSS_THRESHOLD,
        help="Minimum EPSS to include when selecting high-EPSS CVEs",
    )
    parser.add_argument(
        "--high-epss-limit",
        type=int,
        default=DEFAULT_HIGH_EPSS_LIMIT,
        help="Maximum number of high-EPSS CVEs to keep",
    )
    args = parser.parse_args(argv)

    poc_index = load_poc_index()
    kev_data, epss_data = load_inputs(args.kev, args.epss)
    payload = build_joined(
        kev_data,
        epss_data,
        poc_index,
        top_kev=args.top_kev,
        high_epss_threshold=args.high_epss_threshold,
        high_epss_limit=args.high_epss_limit,
    )
    write_api_outputs(payload)
    print("Generated joined JSON endpoints under docs/api/v1/")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the CLI and hand its return value to the interpreter as exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|