Align trending PoCs and filter to recent data

This commit is contained in:
0xMarcio
2025-12-17 20:24:46 +01:00
parent da14307c37
commit 0bd6c7ceda
12 changed files with 88 additions and 392 deletions
+21
View File
@@ -24,6 +24,7 @@ EPSS_PATH = DATA_DIR / "epss.json"
DEFAULT_TOP_KEV = 75
DEFAULT_HIGH_EPSS_LIMIT = 50
DEFAULT_HIGH_EPSS_THRESHOLD = 0.05
RECENT_YEAR_WINDOW = 1
def load_inputs(kev_path: Path, epss_path: Path) -> Tuple[Dict, Dict]:
@@ -34,11 +35,22 @@ def load_inputs(kev_path: Path, epss_path: Path) -> Tuple[Dict, Dict]:
def enrich_kev(kev_items: List[Dict], epss_lookup: Dict[str, Dict], poc_index: Dict[str, Dict]) -> List[Dict]:
enriched = []
current_year = today_str()
current_year_int = int(current_year.split("-")[0])
def is_recent(cve_id: str) -> bool:
    """Tell whether the year field of ``cve_id`` lies inside the recency window.

    The second dash-separated field of the identifier is read as the year.
    Any failure while extracting or converting that field makes the
    identifier count as not recent.
    """
    try:
        fields = cve_id.split("-")
        cve_year = int(fields[1])
    except Exception:
        return False
    # Oldest year still accepted, relative to the enclosing scope's current year.
    oldest_allowed = current_year_int - RECENT_YEAR_WINDOW
    return cve_year >= oldest_allowed
for entry in kev_items:
cve = entry.get("cve") or entry.get("cveID") or ""
if not cve:
continue
cve = cve.upper()
if not is_recent(cve):
continue
epss_info = epss_lookup.get(cve, {})
poc_info = poc_index.get(cve)
if not poc_info or not poc_info.get("poc"):
@@ -75,6 +87,14 @@ def build_high_epss_not_in_kev(
threshold: float,
limit: int,
) -> List[Dict]:
current_year_int = int(today_str().split("-")[0])
def is_recent(cve_id: str) -> bool:
    """Return True if the year embedded in ``cve_id`` is within the recency window.

    The year is taken from the second dash-separated field. If that field
    cannot be extracted or parsed as an integer, the result is False.
    """
    try:
        year_field = cve_id.split("-")[1]
        parsed_year = int(year_field)
    except Exception:
        return False
    else:
        # Window is measured backwards from the current year of the enclosing scope.
        cutoff = current_year_int - RECENT_YEAR_WINDOW
        return parsed_year >= cutoff
ranked = sorted(
(
row
@@ -82,6 +102,7 @@ def build_high_epss_not_in_kev(
if row.get("cve")
and row.get("cve", "").upper() not in kev_set
and (row.get("epss") is not None)
and is_recent(row.get("cve", ""))
),
key=lambda row: (-float(row.get("epss") or 0), row.get("cve", "")),
)
+25 -1
View File
@@ -1,7 +1,9 @@
from __future__ import annotations
import argparse
from datetime import datetime, timezone
from pathlib import Path
import re
from typing import Dict, Tuple
from jinja2 import Environment, FileSystemLoader, select_autoescape
@@ -59,7 +61,29 @@ def build_pages(env: Environment, data: Dict, diff: Dict | None = None, html_mod
joined = data["joined"]
details = data["details"]
vendors = data["vendors"]
trending = parse_trending_from_readme(README_PATH)
def is_recent_label(label: str) -> bool:
    """Return True when a relative "updated" label is at most four days old.

    Labels mentioning minutes or hours are always recent. Labels containing
    "<N> day" / "<N> days" are recent when N <= 4. Anything else (weeks,
    months, empty or missing labels) is treated as not recent.

    Args:
        label: Human-readable relative time text, e.g. "3 days ago".
            ``None`` or an empty string is accepted and yields False.

    Returns:
        Whether the label describes an update within the last four days.
    """
    label = (label or "").lower()
    if "minute" in label or "hour" in label:
        return True
    # In a raw string the whitespace class is a single-backslash ``\s``;
    # a doubled backslash would demand a literal "\" in the label and
    # therefore never match text such as "3 days ago".
    m = re.search(r"(\d+)\s*day", label)
    if m is None:
        return False
    return int(m.group(1)) <= 4
current_year = datetime.now(timezone.utc).year
def extract_year(name: str) -> int | None:
m = re.search(r"cve-(\\d{4})-", name.lower())
return int(m.group(1)) if m else None
trending_raw = parse_trending_from_readme(README_PATH)
trending = [
row
for row in trending_raw
if is_recent_label(row.get("updated", ""))
and (extract_year(row.get("name", "")) or current_year) >= current_year - 1
]
trending.sort(key=lambda r: int(r.get("stars") or 0), reverse=True)
recent_kev = (diff or {}).get("new_kev_entries") or []
metrics = {
"kev_total": len(data["kev_enriched"]),