Restore README trending tables and auto-build site

This commit is contained in:
0xMarcio
2025-12-17 21:39:16 +01:00
parent 0afef5597e
commit 5cb3a28aeb
4 changed files with 173 additions and 43 deletions
+69 -32
View File
@@ -1,11 +1,12 @@
#!/usr/bin/env python3
"""Regenerate the Trending PoCs tables in README.md.
- Consider the latest 4 years (current year and previous 3).
- Require repository name to contain a CVE for that year (e.g., CVE-2025-1234).
- Require a non-empty description (we only want actual PoCs, not empty shells).
- Restrict to repositories updated in the last 4 days.
- Sort by most recently updated, then stars, and emit up to 20 rows per year.
Goals (matching the legacy README that worked well):
- Cover the current year plus the previous three.
- Keep the familiar heading “Latest 20 of N Repositories”.
- Only show repos updated in the last WINDOW_DAYS.
- Require a CVE-shaped repo name for that year and a non-empty description.
- Sort newest first, then by stars, and cap at MAX_ROWS per year.
"""
from __future__ import annotations
@@ -21,6 +22,7 @@ import requests
# Freshness window in days: main() derives the cutoff as now - WINDOW_DAYS,
# and repositories last updated before that cutoff are dropped.
WINDOW_DAYS = 4
# Upper bound on the number of repositories returned (and rendered) per year.
MAX_ROWS = 20
# Number of years covered, counting back from (and including) the current year.
YEARS_BACK = 4
# Threshold interpolated into the search query's strict `stars:>N` qualifier.
MIN_STARS = 0 # keep low to capture fresh repos
class Repo(TypedDict):
@@ -53,34 +55,69 @@ def time_ago(updated_at: str, now: datetime) -> str:
return "just now"
def _cve_query(year: int) -> str:
    """Return the search qualifiers shared by the count and listing queries."""
    # max() keeps the original fallback: a negative MIN_STARS degrades to "stars:>0".
    return f"CVE-{year} in:name stars:>{max(MIN_STARS, 0)} archived:false"


def _parse_updated_at(value: str) -> datetime:
    """Parse a GitHub ``updated_at`` timestamp (``...Z``) into an aware UTC datetime."""
    return datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)


def _search_total(year: int) -> int:
    """Return total repositories matching CVE-year (used for table heading).

    The count deliberately ignores the freshness window so the heading can
    read "Latest MAX_ROWS of N Repositories".

    Raises:
        requests.HTTPError: if the GitHub search API answers with an error status.
    """
    resp = requests.get(
        "https://api.github.com/search/repositories",
        # Only total_count is needed, so ask for the smallest possible page.
        params={"q": _cve_query(year), "per_page": 1},
        headers=github_headers(),
        timeout=30,
    )
    resp.raise_for_status()
    return int(resp.json().get("total_count", 0))


def fetch_trending(
    year: int, cutoff: datetime, *, max_pages: int = 3
) -> tuple[List[Repo], int]:
    """Fetch and filter trending repos for a year, returning rows and total_count.

    A repository is kept when it has an ``updated_at`` no older than *cutoff*,
    an ``html_url``, a non-blank description and a name containing
    ``CVE-<year>-<digits>`` (case-insensitive). Duplicate URLs are dropped.

    Args:
        year: CVE year to search for.
        cutoff: Timezone-aware lower bound for ``updated_at``; its date is also
            used for the ``pushed:>=`` search qualifier.
        max_pages: Upper bound on result pages requested (100 items each).
            Paging stops early once MAX_ROWS rows are collected or a page
            comes back short, so the common case still costs one request.

    Returns:
        A ``(rows, total_count)`` tuple: at most MAX_ROWS repositories sorted by
        most recent update, then by stars, plus the count from ``_search_total``.

    Raises:
        requests.HTTPError: if the GitHub search API answers with an error status.
    """
    query = f"{_cve_query(year)} pushed:>={cutoff.date().isoformat()}"
    url = "https://api.github.com/search/repositories"
    per_page = 100
    total_count = _search_total(year)

    pattern = re.compile(rf"cve-{year}-\d+", re.IGNORECASE)
    filtered: List[Repo] = []
    seen_urls: set[str] = set()

    # Walk result pages until enough fresh repos are gathered (up to MAX_ROWS).
    for page in range(1, max_pages + 1):
        params = {
            "q": query,
            "sort": "updated",
            "order": "desc",
            "per_page": per_page,
            "page": page,
        }
        resp = requests.get(url, params=params, headers=github_headers(), timeout=30)
        resp.raise_for_status()
        items: List[Repo] = resp.json().get("items", [])
        if not items:
            break

        for item in items:
            updated_at = item.get("updated_at")
            html_url = item.get("html_url")
            description = (item.get("description") or "").strip()
            if not updated_at or not html_url or not description:
                continue
            if not pattern.search(item.get("name") or ""):
                continue
            if _parse_updated_at(updated_at) < cutoff:
                continue
            if html_url in seen_urls:
                continue
            seen_urls.add(html_url)
            filtered.append(item)

        # A short page means the search is exhausted; skip the extra request.
        if len(filtered) >= MAX_ROWS or len(items) < per_page:
            break

    # Already sorted by updated desc; break ties by stars
    filtered.sort(
        key=lambda r: (
            -_parse_updated_at(r["updated_at"]).timestamp(),
            # `or 0` guards against an explicit null star count in the payload.
            -int(r.get("stargazers_count") or 0),
        )
    )
    return filtered[:MAX_ROWS], total_count
def build_rows(repos: List[Repo], now: datetime) -> List[str]:
@@ -94,16 +131,16 @@ def build_rows(repos: List[Repo], now: datetime) -> List[str]:
def main() -> None:
current_year = datetime.now(timezone.utc).year
cutoff = datetime.now(timezone.utc) - timedelta(days=WINDOW_DAYS)
now = datetime.now(timezone.utc)
current_year = now.year
cutoff = now - timedelta(days=WINDOW_DAYS)
output: List[str] = ['<h1 align="center">Recently updated Proof-of-Concepts</h1>']
for year in range(current_year, current_year - YEARS_BACK, -1):
repos = fetch_trending(year, cutoff)
repos, total = fetch_trending(year, cutoff)
output.append(f"\n\n## {year}\n")
output.append(f"### Updated in the last {WINDOW_DAYS} days (up to {MAX_ROWS} repos)\n")
output.append(f"### Latest {MAX_ROWS} of {total} Repositories\n")
output.append("| Stars | Updated | Name | Description |")
output.append("| --- | --- | --- | --- |")
if repos:
+1 -1
View File
@@ -49,7 +49,7 @@ jobs:
python scripts/fetch_epss.py
- name: Build site
run: python scripts/build_site.py
run: python scripts/build_site.py --html-mode summary
- name: Configure Pages
uses: actions/configure-pages@v5