Filter trending PoCs to current-year updates

This commit is contained in:
0xMarcio
2025-12-17 21:06:33 +01:00
parent 23be2e0751
commit 722d7261ba
7 changed files with 199 additions and 341 deletions
+105 -90
View File
@@ -1,99 +1,114 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
"""Regenerate the Trending PoCs table in README.md.
- Only consider repositories whose names contain the current year's CVE pattern (e.g., CVE-2025-1234).
- Restrict to repositories updated in the last 4 days.
- Sort by most recently updated, then stars, and emit up to 20 rows.
"""
from __future__ import annotations
import os
import re
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Iterable, List, TypedDict
import requests
import json
from datetime import datetime
WINDOW_DAYS = 4
MAX_ROWS = 20
# Earlier one-argument form of time_ago. A definition with the same name that
# takes (updated_at, now) appears further down in this listing.
def time_ago(datetime_str):
# Parsed without tzinfo, so the result is a naive datetime.
datetime_obj = datetime.strptime(datetime_str, "%Y-%m-%dT%H:%M:%SZ")
# NOTE(review): datetime.now() is naive local time, while the parsed string ends
# in a literal "Z" -- confirm both sides are meant to be in the same zone.
current_datetime = datetime.now()
delta = current_datetime - datetime_obj
class Repo(TypedDict):
    """Fields of a repository search result that this script reads."""

    # Matched against the CVE-<year>-<digits> pattern and used as the link text.
    name: str
    # Target of the Markdown link in the generated table.
    html_url: str
    # Free-text summary; may be None.
    description: str | None
    # Star count shown in the first table column.
    stargazers_count: int
    # Timestamp string parsed with "%Y-%m-%dT%H:%M:%SZ".
    updated_at: str
def github_headers() -> dict:
    """Build the HTTP headers sent with GitHub API requests.

    The ``Accept`` header is always present. An ``Authorization: Bearer``
    header is added when ``GITHUB_TOKEN`` or, failing that, ``GH_TOKEN`` is
    set to a non-empty value in the environment.
    """
    # First non-empty candidate wins; GITHUB_TOKEN takes precedence.
    token = next(
        (os.environ[var] for var in ("GITHUB_TOKEN", "GH_TOKEN") if os.environ.get(var)),
        None,
    )
    auth = {"Authorization": f"Bearer {token}"} if token else {}
    return {"Accept": "application/vnd.github+json", **auth}
def time_ago(updated_at: str, now: datetime | None = None) -> str:
    """Describe how long before *now* the timestamp *updated_at* lies.

    Args:
        updated_at: Timestamp in ``%Y-%m-%dT%H:%M:%SZ`` form; it is tagged as UTC.
        now: Timezone-aware reference instant. When omitted, the current UTC
            time is used, so one-argument calls keep working.

    Returns:
        ``"N day(s) ago"``, ``"N hour(s) ago"``, ``"N minute(s) ago"`` or
        ``"just now"``. A timestamp at or after *now* also yields ``"just now"``.

    Raises:
        ValueError: if *updated_at* does not match the expected format.
        TypeError: if *now* is a naive datetime (it cannot be subtracted from
            the aware parsed value).
    """
    if now is None:
        now = datetime.now(timezone.utc)
    dt = datetime.strptime(updated_at, "%Y-%m-%dT%H:%M:%SZ").replace(tzinfo=timezone.utc)
    delta = now - dt

    # A negative timedelta normalises to days == -1 with a large positive
    # ``seconds`` value, which would otherwise be reported as many hours ago.
    if delta.days < 0:
        return "just now"
    if delta.days > 0:
        return "1 day ago" if delta.days == 1 else f"{delta.days} days ago"

    hours, remainder = divmod(delta.seconds, 3600)
    if hours:
        return "1 hour ago" if hours == 1 else f"{hours} hours ago"
    minutes = remainder // 60
    if minutes:
        return "1 minute ago" if minutes == 1 else f"{minutes} minutes ago"
    return "just now"
def fetch_trending(current_year: int, cutoff: datetime) -> List[Repo]:
    """Fetch recently updated CVE repositories for *current_year* from GitHub.

    One page of up to 100 search results is requested. The search query
    narrows by push date; the results are then filtered locally by name
    pattern and by ``updated_at`` against *cutoff*.

    Args:
        current_year: Year used in the ``CVE-<year>`` search term and in the
            ``cve-<year>-<digits>`` name pattern.
        cutoff: Timezone-aware lower bound; repositories whose ``updated_at``
            is earlier are dropped.

    Returns:
        At most ``MAX_ROWS`` repositories, most recently updated first, with
        ties broken by star count (highest first).

    Raises:
        requests.HTTPError: if the search request returns an error status.
        ValueError: if an ``updated_at`` value is not in the expected format.
    """
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    query = f"CVE-{current_year} in:name stars:>2 pushed:>={cutoff.date().isoformat()} archived:false"
    resp = requests.get(
        "https://api.github.com/search/repositories",
        params={
            "q": query,
            "sort": "updated",
            "order": "desc",
            "per_page": 100,
            "page": 1,
        },
        headers=github_headers(),
        timeout=30,
    )
    resp.raise_for_status()
    # Tolerate a missing or null "items" member.
    items: Iterable[Repo] = resp.json().get("items") or []

    pattern = re.compile(rf"cve-{current_year}-\d+", re.IGNORECASE)
    dated: List[tuple[datetime, Repo]] = []
    for item in items:
        updated_at = item.get("updated_at")
        if not updated_at or not pattern.search(item.get("name") or ""):
            continue
        # Parse once as UTC and reuse the value for both filtering and sorting,
        # so the sort key never depends on the machine's local time zone.
        updated_dt = datetime.strptime(updated_at, timestamp_format).replace(tzinfo=timezone.utc)
        if updated_dt < cutoff:
            continue
        dated.append((updated_dt, item))

    # Newest first; equal timestamps are ordered by stars, highest first.
    dated.sort(
        key=lambda pair: (
            -pair[0].timestamp(),
            -int(pair[1].get("stargazers_count") or 0),
        )
    )
    return [item for _, item in dated[:MAX_ROWS]]
def build_rows(repos: List[Repo], now: datetime) -> List[str]:
    """Render one Markdown table row per repository.

    Args:
        repos: Repositories to render, in output order.
        now: Reference instant passed to ``time_ago`` for the "Updated" column.

    Returns:
        Rows of the form ``| <stars>⭐ | <updated> | [name](url) | <description> |``.
    """
    rows: List[str] = []
    for repo in repos:
        # Descriptions are free text: a raw "|" would start a new cell and a
        # line break would end the row, so collapse whitespace and escape pipes.
        desc = " ".join((repo.get("description") or "").split()).replace("|", "\\|")
        stars = int(repo.get("stargazers_count") or 0)
        updated = time_ago(repo["updated_at"], now)
        rows.append(f"| {stars}⭐ | {updated} | [{repo['name']}]({repo['html_url']}) | {desc} |")
    return rows
def main() -> None:
    """Regenerate README.md with the table of recently updated PoC repositories.

    Fetches the current year's repositories updated within ``WINDOW_DAYS``,
    renders them as a Markdown table (or a single placeholder row when nothing
    matched), writes the result to ``README.md`` in the working directory and
    prints a one-line summary.
    """
    # Read the clock once so the year, the cutoff and the "time ago" column
    # are all derived from the same instant.
    now = datetime.now(timezone.utc)
    current_year = now.year
    cutoff = now - timedelta(days=WINDOW_DAYS)
    repos = fetch_trending(current_year, cutoff)

    output: List[str] = [
        '<h1 align="center">Recently updated Proof-of-Concepts</h1>',
        f"\n\n## {current_year}\n",
        f"### Updated in the last {WINDOW_DAYS} days (up to {MAX_ROWS} repos)\n",
        "| Stars | Updated | Name | Description |",
        "| --- | --- | --- | --- |",
    ]
    if repos:
        output.extend(build_rows(repos, now))
    else:
        # Keep the table well-formed when nothing matched.
        output.append("| 0⭐ | — | No recent CVE PoCs | No repositories matched the filters. |")

    Path("README.md").write_text("\n".join(output), encoding="utf-8")
    print(f"Wrote {len(repos)} rows for {current_year}")


# Module-level state read by the older multi-year listing code that follows.
current_year = datetime.now().year
total_repos_per_year = {}
#tz_header = {"Time-Zone": "Europe/Amsterdam"}
# Older multi-year listing: for each of the five most recent years, request the
# first page (20 results) of the repository search for "CVE-<year>" and keep the
# returned items together with the total count reported by the API.
repositories_by_year = {}
for year in range(current_year, current_year - 5, -1):
year_repositories = []
print(f"Fetching data for {year}")
# No timeout and no headers are passed on this request.
# NOTE(review): the "+" characters in "language:C++" are not percent-encoded, and
# the query uses "s"/"o" rather than "sort"/"order" -- confirm the API interprets
# both as intended.
response = requests.get(f'https://api.github.com/search/repositories?q=CVE-{year}%20in:name%20%20stars:>2%20language:Shell%20language:Go%20language:ASP%20language:WebAssembly%20language:R%20language:Lua%20language:Python%20%20%20language:C++%20language:C%20language:JavaScript%20language:Perl%20language:PowerShell%20language:Ruby%20language:Rust%20language:Java%20%20language:PHP&s=updated&o=desc&page=1&per_page=20')
# A non-200 response is reported and the year is skipped.
if response.status_code != 200:
print(f"Failed to fetch data for year {year}: {response.status_code}")
continue
data = response.json()
total_count = data.get("total_count", 0)
print(f"Found: {total_count}")
# Remembered per year for the section heading in the generated README.
total_repos_per_year[year] = total_count
if "items" in data:
items = data["items"]
if items:
year_repositories.extend(items)
else:
print(f"No more items found for year {year}")
# Only years that returned at least one repository are recorded.
if year_repositories:
# Sort the repositories by stargazers_count in descending order
#year_repositories.sort(key=lambda repo: repo['stargazers_count'], reverse=True)
repositories_by_year[year] = year_repositories
# Value object holding the repository fields used for one table row.
class RepositoryInfo:
    """Repository fields needed to render one row of the generated table.

    Two instances compare equal when both ``name`` and ``html_url`` match;
    the remaining fields take no part in equality or hashing.
    """

    def __init__(self, description, stargazers_count, name, html_url, updated_at):
        self.description = description
        self.stargazers_count = stargazers_count
        self.name = name
        self.html_url = html_url
        self.updated_at = updated_at

    def __repr__(self):
        return f"{type(self).__name__}(name={self.name!r}, html_url={self.html_url!r})"

    def __hash__(self):
        # Hash exactly the pair that __eq__ compares. A tuple keeps the two
        # fields distinct, unlike concatenation ("ab" + "c" == "a" + "bc").
        return hash((self.name, self.html_url))

    def __eq__(self, other):
        # Let Python fall back to its default handling for foreign types
        # instead of failing on a missing attribute.
        if not isinstance(other, RepositoryInfo):
            return NotImplemented
        return self.html_url == other.html_url and self.name == other.name
# Render the older multi-year listing as Markdown: one section per year that
# returned repositories, each with a four-column table.
final_output = ['<h1 align="center">Recently updated Proof-of-Concepts</h1>']
for year in range(current_year, current_year - 5, -1):
if year in repositories_by_year:
year_repositories = repositories_by_year[year]
# Wrap each raw result dict in a RepositoryInfo; a missing key raises KeyError here.
year_repositories = [RepositoryInfo(repo["description"], repo["stargazers_count"], repo["name"], repo["html_url"], repo["updated_at"]) for repo in year_repositories]
final_output.append(f"\n\n## {year}\n")
final_output.append(f"### Latest 20 of {total_repos_per_year[year]} Repositories\n")
final_output.append("| Stars | Updated | Name | Description |")
final_output.append("| --- | --- | --- | --- |")
for repo in year_repositories:
try:
description = repo.description or ""
# NOTE(review): time_ago is called with a single argument here.
updated = time_ago(repo.updated_at)
final_output.append(f"| {repo.stargazers_count}⭐ | {updated} | [{repo.name}]({repo.html_url}) | {description} |")
# Any error while formatting a row is printed and that row is left out.
except Exception as e:
print(f"Error generating final output for repository {repo.name}: {e}")
pass
# README.md is only rewritten when at least one year produced repositories.
if repositories_by_year:
with open("README.md", "w", encoding="utf-8") as file:
file.write("\n".join(final_output))
print("Final output written to README.md")
# Run main() only when the file is executed as a script.
if __name__ == "__main__":
main()
+2
View File
@@ -24,6 +24,8 @@ jobs:
cd /home/runner/work/cve/cve
pip install requests
python .github/getTrending.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Check for changes and commit if necessary
run: |
+2
View File
@@ -12,6 +12,8 @@ on:
- 'templates/**'
- 'docs/assets/**'
- 'README.md'
- '.github/getTrending.py'
- '.github/workflows/hot_cves.yml'
- 'requirements.txt'
- '.github/workflows/site.yml'