mirror of https://github.com/0xMarcio/cve.git (synced 2026-02-12 18:42:46 +00:00)

Add PoC pipeline with blacklist filtering and Pages build

.github/workflows/build.yml (new file, vendored, 69 lines)
@@ -0,0 +1,69 @@
name: Build pipeline + Pages

on:
  schedule:
    - cron: "15 5 * * *"
  workflow_dispatch:

permissions:
  contents: read
  pages: write
  id-token: write

concurrency:
  group: pages
  cancel-in-progress: false

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Cache dependencies and API cache
        uses: actions/cache@v4
        with:
          path: |
            ~/.cache/pip
            data/cache
          key: ${{ runner.os }}-cve-pipeline-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-cve-pipeline-

      - name: Install requirements
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Build pipeline outputs + site
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: python scripts/build_all.py --days 7

      - name: Validate JSON index
        run: python -m json.tool docs/api/v1/index.json > /dev/null

      - name: Configure Pages
        uses: actions/configure-pages@v5

      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: docs

  deploy:
    needs: build
    runs-on: ubuntu-latest
    environment:
      name: github-pages
      url: ${{ steps.deploy.outputs.page_url }}
    steps:
      - name: Deploy to GitHub Pages
        id: deploy
        uses: actions/deploy-pages@v4
@@ -1,5 +1,7 @@
<h1 align="center">Recently updated Proof-of-Concepts</h1>

> Live API + site: `/api/v1/index.json` and `/api/v1/top/today.json`, plus the Pages site, are all generated by `python scripts/build_all.py`, the new GitHub PoC discovery and scoring pipeline.
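
A minimal sketch of consuming the index endpoint from Python (the Pages base URL below is illustrative; substitute wherever this repo's site is actually deployed):

```python
import requests

BASE = "https://example.github.io/cve"  # illustrative Pages base URL

# index.json carries {"generated": ..., "items": [...]} per scripts/pipeline_outputs.py
index = requests.get(f"{BASE}/api/v1/index.json", timeout=10).json()
for item in index["items"][:5]:
    print(item["cve_id"], item["poc_count"], item.get("max_score"))
```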


## 2025

@@ -138,4 +140,4 @@
| 312⭐ | 4 days ago | [CVE-2021-26084_Confluence](https://github.com/hev0x/CVE-2021-26084_Confluence) | Confluence Server Webwork OGNL injection |
| 328⭐ | 6 days ago | [CVE-2021-1675-LPE](https://github.com/hlldz/CVE-2021-1675-LPE) | Local Privilege Escalation Edition for CVE-2021-1675/CVE-2021-34527 |
| 233⭐ | 92 days ago | [CVE-2021-38647](https://github.com/horizon3ai/CVE-2021-38647) | Proof of Concept Exploit for CVE-2021-38647 (OMIGOD) |
| 235⭐ | 15 days ago | [CVE-2021-24086](https://github.com/0vercl0k/CVE-2021-24086) | Proof of concept for CVE-2021-24086, a NULL dereference in tcpip.sys triggered remotely. |

docs/CVE_blacklist_removed.json (new file, 187191 lines)
File diff suppressed because it is too large and one or more lines are too long.

docs/assets/app.js (new file, 54 lines)
@@ -0,0 +1,54 @@
(function() {
  const qs = (sel) => document.querySelector(sel);

  const initIndexSearch = () => {
    const input = document.querySelector("[data-index-search]");
    if (!input) return;
    const targetSel = input.getAttribute("data-target");
    const target = targetSel ? qs(targetSel) : null;
    const indexUrl = input.getAttribute("data-index-url");
    if (!target || !indexUrl) return;

    let cached = [];
    fetch(indexUrl)
      .then((resp) => resp.json())
      .then((data) => { cached = data.items || []; })
      .catch(() => { target.innerHTML = "<p class='muted small'>Index unavailable.</p>"; });

    const render = (term) => {
      if (!cached.length) return;
      const value = term.trim().toLowerCase();
      const results = cached.filter((row) => {
        if (!value) return false;
        return row.cve_id.toLowerCase().includes(value) ||
          (row.top_languages || []).join(" ").toLowerCase().includes(value) ||
          String(row.max_score || "").includes(value);
      }).slice(0, 40);

      if (!results.length) {
        target.innerHTML = "<p class='muted small'>No matches yet.</p>";
        return;
      }

      target.innerHTML = results.map((row) => {
        const langs = (row.top_languages || []).map((lang) => `<span class="pill tiny">${lang}</span>`).join(" ");
        return `<article class="card">
          <div class="card-title"><a href="/cve/${row.cve_id}.html">${row.cve_id}</a></div>
          <div class="meta-row">
            <span class="pill tier-high">${row.high_confidence} high</span>
            <span class="pill tier-medium">${row.medium_confidence} med</span>
            <span class="pill">${row.poc_count} PoCs</span>
          </div>
          <div class="muted small">Max score ${row.max_score || 0}</div>
          <div class="pill-row">${langs}</div>
        </article>`;
      }).join("");
    };

    input.addEventListener("input", (e) => render(e.target.value));
  };

  document.addEventListener("DOMContentLoaded", () => {
    initIndexSearch();
  });
})();
@@ -1,54 +1,97 @@
:root {
  --bg: #0b0c10;
  --panel: #11131a;
  --text: #e5e8f0;
  --muted: #9aa3b5;
  --accent: #5ad4e6;
  --warn: #f6c177;
  --border: #1f2430;
  --shadow: 0 10px 30px rgba(0,0,0,0.35);
  font-family: "Inter", system-ui, -apple-system, sans-serif;
  --bg: #05070d;
  --panel: #0d1020;
  --panel-2: #11162b;
  --text: #f3f4ff;
  --muted: #8fa2c8;
  --accent: #7ef1d3;
  --accent-2: #5bc0eb;
  --warn: #ffb86c;
  --success: #6ef2a6;
  --border: #1f2742;
  --shadow: 0 18px 45px rgba(0,0,0,0.35);
  font-family: "Space Grotesk", "Inter", "Helvetica Neue", system-ui, sans-serif;
  line-height: 1.55;
}

* { box-sizing: border-box; }
body { margin:0; background: var(--bg); color: var(--text); }
body {
  margin: 0;
  background: radial-gradient(circle at 20% 20%, rgba(91,192,235,0.08), transparent 25%), radial-gradient(circle at 80% 0%, rgba(126,241,211,0.08), transparent 23%), var(--bg);
  color: var(--text);
}
a { color: var(--accent); text-decoration: none; }
a:hover { text-decoration: underline; }
code { background: rgba(255,255,255,0.04); padding: 2px 6px; border-radius: 6px; color: var(--accent-2); }

.wrap { width: min(1100px, 95vw); margin: 0 auto; padding: 1.5rem 0; }
.site-header { background: var(--panel); border-bottom: 1px solid var(--border); position: sticky; top:0; z-index:10; box-shadow: var(--shadow); }
.site-header .wrap { display:flex; align-items:center; justify-content: space-between; padding: 0.9rem 0; }
.brand a { font-weight: 700; letter-spacing: 0.5px; }
nav a { margin-left: 1rem; color: var(--text); opacity: 0.85; }
nav a:hover { opacity: 1; }
.wrap { width: min(1200px, 94vw); margin: 0 auto; padding: 1.5rem 0; }

h1, h2, h3 { margin: 0 0 0.5rem; }
section { margin-bottom: 2rem; }
.lead { color: var(--muted); line-height: 1.5; }
.topbar { position: sticky; top: 0; z-index: 10; background: rgba(13,16,32,0.85); backdrop-filter: blur(10px); border-bottom: 1px solid var(--border); }
.topbar .wrap { display: flex; justify-content: space-between; align-items: center; padding: 1rem 0; }
.brand a { font-weight: 700; letter-spacing: 0.5px; color: var(--text); }
.brand .dot { color: var(--accent); margin-right: 4px; }
nav a { margin-left: 1rem; color: var(--muted); font-weight: 600; }
nav a:hover { color: var(--accent); }

h1, h2, h3, h4 { margin: 0 0 0.5rem; line-height: 1.25; }
p { margin: 0 0 0.75rem; }
.muted { color: var(--muted); }
.small { font-size: 0.9rem; }
.eyebrow { text-transform: uppercase; letter-spacing: 0.15em; font-size: 0.8rem; color: var(--accent); margin-bottom: 0.35rem; }
.lede { color: var(--muted); max-width: 60ch; }

.hero { display: grid; grid-template-columns: 2fr 1fr; gap: 1.5rem; align-items: center; padding: 1rem 0 2rem; }
.hero-panel { background: var(--panel); border: 1px solid var(--border); border-radius: 16px; padding: 1rem; box-shadow: var(--shadow); display: grid; grid-template-columns: repeat(auto-fit, minmax(140px, 1fr)); gap: 0.75rem; }
.stat .label { color: var(--muted); font-size: 0.9rem; }
.stat .value { font-size: 1.9rem; font-weight: 700; }

.cta-row { display: flex; gap: 0.75rem; flex-wrap: wrap; margin-top: 1rem; }
.btn { background: linear-gradient(90deg, var(--accent), var(--accent-2)); color: #041019; padding: 0.75rem 1rem; border-radius: 12px; font-weight: 700; border: none; display: inline-block; }
.btn.ghost { background: transparent; color: var(--text); border: 1px solid var(--border); }
.text-link { color: var(--accent); font-weight: 600; }

.section-header { display: flex; align-items: center; justify-content: space-between; gap: 1rem; margin-bottom: 0.75rem; }

.card-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(240px, 1fr)); gap: 1rem; }
.card { background: var(--panel); padding: 1rem; border: 1px solid var(--border); border-radius: 10px; box-shadow: var(--shadow); }
.card-title { font-weight: 700; margin-bottom: 0.2rem; }
.card-meta { color: var(--muted); font-size: 0.9rem; margin-bottom: 0.5rem; }
.badge { display: inline-block; background: rgba(90,212,230,0.12); color: var(--accent); padding: 0.15rem 0.5rem; border-radius: 999px; font-size: 0.8rem; margin-right: 0.25rem; }
.card { background: var(--panel); border: 1px solid var(--border); border-radius: 14px; padding: 1rem; box-shadow: var(--shadow); }
.card-title { font-weight: 700; margin-bottom: 0.4rem; }
.meta-row { display: flex; flex-wrap: wrap; gap: 0.35rem; align-items: center; margin-bottom: 0.35rem; }

.filter { width: 100%; padding: 0.65rem 0.75rem; margin: 0 0 0.75rem; border-radius: 8px; border: 1px solid var(--border); background: #0f1320; color: var(--text); }
.pill { display: inline-flex; align-items: center; gap: 4px; padding: 0.25rem 0.6rem; border-radius: 999px; background: rgba(255,255,255,0.04); border: 1px solid var(--border); color: var(--text); font-size: 0.85rem; }
.pill.ghost { background: transparent; color: var(--muted); }
.pill.warn { border-color: var(--warn); color: var(--warn); }
.pill.tier-high { border-color: var(--success); color: var(--success); }
.pill.tier-medium { border-color: var(--accent-2); color: var(--accent-2); }
.pill.tier-low { border-color: var(--muted); color: var(--muted); }
.pill.tiny { font-size: 0.75rem; padding: 0.15rem 0.4rem; }

.table-responsive { overflow-x: auto; border: 1px solid var(--border); border-radius: 10px; box-shadow: var(--shadow); }
.table-responsive table { width: 100%; border-collapse: collapse; }
.table-responsive th, .table-responsive td { padding: 0.75rem 0.9rem; border-bottom: 1px solid var(--border); text-align: left; }
.table-responsive th { background: #161a22; color: #d6dae6; font-size: 0.9rem; letter-spacing: 0.2px; }
.table-responsive tr:last-child td { border-bottom: none; }
.input { width: 100%; padding: 0.75rem; border-radius: 12px; border: 1px solid var(--border); background: var(--panel-2); color: var(--text); margin: 0.5rem 0 1rem; }

.pill-row { display: flex; flex-wrap: wrap; gap: 0.5rem; margin: 0.8rem 0 1rem; }
.pill { padding: 0.35rem 0.65rem; border-radius: 999px; background: #1b202c; border: 1px solid var(--border); color: var(--text); font-size: 0.9rem; }
.pill-warn { background: rgba(246,193,119,0.15); border-color: #f6c177; color: #f6c177; }
.table-wrap { overflow-x: auto; border: 1px solid var(--border); border-radius: 14px; box-shadow: var(--shadow); background: var(--panel); }
table { width: 100%; border-collapse: collapse; }
th, td { padding: 0.85rem 1rem; border-bottom: 1px solid var(--border); text-align: left; }
th { background: #0f1326; color: var(--muted); font-weight: 600; letter-spacing: 0.02em; }
tr:last-child td { border-bottom: none; }

.site-footer { border-top: 1px solid var(--border); padding: 1rem 0; color: var(--muted); }
.site-footer .wrap { display: flex; gap: 1rem; flex-wrap: wrap; font-size: 0.9rem; }
.matches ul { list-style: none; padding: 0; margin: 0.35rem 0 0; }
.matches li { margin-bottom: 0.25rem; color: var(--muted); }

@media (max-width: 640px) {
  nav a { margin-left: 0.6rem; }
  .card-grid { grid-template-columns: repeat(auto-fit, minmax(180px, 1fr)); }
  .table-responsive th, .table-responsive td { padding: 0.6rem; }
.grid-2 { display: grid; grid-template-columns: repeat(auto-fit, minmax(280px, 1fr)); gap: 1rem; }
.list { list-style: none; padding: 0; margin: 0.35rem 0; }
.list li { padding: 0.4rem 0; border-bottom: 1px solid var(--border); }
.list li:last-child { border-bottom: none; }

.pill-row { display: flex; flex-wrap: wrap; gap: 0.5rem; margin: 0.8rem 0; }

.footer { border-top: 1px solid var(--border); margin-top: 2rem; }
.footer-inner { display: flex; flex-wrap: wrap; gap: 1rem; padding: 1rem 0; color: var(--muted); }

@media (max-width: 840px) {
  .hero { grid-template-columns: 1fr; }
  nav { display: none; }
}

@media (max-width: 620px) {
  .card-grid { grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); }
  th, td { padding: 0.65rem; }
}
@@ -3,10 +3,55 @@ import json
import os
import re
from pathlib import Path
from typing import Dict, List
from typing import Dict, List, Optional, Tuple
from urllib.parse import urlparse

ROOT = Path(__file__).resolve().parent.parent
OUTPUT = Path(__file__).resolve().with_name("CVE_list.json")
REMOVED_OUTPUT = Path(__file__).resolve().with_name("CVE_blacklist_removed.json")
BLACKLIST = ROOT / "blacklist.txt"


def load_blacklist(path: Path = BLACKLIST) -> List[str]:
    if not path.exists():
        return []
    items: List[str] = []
    for raw in path.read_text(encoding="utf-8").splitlines():
        entry = raw.strip()
        if entry and not entry.startswith("#"):
            items.append(entry)
    return items


def repo_from_url(url: str) -> str:
    try:
        parsed = urlparse(url)
        host = (parsed.netloc or "").lower()
        if host and "github" not in host:
            return ""
        path = parsed.path or url
    except Exception:
        path = url
    parts = path.strip("/").split("/")
    if len(parts) >= 2:
        return parts[1].lower()
    return (parts[-1] if parts else "").lower()


def is_blacklisted(url: str, blacklist: List[str]) -> bool:
    repo = repo_from_url(url)
    if not repo:
        return False
    for entry in blacklist:
        slug = entry.lower()
        if not slug:
            continue
        if slug.endswith("*"):
            if repo.startswith(slug[:-1]):
                return True
        elif repo == slug:
            return True
    return False


def normalise_block(text: str) -> str:
@@ -37,21 +82,32 @@ def parse_sections(content: str) -> Dict[str, str]:
    return sections


def collect_links(block: str) -> List[str]:
def collect_links(block: str, *, blacklist: Optional[List[str]] = None, removed: Optional[List[str]] = None) -> List[str]:
    links: List[str] = []
    blacklist = blacklist or []
    if removed is None:
        removed = []
    for raw in block.splitlines():
        entry = raw.strip()
        if not entry or "No PoCs" in entry:
            continue
        if entry.startswith("- "):
            entry = entry[2:].strip()
        if entry and entry not in links:
        if not entry:
            continue
        if is_blacklisted(entry, blacklist):
            removed.append(entry)
            continue
        if entry not in links:
            links.append(entry)
    return links


def main() -> None:
    blacklist = load_blacklist()
    cve_entries = []
    removed_by_cve: Dict[str, List[str]] = {}
    removed_seen: set[str] = set()
    years = [entry for entry in os.listdir(ROOT) if entry.isdigit()]
    years.sort(reverse=True)

@@ -65,8 +121,9 @@ def main() -> None:

        sections = parse_sections(content)
        description = normalise_block(sections.get("### Description", ""))
        references = collect_links(sections.get("#### Reference", ""))
        github_links = collect_links(sections.get("#### Github", ""))
        removed_links: List[str] = []
        references = collect_links(sections.get("#### Reference", ""), blacklist=blacklist, removed=removed_links)
        github_links = collect_links(sections.get("#### Github", ""), blacklist=blacklist, removed=removed_links)

        poc_entries: List[str] = []
        seen = set()
@@ -75,8 +132,17 @@ def main() -> None:
                poc_entries.append(link)
                seen.add(link)

        cve_id = filename.replace(".md", "")
        if removed_links:
            removed_by_cve[cve_id] = sorted(set(removed_links))
            removed_seen.update(removed_links)

        # Skip CVEs with zero PoCs (both sections empty) to keep lookup clean
        if not poc_entries:
            continue

        cve_entries.append({
            "cve": filename.replace(".md", ""),
            "cve": cve_id,
            "desc": description,
            "poc": poc_entries,
        })
@@ -84,6 +150,17 @@ def main() -> None:
    with open(OUTPUT, "w", encoding="utf-8") as outfile:
        json.dump(cve_entries, outfile, ensure_ascii=False)

    with open(REMOVED_OUTPUT, "w", encoding="utf-8") as removed_file:
        json.dump(
            {
                "removed": sorted(removed_seen),
                "by_cve": removed_by_cve,
            },
            removed_file,
            ensure_ascii=False,
            indent=2,
        )

    print("CVE list saved to CVE_list.json")

@@ -7,6 +7,7 @@ pip install -r requirements.txt
python scripts/fetch_kev.py
python scripts/fetch_epss.py
python scripts/build_site.py
python scripts/build_all.py # new PoC discovery + scoring pipeline
```

Outputs land in `docs/` and JSON under `docs/api/v1/`. Snapshots live in `docs/api/v1/snapshots/` (last 14 days) and diffs under `docs/api/v1/diff/`.
Outputs land in `docs/` and JSON under `docs/api/v1/`. Snapshots live in `docs/api/v1/snapshots/` (last 14 days) and diffs under `docs/api/v1/diffs/`.
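
As a quick sanity check after a run, here is a minimal sketch that reads the latest diff (paths per the layout above; the keys match what `build_diff` in `scripts/pipeline_outputs.py` writes):

```python
import json
from pathlib import Path

# write_diff() mirrors each dated diff to diffs/latest.json
latest = json.loads(Path("docs/api/v1/diffs/latest.json").read_text(encoding="utf-8"))
for entry in latest.get("new_high_conf_pocs", []):
    print(entry["cve_id"], entry["repo_full_name"], entry["score"])
```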

scripts/build_all.py (new file, 118 lines)
@@ -0,0 +1,118 @@
from __future__ import annotations

import argparse
import sys
from pathlib import Path
from typing import Dict, List

import requests

from pipeline_outputs import (
    build_diff,
    prune_old_diffs,
    prune_old_snapshots,
    summarise_for_snapshot,
    write_cve_outputs,
    write_diff,
    write_index,
    write_snapshot,
    write_top,
)
from poc_pipeline import PoCPipeline, build_scope, persist_evidence
from site_renderer import SiteRenderer
from utils import API_DIR, DOCS_DIR, load_json


def load_existing_results(api_dir: Path) -> List[Dict]:
    results: List[Dict] = []
    if not api_dir.exists():
        return results
    for path in api_dir.glob("CVE-*.json"):
        data = load_json(path, default={}) or {}
        if "pocs" in data:
            results.append({"cve_id": data.get("cve_id") or path.stem, "pocs": data.get("pocs", []), "last_updated": data.get("last_updated")})
    return results


def main(argv: List[str] | None = None) -> int:
    parser = argparse.ArgumentParser(description="Build CVE PoC pipeline outputs, snapshots, and static site")
    parser.add_argument("--days", type=int, default=7, help="Days window for GitHub discovery windows")
    parser.add_argument("--mode", choices=["daily", "weekly"], default="daily", help="Run mode to tune scope")
    parser.add_argument("--limit", type=int, default=50, help="Maximum CVEs to scan per run")
    parser.add_argument("--cve", action="append", help="Explicit CVE IDs to scan (can be passed multiple times)")
    parser.add_argument("--skip-discovery", action="store_true", help="Skip GitHub discovery and reuse existing API outputs")
    parser.add_argument("--check-links", action="store_true", help="Optionally HEAD check repo URLs for dead links")
    args = parser.parse_args(argv)

    pipeline = PoCPipeline()
    scope: List[str] = []
    discovery_days = args.days
    if args.cve:
        scope = [cve.upper() for cve in args.cve]
    elif not args.skip_discovery:
        prefer_recent = True
        scan_days = args.days
        limit = args.limit
        if args.mode == "weekly":
            scan_days = max(scan_days, 30)
            discovery_days = scan_days
            prefer_recent = False
            limit = None
        scope = build_scope(scan_days, github_list=Path("github.txt"), existing_api=API_DIR / "cve", prefer_recent_years=prefer_recent, max_cves=limit)

    results: List[Dict] = []
    if args.skip_discovery:
        results = load_existing_results(API_DIR / "cve")
    else:
        for idx, cve_id in enumerate(scope):
            try:
                results.append(pipeline.discover_for_cve(cve_id, days=discovery_days))
            except Exception as exc:  # noqa: BLE001
                print(f"[warn] Failed to process {cve_id}: {exc}", file=sys.stderr)
        persist_evidence(results)

    if not results:
        print("No results to write; aborting.")
        return 1

    write_cve_outputs(results)
    index_payload = write_index(results)
    top_payload = write_top(results)

    def maybe_check_links() -> List[Dict]:
        if not args.check_links:
            return []
        urls = []
        for result in results:
            for poc in result.get("pocs", []):
                if poc.get("confidence_tier") in {"high", "medium"} and poc.get("repo_url"):
                    urls.append(poc["repo_url"])
        urls = urls[:25]
        dead: List[Dict] = []
        for url in urls:
            try:
                resp = requests.head(url, timeout=5, allow_redirects=True)
                if resp.status_code >= 400:
                    dead.append({"url": url, "status": resp.status_code})
            except requests.RequestException as exc:  # noqa: BLE001
                dead.append({"url": url, "error": str(exc)})
        return dead

    snapshot_payload = summarise_for_snapshot(results, top=top_payload)
    prev_snapshot = load_json(API_DIR / "snapshots" / "latest.json", default={}) or {}
    snapshot_path = write_snapshot(snapshot_payload)
    diff_payload = build_diff(prev_snapshot, snapshot_payload, dead_links=maybe_check_links())
    write_diff(diff_payload)
    prune_old_snapshots()
    prune_old_diffs()

    renderer = SiteRenderer(results=results, index_payload=index_payload, top_payload=top_payload, diff_payload=diff_payload)
    renderer.build()

    print(f"Generated site under {DOCS_DIR}")
    print(f"Wrote latest snapshot to {snapshot_path}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

scripts/github_client.py (new file, 210 lines)
@@ -0,0 +1,210 @@
from __future__ import annotations

import json
import os
import time
from dataclasses import dataclass
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple

import requests

from utils import CACHE_DIR, chunked, hash_key, isoformat


TEXT_MATCH_HEADER = "application/vnd.github.text-match+json"


class RateLimiter:
    def __init__(self, calls_per_minute: int) -> None:
        self.min_interval = 60.0 / max(calls_per_minute, 1)
        self.last_call: Dict[str, float] = {}

    def wait(self, bucket: str) -> None:
        last = self.last_call.get(bucket, 0.0)
        elapsed = time.time() - last
        if elapsed < self.min_interval:
            time.sleep(self.min_interval - elapsed)
        self.last_call[bucket] = time.time()


class FileCache:
    def __init__(self, base: Path) -> None:
        self.base = base
        self.base.mkdir(parents=True, exist_ok=True)

    def _path_for(self, key: str) -> Path:
        digest = hash_key(key)
        return self.base / digest[:2] / f"{digest}.json"

    def load(self, key: str, *, ttl: int) -> Optional[Dict]:
        path = self._path_for(key)
        if not path.exists():
            return None
        try:
            with path.open("r", encoding="utf-8") as handle:
                data = json.load(handle)
        except (OSError, json.JSONDecodeError):
            return None
        expires_at = data.get("expires_at")
        if expires_at:
            try:
                expires_ts = time.mktime(time.strptime(expires_at, "%Y-%m-%dT%H:%M:%S"))
                if time.time() > expires_ts:
                    return None
            except Exception:
                return None
        return data.get("payload")

    def save(self, key: str, payload: Dict, *, ttl: int) -> None:
        path = self._path_for(key)
        path.parent.mkdir(parents=True, exist_ok=True)
        data = {
            "fetched_at": isoformat(),
            "expires_at": time.strftime("%Y-%m-%dT%H:%M:%S", time.gmtime(time.time() + ttl)),
            "payload": payload,
        }
        with path.open("w", encoding="utf-8") as handle:
            json.dump(data, handle, ensure_ascii=False, indent=2)


@dataclass
class SearchResult:
    kind: str
    query: str
    page: int
    payload: Dict


class GitHubClient:
    def __init__(
        self,
        token: Optional[str],
        *,
        cache_dir: Path | None = None,
        code_search_rpm: int = 10,
        general_rpm: int = 30,
    ) -> None:
        self.session = requests.Session()
        self.session.headers.update({"Accept": TEXT_MATCH_HEADER})
        if token:
            self.session.headers["Authorization"] = f"Bearer {token}"
        self.base_url = "https://api.github.com"
        self.graphql_url = f"{self.base_url}/graphql"
        cache_root = cache_dir or CACHE_DIR / "github"
        self.cache = FileCache(cache_root)
        self.rate_limiters = {
            "code": RateLimiter(code_search_rpm),
            "search": RateLimiter(general_rpm),
            "graphql": RateLimiter(general_rpm),
        }

    def _request(self, method: str, url: str, *, bucket: str, **kwargs) -> requests.Response:
        self.rate_limiters[bucket].wait(bucket)
        attempts = 0
        while True:
            attempts += 1
            try:
                response = self.session.request(method, url, timeout=30, **kwargs)
            except requests.RequestException:
                if attempts >= 3:
                    raise
                time.sleep(2 * attempts)
                continue

            if response.status_code == 403 and "X-RateLimit-Remaining" in response.headers:
                remaining = int(response.headers.get("X-RateLimit-Remaining") or "0")
                reset = response.headers.get("X-RateLimit-Reset")
                if remaining <= 0 and reset:
                    try:
                        reset_ts = int(reset)
                        wait_for = max(0, reset_ts - int(time.time()) + 1)
                        time.sleep(wait_for)
                        continue
                    except ValueError:
                        pass
            if response.status_code >= 500 and attempts < 3:
                time.sleep(1 + attempts)
                continue
            response.raise_for_status()
            return response

    def _cached_search(self, kind: str, query: str, page: int, per_page: int, ttl: int) -> Dict:
        cache_key = f"{kind}:{query}:p{page}:n{per_page}"
        cached = self.cache.load(cache_key, ttl=ttl)
        if cached is not None:
            return cached

        url = f"{self.base_url}/search/{kind}"
        params = {"q": query, "page": page, "per_page": per_page}
        resp = self._request("GET", url, params=params, bucket="code" if kind == "code" else "search")
        payload = resp.json()
        self.cache.save(cache_key, payload, ttl=ttl)
        return payload

    def search_repositories(self, query: str, *, page: int = 1, per_page: int = 100, ttl: int = 3600) -> SearchResult:
        return SearchResult("repositories", query, page, self._cached_search("repositories", query, page, per_page, ttl))

    def search_code(self, query: str, *, page: int = 1, per_page: int = 100, ttl: int = 3600) -> SearchResult:
        return SearchResult("code", query, page, self._cached_search("code", query, page, per_page, ttl))

    def search_topics(self, query: str, *, page: int = 1, per_page: int = 100, ttl: int = 3600) -> SearchResult:
        return SearchResult("repositories", query, page, self._cached_search("repositories", query, page, per_page, ttl))

    def fetch_repo_metadata(self, full_names: Iterable[str], *, ttl: int = 6 * 3600) -> Dict[str, Dict]:
        results: Dict[str, Dict] = {}
        to_fetch: List[str] = []
        for name in full_names:
            cache_key = f"repo-meta:{name}"
            cached = self.cache.load(cache_key, ttl=ttl)
            if cached is not None:
                results[name] = cached
            else:
                to_fetch.append(name)

        if not to_fetch:
            return results

        fields = """
            nameWithOwner
            url
            stargazerCount
            description
            forkCount
            isFork
            isArchived
            pushedAt
            updatedAt
            primaryLanguage { name }
            parent { nameWithOwner url }
            repositoryTopics(first: 20) { nodes { topic { name } } }
        """

        for batch in chunked(to_fetch, 12):
            parts = []
            for idx, full_name in enumerate(batch):
                if "/" not in full_name:
                    continue
                owner, name = full_name.split("/", 1)
                owner = owner.replace('"', "")
                name = name.replace('"', "")
                parts.append(f'repo_{idx}: repository(owner: "{owner}", name: "{name}") {{ {fields} }}')
            if not parts:
                continue
            query = "query { " + " ".join(parts) + " }"
            resp = self._request("POST", self.graphql_url, json={"query": query}, bucket="graphql")
            data = resp.json()
            repos = data.get("data", {})
            for idx, full_name in enumerate(batch):
                key = f"repo_{idx}"
                meta = repos.get(key) or {}
                cache_key = f"repo-meta:{full_name}"
                self.cache.save(cache_key, meta, ttl=ttl)
                results[full_name] = meta

        return results


def build_client(token_env: str = "GITHUB_TOKEN") -> GitHubClient:
    token = os.environ.get(token_env)
    return GitHubClient(token, cache_dir=CACHE_DIR / "github")

scripts/pipeline_outputs.py (new file, 220 lines)
@@ -0,0 +1,220 @@
from __future__ import annotations

from collections import Counter
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, Iterable, List, Tuple

from utils import API_DIR, DIFFS_DIR, SNAPSHOT_DIR, TOP_DIR, ensure_dirs, load_json, save_json, today_str


def write_cve_outputs(results: List[Dict], *, base_dir: Path | None = None) -> None:
    target_dir = base_dir or API_DIR / "cve"
    ensure_dirs(target_dir)
    for result in results:
        last_updated = result.get("last_updated") or today_str()
        output = {
            "cve_id": result["cve_id"],
            "last_updated": last_updated,
            "pocs": [
                {
                    "repo_full_name": poc.get("repo_full_name"),
                    "repo_url": poc.get("repo_url"),
                    "is_fork": poc.get("is_fork"),
                    "parent_repo_url": poc.get("parent_repo_url"),
                    "stars": poc.get("stars"),
                    "forks": poc.get("forks"),
                    "archived": poc.get("archived"),
                    "pushed_at": poc.get("pushed_at") or poc.get("updated_at"),
                    "topics": poc.get("topics", []),
                    "primary_language": poc.get("primary_language"),
                    "matches": poc.get("matches", []),
                    "confidence_score": poc.get("confidence_score"),
                    "confidence_tier": poc.get("confidence_tier"),
                }
                for poc in result.get("pocs", [])
            ],
        }
        save_json(target_dir / f"{result['cve_id']}.json", output)


def build_index(results: List[Dict]) -> Dict:
    items: List[Dict] = []
    for result in results:
        poc_entries = result.get("pocs", [])
        high = [p for p in poc_entries if p.get("confidence_tier") == "high"]
        medium = [p for p in poc_entries if p.get("confidence_tier") == "medium"]
        langs = Counter()
        max_score = 0.0
        for poc in poc_entries:
            lang = poc.get("primary_language")
            if lang:
                langs[lang] += 1
            max_score = max(max_score, poc.get("confidence_score") or 0)
        items.append(
            {
                "cve_id": result["cve_id"],
                "poc_count": len(poc_entries),
                "high_confidence": len(high),
                "medium_confidence": len(medium),
                "top_languages": [lang for lang, _ in langs.most_common(3)],
                "max_score": max_score,
                "last_updated": result.get("last_updated"),
            }
        )
    return {"generated": today_str(), "items": sorted(items, key=lambda r: r["cve_id"], reverse=True)}


def write_index(results: List[Dict]) -> Dict:
    ensure_dirs(API_DIR)
    payload = build_index(results)
    save_json(API_DIR / "index.json", payload)
    return payload


def write_top(results: List[Dict], *, limit: int = 100) -> Dict:
    ensure_dirs(TOP_DIR)
    entries: List[Dict] = []
    for result in results:
        for poc in result.get("pocs", []):
            if poc.get("confidence_tier") not in {"high", "medium"}:
                continue
            entries.append(
                {
                    "cve_id": result["cve_id"],
                    "repo_full_name": poc.get("repo_full_name"),
                    "repo_url": poc.get("repo_url"),
                    "score": poc.get("confidence_score"),
                    "tier": poc.get("confidence_tier"),
                    "stars": poc.get("stars"),
                    "primary_language": poc.get("primary_language"),
                }
            )
    entries.sort(key=lambda e: (-(e.get("score") or 0), -(e.get("stars") or 0)))
    payload = {"generated": today_str(), "items": entries[:limit]}
    save_json(TOP_DIR / "today.json", payload)
    return payload


def summarise_for_snapshot(results: List[Dict], *, top: Dict | None = None) -> Dict:
    summary: Dict[str, Dict[str, Dict]] = {}
    for result in results:
        repo_map: Dict[str, Dict] = {}
        for poc in result.get("pocs", []):
            repo_map[poc.get("repo_full_name")] = {
                "score": poc.get("confidence_score"),
                "tier": poc.get("confidence_tier"),
            }
        summary[result["cve_id"]] = repo_map
    payload = {"generated": today_str(), "entries": summary}
    if top:
        payload["top"] = top
    return payload


def write_snapshot(summary: Dict) -> Path:
    ensure_dirs(SNAPSHOT_DIR)
    target = SNAPSHOT_DIR / f"{summary['generated']}.json"
    save_json(target, summary)
    save_json(SNAPSHOT_DIR / "latest.json", summary)
    return target


def prune_old_snapshots(days: int = 14) -> None:
    if not SNAPSHOT_DIR.exists():
        return
    cutoff = datetime.utcnow().date() - timedelta(days=days)
    for snap in SNAPSHOT_DIR.glob("*.json"):
        try:
            snap_date = datetime.strptime(snap.stem, "%Y-%m-%d").date()
        except ValueError:
            continue
        if snap_date < cutoff:
            snap.unlink(missing_ok=True)


def prune_old_diffs(days: int = 14) -> None:
    if not DIFFS_DIR.exists():
        return
    # UTC cutoff, matching prune_old_snapshots (today_str() is also UTC-based)
    cutoff = datetime.utcnow().date() - timedelta(days=days)
    for diff in DIFFS_DIR.glob("*.json"):
        try:
            diff_date = datetime.strptime(diff.stem, "%Y-%m-%d").date()
        except ValueError:
            continue
        if diff_date < cutoff:
            diff.unlink(missing_ok=True)


def _load_snapshot(path: Path) -> Dict:
    return load_json(path, default={}) or {}


def build_diff(prev: Dict, curr: Dict, *, dead_links: List[Dict] | None = None) -> Dict:
    prev_entries = prev.get("entries", {})
    curr_entries = curr.get("entries", {})

    new_high: List[Dict] = []
    promoted: List[Dict] = []
    demoted: List[Dict] = []

    for cve_id, repos in curr_entries.items():
        for repo_name, info in repos.items():
            tier = info.get("tier")
            if tier != "high":
                continue
            prev_info = (prev_entries.get(cve_id) or {}).get(repo_name)
            if not prev_info:
                new_high.append({"cve_id": cve_id, "repo_full_name": repo_name, "score": info.get("score")})
            elif prev_info.get("tier") != "high":
                promoted.append(
                    {
                        "cve_id": cve_id,
                        "repo_full_name": repo_name,
                        "score": info.get("score"),
                        "previous_tier": prev_info.get("tier"),
                    }
                )

    for cve_id, repos in prev_entries.items():
        for repo_name, info in repos.items():
            if info.get("tier") != "high":
                continue
            curr_info = (curr_entries.get(cve_id) or {}).get(repo_name)
            if not curr_info or curr_info.get("tier") != "high":
                demoted.append(
                    {
                        "cve_id": cve_id,
                        "repo_full_name": repo_name,
                        "previous_score": info.get("score"),
                        "previous_tier": info.get("tier"),
                        "current_tier": curr_info.get("tier") if curr_info else None,
                    }
                )

    return {
        "generated": curr.get("generated"),
        "new_high_conf_pocs": new_high,
        "promoted_to_high": promoted,
        "demoted_or_removed": demoted,
        "dead_links": dead_links or [],
    }


def write_diff(diff: Dict) -> Path:
    ensure_dirs(DIFFS_DIR)
    target = DIFFS_DIR / f"{diff['generated']}.json"
    save_json(target, diff)
    save_json(DIFFS_DIR / "latest.json", diff)
    return target


def latest_snapshots() -> Tuple[Dict, Dict]:
    if not SNAPSHOT_DIR.exists():
        return {}, {}
    # skip the latest.json mirror so [-1]/[-2] are the two most recent dated snapshots
    snaps = sorted(p for p in SNAPSHOT_DIR.glob("*.json") if p.stem != "latest")
    if not snaps:
        return {}, {}
    curr = _load_snapshot(snaps[-1])
    prev = _load_snapshot(snaps[-2]) if len(snaps) > 1 else {}
    return prev, curr

scripts/poc_pipeline.py (new file, 274 lines)
@@ -0,0 +1,274 @@
from __future__ import annotations

import re
from dataclasses import dataclass, field
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Set, Tuple

from github_client import GitHubClient, SearchResult, build_client
from poc_scoring import match_score, score_repo
from utils import API_DIR, EVIDENCE_DIR, chunked, cve_year, ensure_dirs, isoformat, load_blacklist, load_json, save_json, today_str


LANG_PARTITIONS = ("python", "go", "c", "shell", "powershell", "java", "ruby", "js")
CVE_RE = re.compile(r"CVE-\d{4}-\d{4,}", re.IGNORECASE)


@dataclass
class MatchEvidence:
    path: str
    match_type: str
    query: str
    score: float | None = None


@dataclass
class RepoCandidate:
    cve_id: str
    repo_full_name: str
    repo_url: str
    matches: List[MatchEvidence] = field(default_factory=list)
    metadata: Dict[str, object] = field(default_factory=dict)

    def add_match(self, path: str, match_type: str, query: str) -> None:
        key = (path, match_type)
        existing = {(m.path, m.match_type) for m in self.matches}
        if key in existing:
            return
        self.matches.append(MatchEvidence(path=path, match_type=match_type, query=query))


def build_created_ranges(days: int, *, window: int = 7) -> List[Tuple[str, str]]:
    end = date.today()
    start = end - timedelta(days=max(days, 1))
    ranges: List[Tuple[str, str]] = []
    cursor = start
    while cursor <= end:
        window_end = min(cursor + timedelta(days=window - 1), end)
        ranges.append((cursor.isoformat(), window_end.isoformat()))
        cursor = window_end + timedelta(days=1)
    return ranges or [(start.isoformat(), end.isoformat())]


def build_query_pack(cve_id: str, created_range: Tuple[str, str] | None = None) -> List[Dict[str, str]]:
    base_repo = f'{cve_id} in:name,description,readme fork:false'
    enriched_repo = f'{cve_id} (poc OR exploit) in:name,description,readme fork:false'
    topic_query = f"topic:{cve_id.lower()} fork:false"
    created_suffix = ""
    if created_range:
        created_suffix = f" created:{created_range[0]}..{created_range[1]}"

    queries = [
        {"kind": "repositories", "query": base_repo + created_suffix, "match_type": "name"},
        {"kind": "repositories", "query": enriched_repo + created_suffix, "match_type": "description"},
        {"kind": "repositories", "query": topic_query + created_suffix, "match_type": "topic"},
    ]

    for lang in LANG_PARTITIONS:
        base_code = f'{cve_id} in:file language:{lang}{created_suffix}'
        queries.append({"kind": "code", "query": base_code, "match_type": "code"})

    # generic code search without language partition for the most recent window
    queries.append({"kind": "code", "query": f"{cve_id} in:file{created_suffix}", "match_type": "code"})
    return queries


def parse_repo_from_item(item: Dict) -> Tuple[str | None, str | None]:
    repo_full_name = item.get("full_name") or item.get("repository", {}).get("full_name")
    repo_url = item.get("html_url") or item.get("repository", {}).get("html_url")
    if not repo_full_name and "repository" in item:
        repo_full_name = item["repository"].get("owner", {}).get("login", "")
        if repo_full_name:
            repo_full_name = f"{repo_full_name}/{item['repository'].get('name', '')}"
    return repo_full_name, repo_url


def extract_matches(item: Dict, default_type: str, query: str) -> List[MatchEvidence]:
    matches: List[MatchEvidence] = []
    for text_match in item.get("text_matches", []) or []:
        prop = text_match.get("property") or text_match.get("object_type") or ""
        fragment = text_match.get("fragment") or text_match.get("path") or prop or ""
        match_type = prop if prop else default_type
        matches.append(MatchEvidence(path=str(fragment), match_type=str(match_type), query=query))
    if not matches:
        path = item.get("path") or default_type
        matches.append(MatchEvidence(path=str(path), match_type=default_type, query=query))
    return matches


def normalise_metadata(meta: Dict, fallback_full_name: str, fallback_url: str) -> Dict:
    topics = []
    if meta.get("repositoryTopics"):
        for node in meta["repositoryTopics"].get("nodes", []):
            topic = (node.get("topic") or {}).get("name")
            if topic:
                topics.append(topic)
    primary_language = None
    if meta.get("primaryLanguage"):
        primary_language = meta["primaryLanguage"].get("name")
    parent = meta.get("parent") or {}
    return {
        "repo_full_name": meta.get("nameWithOwner") or fallback_full_name,
        "repo_url": meta.get("url") or fallback_url,
        "description": meta.get("description") or "",
        "is_fork": bool(meta.get("isFork")),
        "parent_repo_url": parent.get("url"),
        "stars": meta.get("stargazerCount") or 0,
        "forks": meta.get("forkCount") or 0,
        "archived": bool(meta.get("isArchived")),
        "pushed_at": meta.get("pushedAt"),
        "updated_at": meta.get("updatedAt"),
        "topics": topics,
        "primary_language": primary_language,
    }


class PoCPipeline:
    def __init__(
        self,
        client: GitHubClient | None = None,
        *,
        blacklist_path: Path | None = None,
        search_ttl: int = 3 * 3600,
    ) -> None:
        self.client = client or build_client()
        self.blacklist = load_blacklist(blacklist_path)
        self.search_ttl = search_ttl

    def _run_query(self, query: Dict, page: int) -> SearchResult:
        if query["kind"] == "repositories":
            return self.client.search_repositories(query["query"], page=page, per_page=50, ttl=self.search_ttl)
        if query["kind"] == "code":
            return self.client.search_code(query["query"], page=page, per_page=50, ttl=self.search_ttl)
        return self.client.search_topics(query["query"], page=page, per_page=50, ttl=self.search_ttl)

    def discover_for_cve(self, cve_id: str, *, days: int, max_pages_repo: int = 2, max_pages_code: int = 2) -> Dict:
        ranges = build_created_ranges(days)
        candidates: Dict[str, RepoCandidate] = {}
        query_log: List[Dict] = []

        for created_range in ranges:
            query_pack = build_query_pack(cve_id, created_range)
            for query in query_pack:
                query_log.append({"query": query["query"], "kind": query["kind"], "window": created_range})
                page_limit = max_pages_code if query["kind"] == "code" else max_pages_repo
                for page in range(1, page_limit + 1):
                    result = self._run_query(query, page)
                    items = result.payload.get("items", [])
                    for item in items:
                        repo_full_name, repo_url = parse_repo_from_item(item)
                        if not repo_full_name or not repo_url:
                            continue
                        candidate = candidates.setdefault(
                            repo_full_name,
                            RepoCandidate(cve_id=cve_id, repo_full_name=repo_full_name, repo_url=repo_url),
                        )
                        for match in extract_matches(item, query["match_type"], query["query"]):
                            candidate.add_match(match.path, match.match_type, match.query)
                    if len(items) < 50:
                        break

        metadata = self.client.fetch_repo_metadata(candidates.keys())
        for repo_full_name, candidate in candidates.items():
            meta = metadata.get(repo_full_name, {})
            candidate.metadata = normalise_metadata(meta, repo_full_name, candidate.repo_url)

        repos: List[Dict] = []
        for candidate in candidates.values():
            matches_dicts = []
            for m in candidate.matches:
                m.score = match_score({"path": m.path, "match_type": m.match_type})
                matches_dicts.append({"path": m.path, "match_type": m.match_type, "query": m.query, "score": m.score})
            score, tier = score_repo(candidate.metadata, matches_dicts, self.blacklist)
            repo_entry = {
                **candidate.metadata,
                "matches": matches_dicts,
                "confidence_score": score,
                "confidence_tier": tier,
                "cve_id": cve_id,
            }
            repos.append(repo_entry)

        repos.sort(key=lambda r: (-r["confidence_score"], -r.get("stars", 0)))

        evidence = {
            "queries": query_log,
            "candidates": [
                {
                    "repo_full_name": r["repo_full_name"],
                    "matches": r["matches"],
                    "match_count": len(r["matches"]),
                    "score": r["confidence_score"],
                    "tier": r["confidence_tier"],
                }
                for r in repos
            ],
        }
        return {"cve_id": cve_id, "last_updated": isoformat(), "pocs": repos, "evidence": evidence}

    def discover_many(self, cve_ids: Iterable[str], *, days: int, limit: Optional[int] = None) -> List[Dict]:
        results: List[Dict] = []
        for idx, cve_id in enumerate(cve_ids):
            if limit and idx >= limit:
                break
            results.append(self.discover_for_cve(cve_id, days=days))
        return results


def persist_evidence(results: List[Dict]) -> None:
    ensure_dirs(EVIDENCE_DIR)
    for result in results:
        cve_id = result["cve_id"]
        evidence_path = EVIDENCE_DIR / f"{cve_id}.json"
        save_json(evidence_path, result.get("evidence", {}))


def discover_from_github_list(path: Path) -> List[str]:
    if not path.exists():
        return []
    ids: List[str] = []
    for line in path.read_text(encoding="utf-8").splitlines():
        matches = CVE_RE.findall(line)
        for match in matches:
            if match.upper() not in ids:
                ids.append(match.upper())
    return ids


def load_existing_cves(api_dir: Path = API_DIR / "cve") -> List[str]:
    if not api_dir.exists():
        return []
    return sorted({p.stem.upper() for p in api_dir.glob("CVE-*.json") if CVE_RE.match(p.stem)})


def build_scope(
    days: int,
    *,
    github_list: Path,
    existing_api: Path,
    prefer_recent_years: bool = True,
    max_cves: int | None = None,
    low_conf_threshold: int = 1,
) -> List[str]:
    seeds = discover_from_github_list(github_list)
    existing = load_existing_cves(existing_api)
    candidates = seeds or existing

    if prefer_recent_years:
        current_year = date.today().year
        candidates = [cve for cve in candidates if cve_year(cve) and cve_year(cve) >= current_year - 2] or candidates

    index_path = API_DIR / "index.json"
    low_conf: List[str] = []
    if index_path.exists():
        index_payload = load_json(index_path, default={}) or {}
        for item in index_payload.get("items", []):
            score = (item.get("high_confidence", 0) or 0) + (item.get("medium_confidence", 0) or 0)
            if score <= low_conf_threshold:
                low_conf.append(item.get("cve_id"))

    scoped = candidates + [cve for cve in low_conf if cve and cve not in candidates]
    if max_cves:
        scoped = scoped[:max_cves]
    return scoped

scripts/poc_scoring.py (new file, 121 lines)
@@ -0,0 +1,121 @@
from __future__ import annotations

import re
from datetime import datetime, timedelta, timezone
from typing import Dict, Iterable, List, Tuple

from utils import clamp, parse_date

DOC_EXTS = {"md", "txt", "rst", "adoc", "markdown", "mkd", "mdown"}
POSITIVE_KEYWORDS = ("poc", "exploit", "rce", "lpe", "auth bypass", "bypass")
NEGATIVE_KEYWORDS = ("report", "writeup", "advisory", "changelog")


def is_doc_path(path: str) -> bool:
    lower = path.lower()
    if lower.endswith("/"):
        return True
    if "." not in lower:
        return False
    ext = lower.rsplit(".", 1)[-1]
    return ext in DOC_EXTS


def match_score(match: Dict) -> float:
    path = str(match.get("path", ""))
    match_type = str(match.get("match_type", "")).lower()
    base = 50 if not is_doc_path(path) else 30
    if match_type in ("code",):
        base += 10
    if "readme" in match_type:
        base += 5
    if "topic" in match_type:
        base -= 5
    return clamp(base, 0, 100)


def tier_for_score(score: float) -> str:
    if score >= 75:
        return "high"
    if score >= 45:
        return "medium"
    return "low"


def keyword_hits(text: str, keywords: Iterable[str]) -> int:
    if not text:
        return 0
    lower = text.lower()
    return sum(1 for kw in keywords if kw in lower)


def recency_bonus(pushed_at: str | None) -> float:
    if not pushed_at:
        return 0.0
    dt = parse_date(pushed_at)
    if not dt:
        return 0.0
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    delta = datetime.now(timezone.utc) - dt
    if delta <= timedelta(days=30):
        return 18.0
    if delta <= timedelta(days=90):
        return 10.0
    if delta <= timedelta(days=180):
        return 5.0
    return 0.0


def score_repo(repo: Dict, matches: List[Dict], blacklist: List[str]) -> Tuple[float, str]:
    # accept both raw GraphQL keys and the normalised keys from normalise_metadata
    stars = repo.get("stargazerCount") or repo.get("stars") or 0
    forks = repo.get("forkCount") or repo.get("forks") or 0
    is_fork = bool(repo.get("isFork") or repo.get("is_fork"))
    archived = bool(repo.get("isArchived") or repo.get("archived"))
    topics = [t.lower() for t in repo.get("topics", []) if t]
    name = str(repo.get("nameWithOwner") or repo.get("repo_full_name") or "").lower()
    description = str(repo.get("description") or "").lower()

    non_doc_matches = [m for m in matches if not is_doc_path(str(m.get("path", "")))]
    doc_matches = [m for m in matches if is_doc_path(str(m.get("path", "")))]

    score = 12.0
    if non_doc_matches:
        score += 25 + min(len(non_doc_matches) * 2, 10)
    if doc_matches and not non_doc_matches:
        score -= 20

    score += recency_bonus(repo.get("pushed_at") or repo.get("pushedAt") or repo.get("updated_at"))

    score += min(stars / 50.0, 25.0)
    score += min(forks / 200.0, 5.0)

    score += keyword_hits(description, POSITIVE_KEYWORDS) * 4.0
    score += keyword_hits(" ".join(topics), POSITIVE_KEYWORDS) * 4.0

    negative_bias = keyword_hits(description, NEGATIVE_KEYWORDS)
    if negative_bias and not non_doc_matches:
        score -= 15

    if is_fork:
        score -= 12
    if archived:
        score -= 30

    lowered_blacklist = [entry.lower() for entry in blacklist]
    for forbidden in lowered_blacklist:
        if not forbidden:
            continue
        if forbidden.endswith("*"):
            prefix = forbidden[:-1]
            if prefix and name.startswith(prefix):
                score -= 40
                break
        elif forbidden in name:
            score -= 40
            break

    for match in matches:
        score += match_score(match) / 25.0

    return clamp(score, 0, 100), tier_for_score(score)

scripts/site_renderer.py (new file, 99 lines)
@@ -0,0 +1,99 @@
from __future__ import annotations

from pathlib import Path
from typing import Dict, List

from jinja2 import Environment, FileSystemLoader, select_autoescape

from utils import DOCS_DIR, TEMPLATES_DIR, ensure_dirs


def build_env() -> Environment:
    loader = FileSystemLoader(str(TEMPLATES_DIR))
    env = Environment(loader=loader, autoescape=select_autoescape(["html", "xml"]))
    env.trim_blocks = True
    env.lstrip_blocks = True
    return env


class SiteRenderer:
    def __init__(
        self,
        *,
        results: List[Dict],
        index_payload: Dict,
        top_payload: Dict,
        diff_payload: Dict | None = None,
    ) -> None:
        self.results = []
        for result in results:
            visible = [p for p in result.get("pocs", []) if p.get("confidence_tier") in {"high", "medium"}]
            if not visible:
                visible = result.get("pocs", [])
            self.results.append({**result, "visible_pocs": visible})
        self.index_payload = index_payload
        self.top_payload = top_payload
        self.diff_payload = diff_payload or {}
        self.env = build_env()
        ensure_dirs(
            DOCS_DIR,
            DOCS_DIR / "pocs",
            DOCS_DIR / "cve",
            DOCS_DIR / "diffs",
            DOCS_DIR / "assets",
        )

    def render(self, template_name: str, context: Dict, target: Path) -> None:
        html = self.env.get_template(template_name).render(**context)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(html, encoding="utf-8")

    def build(self) -> None:
        generated = self.index_payload.get("generated")
        summary = {
            "generated": generated,
            "total_cves": len(self.index_payload.get("items", [])),
            "total_pocs": sum(item.get("poc_count", 0) for item in self.index_payload.get("items", [])),
            "high_total": sum(item.get("high_confidence", 0) for item in self.index_payload.get("items", [])),
            "medium_total": sum(item.get("medium_confidence", 0) for item in self.index_payload.get("items", [])),
        }
        self.render(
            "pipeline_index.html",
            {
                "summary": summary,
                "top": self.top_payload.get("items", [])[:25],
                "diff": self.diff_payload or {},
            },
            DOCS_DIR / "index.html",
        )

        self.render(
            "pipeline_pocs.html",
            {
                "generated": generated,
                "index": self.index_payload.get("items", []),
                "top": self.top_payload.get("items", [])[:100],
            },
            DOCS_DIR / "pocs" / "index.html",
        )

        for result in self.results:
            self.render(
                "pipeline_cve.html",
                {"cve": result, "generated": generated},
                DOCS_DIR / "cve" / f"{result['cve_id']}.html",
            )

        if self.diff_payload:
            diff_date = self.diff_payload.get("generated")
            self.render(
                "pipeline_diff.html",
                {"diff": self.diff_payload, "generated": generated},
                DOCS_DIR / "diffs" / "index.html",
            )
            if diff_date:
                self.render(
                    "pipeline_diff.html",
                    {"diff": self.diff_payload, "generated": generated},
                    DOCS_DIR / "diffs" / f"{diff_date}.html",
                )
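A minimal usage sketch of `SiteRenderer`, assuming it runs inside the pipeline with the templates directory in place; the payload values are placeholders shaped after the fields `build()` reads (`generated`, `items`, `pocs`).

```python
# Sketch only: placeholder payloads, shaped after what build() reads.
renderer = SiteRenderer(
    results=[{"cve_id": "CVE-2024-0001", "last_updated": "2025-01-01", "pocs": []}],
    index_payload={"generated": "2025-01-01T05:15:00+00:00", "items": []},
    top_payload={"items": []},
    diff_payload=None,
)
renderer.build()  # writes docs/index.html, docs/pocs/, docs/cve/*.html
```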
135
scripts/utils.py
@@ -13,8 +13,13 @@ DATA_DIR = ROOT / "data"
DOCS_DIR = ROOT / "docs"
API_DIR = DOCS_DIR / "api" / "v1"
+SNAPSHOT_DIR = API_DIR / "snapshots"
+DIFFS_DIR = API_DIR / "diffs"
+TOP_DIR = API_DIR / "top"
TEMPLATES_DIR = ROOT / "templates"
ASSETS_DIR = DOCS_DIR / "assets"
CACHE_DIR = DATA_DIR / "cache"
+STATE_DIR = DATA_DIR / "state"
+EVIDENCE_DIR = DATA_DIR / "evidence"


def ensure_dirs(*paths: Path) -> None:
@@ -45,6 +50,21 @@ def today_str() -> str:
    return datetime.now(timezone.utc).date().isoformat()


+def now_utc() -> datetime:
+    return datetime.now(timezone.utc)
+
+
+def isoformat(dt: datetime | None = None) -> str:
+    return (dt or now_utc()).isoformat()
+
+
+def parse_date(value: str) -> datetime | None:
+    try:
+        return datetime.fromisoformat(value.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+
+
def slugify(text: str) -> str:
    cleaned = re.sub(r"[^A-Za-z0-9]+", "-", text.strip().lower())
    cleaned = cleaned.strip("-")
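A quick check of the `Z`-suffix handling in the new `parse_date` helper (values illustrative):

```python
print(parse_date("2025-01-01T05:15:00Z"))  # 2025-01-01 05:15:00+00:00
print(parse_date("not-a-date"))            # None: the ValueError is swallowed
```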
@@ -79,6 +99,7 @@ CVE_SECTION_RE = re.compile(r"^CVE-\d{4}-\d{4,}$", re.IGNORECASE)
def load_poc_index() -> Dict[str, Dict[str, object]]:
    """Load CVE → {desc, poc} mapping from docs/CVE_list.json or markdown files."""
    cve_json = DOCS_DIR / "CVE_list.json"
+    blacklist = load_blacklist()
    if cve_json.exists():
        data = load_json(cve_json, default=[]) or []
        mapping = {}
@@ -86,32 +107,35 @@ def load_poc_index() -> Dict[str, Dict[str, object]]:
            cve = str(entry.get("cve", "")).upper()
            if not is_valid_cve(cve):
                continue
+            poc_links = stable_unique(entry.get("poc", []) or [])
+            poc_links = filter_links_by_blacklist(poc_links, blacklist)
            mapping[cve] = {
                "desc": entry.get("desc", ""),
-                "poc": stable_unique(entry.get("poc", []) or []),
+                "poc": poc_links,
            }
        return mapping

-    return build_poc_index_from_markdown()
+    return build_poc_index_from_markdown(blacklist=blacklist)


-def build_poc_index_from_markdown() -> Dict[str, Dict[str, object]]:
+def build_poc_index_from_markdown(*, blacklist: Optional[List[str]] = None) -> Dict[str, Dict[str, object]]:
    mapping: Dict[str, Dict[str, object]] = {}
    for md_path in sorted(ROOT.glob("[12][0-9][0-9][0-9]/CVE-*.md")):
        cve = md_path.stem.upper()
        if not is_valid_cve(cve):
            continue
-        desc, poc_links = parse_cve_markdown(md_path)
+        desc, poc_links = parse_cve_markdown(md_path, blacklist=blacklist)
        mapping[cve] = {"desc": desc, "poc": poc_links}
    return mapping


-def parse_cve_markdown(path: Path) -> Tuple[str, List[str]]:
+def parse_cve_markdown(path: Path, *, blacklist: Optional[List[str]] = None) -> Tuple[str, List[str]]:
    text = path.read_text(encoding="utf-8")
    sections = parse_sections(text)
    description = normalise_block(sections.get("### Description", ""))
-    references = collect_links(sections.get("#### Reference", ""))
-    github_links = collect_links(sections.get("#### Github", ""))
+    blacklist = blacklist or []
+    references = collect_links(sections.get("#### Reference", ""), blacklist=blacklist)
+    github_links = collect_links(sections.get("#### Github", ""), blacklist=blacklist)
    poc_links = stable_unique([*references, *github_links])
    return description, poc_links
@@ -144,7 +168,7 @@ def parse_sections(content: str) -> Dict[str, str]:
    return sections


-def collect_links(block: str) -> List[str]:
+def collect_links(block: str, *, blacklist: Optional[List[str]] = None) -> List[str]:
    links: List[str] = []
    for raw in block.splitlines():
        entry = raw.strip()
@@ -154,7 +178,7 @@ def collect_links(block: str) -> List[str]:
        entry = entry[2:].strip()
        if entry and entry not in links:
            links.append(entry)
-    return links
+    return filter_links_by_blacklist(links, blacklist or [])


def is_valid_cve(cve_id: str) -> bool:
@@ -165,6 +189,15 @@ def is_valid_cve(cve_id: str) -> bool:
    return year.isdigit() and parts[2].isdigit()


+def cve_year(cve_id: str) -> int | None:
+    if not is_valid_cve(cve_id):
+        return None
+    try:
+        return int(cve_id.split("-")[1])
+    except (TypeError, ValueError):
+        return None
+
+
# --- Trending PoCs -------------------------------------------------------

TREND_ROW_RE = re.compile(r"^\|\s*(?P<stars>\d+)\s*⭐\s*\|\s*(?P<updated>[^|]+)\|\s*\[(?P<name>[^\]]+)\]\((?P<url>[^)]+)\)\s*\|\s*(?P<desc>.*)\|$")
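For example, `cve_year` just extracts the year segment after validation:

```python
print(cve_year("CVE-2021-24086"))  # 2021
print(cve_year("bogus"))           # None: fails the is_valid_cve check
```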
@@ -199,3 +232,87 @@ def read_text(path: Path) -> str:
def write_text(path: Path, content: str) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
+
+
+# --- New helpers for PoC discovery -------------------------------------------------
+
+
+def clamp(value: float, minimum: float = 0, maximum: float = 100) -> float:
+    return max(minimum, min(maximum, value))
+
+
+def chunked(iterable: Iterable, size: int) -> Iterable[List]:
+    chunk: List = []
+    for item in iterable:
+        chunk.append(item)
+        if len(chunk) >= size:
+            yield chunk
+            chunk = []
+    if chunk:
+        yield chunk
+
+
+def hash_key(text: str) -> str:
+    import hashlib
+
+    return hashlib.sha256(text.encode("utf-8")).hexdigest()
+
+
+def load_blacklist(path: Path | None = None) -> List[str]:
+    target = path or ROOT / "blacklist.txt"
+    if not target.exists():
+        return []
+    entries: List[str] = []
+    for raw in target.read_text(encoding="utf-8").splitlines():
+        line = raw.strip()
+        if line and not line.startswith("#"):
+            entries.append(line)
+    return entries
+
+
+def extract_repo_from_url(url: str) -> str:
+    """Return repository name segment from a URL (best effort)."""
+    try:
+        from urllib.parse import urlparse
+
+        parsed = urlparse(url)
+        host = (parsed.netloc or "").lower()
+        if host and "github" not in host:
+            return ""
+        path = parsed.path or url
+    except Exception:
+        path = url
+    parts = path.strip("/").split("/")
+    if len(parts) >= 2:
+        return parts[1].lower()
+    if parts:
+        return parts[-1].lower()
+    return ""
+
+
+def is_blacklisted_repo(url: str, blacklist: List[str]) -> bool:
+    repo = extract_repo_from_url(url)
+    if not repo:
+        return False
+    for entry in blacklist:
+        slug = entry.strip().lower()
+        if not slug:
+            continue
+        if slug.endswith("*"):
+            prefix = slug[:-1]
+            if prefix and repo.startswith(prefix):
+                return True
+        elif repo == slug:
+            return True
+    return False
+
+
+def filter_links_by_blacklist(links: List[str], blacklist: List[str]) -> List[str]:
+    if not blacklist:
+        return links
+    filtered: List[str] = []
+    for link in links:
+        if is_blacklisted_repo(link, blacklist):
+            continue
+        filtered.append(link)
+    return filtered
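These helpers match blacklist entries against the repository-name segment only, case-insensitively, with a trailing `*` acting as a prefix wildcard. A short sketch with made-up entries and URLs:

```python
blacklist = ["evil-repo", "spam-*"]  # hypothetical blacklist.txt entries
links = [
    "https://github.com/alice/CVE-2024-0001-poc",
    "https://github.com/bob/evil-repo",    # exact name match -> dropped
    "https://github.com/carol/spam-farm",  # "spam-*" prefix match -> dropped
]
print(filter_links_by_blacklist(links, blacklist))
# -> ['https://github.com/alice/CVE-2024-0001-poc']
```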
35
templates/pipeline_base.html
Normal file
@@ -0,0 +1,35 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{% block title %}CVE PoC Radar{% endblock %}</title>
  <link rel="stylesheet" href="/assets/style.css">
</head>
<body>
  <header class="topbar">
    <div class="wrap">
      <div class="brand">
        <a href="/"><span class="dot">●</span> CVE PoC Radar</a>
        <div class="muted small">Generated {{ generated or summary.generated }}</div>
      </div>
      <nav>
        <a href="/">Dashboard</a>
        <a href="/pocs/">PoC Explorer</a>
        <a href="/diffs/">Diffs</a>
        <a href="https://github.com/0xMarcio/cve" target="_blank" rel="noreferrer">GitHub</a>
      </nav>
    </div>
  </header>
  <main class="wrap">
    {% block content %}{% endblock %}
  </main>
  <footer class="footer">
    <div class="wrap footer-inner">
      <div>Built daily from GitHub search with scoring + evidence.</div>
      <div class="muted">API: <code>/api/v1/</code> · Pages under <code>/docs/</code></div>
    </div>
  </footer>
  <script src="/assets/app.js"></script>
</body>
</html>
46
templates/pipeline_cve.html
Normal file
@@ -0,0 +1,46 @@
{% extends "pipeline_base.html" %}
{% block title %}{{ cve.cve_id }} PoCs{% endblock %}
{% block content %}
<section>
  <div class="section-header">
    <div>
      <p class="eyebrow">CVE record</p>
      <h1>{{ cve.cve_id }}</h1>
      <p class="muted small">Last updated {{ cve.last_updated }}</p>
    </div>
    <a class="text-link" href="/api/v1/cve/{{ cve.cve_id }}.json">JSON</a>
  </div>

  <div class="card-grid">
    {% for poc in cve.visible_pocs %}
    <article class="card">
      <div class="card-title"><a href="{{ poc.repo_url }}" target="_blank" rel="noreferrer">{{ poc.repo_full_name }}</a></div>
      <div class="meta-row">
        <span class="pill tier-{{ poc.confidence_tier }}">{{ poc.confidence_tier|capitalize }} ({{ poc.confidence_score|round(1) }})</span>
        {% if poc.primary_language %}<span class="pill">{{ poc.primary_language }}</span>{% endif %}
        {% if poc.stars %}<span class="pill">{{ poc.stars }}★</span>{% endif %}
        {% if poc.is_fork %}<span class="pill ghost">Fork</span>{% endif %}
      </div>
      <div class="muted small">
        {% if poc.pushed_at %}Updated {{ poc.pushed_at }} · {% endif %}
        {% if poc.archived %}<span class="pill warn">Archived</span>{% endif %}
        {% if poc.parent_repo_url %}Parent: <a href="{{ poc.parent_repo_url }}" target="_blank" rel="noreferrer">{{ poc.parent_repo_url }}</a>{% endif %}
      </div>
      <div class="pill-row">
        {% for topic in poc.topics %}<span class="pill ghost">{{ topic }}</span>{% endfor %}
      </div>
      <div class="matches">
        <div class="muted small">Matches</div>
        <ul>
          {% for match in poc.matches %}
          <li><span class="pill tiny">{{ match.match_type }}</span> {{ match.path }}</li>
          {% endfor %}
        </ul>
      </div>
    </article>
    {% else %}
    <p class="muted">No PoCs found yet for {{ cve.cve_id }}.</p>
    {% endfor %}
  </div>
</section>
{% endblock %}
72
templates/pipeline_diff.html
Normal file
@@ -0,0 +1,72 @@
{% extends "pipeline_base.html" %}
{% block title %}Diff {{ diff.generated or generated }}{% endblock %}
{% block content %}
<section>
  <div class="section-header">
    <div>
      <p class="eyebrow">Daily delta</p>
      <h1>Diff for {{ diff.generated }}</h1>
    </div>
    <a class="text-link" href="/api/v1/diffs/{{ diff.generated }}.json">JSON</a>
  </div>

  <div class="grid-2">
    <div>
      <h3>New high-confidence PoCs</h3>
      <ul class="list">
        {% for item in diff.new_high_conf_pocs %}
        <li>
          <span class="pill">+ High</span>
          <a href="/cve/{{ item.cve_id }}.html">{{ item.cve_id }}</a>
          <a href="https://github.com/{{ item.repo_full_name }}" target="_blank" rel="noreferrer">{{ item.repo_full_name }}</a>
        </li>
        {% else %}
        <li class="muted">No new high-confidence entries.</li>
        {% endfor %}
      </ul>
    </div>
    <div>
      <h3>Promoted to high</h3>
      <ul class="list">
        {% for item in diff.promoted_to_high %}
        <li>
          <span class="pill">↗</span>
          <a href="/cve/{{ item.cve_id }}.html">{{ item.cve_id }}</a>
          <a href="https://github.com/{{ item.repo_full_name }}" target="_blank" rel="noreferrer">{{ item.repo_full_name }}</a>
          <span class="muted small">(prev {{ item.previous_tier }})</span>
        </li>
        {% else %}
        <li class="muted">No promotions this run.</li>
        {% endfor %}
      </ul>
    </div>
  </div>

  <div class="grid-2">
    <div>
      <h3>Demoted or removed</h3>
      <ul class="list">
        {% for item in diff.demoted_or_removed %}
        <li>
          <span class="pill warn">↘</span>
          <a href="/cve/{{ item.cve_id }}.html">{{ item.cve_id }}</a>
          <span class="muted small">{{ item.repo_full_name }}</span>
        </li>
        {% else %}
        <li class="muted">No removals.</li>
        {% endfor %}
      </ul>
    </div>
    <div>
      <h3>Dead links (optional checks)</h3>
      <ul class="list">
        {% for item in diff.dead_links %}
        <li><span class="pill warn">offline</span> <a href="{{ item.url }}">{{ item.url }}</a></li>
        {% else %}
        <li class="muted">Link checks skipped or none failed.</li>
        {% endfor %}
      </ul>
    </div>
  </div>
</section>
{% endblock %}
69
templates/pipeline_index.html
Normal file
@@ -0,0 +1,69 @@
{% extends "pipeline_base.html" %}
{% block title %}CVE PoC Radar{% endblock %}
{% block content %}
<section class="hero">
  <div>
    <p class="eyebrow">Daily GitHub sweep</p>
    <h1>CVE PoC Goldmine</h1>
    <p class="lede">Incremental discovery, scoring, and diffing for public exploit PoCs. High-confidence hits surface first; low-signal noise stays out of the spotlight.</p>
    <div class="cta-row">
      <a class="btn" href="/pocs/">Open PoC Explorer</a>
      <a class="btn ghost" href="/api/v1/index.json">API index</a>
    </div>
  </div>
  <div class="hero-panel">
    <div class="stat">
      <div class="label">High confidence</div>
      <div class="value">{{ summary.high_total }}</div>
    </div>
    <div class="stat">
      <div class="label">Medium confidence</div>
      <div class="value">{{ summary.medium_total }}</div>
    </div>
    <div class="stat">
      <div class="label">Tracked CVEs</div>
      <div class="value">{{ summary.total_cves }}</div>
    </div>
    <div class="label muted small">Generated {{ summary.generated }}</div>
  </div>
</section>

<section>
  <div class="section-header">
    <h2>Top PoCs right now</h2>
    <a class="text-link" href="/api/v1/top/today.json">JSON</a>
  </div>
  <div class="card-grid">
    {% for poc in top %}
    <article class="card">
      <div class="card-title"><a href="{{ poc.repo_url }}" target="_blank" rel="noreferrer">{{ poc.repo_full_name }}</a></div>
      <div class="meta-row">
        <span class="pill tier-{{ poc.tier }}">{{ poc.tier|capitalize }}</span>
        <span class="pill">{{ poc.score|round(1) }} pts</span>
        {% if poc.stars %}<span class="pill">{{ poc.stars }}★</span>{% endif %}
        {% if poc.primary_language %}<span class="pill">{{ poc.primary_language }}</span>{% endif %}
      </div>
      <div class="muted small">CVE: <a href="/cve/{{ poc.cve_id }}.html">{{ poc.cve_id }}</a></div>
    </article>
    {% else %}
    <p class="muted">No PoCs available yet.</p>
    {% endfor %}
  </div>
</section>

<section>
  <div class="section-header">
    <h2>Latest diff</h2>
    <a class="text-link" href="/diffs/">Diffs</a>
  </div>
  {% if diff and diff.new_high_conf_pocs %}
  <div class="pill-row">
    {% for item in diff.new_high_conf_pocs %}
    <span class="pill">+ {{ item.cve_id }} / {{ item.repo_full_name }}</span>
    {% endfor %}
  </div>
  {% else %}
  <p class="muted small">No new high-confidence PoCs in the latest run.</p>
  {% endif %}
</section>
{% endblock %}
47
templates/pipeline_pocs.html
Normal file
@@ -0,0 +1,47 @@
{% extends "pipeline_base.html" %}
{% block title %}PoC Explorer{% endblock %}
{% block content %}
<section>
  <div class="section-header">
    <h1>PoC Explorer</h1>
    <div class="muted">Search across the pre-built index JSON. Client-side results stay small and fast.</div>
  </div>
  <input class="input" type="search" placeholder="Search CVE id, language, tier…" data-index-search data-index-url="/api/v1/index.json" data-target="#search-results">
  <div id="search-results" class="card-grid" data-search-results>
    <p class="muted small">Type to search recent CVEs. Results stream in from <code>/api/v1/index.json</code>.</p>
  </div>
</section>

<section>
  <div class="section-header">
    <h2>Latest high + medium</h2>
    <div class="muted small">Server-side snapshot</div>
  </div>
  <div class="table-wrap">
    <table>
      <thead>
        <tr>
          <th>CVE</th>
          <th>High</th>
          <th>Medium</th>
          <th>Languages</th>
          <th>Max score</th>
        </tr>
      </thead>
      <tbody>
        {% for item in index %}
        <tr>
          <td><a href="/cve/{{ item.cve_id }}.html">{{ item.cve_id }}</a></td>
          <td>{{ item.high_confidence }}</td>
          <td>{{ item.medium_confidence }}</td>
          <td>{% for lang in item.top_languages %}<span class="pill">{{ lang }}</span>{% else %}<span class="muted">—</span>{% endfor %}</td>
          <td>{{ item.max_score|round(1) }}</td>
        </tr>
        {% else %}
        <tr><td colspan="5" class="muted">No entries available.</td></tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
</section>
{% endblock %}