mirror of
https://github.com/0xMarcio/cve.git
synced 2026-02-12 18:42:46 +00:00
169 lines
5.1 KiB
Python
169 lines
5.1 KiB
Python
#!/usr/bin/python3
|
|
import json
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Tuple
|
|
from urllib.parse import urlparse
|
|
|
|
# Repository root: two directory levels above this script's resolved location.
ROOT = Path(__file__).resolve().parent.parent
# Both generated JSON files are written into the same directory as this script.
OUTPUT = Path(__file__).resolve().parent / "CVE_list.json"
REMOVED_OUTPUT = Path(__file__).resolve().parent / "CVE_blacklist_removed.json"
# Plain-text blacklist file located directly under the repository root.
BLACKLIST = ROOT.joinpath("blacklist.txt")
|
|
|
|
|
|
def load_blacklist(path: Path = BLACKLIST) -> List[str]:
    """Read blacklist entries from a UTF-8 text file.

    Args:
        path: File to read; defaults to the module-level ``BLACKLIST`` path.

    Returns:
        The whitespace-stripped lines of the file, in file order, leaving out
        blank lines and lines whose first non-blank character is ``#``.
        An empty list when ``path`` does not exist.
    """
    if not path.exists():
        return []
    stripped_lines = (
        line.strip() for line in path.read_text(encoding="utf-8").splitlines()
    )
    # Keep real entries only: drop blanks and "#" comment lines.
    return [item for item in stripped_lines if item and not item.startswith("#")]
|
|
|
|
|
|
def repo_from_url(url: str) -> str:
    """Extract a lower-cased repository name from a URL or ``owner/repo`` slug.

    The value is parsed with ``urlparse``. When a host is present and does not
    contain the substring ``"github"``, the URL is treated as not being a
    repository link and ``""`` is returned. Otherwise the path is split on
    ``/`` and the second segment (the repository in ``/owner/repo/...``) is
    returned; when there is only one segment, that segment is returned.

    Args:
        url: A full URL or a bare path-like string.

    Returns:
        The lower-cased segment described above, or ``""`` when the host is
        not GitHub-like or no usable segment exists.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed authorities (for example an
        # unbalanced "[" in the host). Fall back to treating the raw string as
        # a path, without any host filtering.
        path = url
    else:
        host = (parsed.netloc or "").lower()
        if host and "github" not in host:
            return ""
        # A URL with no path component falls back to the raw string.
        path = parsed.path or url

    # str.split always yields at least one element, so indexing is safe.
    parts = path.strip("/").split("/")
    if len(parts) >= 2:
        return parts[1].lower()
    return parts[-1].lower()
|
|
|
|
|
|
def is_blacklisted(url: str, blacklist: List[str]) -> bool:
    """Tell whether the repository named by ``url`` matches a blacklist entry.

    Matching is case-insensitive and is done against the name returned by
    ``repo_from_url``. An entry ending in ``*`` matches any repository name
    that starts with the text before the ``*``; any other entry must equal the
    repository name exactly. Empty entries never match.

    Args:
        url: Link (or slug) to check.
        blacklist: Blacklist entries to test against.

    Returns:
        ``True`` on the first matching entry, ``False`` when nothing matches
        or when no repository name could be derived from ``url``.
    """
    repo = repo_from_url(url)
    if not repo:
        return False

    def matches(pattern: str) -> bool:
        # ``pattern`` is already lower-cased by the caller below.
        if not pattern:
            return False
        if pattern.endswith("*"):
            return repo.startswith(pattern[:-1])
        return repo == pattern

    return any(matches(entry.lower()) for entry in blacklist)
|
|
|
|
|
|
def normalise_block(text: str) -> str:
    """Flatten a markdown block into newline-joined, non-empty lines.

    Windows line endings are converted to ``\\n`` and the whole block is
    stripped. Each line then loses every leading ``-`` and space character
    (``lstrip("- ")`` removes a character set, not just one ``"- "`` bullet)
    as well as trailing whitespace. Lines left empty are discarded.

    Args:
        text: Raw block text.

    Returns:
        The cleaned lines joined with ``\\n``; ``""`` when nothing remains.
    """
    kept: List[str] = []
    for raw_line in text.replace("\r\n", "\n").strip().split("\n"):
        cleaned = raw_line.lstrip("- ").rstrip()
        # Dropping empty results also removes runs of blank lines, so no
        # separate newline-collapsing pass is needed.
        if cleaned:
            kept.append(cleaned)
    return "\n".join(kept)
|
|
|
|
|
|
def parse_sections(content: str) -> Dict[str, str]:
    """Split markdown text into sections keyed by their heading line.

    A line counts as a heading when, after stripping surrounding whitespace,
    it starts with ``"### "`` or ``"#### "``. The stripped heading becomes the
    key, and the lines up to the next heading (joined with ``\\n`` and
    stripped) become the value. Text before the first heading is discarded,
    and a repeated heading overwrites the earlier section.

    Args:
        content: Full markdown document.

    Returns:
        Mapping of heading line to section body.
    """
    result: Dict[str, str] = {}
    heading: Optional[str] = None
    body: List[str] = []

    for raw_line in content.splitlines():
        candidate = raw_line.strip()
        if not candidate.startswith(("### ", "#### ")):
            # Ordinary line: keep it verbatim for the section being collected.
            body.append(raw_line)
            continue
        # New heading: store the section collected so far, then start afresh.
        if heading is not None:
            result[heading] = "\n".join(body).strip()
        heading, body = candidate, []

    # Store the final section, which has no following heading to close it.
    if heading is not None:
        result[heading] = "\n".join(body).strip()

    return result
|
|
|
|
|
|
def collect_links(
    block: str,
    *,
    blacklist: Optional[List[str]] = None,
    removed: Optional[List[str]] = None,
) -> List[str]:
    """Collect unique, non-blacklisted entries from a section body.

    Each line is stripped. Blank lines and lines containing ``"No PoCs"`` are
    skipped, and a leading ``"- "`` bullet marker is removed. Entries for which
    ``is_blacklisted`` returns true are appended to ``removed`` (duplicates
    included) instead of being returned.

    Args:
        block: Section text with one entry per line.
        blacklist: Blacklist entries; ``None`` or empty disables filtering.
        removed: Optional list that receives every blacklisted entry. It is
            mutated in place so the caller can inspect what was dropped.

    Returns:
        The accepted entries in first-seen order, without duplicates.
    """
    rules = blacklist or []
    if removed is None:
        removed = []

    links: List[str] = []
    # Set-based membership keeps deduplication linear in the number of lines
    # instead of rescanning the result list for every entry.
    seen = set()
    for raw in block.splitlines():
        entry = raw.strip()
        if not entry or "No PoCs" in entry:
            continue
        if entry.startswith("- "):
            entry = entry[2:].strip()
            if not entry:
                continue
        if is_blacklisted(entry, rules):
            removed.append(entry)
            continue
        if entry not in seen:
            seen.add(entry)
            links.append(entry)
    return links
|
|
|
|
|
|
def main() -> None:
    """Build the CVE lookup JSON files from the per-year markdown files.

    Every all-digit directory under ``ROOT`` is scanned, newest name first,
    for ``*.md`` files. Each file is split into sections; the
    ``### Description`` section becomes the description, and the
    ``#### Reference`` and ``#### Github`` sections supply the PoC links after
    blacklist filtering. CVEs with no remaining links are left out.

    Writes:
        ``OUTPUT``: list of ``{"cve", "desc", "poc"}`` objects.
        ``REMOVED_OUTPUT``: blacklisted links, overall and grouped by CVE.
    """
    blacklist = load_blacklist()
    cve_entries = []
    removed_by_cve: Dict[str, List[str]] = {}
    removed_seen = set()

    # Only real directories qualify: a stray all-digit file under ROOT would
    # otherwise make os.listdir() fail further down.
    years = sorted(
        (
            entry
            for entry in os.listdir(ROOT)
            if entry.isdigit() and (ROOT / entry).is_dir()
        ),
        reverse=True,
    )

    for year in years:
        year_dir = ROOT / year
        for filename in sorted(os.listdir(year_dir)):
            if not filename.endswith(".md"):
                continue
            content = (year_dir / filename).read_text(encoding="utf-8")

            sections = parse_sections(content)
            description = normalise_block(sections.get("### Description", ""))
            removed_links: List[str] = []
            references = collect_links(
                sections.get("#### Reference", ""),
                blacklist=blacklist,
                removed=removed_links,
            )
            github_links = collect_links(
                sections.get("#### Github", ""),
                blacklist=blacklist,
                removed=removed_links,
            )

            # Merge both sections, keeping first-seen order without duplicates.
            poc_entries: List[str] = []
            seen = set()
            for link in references + github_links:
                if link not in seen:
                    poc_entries.append(link)
                    seen.add(link)

            # Strip only the trailing ".md"; str.replace would also remove any
            # ".md" occurring earlier in the file name.
            cve_id = filename[: -len(".md")]
            if removed_links:
                removed_by_cve[cve_id] = sorted(set(removed_links))
                removed_seen.update(removed_links)

            # Skip CVEs with zero PoCs (both sections empty) to keep lookup clean
            if not poc_entries:
                continue

            cve_entries.append({
                "cve": cve_id,
                "desc": description,
                "poc": poc_entries,
            })

    with open(OUTPUT, "w", encoding="utf-8") as outfile:
        json.dump(cve_entries, outfile, ensure_ascii=False)

    with open(REMOVED_OUTPUT, "w", encoding="utf-8") as removed_file:
        json.dump(
            {
                "removed": sorted(removed_seen),
                "by_cve": removed_by_cve,
            },
            removed_file,
            ensure_ascii=False,
            indent=2,
        )

    print("CVE list saved to CVE_list.json")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|