Initial commit: AASRT v1.0.0 - AI Agent Security Reconnaissance Tool

This commit is contained in:
swethab
2026-02-10 10:53:31 -05:00
commit a714a3399b
61 changed files with 14858 additions and 0 deletions
+19
View File
@@ -0,0 +1,19 @@
"""Enrichment modules for AASRT.
This module contains data enrichment capabilities:
- ClawSec threat intelligence integration
- (Future) WHOIS lookups
- (Future) Geolocation
- (Future) SSL/TLS certificate analysis
- (Future) DNS records
"""
from .clawsec_feed import ClawSecFeedManager, ClawSecFeed, ClawSecAdvisory
from .threat_enricher import ThreatEnricher
# Public API of the enrichment package: the names re-exported by the two
# relative imports above (feed manager + its data containers, and the enricher).
__all__ = [
'ClawSecFeedManager',
'ClawSecFeed',
'ClawSecAdvisory',
'ThreatEnricher'
]
+380
View File
@@ -0,0 +1,380 @@
"""ClawSec Threat Intelligence Feed Manager for AASRT."""
import json
import os
import threading
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional
import requests
from src.utils.logger import get_logger
logger = get_logger(__name__)
@dataclass
class ClawSecAdvisory:
    """Represents a single ClawSec CVE advisory.

    Instances are built either from a raw feed entry or from an entry
    previously written by :meth:`to_dict`; :meth:`from_dict` accepts both
    key layouts so that a serialized advisory round-trips without losing data.
    """

    cve_id: str
    severity: str  # CRITICAL, HIGH, MEDIUM, LOW
    vuln_type: str  # e.g., "prompt_injection", "missing_authentication"
    cvss_score: float
    title: str
    description: str
    affected: List[str] = field(default_factory=list)
    action: str = ""
    nvd_url: Optional[str] = None
    cwe_id: Optional[str] = None
    published_date: Optional[datetime] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary.

        Returns:
            Dictionary keyed by the dataclass field names, with
            ``published_date`` rendered as an ISO-8601 string (or None).
        """
        return {
            'cve_id': self.cve_id,
            'severity': self.severity,
            'vuln_type': self.vuln_type,
            'cvss_score': self.cvss_score,
            'title': self.title,
            'description': self.description,
            'affected': self.affected,
            'action': self.action,
            'nvd_url': self.nvd_url,
            'cwe_id': self.cwe_id,
            'published_date': self.published_date.isoformat() if self.published_date else None
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ClawSecAdvisory':
        """Create an advisory from a dictionary.

        Both key layouts are understood: the feed layout (``id``, ``type``,
        ``published``, ``nvd_category_id``) and the layout produced by
        :meth:`to_dict` (``cve_id``, ``vuln_type``, ``published_date``,
        ``cwe_id``). Feed keys take precedence when both are present.

        Args:
            data: Mapping describing one advisory.

        Returns:
            A populated ClawSecAdvisory. Missing or null fields fall back to
            the same defaults as absent keys.
        """

        def pick(*keys: str, default: Any = None) -> Any:
            # First non-null value among the candidate keys; an explicit null
            # is treated the same as a missing key.
            for key in keys:
                value = data.get(key)
                if value is not None:
                    return value
            return default

        published = pick('published', 'published_date')
        if isinstance(published, str):
            try:
                # fromisoformat() historically rejects a trailing "Z".
                published = datetime.fromisoformat(published.replace('Z', '+00:00'))
            except ValueError:
                published = None
        elif not isinstance(published, datetime):
            # Anything else cannot be serialized by to_dict(); drop it.
            published = None

        try:
            cvss_score = float(pick('cvss_score', default=0.0))
        except (TypeError, ValueError):
            # A non-numeric score should not discard the whole advisory.
            cvss_score = 0.0

        affected = pick('affected', default=[])
        # A bare string would otherwise be exploded into characters by list().
        affected = [affected] if isinstance(affected, str) else list(affected)

        return cls(
            cve_id=pick('id', 'cve_id', default=''),
            severity=str(pick('severity', default='MEDIUM')).upper(),
            vuln_type=pick('type', 'vuln_type', default='unknown'),
            cvss_score=cvss_score,
            title=pick('title', default=''),
            description=pick('description', default=''),
            affected=affected,
            action=pick('action', default=''),
            nvd_url=pick('nvd_url'),
            cwe_id=pick('nvd_category_id', 'cwe_id'),
            published_date=published
        )
@dataclass
class ClawSecFeed:
    """Container for the full ClawSec advisory feed."""

    advisories: List[ClawSecAdvisory]
    last_updated: datetime
    feed_version: str
    total_count: int

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-serializable dictionary for caching.

        Returns:
            Dictionary with serialized advisories, the ISO-8601
            ``last_updated`` timestamp, the feed version and the count.
        """
        return {
            'advisories': [a.to_dict() for a in self.advisories],
            'last_updated': self.last_updated.isoformat(),
            'feed_version': self.feed_version,
            'total_count': self.total_count
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'ClawSecFeed':
        """Create a feed from a dictionary (as written by :meth:`to_dict`).

        Args:
            data: Mapping with optional ``advisories``, ``last_updated``,
                ``feed_version`` and ``total_count`` keys.

        Returns:
            A ClawSecFeed. A missing, null or unparseable ``last_updated``
            falls back to the current UTC time; a missing ``total_count``
            falls back to the number of advisories actually loaded.
        """
        advisories = [
            ClawSecAdvisory.from_dict(entry)
            for entry in (data.get('advisories') or [])
        ]

        raw_updated = data.get('last_updated')
        last_updated = None
        if isinstance(raw_updated, datetime):
            last_updated = raw_updated
        elif isinstance(raw_updated, str):
            try:
                last_updated = datetime.fromisoformat(raw_updated)
            except ValueError:
                # Do not throw away every advisory over a bad timestamp.
                last_updated = None
        if last_updated is None:
            last_updated = datetime.utcnow()

        total_count = data.get('total_count')
        if total_count is None:
            total_count = len(advisories)

        return cls(
            advisories=advisories,
            last_updated=last_updated,
            feed_version=data.get('feed_version') or '0.0.0',
            total_count=total_count
        )
class ClawSecFeedManager:
    """
    Manages ClawSec threat intelligence feed with caching and offline support.

    Features:
    - HTTP fetch with configurable timeout
    - Local file caching for offline mode
    - Advisory matching by product/banner
    - Non-blocking background updates
    """

    DEFAULT_FEED_URL = "https://clawsec.prompt.security/advisories/feed.json"
    DEFAULT_CACHE_FILE = "./data/clawsec_cache.json"
    DEFAULT_TTL = 86400  # 24 hours
    DEFAULT_TIMEOUT = 30  # seconds

    # AI agent keywords looked for in both the advisory's affected entry and
    # the scanned product/banner text.
    AI_KEYWORDS = ('clawdbot', 'clawbot', 'moltbot', 'openclaw', 'autogpt', 'langchain')

    def __init__(self, config=None):
        """
        Initialize ClawSecFeedManager.

        Args:
            config: Configuration object with clawsec settings; when falsy,
                the class-level defaults are used.
        """
        self.config = config

        # A single code path for both cases: with no config the lookups below
        # simply hit their defaults.
        clawsec_config = {}
        if config:
            clawsec_config = config.get('clawsec', default={}) or {}

        self.feed_url = clawsec_config.get('feed_url', self.DEFAULT_FEED_URL)
        self.cache_file = clawsec_config.get('cache_file', self.DEFAULT_CACHE_FILE)
        self.cache_ttl = clawsec_config.get('cache_ttl_seconds', self.DEFAULT_TTL)
        self.offline_mode = clawsec_config.get('offline_mode', False)
        self.timeout = clawsec_config.get('timeout', self.DEFAULT_TIMEOUT)

        self._cache: Optional[ClawSecFeed] = None
        self._cache_timestamp: Optional[datetime] = None
        # Guards paired updates of _cache/_cache_timestamp (the background
        # refresh thread writes them too).
        self._lock = threading.Lock()

    def fetch_feed(self, force_refresh: bool = False) -> Optional[ClawSecFeed]:
        """
        Fetch the ClawSec advisory feed.

        Args:
            force_refresh: Force fetch from URL even if cache is valid

        Returns:
            ClawSecFeed object, the cached feed if the fetch or parse fails,
            or None if nothing is cached either
        """
        # Check cache first
        if not force_refresh and self.is_cache_valid():
            logger.debug("Using cached ClawSec feed")
            return self._cache

        # In offline mode, only use cache
        if self.offline_mode:
            logger.info("ClawSec offline mode - using cached data only")
            return self.get_cached_feed()

        try:
            logger.info(f"Fetching ClawSec feed from {self.feed_url}")
            response = requests.get(self.feed_url, timeout=self.timeout)
            response.raise_for_status()
            feed = self._parse_feed(response.json())
        except requests.RequestException as e:
            logger.warning(f"Failed to fetch ClawSec feed: {e}")
            # Fall back to cache
            return self.get_cached_feed()
        except (ValueError, KeyError, TypeError, AttributeError) as e:
            # ValueError covers every JSON decode error flavour; the others
            # cover a well-formed JSON document of an unexpected shape.
            logger.error(f"Failed to parse ClawSec feed: {e}")
            return self.get_cached_feed()

        with self._lock:
            self._cache = feed
            self._cache_timestamp = datetime.utcnow()

        # Persist to disk (save_cache handles its own errors)
        self.save_cache()
        logger.info(f"ClawSec feed loaded: {feed.total_count} advisories")
        return feed

    def _parse_feed(self, data: Dict[str, Any]) -> ClawSecFeed:
        """Parse raw feed JSON into ClawSecFeed object.

        Advisories that fail to parse are logged and skipped.

        Raises:
            ValueError: If the feed root is not a JSON object.
        """
        if not isinstance(data, dict):
            raise ValueError(f"feed root must be a JSON object, got {type(data).__name__}")

        advisories = []
        for advisory_data in data.get('advisories') or []:
            try:
                advisories.append(ClawSecAdvisory.from_dict(advisory_data))
            except Exception as e:
                # Best effort: one malformed entry must not sink the feed.
                logger.warning(f"Failed to parse advisory: {e}")

        return ClawSecFeed(
            advisories=advisories,
            last_updated=datetime.utcnow(),
            feed_version=data.get('version', '0.0.0'),
            total_count=len(advisories)
        )

    def get_cached_feed(self) -> Optional[ClawSecFeed]:
        """Return cached feed without network call.

        Uses the in-memory copy when present, otherwise tries the cache file.
        """
        if self._cache:
            return self._cache
        # Try loading from disk
        self.load_cache()
        return self._cache

    def is_cache_valid(self) -> bool:
        """Check if the in-memory cache exists and is within TTL."""
        if not self._cache or not self._cache_timestamp:
            return False
        age = datetime.utcnow() - self._cache_timestamp
        return age.total_seconds() < self.cache_ttl

    def save_cache(self) -> None:
        """Persist cache to local file for offline mode.

        Failures are logged and swallowed; caching is best effort.
        """
        feed = self._cache
        if not feed:
            return
        try:
            cache_path = Path(self.cache_file)
            cache_path.parent.mkdir(parents=True, exist_ok=True)
            cache_data = {
                'feed': feed.to_dict(),
                'cached_at': datetime.utcnow().isoformat()
            }
            with open(cache_path, 'w', encoding='utf-8') as f:
                json.dump(cache_data, f, indent=2)
            logger.debug(f"ClawSec cache saved to {self.cache_file}")
        except Exception as e:
            logger.warning(f"Failed to save ClawSec cache: {e}")

    def load_cache(self) -> bool:
        """Load cache from local file.

        The in-memory state is only replaced once the whole file (feed and
        timestamp) has been parsed, so a corrupt file leaves it untouched.

        Returns:
            True if a cache file was loaded, False if it is missing or invalid
        """
        try:
            cache_path = Path(self.cache_file)
            if not cache_path.exists():
                return False
            with open(cache_path, 'r', encoding='utf-8') as f:
                cache_data = json.load(f)
            feed = ClawSecFeed.from_dict(cache_data.get('feed', {}))
            cached_at = cache_data.get('cached_at')
            # No timestamp means unknown age, which is_cache_valid() treats
            # as not valid.
            timestamp = datetime.fromisoformat(cached_at) if cached_at else None
        except Exception as e:
            logger.warning(f"Failed to load ClawSec cache: {e}")
            return False

        with self._lock:
            self._cache = feed
            self._cache_timestamp = timestamp
        logger.info(f"ClawSec cache loaded: {feed.total_count} advisories")
        return True

    def _entry_matches(self, affected: str, product_lower: str, banner_lower: str) -> bool:
        """Return True if one ``affected`` entry matches the product or banner."""
        affected_lower = affected.lower()

        # Strategy 1: Direct product match
        if product_lower and product_lower in affected_lower:
            return True

        # Strategy 2: Same AI keyword in the affected entry and in product/banner
        for keyword in self.AI_KEYWORDS:
            if keyword in affected_lower and (keyword in product_lower or keyword in banner_lower):
                return True

        # Strategy 3: Banner contains affected product
        if banner_lower:
            # Extract product name from affected (e.g., "ClawdBot < 2.0" -> "clawdbot")
            affected_product = affected_lower.split('<')[0].split('>')[0].strip()
            if affected_product and affected_product in banner_lower:
                return True

        return False

    def match_advisories(
        self,
        product: Optional[str] = None,
        version: Optional[str] = None,
        banner: Optional[str] = None
    ) -> List[ClawSecAdvisory]:
        """
        Find matching advisories for a product/banner.

        Matching strategies (tried in order for each affected entry):
        1. Product name is contained in the affected entry
        2. A known AI keyword appears in both the affected entry and the
           product or banner
        3. Banner text contains the product part of the affected entry

        Only the cached feed is consulted; no network request is made.

        Args:
            product: Product name to match
            version: Accepted for interface compatibility; not currently
                used to narrow matches
            banner: Banner text to search

        Returns:
            List of matching ClawSecAdvisory objects
        """
        feed = self.get_cached_feed()
        if not feed:
            return []

        product_lower = str(product or '').lower()
        banner_lower = str(banner or '').lower()

        matches = []
        for advisory in feed.advisories:
            hit = any(
                self._entry_matches(entry, product_lower, banner_lower)
                for entry in (advisory.affected or [])
                if isinstance(entry, str)
            )
            if hit and advisory not in matches:
                matches.append(advisory)

        logger.debug(f"ClawSec matched {len(matches)} advisories for product={product}")
        return matches

    def background_refresh(self) -> None:
        """Start a daemon thread that force-refreshes the feed."""
        def _refresh():
            try:
                self.fetch_feed(force_refresh=True)
            except Exception as e:
                logger.warning(f"Background ClawSec refresh failed: {e}")

        thread = threading.Thread(target=_refresh, daemon=True)
        thread.start()
        logger.debug("ClawSec background refresh started")

    def get_statistics(self) -> Dict[str, Any]:
        """Get feed statistics for UI display.

        Returns:
            Dictionary with the same keys whether or not a feed is available,
            so consumers can index it unconditionally.
        """
        feed = self.get_cached_feed()
        if not feed:
            return {
                'total_advisories': 0,
                'critical_count': 0,
                'high_count': 0,
                'medium_count': 0,
                'low_count': 0,
                'last_updated': None,
                'feed_version': None,
                'is_stale': True
            }

        severity_counts = {'CRITICAL': 0, 'HIGH': 0, 'MEDIUM': 0, 'LOW': 0}
        for advisory in feed.advisories:
            if advisory.severity in severity_counts:
                severity_counts[advisory.severity] += 1

        return {
            'total_advisories': feed.total_count,
            'critical_count': severity_counts['CRITICAL'],
            'high_count': severity_counts['HIGH'],
            'medium_count': severity_counts['MEDIUM'],
            'low_count': severity_counts['LOW'],
            'last_updated': feed.last_updated.isoformat() if feed.last_updated else None,
            'feed_version': feed.feed_version,
            'is_stale': not self.is_cache_valid()
        }
+228
View File
@@ -0,0 +1,228 @@
"""Threat Intelligence Enrichment for AASRT."""
from typing import Any, Dict, List, Optional, Tuple
from src.engines import SearchResult
from src.utils.logger import get_logger
from .clawsec_feed import ClawSecAdvisory, ClawSecFeedManager
logger = get_logger(__name__)
class ThreatEnricher:
    """
    Enriches SearchResult objects with ClawSec threat intelligence.

    Responsibilities:
    - Match results against ClawSec advisories
    - Add CVE metadata to result.metadata
    - Inject ClawSec vulnerabilities into result.vulnerabilities
    """

    # Lower-case keyword -> display name, used for both HTTP-title and
    # service-name detection so the two checks cannot drift apart.
    AI_PRODUCTS = {
        'clawdbot': 'ClawdBot',
        'moltbot': 'MoltBot',
        'autogpt': 'AutoGPT',
        'langchain': 'LangChain',
        'openclaw': 'OpenClaw',
    }

    # Most specific pattern first: the generic one matches any "N.N" (e.g.
    # the "1.1" in "HTTP/1.1") and would otherwise always win.
    VERSION_PATTERNS = (
        r'version[:\s]+(\d+\.\d+(?:\.\d+)?)',  # version: 1.2.3
        r'v?(\d+\.\d+(?:\.\d+)?)',  # v1.2.3 or 1.2.3
    )

    SEVERITY_ORDER = {'CRITICAL': 4, 'HIGH': 3, 'MEDIUM': 2, 'LOW': 1}

    def __init__(self, feed_manager: ClawSecFeedManager, config=None):
        """
        Initialize ThreatEnricher.

        Args:
            feed_manager: ClawSecFeedManager instance
            config: Optional configuration object
        """
        self.feed_manager = feed_manager
        self.config = config

    @staticmethod
    def _metadata_of(result: SearchResult) -> Dict[str, Any]:
        """Return result.metadata if it is a dict, else an empty dict."""
        metadata = result.metadata
        return metadata if isinstance(metadata, dict) else {}

    @classmethod
    def _http_title(cls, result: SearchResult) -> str:
        """Return the HTTP title stored under metadata['http'], or ''."""
        http_info = cls._metadata_of(result).get('http') or {}
        if not isinstance(http_info, dict):
            return ''
        title = http_info.get('title')
        return title if isinstance(title, str) else ''

    def enrich(self, result: SearchResult) -> SearchResult:
        """
        Enrich a single result with threat intelligence.

        Args:
            result: SearchResult to enrich

        Returns:
            The same SearchResult, with ClawSec metadata added when any
            advisory matched
        """
        # Extract product info from result
        product, version = self._extract_product_info(result)

        # The HTTP title, when present, is searched together with the banner
        banner = result.banner or ''
        title = self._http_title(result)
        if title:
            banner = f"{banner} {title}"

        # Match against ClawSec advisories
        advisories = self.feed_manager.match_advisories(
            product=product,
            version=version,
            banner=banner
        )

        if advisories:
            result = self._add_cve_context(result, advisories)
            logger.debug(f"Enriched {result.ip}:{result.port} with {len(advisories)} ClawSec advisories")

        return result

    def enrich_batch(self, results: List[SearchResult]) -> List[SearchResult]:
        """
        Enrich multiple results.

        Args:
            results: List of SearchResults to enrich

        Returns:
            List of enriched SearchResults, in input order
        """
        return [self.enrich(result) for result in results]

    def _extract_product_info(self, result: SearchResult) -> Tuple[Optional[str], Optional[str]]:
        """
        Extract product name and version from result metadata.

        Product precedence: an AI keyword found in the HTTP title overrides
        metadata['product']; the service name is only consulted when neither
        produced a value. The banner is scanned for a version only when
        metadata carries none.

        Args:
            result: SearchResult to analyze

        Returns:
            Tuple of (product_name, version) or (None, None)
        """
        import re

        metadata = self._metadata_of(result)
        product = metadata.get('product')
        version = metadata.get('version')

        # Look for AI agent keywords in the HTTP title
        title_lower = self._http_title(result).lower()
        if title_lower:
            for keyword, name in self.AI_PRODUCTS.items():
                if keyword in title_lower:
                    product = name
                    break

        # Check service name
        if not product and result.service:
            service_lower = result.service.lower()
            if any(keyword in service_lower for keyword in self.AI_PRODUCTS):
                product = result.service

        # Check banner for version patterns
        if result.banner and not version:
            for pattern in self.VERSION_PATTERNS:
                match = re.search(pattern, result.banner, re.IGNORECASE)
                if match:
                    version = match.group(1)
                    break

        return product, version

    def _add_cve_context(
        self,
        result: SearchResult,
        advisories: List[ClawSecAdvisory]
    ) -> SearchResult:
        """
        Add CVE information to result metadata and vulnerabilities.

        Writes ``clawsec_advisories`` and ``clawsec_severity`` into
        result.metadata and appends ``clawsec_<cve_id>`` entries to
        result.vulnerabilities (skipping ones already present).

        Args:
            result: SearchResult to update
            advisories: List of matched ClawSecAdvisory objects

        Returns:
            Updated SearchResult
        """
        # Mirror the guard in _extract_product_info: metadata may not be a dict.
        if not isinstance(result.metadata, dict):
            result.metadata = {}

        # Add ClawSec advisories to metadata
        result.metadata['clawsec_advisories'] = [
            {
                'cve_id': advisory.cve_id,
                'severity': advisory.severity,
                'cvss_score': advisory.cvss_score,
                'title': advisory.title,
                'vuln_type': advisory.vuln_type,
                'action': advisory.action,
                'nvd_url': advisory.nvd_url,
                'cwe_id': advisory.cwe_id
            }
            for advisory in advisories
        ]

        # Track highest severity for quick access
        result.metadata['clawsec_severity'] = max(
            (advisory.severity for advisory in advisories),
            key=lambda severity: self.SEVERITY_ORDER.get(severity, 0),
            default='LOW'
        )

        # Add CVE IDs to vulnerabilities list
        for advisory in advisories:
            vuln_id = f"clawsec_{advisory.cve_id}"
            if vuln_id not in result.vulnerabilities:
                result.vulnerabilities.append(vuln_id)

        return result

    def get_enrichment_stats(self, results: List[SearchResult]) -> Dict[str, Any]:
        """
        Get statistics about enrichment for a set of results.

        Args:
            results: List of enriched SearchResults

        Returns:
            Dictionary with enrichment statistics; ``cve_ids`` is sorted so
            the output is deterministic
        """
        enriched_count = 0
        total_cves = 0
        severity_counts = {'CRITICAL': 0, 'HIGH': 0, 'MEDIUM': 0, 'LOW': 0}
        cve_ids = set()

        for result in results:
            advisories = self._metadata_of(result).get('clawsec_advisories') or []
            if not advisories:
                continue
            enriched_count += 1
            total_cves += len(advisories)
            for advisory in advisories:
                cve_id = advisory.get('cve_id')
                if cve_id:
                    cve_ids.add(cve_id)
                severity = advisory.get('severity', 'LOW')
                if severity in severity_counts:
                    severity_counts[severity] += 1

        return {
            'enriched_results': enriched_count,
            'total_results': len(results),
            'enrichment_rate': (enriched_count / len(results) * 100) if results else 0,
            'unique_cves': len(cve_ids),
            'total_cve_matches': total_cves,
            'severity_breakdown': severity_counts,
            'cve_ids': sorted(cve_ids)
        }