first commit

2026-02-12 20:52:45 +00:00 · 2025-11-20 10:49:44 +01:00
commit 502ef48b5c
14 changed files with 433 additions and 0 deletions
--- a/README.md
+++ b/README.md
@@ -0,0 +1,86 @@
+# POC : OSINT with LLM
+
+This repository demonstrates domain, IP, and email reconnaissance with **LLM-powered** security reporting.
+
+---
+
+##  Overview
+
+The project is divided into **two main components**:
+
+1. **Recon Modules** 
+2. **LLM Analysis and reporting** 
+
+---
+
+##  Recon Modules
+
+### **Purpose**
+Gather information about an IP address, a domain, or an email address.
+
+### **Approach**
+- **Domain OSINT:** 
+    * WHOIS Lookup
+
+    * Shodan Info Gathering
+
+    * SSL Certificate Validation
+
+    * VirusTotal "malicious/clean" status
+- **IP Recon:** 
+    * AbuseIPDB score & classification
+- **Email Recon:** 
+    * Breach/exposure lookup
+
+### **LLM Analysis and reporting**
+
+* Converts technical OSINT into human-readable summaries
+
+* Extracts key findings & risk insights
+
+* Generates reports
+
+## **Required API keys for OSINT modules**
+
+* VT_API_KEY=your_virustotal_api_key
+* ABUSEIPDB_KEY=your_abuseipdb_api_key
+* SHODAN_KEY=your_shodan_api_key
+
+## **Usage**
+### **Install dependencies**
+```bash
+pip3 install -r requirements.txt
+```
+### **Demo**
+```bash
+python3 main.py <email_or_domain_or_ip>
+```
+During a run:
+
+* OSINT recon runs
+
+* LLM analyzes results
+
+* A report is saved to `reports/report.md`
+
+#### **Demo with domain**
+
+![Project Logo](images/llm_domain_1.png)
+
+![Project Logo](images/llm_domain_2.png)
+
+#### **Demo with ip**
+
+![Project Logo](images/llm_ip_1.png)
+
+![Project Logo](images/llm_ip_2.png)
+
+#### **Demo with email**
+
+![Project Logo](images/ll_email_1.png)
+
+![Project Logo](images/llm_email_2.png)
+
+##  Notes 
+* The scripts are designed to be run locally, in a Python 3.13+ environment with the listed dependencies.
+* Install Ollama on your machine, pull the `mistral` model, and keep the Ollama server running on `http://localhost:11434`.
--- a/images/ll_email_1.png
+++ b/images/ll_email_1.png
--- a/images/llm_domain_1.png
+++ b/images/llm_domain_1.png
--- a/images/llm_domain_2.png
+++ b/images/llm_domain_2.png
--- a/images/llm_email_2.png
+++ b/images/llm_email_2.png
--- a/images/llm_ip_1.png
+++ b/images/llm_ip_1.png
--- a/images/llm_ip_2.png
+++ b/images/llm_ip_2.png
--- a/llm/llm_analysis.py
+++ b/llm/llm_analysis.py
@@ -0,0 +1,70 @@
+import requests
+
+def ask_llm(prompt, model="mistral", timeout=600):
+    """Send a prompt to the local Ollama server and return the generated text.
+
+    Args:
+        prompt: Full prompt text to submit.
+        model: Name of the Ollama model to run.
+        timeout: Seconds to wait for the HTTP response. The request is not
+            streamed, so this must cover the whole generation time.
+
+    Returns:
+        The stripped model output, ``"[LLM Error]"`` when the server replies
+        with a non-200 status, or ``"[LLM Exception]"`` when the request or
+        the decoding of its response fails.
+    """
+    print("[LLM] Sending prompt...")
+    payload = {"model": model, "prompt": prompt, "stream": False}
+    try:
+        # Without a timeout, requests blocks forever if Ollama stops answering.
+        res = requests.post(
+            "http://localhost:11434/api/generate",
+            json=payload,
+            timeout=timeout,
+        )
+        if res.status_code != 200:
+            print(f"[LLM] Error: {res.status_code} - {res.text}")
+            return "[LLM Error]"
+        output = res.json().get("response", "[No response]")
+        return output.strip()
+    except Exception as e:
+        # Best-effort boundary: report the failure and hand back a marker
+        # string so the caller can still write a report.
+        print(f"[LLM] Exception: {e}")
+        return "[LLM Exception]"
+
+#####################################
+#summarize with LLM if it is domain
+#####################################
+def summarize_domain(raw_data):
+    """Ask the LLM for a security-focused summary of collected domain data.
+
+    Args:
+        raw_data: Recon output for the domain; it is embedded verbatim after
+            the ``DATA:`` marker of the prompt.
+
+    Returns:
+        Whatever ``ask_llm`` returns for the assembled prompt.
+    """
+    instructions = (
+        "\n"
+        "You are an OSINT analyst.\n"
+        "Analyze this domain data and summarize key security findings:\n"
+        "- WHOIS or registrant issues\n"
+        "- Subdomain risks\n"
+        "- Is it malicious or not based on virustotal result\n"
+        "- Action recommendations\n"
+        "\n"
+        "DATA:\n"
+    )
+    return ask_llm(f"{instructions}{raw_data}\n")
+
+#####################################
+#summarize with LLM if it is email
+#####################################
+def summarize_email(raw_data):
+    """Ask the LLM for a summary of email breach data.
+
+    Args:
+        raw_data: Breach lookup output for the address; it is embedded
+            verbatim after the ``DATA:`` marker of the prompt.
+
+    Returns:
+        Whatever ``ask_llm`` returns for the assembled prompt.
+    """
+    instructions = (
+        "\n"
+        "You are a breach analyst.\n"
+        "Summarize this email breach data:\n"
+        "- Sources of exposure\n"
+        "- Likely leaked data types\n"
+        "- Risk level\n"
+        "- Remediation advice\n"
+        "\n"
+        "DATA:\n"
+    )
+    return ask_llm(f"{instructions}{raw_data}\n")
+
+#####################################
+#summarize with LLM if it is ip
+#####################################
+def summarize_ip(raw_data):
+    """Ask the LLM for a summary of an IP intelligence report.
+
+    Args:
+        raw_data: IP lookup output; it is embedded verbatim after the
+            ``DATA:`` marker of the prompt.
+
+    Returns:
+        Whatever ``ask_llm`` returns for the assembled prompt.
+    """
+    instructions = (
+        "\n"
+        "You are a SOC (Security Operations Center) analyst.\n"
+        "Summarize this IP intelligence report:\n"
+        "- Whether the IP is malicious\n"
+        "- Number of abuse reports\n"
+        "- Type of malicious activity\n"
+        "- Action recommendations\n"
+        "\n"
+        "DATA:\n"
+    )
+    return ask_llm(f"{instructions}{raw_data}\n")
--- a/main.py
+++ b/main.py
@@ -0,0 +1,71 @@
+from recon.domain_recon import get_whois, search_shodan, check_domain_virustotal, check_ssl
+from recon.email_recon import search_breaches
+from recon.ip_recon import check_ip
+from llm.llm_analysis import summarize_domain, summarize_email, summarize_ip
+from utils.format import save_report, is_ip, is_email, is_domain
+
+# Email data logic
+def get_email_data(email):
+    """Announce the target and return the breach lookup result for it."""
+    print(f"[+] Recon on: {email}")
+    return search_breaches(email)
+
+# Domain data logic
+def get_domain_data(domain):
+    """Run every domain lookup and merge the results into one text report.
+
+    Args:
+        domain: Domain name handed to each recon helper.
+
+    Returns:
+        A string with WHOIS, Shodan, SSL check and Virustotal sections
+        separated by blank lines.
+    """
+    print(f"[+] Recon on: {domain}")
+    placeholder = "Not found"
+
+    # Each lookup falls back to the placeholder when it returns nothing.
+    whois_text = get_whois(domain) or placeholder
+    shodan_hits = search_shodan(domain) or placeholder
+    vt_verdict = check_domain_virustotal(domain) or placeholder
+    cert_info = check_ssl(domain) or placeholder
+
+    sections = [
+        f"WHOIS:\n{whois_text}",
+        f"Shodan:\n{shodan_hits}",
+        f"SSL check:\n{cert_info}",
+        f"Virustotal status:\n{vt_verdict}",
+    ]
+    return "\n\n".join(sections)
+
+def get_ip_data(ip):
+    """Return the ``check_ip`` lookup result for the given address."""
+    return check_ip(ip)
+    
+###############
+# Entry point
+##############
+def run(target):
+    """Classify the target, collect recon data, summarize it and save a report.
+
+    The target is tested as an email first, then as an IP, then as a domain.
+    When none of the checks accepts it, a message is printed and nothing is
+    saved.
+
+    Args:
+        target: Email address, IP address or domain name to investigate.
+    """
+    if is_email(target):
+        summary = summarize_email(get_email_data(target))
+    elif is_ip(target):
+        summary = summarize_ip(get_ip_data(target))
+    elif is_domain(target):
+        summary = summarize_domain(get_domain_data(target))
+    else:
+        print("\nInvalid target format. Please provide a valid email, IP, or domain.")
+        return
+
+    # save_report uses its default path, which the message below echoes.
+    save_report(summary)
+    print("\n[+] Report saved to reports/report.md")
+
+# CLI entry
+if __name__ == "__main__":
+    import sys
+
+    # Exactly one positional argument (the target) is expected.
+    cli_args = sys.argv[1:]
+    if len(cli_args) == 1:
+        run(cli_args[0])
+    else:
+        print("Usage: python main.py <email_or_domain_or_ip>")
--- a/recon/domain_recon.py
+++ b/recon/domain_recon.py
@@ -0,0 +1,84 @@
+import requests
+import whois
+import shodan
+import ssl
+import socket
+import datetime
+
+##########################
+#whois
+##########################
+def get_whois(domain):
+    """Return the ``text`` of the WHOIS lookup result for a domain.
+
+    Any exception raised by the lookup is turned into an ``"Error: ..."``
+    string instead of propagating.
+    """
+    try:
+        record = whois.whois(domain)
+        raw_text = record.text
+    except Exception as err:
+        return f"Error: {err}"
+    return raw_text
+    
+##########################
+#shodan
+##########################
+def search_shodan(domain):
+    """Search Shodan for a domain and summarize up to five matching hosts.
+
+    The API key is read from the ``SHODAN_KEY`` environment variable; the
+    original placeholder value is used when the variable is not set.
+
+    Args:
+        domain: Search query passed to Shodan.
+
+    Returns:
+        One ``IP | Port | Org`` line per match (at most five), an empty string
+        when there are no matches, or ``"nothing found in shodan"`` when the
+        search fails.
+    """
+    import os
+
+    api = shodan.Shodan(os.environ.get("SHODAN_KEY", "your-api-key"))
+    try:
+        results = api.search(domain)
+        return "".join(
+            f"IP: {hit['ip_str']} | Port: {hit['port']} | Org: {hit.get('org')}\n"
+            for hit in results["matches"][:5]
+        )
+    except Exception:
+        # Still best-effort, but no longer a bare except: Ctrl-C and
+        # SystemExit now propagate instead of being reported as "not found".
+        return "nothing found in shodan"
+
+##########################
+#virustotal
+##########################  
+def check_domain_virustotal(domain, timeout=15):
+    """Look up a domain on VirusTotal and classify it as malicious or clean.
+
+    The API key is read from the ``VT_API_KEY`` environment variable; the
+    original placeholder value is used when the variable is not set.
+
+    Args:
+        domain: Domain name to look up.
+        timeout: Seconds to wait for the HTTP response.
+
+    Returns:
+        ``"domain is malicious"`` when the last analysis reports at least one
+        malicious or suspicious verdict, ``"domain is clean"`` otherwise, or
+        a ``{"error": ...}`` dict when the request fails, the status is not
+        200, or the body is not valid JSON.
+    """
+    import os
+
+    url = f"https://www.virustotal.com/api/v3/domains/{domain}"
+    headers = {"x-apikey": os.environ.get("VT_API_KEY", "your-api-key")}
+    try:
+        r = requests.get(url, headers=headers, timeout=timeout)
+    except requests.RequestException as e:
+        # Reuse the existing error shape rather than crashing the whole run.
+        return {"error": str(e)}
+
+    if r.status_code != 200:
+        return {"error": r.text}
+
+    try:
+        data = r.json()
+    except ValueError as e:
+        return {"error": f"invalid JSON from VirusTotal: {e}"}
+
+    stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
+
+    malicious = stats.get("malicious", 0)
+    suspicious = stats.get("suspicious", 0)
+
+    if malicious > 0 or suspicious > 0:
+        return "domain is malicious"
+    return "domain is clean"
+
+##########################
+#ssl_check
+##########################      
+def check_ssl(domain):
+    """Fetch the TLS certificate served on port 443 and report its validity.
+
+    Args:
+        domain: Host name to connect to; also used as the SNI server name.
+
+    Returns:
+        A dict with ``status``, ``issuer``, ``subject``, ``expire_date`` and
+        ``days_left`` keys, or ``None`` when the connection, the TLS
+        handshake, or the parsing of the certificate fails.
+    """
+    try:
+        ctx = ssl.create_default_context()
+        with socket.create_connection((domain, 443), timeout=5) as sock:
+            with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
+                cert = ssock.getpeercert()
+
+        # Parse the expiry date, then mark it as UTC so it can be compared
+        # with a timezone-aware "now".
+        exp_date = datetime.datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
+        exp_date = exp_date.replace(tzinfo=datetime.timezone.utc)
+
+        now = datetime.datetime.now(datetime.timezone.utc)
+        days_left = (exp_date - now).days
+
+        return {
+            "status": "ok",
+            "issuer": dict(x[0] for x in cert["issuer"]),
+            "subject": dict(x[0] for x in cert["subject"]),
+            "expire_date": cert["notAfter"],
+            "days_left": days_left
+        }
+
+    except (OSError, ValueError, KeyError):
+        # OSError covers DNS, socket, timeout and TLS errors; ValueError and
+        # KeyError cover an unparsable or incomplete certificate. Ctrl-C is no
+        # longer swallowed as it was by the bare except.
+        return None
--- a/recon/email_recon.py
+++ b/recon/email_recon.py
@@ -0,0 +1,5 @@
+import requests
+
+def search_breaches(email, timeout=15):
+    """Query the LeakCheck public API for breaches involving an email address.
+
+    Args:
+        email: Address to look up.
+        timeout: Seconds to wait for the HTTP response.
+
+    Returns:
+        The decoded JSON body of the response, or a ``{"error": ...}`` dict
+        when the request fails or the body is not valid JSON.
+    """
+    try:
+        # Passing the address via params URL-encodes it; interpolated raw, a
+        # "+" would be read as a space and "&" would start a new parameter.
+        response = requests.get(
+            "https://leakcheck.io/api/public",
+            params={"check": email},
+            timeout=timeout,
+        )
+        return response.json()
+    except (requests.RequestException, ValueError) as e:
+        return {"error": str(e)}
--- a/recon/ip_recon.py
+++ b/recon/ip_recon.py
@@ -0,0 +1,8 @@
+import requests
+
+def check_ip(ip, timeout=15):
+    """Query the AbuseIPDB ``check`` endpoint for an IP address.
+
+    The API key is read from the ``ABUSEIPDB_KEY`` environment variable; the
+    original placeholder value is used when the variable is not set.
+
+    Args:
+        ip: Address to look up.
+        timeout: Seconds to wait for the HTTP response.
+
+    Returns:
+        The decoded JSON body of the response, or a ``{"error": ...}`` dict
+        when the request fails or the body is not valid JSON.
+    """
+    import os
+
+    url = "https://api.abuseipdb.com/api/v2/check"
+    headers = {
+        "Key": os.environ.get("ABUSEIPDB_KEY", "your-api-key"),
+        "Accept": "application/json",
+    }
+    params = {"ipAddress": ip, "maxAgeInDays": 90}
+    try:
+        r = requests.get(url, headers=headers, params=params, timeout=timeout)
+        return r.json()
+    except (requests.RequestException, ValueError) as e:
+        return {"error": str(e)}
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,89 @@
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.14
+aioresponses==0.7.8
+aiosignal==1.4.0
+altair==5.5.0
+annotated-types==0.7.0
+appdirs==1.4.4
+attrs==25.3.0
+beautifulsoup4==4.13.4
+blinker==1.9.0
+builtwith==1.3.4
+cachetools==6.1.0
+certifi==2025.7.9
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.2.1
+click-plugins==1.1.1.2
+colorama==0.4.6
+cryptography==44.0.3
+dnspython==2.7.0
+filelock==3.18.0
+frozenlist==1.7.0
+gitdb==4.0.12
+GitPython==3.1.44
+h11==0.16.0
+httpretty==1.1.4
+huepy==1.2.1
+idna==3.10
+importlib_metadata==8.7.0
+Jinja2==3.1.6
+jsonschema==4.24.0
+jsonschema-specifications==2025.4.1
+lxml==6.0.0
+MarkupSafe==3.0.2
+multidict==6.6.3
+narwhals==1.46.0
+nassl==5.3.1
+numpy==2.3.1
+outcome==1.3.0.post0
+packaging==25.0
+pandas==2.3.1
+pillow==11.3.0
+propcache==0.3.2
+protobuf==6.31.1
+pyarrow==20.0.0
+pycparser==2.22
+pydantic==2.12.4
+pydantic_core==2.41.5
+pydeck==0.9.1
+pyee==11.1.1
+pyppeteer==2.0.0
+PySocks==1.7.1
+python-dateutil==2.9.0.post0
+python-Wappalyzer==0.3.1
+python-whois==0.9.5
+pytz==2025.2
+referencing==0.36.2
+regex==2024.11.6
+requests==2.32.4
+requests-file==2.1.0
+rpds-py==0.26.0
+selenium==4.34.2
+setuptools==66.1.1
+shodan==1.31.0
+six==1.17.0
+smmap==5.0.2
+sniffio==1.3.1
+sortedcontainers==2.4.0
+soupsieve==2.7
+streamlit==1.46.1
+tenacity==9.1.2
+tldextract==5.3.0
+tls_parser==2.0.2
+toml==0.10.2
+tornado==6.5.1
+tqdm==4.67.1
+trio==0.30.0
+trio-websocket==0.12.2
+typing-inspection==0.4.2
+typing_extensions==4.14.1
+tzdata==2025.2
+urllib3==1.26.20
+watchdog==6.0.0
+websocket-client==1.8.0
+websockets==10.4
+wsproto==1.2.0
+xlsxwriter==3.2.5
+yarl==1.20.1
+zipp==3.23.0
--- a/utils/format.py
+++ b/utils/format.py
@@ -0,0 +1,20 @@
+import os
+import re
+
+def save_report(text, path="reports/report.md"):
+    """Write the report text to ``path`` as UTF-8, creating parent folders.
+
+    Args:
+        text: Report content to write.
+        path: Destination file; an existing file is overwritten.
+    """
+    parent = os.path.dirname(path)
+    # A bare file name has no parent folder, and os.makedirs("") raises.
+    if parent:
+        os.makedirs(parent, exist_ok=True)
+    with open(path, "w", encoding="utf-8") as f:
+        f.write(text)
+
+# Helper to detect input type
+def is_email(target):
+    """Return True when the whole string looks like ``local@domain.tld``.
+
+    The check is deliberately loose: exactly one ``@``, at least one dot after
+    it, and no whitespace anywhere in the string.
+    """
+    # fullmatch rejects trailing junk that an unanchored match would ignore.
+    return re.fullmatch(r"[^@\s]+@[^@\s]+\.[^@\s]+", target) is not None
+
+def is_ip(target):
+    """Return True when the string is a valid IPv4 or IPv6 address.
+
+    Validation is delegated to the standard library, so out-of-range octets
+    (``999.1.1.1``) and host names that merely start with an address
+    (``1.2.3.4.example.com``) are rejected.
+    """
+    import ipaddress
+
+    try:
+        ipaddress.ip_address(target)
+    except ValueError:
+        return False
+    return True
+
+def is_domain(target):
+    """Return True when the string is a dotted host name with an alphabetic TLD.
+
+    Each label before the TLD is 1-63 letters, digits or hyphens and may not
+    start or end with a hyphen; the TLD is at least two letters.
+    """
+    # Simplified RFC 1035 check. The hyphen rule sits inside the repeated
+    # group so it applies to every label, and fullmatch rejects a trailing
+    # newline that "$" would tolerate.
+    domain_regex = r"(?:(?!-)[a-zA-Z0-9-]{1,63}(?<!-)\.)+[A-Za-z]{2,}"
+    return re.fullmatch(domain_regex, target) is not None