mirror of
https://github.com/mouna23/OSINT-with-LLM.git
synced 2026-02-12 20:52:45 +00:00
first commit
This commit is contained in:
86
README.md
Normal file
86
README.md
Normal file
@@ -0,0 +1,86 @@
|
||||
# POC : OSINT with LLM
|
||||
|
||||
This repository demonstrates domain, IP, and email reconnaissance with **LLM-powered** security reporting.
|
||||
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
The project is divided into **two main components**:
|
||||
|
||||
1. **Recon Modules**
|
||||
2. **LLM Analysis and reporting**
|
||||
|
||||
---
|
||||
|
||||
## Recon Modules
|
||||
|
||||
### **Purpose**
|
||||
Gather information about an IP address, a domain, or an email address.
|
||||
|
||||
### **Approach**
|
||||
- **Domain OSINT:**
|
||||
* WHOIS Lookup
|
||||
|
||||
* Shodan Info Gathering
|
||||
|
||||
* SSL Certificate Validation
|
||||
|
||||
* VirusTotal "malicious/clean" status
|
||||
- **IP Recon:**
|
||||
* AbuseIPDB score & classification
|
||||
- **Email Recon:**
|
||||
* Breach/exposure lookup
|
||||
|
||||
### **LLM Analysis and reporting**
|
||||
|
||||
* Converts technical OSINT into human-readable summaries
|
||||
|
||||
* Extracts key findings & risk insights
|
||||
|
||||
* Generates reports
|
||||
|
||||
## **Required API keys for OSINT modules**
|
||||
|
||||
* VT_API_KEY=your_virustotal_api_key
|
||||
* ABUSEIPDB_KEY=your_abuseipdb_api_key
|
||||
* SHODAN_KEY=your_shodan_api_key
|
||||
|
||||
## **Usage**
|
||||
### **Install dependencies**
|
||||
```bash
|
||||
pip3 install -r requirements.txt
|
||||
```
|
||||
### **Demo**
|
||||
```bash
|
||||
python3 main.py <email_or_domain_or_ip>
|
||||
```
|
||||
What happens when you run it:
|
||||
|
||||
* OSINT recon runs
|
||||
|
||||
* LLM analyzes results
|
||||
|
||||
* A report is saved to `reports/report.md`
|
||||
|
||||
#### **Demo with domain**
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### **Demo with ip**
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
#### **Demo with email**
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
## Notes
|
||||
* The scripts are designed to be run locally, in a Python 3.13+ environment with the listed dependencies.
|
||||
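* Install Ollama on your machine and pull the `mistral` model (`ollama pull mistral`). The LLM client expects Ollama to be listening on `http://localhost:11434`.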
|
||||
BIN
images/ll_email_1.png
Normal file
BIN
images/ll_email_1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 43 KiB |
BIN
images/llm_domain_1.png
Normal file
BIN
images/llm_domain_1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.1 KiB |
BIN
images/llm_domain_2.png
Normal file
BIN
images/llm_domain_2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 111 KiB |
BIN
images/llm_email_2.png
Normal file
BIN
images/llm_email_2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 39 KiB |
BIN
images/llm_ip_1.png
Normal file
BIN
images/llm_ip_1.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 18 KiB |
BIN
images/llm_ip_2.png
Normal file
BIN
images/llm_ip_2.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 94 KiB |
70
llm/llm_analysis.py
Normal file
70
llm/llm_analysis.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import requests
|
||||
|
||||
def ask_llm(prompt, model="mistral", url="http://localhost:11434/api/generate", timeout=None):
    """Send *prompt* to an Ollama server and return the generated text.

    Args:
        prompt: Full prompt text to submit.
        model: Name of the Ollama model to run.
        url: Ollama ``/api/generate`` endpoint. Defaults to the local
            instance on port 11434.
        timeout: Seconds to wait for the HTTP reply, passed straight to
            ``requests.post``. ``None`` (the default) waits indefinitely,
            because local generation can take a long time.

    Returns:
        The stripped model output. If the reply has no ``"response"`` field
        the text ``"[No response]"`` is returned; a non-200 reply yields
        ``"[LLM Error]"`` and any raised exception yields
        ``"[LLM Exception]"``.
    """
    print("[LLM] Sending prompt...")
    payload = {
        "model": model,
        "prompt": prompt,
        # Ask for one complete JSON document instead of a token stream.
        "stream": False,
    }
    try:
        res = requests.post(url, json=payload, timeout=timeout)
        if res.status_code != 200:
            print(f"[LLM] Error: {res.status_code} - {res.text}")
            return "[LLM Error]"
        output = res.json().get("response", "[No response]")
        return output.strip()
    except Exception as e:
        # Deliberate best-effort boundary: callers always get a string back,
        # even when the server is down or the reply is malformed.
        print(f"[LLM] Exception: {e}")
        return "[LLM Exception]"
|
||||
|
||||
#####################################
|
||||
#summarize with LLM if it is domain
|
||||
#####################################
|
||||
def summarize_domain(raw_data):
    """Ask the LLM for a security summary of collected domain data.

    ``raw_data`` is interpolated verbatim under the ``DATA:`` heading of the
    OSINT-analyst prompt. The return value is whatever ``ask_llm`` returns.
    """
    return ask_llm(f"""
You are an OSINT analyst.
Analyze this domain data and summarize key security findings:
- WHOIS or registrant issues
- Subdomain risks
- Is it malicious or not based on virustotal result
- Action recommendations

DATA:
{raw_data}
""")
|
||||
|
||||
#####################################
|
||||
#summarize with LLM if it is email
|
||||
#####################################
|
||||
def summarize_email(raw_data):
    """Ask the LLM for a summary of email breach data.

    ``raw_data`` is interpolated verbatim under the ``DATA:`` heading of the
    breach-analyst prompt. The return value is whatever ``ask_llm`` returns.
    """
    return ask_llm(f"""
You are a breach analyst.
Summarize this email breach data:
- Sources of exposure
- Likely leaked data types
- Risk level
- Remediation advice

DATA:
{raw_data}
""")
|
||||
|
||||
#####################################
|
||||
#summarize with LLM if it is ip
|
||||
#####################################
|
||||
def summarize_ip(raw_data):
    """Ask the LLM for a summary of an IP intelligence report.

    ``raw_data`` is interpolated verbatim under the ``DATA:`` heading of the
    SOC-analyst prompt. The return value is whatever ``ask_llm`` returns.
    """
    return ask_llm(f"""
You are a SOC (Security Operations Center) analyst.
Summarize this IP intelligence report:
- Whether the IP is malicious
- Number of abuse reports
- Type of malicious activity
- Action recommendations

DATA:
{raw_data}
""")
|
||||
71
main.py
Normal file
71
main.py
Normal file
@@ -0,0 +1,71 @@
|
||||
from recon.domain_recon import get_whois, search_shodan, check_domain_virustotal, check_ssl
|
||||
from recon.email_recon import search_breaches
|
||||
from recon.ip_recon import check_ip
|
||||
from llm.llm_analysis import summarize_domain, summarize_email, summarize_ip
|
||||
from utils.format import save_report, is_ip, is_email, is_domain
|
||||
|
||||
# Email data logic
|
||||
def get_email_data(email):
    """Print a progress line and return the breach lookup result for *email*.

    The value returned by ``search_breaches`` is passed through unchanged.
    """
    print(f"[+] Recon on: {email}")
    return search_breaches(email)
|
||||
|
||||
# Domain data logic
|
||||
def get_domain_data(domain):
    """Run every domain recon step and return the results as one text blob.

    Each step that yields a falsy result is reported as ``"Not found"``.
    The blob has four labelled sections (WHOIS, Shodan, SSL check,
    Virustotal status) separated by blank lines.
    """
    print(f"[+] Recon on: {domain}")

    missing = "Not found"
    # Lookups run in this order: WHOIS, Shodan, VirusTotal, then SSL.
    whois_text = get_whois(domain) or missing
    shodan_hits = search_shodan(domain) or missing
    vt_verdict = check_domain_virustotal(domain) or missing
    cert_info = check_ssl(domain) or missing

    sections = (
        f"WHOIS:\n{whois_text}",
        f"Shodan:\n{shodan_hits}",
        f"SSL check:\n{cert_info}",
        f"Virustotal status:\n{vt_verdict}",
    )
    return "\n\n".join(sections)
|
||||
|
||||
def get_ip_data(ip):
    """Return the result of ``check_ip`` for *ip*, unchanged."""
    return check_ip(ip)
|
||||
|
||||
###############
|
||||
# Entry point
|
||||
##############
|
||||
def run(target):
    """Classify *target*, gather recon data, summarize it and save a report.

    The target is tested as an email first, then as an IP, then as a domain.
    If none match, a message is printed and nothing is saved.
    """
    if is_email(target):
        summary = summarize_email(get_email_data(target))
    elif is_ip(target):
        summary = summarize_ip(get_ip_data(target))
    elif is_domain(target):
        summary = summarize_domain(get_domain_data(target))
    else:
        print("\nInvalid target format. Please provide a valid email, IP, or domain.")
        return

    # save_report is called with its default path.
    save_report(summary)
    print("\n[+] Report saved to reports/report.md")
|
||||
|
||||
# CLI entry
|
||||
if __name__ == "__main__":
    import sys

    # Exactly one positional argument (the target) is expected.
    cli_args = sys.argv[1:]
    if len(cli_args) == 1:
        run(cli_args[0])
    else:
        print("Usage: python main.py <email_or_domain_or_ip>")
|
||||
84
recon/domain_recon.py
Normal file
84
recon/domain_recon.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import requests
|
||||
import whois
|
||||
import shodan
|
||||
import ssl
|
||||
import socket
|
||||
import datetime
|
||||
|
||||
##########################
|
||||
#whois
|
||||
##########################
|
||||
def get_whois(domain):
    """Return the raw WHOIS text for *domain*.

    If the lookup raises, an ``"Error: ..."`` string describing the
    exception is returned instead.
    """
    try:
        return whois.whois(domain).text
    except Exception as exc:
        return f"Error: {exc}"
|
||||
|
||||
##########################
|
||||
#shodan
|
||||
##########################
|
||||
def search_shodan(domain):
    """Search Shodan for *domain* and summarize up to five matches.

    The API key is read from the ``SHODAN_KEY`` environment variable. If it
    is unset or empty, the placeholder ``"your-api-key"`` is used.

    Returns:
        One ``"IP: ... | Port: ... | Org: ..."`` line per match (at most
        five), each newline-terminated; an empty string when there are no
        matches; or ``"nothing found in shodan"`` if the search fails.
    """
    import os

    api = shodan.Shodan(os.environ.get("SHODAN_KEY") or "your-api-key")
    try:
        results = api.search(domain)
        return "".join(
            f"IP: {match['ip_str']} | Port: {match['port']} | Org: {match.get('org')}\n"
            for match in results["matches"][:5]
        )
    except Exception:
        # Best-effort lookup: API errors and unexpected result shapes give
        # the same fallback text, but KeyboardInterrupt and SystemExit are
        # no longer swallowed as they were by a bare ``except:``.
        return "nothing found in shodan"
|
||||
|
||||
##########################
|
||||
#virustotal
|
||||
##########################
|
||||
def check_domain_virustotal(domain, timeout=10):
    """Classify *domain* from its VirusTotal analysis statistics.

    The API key is read from the ``VT_API_KEY`` environment variable. If it
    is unset or empty, the placeholder ``"your-api-key"`` is used.

    Args:
        domain: Domain name to look up.
        timeout: Seconds to wait for the HTTP reply.

    Returns:
        ``"domain is malicious"`` when the ``malicious`` or ``suspicious``
        count is above zero, otherwise ``"domain is clean"``. On a non-200
        reply, a network failure or an undecodable body, a dict of the form
        ``{"error": <message>}`` is returned.
    """
    import os

    url = f"https://www.virustotal.com/api/v3/domains/{domain}"
    headers = {"x-apikey": os.environ.get("VT_API_KEY") or "your-api-key"}

    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            return {"error": r.text}
        data = r.json()
    except (requests.RequestException, ValueError) as exc:
        # Report transport and decoding failures the same way as HTTP
        # errors, so one failed lookup does not abort the whole domain run.
        return {"error": str(exc)}

    stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
    flagged = stats.get("malicious", 0) > 0 or stats.get("suspicious", 0) > 0
    return "domain is malicious" if flagged else "domain is clean"
|
||||
|
||||
##########################
|
||||
#ssl_check
|
||||
##########################
|
||||
def check_ssl(domain):
    """Fetch the TLS certificate served by *domain* on port 443.

    The connection uses the default SSL context, so the certificate must
    pass normal verification, and a 5-second connect timeout.

    Returns:
        A dict with ``status`` (``"ok"``), ``issuer``, ``subject``,
        ``expire_date`` (the certificate's raw ``notAfter`` string) and
        ``days_left`` (whole days until expiry, negative once expired).
        ``None`` is returned if the connection, the handshake or the
        certificate parsing fails.
    """
    try:
        ctx = ssl.create_default_context()
        with socket.create_connection((domain, 443), timeout=5) as sock:
            with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                cert = ssock.getpeercert()

        # Convert the expiry date to a timezone-aware UTC datetime. The
        # stdlib helper parses the certificate time format directly.
        exp_date = datetime.datetime.fromtimestamp(
            ssl.cert_time_to_seconds(cert["notAfter"]),
            datetime.timezone.utc,
        )

        # Compare with the current UTC time.
        now = datetime.datetime.now(datetime.timezone.utc)
        days_left = (exp_date - now).days

        return {
            "status": "ok",
            # Keep the first attribute pair of each name component.
            "issuer": dict(x[0] for x in cert["issuer"]),
            "subject": dict(x[0] for x in cert["subject"]),
            "expire_date": cert["notAfter"],
            "days_left": days_left,
        }
    except Exception:
        # Best-effort probe: any failure means "no certificate data", but
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        return None
|
||||
5
recon/email_recon.py
Normal file
5
recon/email_recon.py
Normal file
@@ -0,0 +1,5 @@
|
||||
import requests
|
||||
|
||||
def search_breaches(email):
    """Query the LeakCheck public API for *email* and return the decoded JSON.

    The address is sent through ``params=`` so that ``requests`` URL-encodes
    it. Interpolating it into the URL left characters such as ``+`` raw, and
    a raw ``+`` in a query string is decoded as a space.
    """
    response = requests.get(
        "https://leakcheck.io/api/public",
        params={"check": email},
    )
    return response.json()
|
||||
8
recon/ip_recon.py
Normal file
8
recon/ip_recon.py
Normal file
@@ -0,0 +1,8 @@
|
||||
import requests
|
||||
|
||||
def check_ip(ip):
    """Query the AbuseIPDB ``check`` endpoint for *ip* and return the decoded JSON.

    The API key is read from the ``ABUSEIPDB_KEY`` environment variable. If
    it is unset or empty, the placeholder ``"your-api-key"`` is used. The
    lookup covers the last 90 days (``maxAgeInDays``).
    """
    import os

    url = "https://api.abuseipdb.com/api/v2/check"
    headers = {
        "Key": os.environ.get("ABUSEIPDB_KEY") or "your-api-key",
        "Accept": "application/json",
    }
    params = {"ipAddress": ip, "maxAgeInDays": 90}
    r = requests.get(url, headers=headers, params=params)
    return r.json()
|
||||
89
requirements.txt
Normal file
89
requirements.txt
Normal file
@@ -0,0 +1,89 @@
|
||||
aiohappyeyeballs==2.6.1
|
||||
aiohttp==3.12.14
|
||||
aioresponses==0.7.8
|
||||
aiosignal==1.4.0
|
||||
altair==5.5.0
|
||||
annotated-types==0.7.0
|
||||
appdirs==1.4.4
|
||||
attrs==25.3.0
|
||||
beautifulsoup4==4.13.4
|
||||
blinker==1.9.0
|
||||
builtwith==1.3.4
|
||||
cachetools==6.1.0
|
||||
certifi==2025.7.9
|
||||
cffi==1.17.1
|
||||
charset-normalizer==3.4.2
|
||||
click==8.2.1
|
||||
click-plugins==1.1.1.2
|
||||
colorama==0.4.6
|
||||
cryptography==44.0.3
|
||||
dnspython==2.7.0
|
||||
filelock==3.18.0
|
||||
frozenlist==1.7.0
|
||||
gitdb==4.0.12
|
||||
GitPython==3.1.44
|
||||
h11==0.16.0
|
||||
httpretty==1.1.4
|
||||
huepy==1.2.1
|
||||
idna==3.10
|
||||
importlib_metadata==8.7.0
|
||||
Jinja2==3.1.6
|
||||
jsonschema==4.24.0
|
||||
jsonschema-specifications==2025.4.1
|
||||
lxml==6.0.0
|
||||
MarkupSafe==3.0.2
|
||||
multidict==6.6.3
|
||||
narwhals==1.46.0
|
||||
nassl==5.3.1
|
||||
numpy==2.3.1
|
||||
outcome==1.3.0.post0
|
||||
packaging==25.0
|
||||
pandas==2.3.1
|
||||
pillow==11.3.0
|
||||
propcache==0.3.2
|
||||
protobuf==6.31.1
|
||||
pyarrow==20.0.0
|
||||
pycparser==2.22
|
||||
pydantic==2.12.4
|
||||
pydantic_core==2.41.5
|
||||
pydeck==0.9.1
|
||||
pyee==11.1.1
|
||||
pyppeteer==2.0.0
|
||||
PySocks==1.7.1
|
||||
python-dateutil==2.9.0.post0
|
||||
python-Wappalyzer==0.3.1
|
||||
python-whois==0.9.5
|
||||
pytz==2025.2
|
||||
referencing==0.36.2
|
||||
regex==2024.11.6
|
||||
requests==2.32.4
|
||||
requests-file==2.1.0
|
||||
rpds-py==0.26.0
|
||||
selenium==4.34.2
|
||||
setuptools==66.1.1
|
||||
shodan==1.31.0
|
||||
six==1.17.0
|
||||
smmap==5.0.2
|
||||
sniffio==1.3.1
|
||||
sortedcontainers==2.4.0
|
||||
soupsieve==2.7
|
||||
streamlit==1.46.1
|
||||
tenacity==9.1.2
|
||||
tldextract==5.3.0
|
||||
tls_parser==2.0.2
|
||||
toml==0.10.2
|
||||
tornado==6.5.1
|
||||
tqdm==4.67.1
|
||||
trio==0.30.0
|
||||
trio-websocket==0.12.2
|
||||
typing-inspection==0.4.2
|
||||
typing_extensions==4.14.1
|
||||
tzdata==2025.2
|
||||
urllib3==1.26.20
|
||||
watchdog==6.0.0
|
||||
websocket-client==1.8.0
|
||||
websockets==10.4
|
||||
wsproto==1.2.0
|
||||
xlsxwriter==3.2.5
|
||||
yarl==1.20.1
|
||||
zipp==3.23.0
|
||||
20
utils/format.py
Normal file
20
utils/format.py
Normal file
@@ -0,0 +1,20 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
def save_report(text, path="reports/report.md"):
    """Write *text* to *path* as UTF-8, creating the parent directory if needed.

    Args:
        text: Report content to write.
        path: Destination file. An existing file is overwritten.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
|
||||
|
||||
# Helper to detect input type
|
||||
def is_email(target):
    """Return True if *target* looks like ``local@host.tld``.

    The whole string must match. The earlier prefix-only match accepted
    inputs such as ``"a@b.c@d"``. The result is a bool, like ``is_domain``,
    and stays truthiness-compatible with the match object returned before.
    """
    return re.fullmatch(r"[^@]+@[^@]+\.[^@]+", target) is not None
|
||||
|
||||
def is_ip(target):
    """Return True if *target* is a valid IPv4 or IPv6 address.

    Validation is delegated to the standard ``ipaddress`` module. The
    earlier prefix-only regex accepted host names such as
    ``"1.2.3.4.example.com"`` and out-of-range octets such as
    ``"999.999.999.999"``. The result is a bool, truthiness-compatible with
    the match object returned before.
    """
    import ipaddress

    try:
        ipaddress.ip_address(target)
    except ValueError:
        return False
    return True
|
||||
|
||||
def is_domain(target):
    """Return True if *target* matches the simplified RFC 1035 domain pattern.

    The pattern requires one or more dot-terminated labels of 1-63 letters,
    digits or hyphens, followed by an alphabetic TLD of at least two
    characters. The string must not start with a hyphen.
    """
    matched = re.match(r"^(?!-)([a-zA-Z0-9-]{1,63}\.)+[A-Za-z]{2,}$", target)
    return bool(matched)
|
||||
Reference in New Issue
Block a user