first commit

This commit is contained in:
mouna23
2025-11-20 10:49:44 +01:00
commit 502ef48b5c
14 changed files with 433 additions and 0 deletions

86
README.md Normal file
View File

@@ -0,0 +1,86 @@
# POC : OSINT with LLM
This repository demonstrates domain, IP, and email reconnaissance with **LLM-powered** security reporting.
---
## Overview
The project is divided into **two main components**:
1. **Recon Modules**
2. **LLM Analysis and reporting**
---
## Recon Modules
### **Purpose**
Gather information about an IP address, a domain, or an email address.
### **Approach**
- **Domain OSINT:**
* WHOIS Lookup
* Shodan Info Gathering
* SSL Certificate Validation
* VirusTotal "malicious/clean" status
- **IP Recon:**
* AbuseIPDB score & classification
- **Email Recon:**
* Breach/exposure lookup
## LLM Analysis and reporting
* Converts technical OSINT into human-readable summaries
* Extracts key findings & risk insights
* Generates reports
## **Required API keys for OSINT modules**
* VT_API_KEY=your_virustotal_api_key
* ABUSEIPDB_KEY=your_abuseipdb_api_key
* SHODAN_KEY=your_shodan_api_key
## **Usage**
### **Install dependencies**
```bash
pip3 install -r requirements.txt
```
### **Demo**
```bash
python3 main.py <email_or_domain_or_ip>
```
During a run:
* OSINT recon runs against the target
* The LLM analyzes the results
* A report is saved to `reports/report.md`
#### **Demo with domain**
![Project Logo](images/llm_domain_1.png)
![Project Logo](images/llm_domain_2.png)
#### **Demo with ip**
![Project Logo](images/llm_ip_1.png)
![Project Logo](images/llm_ip_2.png)
#### **Demo with email**
![Project Logo](images/ll_email_1.png)
![Project Logo](images/llm_email_2.png)
## Notes
* The scripts are designed to be run locally, in a Python 3.13+ environment with the listed dependencies.
* Install Ollama on your machine, pull the `mistral` model (`ollama pull mistral`), and keep the Ollama server running on `localhost:11434`.

BIN
images/ll_email_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 KiB

BIN
images/llm_domain_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.1 KiB

BIN
images/llm_domain_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 111 KiB

BIN
images/llm_email_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

BIN
images/llm_ip_1.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
images/llm_ip_2.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 94 KiB

70
llm/llm_analysis.py Normal file
View File

@@ -0,0 +1,70 @@
import requests
def ask_llm(prompt, model="mistral"):
    """Send a prompt to the local generate endpoint and return the reply text.

    Args:
        prompt: Prompt text to submit.
        model: Name of the model to run.

    Returns:
        The stripped "response" field of the JSON reply, "[LLM Error]" when
        the HTTP status is not 200, or "[LLM Exception]" when the request or
        the decoding of the reply raises.
    """
    print("[LLM] Sending prompt...")
    payload = {"model": model, "prompt": prompt, "stream": False}
    try:
        reply = requests.post("http://localhost:11434/api/generate", json=payload)
        if reply.status_code == 200:
            # A reply without a "response" field falls back to a placeholder.
            return reply.json().get("response", "[No response]").strip()
        print(f"[LLM] Error: {reply.status_code} - {reply.text}")
        return "[LLM Error]"
    except Exception as e:
        print(f"[LLM] Exception: {e}")
        return "[LLM Exception]"
#####################################
#summarize with LLM if it is domain
#####################################
def summarize_domain(raw_data):
    """Ask the LLM for a security summary of collected domain data.

    Args:
        raw_data: Recon output for the domain; it is embedded verbatim under
            the "DATA:" marker of the prompt.

    Returns:
        Whatever ``ask_llm`` returns for the assembled prompt.
    """
    instructions = (
        "\n"
        "You are an OSINT analyst.\n"
        "Analyze this domain data and summarize key security findings:\n"
        "- WHOIS or registrant issues\n"
        "- Subdomain risks\n"
        "- Is it malicious or not based on virustotal result\n"
        "- Action recommendations\n"
        "DATA:\n"
        f"{raw_data}\n"
    )
    return ask_llm(instructions)
#####################################
#summarize with LLM if it is email
#####################################
def summarize_email(raw_data):
    """Ask the LLM for a summary of email breach data.

    Args:
        raw_data: Breach lookup output; it is embedded verbatim under the
            "DATA:" marker of the prompt.

    Returns:
        Whatever ``ask_llm`` returns for the assembled prompt.
    """
    instructions = (
        "\n"
        "You are a breach analyst.\n"
        "Summarize this email breach data:\n"
        "- Sources of exposure\n"
        "- Likely leaked data types\n"
        "- Risk level\n"
        "- Remediation advice\n"
        "DATA:\n"
        f"{raw_data}\n"
    )
    return ask_llm(instructions)
#####################################
#summarize with LLM if it is ip
#####################################
def summarize_ip(raw_data):
    """Ask the LLM for a summary of an IP intelligence report.

    Args:
        raw_data: IP lookup output; it is embedded verbatim under the
            "DATA:" marker of the prompt.

    Returns:
        Whatever ``ask_llm`` returns for the assembled prompt.
    """
    instructions = (
        "\n"
        "You are a SOC (Security Operations Center) analyst.\n"
        "Summarize this IP intelligence report:\n"
        "- Whether the IP is malicious\n"
        "- Number of abuse reports\n"
        "- Type of malicious activity\n"
        "- Action recommendations\n"
        "DATA:\n"
        f"{raw_data}\n"
    )
    return ask_llm(instructions)

71
main.py Normal file
View File

@@ -0,0 +1,71 @@
from recon.domain_recon import get_whois, search_shodan, check_domain_virustotal, check_ssl
from recon.email_recon import search_breaches
from recon.ip_recon import check_ip
from llm.llm_analysis import summarize_domain, summarize_email, summarize_ip
from utils.format import save_report, is_ip, is_email, is_domain
# Email data logic
def get_email_data(email):
    """Run the breach lookup for an email address.

    Prints a progress line, then returns the result of ``search_breaches``
    for ``email`` unchanged.
    """
    print(f"[+] Recon on: {email}")
    return search_breaches(email)
# Domain data logic
def get_domain_data(domain):
    """Collect all domain recon results into a single text block.

    Runs the WHOIS, Shodan, VirusTotal and SSL lookups for ``domain``; any
    lookup that yields a falsy result is reported as "Not found".

    Returns:
        A string with four labelled sections (WHOIS, Shodan, SSL check,
        Virustotal status) separated by blank lines.
    """
    print(f"[+] Recon on: {domain}")
    placeholder = "Not found"

    # Lookup order: WHOIS, Shodan, VirusTotal, SSL.
    whois_text = get_whois(domain) or placeholder
    shodan_text = search_shodan(domain) or placeholder
    vt_status = check_domain_virustotal(domain) or placeholder
    ssl_info = check_ssl(domain) or placeholder

    sections = (
        f"WHOIS:\n{whois_text}",
        f"Shodan:\n{shodan_text}",
        f"SSL check:\n{ssl_info}",
        f"Virustotal status:\n{vt_status}",
    )
    return "\n\n".join(sections)
def get_ip_data(ip):
    """Return the result of ``check_ip`` for ``ip`` unchanged."""
    return check_ip(ip)
###############
# Entry point
##############
def run(target):
    """Classify ``target``, run the matching recon, and save the LLM summary.

    The target is tested as an email first, then as an IP, then as a domain.
    For the first type that matches, the recon data is collected, summarized
    by the LLM and written out with ``save_report``. A target matching none
    of the types only produces an error message.
    """
    # Order matters: the first predicate that accepts the target wins.
    pipelines = (
        (is_email, get_email_data, summarize_email),
        (is_ip, get_ip_data, summarize_ip),
        (is_domain, get_domain_data, summarize_domain),
    )
    for matches, collect, summarize in pipelines:
        if matches(target):
            summary = summarize(collect(target))
            break
    else:
        print("\nInvalid target format. Please provide a valid email, IP, or domain.")
        return

    save_report(summary)  # written to save_report's default path
    print("\n[+] Report saved to reports/report.md")
# CLI entry
if __name__ == "__main__":
    import sys

    # Exactly one positional argument (the target) is expected.
    if len(sys.argv) == 2:
        run(sys.argv[1])
    else:
        print("Usage: python main.py <email_or_domain_or_ip>")

84
recon/domain_recon.py Normal file
View File

@@ -0,0 +1,84 @@
import requests
import whois
import shodan
import ssl
import socket
import datetime
##########################
#whois
##########################
def get_whois(domain):
    """Return the raw WHOIS text for ``domain``.

    Any exception raised by the lookup, or while reading the record's text,
    is turned into an "Error: ..." string instead of propagating.
    """
    try:
        return whois.whois(domain).text
    except Exception as e:
        return f"Error: {e}"
##########################
#shodan
##########################
def search_shodan(domain):
    """Search Shodan for ``domain`` and summarize up to five matching hosts.

    The API key is read from the ``SHODAN_KEY`` environment variable; when it
    is unset or empty the placeholder "your-api-key" is used.

    Returns:
        One "IP: ... | Port: ... | Org: ..." line per match (at most five,
        possibly an empty string when there are no matches), or
        "nothing found in shodan" when the search fails.
    """
    import os  # local import: only this function reads the environment

    api = shodan.Shodan(os.environ.get("SHODAN_KEY") or "your-api-key")
    try:
        results = api.search(domain)
        return "".join(
            f"IP: {match['ip_str']} | Port: {match['port']} | Org: {match.get('org')}\n"
            for match in results["matches"][:5]
        )
    except Exception:
        # Best effort: any lookup failure is reported as "nothing found".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return "nothing found in shodan"
##########################
#virustotal
##########################
def check_domain_virustotal(domain):
    """Classify ``domain`` from its VirusTotal last-analysis statistics.

    The API key is read from the ``VT_API_KEY`` environment variable; when it
    is unset or empty the placeholder "your-api-key" is used.

    Returns:
        "domain is malicious" when at least one engine reports the domain as
        malicious or suspicious, "domain is clean" otherwise, or a dict of the
        form ``{"error": <message>}`` when the request fails, the HTTP status
        is not 200, or the body is not valid JSON.
    """
    import os  # local import: only this function reads the environment

    url = f"https://www.virustotal.com/api/v3/domains/{domain}"
    headers = {"x-apikey": os.environ.get("VT_API_KEY") or "your-api-key"}
    try:
        # A timeout keeps a stalled connection from hanging the whole run.
        r = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # Same error shape as the non-200 branch below.
        return {"error": str(exc)}
    if r.status_code != 200:
        return {"error": r.text}
    try:
        data = r.json()
    except ValueError as exc:
        return {"error": f"invalid JSON response: {exc}"}

    stats = data.get("data", {}).get("attributes", {}).get("last_analysis_stats", {})
    malicious = stats.get("malicious", 0)
    suspicious = stats.get("suspicious", 0)
    if malicious > 0 or suspicious > 0:
        return "domain is malicious"
    return "domain is clean"
##########################
#ssl_check
##########################
def check_ssl(domain):
    """Fetch the TLS certificate served on port 443 and report its validity.

    Args:
        domain: Host name to connect to; also used for SNI and verification.

    Returns:
        A dict with "status" ("ok"), "issuer", "subject", "expire_date" (the
        certificate's raw ``notAfter`` string) and "days_left" (whole days
        until expiry, negative once expired), or ``None`` when the connection,
        the TLS handshake or the certificate parsing fails.
    """
    try:
        ctx = ssl.create_default_context()
        with socket.create_connection((domain, 443), timeout=5) as sock:
            with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                cert = ssock.getpeercert()

        # Parse the expiry date; strptime yields a naive datetime here, so
        # UTC is attached explicitly before comparing.
        exp_date = datetime.datetime.strptime(cert["notAfter"], "%b %d %H:%M:%S %Y %Z")
        exp_date = exp_date.replace(tzinfo=datetime.timezone.utc)
        # Compare against the current time in UTC.
        now = datetime.datetime.now(datetime.timezone.utc)
        days_left = (exp_date - now).days
        return {
            "status": "ok",
            "issuer": dict(x[0] for x in cert["issuer"]),
            "subject": dict(x[0] for x in cert["subject"]),
            "expire_date": cert["notAfter"],
            "days_left": days_left,
        }
    except (OSError, ValueError, KeyError):
        # OSError covers DNS, socket, timeout and TLS errors; ValueError and
        # KeyError cover a malformed or missing certificate field. Callers
        # treat the falsy result as "Not found".
        return None

5
recon/email_recon.py Normal file
View File

@@ -0,0 +1,5 @@
import requests
def search_breaches(email):
    """Query the leakcheck.io public API for breaches involving ``email``.

    The address is passed through ``params`` so it is URL-encoded; otherwise
    characters such as "+" in an address would be misread by the server.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = "https://leakcheck.io/api/public"
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, params={"check": email}, timeout=10)
    return response.json()

8
recon/ip_recon.py Normal file
View File

@@ -0,0 +1,8 @@
import requests
def check_ip(ip):
    """Query the AbuseIPDB check endpoint for ``ip``.

    The API key is read from the ``ABUSEIPDB_KEY`` environment variable; when
    it is unset or empty the placeholder "your-api-key" is used. Reports from
    the last 90 days are requested.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    import os  # local import: only this function reads the environment

    url = "https://api.abuseipdb.com/api/v2/check"
    headers = {
        "Key": os.environ.get("ABUSEIPDB_KEY") or "your-api-key",
        "Accept": "application/json",
    }
    params = {"ipAddress": ip, "maxAgeInDays": 90}
    # A timeout keeps a stalled connection from hanging the whole run.
    r = requests.get(url, headers=headers, params=params, timeout=10)
    return r.json()

89
requirements.txt Normal file
View File

@@ -0,0 +1,89 @@
aiohappyeyeballs==2.6.1
aiohttp==3.12.14
aioresponses==0.7.8
aiosignal==1.4.0
altair==5.5.0
annotated-types==0.7.0
appdirs==1.4.4
attrs==25.3.0
beautifulsoup4==4.13.4
blinker==1.9.0
builtwith==1.3.4
cachetools==6.1.0
certifi==2025.7.9
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
click-plugins==1.1.1.2
colorama==0.4.6
cryptography==44.0.3
dnspython==2.7.0
filelock==3.18.0
frozenlist==1.7.0
gitdb==4.0.12
GitPython==3.1.44
h11==0.16.0
httpretty==1.1.4
huepy==1.2.1
idna==3.10
importlib_metadata==8.7.0
Jinja2==3.1.6
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
lxml==6.0.0
MarkupSafe==3.0.2
multidict==6.6.3
narwhals==1.46.0
nassl==5.3.1
numpy==2.3.1
outcome==1.3.0.post0
packaging==25.0
pandas==2.3.1
pillow==11.3.0
propcache==0.3.2
protobuf==6.31.1
pyarrow==20.0.0
pycparser==2.22
pydantic==2.12.4
pydantic_core==2.41.5
pydeck==0.9.1
pyee==11.1.1
pyppeteer==2.0.0
PySocks==1.7.1
python-dateutil==2.9.0.post0
python-Wappalyzer==0.3.1
python-whois==0.9.5
pytz==2025.2
referencing==0.36.2
regex==2024.11.6
requests==2.32.4
requests-file==2.1.0
rpds-py==0.26.0
selenium==4.34.2
setuptools==66.1.1
shodan==1.31.0
six==1.17.0
smmap==5.0.2
sniffio==1.3.1
sortedcontainers==2.4.0
soupsieve==2.7
streamlit==1.46.1
tenacity==9.1.2
tldextract==5.3.0
tls_parser==2.0.2
toml==0.10.2
tornado==6.5.1
tqdm==4.67.1
trio==0.30.0
trio-websocket==0.12.2
typing-inspection==0.4.2
typing_extensions==4.14.1
tzdata==2025.2
urllib3==1.26.20
watchdog==6.0.0
websocket-client==1.8.0
websockets==10.4
wsproto==1.2.0
xlsxwriter==3.2.5
yarl==1.20.1
zipp==3.23.0

20
utils/format.py Normal file
View File

@@ -0,0 +1,20 @@
import os
import re
def save_report(text, path="reports/report.md"):
    """Write ``text`` to ``path`` as UTF-8, creating parent directories.

    Args:
        text: Report content to write; an existing file is overwritten.
        path: Destination file. A bare file name (no directory part) is
            written to the current working directory.
    """
    parent = os.path.dirname(path)
    # os.makedirs("") raises FileNotFoundError, so skip it for bare names.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
# Helper to detect input type
def is_email(target):
    """Return True when ``target`` looks like an email address.

    The whole string must have the shape ``local@host.tld``: exactly one "@",
    at least one "." after it, and no whitespace anywhere. This is a shape
    check only, not full address validation.

    Returns:
        bool: True on a match, False otherwise.
    """
    # fullmatch rejects trailing junk such as "a@b.c@d" or "a@b.c extra".
    return re.fullmatch(r"[^@\s]+@[^@\s]+\.[^@\s]+", target) is not None
def is_ip(target):
    """Return True when ``target`` is a valid IPv4 or IPv6 address string.

    Validation is delegated to the standard ``ipaddress`` module, so octets
    above 255 and strings that merely start with an address (for example
    "1.2.3.4.example.com") are rejected.

    Returns:
        bool: True for a valid address, False otherwise.
    """
    import ipaddress  # local import: only this helper needs it

    # ip_address() also accepts integers; only textual targets count here.
    if not isinstance(target, str):
        return False
    try:
        ipaddress.ip_address(target)
    except ValueError:
        return False
    return True
def is_domain(target):
    """Return True when ``target`` has the shape of a domain name.

    Accepted shape: one or more dot-terminated labels of 1-63 letters, digits
    or hyphens, followed by an alphabetic final label of at least two
    letters; the string must not start with a hyphen.
    """
    pattern = re.compile(r"^(?!-)([a-zA-Z0-9-]{1,63}\.)+[A-Za-z]{2,}$")
    return bool(pattern.match(target))