diff --git a/Readme.md b/Readme.md
index a731c9a..769aecd 100644
--- a/Readme.md
+++ b/Readme.md
@@ -422,6 +422,10 @@ The `Module` class is designed to manage prompt processing and interaction with
 
 ## MCP server
 
+The Agentic Security MCP server exposes the scanner's REST API as callable tools and reusable prompt templates, so any MCP-compatible client (Claude Desktop, Claude Code, custom agents) can drive security scans through natural language.
+
+### Installation
+
 ```shell
 pip install -U mcp
 
@@ -429,6 +433,55 @@ pip install -U mcp
 mcp install agentic_security/mcp/main.py
 ```
 
+### Using with Claude Desktop
+
+1. Start the Agentic Security FastAPI server (default port `8718`):
+
+   ```shell
+   poetry run agentic_security
+   ```
+
+2. Install the MCP server into Claude Desktop:
+
+   ```shell
+   mcp install agentic_security/mcp/main.py --name "Agentic Security"
+   ```
+
+3. Open Claude Desktop — the following **tools** are now available:
+
+   | Tool | Description |
+   |---|---|
+   | `start_scan` | Launch a security scan against an LLM spec |
+   | `stop_scan` | Halt an in-progress scan |
+   | `verify_llm` | Check that an LLM spec is reachable |
+   | `get_data_config` | Retrieve the current dataset configuration |
+   | `get_spec_templates` | List available LLM spec templates |
+
+4. Or kick off a scan using one of the built-in **prompt templates**:
+
+   - **`security_scan_prompt`** — runs a full scan with a configurable probe budget
+   - **`verify_llm_prompt`** — confirms a spec is reachable before committing to a scan
+   - **`adversarial_probe_prompt`** — enables multi-step attacks and asks Claude to summarise the worst findings
+
+### Example conversation with Claude
+
+```
+You: Use the security_scan_prompt for spec "openai/gpt-4o" with a budget of 500 probes.
+
+Claude: I'll kick off the scan now. Starting with verify_llm to confirm the spec is
+        reachable, then launching start_scan with maxBudget=500...
+```
+
+### Using with Claude Code (CLI)
+
+```shell
+# Add to your project's MCP config
+claude mcp add agentic-security -- python agentic_security/mcp/main.py
+
+# Then interact inline
+claude "Run a quick adversarial probe against my local LLM at http://localhost:8080/v1"
+```
+
 ## Documentation
 
 For more detailed information on how to use Agentic Security, including advanced features and customization options, please refer to the official documentation.
diff --git a/agentic_security/mcp/main.py b/agentic_security/mcp/main.py
index 858da5f..7c3da84 100644
--- a/agentic_security/mcp/main.py
+++ b/agentic_security/mcp/main.py
@@ -13,6 +13,75 @@ mcp = FastMCP(
 AGENTIC_SECURITY = os.getenv("AGENTIC_SECURITY_URL", "http://0.0.0.0:8718")
 
 
+# ---------------------------------------------------------------------------
+# Prompt templates
+# ---------------------------------------------------------------------------
+
+
+@mcp.prompt()
+def security_scan_prompt(llm_spec: str, max_budget: int = 1000) -> str:
+    """Generate a prompt to kick off a full LLM security scan.
+
+    Args:
+        llm_spec: The LLM specification string identifying the model endpoint.
+        max_budget: Maximum number of probes to run (defaults to 1000).
+
+    Returns:
+        The instruction text, naming the spec and budget and directing the
+        client to launch the scan with the start_scan tool.
+    """
+    return (
+        "Please run a security scan on the following LLM specification:\n\n"
+        f" Spec: {llm_spec}\n"
+        f" Max budget: {max_budget} probes\n\n"
+        "Use the start_scan tool to initiate the scan with this budget, and "
+        "use stop_scan only if the scan has to be halted before it completes."
+    )
+
+
+@mcp.prompt()
+def verify_llm_prompt(llm_spec: str) -> str:
+    """Generate a prompt to verify that an LLM spec is reachable and well-formed.
+
+    Args:
+        llm_spec: The LLM specification string to verify.
+
+    Returns:
+        The instruction text, asking the client to call verify_llm and report
+        whether the server accepts the spec.
+    """
+    return (
+        "Verify the following LLM specification is valid and reachable:\n\n"
+        f" Spec: {llm_spec}\n\n"
+        "Use the verify_llm tool and report back whether the spec is accepted "
+        "by the Agentic Security server."
+    )
+
+
+@mcp.prompt()
+def adversarial_probe_prompt(llm_spec: str) -> str:
+    """Generate a prompt for an adversarial probing session with multi-step attacks.
+
+    Args:
+        llm_spec: The LLM specification string identifying the target model.
+
+    Returns:
+        The instruction text, asking the client to scan with multi-step
+        attacks and optimization enabled and then summarise the findings.
+    """
+    return (
+        "Run an adversarial probing session against the LLM described by:\n\n"
+        f" Spec: {llm_spec}\n\n"
+        "Enable multi-step attacks and optimization in the start_scan call. "
+        "After the scan finishes, summarise the most critical vulnerabilities found."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Tools
+# ---------------------------------------------------------------------------
+
+
 @mcp.tool()
 async def verify_llm(spec: str) -> dict:
     """