diff --git a/Readme.md b/Readme.md index 43a9092..e1da58d 100644 --- a/Readme.md +++ b/Readme.md @@ -1,6 +1,5 @@

-

Agentic Security

@@ -35,17 +34,14 @@ ## Features - - Comprehensive Threat Detection 🛡️: Scans for a wide array of LLM vulnerabilities including prompt injection, jailbreaking, hallucinations, biases, and other malicious exploitation attempts. - - OWASP Top 10 for LLMs scan: to test the list of the most critical LLM vulnerabilities. - - Privacy-centric Architecture 🔒: Ensures that all data scanning and analysis occur on-premise or in a local environment, with no external data transmission, maintaining strict data privacy. - - Comprehensive Reporting Tools 📊: Offers detailed reports of vulnerability, helping teams to quickly understand and respond to security incidents. - - Customizable Rule Sets 🛠️: Allows users to define custom attack rules and parameters to meet specific prompt attacks needs and compliance standards. - - +- Comprehensive Threat Detection 🛡️: Scans for a wide array of LLM vulnerabilities including prompt injection, jailbreaking, hallucinations, biases, and other malicious exploitation attempts. +- OWASP Top 10 for LLMs scan: to test the list of the most critical LLM vulnerabilities. +- Privacy-centric Architecture 🔒: Ensures that all data scanning and analysis occur on-premise or in a local environment, with no external data transmission, maintaining strict data privacy. +- Comprehensive Reporting Tools 📊: Offers detailed reports of vulnerability, helping teams to quickly understand and respond to security incidents. +- Customizable Rule Sets 🛠️: Allows users to define custom attack rules and parameters to meet specific prompt attacks needs and compliance standards. Note: Please be aware that Langalf is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats. 
- ## 📦 Installation To get started with Langalf, simply install the package using pip: @@ -77,7 +73,6 @@ langalf --port=PORT --host=HOST ``` - ## LLM kwargs Langalf uses plain text HTTP spec like: @@ -97,13 +92,14 @@ Content-Type: application/json Where `<>` will be replaced with the actual attack vector during the scan, insert the `Bearer XXXXX` header value with your app credentials. - ### Adding LLM integration templates TBD + ``` .... ``` + ## Adding own dataset To add your own dataset you can place one or multiples csv files with `prompt` column, this data will be loaded on `langalf` startup @@ -116,6 +112,7 @@ To add your own dataset you can place one or multiples csv files with `prompt` c ## Extending dataset collections 1. Add new metadata to langalf.probe_data.REGISTRY + ```python { "dataset_name": "markush1/LLM-Jailbreak-Classifier", @@ -131,7 +128,6 @@ To add your own dataset you can place one or multiples csv files with `prompt` c and implement loader into - ```python @dataclass class ProbeDataset: @@ -192,11 +188,11 @@ class Stenography: approx_cost=0.0, ) ``` + ## Probe endpoint In the example of custom integration, we use `/v1/self-probe` for the sake of integration testing. - ```python POST https://langalf-preview.vercel.app/v1/self-probe Authorization: Bearer XXXXX @@ -207,6 +203,7 @@ Content-Type: application/json } ``` + This endpoint randomly mimics the refusal of a fake LLM. ```python @@ -243,15 +240,13 @@ For more detailed information on how to use Langalf, including advanced features ## Roadmap and Future Goals - [ ] Expand dataset variety - [ ] Introduce two new attack vectors - [ ] Develop initial attacker LLM - [ ] Complete integration of OWASP Top 10 classification Note: All dates are tentative and subject to change based on project progress and priorities. 
-
-
 ## 👋 Contributing
 
 Contributions to Langalf are welcome! If you'd like to contribute, please follow these steps:
diff --git a/langalf/app.py b/langalf/app.py
index 9127949..301a7cf 100644
--- a/langalf/app.py
+++ b/langalf/app.py
@@ -3,7 +3,7 @@ import sys
 from datetime import datetime
 from pathlib import Path
 
-from fastapi import BackgroundTasks, FastAPI, HTTPException
+from fastapi import BackgroundTasks, FastAPI, HTTPException, Response
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import FileResponse, StreamingResponse
 from loguru import logger
@@ -13,6 +13,7 @@ from .http_spec import LLMSpec
 from .probe_actor import fuzzer
 from .probe_actor.refusal import REFUSAL_MARKS
 from .probe_data import REGISTRY
+from .report_chart import plot_security_report
 
 logger.remove(0)
 logger.add(
@@ -138,3 +139,19 @@ async def failures_csv():
     if not Path("failures.csv").exists():
         return {"error": "No failures found"}
     return FileResponse("failures.csv")
+
+
+class Table(BaseModel):
+    """Request body for ``/plot.jpeg``: the scan result rows to chart."""
+
+    # One dict per row; the chart reads "module", "failureRate" and "tokens".
+    table: list[dict]
+
+
+@app.post("/plot.jpeg", response_class=Response)
+async def get_plot(table: Table):
+    """Render the posted scan results as a JPEG chart and return the image."""
+    buf = plot_security_report(table.table)
+    # The image is already fully rendered in memory, so send it as one body
+    # (with a Content-Length) instead of streaming the buffer chunk by chunk.
+    return Response(content=buf.getvalue(), media_type="image/jpeg")
diff --git a/langalf/probe_actor/refusal.py b/langalf/probe_actor/refusal.py
index d7c67dc..610eb61 100644
--- a/langalf/probe_actor/refusal.py
+++ b/langalf/probe_actor/refusal.py
@@ -31,8 +31,7 @@ REFUSAL_MARKS = [
 
 
 def check_refusal(response: str, refusal_phrases: list = REFUSAL_MARKS) -> bool:
-    """
-    Check if the response contains any phrases indicating refusal to answer.
+    """Check if the response contains any phrases indicating refusal to answer.
 
     Args:
     - response (str): The response from the language model.
diff --git a/langalf/report_chart.py b/langalf/report_chart.py
new file mode 100644
index 0000000..0de1a54
--- /dev/null
+++ b/langalf/report_chart.py
@@ -0,0 +1,86 @@
+from io import BytesIO
+from textwrap import wrap
+
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from matplotlib.cm import ScalarMappable
+
+
+def plot_security_report(table):
+    """Render a polar bar chart of per-module failure rates as a JPEG.
+
+    Args:
+        table: Row data accepted by ``pandas.DataFrame`` that provides the
+            ``module``, ``failureRate`` and ``tokens`` columns.
+
+    Returns:
+        BytesIO: The encoded JPEG image, rewound to position 0.
+
+    Raises:
+        ValueError: If ``table`` contains no rows.
+    """
+    data = pd.DataFrame(table)
+    if data.empty:
+        raise ValueError("cannot plot a security report from an empty table")
+
+    # Sorting by failureRate for a meaningful arrangement
+    data_sorted = data.sort_values("failureRate", ascending=False)
+
+    # Values for the plot
+    angles = np.linspace(0, 2 * np.pi, len(data_sorted), endpoint=False)
+    failure_rate = data_sorted["failureRate"]
+    tokens = data_sorted["tokens"]
+
+    # Styling parameters
+    COLORS = ["#6C5B7B", "#C06C84", "#F67280", "#F8B195"]
+    cmap = mpl.colors.LinearSegmentedColormap.from_list("custom", COLORS, N=256)
+    norm = mpl.colors.Normalize(vmin=tokens.min(), vmax=tokens.max())
+
+    # Polar plot setup
+    fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={"projection": "polar"})
+    try:
+        # Put the first bar at the top and lay the rest out clockwise
+        ax.set_theta_offset(np.pi / 2)
+        ax.set_theta_direction(-1)
+        ax.set_facecolor("white")
+
+        # Bars for failureRate with colors based on 'tokens'
+        ax.bar(
+            angles,
+            failure_rate,
+            width=0.3,
+            color=[cmap(norm(t)) for t in tokens],
+            alpha=0.75,
+            label="Failure Rate %",
+        )
+
+        # Add labels for the modules
+        module_labels = ["\n".join(wrap(m, 10)) for m in data_sorted["module"]]
+        ax.set_xticks(angles)
+        ax.set_xticklabels(module_labels, fontsize=7, color="#333")
+
+        # Color bar for the tokens
+        sm = ScalarMappable(cmap=cmap, norm=norm)
+        sm.set_array([])
+        cbar = fig.colorbar(sm, ax=ax, orientation="horizontal", pad=0.1)
+        cbar.set_label("Token Count (k)", fontsize=12, color="#444")
+
+        # Grid and legend
+        ax.grid(True, color="gray", linestyle=":", linewidth=0.5)
+        ax.legend(loc="upper right", bbox_to_anchor=(1.1, 1.1))
+
+        # Add dashed vertical lines. These are just references
+        ax.vlines(angles, 0, 100, color="#444", ls=(0, (4, 4)), zorder=11)
+
+        caption = "Report generated by https://github.com/msoedov/agentic_security"
+        fig.text(0.5, 0.025, caption, fontsize=10, ha="center", va="baseline")
+
+        buf = BytesIO()
+        fig.savefig(buf, format="jpeg")
+    finally:
+        # pyplot keeps a reference to every open figure; always release it
+        plt.close(fig)
+    buf.seek(0)
+    return buf
diff --git a/langalf/static/index.html b/langalf/static/index.html
index 26dfb92..ddb9aa8 100644
--- a/langalf/static/index.html
+++ b/langalf/static/index.html
@@ -286,6 +286,7 @@ v-bind:style="{width: progressWidth}">
+ Generated Plot

@@ -419,6 +420,7 @@ Content-Type: application/json
         errorMsg: '',
         maskMode: false,
         okMsg: '',
+        reportImageUrl: '',
         selectedConfig: 0,
         configs: [
           { name: 'Custom API', prompts: 40000, customInstructions: 'Requires api spec' },
@@ -549,10 +551,38 @@ Content-Type: application/json
             last.last = false;
             this.mainTable.push(event);
             event.last = true;
+            // this.newRow()
           }
           this.okMsg = `New event: ${event.module}: ${event.progress}%`;
         },
+        newRow: async function() {
+          // Ask the backend to render the current results table as a chart.
+          const payload = { table: this.mainTable };
+          try {
+            const response = await fetch(`${URL}/plot.jpeg`, {
+              method: 'POST',
+              headers: {
+                'Content-Type': 'application/json',
+              },
+              body: JSON.stringify(payload),
+            });
+            if (!response.ok) {
+              throw new Error(`Plot request failed with status ${response.status}`);
+            }
+            // Convert image response to a data URL for the src
+            const blob = await response.blob();
+            const reader = new FileReader();
+            // Attach the handler before starting the read
+            reader.onloadend = () => {
+              this.reportImageUrl = reader.result;
+            };
+            reader.readAsDataURL(blob);
+          } catch (err) {
+            // Report the failure through the component's existing error field
+            this.errorMsg = `Unable to load report chart: ${err.message}`;
+          }
+        },
         startScan: async function() {
           let payload = {
             maxBudget: this.budget,