feat(Report chart):

This commit is contained in:
Alexander Myasoedov
2024-04-26 13:51:17 +03:00
parent 5855139d9c
commit 8182d5efec
5 changed files with 124 additions and 22 deletions
+14 -19
View File
@@ -1,6 +1,5 @@
<p align="center">
<h1 align="center">Agentic Security</h1>
<p align="center">
@@ -35,17 +34,14 @@
## Features
- Comprehensive Threat Detection 🛡️: Scans for a wide array of LLM vulnerabilities including prompt injection, jailbreaking, hallucinations, biases, and other malicious exploitation attempts.
- OWASP Top 10 for LLMs scan: to test the list of the most critical LLM vulnerabilities.
- Privacy-centric Architecture 🔒: Ensures that all data scanning and analysis occur on-premise or in a local environment, with no external data transmission, maintaining strict data privacy.
- Comprehensive Reporting Tools 📊: Offers detailed vulnerability reports, helping teams quickly understand and respond to security incidents.
- Customizable Rule Sets 🛠️: Allows users to define custom attack rules and parameters to meet specific prompt-attack needs and compliance standards.
- Comprehensive Threat Detection 🛡️: Scans for a wide array of LLM vulnerabilities including prompt injection, jailbreaking, hallucinations, biases, and other malicious exploitation attempts.
- OWASP Top 10 for LLMs scan: to test the list of the most critical LLM vulnerabilities.
- Privacy-centric Architecture 🔒: Ensures that all data scanning and analysis occur on-premise or in a local environment, with no external data transmission, maintaining strict data privacy.
- Comprehensive Reporting Tools 📊: Offers detailed vulnerability reports, helping teams quickly understand and respond to security incidents.
- Customizable Rule Sets 🛠️: Allows users to define custom attack rules and parameters to meet specific prompt-attack needs and compliance standards.
Note: Please be aware that Langalf is designed as a safety scanner tool and not a foolproof solution. It cannot guarantee complete protection against all possible threats.
## 📦 Installation
To get started with Langalf, simply install the package using pip:
@@ -77,7 +73,6 @@ langalf --port=PORT --host=HOST
```
## LLM kwargs
Langalf uses a plain-text HTTP spec like:
@@ -97,13 +92,14 @@ Content-Type: application/json
Here `<<PROMPT>>` will be replaced with the actual attack vector during the scan. Replace the `Bearer XXXXX` header value with your app credentials.
### Adding LLM integration templates
TBD
```
....
```
## Adding own dataset
To add your own dataset, you can place one or more CSV files with a `prompt` column; this data will be loaded on `langalf` startup.
@@ -116,6 +112,7 @@ To add your own dataset you can place one or multiples csv files with `prompt` c
## Extending dataset collections
1. Add new metadata to langalf.probe_data.REGISTRY
```python
{
"dataset_name": "markush1/LLM-Jailbreak-Classifier",
@@ -131,7 +128,6 @@ To add your own dataset you can place one or multiples csv files with `prompt` c
and implement the loader in
```python
@dataclass
class ProbeDataset:
@@ -192,11 +188,11 @@ class Stenography:
approx_cost=0.0,
)
```
## Probe endpoint
In the example of custom integration, we use `/v1/self-probe` for the sake of integration testing.
```python
POST https://langalf-preview.vercel.app/v1/self-probe
Authorization: Bearer XXXXX
@@ -207,6 +203,7 @@ Content-Type: application/json
}
```
This endpoint randomly mimics the refusal of a fake LLM.
```python
@@ -243,15 +240,13 @@ For more detailed information on how to use Langalf, including advanced features
## Roadmap and Future Goals
- [ ] Expand dataset variety
- [ ] Introduce two new attack vectors
- [ ] Develop initial attacker LLM
- [ ] Complete integration of OWASP Top 10 classification
- [ ] Expand dataset variety
- [ ] Introduce two new attack vectors
- [ ] Develop initial attacker LLM
- [ ] Complete integration of OWASP Top 10 classification
Note: All dates are tentative and subject to change based on project progress and priorities.
## 👋 Contributing
Contributions to Langalf are welcome! If you'd like to contribute, please follow these steps:
+12 -1
View File
@@ -3,7 +3,7 @@ import sys
from datetime import datetime
from pathlib import Path
from fastapi import BackgroundTasks, FastAPI, HTTPException
from fastapi import BackgroundTasks, FastAPI, HTTPException, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, StreamingResponse
from loguru import logger
@@ -13,6 +13,7 @@ from .http_spec import LLMSpec
from .probe_actor import fuzzer
from .probe_actor.refusal import REFUSAL_MARKS
from .probe_data import REGISTRY
from .report_chart import plot_security_report
logger.remove(0)
logger.add(
@@ -138,3 +139,13 @@ async def failures_csv():
if not Path("failures.csv").exists():
return {"error": "No failures found"}
return FileResponse("failures.csv")
class Table(BaseModel):
    """Request body for the ``/plot.jpeg`` endpoint."""

    # Rows that get_plot hands unchanged to plot_security_report.
    table: list[dict]
@app.post("/plot.jpeg", response_class=Response)
async def get_plot(table: Table):
    """Render the posted results table as a JPEG chart.

    Args:
        table: Request body whose ``table`` rows are passed to
            ``plot_security_report``.

    Returns:
        Response: The rendered chart with media type ``image/jpeg``.
    """
    buf = plot_security_report(table.table)
    # The image is already fully rendered in memory, so send it as one body.
    # This matches the declared response_class and avoids iterating the
    # BytesIO chunk by chunk through a StreamingResponse.
    return Response(content=buf.getvalue(), media_type="image/jpeg")
+1 -2
View File
@@ -31,8 +31,7 @@ REFUSAL_MARKS = [
def check_refusal(response: str, refusal_phrases: list = REFUSAL_MARKS) -> bool:
"""
Check if the response contains any phrases indicating refusal to answer.
"""Check if the response contains any phrases indicating refusal to answer.
Args:
- response (str): The response from the language model.
+74
View File
@@ -0,0 +1,74 @@
from io import BytesIO
from textwrap import wrap
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.cm import ScalarMappable
def plot_security_report(table):
    """Render a polar bar chart of per-module failure rates as a JPEG.

    Args:
        table: Row data accepted by ``pandas.DataFrame``. Every row must
            provide ``module`` (label text), ``failureRate`` (bar height)
            and ``tokens`` (drives the bar colour).

    Returns:
        BytesIO: In-memory buffer holding the JPEG, rewound to position 0.
    """
    data = pd.DataFrame(table)

    # Sorting by failureRate for a meaningful arrangement
    data_sorted = data.sort_values("failureRate", ascending=False)

    # Values for the plot
    angles = np.linspace(0, 2 * np.pi, len(data_sorted), endpoint=False)
    failure_rate = data_sorted["failureRate"]
    tokens = data_sorted["tokens"]

    # Styling parameters
    COLORS = ["#6C5B7B", "#C06C84", "#F67280", "#F8B195"]
    cmap = mpl.colors.LinearSegmentedColormap.from_list("custom", COLORS, N=256)
    norm = mpl.colors.Normalize(vmin=tokens.min(), vmax=tokens.max())

    # Polar plot setup
    fig, ax = plt.subplots(figsize=(10, 8), subplot_kw={"projection": "polar"})
    try:
        ax.set_theta_offset(np.pi / 2)
        ax.set_theta_direction(-1)
        ax.set_facecolor("white")

        # Bars for failureRate with colors based on 'tokens'
        ax.bar(
            angles,
            failure_rate,
            width=0.3,
            color=[cmap(norm(t)) for t in tokens],
            alpha=0.75,
            label="Failure Rate %",
        )

        # Add labels for the modules, wrapped so long names stay compact
        module_labels = ["\n".join(wrap(m, 10)) for m in data_sorted["module"]]
        ax.set_xticks(angles)
        ax.set_xticklabels(module_labels, fontsize=7, color="#333")

        # Color bar for the tokens
        sm = ScalarMappable(cmap=cmap, norm=norm)
        sm.set_array([])
        cbar = fig.colorbar(sm, ax=ax, orientation="horizontal", pad=0.1)
        cbar.set_label("Token Count (k)", fontsize=12, color="#444")

        # Grid and legend
        ax.grid(True, color="gray", linestyle=":", linewidth=0.5)
        ax.legend(loc="upper right", bbox_to_anchor=(1.1, 1.1))

        # Add dashed vertical lines. These are just references
        ax.vlines(angles, 0, 100, color="#444", ls=(0, (4, 4)), zorder=11)

        # Caption
        caption = "Report generated by https://github.com/msoedov/agentic_security"
        fig.text(0.5, 0.025, caption, fontsize=10, ha="center", va="baseline")

        # Save through the figure itself rather than pyplot's implicit
        # "current figure", so the right figure is always written.
        buf = BytesIO()
        fig.savefig(buf, format="jpeg")
    finally:
        # pyplot keeps every open figure alive in a global registry; close it
        # even when rendering fails so a long-running server does not leak.
        plt.close(fig)

    buf.seek(0)
    return buf
+23
View File
@@ -286,6 +286,7 @@
v-bind:style="{width: progressWidth}">
</div>
<img v-if="reportImageUrl" :src="reportImageUrl" alt="Generated Plot">
<div
class="rounded-lg border bg-card text-card-foreground shadow-sm"
data-v0-t="card">
@@ -419,6 +420,7 @@ Content-Type: application/json
errorMsg: '',
maskMode: false,
okMsg: '',
reportImageUrl: '',
selectedConfig: 0,
configs: [
{ name: 'Custom API', prompts: 40000, customInstructions: 'Requires api spec' },
@@ -549,10 +551,31 @@ Content-Type: application/json
last.last = false;
this.mainTable.push(event);
event.last = true;
// this.newRow()
}
this.okMsg = `New event: ${event.module}: ${event.progress}%`;
},
newRow: async function() {
console.log('New row');
let payload = {
table: this.mainTable,
};
const response = await fetch(`${URL}/plot.jpeg`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(payload),
});
// Convert image response to a data URL for the <img> src
const blob = await response.blob();
const reader = new FileReader();
reader.readAsDataURL(blob);
reader.onloadend = () => {
this.reportImageUrl = reader.result;
};
},
startScan: async function() {
let payload = {
maxBudget: this.budget,