Add files via upload

This commit is contained in:
公明
2026-06-18 12:44:42 +08:00
committed by GitHub
parent 01b361e4a7
commit 7eadccbff6
85 changed files with 33500 additions and 0 deletions
+132
View File
@@ -0,0 +1,132 @@
package vision
import (
"context"
"encoding/base64"
"fmt"
"net"
"net/http"
"strings"
"time"
"cyberstrike-ai/internal/config"
"cyberstrike-ai/internal/openai"
einoopenai "github.com/cloudwego/eino-ext/components/model/openai"
"github.com/cloudwego/eino/schema"
)
// Client calls a dedicated vision chat model, issuing a single Generate
// request per analysis.
type Client struct {
	// cfg holds the vision settings; Analyze reads the timeout, the image
	// detail level and the effective OpenAI configuration from it.
	cfg config.VisionConfig
	// mainOA is the main OpenAI configuration handed to
	// cfg.OpenAICfgEffective when the effective vision config is derived.
	mainOA config.OpenAIConfig
}
// NewClient builds a vision client from the vision settings and the main
// OpenAI settings. Both configurations are stored by value.
func NewClient(visionCfg config.VisionConfig, mainOpenAI config.OpenAIConfig) *Client {
	client := new(Client)
	client.cfg = visionCfg
	client.mainOA = mainOpenAI
	return client
}
// Analyze sends the image bytes to the vision-language model and returns the
// model's trimmed text answer.
//
// question is optional and is turned into the final prompt by
// buildVisionPrompt. A blank img.MIMEType defaults to "image/jpeg". The whole
// call is bounded by the configured vision timeout, applied on top of ctx.
//
// An error is returned when the payload is empty, when the effective API key
// or model is blank, when the chat model cannot be constructed, when
// generation fails, or when the model replies with empty content.
func (c *Client) Analyze(ctx context.Context, img ImagePayload, question string) (string, error) {
	if len(img.Bytes) == 0 {
		return "", fmt.Errorf("empty image payload")
	}
	mime := strings.TrimSpace(img.MIMEType)
	if mime == "" {
		mime = "image/jpeg"
	}

	oa := c.cfg.OpenAICfgEffective(c.mainOA)
	if strings.TrimSpace(oa.APIKey) == "" {
		return "", fmt.Errorf("vision API key is empty (set vision.api_key or openai.api_key)")
	}
	if strings.TrimSpace(oa.Model) == "" {
		return "", fmt.Errorf("vision model is empty")
	}

	timeout := time.Duration(c.cfg.TimeoutSecondsEffective()) * time.Second
	ctx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	transport := &http.Transport{
		DialContext: (&net.Dialer{
			Timeout:   60 * time.Second,
			KeepAlive: 60 * time.Second,
		}).DialContext,
		ResponseHeaderTimeout: timeout + 10*time.Second,
	}
	// A fresh Transport is created for every call and it has no
	// IdleConnTimeout, so its keep-alive connections would otherwise stay open
	// after this function returns. Release them explicitly.
	defer transport.CloseIdleConnections()

	httpClient := &http.Client{
		// The HTTP-level timeouts are slightly longer than the context
		// deadline so that the context is what normally ends the request.
		Timeout:   timeout + 15*time.Second,
		Transport: transport,
	}
	httpClient = openai.NewEinoHTTPClient(&oa, httpClient)

	modelCfg := &einoopenai.ChatModelConfig{
		APIKey:     oa.APIKey,
		BaseURL:    strings.TrimSuffix(oa.BaseURL, "/"),
		Model:      oa.Model,
		HTTPClient: httpClient,
	}
	chatModel, err := einoopenai.NewChatModel(ctx, modelCfg)
	if err != nil {
		return "", fmt.Errorf("vision chat model: %w", err)
	}

	b64 := base64.StdEncoding.EncodeToString(img.Bytes)

	// Low detail is the default; only "high" and "auto" override it.
	detail := schema.ImageURLDetailLow
	switch c.cfg.DetailEffective() {
	case "high":
		detail = schema.ImageURLDetailHigh
	case "auto":
		detail = schema.ImageURLDetailAuto
	}

	prompt := buildVisionPrompt(question)
	userMsg := &schema.Message{
		Role: schema.User,
		UserInputMultiContent: []schema.MessageInputPart{
			{Type: schema.ChatMessagePartTypeText, Text: prompt},
			{
				Type: schema.ChatMessagePartTypeImageURL,
				Image: &schema.MessageInputImage{
					MessagePartCommon: schema.MessagePartCommon{
						Base64Data: &b64,
						MIMEType:   mime,
					},
					Detail: detail,
				},
			},
		},
	}

	resp, err := chatModel.Generate(ctx, []*schema.Message{userMsg})
	if err != nil {
		return "", fmt.Errorf("vision generate: %w", err)
	}
	if resp == nil || strings.TrimSpace(resp.Content) == "" {
		return "", fmt.Errorf("vision model returned empty content")
	}
	return strings.TrimSpace(resp.Content), nil
}
// buildVisionPrompt assembles the text prompt sent alongside the image. A
// blank question is replaced by a generic description request, and when the
// resulting question looks captcha-related an extra instruction is appended
// asking the model to output only the recognized characters.
func buildVisionPrompt(question string) string {
	userQuestion := strings.TrimSpace(question)
	if userQuestion == "" {
		userQuestion = "请对图片做通用描述,侧重授权安全测试场景(可见文本、表单、按钮、验证码、错误信息、技术栈线索)。"
	}

	var prompt strings.Builder
	prompt.WriteString(`你是授权安全测试助手。请根据图片回答用户问题,只描述你能从图中确认的内容,不要编造。
用户问题:`)
	prompt.WriteString(userQuestion)
	// The captcha check runs on the question after the default substitution.
	if looksLikeCaptchaQuestion(userQuestion) {
		prompt.WriteString("\n若为验证码:仅输出你辨认出的字符序列,不要空格、标点、解释;看不清则明确说无法识别。")
	}
	return prompt.String()
}
// looksLikeCaptchaQuestion reports whether q appears to ask for captcha
// recognition. Matching is case-insensitive: q matches when it contains one
// of the captcha keywords, or when it contains "只输出" together with either
// "字符" or "character".
func looksLikeCaptchaQuestion(q string) bool {
	lowered := strings.ToLower(q)

	keywords := [...]string{"验证码", "captcha", "verification code", "verify code", "vcode", "图形码"}
	for i := range keywords {
		if strings.Contains(lowered, keywords[i]) {
			return true
		}
	}

	if !strings.Contains(lowered, "只输出") {
		return false
	}
	return strings.Contains(lowered, "字符") || strings.Contains(lowered, "character")
}
+12
View File
@@ -0,0 +1,12 @@
package vision
import "testing"
// TestLooksLikeCaptchaQuestion checks one captcha-style question and one
// unrelated question against looksLikeCaptchaQuestion.
func TestLooksLikeCaptchaQuestion(t *testing.T) {
	cases := []struct {
		question string
		want     bool
		failMsg  string
	}{
		{question: "识别验证码,只输出字符", want: true, failMsg: "expected captcha hint"},
		{question: "描述登录页布局", want: false, failMsg: "expected non-captcha"},
	}
	for _, tc := range cases {
		if looksLikeCaptchaQuestion(tc.question) != tc.want {
			t.Fatal(tc.failMsg)
		}
	}
}
+72
View File
@@ -0,0 +1,72 @@
package vision
import (
"fmt"
"os"
"path/filepath"
"strings"
)
// allowedImageExt is the set of lower-case file extensions (leading dot
// included) that ResolveImagePath accepts.
// NOTE(review): ".webp" is accepted here, but no WebP decoder registration is
// visible in this package's imports — confirm WebP files can be decoded by
// the preprocessing step.
var allowedImageExt = map[string]struct{}{
	".png": {}, ".jpg": {}, ".jpeg": {}, ".webp": {}, ".gif": {},
	".bmp": {}, ".tif": {}, ".tiff": {},
}
// ResolveImagePath resolves path to an absolute, symlink-evaluated location
// and validates that it points at a readable image candidate.
//
// Any directory is allowed. A relative path is joined onto cwd; when cwd is
// blank the process working directory is used. The extension check is applied
// to the resolved path (after symlink evaluation), and the target must be a
// regular file no larger than 1 GiB.
//
// It returns the resolved path, or an error when path is empty, the working
// directory cannot be determined, the extension is not in allowedImageExt,
// the file cannot be stat'ed, it is not a regular file, or it is too large.
func ResolveImagePath(path string, cwd string) (string, error) {
	const maxOnDiskBytes = 1 << 30

	p := strings.TrimSpace(path)
	if p == "" {
		return "", fmt.Errorf("path is empty")
	}

	cwdTrim := strings.TrimSpace(cwd)
	if cwdTrim == "" {
		wd, err := os.Getwd()
		if err != nil {
			return "", fmt.Errorf("getwd: %w", err)
		}
		cwdTrim = wd
	}
	cwdAbs, err := filepath.Abs(filepath.Clean(cwdTrim))
	if err != nil {
		return "", fmt.Errorf("resolve cwd: %w", err)
	}

	candidate := filepath.Clean(p)
	if !filepath.IsAbs(p) {
		candidate = filepath.Clean(filepath.Join(cwdAbs, p))
	}
	resolved := normalizeAbsPath(candidate)
	if resolved == "" {
		return "", fmt.Errorf("invalid path")
	}

	ext := strings.ToLower(filepath.Ext(resolved))
	if _, ok := allowedImageExt[ext]; !ok {
		return "", fmt.Errorf("unsupported image extension %q", ext)
	}

	st, err := os.Stat(resolved)
	if err != nil {
		return "", fmt.Errorf("stat: %w", err)
	}
	// Reject everything that is not a plain file: directories, but also
	// FIFOs, sockets and device nodes, which could block or stream forever
	// when read.
	if !st.Mode().IsRegular() {
		return "", fmt.Errorf("not a regular file")
	}
	if st.Size() > maxOnDiskBytes {
		return "", fmt.Errorf("file too large on disk")
	}
	return resolved, nil
}
// normalizeAbsPath returns the cleaned absolute form of p with symlinks
// evaluated. When symlink evaluation fails the absolute path is returned
// unchanged; when the absolute path cannot be computed it returns "".
func normalizeAbsPath(p string) string {
	absolute, absErr := filepath.Abs(filepath.Clean(p))
	if absErr != nil {
		return ""
	}
	target, linkErr := filepath.EvalSymlinks(absolute)
	if linkErr != nil {
		return absolute
	}
	return target
}
+52
View File
@@ -0,0 +1,52 @@
package vision
import (
"os"
"path/filepath"
"testing"
)
// TestResolveImagePath_underCWD verifies that an image located inside the
// supplied working directory resolves to its normalized absolute path.
func TestResolveImagePath_underCWD(t *testing.T) {
	root := t.TempDir()
	target := filepath.Join(root, "shot.png")
	pngMagic := []byte{0x89, 0x50, 0x4e, 0x47}
	if writeErr := os.WriteFile(target, pngMagic, 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	resolved, err := ResolveImagePath(target, root)
	if err != nil {
		t.Fatal(err)
	}
	if expected := normalizeAbsPath(target); resolved != expected {
		t.Fatalf("got %q want %q", resolved, expected)
	}
}
// TestResolveImagePath_absoluteOutsideCWD verifies that an absolute image
// path is accepted even when it lies outside the supplied working directory.
func TestResolveImagePath_absoluteOutsideCWD(t *testing.T) {
	imageDir := t.TempDir()
	workDir := t.TempDir()
	target := filepath.Join(imageDir, "remote.png")
	pngMagic := []byte{0x89, 0x50, 0x4e, 0x47}
	if writeErr := os.WriteFile(target, pngMagic, 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	resolved, err := ResolveImagePath(target, workDir)
	if err != nil {
		t.Fatalf("expected absolute path outside cwd to be allowed: %v", err)
	}
	if expected := normalizeAbsPath(target); resolved != expected {
		t.Fatalf("got %q want %q", resolved, expected)
	}
}
// TestResolveImagePath_rejectsNonImageExt verifies that an existing file with
// a non-image extension is rejected.
func TestResolveImagePath_rejectsNonImageExt(t *testing.T) {
	root := t.TempDir()
	textFile := filepath.Join(root, "notes.txt")
	if writeErr := os.WriteFile(textFile, []byte("x"), 0o644); writeErr != nil {
		t.Fatal(writeErr)
	}

	if _, err := ResolveImagePath(textFile, root); err == nil {
		t.Fatal("expected error for non-image extension")
	}
}
+212
View File
@@ -0,0 +1,212 @@
package vision
import (
"bytes"
"fmt"
"image"
"os"
"strings"
"github.com/disintegration/imaging"
)
// ImagePayload carries the image bytes and MIME type that are sent to the
// vision-language API.
type ImagePayload struct {
	// Bytes is the encoded image content.
	Bytes []byte
	// MIMEType is the MIME type matching Bytes, e.g. "image/jpeg".
	MIMEType string
}
// PreprocessMeta records the result of scaling and encoding an image, for use
// in tool output and troubleshooting.
type PreprocessMeta struct {
	OriginalPath   string // path of the source file
	OriginalBytes  int64  // size of the source file on disk
	OriginalWidth  int    // source width in pixels
	OriginalHeight int    // source height in pixels
	OutputWidth    int    // width of the image that is sent
	OutputHeight   int    // height of the image that is sent
	OutputBytes    int    // size of the payload that is sent
	OutputMIMEType string // MIME type of the payload that is sent
	JPEGQuality    int    // 0 means no JPEG re-encoding happened (original sent as-is)
	PreprocessMode string // passthrough | jpeg
}
// PreprocessOptions holds the image preprocessing parameters.
type PreprocessOptions struct {
	// MaxImageBytes rejects source files larger than this when it is > 0.
	MaxImageBytes int64
	// MaxDimension bounds the longest edge of the output; values <= 0 fall
	// back to 2048.
	MaxDimension int
	// JPEGQuality is the starting JPEG quality; values outside 1..100 fall
	// back to 82.
	JPEGQuality int
	// MaxPayloadBytes bounds the size of the bytes that are sent; values <= 0
	// fall back to 512 KiB.
	MaxPayloadBytes int64
	// SkipPreprocessBelowBytes: 0 = always compress; when > 0, a small file
	// whose dimensions are within limits may be sent unmodified.
	SkipPreprocessBelowBytes int64
}
// PreprocessImageFile reads the image at path and prepares it for the vision
// API. Small files whose dimensions are within limits may be passed through
// unmodified; everything else is scaled with the imaging package and
// re-encoded as JPEG.
//
// opt.MaxDimension defaults to 2048 and opt.MaxPayloadBytes to 512 KiB when
// they are not positive. The returned PreprocessMeta describes what was done
// and is populated as far as processing got, even when an error is returned.
//
// An error is returned when the file cannot be stat'ed or read, exceeds
// opt.MaxImageBytes, cannot be decoded, or cannot be compressed below the
// payload limit.
func PreprocessImageFile(path string, opt PreprocessOptions) (ImagePayload, PreprocessMeta, error) {
	meta := PreprocessMeta{OriginalPath: path}

	st, err := os.Stat(path)
	if err != nil {
		return ImagePayload{}, meta, err
	}
	meta.OriginalBytes = st.Size()
	if opt.MaxImageBytes > 0 && st.Size() > opt.MaxImageBytes {
		return ImagePayload{}, meta, fmt.Errorf("file size %d exceeds max_image_bytes %d", st.Size(), opt.MaxImageBytes)
	}

	cfgW, cfgH, format, err := imageDimensions(path)
	if err != nil {
		return ImagePayload{}, meta, err
	}
	meta.OriginalWidth = cfgW
	meta.OriginalHeight = cfgH

	maxDim := opt.MaxDimension
	if maxDim <= 0 {
		maxDim = 2048
	}
	maxPayload := opt.MaxPayloadBytes
	if maxPayload <= 0 {
		maxPayload = 512 * 1024
	}

	payload, passMeta, ok, err := tryPassthrough(path, st.Size(), cfgW, cfgH, format, opt, maxDim, maxPayload)
	if err != nil {
		// Previously this error was dropped and the compression path was
		// attempted anyway; report the read failure directly instead.
		return ImagePayload{}, meta, fmt.Errorf("read image: %w", err)
	}
	if ok {
		return payload, passMeta, nil
	}
	return compressWithImaging(path, opt, maxDim, maxPayload, meta)
}
// tryPassthrough decides whether the file at path can be sent unmodified and,
// if so, reads it.
//
// Passthrough requires opt.SkipPreprocessBelowBytes to be positive, size to
// be within both that threshold and maxPayload, the longer edge of w x h to be
// within maxDim, and format to map to a known MIME type.
//
// The boolean result reports whether passthrough was used. When it is false
// and the error is nil, the caller is expected to fall back to compression.
// A non-nil error means the file could not be read.
func tryPassthrough(path string, size int64, w, h int, format string, opt PreprocessOptions, maxDim int, maxPayload int64) (ImagePayload, PreprocessMeta, bool, error) {
	meta := PreprocessMeta{
		OriginalPath:   path,
		OriginalBytes:  size,
		OriginalWidth:  w,
		OriginalHeight: h,
	}

	threshold := opt.SkipPreprocessBelowBytes
	if threshold <= 0 || size > threshold || size > maxPayload {
		return ImagePayload{}, meta, false, nil
	}
	longEdge := w
	if h > longEdge {
		longEdge = h
	}
	if longEdge > maxDim {
		return ImagePayload{}, meta, false, nil
	}

	// Resolve the MIME type before touching the disk: an unsupported format
	// can never be passed through, so there is no point reading the file.
	mime := mimeFromImageFormat(format)
	if mime == "" {
		return ImagePayload{}, meta, false, nil
	}

	raw, err := os.ReadFile(path)
	if err != nil {
		return ImagePayload{}, meta, false, err
	}
	// size came from an earlier stat; re-check the bytes actually read so the
	// payload limit holds even if the file grew in between.
	if n := int64(len(raw)); n > threshold || n > maxPayload {
		return ImagePayload{}, meta, false, nil
	}

	meta.OutputWidth = w
	meta.OutputHeight = h
	meta.OutputBytes = len(raw)
	meta.OutputMIMEType = mime
	meta.PreprocessMode = "passthrough"
	return ImagePayload{Bytes: raw, MIMEType: mime}, meta, true, nil
}
// compressWithImaging scales the image at path to fit within maxDim x maxDim
// and encodes it as JPEG, searching for an encoding of at most maxPayload
// bytes.
//
// Up to six passes are made. Each pass lowers the JPEG quality in steps of 5
// down to a floor; every pass after the first shrinks the bounding box by 15%
// (never below 256) and restarts from quality 75. The floor is 60, or the
// configured quality when that is lower, so that a configured quality below
// 60 still gets encoded on the first, full-size pass.
//
// meta is the partially filled metadata from the caller; it is returned with
// the original and output dimensions updated, plus the encoding details on
// success. An error is returned when the image cannot be opened or encoded,
// or when no attempt fits within maxPayload.
func compressWithImaging(path string, opt PreprocessOptions, maxDim int, maxPayload int64, meta PreprocessMeta) (ImagePayload, PreprocessMeta, error) {
	const (
		defaultQuality = 82
		floorQuality   = 60
		retryQuality   = 75
		qualityStep    = 5
		maxAttempts    = 6
		minDim         = 256
		shrinkFactor   = 0.85
	)

	src, err := imaging.Open(path)
	if err != nil {
		return ImagePayload{}, meta, fmt.Errorf("open image: %w", err)
	}
	bounds := src.Bounds()
	meta.OriginalWidth = bounds.Dx()
	meta.OriginalHeight = bounds.Dy()

	dst := imaging.Fit(src, maxDim, maxDim, imaging.Lanczos)
	outBounds := dst.Bounds()
	meta.OutputWidth = outBounds.Dx()
	meta.OutputHeight = outBounds.Dy()

	quality := opt.JPEGQuality
	if quality <= 0 || quality > 100 {
		quality = defaultQuality
	}
	// Never let the floor sit above the configured quality; otherwise the
	// inner loop would not run at all on the first pass.
	floor := floorQuality
	if quality < floor {
		floor = quality
	}

	dim := maxDim
	for attempt := 0; attempt < maxAttempts; attempt++ {
		if attempt > 0 {
			dim = int(float64(dim) * shrinkFactor)
			if dim < minDim {
				dim = minDim
			}
			dst = imaging.Fit(src, dim, dim, imaging.Lanczos)
			outBounds = dst.Bounds()
			meta.OutputWidth = outBounds.Dx()
			meta.OutputHeight = outBounds.Dy()
		}
		for q := quality; q >= floor; q -= qualityStep {
			var buf bytes.Buffer
			if err := imaging.Encode(&buf, dst, imaging.JPEG, imaging.JPEGQuality(q)); err != nil {
				return ImagePayload{}, meta, fmt.Errorf("encode jpeg: %w", err)
			}
			if int64(buf.Len()) <= maxPayload {
				meta.JPEGQuality = q
				meta.OutputBytes = buf.Len()
				meta.OutputMIMEType = "image/jpeg"
				meta.PreprocessMode = "jpeg"
				return ImagePayload{Bytes: buf.Bytes(), MIMEType: "image/jpeg"}, meta, nil
			}
		}
		// Smaller passes restart from a fixed quality rather than the
		// configured one.
		quality = retryQuality
	}
	return ImagePayload{}, meta, fmt.Errorf("could not compress image under max_payload_bytes %d", maxPayload)
}
// imageDimensions opens the file at path and reads only the image header,
// returning the pixel width and height plus the detected format name. It
// returns an error when the file cannot be opened or the header cannot be
// decoded.
func imageDimensions(path string) (w, h int, format string, err error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		return 0, 0, "", openErr
	}
	defer file.Close()

	header, name, decodeErr := image.DecodeConfig(file)
	if decodeErr != nil {
		return 0, 0, "", fmt.Errorf("decode image config: %w", decodeErr)
	}
	return header.Width, header.Height, name, nil
}
// mimeFromImageFormat maps a decoder format name to its MIME type. The name
// is trimmed and compared case-insensitively; "jpg" is treated as "jpeg".
// Unknown formats yield "".
func mimeFromImageFormat(format string) string {
	name := strings.ToLower(strings.TrimSpace(format))
	if name == "jpg" {
		name = "jpeg"
	}
	switch name {
	case "jpeg", "png", "gif", "webp", "bmp", "tiff":
		return "image/" + name
	}
	return ""
}
// DecodeImageConfig is a test helper that confirms a file can be decoded: it
// opens path and returns the image configuration and format name reported by
// image.DecodeConfig.
func DecodeImageConfig(path string) (image.Config, string, error) {
	file, openErr := os.Open(path)
	if openErr != nil {
		var none image.Config
		return none, "", openErr
	}
	defer file.Close()

	header, format, decodeErr := image.DecodeConfig(file)
	return header, format, decodeErr
}
+109
View File
@@ -0,0 +1,109 @@
package vision
import (
"image"
"image/color"
"image/png"
"os"
"path/filepath"
"testing"
"github.com/disintegration/imaging"
)
// TestPreprocessImageFile_scalesAndLimitsPayload verifies that a large PNG is
// scaled to fit the dimension limit, re-encoded as JPEG, and kept within the
// payload limit.
func TestPreprocessImageFile_scalesAndLimitsPayload(t *testing.T) {
	src := filepath.Join(t.TempDir(), "big.png")
	if err := imaging.Save(imaging.New(3000, 2000, color.White), src); err != nil {
		t.Fatal(err)
	}

	opts := PreprocessOptions{
		MaxImageBytes:            10 * 1024 * 1024,
		MaxDimension:             1024,
		JPEGQuality:              85,
		MaxPayloadBytes:          600 * 1024,
		SkipPreprocessBelowBytes: 0,
	}
	payload, info, err := PreprocessImageFile(src, opts)
	if err != nil {
		t.Fatal(err)
	}

	switch {
	case len(payload.Bytes) == 0:
		t.Fatal("empty output")
	case info.PreprocessMode != "jpeg":
		t.Fatalf("mode: %s", info.PreprocessMode)
	case info.OutputWidth > 1024 || info.OutputHeight > 1024:
		t.Fatalf("expected fit within 1024, got %dx%d", info.OutputWidth, info.OutputHeight)
	case int64(len(payload.Bytes)) > 600*1024:
		t.Fatalf("payload %d exceeds max", len(payload.Bytes))
	}
}
// TestPreprocessImageFile_passthroughSmallPNG verifies that a small PNG within
// all limits is sent unmodified with its original MIME type and dimensions.
func TestPreprocessImageFile_passthroughSmallPNG(t *testing.T) {
	src := filepath.Join(t.TempDir(), "small.png")
	if err := imaging.Save(imaging.New(400, 300, color.White), src); err != nil {
		t.Fatal(err)
	}

	opts := PreprocessOptions{
		MaxImageBytes:            5 * 1024 * 1024,
		MaxDimension:             2048,
		MaxPayloadBytes:          512 * 1024,
		SkipPreprocessBelowBytes: 2 * 1024 * 1024,
	}
	payload, info, err := PreprocessImageFile(src, opts)
	if err != nil {
		t.Fatal(err)
	}

	switch {
	case info.PreprocessMode != "passthrough":
		t.Fatalf("expected passthrough, got %s", info.PreprocessMode)
	case payload.MIMEType != "image/png":
		t.Fatalf("mime: %s", payload.MIMEType)
	case info.OutputWidth != 400 || info.OutputHeight != 300:
		t.Fatalf("dims: %dx%d", info.OutputWidth, info.OutputHeight)
	}
}
// TestPreprocessImageFile_passthroughDisabled verifies that with
// SkipPreprocessBelowBytes set to 0 even a small image goes through JPEG
// compression.
func TestPreprocessImageFile_passthroughDisabled(t *testing.T) {
	src := filepath.Join(t.TempDir(), "small.png")
	if err := imaging.Save(imaging.New(100, 100, color.White), src); err != nil {
		t.Fatal(err)
	}

	opts := PreprocessOptions{
		MaxDimension:             2048,
		MaxPayloadBytes:          512 * 1024,
		SkipPreprocessBelowBytes: 0,
	}
	_, info, err := PreprocessImageFile(src, opts)
	if err != nil {
		t.Fatal(err)
	}
	if mode := info.PreprocessMode; mode != "jpeg" {
		t.Fatalf("expected jpeg compress, got %s", mode)
	}
}
func TestPreprocessImageFile_rejectsOversizeFile(t *testing.T) {
dir := t.TempDir()
path := filepath.Join(dir, "tiny.png")
f, err := os.Create(path)
if err != nil {
t.Fatal(err)
}
if err := png.Encode(f, image.NewRGBA(image.Rect(0, 0, 2, 2))); err != nil {
t.Fatal(err)
}
f.Close()
_, _, err = PreprocessImageFile(path, PreprocessOptions{MaxImageBytes: 1})
if err == nil {
t.Fatal("expected error when file exceeds max_image_bytes")
}
}
+125
View File
@@ -0,0 +1,125 @@
package vision
import (
"context"
"fmt"
"os"
"strings"
"cyberstrike-ai/internal/config"
"cyberstrike-ai/internal/mcp"
"cyberstrike-ai/internal/mcp/builtin"
"go.uber.org/zap"
)
// RegisterAnalyzeImageTool registers the analyze_image MCP tool on mcpServer
// when the vision configuration reports itself ready. It does nothing when
// mcpServer or cfg is nil, and it skips registration (with a warning, if a
// logger is supplied) when vision is enabled but not ready or when the
// working directory cannot be determined.
//
// The process working directory and the preprocessing options are captured
// once at registration time and reused by every tool invocation. The tool
// handler reports failures as error-flagged text results rather than as Go
// errors.
func RegisterAnalyzeImageTool(mcpServer *mcp.Server, cfg *config.Config, logger *zap.Logger) {
	if mcpServer == nil || cfg == nil {
		return
	}

	// warn logs only when a logger was supplied.
	warn := func(msg string, fields ...zap.Field) {
		if logger != nil {
			logger.Warn(msg, fields...)
		}
	}

	if !cfg.Vision.Ready() {
		if cfg.Vision.Enabled {
			warn("vision.enabled 但 vision.model 为空,跳过注册 analyze_image")
		}
		return
	}

	cwd, err := os.Getwd()
	if err != nil {
		warn("vision: getwd failed, skip analyze_image", zap.Error(err))
		return
	}

	var preOpt PreprocessOptions
	preOpt.MaxImageBytes = cfg.Vision.MaxImageBytesEffective()
	preOpt.MaxDimension = cfg.Vision.MaxDimensionEffective()
	preOpt.JPEGQuality = cfg.Vision.JPEGQualityEffective()
	preOpt.MaxPayloadBytes = cfg.Vision.MaxPayloadBytesEffective()
	preOpt.SkipPreprocessBelowBytes = cfg.Vision.SkipPreprocessBelowBytesEffective()

	client := NewClient(cfg.Vision, cfg.OpenAI)

	pathProp := map[string]interface{}{
		"type":        "string",
		"description": "图片绝对路径或相对于进程工作目录的路径",
	}
	questionProp := map[string]interface{}{
		"type":        "string",
		"description": "可选:希望模型重点回答的问题。验证码图建议:只输出验证码字符,不要空格和解释",
	}
	schema := map[string]interface{}{
		"type": "object",
		"properties": map[string]interface{}{
			"path":     pathProp,
			"question": questionProp,
		},
		"required": []string{"path"},
	}

	tool := mcp.Tool{
		Name: builtin.ToolAnalyzeImage,
		Description: "分析服务器上的本地图片并返回文字描述(验证码、UI 元素、报错、架构图要点等)。" +
			"输入为文件路径(如用户上传的 chat_uploads 路径或工具截图路径)。" +
			"输出仅为文本,不含图片数据。不要对二进制图片使用 read_file 指望理解内容。",
		ShortDescription: "分析本地图片并返回文字描述(验证码/UI/报错等)",
		InputSchema:      schema,
	}

	// analyze runs the three stages in order: path validation, preprocessing,
	// then the model call. A missing or non-string argument reads as "".
	analyze := func(ctx context.Context, args map[string]interface{}) (*mcp.ToolResult, error) {
		rawPath, _ := args["path"].(string)
		ask, _ := args["question"].(string)

		resolved, resolveErr := ResolveImagePath(rawPath, cwd)
		if resolveErr != nil {
			return textResult(fmt.Sprintf("路径校验失败: %v", resolveErr), true), nil
		}
		payload, info, prepErr := PreprocessImageFile(resolved, preOpt)
		if prepErr != nil {
			return textResult(fmt.Sprintf("图片预处理失败: %v", prepErr), true), nil
		}
		answer, modelErr := client.Analyze(ctx, payload, ask)
		if modelErr != nil {
			return textResult(fmt.Sprintf("视觉模型调用失败: %v", modelErr), true), nil
		}
		return textResult(formatAnalysisResult(resolved, info, answer), false), nil
	}

	mcpServer.RegisterTool(tool, analyze)
	if logger != nil {
		logger.Info("vision: analyze_image 工具已注册", zap.String("model", cfg.Vision.Model))
	}
}
// textResult wraps text in a tool result holding a single "text" content
// item, flagged as an error when isError is true.
func textResult(text string, isError bool) *mcp.ToolResult {
	result := new(mcp.ToolResult)
	result.IsError = isError
	result.Content = []mcp.Content{{Type: "text", Text: text}}
	return result
}
// formatAnalysisResult renders the Markdown report returned by the tool: the
// resolved path, one line describing the preprocessing that was applied, and
// the trimmed model summary. Byte counts are shown in KB, rounded up.
func formatAnalysisResult(path string, meta PreprocessMeta, summary string) string {
	outKB := (meta.OutputBytes + 1023) / 1024
	origKB := (meta.OriginalBytes + 1023) / 1024

	var report strings.Builder
	report.WriteString("## Image analysis\n" + "- **path**: " + path + "\n")

	if meta.PreprocessMode == "passthrough" {
		fmt.Fprintf(&report, "- **preprocess**: passthrough %dx%d, %s, %dKB (original %dKB)\n\n",
			meta.OutputWidth, meta.OutputHeight, meta.OutputMIMEType, outKB, origKB)
	} else {
		// Every other mode is reported in the JPEG layout.
		fmt.Fprintf(&report, "- **preprocess**: %dx%d → %dx%d, jpeg q=%d, %dKB (original %dKB)\n\n",
			meta.OriginalWidth, meta.OriginalHeight,
			meta.OutputWidth, meta.OutputHeight,
			meta.JPEGQuality, outKB, origKB)
	}

	report.WriteString("### Summary\n" + strings.TrimSpace(summary) + "\n")
	return report.String()
}