Files
agentic_security/agentic_security/probe_data/audio_generator.py
T
zhanz5 02b68b06ee fix: make cost calculation model-aware instead of hardcoded to deepseek-chat
Previously, calculate_cost() was always called without a model parameter,
causing all scans to report costs based on deepseek-chat pricing regardless
of the actual target model (e.g. gpt-4, claude-3-opus).

Changes:

- http_spec.py: Add 'model_name' property to LLMSpec that extracts the
  model field from the JSON request body. Returns 'unknown' if the body
  is not valid JSON or has no 'model' field.

- probe_data/image_generator.py: Add 'model_name' pass-through property
  to RequestAdapter, delegating to the underlying LLMSpec.

- probe_data/audio_generator.py: Same as above - add 'model_name'
  pass-through property to RequestAdapter.

- probe_actor/cost_module.py:
  - Change return type from float to float | None.
  - Unknown models now log a warning and return None instead of raising
    ValueError, so scans are not interrupted by unsupported model names.
  - Add logger import for the warning message.

- probe_actor/fuzzer.py: Pass model_name to calculate_cost() in both
  scan_module() and perform_many_shot_scan() using
  getattr(request_factory, 'model_name', 'unknown').

- primitives/models.py: Update ScanResult.cost type from float to
  float | None to accommodate unknown model pricing.
2026-06-05 13:59:59 +08:00

146 lines
4.3 KiB
Python

import base64
import logging
import os
import platform
import subprocess
import uuid
import httpx
from cache_to_disk import cache_to_disk
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class AudioGenerationError(Exception):
"""Custom exception for errors during audio generation."""
def encode(content: bytes) -> str:
encoded_content = base64.b64encode(content).decode("utf-8")
return "data:audio/mpeg;base64," + encoded_content
def generate_audio_mac_wav(prompt: str) -> bytes:
"""
Generate an audio file from the provided prompt using macOS 'say' command
and return it as bytes in WAV format.
Parameters:
prompt (str): Text to convert into audio.
Returns:
bytes: The audio data in WAV format.
"""
# Generate unique temporary file paths
temp_aiff_path = f"temp_audio_{uuid.uuid4().hex}.aiff"
temp_wav_path = f"temp_audio_{uuid.uuid4().hex}.wav"
try:
# Use the 'say' command to generate AIFF audio
subprocess.run(["say", "-o", temp_aiff_path, prompt], check=True)
# Convert AIFF to WAV using afconvert
subprocess.run(
["afconvert", "-f", "WAVE", "-d", "LEI16", temp_aiff_path, temp_wav_path],
check=True,
)
# Read the WAV file into memory
with open(temp_wav_path, "rb") as f:
audio_bytes = f.read()
except subprocess.CalledProcessError as e:
logger.error(f"Subprocess error: {e}")
raise AudioGenerationError("Failed to generate or convert audio.") from e
except FileNotFoundError as e:
logger.error(f"File not found: {e}")
raise AudioGenerationError("Required file not found.") from e
except Exception as e:
logger.exception("Unexpected error occurred.")
raise AudioGenerationError(
"An unexpected error occurred during audio generation."
) from e
finally:
for path in (temp_aiff_path, temp_wav_path):
try:
if os.path.exists(path):
os.remove(path)
except Exception as e:
logger.warning(f"Failed to delete temporary file {path}: {e}")
# Return the audio bytes
return audio_bytes
def generate_audio_cross_platform(prompt: str) -> bytes:
"""
Generate an audio file from the provided prompt using gTTS for cross-platform support.
Parameters:
prompt (str): Text to convert into audio.
Returns:
bytes: The audio data in MP3 format.
"""
from gtts import gTTS # Import gTTS for cross-platform support
tts = gTTS(text=prompt, lang="en")
temp_mp3_path = f"temp_audio_{uuid.uuid4().hex}.mp3"
tts.save(temp_mp3_path)
try:
with open(temp_mp3_path, "rb") as f:
audio_bytes = f.read()
finally:
if os.path.exists(temp_mp3_path):
os.remove(temp_mp3_path)
return audio_bytes
@cache_to_disk()
def generate_audioform(prompt: str) -> bytes:
"""
Generate an audio file from the provided prompt in WAV format.
Uses macOS 'say' command if the operating system is macOS, otherwise uses gTTS.
Parameters:
prompt (str): Text to convert into audio.
Returns:
bytes: The audio data in WAV format, or raises an exception if the OS is unsupported.
"""
current_os = platform.system()
if current_os == "Darwin": # macOS
return generate_audio_mac_wav(prompt)
elif current_os in ["Windows", "Linux"]:
return generate_audio_cross_platform(prompt)
else:
raise NotImplementedError(
"Audio generation is only supported on macOS, Windows, and Linux for now."
)
class RequestAdapter:
# Adapter of http_spec.LLMSpec
def __init__(self, llm_spec):
self.llm_spec = llm_spec
if not llm_spec.has_audio:
raise ValueError("LLMSpec must have an image")
@property
def model_name(self) -> str:
return self.llm_spec.model_name
async def probe(
self, prompt: str, encoded_image: str = "", encoded_audio: str = "", files={}
) -> httpx.Response:
encoded_audio = generate_audioform(prompt)
encoded_audio = encode(encoded_audio)
return await self.llm_spec.probe(prompt, encoded_image, encoded_audio, files)
fn = probe