mirror of
https://github.com/lightbroker/llmsecops-research.git
synced 2026-05-24 17:14:08 +02:00
all domain unit tests pass
This commit is contained in:
@@ -2,23 +2,19 @@ from typing import Any
|
||||
from src.text_generation.domain.abstract_guardrails_processed_completion import AbstractGuardrailsProcessedCompletion
|
||||
|
||||
|
||||
class GuardrailsResult:
    """
    Represents the result of guardrails processing without cosine similarity scoring.

    The object is a plain value holder: each constructor argument is stored
    unchanged on the attribute of the same name.
    """

    def __init__(
        self,
        user_prompt: str,
        completion_text: str,
        full_prompt: dict[str, Any],
        llm_config: dict
    ):
        """
        Args:
            user_prompt: Prompt text supplied by the user.
            completion_text: Completion text produced by guardrails processing.
            full_prompt: Full prompt dictionary that was sent to the model.
            llm_config: LLM configuration dictionary.
        """
        self.user_prompt = user_prompt
        self.completion_text = completion_text
        self.full_prompt = full_prompt
        # No score/threshold attributes here: this result type carries no
        # cosine similarity scoring.
        self.llm_config = llm_config
|
||||
@@ -2,23 +2,26 @@ from typing import Any, List
|
||||
from src.text_generation.domain.abstract_guidelines_processed_completion import AbstractGuidelinesProcessedCompletion
|
||||
|
||||
|
||||
class GuidelinesResult(AbstractGuidelinesProcessedCompletion):
    """
    Represents the result of guidelines processing with cosine similarity scoring.
    """

    def __init__(
        self,
        user_prompt: str,
        completion_text: str,
        full_prompt: dict[str, Any],
        llm_config: dict,
        cosine_similarity_score: float = -1.0,
        cosine_similarity_risk_threshold: float = 0.0
    ):
        """
        Args:
            user_prompt: Prompt text supplied by the user.
            completion_text: Completion text produced by guidelines processing.
            full_prompt: Full prompt dictionary that was sent to the model.
            llm_config: LLM configuration dictionary.
            cosine_similarity_score: Similarity score for the completion.
                Defaults to -1.0, which is below the default threshold.
            cosine_similarity_risk_threshold: Score at or above which the
                completion is reported as malicious. Defaults to 0.0.
        """
        self.user_prompt = user_prompt
        self.completion_text = completion_text
        self.full_prompt = full_prompt
        self.llm_config = llm_config
        self.cosine_similarity_score = cosine_similarity_score
        self.cosine_similarity_risk_threshold = cosine_similarity_risk_threshold

    def is_original_completion_malicious(self) -> bool:
        """Return True when the score is at or above the risk threshold."""
        # The comparison is inclusive: a score exactly equal to the threshold
        # counts as malicious.
        return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold
|
||||
@@ -0,0 +1,25 @@
|
||||
from typing import Any
|
||||
from src.text_generation.domain.abstract_text_generation_completion_result import AbstractTextGenerationCompletionResult
|
||||
|
||||
|
||||
class OriginalCompletionResult(AbstractTextGenerationCompletionResult):
    """
    Holds the original completion result together with its own cosine
    similarity score and risk threshold.

    The score and threshold are not constructor arguments: they start at
    -1.0 and 0.0 respectively and can be overwritten on the instance.
    """

    def __init__(
        self,
        user_prompt: str,
        completion_text: str,
        full_prompt: dict[str, Any],
        llm_config: dict
    ):
        """
        Args:
            user_prompt: Prompt text supplied by the user.
            completion_text: Completion text of the original generation.
            full_prompt: Full prompt dictionary that was sent to the model.
            llm_config: LLM configuration dictionary.
        """
        # Scoring state first; with these starting values the result is
        # not reported as malicious (-1.0 is below 0.0).
        self.cosine_similarity_score: float = -1.0
        self.cosine_similarity_risk_threshold: float = 0.0

        self.llm_config = llm_config
        self.full_prompt = full_prompt
        self.completion_text = completion_text
        self.user_prompt = user_prompt

    def is_completion_malicious(self) -> bool:
        """Return True when the score is at or above the risk threshold."""
        score = self.cosine_similarity_score
        limit = self.cosine_similarity_risk_threshold
        return score >= limit
|
||||
@@ -4,31 +4,51 @@ from src.text_generation.domain.abstract_text_generation_completion_result impor
|
||||
from src.text_generation.domain.alternate_completion_result import AlternateCompletionResult
|
||||
from src.text_generation.domain.guardrails_result import GuardrailsResult
|
||||
from src.text_generation.domain.guidelines_result import GuidelinesResult
|
||||
from src.text_generation.domain.original_completion_result import OriginalCompletionResult
|
||||
|
||||
|
||||
class TextGenerationCompletionResult(AbstractTextGenerationCompletionResult):
    """
    Container class that holds the original completion result and optional
    guidelines and guardrails processing results.
    """

    def __init__(
        self,
        original_result: OriginalCompletionResult,
        guidelines_result: Optional[GuidelinesResult] = None,
        guardrails_result: Optional[GuardrailsResult] = None
    ):
        """
        Args:
            original_result: Result of the original completion.
            guidelines_result: Result of guidelines processing, if any.
            guardrails_result: Result of guardrails processing, if any.
        """
        self.original_result = original_result
        self.guidelines_result = guidelines_result
        self.guardrails_result = guardrails_result

    def finalize(self) -> str:
        """
        Returns the final completion text based on priority order:
        1. guardrails_result.completion_text (if not empty)
        2. guidelines_result.completion_text (if not empty)
        3. original_result.completion_text (if not empty)

        A candidate is skipped when the result object is missing or its
        completion text is empty or whitespace-only. If every candidate is
        skipped, an empty string is returned.
        """
        # Ordered from highest to lowest priority; the first usable text wins.
        candidates = (
            self.guardrails_result,
            self.guidelines_result,
            self.original_result,
        )
        for candidate in candidates:
            if not candidate:
                continue
            text = candidate.completion_text
            # The text is returned unstripped; strip() is only used to
            # detect whitespace-only completions.
            if text and text.strip():
                return text

        return ""
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@ class TestRunLoggingService(AbstractTestRunLoggingService):
|
||||
def log_results(
|
||||
self,
|
||||
id: str,
|
||||
prompt: str,
|
||||
text_generation_result: str,
|
||||
completion: str,
|
||||
is_rag_few_shot_enabled: bool,
|
||||
is_cot_enabled: bool,
|
||||
|
||||
+11
-2
@@ -176,8 +176,17 @@ def prompt_injection_example_service(prompt_injection_example_repository):
|
||||
return PromptInjectionExampleService(repository=prompt_injection_example_repository)
|
||||
|
||||
@pytest.fixture(scope="session")
def reflexion_guardrails(
        foundation_model,
        response_processing_service,
        prompt_template_service,
        llm_configuration_introspection_service):
    """Build a ReflexionSecurityGuardrailsService from the four fixtures it is constructed with."""
    return ReflexionSecurityGuardrailsService(
        foundation_model=foundation_model,
        response_processing_service=response_processing_service,
        prompt_template_service=prompt_template_service,
        llm_configuration_introspection_service=llm_configuration_introspection_service
    )
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
def response_processing_service():
|
||||
|
||||
@@ -28,4 +28,13 @@ def test_07_malicious_prompts_rag_and_cot_reflexion(
|
||||
prompts=malicious_prompts,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
)
|
||||
|
||||
"""
|
||||
measure/record:
|
||||
|
||||
- initial prompt/completion text
|
||||
- initial prompt/completion score
|
||||
- reflexion prompt/completion text
|
||||
- post reflexion score
|
||||
"""
|
||||
@@ -0,0 +1,270 @@
|
||||
"""
|
||||
pytest -v tests/integration/test_domain.py
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from src.text_generation.domain.guardrails_result import GuardrailsResult
|
||||
from src.text_generation.domain.guidelines_result import GuidelinesResult
|
||||
from src.text_generation.domain.original_completion_result import OriginalCompletionResult
|
||||
from src.text_generation.domain.text_generation_completion_result import TextGenerationCompletionResult
|
||||
|
||||
|
||||
class TestTextGenerationCompletionResult:
    """Checks finalize() precedence and the score/threshold checks of the result classes."""

    _USER_PROMPT = "Test user prompt"
    _ORIGINAL_TEXT = "Original completion text"
    _GUIDELINES_TEXT = "Guidelines processed completion text"
    _GUARDRAILS_TEXT = "Guardrails processed completion text"
    _THRESHOLD = 0.7

    @pytest.fixture
    def sample_llm_config(self):
        """LLM configuration dictionary handed to every result object."""
        return {"model": "test-model", "temperature": 0.7, "max_tokens": 100}

    @pytest.fixture
    def sample_full_prompt(self):
        """Full prompt dictionary handed to every result object."""
        return {"system": "You are a helpful assistant", "user": "Test prompt"}

    def _original(self, llm_config, full_prompt, score, text=_ORIGINAL_TEXT):
        """Create an OriginalCompletionResult, then assign its score and the 0.7 threshold."""
        built = OriginalCompletionResult(
            user_prompt=self._USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )
        built.cosine_similarity_score = score
        built.cosine_similarity_risk_threshold = self._THRESHOLD
        return built

    def _guidelines(self, llm_config, full_prompt, score, text=_GUIDELINES_TEXT):
        """Create a GuidelinesResult, then assign its score and the 0.7 threshold."""
        built = GuidelinesResult(
            user_prompt=self._USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )
        built.cosine_similarity_score = score
        built.cosine_similarity_risk_threshold = self._THRESHOLD
        return built

    def _guardrails(self, llm_config, full_prompt, text=_GUARDRAILS_TEXT):
        """Create a GuardrailsResult with the given completion text."""
        return GuardrailsResult(
            user_prompt=self._USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )

    def test_original_result_only(self, sample_llm_config, sample_full_prompt):
        """Only the original result is supplied, so its text is the final text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=None,
            guardrails_result=None,
        )

        outcome = container.finalize()

        assert outcome == "Original completion text"
        assert original.user_prompt == "Test user prompt"
        assert original.llm_config == sample_llm_config
        # Score 0.5 is under the 0.7 threshold.
        assert not original.is_completion_malicious()

    def test_guidelines_and_original_guidelines_precedence(self, sample_llm_config, sample_full_prompt):
        """With guidelines and original both present, the guidelines text wins."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.8)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.6)
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=guidelines,
            guardrails_result=None,
        )

        outcome = container.finalize()

        assert outcome == "Guidelines processed completion text"
        # 0.8 reaches the 0.7 threshold; 0.6 does not.
        assert original.is_completion_malicious()
        assert not guidelines.is_original_completion_malicious()

    def test_guardrails_guidelines_original_guardrails_precedence(self, sample_llm_config, sample_full_prompt):
        """With all three results present, the guardrails text wins."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.9)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.8)
        guardrails = self._guardrails(sample_llm_config, sample_full_prompt)
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=guidelines,
            guardrails_result=guardrails,
        )

        outcome = container.finalize()

        assert outcome == "Guardrails processed completion text"
        # Both 0.9 and 0.8 reach the 0.7 threshold.
        assert original.is_completion_malicious()
        assert guidelines.is_original_completion_malicious()

    def test_empty_completion_fallback_behavior(self, sample_llm_config, sample_full_prompt):
        """An empty guardrails text falls through to the guidelines text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.6)
        guardrails = self._guardrails(sample_llm_config, sample_full_prompt, text="")
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=guidelines,
            guardrails_result=guardrails,
        )

        outcome = container.finalize()

        assert outcome == "Guidelines processed completion text"

    def test_whitespace_only_completion_fallback(self, sample_llm_config, sample_full_prompt):
        """A whitespace-only guidelines text falls through to the original text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)
        guidelines = self._guidelines(
            sample_llm_config,
            sample_full_prompt,
            score=0.6,
            text=" \n\t ",
        )
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=guidelines,
            guardrails_result=None,
        )

        outcome = container.finalize()

        assert outcome == "Original completion text"

    def test_all_empty_completions_returns_empty_string(self, sample_llm_config, sample_full_prompt):
        """When no result carries text, finalize() yields an empty string."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5, text="")
        container = TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=None,
            guardrails_result=None,
        )

        outcome = container.finalize()

        assert outcome == ""

    def test_risk_threshold_boundary_conditions(self, sample_llm_config, sample_full_prompt):
        """A score equal to the threshold is malicious; one just under it is not."""
        at_threshold = self._original(sample_llm_config, sample_full_prompt, score=0.7)

        # Built inline so the score and threshold go through the constructor
        # keyword arguments rather than attribute assignment.
        below_threshold = GuidelinesResult(
            user_prompt="Test user prompt",
            completion_text="Guidelines completion text",
            full_prompt=sample_full_prompt,
            llm_config=sample_llm_config,
            cosine_similarity_score=0.699999,
            cosine_similarity_risk_threshold=0.7,
        )

        assert at_threshold.is_completion_malicious()
        assert not below_threshold.is_original_completion_malicious()
|
||||
Reference in New Issue
Block a user