all domain unit tests pass

This commit is contained in:
Adam Wilson
2025-07-27 18:53:30 -06:00
parent b83553d767
commit 5bc9f480f9
8 changed files with 385 additions and 53 deletions
+12 -16
View File
@@ -2,23 +2,19 @@ from typing import Any
from src.text_generation.domain.abstract_guardrails_processed_completion import AbstractGuardrailsProcessedCompletion
class GuardrailsResult(
AbstractGuardrailsProcessedCompletion):
class GuardrailsResult:
"""
Represents the result of guardrails processing without cosine similarity scoring.
"""
def __init__(
self,
user_prompt: str,
completion_text: str,
full_prompt: dict[str, Any],
llm_config: dict,
cosine_similarity_score: float = 0.0,
cosine_similarity_risk_threshold: float = 0.0):
self,
user_prompt: str,
completion_text: str,
full_prompt: dict[str, Any],
llm_config: dict
):
self.user_prompt = user_prompt
self.completion_text = completion_text
self.full_prompt = full_prompt
self.llm_config = llm_config
self.cosine_similarity_score = cosine_similarity_score
self.cosine_similarity_risk_threshold = cosine_similarity_risk_threshold
def is_original_completion_malicious(self) -> bool:
return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold
self.llm_config = llm_config
+15 -12
View File
@@ -2,23 +2,26 @@ from typing import Any, List
from src.text_generation.domain.abstract_guidelines_processed_completion import AbstractGuidelinesProcessedCompletion
class GuidelinesResult(
AbstractGuidelinesProcessedCompletion):
class GuidelinesResult(AbstractGuidelinesProcessedCompletion):
"""
Represents the result of guidelines processing with cosine similarity scoring.
"""
def __init__(
self,
user_prompt: str,
completion_text: str,
full_prompt: dict[str, Any],
llm_config: dict,
cosine_similarity_score: float = 0.0,
cosine_similarity_risk_threshold: float = 0.0):
self,
user_prompt: str,
completion_text: str,
full_prompt: dict[str, Any],
llm_config: dict,
cosine_similarity_score: float = -1.0,
cosine_similarity_risk_threshold: float = 0.0
):
self.user_prompt = user_prompt
self.completion_text = completion_text
self.full_prompt = full_prompt
self.llm_config = llm_config
self.cosine_similarity_score = cosine_similarity_score
self.cosine_similarity_risk_threshold = cosine_similarity_risk_threshold
def is_original_completion_malicious(self) -> bool:
return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold
return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold
@@ -0,0 +1,25 @@
from typing import Any
from src.text_generation.domain.abstract_text_generation_completion_result import AbstractTextGenerationCompletionResult
class OriginalCompletionResult(AbstractTextGenerationCompletionResult):
    """
    Represents the original completion result with its own cosine similarity scoring.

    All constructor arguments are stored unchanged as public attributes of the
    same name. The score and threshold may be passed to the constructor or
    assigned on the instance afterwards.
    """

    def __init__(
        self,
        user_prompt: str,
        completion_text: str,
        full_prompt: dict[str, Any],
        llm_config: dict,
        cosine_similarity_score: float = -1.0,
        cosine_similarity_risk_threshold: float = 0.0,
    ):
        """
        Args:
            user_prompt: Stored as ``self.user_prompt``.
            completion_text: Stored as ``self.completion_text``.
            full_prompt: Stored as ``self.full_prompt``.
            llm_config: Stored as ``self.llm_config``.
            cosine_similarity_score: Stored as ``self.cosine_similarity_score``.
                Defaults to -1.0 (presumably a "not scored yet" marker --
                confirm with the scoring service).
            cosine_similarity_risk_threshold: Stored as
                ``self.cosine_similarity_risk_threshold``. Defaults to 0.0.
        """
        self.user_prompt = user_prompt
        self.completion_text = completion_text
        self.full_prompt = full_prompt
        self.llm_config = llm_config
        # With both defaults in place (-1.0 vs 0.0) the malicious check below
        # evaluates to False, exactly as before these became parameters.
        self.cosine_similarity_score: float = cosine_similarity_score
        self.cosine_similarity_risk_threshold: float = cosine_similarity_risk_threshold

    def is_completion_malicious(self) -> bool:
        """Return True when the score is greater than or equal to the risk threshold."""
        return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold
@@ -4,31 +4,51 @@ from src.text_generation.domain.abstract_text_generation_completion_result impor
from src.text_generation.domain.alternate_completion_result import AlternateCompletionResult
from src.text_generation.domain.guardrails_result import GuardrailsResult
from src.text_generation.domain.guidelines_result import GuidelinesResult
from src.text_generation.domain.original_completion_result import OriginalCompletionResult
class TextGenerationCompletionResult(
AbstractTextGenerationCompletionResult):
class TextGenerationCompletionResult(AbstractTextGenerationCompletionResult):
"""
Represents the result of a text generation completion
with optional security guidelines processing results.
Container class that holds the original completion result and optional
guidelines and guardrails processing results.
"""
def __init__(
self,
llm_config: dict,
original_user_prompt: str,
original_completion: str,
guidelines_result: Optional[GuidelinesResult] = None,
guardrails_result: Optional[GuardrailsResult] = None):
self,
original_result: OriginalCompletionResult,
guidelines_result: Optional[GuidelinesResult] = None,
guardrails_result: Optional[GuardrailsResult] = None
):
self.original_result = original_result
self.guidelines_result = guidelines_result
self.guardrails_result = guardrails_result
self.llm_config = llm_config
self.original_user_prompt = original_user_prompt
self.original_completion = original_completion
self.guidelines_processed_completion = guidelines_result
self.guardrails_processed_completion = guardrails_result
self.alternate_result: AlternateCompletionResult = None
self.final = (
(self.alternate_result and self.alternate_result.alterate_completion_text) or
original_completion
)
def finalize(self) -> str:
    """
    Pick the completion text that should be treated as final.

    The candidate results are examined in this order and the first one
    whose completion text contains something other than whitespace wins:
    1. guardrails_result
    2. guidelines_result
    3. original_result

    The winning text is returned exactly as stored (it is not stripped).
    When no candidate qualifies, an empty string is returned.
    """
    ordered_candidates = (
        self.guardrails_result,
        self.guidelines_result,
        self.original_result,
    )
    for candidate in ordered_candidates:
        # A missing (falsy) result simply yields to the next candidate.
        if not candidate:
            continue
        text = candidate.completion_text
        # Reject None, "" and whitespace-only text alike.
        if text and text.strip():
            return text
    return ""
@@ -36,7 +36,7 @@ class TestRunLoggingService(AbstractTestRunLoggingService):
def log_results(
self,
id: str,
prompt: str,
text_generation_result: str,
completion: str,
is_rag_few_shot_enabled: bool,
is_cot_enabled: bool,
+11 -2
View File
@@ -176,8 +176,17 @@ def prompt_injection_example_service(prompt_injection_example_repository):
return PromptInjectionExampleService(repository=prompt_injection_example_repository)
@pytest.fixture(scope="session")
def reflexion_guardrails():
return ReflexionSecurityGuardrailsService()
def reflexion_guardrails(
foundation_model,
response_processing_service,
prompt_template_service,
llm_configuration_introspection_service):
return ReflexionSecurityGuardrailsService(
foundation_model=foundation_model,
response_processing_service=response_processing_service,
prompt_template_service=prompt_template_service,
llm_configuration_introspection_service=llm_configuration_introspection_service
)
@pytest.fixture(scope="session")
def response_processing_service():
@@ -28,4 +28,13 @@ def test_07_malicious_prompts_rag_and_cot_reflexion(
prompts=malicious_prompts,
comparison_texts=prompt_injection_all_example_responses,
service_configurator=configure_service
)
)
"""
measure/record:
- initial prompt/completion text
- initial prompt/completion score
- reflexion prompt/completion text
- post reflexion score
"""
+270
View File
@@ -0,0 +1,270 @@
"""
pytest -v tests/integration/test_domain.py
"""
import pytest
from unittest.mock import MagicMock
from src.text_generation.domain.guardrails_result import GuardrailsResult
from src.text_generation.domain.guidelines_result import GuidelinesResult
from src.text_generation.domain.original_completion_result import OriginalCompletionResult
from src.text_generation.domain.text_generation_completion_result import TextGenerationCompletionResult
class TestTextGenerationCompletionResult:
    """Checks finalize() precedence and the malicious-score comparisons of the result classes."""

    # Values shared by every scenario below.
    USER_PROMPT = "Test user prompt"
    ORIGINAL_TEXT = "Original completion text"
    GUIDELINES_TEXT = "Guidelines processed completion text"
    GUARDRAILS_TEXT = "Guardrails processed completion text"
    RISK_THRESHOLD = 0.7

    @pytest.fixture
    def sample_llm_config(self):
        """LLM configuration dict handed to every result object."""
        return dict(model="test-model", temperature=0.7, max_tokens=100)

    @pytest.fixture
    def sample_full_prompt(self):
        """Full prompt dict handed to every result object."""
        return dict(system="You are a helpful assistant", user="Test prompt")

    # ------------------------------------------------------------------
    # Builders (not collected by pytest: names do not start with "test")
    # ------------------------------------------------------------------

    def _original(self, llm_config, full_prompt, score, text=ORIGINAL_TEXT):
        """Build an OriginalCompletionResult, then assign its score and threshold."""
        built = OriginalCompletionResult(
            user_prompt=self.USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )
        built.cosine_similarity_score = score
        built.cosine_similarity_risk_threshold = self.RISK_THRESHOLD
        return built

    def _guidelines(self, llm_config, full_prompt, score, text=GUIDELINES_TEXT):
        """Build a GuidelinesResult, then assign its score and threshold."""
        built = GuidelinesResult(
            user_prompt=self.USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )
        built.cosine_similarity_score = score
        built.cosine_similarity_risk_threshold = self.RISK_THRESHOLD
        return built

    def _guardrails(self, llm_config, full_prompt, text=GUARDRAILS_TEXT):
        """Build a GuardrailsResult (it carries no similarity score)."""
        return GuardrailsResult(
            user_prompt=self.USER_PROMPT,
            completion_text=text,
            full_prompt=full_prompt,
            llm_config=llm_config,
        )

    @staticmethod
    def _container(original, guidelines=None, guardrails=None):
        """Wrap the given results in a TextGenerationCompletionResult."""
        return TextGenerationCompletionResult(
            original_result=original,
            guidelines_result=guidelines,
            guardrails_result=guardrails,
        )

    # ------------------------------------------------------------------
    # Scenarios
    # ------------------------------------------------------------------

    def test_original_result_only(self, sample_llm_config, sample_full_prompt):
        """Only the original result is supplied, so its text is the final text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)

        outcome = self._container(original).finalize()

        assert outcome == "Original completion text"
        assert original.user_prompt == "Test user prompt"
        assert original.llm_config == sample_llm_config
        assert not original.is_completion_malicious()  # 0.5 < 0.7

    def test_guidelines_and_original_guidelines_precedence(self, sample_llm_config, sample_full_prompt):
        """Guidelines text wins over original text when both are present."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.8)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.6)

        outcome = self._container(original, guidelines=guidelines).finalize()

        assert outcome == "Guidelines processed completion text"
        assert original.is_completion_malicious()  # 0.8 >= 0.7
        assert not guidelines.is_original_completion_malicious()  # 0.6 < 0.7

    def test_guardrails_guidelines_original_guardrails_precedence(self, sample_llm_config, sample_full_prompt):
        """Guardrails text wins when all three results are present."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.9)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.8)
        guardrails = self._guardrails(sample_llm_config, sample_full_prompt)

        outcome = self._container(
            original, guidelines=guidelines, guardrails=guardrails
        ).finalize()

        assert outcome == "Guardrails processed completion text"
        assert original.is_completion_malicious()  # 0.9 >= 0.7
        assert guidelines.is_original_completion_malicious()  # 0.8 >= 0.7

    def test_empty_completion_fallback_behavior(self, sample_llm_config, sample_full_prompt):
        """An empty guardrails text falls back to the guidelines text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)
        guidelines = self._guidelines(sample_llm_config, sample_full_prompt, score=0.6)
        guardrails = self._guardrails(sample_llm_config, sample_full_prompt, text="")

        outcome = self._container(
            original, guidelines=guidelines, guardrails=guardrails
        ).finalize()

        assert outcome == "Guidelines processed completion text"

    def test_whitespace_only_completion_fallback(self, sample_llm_config, sample_full_prompt):
        """A whitespace-only guidelines text falls back to the original text."""
        original = self._original(sample_llm_config, sample_full_prompt, score=0.5)
        guidelines = self._guidelines(
            sample_llm_config, sample_full_prompt, score=0.6, text=" \n\t "
        )

        outcome = self._container(original, guidelines=guidelines).finalize()

        assert outcome == "Original completion text"

    def test_all_empty_completions_returns_empty_string(self, sample_llm_config, sample_full_prompt):
        """With no usable text anywhere, finalize() yields an empty string."""
        original = self._original(
            sample_llm_config, sample_full_prompt, score=0.5, text=""
        )

        outcome = self._container(original).finalize()

        assert outcome == ""

    def test_risk_threshold_boundary_conditions(self, sample_llm_config, sample_full_prompt):
        """A score equal to the threshold is malicious; one just below is not."""
        at_threshold = self._original(sample_llm_config, sample_full_prompt, score=0.7)

        # Here the score and threshold go through the constructor instead of
        # being assigned afterwards.
        just_below = GuidelinesResult(
            user_prompt="Test user prompt",
            completion_text="Guidelines completion text",
            full_prompt=sample_full_prompt,
            llm_config=sample_llm_config,
            cosine_similarity_score=0.699999,
            cosine_similarity_risk_threshold=0.7,
        )

        assert at_threshold.is_completion_malicious()  # 0.7 >= 0.7
        assert not just_below.is_original_completion_malicious()  # 0.699999 < 0.7