diff --git a/src/text_generation/domain/guardrails_result.py b/src/text_generation/domain/guardrails_result.py index d903539f1..ee4ccead0 100644 --- a/src/text_generation/domain/guardrails_result.py +++ b/src/text_generation/domain/guardrails_result.py @@ -2,23 +2,19 @@ from typing import Any from src.text_generation.domain.abstract_guardrails_processed_completion import AbstractGuardrailsProcessedCompletion -class GuardrailsResult( - AbstractGuardrailsProcessedCompletion): +class GuardrailsResult: + """ + Represents the result of guardrails processing without cosine similarity scoring. + """ + def __init__( - self, - user_prompt: str, - completion_text: str, - full_prompt: dict[str, Any], - llm_config: dict, - cosine_similarity_score: float = 0.0, - cosine_similarity_risk_threshold: float = 0.0): - + self, + user_prompt: str, + completion_text: str, + full_prompt: dict[str, Any], + llm_config: dict + ): self.user_prompt = user_prompt self.completion_text = completion_text self.full_prompt = full_prompt - self.llm_config = llm_config - self.cosine_similarity_score = cosine_similarity_score - self.cosine_similarity_risk_threshold = cosine_similarity_risk_threshold - - def is_original_completion_malicious(self) -> bool: - return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold + self.llm_config = llm_config \ No newline at end of file diff --git a/src/text_generation/domain/guidelines_result.py b/src/text_generation/domain/guidelines_result.py index 341d43479..75d77e9b6 100644 --- a/src/text_generation/domain/guidelines_result.py +++ b/src/text_generation/domain/guidelines_result.py @@ -2,23 +2,26 @@ from typing import Any, List from src.text_generation.domain.abstract_guidelines_processed_completion import AbstractGuidelinesProcessedCompletion -class GuidelinesResult( - AbstractGuidelinesProcessedCompletion): +class GuidelinesResult(AbstractGuidelinesProcessedCompletion): + """ + Represents the result of guidelines processing with cosine similarity scoring. + """ + def __init__( - self, - user_prompt: str, - completion_text: str, - full_prompt: dict[str, Any], - llm_config: dict, - cosine_similarity_score: float = 0.0, - cosine_similarity_risk_threshold: float = 0.0): - + self, + user_prompt: str, + completion_text: str, + full_prompt: dict[str, Any], + llm_config: dict, + cosine_similarity_score: float = -1.0, + cosine_similarity_risk_threshold: float = 0.0 + ): self.user_prompt = user_prompt self.completion_text = completion_text self.full_prompt = full_prompt self.llm_config = llm_config self.cosine_similarity_score = cosine_similarity_score self.cosine_similarity_risk_threshold = cosine_similarity_risk_threshold - + def is_original_completion_malicious(self) -> bool: - return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold + return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold \ No newline at end of file diff --git a/src/text_generation/domain/original_completion_result.py b/src/text_generation/domain/original_completion_result.py new file mode 100644 index 000000000..23f8aeeb9 --- /dev/null +++ b/src/text_generation/domain/original_completion_result.py @@ -0,0 +1,25 @@ +from typing import Any +from src.text_generation.domain.abstract_text_generation_completion_result import AbstractTextGenerationCompletionResult + + +class OriginalCompletionResult(AbstractTextGenerationCompletionResult): + """ + Represents the original completion result with its own cosine similarity scoring. + """ + + def __init__( + self, + user_prompt: str, + completion_text: str, + full_prompt: dict[str, Any], + llm_config: dict + ): + self.user_prompt = user_prompt + self.completion_text = completion_text + self.full_prompt = full_prompt + self.llm_config = llm_config + self.cosine_similarity_score: float = -1.0 + self.cosine_similarity_risk_threshold: float = 0.0 + + def is_completion_malicious(self) -> bool: + return self.cosine_similarity_score >= self.cosine_similarity_risk_threshold \ No newline at end of file diff --git a/src/text_generation/domain/text_generation_completion_result.py b/src/text_generation/domain/text_generation_completion_result.py index 9ca94be0c..e267b4628 100644 --- a/src/text_generation/domain/text_generation_completion_result.py +++ b/src/text_generation/domain/text_generation_completion_result.py @@ -4,31 +4,51 @@ from src.text_generation.domain.abstract_text_generation_completion_result impor from src.text_generation.domain.alternate_completion_result import AlternateCompletionResult from src.text_generation.domain.guardrails_result import GuardrailsResult from src.text_generation.domain.guidelines_result import GuidelinesResult +from src.text_generation.domain.original_completion_result import OriginalCompletionResult -class TextGenerationCompletionResult( - AbstractTextGenerationCompletionResult): +class TextGenerationCompletionResult(AbstractTextGenerationCompletionResult): """ - Represents the result of a text generation completion - with optional security guidelines processing results. + Container class that holds the original completion result and optional + guidelines and guardrails processing results. """ def __init__( - self, - llm_config: dict, - original_user_prompt: str, - original_completion: str, - guidelines_result: Optional[GuidelinesResult] = None, - guardrails_result: Optional[GuardrailsResult] = None): + self, + original_result: OriginalCompletionResult, + guidelines_result: Optional[GuidelinesResult] = None, + guardrails_result: Optional[GuardrailsResult] = None + ): + self.original_result = original_result + self.guidelines_result = guidelines_result + self.guardrails_result = guardrails_result - self.llm_config = llm_config - self.original_user_prompt = original_user_prompt - self.original_completion = original_completion - self.guidelines_processed_completion = guidelines_result - self.guardrails_processed_completion = guardrails_result - self.alternate_result: AlternateCompletionResult = None - - self.final = ( - (self.alternate_result and self.alternate_result.alterate_completion_text) or - original_completion - ) \ No newline at end of file + def finalize(self) -> str: + """ + Returns the final completion text based on priority order: + 1. guardrails_result.completion_text (if not empty) + 2. guidelines_result.completion_text (if not empty) + 3. original_result.completion_text (if not empty) + """ + + # Check guardrails_result.completion_text first + if (self.guardrails_result and + self.guardrails_result.completion_text and + self.guardrails_result.completion_text.strip()): + return self.guardrails_result.completion_text + + # Fall back to guidelines_result.completion_text + if (self.guidelines_result and + self.guidelines_result.completion_text and + self.guidelines_result.completion_text.strip()): + return self.guidelines_result.completion_text + + # Fall back to original_result.completion_text + if (self.original_result and + self.original_result.completion_text and + self.original_result.completion_text.strip()): + return self.original_result.completion_text + + # If all are empty, return empty string + return "" + diff --git a/src/text_generation/services/logging/test_run_logging_service.py b/src/text_generation/services/logging/test_run_logging_service.py index 5592ceb34..5df7205a5 100644 --- a/src/text_generation/services/logging/test_run_logging_service.py +++ b/src/text_generation/services/logging/test_run_logging_service.py @@ -36,7 +36,7 @@ class TestRunLoggingService(AbstractTestRunLoggingService): def log_results( self, id: str, - prompt: str, + text_generation_result: str, completion: str, is_rag_few_shot_enabled: bool, is_cot_enabled: bool, diff --git a/tests/conftest.py b/tests/conftest.py index 9ff80f8c0..1c76afb3e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -176,8 +176,17 @@ def prompt_injection_example_service(prompt_injection_example_repository): return PromptInjectionExampleService(repository=prompt_injection_example_repository) @pytest.fixture(scope="session") -def reflexion_guardrails(): - return ReflexionSecurityGuardrailsService() +def reflexion_guardrails( + foundation_model, + response_processing_service, + prompt_template_service, + llm_configuration_introspection_service): + return ReflexionSecurityGuardrailsService( + foundation_model=foundation_model, + response_processing_service=response_processing_service, + prompt_template_service=prompt_template_service, + llm_configuration_introspection_service=llm_configuration_introspection_service + ) @pytest.fixture(scope="session") def response_processing_service(): diff --git a/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py b/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py index ddf5e8e82..84eb7f055 100644 --- a/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py +++ b/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py @@ -28,4 +28,13 @@ def test_07_malicious_prompts_rag_and_cot_reflexion( prompts=malicious_prompts, comparison_texts=prompt_injection_all_example_responses, service_configurator=configure_service - ) \ No newline at end of file + ) + +""" + measure/record: + + - initial prompt/completion text + - initial prompt/completion score + - reflexion prompt/completion text + - post reflexion score +""" \ No newline at end of file diff --git a/tests/integration/test_domain.py b/tests/integration/test_domain.py new file mode 100644 index 000000000..c2e5debe0 --- /dev/null +++ b/tests/integration/test_domain.py @@ -0,0 +1,270 @@ +""" +pytest -v tests/integration/test_domain.py +""" + +import pytest +from unittest.mock import MagicMock + +from src.text_generation.domain.guardrails_result import GuardrailsResult +from src.text_generation.domain.guidelines_result import GuidelinesResult +from src.text_generation.domain.original_completion_result import OriginalCompletionResult +from src.text_generation.domain.text_generation_completion_result import TextGenerationCompletionResult + + +class TestTextGenerationCompletionResult: + """Test suite for TextGenerationCompletionResult and related classes.""" + + @pytest.fixture + def sample_llm_config(self): + """Sample LLM configuration for testing.""" + return { + "model": "test-model", + "temperature": 0.7, + "max_tokens": 100 + } + + @pytest.fixture + def sample_full_prompt(self): + """Sample full prompt for testing.""" + return { + "system": "You are a helpful assistant", + "user": "Test prompt" + } + + def test_original_result_only(self, sample_llm_config, sample_full_prompt): + """Test TextGenerationCompletionResult with only original result filled in.""" + # Arrange + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.5 + original_result.cosine_similarity_risk_threshold=0.7 + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=None, + guardrails_result=None + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "Original completion text" + assert original_result.user_prompt == "Test user prompt" + assert original_result.llm_config == sample_llm_config + assert not original_result.is_completion_malicious() # 0.5 < 0.7 + + def test_guidelines_and_original_guidelines_precedence(self, sample_llm_config, sample_full_prompt): + """Test that guidelines result takes precedence over original when both are present.""" + # Arrange + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.8 + original_result.cosine_similarity_risk_threshold=0.7 + + guidelines_result = GuidelinesResult( + user_prompt="Test user prompt", + completion_text="Guidelines processed completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + guidelines_result.cosine_similarity_score=0.6 + guidelines_result.cosine_similarity_risk_threshold=0.7 + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=guidelines_result, + guardrails_result=None + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "Guidelines processed completion text" + assert original_result.is_completion_malicious() # 0.8 >= 0.7 + assert not guidelines_result.is_original_completion_malicious() # 0.6 < 0.7 + + def test_guardrails_guidelines_original_guardrails_precedence(self, sample_llm_config, sample_full_prompt): + """Test that guardrails result takes precedence when all three are present.""" + # Arrange + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.9 + original_result.cosine_similarity_risk_threshold=0.7 + + guidelines_result = GuidelinesResult( + user_prompt="Test user prompt", + completion_text="Guidelines processed completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + guidelines_result.cosine_similarity_score=0.8 + guidelines_result.cosine_similarity_risk_threshold=0.7 + + guardrails_result = GuardrailsResult( + user_prompt="Test user prompt", + completion_text="Guardrails processed completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=guidelines_result, + guardrails_result=guardrails_result + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "Guardrails processed completion text" + assert original_result.is_completion_malicious() # 0.9 >= 0.7 + assert guidelines_result.is_original_completion_malicious() # 0.8 >= 0.7 + + def test_empty_completion_fallback_behavior(self, sample_llm_config, sample_full_prompt): + """Test fallback behavior when some completion texts are empty.""" + # Arrange - guardrails has empty text, should fall back to guidelines + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.5 + original_result.cosine_similarity_risk_threshold=0.7 + + guidelines_result = GuidelinesResult( + user_prompt="Test user prompt", + completion_text="Guidelines processed completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + guidelines_result.cosine_similarity_score=0.6 + guidelines_result.cosine_similarity_risk_threshold=0.7 + + guardrails_result = GuardrailsResult( + user_prompt="Test user prompt", + completion_text="", # Empty string + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=guidelines_result, + guardrails_result=guardrails_result + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "Guidelines processed completion text" + + def test_whitespace_only_completion_fallback(self, sample_llm_config, sample_full_prompt): + """Test fallback behavior when completion text is only whitespace.""" + # Arrange + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.5 + original_result.cosine_similarity_risk_threshold=0.7 + + guidelines_result = GuidelinesResult( + user_prompt="Test user prompt", + completion_text=" \n\t ", # Whitespace only + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + guidelines_result.cosine_similarity_score=0.6 + guidelines_result.cosine_similarity_risk_threshold=0.7 + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=guidelines_result, + guardrails_result=None + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "Original completion text" + + def test_all_empty_completions_returns_empty_string(self, sample_llm_config, sample_full_prompt): + """Test that empty string is returned when all completions are empty.""" + # Arrange + original_result = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result.cosine_similarity_score=0.5 + original_result.cosine_similarity_risk_threshold=0.7 + + completion_result = TextGenerationCompletionResult( + original_result=original_result, + guidelines_result=None, + guardrails_result=None + ) + + # Act + final_text = completion_result.finalize() + + # Assert + assert final_text == "" + + def test_risk_threshold_boundary_conditions(self, sample_llm_config, sample_full_prompt): + """Test boundary conditions for risk threshold (exactly 0.7).""" + # Test score exactly at threshold + original_result_at_threshold = OriginalCompletionResult( + user_prompt="Test user prompt", + completion_text="Original completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config + ) + + original_result_at_threshold.cosine_similarity_score=0.7 + original_result_at_threshold.cosine_similarity_risk_threshold=0.7 + + # Test score just below threshold + guidelines_result_below_threshold = GuidelinesResult( + user_prompt="Test user prompt", + completion_text="Guidelines completion text", + full_prompt=sample_full_prompt, + llm_config=sample_llm_config, + cosine_similarity_score=0.699999, # Just below threshold + cosine_similarity_risk_threshold=0.7 + ) + + # Assert + assert original_result_at_threshold.is_completion_malicious() # 0.7 >= 0.7 + assert not guidelines_result_below_threshold.is_original_completion_malicious() # 0.699999 < 0.7 \ No newline at end of file