From b83553d767220f362673892bd7bcd5be8027db58 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Sun, 27 Jul 2025 17:44:40 -0600
Subject: [PATCH] Make mitigation tests consistent and add reflexion tests 05-07

---
 .../test_00_benign_prompts_no_mitigation.py   | 11 +------
 ...test_01_malicious_prompts_no_mitigation.py | 17 ++--------
 .../test_02_malicious_prompts_cot.py          | 16 ++--------
 .../test_03_malicious_prompts_rag.py          | 16 ++--------
 .../test_04_malicious_prompts_rag_and_cot.py  | 18 ++---------
 ...05_malicious_prompts_cot_with_reflexion.py | 31 +++++++++++++++++++
 ...06_malicious_prompts_rag_with_reflexion.py | 30 ++++++++++++++++++
 ...malicious_prompts_rag_and_cot_reflexion.py | 31 +++++++++++++++++++
 8 files changed, 102 insertions(+), 68 deletions(-)
 create mode 100644 tests/integration/test_05_malicious_prompts_cot_with_reflexion.py
 create mode 100644 tests/integration/test_06_malicious_prompts_rag_with_reflexion.py
 create mode 100644 tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py

diff --git a/tests/integration/test_00_benign_prompts_no_mitigation.py b/tests/integration/test_00_benign_prompts_no_mitigation.py
index 088ba8b99..49c4d6793 100644
--- a/tests/integration/test_00_benign_prompts_no_mitigation.py
+++ b/tests/integration/test_00_benign_prompts_no_mitigation.py
@@ -1,17 +1,8 @@
 """
     Usage:
-    $ pytest -k test_00_benign_prompts_no_mitigation -s
-
-    Data used:
-        1. Samples of compromised output (resulting from NVIDIA garak tests)
-        2. Samples of prompt injection input
-
-    Test cases (batches of 100+ runs each):
-        0. Baseline (no mitigations), with (neutral) mathematical prompt(s)
-
+    $ pytest -k test_00_benign_prompts_no_mitigation -s --disable-warnings
 """
 
-import pytest
 from test_utils import run_prompt_analysis_test
 from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
 
diff --git a/tests/integration/test_01_malicious_prompts_no_mitigation.py b/tests/integration/test_01_malicious_prompts_no_mitigation.py
index f1591642a..a4f43e4ae 100644
--- a/tests/integration/test_01_malicious_prompts_no_mitigation.py
+++ b/tests/integration/test_01_malicious_prompts_no_mitigation.py
@@ -1,14 +1,6 @@
 """
     Usage:
-    $ pytest -k test_01_malicious_prompts_no_mitigation -s
-
-    Data used:
-        1. Samples of compromised output (resulting from NVIDIA garak tests)
-        2. Samples of prompt injection input
-
-    Test cases (batches of 100+ runs each):
-        1. Baseline (no mitigations), with malicious prompt(s)
-
+    $ pytest -k test_01_malicious_prompts_no_mitigation -s --disable-warnings
 """
 
 import pytest
@@ -33,9 +25,4 @@ def test_01_malicious_prompts_no_mitigation(
         prompts=malicious_prompts,
         comparison_texts=prompt_injection_all_example_responses,
         service_configurator=configure_service
-    )
-    
-    # Assertions specific to this test
-    # for result in results:
-    #     # at least 1 close match means the prompt is likely an injection attempt
-    #     assert result.max > 0.5
\ No newline at end of file
+    )
\ No newline at end of file
diff --git a/tests/integration/test_02_malicious_prompts_cot.py b/tests/integration/test_02_malicious_prompts_cot.py
index 417aed4c5..757df3245 100644
--- a/tests/integration/test_02_malicious_prompts_cot.py
+++ b/tests/integration/test_02_malicious_prompts_cot.py
@@ -1,20 +1,13 @@
 """
     Usage:
-    $ pytest -k test_02_malicious_prompts_cot -s
-
-    Data used:
-        1. Samples of compromised output (resulting from NVIDIA garak tests)
-        2. Samples of prompt injection input
-
-    Test cases (batches of 100+ runs each):
-        2. Malicious prompt(s), zero-shot chain-of-thought prompt template
-
+    $ pytest -k test_02_malicious_prompts_cot -s --disable-warnings
 """
 
 import pytest
 from test_utils import run_prompt_analysis_test
 from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
 
+
 @pytest.mark.mitigation
 def test_02_malicious_prompts_cot(
     text_generation_completion_service: AbstractTextGenerationCompletionService,
@@ -33,7 +26,4 @@ def test_02_malicious_prompts_cot(
         prompts=malicious_prompts,
         comparison_texts=prompt_injection_all_example_responses,
         service_configurator=configure_service
-    )
-        # assert result.mean < 0.5
-        # at least 1 close match means the prompt is likely an injection attempt
-        # assert result.max > 0.5
+    )
\ No newline at end of file
diff --git a/tests/integration/test_03_malicious_prompts_rag.py b/tests/integration/test_03_malicious_prompts_rag.py
index 019fca34d..e530aad2d 100644
--- a/tests/integration/test_03_malicious_prompts_rag.py
+++ b/tests/integration/test_03_malicious_prompts_rag.py
@@ -1,14 +1,6 @@
 """
     Usage:
-    $ pytest -k test_03_malicious_prompts_rag -s
-
-    Data used:
-        1. Samples of compromised output (resulting from NVIDIA garak tests)
-        2. Samples of prompt injection input
-
-    Test cases (batches of 100+ runs each):
-        3. Malicious prompt(s), RAG with few-shot examples prompt template
-
+    $ pytest -k test_03_malicious_prompts_rag -s --disable-warnings
 """
 
 import pytest
@@ -34,8 +26,4 @@ def test_03_malicious_prompts_rag(
         prompts=malicious_prompts,
         comparison_texts=prompt_injection_all_example_responses,
         service_configurator=configure_service
-    )
-    
-        # assert result.mean < 0.5
-        # at least 1 close match means the prompt is likely an injection attempt
-        # assert result.max > 0.5
\ No newline at end of file
+    )
\ No newline at end of file
diff --git a/tests/integration/test_04_malicious_prompts_rag_and_cot.py b/tests/integration/test_04_malicious_prompts_rag_and_cot.py
index 7434fcef5..2f3860ea7 100644
--- a/tests/integration/test_04_malicious_prompts_rag_and_cot.py
+++ b/tests/integration/test_04_malicious_prompts_rag_and_cot.py
@@ -1,14 +1,6 @@
 """
     Usage:
-    $ pytest -k test_04_malicious_prompts_rag_and_cot -s
-
-    Data used:
-        1. Samples of compromised output (resulting from NVIDIA garak tests)
-        2. Samples of prompt injection input
-
-    Test cases (batches of 100+ runs each):
-        3. Malicious prompt(s), RAG with few-shot examples prompt template
-
+    $ pytest -k test_04_malicious_prompts_rag_and_cot -s --disable-warnings
 """
 
 import pytest
@@ -28,8 +20,6 @@ def test_04_malicious_prompts_rag_and_cot(
                 .with_chain_of_thought_guidelines()
                 .with_rag_context_guidelines())
     
-    # TODO: reflexion tests
-    
     results = run_prompt_analysis_test(
         test_id=4,
         text_generation_completion_service=text_generation_completion_service,
@@ -37,8 +27,4 @@ def test_04_malicious_prompts_rag_and_cot(
         prompts=malicious_prompts,
         comparison_texts=prompt_injection_all_example_responses,
         service_configurator=configure_service
-    )
-
-        # assert result.mean < 0.5
-        # at least 1 close match means the prompt is likely an injection attempt
-        # assert result.max > 0.5
+    )
\ No newline at end of file
diff --git a/tests/integration/test_05_malicious_prompts_cot_with_reflexion.py b/tests/integration/test_05_malicious_prompts_cot_with_reflexion.py
new file mode 100644
index 000000000..78ac45a03
--- /dev/null
+++ b/tests/integration/test_05_malicious_prompts_cot_with_reflexion.py
@@ -0,0 +1,36 @@
+"""
+    Usage:
+    $ pytest -k test_05_malicious_prompts_cot_with_reflexion -s --disable-warnings
+"""
+
+import pytest
+from test_utils import run_prompt_analysis_test
+from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
+
+
+@pytest.mark.mitigation
+def test_05_malicious_prompts_cot_with_reflexion(
+    text_generation_completion_service: AbstractTextGenerationCompletionService,
+    semantic_similarity_service,
+    malicious_prompts,
+    prompt_injection_all_example_responses
+):
+    """Run the prompt-analysis helper on malicious prompts as test case 5.
+
+    The configurator passed to the helper chains chain-of-thought guidelines
+    and reflexion guardrails onto the service. The returned results are not
+    asserted on in this test.
+    """
+    def configure_service(service: AbstractTextGenerationCompletionService):
+        # Handed to the helper below as ``service_configurator``.
+        return (service
+                .with_chain_of_thought_guidelines()
+                .with_reflexion_guardrails())
+
+    results = run_prompt_analysis_test(
+        test_id=5,
+        text_generation_completion_service=text_generation_completion_service,
+        semantic_similarity_service=semantic_similarity_service,
+        prompts=malicious_prompts,
+        comparison_texts=prompt_injection_all_example_responses,
+        service_configurator=configure_service
+    )
diff --git a/tests/integration/test_06_malicious_prompts_rag_with_reflexion.py b/tests/integration/test_06_malicious_prompts_rag_with_reflexion.py
new file mode 100644
index 000000000..74b6848d1
--- /dev/null
+++ b/tests/integration/test_06_malicious_prompts_rag_with_reflexion.py
@@ -0,0 +1,36 @@
+"""
+    Usage:
+    $ pytest -k test_06_malicious_prompts_rag_with_reflexion -s --disable-warnings
+"""
+
+import pytest
+from test_utils import run_prompt_analysis_test
+from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
+
+
+@pytest.mark.mitigation
+def test_06_malicious_prompts_rag_with_reflexion(
+    text_generation_completion_service: AbstractTextGenerationCompletionService,
+    semantic_similarity_service,
+    malicious_prompts,
+    prompt_injection_all_example_responses
+):
+    """Run the prompt-analysis helper on malicious prompts as test case 6.
+
+    The configurator passed to the helper chains RAG context guidelines and
+    reflexion guardrails onto the service. The returned results are not
+    asserted on in this test.
+    """
+    def configure_service(service: AbstractTextGenerationCompletionService):
+        # Handed to the helper below as ``service_configurator``.
+        return (service
+                .with_rag_context_guidelines()
+                .with_reflexion_guardrails())
+
+    results = run_prompt_analysis_test(
+        test_id=6,
+        text_generation_completion_service=text_generation_completion_service,
+        semantic_similarity_service=semantic_similarity_service,
+        prompts=malicious_prompts,
+        comparison_texts=prompt_injection_all_example_responses,
+        service_configurator=configure_service
+    )
diff --git a/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py b/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py
new file mode 100644
index 000000000..ddf5e8e82
--- /dev/null
+++ b/tests/integration/test_07_malicious_prompts_rag_and_cot_reflexion.py
@@ -0,0 +1,37 @@
+"""
+    Usage:
+    $ pytest -k test_07_malicious_prompts_rag_and_cot_reflexion -s --disable-warnings
+"""
+
+import pytest
+from test_utils import run_prompt_analysis_test
+from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
+
+
+@pytest.mark.mitigation
+def test_07_malicious_prompts_rag_and_cot_reflexion(
+    text_generation_completion_service: AbstractTextGenerationCompletionService,
+    semantic_similarity_service,
+    malicious_prompts,
+    prompt_injection_all_example_responses
+):
+    """Run the prompt-analysis helper on malicious prompts as test case 7.
+
+    The configurator passed to the helper chains chain-of-thought guidelines,
+    RAG context guidelines and reflexion guardrails onto the service. The
+    returned results are not asserted on in this test.
+    """
+    def configure_service(service: AbstractTextGenerationCompletionService):
+        # Handed to the helper below as ``service_configurator``.
+        return (service
+                .with_chain_of_thought_guidelines()
+                .with_rag_context_guidelines()
+                .with_reflexion_guardrails())
+
+    results = run_prompt_analysis_test(
+        test_id=7,
+        text_generation_completion_service=text_generation_completion_service,
+        semantic_similarity_service=semantic_similarity_service,
+        prompts=malicious_prompts,
+        comparison_texts=prompt_injection_all_example_responses,
+        service_configurator=configure_service
+    )