mirror of
https://github.com/lightbroker/llmsecops-research.git
synced 2026-05-22 16:19:45 +02:00
make mitigation tests consistent
This commit is contained in:
@@ -1,17 +1,8 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_00_benign_prompts_no_mitigation -s
|
||||
|
||||
Data used:
|
||||
1. Samples of compromised output (resulting from NVIDIA garak tests)
|
||||
2. Samples of prompt injection input
|
||||
|
||||
Test cases (batches of 100+ runs each):
|
||||
0. Baseline (no mitigations), with (neutral) mathematical prompt(s)
|
||||
|
||||
$ pytest -k test_00_benign_prompts_no_mitigation -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from test_utils import run_prompt_analysis_test
|
||||
from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
|
||||
|
||||
|
||||
@@ -1,14 +1,6 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_01_malicious_prompts_no_mitigation -s
|
||||
|
||||
Data used:
|
||||
1. Samples of compromised output (resulting from NVIDIA garak tests)
|
||||
2. Samples of prompt injection input
|
||||
|
||||
Test cases (batches of 100+ runs each):
|
||||
1. Baseline (no mitigations), with malicious prompt(s)
|
||||
|
||||
$ pytest -k test_01_malicious_prompts_no_mitigation -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
@@ -33,9 +25,4 @@ def test_01_malicious_prompts_no_mitigation(
|
||||
prompts=malicious_prompts,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
|
||||
# Assertions specific to this test
|
||||
# for result in results:
|
||||
# # at least 1 close match means the prompt is likely an injection attempt
|
||||
# assert result.max > 0.5
|
||||
)
|
||||
@@ -1,20 +1,13 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_02_malicious_prompts_cot -s
|
||||
|
||||
Data used:
|
||||
1. Samples of compromised output (resulting from NVIDIA garak tests)
|
||||
2. Samples of prompt injection input
|
||||
|
||||
Test cases (batches of 100+ runs each):
|
||||
2. Malicious prompt(s), zero-shot chain-of-thought prompt template
|
||||
|
||||
$ pytest -k test_02_malicious_prompts_cot -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from test_utils import run_prompt_analysis_test
|
||||
from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
|
||||
|
||||
|
||||
@pytest.mark.mitigation
|
||||
def test_02_malicious_prompts_cot(
|
||||
text_generation_completion_service: AbstractTextGenerationCompletionService,
|
||||
@@ -33,7 +26,4 @@ def test_02_malicious_prompts_cot(
|
||||
prompts=malicious_prompts,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
# assert result.mean < 0.5
|
||||
# at least 1 close match means the prompt is likely an injection attempt
|
||||
# assert result.max > 0.5
|
||||
)
|
||||
@@ -1,14 +1,6 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_03_malicious_prompts_rag -s
|
||||
|
||||
Data used:
|
||||
1. Samples of compromised output (resulting from NVIDIA garak tests)
|
||||
2. Samples of prompt injection input
|
||||
|
||||
Test cases (batches of 100+ runs each):
|
||||
3. Malicious prompt(s), RAG with few-shot examples prompt template
|
||||
|
||||
$ pytest -k test_03_malicious_prompts_rag -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
@@ -34,8 +26,4 @@ def test_03_malicious_prompts_rag(
|
||||
prompts=malicious_prompts,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
|
||||
# assert result.mean < 0.5
|
||||
# at least 1 close match means the prompt is likely an injection attempt
|
||||
# assert result.max > 0.5
|
||||
)
|
||||
@@ -1,14 +1,6 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_04_malicious_prompts_rag_and_cot -s
|
||||
|
||||
Data used:
|
||||
1. Samples of compromised output (resulting from NVIDIA garak tests)
|
||||
2. Samples of prompt injection input
|
||||
|
||||
Test cases (batches of 100+ runs each):
|
||||
3. Malicious prompt(s), RAG with few-shot examples prompt template
|
||||
|
||||
$ pytest -k test_04_malicious_prompts_rag_and_cot -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
@@ -28,8 +20,6 @@ def test_04_malicious_prompts_rag_and_cot(
|
||||
.with_chain_of_thought_guidelines()
|
||||
.with_rag_context_guidelines())
|
||||
|
||||
# TODO: reflexion tests
|
||||
|
||||
results = run_prompt_analysis_test(
|
||||
test_id=4,
|
||||
text_generation_completion_service=text_generation_completion_service,
|
||||
@@ -37,8 +27,4 @@ def test_04_malicious_prompts_rag_and_cot(
|
||||
prompts=malicious_prompts,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
|
||||
# assert result.mean < 0.5
|
||||
# at least 1 close match means the prompt is likely an injection attempt
|
||||
# assert result.max > 0.5
|
||||
)
|
||||
@@ -0,0 +1,31 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_05_malicious_prompts_cot_with_reflexion -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from test_utils import run_prompt_analysis_test
|
||||
from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
|
||||
|
||||
|
||||
@pytest.mark.mitigation
def test_05_malicious_prompts_cot_with_reflexion(
    text_generation_completion_service: AbstractTextGenerationCompletionService,
    semantic_similarity_service,
    malicious_prompts,
    prompt_injection_all_example_responses,
):
    """Analyze malicious prompts with chain-of-thought plus reflexion enabled.

    Passes the malicious prompts and the example responses to
    ``run_prompt_analysis_test`` under test id 5, together with a
    configurator that enables the chain-of-thought guidelines and then the
    reflexion guardrails on the completion service.
    """

    def configure_service(service: AbstractTextGenerationCompletionService):
        # Enable the mitigations one step at a time: chain-of-thought first,
        # reflexion second.
        with_cot = service.with_chain_of_thought_guidelines()
        return with_cot.with_reflexion_guardrails()

    # The returned results are not asserted on in this test.
    results = run_prompt_analysis_test(
        test_id=5,
        text_generation_completion_service=text_generation_completion_service,
        semantic_similarity_service=semantic_similarity_service,
        prompts=malicious_prompts,
        comparison_texts=prompt_injection_all_example_responses,
        service_configurator=configure_service,
    )
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_06_malicious_prompts_rag_with_reflexion -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from test_utils import run_prompt_analysis_test
|
||||
from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
|
||||
|
||||
|
||||
@pytest.mark.mitigation
def test_06_malicious_prompts_rag_with_reflexion(
    text_generation_completion_service: AbstractTextGenerationCompletionService,
    semantic_similarity_service,
    malicious_prompts,
    prompt_injection_all_example_responses,
):
    """Analyze malicious prompts with RAG context plus reflexion enabled.

    Passes the malicious prompts and the example responses to
    ``run_prompt_analysis_test`` under test id 6, together with a
    configurator that enables the RAG context guidelines and then the
    reflexion guardrails on the completion service.
    """

    def configure_service(service: AbstractTextGenerationCompletionService):
        # Enable the mitigations one step at a time: RAG context first,
        # reflexion second.
        with_rag = service.with_rag_context_guidelines()
        return with_rag.with_reflexion_guardrails()

    # The returned results are not asserted on in this test.
    results = run_prompt_analysis_test(
        test_id=6,
        text_generation_completion_service=text_generation_completion_service,
        semantic_similarity_service=semantic_similarity_service,
        prompts=malicious_prompts,
        comparison_texts=prompt_injection_all_example_responses,
        service_configurator=configure_service,
    )
|
||||
@@ -0,0 +1,31 @@
|
||||
"""
|
||||
Usage:
|
||||
$ pytest -k test_07_malicious_prompts_rag_and_cot_reflexion -s --disable-warnings
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from test_utils import run_prompt_analysis_test
|
||||
from src.text_generation.services.nlp.abstract_text_generation_completion_service import AbstractTextGenerationCompletionService
|
||||
|
||||
|
||||
@pytest.mark.mitigation
def test_07_malicious_prompts_rag_and_cot_reflexion(
    text_generation_completion_service: AbstractTextGenerationCompletionService,
    semantic_similarity_service,
    malicious_prompts,
    prompt_injection_all_example_responses,
):
    """Analyze malicious prompts with CoT, RAG context and reflexion enabled.

    Passes the malicious prompts and the example responses to
    ``run_prompt_analysis_test`` under test id 7, together with a
    configurator that enables the chain-of-thought guidelines, the RAG
    context guidelines and the reflexion guardrails, in that order.
    """

    def configure_service(service: AbstractTextGenerationCompletionService):
        # Enable the mitigations one step at a time: chain-of-thought,
        # then RAG context, then reflexion.
        with_cot = service.with_chain_of_thought_guidelines()
        with_cot_and_rag = with_cot.with_rag_context_guidelines()
        return with_cot_and_rag.with_reflexion_guardrails()

    # The returned results are not asserted on in this test.
    results = run_prompt_analysis_test(
        test_id=7,
        text_generation_completion_service=text_generation_completion_service,
        semantic_similarity_service=semantic_similarity_service,
        prompts=malicious_prompts,
        comparison_texts=prompt_injection_all_example_responses,
        service_configurator=configure_service,
    )
|
||||
Reference in New Issue
Block a user