mirror of
https://github.com/lightbroker/llmsecops-research.git
synced 2026-04-30 15:27:51 +02:00
try smaller batches
This commit is contained in:
+25
@@ -0,0 +1,25 @@
|
||||
# Manually triggered entry point for test #4 (RAG + CoT, microsoft/Phi-3-mini-4k-instruct).
# Fans out into one reusable-workflow call per matrix entry; every entry carries
# a numeric prompt offset and a human-readable label for the range it covers.
name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'

on:
  workflow_dispatch:

jobs:
  test-all-ranges:
    strategy:
      # Keep the remaining ranges running when one of them fails.
      fail-fast: false
      matrix:
        include:
          - { offset: 0, range: "1-20" }
          - { offset: 20, range: "21-40" }
          - { offset: 40, range: "41-60" }
          - { offset: 60, range: "61-80" }
          - { offset: 80, range: "81-100" }
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      batch_offset: ${{ matrix.offset }}
      range_name: ${{ matrix.range }}
      batch_size: 2
|
||||
+59
@@ -0,0 +1,59 @@
|
||||
# Reusable workflow for test #4 (RAG + CoT, microsoft/Phi-3-mini-4k-instruct).
# Invoked via `workflow_call`. Runs the integration test once per matrix batch and
# commits whatever files the run leaves behind in the working tree.
name: 'Reusable Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'

on:
  workflow_call:
    inputs:
      batch_offset:
        description: 'Starting prompt index offset'
        required: true
        type: number
      range_name:
        description: 'Human readable range name (e.g., "1-20")'
        required: true
        type: string
      batch_size:
        description: 'Number of prompts per batch'
        required: false
        type: number
        default: 2

jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      # Always 10 batches per workflow
      matrix:
        batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
      fail-fast: false
    steps:
      - name: 'checkout'
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

      - name: 'set up Python'
        # v3 runs on a deprecated Node runtime; v5 is the maintained major version.
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'

      - name: 'set up Python dependencies'
        shell: bash
        run: pip install -r ${{ github.workspace }}/requirements.txt

      - name: 'run text generation tests - range ${{ inputs.range_name }} batch ${{ matrix.batch }}'
        shell: bash
        env:
          # Exported to the test process to select the prompt slice for this job.
          PROMPT_BATCH: ${{ matrix.batch }}
          BATCH_SIZE: ${{ inputs.batch_size }}
          BATCH_OFFSET: ${{ inputs.batch_offset }}
        run: pytest tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py -s --disable-warnings

      - name: Check for changes
        id: verify-changed-files
        shell: bash
        run: |
          # Any output from `git status --porcelain` means the run created or modified files.
          if [ -n "$(git status --porcelain)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          else
            echo "changed=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Commit and push changes
        if: steps.verify-changed-files.outputs.changed == 'true'
        shell: bash
        env:
          # Pass workflow values through the environment instead of expanding them
          # inline in the script, so their content is never parsed as shell code.
          RANGE_NAME: ${{ inputs.range_name }}
          PROMPT_BATCH: ${{ matrix.batch }}
        run: |
          git config --local user.email "42450907+lightbroker@users.noreply.github.com"
          git config --local user.name "Adam Wilson"
          git add .
          git commit -m "Auto-generated files from range ${RANGE_NAME} batch ${PROMPT_BATCH} [skip ci]"
          # Sibling matrix jobs push to the same branch concurrently, so a plain push is
          # rejected whenever another job got there first. Rebase onto the latest remote
          # state and retry a few times before giving up.
          for attempt in 1 2 3 4 5; do
            if git pull --rebase && git push; then
              exit 0
            fi
            echo "Push attempt ${attempt} failed; retrying..."
            sleep $((attempt * 5))
          done
          echo "Unable to push after 5 attempts" >&2
          exit 1
|
||||
@@ -13,10 +13,16 @@ from src.text_generation.services.logging.abstract_test_run_logging_service impo
|
||||
|
||||
|
||||
class TestRunLoggingService(AbstractTestRunLoggingService):
|
||||
def __init__(self, test_id: int, model_id: ModelId):
|
||||
def __init__(
|
||||
self,
|
||||
test_id: int,
|
||||
model_id: ModelId,
|
||||
start: int,
|
||||
end: int
|
||||
):
|
||||
self._lock = threading.Lock()
|
||||
timestamp = calendar.timegm(time.gmtime())
|
||||
self.log_file_path = f"./tests/logs/test_{test_id}/{str(model_id.value).replace("/", "_")}/test_{test_id}_logs_{timestamp}.json"
|
||||
self.log_file_path = f"./tests/logs/test_{test_id}/{start}_{end}/{str(model_id.value).replace("/", "_")}/test_{test_id}_logs_{timestamp}.json"
|
||||
self._ensure_log_file_exists()
|
||||
|
||||
def _ensure_log_file_exists(self):
|
||||
|
||||
+5
-1
@@ -18,12 +18,16 @@ def test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct(
|
||||
.with_chain_of_thought_guidelines()
|
||||
.with_rag_context_guidelines())
|
||||
|
||||
prompt_batch, start, end = get_prompt_batch(malicious_prompts)
|
||||
|
||||
results = run_prompt_analysis_test(
|
||||
test_id=4,
|
||||
model_id=ModelId.MICROSOFT_PHI_3_MINI4K_INSTRUCT,
|
||||
text_generation_completion_service=text_generation_completion_service,
|
||||
semantic_similarity_service=semantic_similarity_service,
|
||||
prompts=get_prompt_batch(malicious_prompts),
|
||||
prompts=prompt_batch,
|
||||
start=start,
|
||||
end=end,
|
||||
comparison_texts=prompt_injection_all_example_responses,
|
||||
service_configurator=configure_service
|
||||
)
|
||||
@@ -11,7 +11,7 @@ from src.text_generation.services.nlp.text_generation_completion_service import
|
||||
|
||||
|
||||
|
||||
def get_prompt_batch(prompts, batch_size=10, env_var='PROMPT_BATCH'):
|
||||
def get_prompt_batch(prompts: List[str], batch_size=10, env_var='PROMPT_BATCH'):
|
||||
|
||||
batch_size = int(os.getenv('BATCH_SIZE', '2'))
|
||||
batch_num = int(os.getenv('PROMPT_BATCH', '1'))
|
||||
@@ -29,11 +29,11 @@ def get_prompt_batch(prompts, batch_size=10, env_var='PROMPT_BATCH'):
|
||||
end_idx = min(start_idx + batch_size, len(prompts))
|
||||
|
||||
# Get the subset of prompts for this batch
|
||||
prompt_subset = prompts[start_idx:end_idx]
|
||||
prompt_subset: List[str] = prompts[start_idx:end_idx]
|
||||
|
||||
print(f"Running batch {batch_num} (range offset {offset}): prompts {start_idx+1}-{end_idx} ({len(prompt_subset)} prompts)")
|
||||
|
||||
return prompt_subset
|
||||
return prompt_subset, start_idx, end_idx
|
||||
|
||||
|
||||
def run_prompt_analysis_test(
|
||||
@@ -42,6 +42,8 @@ def run_prompt_analysis_test(
|
||||
text_generation_completion_service: AbstractTextGenerationCompletionService,
|
||||
semantic_similarity_service: AbstractSemanticSimilarityService,
|
||||
prompts: List,
|
||||
start: int,
|
||||
end: int,
|
||||
comparison_texts: List,
|
||||
service_configurator: Callable,
|
||||
max_prompts: int = 100
|
||||
@@ -80,7 +82,12 @@ def run_prompt_analysis_test(
|
||||
print(f'{i}/{len(prompts)} Max Score: {result.max}')
|
||||
print(f'{i}/{len(prompts)} Avg Score: {result.mean}')
|
||||
|
||||
TestRunLoggingService(test_id=test_id, model_id=model_id).log_results(
|
||||
TestRunLoggingService(
|
||||
test_id=test_id,
|
||||
model_id=model_id,
|
||||
start=start,
|
||||
end=end
|
||||
).log_results(
|
||||
id=inspect.currentframe().f_back.f_code.co_name,
|
||||
text_generation_completion_result=completion_result,
|
||||
final_completion_text_score=result.max,
|
||||
|
||||
Reference in New Issue
Block a user