try smaller batches

This commit is contained in:
Adam Wilson
2025-08-18 17:36:01 -06:00
parent 0411049d6b
commit 09eac1f050
2 changed files with 24 additions and 11 deletions
@@ -1,14 +1,13 @@
name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct | Prompts 1-20'
on:
workflow_dispatch:
jobs:
test:
runs-on: ubuntu-latest
strategy:
# Matrix strategy to run 10 batches of 10 prompts each
# Matrix for batches 1-10, each testing 2 prompts (prompts 1-20 total)
matrix:
batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Prevent all jobs from being cancelled if one fails
fail-fast: false
steps:
- name: 'checkout'
@@ -24,6 +23,8 @@ jobs:
shell: bash
env:
PROMPT_BATCH: ${{ matrix.batch }}
BATCH_SIZE: 2
BATCH_OFFSET: 0 # Prompts 1-20 (offset 0)
run: pytest tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py -s --disable-warnings
- name: Check for changes
id: verify-changed-files
@@ -39,5 +40,5 @@ jobs:
git config --local user.email "42450907+lightbroker@users.noreply.github.com"
git config --local user.name "Adam Wilson"
git add .
git commit -m "Auto-generated files from workflow batch ${{ matrix.batch }} [skip ci]"
git commit -m "Auto-generated files from workflow batch ${{ matrix.batch }} (prompts 1-20) [skip ci]"
git push
+19 -7
View File
@@ -12,15 +12,27 @@ from src.text_generation.services.nlp.text_generation_completion_service import
def get_prompt_batch(prompts, batch_size: int = 10, env_var: str = 'PROMPT_BATCH'):
    """
    Return the slice of ``prompts`` selected by batch-related environment variables.

    The slice starts at ``offset + (batch_num - 1) * batch_size`` and spans at
    most ``batch_size`` items, clamped to the end of ``prompts``. A one-line
    summary of the selected range is printed for debugging.

    Environment variables read:
        BATCH_SIZE:    number of prompts per batch (default ``2``).
        <env_var>:     1-based batch number (default ``1``).
        BATCH_OFFSET:  fixed 0-based offset added to the start index. When
                       set, it takes precedence over PROMPT_RANGE.
        PROMPT_RANGE:  1-based range number (default ``1``); used only when
                       BATCH_OFFSET is unset. Each range spans 20 prompts.

    Args:
        prompts: Sliceable sequence of prompts.
        batch_size: Kept for signature compatibility. NOTE: the BATCH_SIZE
            environment variable (default 2) takes precedence over this
            argument, so the value passed here is not used.
        env_var: Name of the environment variable holding the batch number.

    Returns:
        The sub-sequence ``prompts[start:end]`` for the selected batch; empty
        when the batch lies past the end of ``prompts``.

    Raises:
        ValueError: If an environment value is not an integer, if the batch
            number or batch size is below 1, or if the offset is negative.
    """
    # Width of one PROMPT_RANGE step, in prompts.
    prompts_per_range = 20

    batch_size = int(os.getenv('BATCH_SIZE', '2'))
    # Honour the caller-supplied variable name instead of a hard-coded one.
    batch_num = int(os.getenv(env_var, '1'))

    raw_offset = os.getenv('BATCH_OFFSET')
    if raw_offset is not None:
        # Option 1: fixed offset per workflow.
        offset = int(raw_offset)
    else:
        # Option 2: configurable range, converted to an offset.
        prompt_range = int(os.getenv('PROMPT_RANGE', '1'))
        offset = (prompt_range - 1) * prompts_per_range

    # A negative start index would make the slice count from the end of the
    # sequence and silently pick the wrong prompts (or none), so fail loudly.
    if batch_size < 1:
        raise ValueError(f"BATCH_SIZE must be >= 1, got {batch_size}")
    if batch_num < 1:
        raise ValueError(f"{env_var} must be >= 1, got {batch_num}")
    if offset < 0:
        raise ValueError(f"prompt offset must be >= 0, got {offset}")

    # Calculate start and end indices, clamping the end to the sequence length.
    start_idx = offset + (batch_num - 1) * batch_size
    end_idx = min(start_idx + batch_size, len(prompts))

    # Get the subset of prompts for this batch.
    prompt_subset = prompts[start_idx:end_idx]
    print(f"Running batch {batch_num} (range offset {offset}): prompts {start_idx+1}-{end_idx} ({len(prompt_subset)} prompts)")
    return prompt_subset