try smaller batches

2026-07-12 15:46:37 +02:00 · 2025-08-18 17:36:01 -06:00
parent 0411049d6b
commit 09eac1f050
2 changed files with 24 additions and 11 deletions
@@ -1,14 +1,13 @@
-name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
+name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct | Prompts 1-20'
 on:
  workflow_dispatch:
 jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
-      # Matrix strategy to run 10 batches of 10 prompts each
+      # Matrix for batches 1-10, each testing 2 prompts (prompts 1-20 total)
      matrix:
        batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-      # Prevent all jobs from being cancelled if one fails
      fail-fast: false
    steps:
      - name: 'checkout'
@@ -24,6 +23,8 @@ jobs:
        shell: bash
        env:
          PROMPT_BATCH: ${{ matrix.batch }}
+          BATCH_SIZE: 2
+          BATCH_OFFSET: 0  # Prompts 1-20 (offset 0)
        run: pytest tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py -s --disable-warnings
      - name: Check for changes
        id: verify-changed-files
@@ -39,5 +40,5 @@ jobs:
          git config --local user.email "42450907+lightbroker@users.noreply.github.com"
          git config --local user.name "Adam Wilson"
          git add .
-          git commit -m "Auto-generated files from workflow batch ${{ matrix.batch }} [skip ci]"
+          git commit -m "Auto-generated files from workflow batch ${{ matrix.batch }} (prompts 1-20) [skip ci]"
          git push
@@ -12,15 +12,27 @@ from src.text_generation.services.nlp.text_generation_completion_service import


 def get_prompt_batch(prompts, batch_size=10, env_var='PROMPT_BATCH'):
     """
     Return the slice of ``prompts`` selected by environment configuration.

     The slice starts at ``offset + (batch_num - 1) * size`` and holds at most
     ``size`` items; it is clipped to the end of ``prompts`` and may be empty.
     A one-line summary of the selection is printed for debugging.

     Environment variables read:
         BATCH_SIZE    Number of prompts per batch. Overrides ``batch_size``
                       when set.
         <env_var>     1-based batch number (default ``'1'``).
         BATCH_OFFSET  0-based index of the first prompt of batch 1. Takes
                       precedence over PROMPT_RANGE when present.
         PROMPT_RANGE  1-based range number, used only when BATCH_OFFSET is
                       absent; each range spans 20 prompts.

     Args:
         prompts: Sliceable sequence of prompts.
         batch_size: Batch size used when BATCH_SIZE is not set.
         env_var: Name of the environment variable holding the batch number.

     Returns:
         The sub-sequence ``prompts[start:end]`` for the selected batch.

     Raises:
         ValueError: If a variable is not an integer, if the batch size or
             batch number is below 1, or if the resulting offset is negative.
     """
     # Width of one PROMPT_RANGE step (range 1 -> offset 0, range 2 -> 20, ...).
     prompts_per_range = 20

     # The environment wins, but the caller-supplied arguments are honoured
     # instead of being silently discarded.
     size = int(os.getenv('BATCH_SIZE', str(batch_size)))
     batch_num = int(os.getenv(env_var, '1'))

     raw_offset = os.environ.get('BATCH_OFFSET')
     if raw_offset is not None:
         # Option 1: fixed offset supplied directly (one value per workflow).
         offset = int(raw_offset)
     else:
         # Option 2: offset derived from a 1-based range number.
         prompt_range = int(os.getenv('PROMPT_RANGE', '1'))
         offset = (prompt_range - 1) * prompts_per_range

     # A non-positive size/batch or a negative offset would yield a negative
     # slice index, which Python interprets as "count from the end" and would
     # quietly select the wrong prompts.
     if size < 1:
         raise ValueError(f"BATCH_SIZE must be >= 1, got {size}")
     if batch_num < 1:
         raise ValueError(f"{env_var} must be >= 1, got {batch_num}")
     if offset < 0:
         raise ValueError(f"prompt offset must be >= 0, got {offset}")

     start_idx = offset + (batch_num - 1) * size
     end_idx = min(start_idx + size, len(prompts))
     prompt_subset = prompts[start_idx:end_idx]

     print(f"Running batch {batch_num} (range offset {offset}): prompts {start_idx+1}-{end_idx} ({len(prompt_subset)} prompts)")

     return prompt_subset