support batch tests

This commit is contained in:
Adam Wilson
2025-08-19 20:09:34 -06:00
parent 3585f80414
commit cc124a91a3
12 changed files with 277 additions and 74 deletions
@@ -0,0 +1,83 @@
# Generic reusable runner for the RAG + CoT prompt tests.
#
# A caller supplies the pytest file, a display name for the model, and the
# prompt range to cover. The job fans out into a fixed 10-entry matrix; each
# matrix entry exports PROMPT_BATCH / BATCH_SIZE / BATCH_OFFSET to pytest
# (presumably the test module uses them to pick its slice of prompts --
# verify against the test code). Any files left in the working tree after
# the test run are committed and pushed to a fresh `auto-generated-*` branch.
name: 'Reusable Test Runner | RAG + CoT | Generic'
on:
  workflow_call:
    inputs:
      batch_offset:
        description: 'Starting prompt index offset'
        required: true
        type: number
      range_name:
        description: 'Human readable range name (e.g., "1-20")'
        required: true
        type: string
      test_file_path:
        description: 'Path to the test file to run'
        required: true
        type: string
      model_display_name:
        description: 'Human readable model name for workflow title'
        required: true
        type: string
      batch_size:
        description: 'Number of prompts per batch'
        required: false
        type: number
        default: 2
jobs:
  test:
    runs-on: ubuntu-latest
    timeout-minutes: 55  # Set max runtime for each job
    # A failing batch must not fail the calling workflow.
    continue-on-error: true
    strategy:
      # Always 10 batches per workflow
      matrix:
        batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
      # Let the remaining batches finish even if one of them fails.
      fail-fast: false
    steps:
      - name: 'checkout'
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
      - name: 'set up Python'
        # v3 is an outdated major version of this action; v5 is the
        # maintained line.
        uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: 'set up Python dependencies'
        shell: bash
        run: |
          pip install -r ${{ github.workspace }}/requirements.txt
      - name: 'set up Microsoft Phi-3 Mini 4k LLM from HuggingFace'
        shell: bash
        # The extra specifier and the --include pattern are quoted so the
        # shell never attempts filename expansion on `[cli]` or `*`.
        run: |
          pip install 'huggingface-hub[cli]'
          huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include 'cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/*' --local-dir ${{ github.workspace }}/infrastructure/foundation_model
      - name: 'run text generation tests - ${{ inputs.model_display_name }} - range ${{ inputs.range_name }} batch ${{ matrix.batch }}'
        shell: bash
        env:
          PROMPT_BATCH: ${{ matrix.batch }}
          BATCH_SIZE: ${{ inputs.batch_size }}
          BATCH_OFFSET: ${{ inputs.batch_offset }}
        run: pytest ${{ inputs.test_file_path }} -s --disable-warnings
      - name: Check for changes
        id: verify-changed-files
        shell: bash
        # Exposes `changed=true|false` depending on whether the test run left
        # anything new or modified in the working tree.
        run: |
          if [ -n "$(git status --porcelain)" ]; then
            echo "changed=true" >> "$GITHUB_OUTPUT"
          else
            echo "changed=false" >> "$GITHUB_OUTPUT"
          fi
      - name: Commit and push changes
        if: steps.verify-changed-files.outputs.changed == 'true'
        shell: bash
        # Values are handed over through the environment instead of being
        # interpolated into the script text, so they are treated as data.
        env:
          MODEL_DISPLAY_NAME: ${{ inputs.model_display_name }}
          RANGE_NAME: ${{ inputs.range_name }}
          PROMPT_BATCH: ${{ matrix.batch }}
        # NOTE(review): pushing needs a token with write access to repository
        # contents -- confirm the calling workflow / repository settings grant
        # it.
        run: |
          git config --local user.email "42450907+lightbroker@users.noreply.github.com"
          git config --local user.name "Adam Wilson"
          git add .
          # Label the commit with the model, range and batch that produced it.
          git commit -m "Auto-generated files from ${MODEL_DISPLAY_NAME} range ${RANGE_NAME} batch ${PROMPT_BATCH} [skip ci]"
          # Run id + batch keep the branch unique when several matrix jobs
          # reach this step within the same second.
          git checkout -b "auto-generated-$(date +%Y%m%d-%H%M%S)-${GITHUB_RUN_ID}-${PROMPT_BATCH}"
          git push origin HEAD
@@ -0,0 +1,14 @@
# Entry point for prompts 1-20 of test #4 (RAG + CoT, Phi-3 Mini 4k).
# Delegates all work to the model-specific reusable workflow.
name: '#4 (1-20) | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  schedule:
    # Slot 1 of 5: minute 0 of hours 0, 5, 10, 15 and 20
    # (GitHub evaluates cron schedules in UTC).
    - cron: '0 */5 * * *'
  # Also allow manual runs.
  workflow_dispatch:
jobs:
  test-prompts-1-20:
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      range_name: '1-20'
      batch_offset: 0
      batch_size: 2
@@ -0,0 +1,14 @@
# Entry point for prompts 21-40 of test #4 (RAG + CoT, Phi-3 Mini 4k).
# Delegates all work to the model-specific reusable workflow.
name: '#4 (21-40) | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  schedule:
    # Slot 2 of 5: minute 0 of hours 1, 6, 11, 16 and 21
    # (GitHub evaluates cron schedules in UTC).
    - cron: '0 1-23/5 * * *'
  # Also allow manual runs.
  workflow_dispatch:
jobs:
  test-prompts-21-40:
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      range_name: '21-40'
      batch_offset: 20
      batch_size: 2
@@ -0,0 +1,14 @@
# Entry point for prompts 41-60 of test #4 (RAG + CoT, Phi-3 Mini 4k).
# Delegates all work to the model-specific reusable workflow.
name: '#4 (41-60) | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  schedule:
    # Slot 3 of 5: minute 0 of hours 2, 7, 12, 17 and 22
    # (GitHub evaluates cron schedules in UTC).
    - cron: '0 2-23/5 * * *'
  # Also allow manual runs.
  workflow_dispatch:
jobs:
  test-prompts-41-60:
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      range_name: '41-60'
      batch_offset: 40
      batch_size: 2
@@ -0,0 +1,14 @@
# Entry point for prompts 61-80 of test #4 (RAG + CoT, Phi-3 Mini 4k).
# Delegates all work to the model-specific reusable workflow.
name: '#4 (61-80) | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  schedule:
    # Slot 4 of 5: minute 0 of hours 3, 8, 13, 18 and 23
    # (GitHub evaluates cron schedules in UTC).
    - cron: '0 3-23/5 * * *'
  # Also allow manual runs.
  workflow_dispatch:
jobs:
  test-prompts-61-80:
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      range_name: '61-80'
      batch_offset: 60
      batch_size: 2
@@ -0,0 +1,14 @@
# Entry point for prompts 81-100 of test #4 (RAG + CoT, Phi-3 Mini 4k).
# Delegates all work to the model-specific reusable workflow.
name: '#4 (81-100) | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  schedule:
    # Slot 5 of 5: minute 0 of hours 4, 9, 14 and 19 -- the range stops at
    # hour 19, so this slot fires four times a day
    # (GitHub evaluates cron schedules in UTC).
    - cron: '0 4-19/5 * * *'
  # Also allow manual runs.
  workflow_dispatch:
jobs:
  test-prompts-81-100:
    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
    with:
      range_name: '81-100'
      batch_offset: 80
      batch_size: 2
@@ -0,0 +1,27 @@
# Model-specific layer for test #4: pins the pytest file and the model display
# name for microsoft/Phi-3-mini-4k-instruct, and forwards the batching inputs
# unchanged to the generic runner workflow.
name: 'Reusable Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
on:
  workflow_call:
    inputs:
      batch_offset:
        description: 'Starting prompt index offset'
        type: number
        required: true
      range_name:
        description: 'Human readable range name (e.g., "1-20")'
        type: string
        required: true
      batch_size:
        description: 'Number of prompts per batch'
        type: number
        required: false
        default: 2
jobs:
  test:
    uses: ./.github/workflows/test_04.abstract_base.yml
    with:
      # Fixed for this model.
      model_display_name: microsoft/Phi-3-mini-4k-instruct
      test_file_path: tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py
      # Passed straight through from the caller.
      range_name: ${{ inputs.range_name }}
      batch_offset: ${{ inputs.batch_offset }}
      batch_size: ${{ inputs.batch_size }}