diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.01-20.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.01-20.yml
new file mode 100644
index 000000000..4ed1be9fb
--- /dev/null
+++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.01-20.yml
@@ -0,0 +1,25 @@
+name: 'Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
+on:
+  workflow_dispatch:
+
+jobs:
+  test-all-ranges:
+    strategy:
+      matrix:
+        include:
+          - offset: 0
+            range: "1-20"
+          - offset: 20
+            range: "21-40"
+          - offset: 40
+            range: "41-60"
+          - offset: 60
+            range: "61-80"
+          - offset: 80
+            range: "81-100"
+      fail-fast: false
+    uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
+    with:
+      batch_offset: ${{ matrix.offset }}
+      range_name: ${{ matrix.range }}
+      batch_size: 2
\ No newline at end of file
diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
new file mode 100644
index 000000000..b513e57fc
--- /dev/null
+++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.base.yml
@@ -0,0 +1,59 @@
+name: 'Reusable Test #4 | RAG + CoT | microsoft/Phi-3-mini-4k-instruct'
+on:
+  workflow_call:
+    inputs:
+      batch_offset:
+        description: 'Starting prompt index offset'
+        required: true
+        type: number
+      range_name:
+        description: 'Human readable range name (e.g., "1-20")'
+        required: true
+        type: string
+      batch_size:
+        description: 'Number of prompts per batch'
+        required: false
+        type: number
+        default: 2
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      # Always 10 batches per workflow
+      matrix:
+        batch: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+      fail-fast: false
+    steps:
+      - name: 'checkout'
+        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+      - name: 'set up Python'
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.12'
+      - name: 'set up Python dependencies'
+        shell: bash
+        run: pip install -r ${{ github.workspace }}/requirements.txt
+      - name: 'run text generation tests - range ${{ inputs.range_name }} batch ${{ matrix.batch }}'
+        shell: bash
+        env:
+          PROMPT_BATCH: ${{ matrix.batch }}
+          BATCH_SIZE: ${{ inputs.batch_size }}
+          BATCH_OFFSET: ${{ inputs.batch_offset }}
+        run: pytest tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py -s --disable-warnings
+      - name: Check for changes
+        id: verify-changed-files
+        run: |
+          if [ -n "$(git status --porcelain)" ]; then
+            echo "changed=true" >> $GITHUB_OUTPUT
+          else
+            echo "changed=false" >> $GITHUB_OUTPUT
+          fi
+      - name: Commit and push changes
+        if: steps.verify-changed-files.outputs.changed == 'true'
+        run: |
+          git config --local user.email "42450907+lightbroker@users.noreply.github.com"
+          git config --local user.name "Adam Wilson"
+          git add .
+          git commit -m "Auto-generated files from range ${{ inputs.range_name }} batch ${{ matrix.batch }} [skip ci]"
+          git push
\ No newline at end of file
diff --git a/src/text_generation/services/logging/test_run_logging_service.py b/src/text_generation/services/logging/test_run_logging_service.py
index 9f3146130..88c66efb9 100644
--- a/src/text_generation/services/logging/test_run_logging_service.py
+++ b/src/text_generation/services/logging/test_run_logging_service.py
@@ -13,10 +13,16 @@ from src.text_generation.services.logging.abstract_test_run_logging_service impo
 
 class TestRunLoggingService(AbstractTestRunLoggingService):
 
-    def __init__(self, test_id: int, model_id: ModelId):
+    def __init__(
+        self,
+        test_id: int,
+        model_id: ModelId,
+        start: int,
+        end: int
+    ):
         self._lock = threading.Lock()
         timestamp = calendar.timegm(time.gmtime())
-        self.log_file_path = f"./tests/logs/test_{test_id}/{str(model_id.value).replace("/", "_")}/test_{test_id}_logs_{timestamp}.json"
+        self.log_file_path = f"./tests/logs/test_{test_id}/{start}_{end}/{str(model_id.value).replace("/", "_")}/test_{test_id}_logs_{timestamp}.json"
         self._ensure_log_file_exists()
 
     def _ensure_log_file_exists(self):
diff --git a/tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py b/tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py
index 0a9ede299..4fc7a230b 100644
--- a/tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py
+++ b/tests/integration/test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct.py
@@ -18,12 +18,16 @@ def test_04_malicious_prompts_rag_and_cot_microsoft_phi_3_mini4k_instruct(
         .with_chain_of_thought_guidelines()
         .with_rag_context_guidelines())
 
+    prompt_batch, start, end = get_prompt_batch(malicious_prompts)
+
     results = run_prompt_analysis_test(
         test_id=4,
         model_id=ModelId.MICROSOFT_PHI_3_MINI4K_INSTRUCT,
         text_generation_completion_service=text_generation_completion_service,
         semantic_similarity_service=semantic_similarity_service,
-        prompts=get_prompt_batch(malicious_prompts),
+        prompts=prompt_batch,
+        start=start,
+        end=end,
         comparison_texts=prompt_injection_all_example_responses,
         service_configurator=configure_service
     )
\ No newline at end of file
diff --git a/tests/integration/test_utils.py b/tests/integration/test_utils.py
index f8bfe36a3..7c8edeaf5 100644
--- a/tests/integration/test_utils.py
+++ b/tests/integration/test_utils.py
@@ -11,7 +11,7 @@ from src.text_generation.services.nlp.text_generation_completion_service import
 
 
 
-def get_prompt_batch(prompts, batch_size=10, env_var='PROMPT_BATCH'):
+def get_prompt_batch(prompts: List[str], batch_size=10, env_var='PROMPT_BATCH'):
     batch_size = int(os.getenv('BATCH_SIZE', '2'))
     batch_num = int(os.getenv('PROMPT_BATCH', '1'))
 
@@ -29,11 +29,11 @@ def get_prompt_batch(prompts, batch_size=10, env_var='PROMPT_BATCH'):
     end_idx = min(start_idx + batch_size, len(prompts))
 
     # Get the subset of prompts for this batch
-    prompt_subset = prompts[start_idx:end_idx]
+    prompt_subset: List[str] = prompts[start_idx:end_idx]
 
     print(f"Running batch {batch_num} (range offset {offset}): prompts {start_idx+1}-{end_idx} ({len(prompt_subset)} prompts)")
 
-    return prompt_subset
+    return prompt_subset, start_idx, end_idx
 
 
 def run_prompt_analysis_test(
@@ -42,6 +42,8 @@ def run_prompt_analysis_test(
     text_generation_completion_service: AbstractTextGenerationCompletionService,
     semantic_similarity_service: AbstractSemanticSimilarityService,
     prompts: List,
+    start: int,
+    end: int,
     comparison_texts: List,
     service_configurator: Callable,
     max_prompts: int = 100
@@ -80,7 +82,12 @@ def run_prompt_analysis_test(
         print(f'{i}/{len(prompts)} Max Score: {result.max}')
         print(f'{i}/{len(prompts)} Avg Score: {result.mean}')
 
-        TestRunLoggingService(test_id=test_id, model_id=model_id).log_results(
+        TestRunLoggingService(
+            test_id=test_id,
+            model_id=model_id,
+            start=start,
+            end=end
+        ).log_results(
             id=inspect.currentframe().f_back.f_code.co_name,
             text_generation_completion_result=completion_result,
             final_completion_text_score=result.max,
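
Reviewer note on the batching scheme (a sketch, not part of the patch): the five matrix entries in the dispatch workflow (offsets 0/20/40/60/80), the 10-batch matrix in the reusable workflow, and the default batch_size of 2 together cover exactly prompts 1-100 (5 x 10 x 2). The hunks above do not show how get_prompt_batch derives offset and start_idx from the environment, so the Python sketch below is an inferred reconstruction from the exported BATCH_OFFSET / PROMPT_BATCH / BATCH_SIZE variables and the printed message; the start_idx formula is an assumption, not confirmed by the diff.

    import os
    from typing import List, Tuple

    def get_prompt_batch_sketch(prompts: List[str]) -> Tuple[List[str], int, int]:
        # Hypothetical reconstruction of the elided arithmetic in
        # tests/integration/test_utils.py::get_prompt_batch.
        batch_size = int(os.getenv('BATCH_SIZE', '2'))    # prompts per batch
        batch_num = int(os.getenv('PROMPT_BATCH', '1'))   # 1..10, from the job matrix
        offset = int(os.getenv('BATCH_OFFSET', '0'))      # 0, 20, 40, 60, or 80

        # Assumed: each workflow range starts at `offset`, and successive
        # batches step through it in batch_size increments.
        start_idx = offset + (batch_num - 1) * batch_size
        end_idx = min(start_idx + batch_size, len(prompts))
        return prompts[start_idx:end_idx], start_idx, end_idx

    # Example: BATCH_OFFSET=40, PROMPT_BATCH=3, BATCH_SIZE=2 gives
    # start_idx = 44, end_idx = 46, i.e. prompts 45-46 of the "41-60" range.

Under that reading, the new {start}_{end} path segment in TestRunLoggingService yields a distinct log directory per batch, e.g. ./tests/logs/test_4/44_46/microsoft_Phi-3-mini-4k-instruct/test_4_logs_<timestamp>.json, so the 50 concurrent matrix jobs do not commit to the same file.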