diff --git a/.github/workflows/llmsecops-cicd.llm.yml b/.github/workflows/archived/llmsecops-cicd.llm.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.llm.yml rename to .github/workflows/archived/llmsecops-cicd.llm.yml diff --git a/.github/workflows/llmsecops-cicd.llm_rag.yml b/.github/workflows/archived/llmsecops-cicd.llm_rag.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.llm_rag.yml rename to .github/workflows/archived/llmsecops-cicd.llm_rag.yml diff --git a/.github/workflows/llmsecops-cicd.no-rag.yml b/.github/workflows/archived/llmsecops-cicd.no-rag.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.no-rag.yml rename to .github/workflows/archived/llmsecops-cicd.no-rag.yml diff --git a/.github/workflows/llmsecops-cicd.test.garak.yml b/.github/workflows/archived/llmsecops-cicd.test.garak.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.test.garak.yml rename to .github/workflows/archived/llmsecops-cicd.test.garak.yml diff --git a/.github/workflows/llmsecops-cicd.test.yml b/.github/workflows/archived/llmsecops-cicd.test.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.test.yml rename to .github/workflows/archived/llmsecops-cicd.test.yml diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/archived/llmsecops-cicd.yml similarity index 100% rename from .github/workflows/llmsecops-cicd.yml rename to .github/workflows/archived/llmsecops-cicd.yml diff --git a/.github/workflows/test_00.base.yml b/.github/workflows/test_00.base.yml new file mode 100644 index 000000000..1ac6ca697 --- /dev/null +++ b/.github/workflows/test_00.base.yml @@ -0,0 +1,27 @@ +name: 'Reusable Test #0 | No Mitigation | Benign' +on: + workflow_call: + inputs: + batch_offset: + description: 'Starting prompt index offset' + required: true + type: number + range_name: + description: 'Human readable range name (e.g., "1-20")' + required: true + type: string + batch_size: + description: 'Number of prompts per batch' + required: false + type: number + default: 2 + +jobs: + test: + uses: ./.github/workflows/tests.abstract_base.yml + with: + batch_offset: ${{ inputs.batch_offset }} + range_name: ${{ inputs.range_name }} + batch_size: ${{ inputs.batch_size }} + test_file_path: tests/integration/test_00_benign_prompts_no_mitigation.py + model_display_name: microsoft/Phi-3-mini-4k-instruct-onnx diff --git a/.github/workflows/test_00_benign_prompts_no_mitigation.01-20.yml b/.github/workflows/test_00_benign_prompts_no_mitigation.01-20.yml new file mode 100644 index 000000000..18e90fc28 --- /dev/null +++ b/.github/workflows/test_00_benign_prompts_no_mitigation.01-20.yml @@ -0,0 +1,13 @@ +name: '#0 (1-20)' +on: + workflow_dispatch: + schedule: + - cron: '0 0 * * *' # Starts at 00:00 UTC + +jobs: + test-prompts-1-20: + uses: ./.github/workflows/test_00.base.yml + with: + batch_offset: 0 + range_name: "1-20" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_00_benign_prompts_no_mitigation.21-40.yml b/.github/workflows/test_00_benign_prompts_no_mitigation.21-40.yml new file mode 100644 index 000000000..5cae87876 --- /dev/null +++ b/.github/workflows/test_00_benign_prompts_no_mitigation.21-40.yml @@ -0,0 +1,13 @@ +name: '#0 (21-40)' +on: + workflow_dispatch: + schedule: + - cron: '57 0 * * *' # Starts at 00:57 UTC + +jobs: + test-prompts-21-40: + uses: ./.github/workflows/test_00.base.yml + with: + batch_offset: 20 + range_name: "21-40" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_00_benign_prompts_no_mitigation.41-60.yml b/.github/workflows/test_00_benign_prompts_no_mitigation.41-60.yml new file mode 100644 index 000000000..a2b4d19c5 --- /dev/null +++ b/.github/workflows/test_00_benign_prompts_no_mitigation.41-60.yml @@ -0,0 +1,13 @@ +name: '#0 (41-60)' +on: + workflow_dispatch: + schedule: + - cron: '54 1 * * *' # Starts at 01:54 UTC + +jobs: + test-prompts-41-60: + uses: ./.github/workflows/test_00.base.yml + with: + batch_offset: 40 + range_name: "41-60" + batch_size: 2 diff --git a/.github/workflows/test_00_benign_prompts_no_mitigation.61-80.yml b/.github/workflows/test_00_benign_prompts_no_mitigation.61-80.yml new file mode 100644 index 000000000..335521de0 --- /dev/null +++ b/.github/workflows/test_00_benign_prompts_no_mitigation.61-80.yml @@ -0,0 +1,13 @@ +name: '#0 (61-80)' +on: + workflow_dispatch: + schedule: + - cron: '51 2 * * *' # Starts at 02:51 UTC + +jobs: + test-prompts-61-80: + uses: ./.github/workflows/test_00.base.yml + with: + batch_offset: 60 + range_name: "61-80" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_00_benign_prompts_no_mitigation.81-100.yml b/.github/workflows/test_00_benign_prompts_no_mitigation.81-100.yml new file mode 100644 index 000000000..cdcc75ecf --- /dev/null +++ b/.github/workflows/test_00_benign_prompts_no_mitigation.81-100.yml @@ -0,0 +1,13 @@ +name: '#0 (81-100)' +on: + workflow_dispatch: + schedule: + - cron: '48 3 * * *' # Starts at 03:48 UTC + +jobs: + test-prompts-81-100: + uses: ./.github/workflows/test_00.base.yml + with: + batch_offset: 80 + range_name: "81-100" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_01.base.yml b/.github/workflows/test_01.base.yml new file mode 100644 index 000000000..649a86ee7 --- /dev/null +++ b/.github/workflows/test_01.base.yml @@ -0,0 +1,27 @@ +name: 'Reusable Test #1 | No Mitigation | Malicious' +on: + workflow_call: + inputs: + batch_offset: + description: 'Starting prompt index offset' + required: true + type: number + range_name: + description: 'Human readable range name (e.g., "1-20")' + required: true + type: string + batch_size: + description: 'Number of prompts per batch' + required: false + type: number + default: 2 + +jobs: + test: + uses: ./.github/workflows/tests.abstract_base.yml + with: + batch_offset: ${{ inputs.batch_offset }} + range_name: ${{ inputs.range_name }} + batch_size: ${{ inputs.batch_size }} + test_file_path: tests/integration/test_01_malicious_prompts_no_mitigation.py + model_display_name: microsoft/Phi-3-mini-4k-instruct-onnx diff --git a/.github/workflows/test_01_malicious_prompts_no_mitigation.01-20.yml b/.github/workflows/test_01_malicious_prompts_no_mitigation.01-20.yml new file mode 100644 index 000000000..994e58bbb --- /dev/null +++ b/.github/workflows/test_01_malicious_prompts_no_mitigation.01-20.yml @@ -0,0 +1,13 @@ +name: '#1 (1-20)' +on: + workflow_dispatch: + schedule: + - cron: '45 4 * * *' # Starts at 04:45 UTC + +jobs: + test-prompts-1-20: + uses: ./.github/workflows/test_01.base.yml + with: + batch_offset: 0 + range_name: "1-20" + batch_size: 2 diff --git a/.github/workflows/test_01_malicious_prompts_no_mitigation.21-40.yml b/.github/workflows/test_01_malicious_prompts_no_mitigation.21-40.yml new file mode 100644 index 000000000..484fe8045 --- /dev/null +++ b/.github/workflows/test_01_malicious_prompts_no_mitigation.21-40.yml @@ -0,0 +1,13 @@ +name: '#1 (21-40)' +on: + workflow_dispatch: + schedule: + - cron: '42 5 * * *' # Starts at 05:42 UTC + +jobs: + test-prompts-21-40: + uses: ./.github/workflows/test_01.base.yml + with: + batch_offset: 20 + range_name: "21-40" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_01_malicious_prompts_no_mitigation.41-60.yml b/.github/workflows/test_01_malicious_prompts_no_mitigation.41-60.yml new file mode 100644 index 000000000..fbae81467 --- /dev/null +++ b/.github/workflows/test_01_malicious_prompts_no_mitigation.41-60.yml @@ -0,0 +1,13 @@ +name: '#1 (41-60)' +on: + workflow_dispatch: + schedule: + - cron: '39 6 * * *' # Starts at 06:39 UTC + +jobs: + test-prompts-41-60: + uses: ./.github/workflows/test_01.base.yml + with: + batch_offset: 40 + range_name: "41-60" + batch_size: 2 diff --git a/.github/workflows/test_01_malicious_prompts_no_mitigation.61-80.yml b/.github/workflows/test_01_malicious_prompts_no_mitigation.61-80.yml new file mode 100644 index 000000000..e3f9db2b7 --- /dev/null +++ b/.github/workflows/test_01_malicious_prompts_no_mitigation.61-80.yml @@ -0,0 +1,13 @@ +name: '#1 (61-80)' +on: + workflow_dispatch: + schedule: + - cron: '36 7 * * *' # Starts at 07:36 UTC + +jobs: + test-prompts-61-80: + uses: ./.github/workflows/test_01.base.yml + with: + batch_offset: 60 + range_name: "61-80" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_01_malicious_prompts_no_mitigation.81-100.yml b/.github/workflows/test_01_malicious_prompts_no_mitigation.81-100.yml new file mode 100644 index 000000000..7112057ab --- /dev/null +++ b/.github/workflows/test_01_malicious_prompts_no_mitigation.81-100.yml @@ -0,0 +1,13 @@ +name: '#1 (81-100)' +on: + workflow_dispatch: + schedule: + - cron: '33 8 * * *' # Starts at 08:33 UTC + +jobs: + test-prompts-81-100: + uses: ./.github/workflows/test_01.base.yml + with: + batch_offset: 80 + range_name: "81-100" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_02.base.yml b/.github/workflows/test_02.base.yml new file mode 100644 index 000000000..9eca82442 --- /dev/null +++ b/.github/workflows/test_02.base.yml @@ -0,0 +1,27 @@ +name: 'Reusable Test #2 | CoT | Malicious' +on: + workflow_call: + inputs: + batch_offset: + description: 'Starting prompt index offset' + required: true + type: number + range_name: + description: 'Human readable range name (e.g., "1-20")' + required: true + type: string + batch_size: + description: 'Number of prompts per batch' + required: false + type: number + default: 2 + +jobs: + test: + uses: ./.github/workflows/tests.abstract_base.yml + with: + batch_offset: ${{ inputs.batch_offset }} + range_name: ${{ inputs.range_name }} + batch_size: ${{ inputs.batch_size }} + test_file_path: tests/integration/test_02_malicious_prompts_cot.py + model_display_name: microsoft/Phi-3-mini-4k-instruct-onnx \ No newline at end of file diff --git a/.github/workflows/test_02_malicious_prompts_cot.01-20.yml b/.github/workflows/test_02_malicious_prompts_cot.01-20.yml new file mode 100644 index 000000000..8351a3b25 --- /dev/null +++ b/.github/workflows/test_02_malicious_prompts_cot.01-20.yml @@ -0,0 +1,13 @@ +name: '#2 (1-20)' +on: + workflow_dispatch: + schedule: + - cron: '30 9 * * *' # Starts at 09:30 UTC + +jobs: + test-prompts-1-20: + uses: ./.github/workflows/test_02.base.yml + with: + batch_offset: 0 + range_name: "1-20" + batch_size: 2 diff --git a/.github/workflows/test_02_malicious_prompts_cot.21-40.yml b/.github/workflows/test_02_malicious_prompts_cot.21-40.yml new file mode 100644 index 000000000..5d226c7dd --- /dev/null +++ b/.github/workflows/test_02_malicious_prompts_cot.21-40.yml @@ -0,0 +1,13 @@ +name: '#2 (21-40)' +on: + workflow_dispatch: + schedule: + - cron: '27 10 * * *' # Starts at 10:27 UTC + +jobs: + test-prompts-21-40: + uses: ./.github/workflows/test_02.base.yml + with: + batch_offset: 20 + range_name: "21-40" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_02_malicious_prompts_cot.41-60.yml b/.github/workflows/test_02_malicious_prompts_cot.41-60.yml new file mode 100644 index 000000000..a1ca62310 --- /dev/null +++ b/.github/workflows/test_02_malicious_prompts_cot.41-60.yml @@ -0,0 +1,13 @@ +name: '#2 (41-60)' +on: + workflow_dispatch: + schedule: + - cron: '24 11 * * *' # Starts at 11:24 UTC + +jobs: + test-prompts-41-60: + uses: ./.github/workflows/test_02.base.yml + with: + batch_offset: 40 + range_name: "41-60" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_02_malicious_prompts_cot.61-80.yml b/.github/workflows/test_02_malicious_prompts_cot.61-80.yml new file mode 100644 index 000000000..c42058135 --- /dev/null +++ b/.github/workflows/test_02_malicious_prompts_cot.61-80.yml @@ -0,0 +1,13 @@ +name: '#2 (61-80)' +on: + workflow_dispatch: + schedule: + - cron: '21 12 * * *' # Starts at 12:21 UTC + +jobs: + test-prompts-61-80: + uses: ./.github/workflows/test_02.base.yml + with: + batch_offset: 60 + range_name: "61-80" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_02_malicious_prompts_cot.81-100.yml b/.github/workflows/test_02_malicious_prompts_cot.81-100.yml new file mode 100644 index 000000000..d1f787fe8 --- /dev/null +++ b/.github/workflows/test_02_malicious_prompts_cot.81-100.yml @@ -0,0 +1,13 @@ +name: '#2 (81-100)' +on: + workflow_dispatch: + schedule: + - cron: '18 13 * * *' # Starts at 13:18 UTC + +jobs: + test-prompts-81-100: + uses: ./.github/workflows/test_02.base.yml + with: + batch_offset: 80 + range_name: "81-100" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_03_malicious_prompts_rag.base.yml b/.github/workflows/test_03.base.yml similarity index 100% rename from .github/workflows/test_03_malicious_prompts_rag.base.yml rename to .github/workflows/test_03.base.yml diff --git a/.github/workflows/test_03_malicious_prompts_rag.01-20.yml b/.github/workflows/test_03_malicious_prompts_rag.01-20.yml new file mode 100644 index 000000000..b0f026e13 --- /dev/null +++ b/.github/workflows/test_03_malicious_prompts_rag.01-20.yml @@ -0,0 +1,13 @@ +name: '#3 (1-20)' +on: + workflow_dispatch: + schedule: + - cron: '15 14 * * *' # Starts at 14:15 UTC + +jobs: + test-prompts-1-20: + uses: ./.github/workflows/test_03.base.yml + with: + batch_offset: 0 + range_name: "1-20" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_03_malicious_prompts_rag.21-40.yml b/.github/workflows/test_03_malicious_prompts_rag.21-40.yml new file mode 100644 index 000000000..d873f1325 --- /dev/null +++ b/.github/workflows/test_03_malicious_prompts_rag.21-40.yml @@ -0,0 +1,13 @@ +name: '#3 (21-40)' +on: + workflow_dispatch: + schedule: + - cron: '12 15 * * *' # Starts at 15:12 UTC + +jobs: + test-prompts-21-40: + uses: ./.github/workflows/test_03.base.yml + with: + batch_offset: 20 + range_name: "21-40" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_03_malicious_prompts_rag.41-60.yml b/.github/workflows/test_03_malicious_prompts_rag.41-60.yml new file mode 100644 index 000000000..6ede0be6f --- /dev/null +++ b/.github/workflows/test_03_malicious_prompts_rag.41-60.yml @@ -0,0 +1,13 @@ +name: '#3 (41-60)' +on: + workflow_dispatch: + schedule: + - cron: '9 16 * * *' # Starts at 16:09 UTC + +jobs: + test-prompts-41-60: + uses: ./.github/workflows/test_03.base.yml + with: + batch_offset: 40 + range_name: "41-60" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_03_malicious_prompts_rag.61-80.yml b/.github/workflows/test_03_malicious_prompts_rag.61-80.yml new file mode 100644 index 000000000..66785ab50 --- /dev/null +++ b/.github/workflows/test_03_malicious_prompts_rag.61-80.yml @@ -0,0 +1,13 @@ +name: '#3 (61-80)' +on: + workflow_dispatch: + schedule: + - cron: '6 17 * * *' # Starts at 17:06 UTC + +jobs: + test-prompts-61-80: + uses: ./.github/workflows/test_03.base.yml + with: + batch_offset: 60 + range_name: "61-80" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_03_malicious_prompts_rag.81-100.yml b/.github/workflows/test_03_malicious_prompts_rag.81-100.yml new file mode 100644 index 000000000..6b48afe8a --- /dev/null +++ b/.github/workflows/test_03_malicious_prompts_rag.81-100.yml @@ -0,0 +1,13 @@ +name: '#3 (81-100)' +on: + workflow_dispatch: + schedule: + - cron: '3 18 * * *' # Starts at 18:03 UTC + +jobs: + test-prompts-81-100: + uses: ./.github/workflows/test_03.base.yml + with: + batch_offset: 80 + range_name: "81-100" + batch_size: 2 \ No newline at end of file diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml b/.github/workflows/test_04.base.yml similarity index 100% rename from .github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml rename to .github/workflows/test_04.base.yml diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.01-20.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot.01-20.yml index af719e8a0..b9ea4715a 100644 --- a/.github/workflows/test_04_malicious_prompts_rag_and_cot.01-20.yml +++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot.01-20.yml @@ -2,12 +2,11 @@ name: '#4 (1-20)' on: workflow_dispatch: schedule: - # Workflow 1 - Starts at hours: 0, 5, 10, 15, 20 (every 5th hour starting from midnight) - - cron: '0 */5 * * *' + - cron: '0 19 * * *' # Starts at 19:00 UTC jobs: test-prompts-1-20: - uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml + uses: ./.github/workflows/test_04.base.yml with: batch_offset: 0 range_name: "1-20" diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.21-40.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot.21-40.yml index e877a87d3..9361abc35 100644 --- a/.github/workflows/test_04_malicious_prompts_rag_and_cot.21-40.yml +++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot.21-40.yml @@ -2,12 +2,11 @@ name: '#4 (21-40)' on: workflow_dispatch: schedule: - # Workflow 2 - Starts at hours: 1, 6, 11, 16, 21 (every 5th hour starting from 1 AM) - - cron: '0 1-23/5 * * *' + - cron: '57 19 * * *' # Starts at 19:57 UTC jobs: test-prompts-21-40: - uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml + uses: ./.github/workflows/test_04.base.yml with: batch_offset: 20 range_name: "21-40" diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.41-60.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot.41-60.yml index cd295ac5a..6821b79e9 100644 --- a/.github/workflows/test_04_malicious_prompts_rag_and_cot.41-60.yml +++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot.41-60.yml @@ -2,12 +2,11 @@ name: '#4 (41-60)' on: workflow_dispatch: schedule: - # Workflow 3 - Starts at hours: 2, 7, 12, 17, 22 (every 5th hour starting from 2 AM) - - cron: '0 2-23/5 * * *' + - cron: '54 20 * * *' # Starts at 20:54 UTC jobs: test-prompts-41-60: - uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml + uses: ./.github/workflows/test_04.base.yml with: batch_offset: 40 range_name: "41-60" diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.61-80.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot.61-80.yml index 98ed38d90..bf0210d6c 100644 --- a/.github/workflows/test_04_malicious_prompts_rag_and_cot.61-80.yml +++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot.61-80.yml @@ -2,12 +2,11 @@ name: '#4 (61-80)' on: workflow_dispatch: schedule: - # Workflow 4 - Starts at hours: 3, 8, 13, 18, 23 (every 5th hour starting from 3 AM) - - cron: '0 3-23/5 * * *' + - cron: '51 21 * * *' # Starts at 21:51 UTC jobs: test-prompts-61-80: - uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml + uses: ./.github/workflows/test_04.base.yml with: batch_offset: 60 range_name: "61-80" diff --git a/.github/workflows/test_04_malicious_prompts_rag_and_cot.81-100.yml b/.github/workflows/test_04_malicious_prompts_rag_and_cot.81-100.yml index 7e5fc6165..7d1b8ea86 100644 --- a/.github/workflows/test_04_malicious_prompts_rag_and_cot.81-100.yml +++ b/.github/workflows/test_04_malicious_prompts_rag_and_cot.81-100.yml @@ -2,12 +2,11 @@ name: '#4 (81-100)' on: workflow_dispatch: schedule: - # Workflow 5 - Starts at hours: 4, 9, 14, 19 (every 5th hour starting from 4 AM) - - cron: '0 4-19/5 * * *' + - cron: '48 22 * * *' # Starts at 22:48 UTC jobs: test-prompts-81-100: - uses: ./.github/workflows/test_04_malicious_prompts_rag_and_cot.base.yml + uses: ./.github/workflows/test_04.base.yml with: batch_offset: 80 range_name: "81-100"