From 264f332c1062df9a23e97ecc87bc8011da5af36a Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Wed, 23 Apr 2025 12:32:08 -0600
Subject: [PATCH 1/9] echo response back

---
 .github/workflows/llmsecops-cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index 81806e940..8f6251a14 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -38,4 +38,4 @@ jobs:
         
     - name: Test API call
       run: |
-        curl -i localhost:9999
+        curl -X POST -i localhost:9999 -d '{ "key":"123456789" }'

From 8c6f38db31a5df9df359ac315164925a68ca7b72 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Wed, 23 Apr 2025 16:09:05 -0600
Subject: [PATCH 2/9] support JSON request payload passed to LLM service layer

---
 .github/workflows/llmsecops-cicd.yml          | 10 ++++---
 .../api/{PathDispatcher.py => controller.py}  | 28 +++++++++++++------
 tests/api/server.py                           |  7 ++---
 tests/llm/phi3_language_model.py              | 26 +++++++++++------
 4 files changed, 47 insertions(+), 24 deletions(-)
 rename tests/api/{PathDispatcher.py => controller.py} (68%)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index 8f6251a14..cc2877a22 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -28,9 +28,11 @@ jobs:
         huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
         pip install onnxruntime-genai
         # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
-        python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
-          --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
-          -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
+
+        # python ./tests/llm/phi3_language_model.py --prompt 'Describe the principle of existence, from the first principles of philosophy.' -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
+        # python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
+        #   --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
+        #   -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
 
     - name: Run REST API server
       run: |
@@ -38,4 +40,4 @@ jobs:
         
     - name: Test API call
       run: |
-        curl -X POST -i localhost:9999 -d '{ "key":"123456789" }'
+        curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }'
\ No newline at end of file
diff --git a/tests/api/PathDispatcher.py b/tests/api/controller.py
similarity index 68%
rename from tests/api/PathDispatcher.py
rename to tests/api/controller.py
index 338e72368..20612ed99 100644
--- a/tests/api/PathDispatcher.py
+++ b/tests/api/controller.py
@@ -1,8 +1,14 @@
-import cgi
 import json
+import os
+import sys
+
+# Add the parent folder (or any relative path)
+sys.path.append(os.path.abspath('./../llm'))
+
+from phi3_language_model import Phi3LanguageModel
 
 
-class PathDispatcher:
+class ApiController:
     def __init__(self):
         self.routes = {}
 
@@ -11,6 +17,10 @@ class PathDispatcher:
         start_response('415 Unsupported Media Type', self.response_headers)
         return [json.dumps({'error': 'Unsupported Content-Type'}).encode('utf-8')]
 
+    def get_service_response(self, prompt):
+        service = Phi3LanguageModel()
+        response = service.get_response(prompt_input=prompt)
+        return response
 
     def __http_200_ok(self, env, start_response):
         try:
@@ -19,10 +29,14 @@ class PathDispatcher:
             request_body_size = 0
 
         request_body = env['wsgi.input'].read(request_body_size)
-        request_body = request_body.decode('utf-8')
+        request_json = json.loads(request_body.decode('utf-8'))
+        prompt = request_json.get('prompt')
         
         # for now, just reading request and echoing back in response
-        data = json.loads(request_body)
+        # data = json.loads(prompt)
+        # response_body = json.dumps(data).encode('utf-8')
+
+        data = self.get_service_response(prompt)
         response_body = json.dumps(data).encode('utf-8')
         
         response_headers = [('Content-Type', 'application/json'), ('Content-Length', str(len(response_body)))]
@@ -34,6 +48,8 @@ class PathDispatcher:
         method = env.get('REQUEST_METHOD').upper()
         path = env.get('PATH_INFO')
 
+        # TODO: register route for POST /api/conversations
+
         if not method == 'POST':
             self.__http_415_notsupported(env, start_response)
 
@@ -44,7 +60,3 @@ class PathDispatcher:
             start_response('400 Bad Request', self.response_headers)
             return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')]
 
-
-    def register(self, method, path, function):
-        self.routes[method.lower(), path] = function
-        return function
diff --git a/tests/api/server.py b/tests/api/server.py
index 7a8d0c620..2c08107c5 100644
--- a/tests/api/server.py
+++ b/tests/api/server.py
@@ -1,6 +1,6 @@
 import json
 
-from PathDispatcher import PathDispatcher
+from controller import ApiController
 from wsgiref.simple_server import make_server
 
 
@@ -14,9 +14,8 @@ class RestApiServer:
 
     def listen(self):
         port = 9999
-        dispatcher = PathDispatcher()
-        dispatcher.register('POST', '/', self.post_response)
-        with make_server('', port, dispatcher) as wsgi_srv:
+        controller = ApiController()
+        with make_server('', port, controller) as wsgi_srv:
             print(f'listening on port {port}...')
             wsgi_srv.serve_forever()
 
diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py
index a08357ec4..2e4721376 100644
--- a/tests/llm/phi3_language_model.py
+++ b/tests/llm/phi3_language_model.py
@@ -4,10 +4,13 @@ import onnxruntime_genai as og
 import argparse
 
 
+default_model_path = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4'
+
 class Phi3LanguageModel:
 
-    def __init__(self, model_path):
+    def __init__(self, model_path=None):
         # configure ONNX runtime
+        model_path = default_model_path if model_path == None else model_path
         config = og.Config(model_path)
         config.clear_providers()
         self.model = og.Model(config)
@@ -17,7 +20,7 @@ class Phi3LanguageModel:
 
     def get_response(self, prompt_input):
 
-        search_options = { 'max_length': 2048 }
+        search_options = { 'max_length': 1024 }
         params = og.GeneratorParams(self.model)
         params.set_search_options(**search_options)
         generator = og.Generator(self.model, params)
@@ -28,20 +31,22 @@ class Phi3LanguageModel:
         input_tokens = self.tokenizer.encode(prompt)
         generator.append_tokens(input_tokens)
 
-        print("Output: ", end='', flush=True)
-
+        # generate output
+        output = ''
         try:
             while not generator.is_done():
                 generator.generate_next_token()
                 new_token = generator.get_next_tokens()[0]
-                print(self.tokenizer_stream.decode(new_token), end='', flush=True)
+                decoded = self.tokenizer_stream.decode(new_token)
+                output = output + decoded
         except Exception as e:
-            print(f'{e}')
+            return f'{e}'
+        return { 'response': output }
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
-    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
+    parser.add_argument('-m', '--model_path', type=str, required=False, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
     parser.add_argument('-p', '--prompt', type=str, required=True, help='Prompt input')
     parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
     parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
@@ -52,5 +57,10 @@ if __name__ == "__main__":
     parser.add_argument('--repetition_penalty', type=float, help='Repetition penalty to sample with')
     args = parser.parse_args()
 
-    model = Phi3LanguageModel(args.model_path)
+    # argument_default=argparse.SUPPRESS (above) omits options the user did not
+    # pass, so args.model_path may not exist; None tells Phi3LanguageModel to
+    # use its default model location.
+    model_path = getattr(args, 'model_path', None)
+
+    model = Phi3LanguageModel(model_path)
     model.get_response(args.prompt)

From c9c5b8b0a7a022ede2ede2dafb599733bd3cb1fe Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:27:03 -0600
Subject: [PATCH 3/9] use Python package approach to project structure

---
 .github/workflows/llmsecops-cicd.yml | 3 ++-
 tests/__init__.py                    | 0
 tests/api/__init__.py                | 0
 tests/api/controller.py              | 7 +------
 tests/api/server.py                  | 2 +-
 tests/llm/__init__.py                | 0
 tests/llm/phi3_language_model.py     | 8 ++++----
 7 files changed, 8 insertions(+), 12 deletions(-)
 create mode 100644 tests/__init__.py
 create mode 100644 tests/api/__init__.py
 create mode 100644 tests/llm/__init__.py

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index cc2877a22..c9130131d 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -36,7 +36,8 @@ jobs:
 
     - name: Run REST API server
       run: |
-        python ${{ github.workspace }}/tests/api/server.py &
+        # python ${{ github.workspace }}/tests/api/server.py &
+        python -m tests.api.server
         
     - name: Test API call
       run: |
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/api/__init__.py b/tests/api/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/api/controller.py b/tests/api/controller.py
index 20612ed99..2dc1a7676 100644
--- a/tests/api/controller.py
+++ b/tests/api/controller.py
@@ -1,11 +1,6 @@
 import json
-import os
-import sys
 
-# Add the parent folder (or any relative path)
-sys.path.append(os.path.abspath('./../llm'))
-
-from phi3_language_model import Phi3LanguageModel
+from tests.llm.phi3_language_model import Phi3LanguageModel
 
 
 class ApiController:
diff --git a/tests/api/server.py b/tests/api/server.py
index 2c08107c5..503cb5069 100644
--- a/tests/api/server.py
+++ b/tests/api/server.py
@@ -1,6 +1,6 @@
 import json
 
-from controller import ApiController
+from tests.api.controller import ApiController
 from wsgiref.simple_server import make_server
 
 
diff --git a/tests/llm/__init__.py b/tests/llm/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py
index 2e4721376..8c4eed47e 100644
--- a/tests/llm/phi3_language_model.py
+++ b/tests/llm/phi3_language_model.py
@@ -1,16 +1,16 @@
 # TODO: business logic for REST API interaction w/ LLM via prompt input
 
-import onnxruntime_genai as og
 import argparse
+import onnxruntime_genai as og
+import os
 
 
-default_model_path = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4'
-
 class Phi3LanguageModel:
 
     def __init__(self, model_path=None):
         # configure ONNX runtime
-        model_path = default_model_path if model_path == None else model_path
+        if model_path is None:  # honor a caller-supplied path; otherwise use the model directory beside this module
+            model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
         config = og.Config(model_path)
         config.clear_providers()
         self.model = og.Model(config)

From c3cf6a5b54a4493e6ae59fb99418c4cf2c403837 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:29:08 -0600
Subject: [PATCH 4/9] move to background

---
 .github/workflows/llmsecops-cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index c9130131d..d6a27c867 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -37,7 +37,7 @@ jobs:
     - name: Run REST API server
       run: |
         # python ${{ github.workspace }}/tests/api/server.py &
-        python -m tests.api.server
+        python -m tests.api.server &
         
     - name: Test API call
       run: |

From 3563053ac3653a2146557312788b0280c7b0c44c Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:39:04 -0600
Subject: [PATCH 5/9] error handling

---
 tests/api/controller.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/tests/api/controller.py b/tests/api/controller.py
index 2dc1a7676..00d43a766 100644
--- a/tests/api/controller.py
+++ b/tests/api/controller.py
@@ -26,10 +26,6 @@ class ApiController:
         request_body = env['wsgi.input'].read(request_body_size)
         request_json = json.loads(request_body.decode('utf-8'))
         prompt = request_json.get('prompt')
-        
-        # for now, just reading request and echoing back in response
-        # data = json.loads(prompt)
-        # response_body = json.dumps(data).encode('utf-8')
 
         data = self.get_service_response(prompt)
         response_body = json.dumps(data).encode('utf-8')
@@ -51,7 +47,13 @@ class ApiController:
         try:                
             handler = self.routes.get((method,path), self.__http_200_ok)
             return handler(env, start_response)
-        except json.JSONDecodeError:
-            start_response('400 Bad Request', self.response_headers)
-            return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')]
-
+        except json.JSONDecodeError as e:
+            response_body = e.msg.encode('utf-8')
+            response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))]
+            start_response('400 Bad Request', response_headers)
+            return [response_body]
+        except Exception as e:
+            response_body = e.msg.encode('utf-8')
+            response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))]
+            start_response('500 Internal Server Error', response_headers)
+            return [response_body]

From 37c84617b1036c6fe56c40ea369dcb58f6ec2921 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:44:46 -0600
Subject: [PATCH 6/9] error handling - take 2

---
 .github/workflows/llmsecops-cicd.yml | 10 +++++-----
 tests/api/controller.py              | 17 +++++++++++++----
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index d6a27c867..93caa8d8e 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -37,8 +37,8 @@ jobs:
     - name: Run REST API server
       run: |
         # python ${{ github.workspace }}/tests/api/server.py &
-        python -m tests.api.server &
-        
-    - name: Test API call
-      run: |
-        curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }'
\ No newline at end of file
+        nohup python -m tests.api.server > server.log 2>&1 &
+        sleep 2
+        curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true
+        echo "--- SERVER LOG ---"
+        cat server.log
\ No newline at end of file
diff --git a/tests/api/controller.py b/tests/api/controller.py
index 00d43a766..60343085f 100644
--- a/tests/api/controller.py
+++ b/tests/api/controller.py
@@ -1,4 +1,5 @@
 import json
+import traceback
 
 from tests.llm.phi3_language_model import Phi3LanguageModel
 
@@ -53,7 +54,15 @@ class ApiController:
             start_response('400 Bad Request', response_headers)
             return [response_body]
         except Exception as e:
-            response_body = e.msg.encode('utf-8')
-            response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))]
-            start_response('500 Internal Server Error', response_headers)
-            return [response_body]
+            # response_body = e.msg.encode('utf-8')
+            # response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))]
+            # start_response('500 Internal Server Error', response_headers)
+            # return [response_body]
+        
+            # Log to stdout so it shows in GitHub Actions
+            print("Exception occurred:")
+            traceback.print_exc()
+
+            # Return more detailed error response
+            start_response('500 Internal Server Error', [('Content-Type', 'text/plain')])
+            return [f"Internal Server Error:\n{e}\n".encode()]

From d3d6d96484a6e08cf27af451ee7eb7a1249aa51c Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:48:16 -0600
Subject: [PATCH 7/9] troubleshooting - list workspace contents before starting server

---
 .github/workflows/llmsecops-cicd.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index 93caa8d8e..c45efd005 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -37,6 +37,7 @@ jobs:
     - name: Run REST API server
       run: |
         # python ${{ github.workspace }}/tests/api/server.py &
+        ls -al
         nohup python -m tests.api.server > server.log 2>&1 &
         sleep 2
         curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true

From 597078b8c58b853361a882c961c2000f623964a6 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:50:23 -0600
Subject: [PATCH 8/9] troubleshooting - download model into tests/llm

---
 .github/workflows/llmsecops-cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index c45efd005..020026e1d 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -25,7 +25,7 @@ jobs:
     - name: Set up HuggingFace LLM
       run: |
         pip install huggingface-hub[cli]
-        huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
+        huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir ${{ github.workspace }}/tests/llm
         pip install onnxruntime-genai
         # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
 

From ee37c53a8dcf7db496f2f37f1566e5427612004b Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Thu, 24 Apr 2025 11:54:30 -0600
Subject: [PATCH 9/9] clean up workflow

---
 .github/workflows/llmsecops-cicd.yml | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index 020026e1d..750a73b02 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -1,4 +1,4 @@
-name: LLM Prompt Testing
+name: 'LLM Prompt Testing'
 
 on:
   # push:
@@ -14,32 +14,23 @@ jobs:
     steps:
     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683    
 
-    - name: Set up git LFS
+    - name: 'set up git LFS'
       run: git lfs install
 
-    - name: Set up Python
+    - name: 'set up Python'
       uses: actions/setup-python@v3
       with:
         python-version: '3.12'
 
-    - name: Set up HuggingFace LLM
+    - name: 'set up Microsoft Phi-3 Mini 4k LLM from HuggingFace'
       run: |
         pip install huggingface-hub[cli]
         huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir ${{ github.workspace }}/tests/llm
         pip install onnxruntime-genai
-        # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
 
-        # python ./tests/llm/phi3_language_model.py --prompt 'Describe the principle of existence, from the first principles of philosophy.' -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
-        # python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
-        #   --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
-        #   -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
-
-    - name: Run REST API server
+    - name: 'run HTTP server and call REST API'
       run: |
-        # python ${{ github.workspace }}/tests/api/server.py &
-        ls -al
         nohup python -m tests.api.server > server.log 2>&1 &
         sleep 2
         curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true
-        echo "--- SERVER LOG ---"
         cat server.log
\ No newline at end of file