From eecd577eba60948e0d4fb9163c0fbf72d5fe2e75 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Tue, 22 Apr 2025 20:57:46 -0600
Subject: [PATCH 1/8] workflow: local API for ONNX interactions

---
 .github/workflows/llmsecops-cicd.yml | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index b0492a415..ea9c45717 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -22,13 +22,16 @@ jobs:
       with:
         python-version: '3.12'
 
-    - name: Download Huggingface CLI
+    - name: Set up HuggingFace LLM
       run: |
         pip install huggingface-hub[cli]
         huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
         pip install onnxruntime-genai
-        curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
-        python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu -v
+        # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
+        python phi3_language_model.py -v \
+          --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
+          -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
+          -e cpu
 
     - name: Run REST API server
       run: |
@@ -36,4 +39,4 @@ jobs:
         
     - name: Test API call
       run: |
-        curl -i localhost:9999/hello
+        curl -i localhost:9999

From 66371bf208daad95543349dbc990b867443c5e27 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Tue, 22 Apr 2025 20:58:16 -0600
Subject: [PATCH 2/8] support POST

---
 tests/api/PathDispatcher.py | 41 +++++++++++++++++++++++++++++--------
 tests/api/server.py         | 18 ++++++++--------
 2 files changed, 42 insertions(+), 17 deletions(-)

diff --git a/tests/api/PathDispatcher.py b/tests/api/PathDispatcher.py
index 0649b8201..7e534677f 100644
--- a/tests/api/PathDispatcher.py
+++ b/tests/api/PathDispatcher.py
@@ -1,22 +1,45 @@
 import cgi
+import json
 
 
 class PathDispatcher:
     def __init__(self):
         self.routes = {}
+        self.response_headers = [('Content-Type', 'application/json')]
+
+
+    def __http_415_notsupported(self, env, start_response):
+        start_response('415 Unsupported Media Type', self.response_headers)
+        return [json.dumps({'error': 'Unsupported Content-Type'}).encode('utf-8')]
+
+
+    def __http_200_ok(self, env, start_response):
+        try:
+            request_body_size = int(env.get('CONTENT_LENGTH', 0))
+        except (ValueError):
+            request_body_size = 0
+
+        request_body = env['wsgi.input'].read(request_body_size)
+        data = json.loads(request_body.decode('utf-8'))
+        start_response('200 OK', self.response_headers)
+        return [json.dumps({'received': data}).encode('utf-8')]
 
-    def notfound_404(self, env, start_response):
-        start_response('404 Not Found', [ ('Content-Type', 'text/plain') ])
-        return [b'Not Found']
 
     def __call__(self, env, start_response):
+        method = env.get('REQUEST_METHOD').upper()
         path = env.get('PATH_INFO')
-        params = cgi.FieldStorage(env.get('wsgi.output'), environ=env)
-        method = env.get('REQUEST_METHOD').lower()
-        env['params'] = { key: params.getvalue(key) for key in params }
-        handler = self.routes.get((method,path), self.notfound_404)
-        return handler(env, start_response)
-    
+
+        # Reject early and return: start_response may only be called once per request.
+        if method != 'POST':
+            return self.__http_415_notsupported(env, start_response)
+        try:
+            handler = self.routes.get((method,path), self.__http_200_ok)
+            return handler(env, start_response)
+        except json.JSONDecodeError:
+            start_response('400 Bad Request', self.response_headers)
+            return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')]
+
+
     def register(self, method, path, function):
         self.routes[method.lower(), path] = function
         return function
diff --git a/tests/api/server.py b/tests/api/server.py
index 9abc4565d..7a8d0c620 100644
--- a/tests/api/server.py
+++ b/tests/api/server.py
@@ -1,3 +1,5 @@
+import json
+
 from PathDispatcher import PathDispatcher
 from wsgiref.simple_server import make_server
 
@@ -6,19 +8,19 @@ class RestApiServer:
     def __init__(self):
         pass
 
-    def response_function(self, environ, start_response):
-        start_response('200 OK', [('Content-Type','text/html')])
-        yield str(f'testing...\n').encode('utf-8')
+    def post_response(self, env, start_response):
+        start_response('200 OK', [('Content-Type', 'application/json')])
+        yield json.dumps({'received': 'data'}).encode('utf-8')
 
     def listen(self):
         port = 9999
         dispatcher = PathDispatcher()
-        dispatcher.register('GET', '/hello', self.response_function)
-        wsgi_srv = make_server('', port, dispatcher)
-        print(f'listening on port {port}...')
-        wsgi_srv.serve_forever()
+        dispatcher.register('POST', '/', self.post_response)
+        with make_server('', port, dispatcher) as wsgi_srv:
+            print(f'listening on port {port}...')
+            wsgi_srv.serve_forever()
 
 
 if __name__ == '__main__':
     srv = RestApiServer()
-    srv.listen()
+    srv.listen()
\ No newline at end of file

From e04235606d557e5394240fefe395cc2140bc6940 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Tue, 22 Apr 2025 20:59:02 -0600
Subject: [PATCH 3/8] refactor example Python class to accept a single prompt
 and exit

---
 tests/llm/phi3_interface.py      |  1 -
 tests/llm/phi3_language_model.py | 57 ++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 delete mode 100644 tests/llm/phi3_interface.py
 create mode 100644 tests/llm/phi3_language_model.py

diff --git a/tests/llm/phi3_interface.py b/tests/llm/phi3_interface.py
deleted file mode 100644
index ec8805df0..000000000
--- a/tests/llm/phi3_interface.py
+++ /dev/null
@@ -1 +0,0 @@
-# TODO: business logic for REST API interaction w/ LLM via prompt input
\ No newline at end of file
diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py
new file mode 100644
index 000000000..542a396c6
--- /dev/null
+++ b/tests/llm/phi3_language_model.py
@@ -0,0 +1,57 @@
+# TODO: business logic for REST API interaction w/ LLM via prompt input
+
+import onnxruntime_genai as og
+import argparse
+
+
+class Phi3LanguageModel:
+
+    def __init__(self, model_path):
+        # configure the ONNX runtime
+        config = og.Config(model_path)
+        config.clear_providers()
+        self.model = og.Model(config)
+        self.tokenizer = og.Tokenizer(model)
+        self.tokenizer_stream = self.tokenizer.create_stream()    
+    
+
+    def get_response(self, args):
+
+        search_options = { 'max_length': 2048 }
+        params = og.GeneratorParams(model)
+        params.set_search_options(**search_options)
+        generator = og.Generator(model, params)
+
+        # process prompt input and generate tokens
+        prompt_input = args.prompt
+        chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'
+        prompt = f'{chat_template.format(input=prompt_input)}'
+        input_tokens = self.tokenizer.encode(prompt)
+        generator.append_tokens(input_tokens)
+
+        print("Output: ", end='', flush=True)
+
+        try:
+            while not generator.is_done():
+                generator.generate_next_token()
+                new_token = generator.get_next_tokens()[0]
+                print(self.tokenizer_stream.decode(new_token), end='', flush=True)
+        except Exception as e:
+            print(f'{e}')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
+    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
+    parser.add_argument('-p', '--prompt', type=str, required=True, help='Prompt input')
+    parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
+    parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
+    parser.add_argument('-ds', '--do_sample', action='store_true', default=False, help='Do random sampling. When false, greedy or beam search are used to generate the output. Defaults to false')
+    parser.add_argument('--top_p', type=float, help='Top p probability to sample with')
+    parser.add_argument('--top_k', type=int, help='Top k tokens to sample from')
+    parser.add_argument('--temperature', type=float, help='Temperature to sample with')
+    parser.add_argument('--repetition_penalty', type=float, help='Repetition penalty to sample with')
+    args = parser.parse_args()
+
+    model = Phi3LanguageModel(args.model_path)
+    model.get_response()

From 3e27caa991cd42b6ef5b643e0e2fb3ffe6e67e44 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Tue, 22 Apr 2025 21:25:26 -0600
Subject: [PATCH 4/8] fix path

---
 .github/workflows/llmsecops-cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index ea9c45717..f9aa8d518 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -28,7 +28,7 @@ jobs:
         huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
         pip install onnxruntime-genai
         # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
-        python phi3_language_model.py -v \
+        python ${{ github.workspace }}/tests/llm/phi3_language_model.py -v \
           --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
           -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
           -e cpu

From 3ccb57e40350cf311dab682007df25a48c1756bf Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Tue, 22 Apr 2025 21:28:05 -0600
Subject: [PATCH 5/8] fix args

---
 .github/workflows/llmsecops-cicd.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index f9aa8d518..9be8285a8 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -28,10 +28,9 @@ jobs:
         huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
         pip install onnxruntime-genai
         # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
-        python ${{ github.workspace }}/tests/llm/phi3_language_model.py -v \
+        python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
           --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
           -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
-          -e cpu
 
     - name: Run REST API server
       run: |

From 1ba607cd3012166f904a731dc40aea175cb3606b Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Wed, 23 Apr 2025 06:32:28 -0600
Subject: [PATCH 6/8] fix model ref

---
 tests/llm/phi3_language_model.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py
index 542a396c6..7c6ab73b0 100644
--- a/tests/llm/phi3_language_model.py
+++ b/tests/llm/phi3_language_model.py
@@ -7,20 +7,20 @@ import argparse
 class Phi3LanguageModel:
 
     def __init__(self, model_path):
-        # configure the ONNX runtime
+        # configure ONNX runtime
         config = og.Config(model_path)
         config.clear_providers()
         self.model = og.Model(config)
-        self.tokenizer = og.Tokenizer(model)
+        self.tokenizer = og.Tokenizer(self.model)
         self.tokenizer_stream = self.tokenizer.create_stream()    
     
 
     def get_response(self, args):
 
         search_options = { 'max_length': 2048 }
-        params = og.GeneratorParams(model)
+        params = og.GeneratorParams(self.model)
         params.set_search_options(**search_options)
-        generator = og.Generator(model, params)
+        generator = og.Generator(self.model, params)
 
         # process prompt input and generate tokens
         prompt_input = args.prompt

From 5d44d90047e7ae082c49187b09ce6f2a6b707987 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Wed, 23 Apr 2025 06:56:24 -0600
Subject: [PATCH 7/8] fix prompt ref

---
 tests/llm/phi3_language_model.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py
index 7c6ab73b0..a08357ec4 100644
--- a/tests/llm/phi3_language_model.py
+++ b/tests/llm/phi3_language_model.py
@@ -15,7 +15,7 @@ class Phi3LanguageModel:
         self.tokenizer_stream = self.tokenizer.create_stream()    
     
 
-    def get_response(self, args):
+    def get_response(self, prompt_input):
 
         search_options = { 'max_length': 2048 }
         params = og.GeneratorParams(self.model)
@@ -23,7 +23,6 @@ class Phi3LanguageModel:
         generator = og.Generator(self.model, params)
 
         # process prompt input and generate tokens
-        prompt_input = args.prompt
         chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'
         prompt = f'{chat_template.format(input=prompt_input)}'
         input_tokens = self.tokenizer.encode(prompt)
@@ -54,4 +53,4 @@ if __name__ == "__main__":
     args = parser.parse_args()
 
     model = Phi3LanguageModel(args.model_path)
-    model.get_response()
+    model.get_response(args.prompt)

From 771e2a1668695f07257fcf721a7711530a86df47 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Wed, 23 Apr 2025 06:57:23 -0600
Subject: [PATCH 8/8] update workflow name

---
 .github/workflows/llmsecops-cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
index 9be8285a8..81806e940 100644
--- a/.github/workflows/llmsecops-cicd.yml
+++ b/.github/workflows/llmsecops-cicd.yml
@@ -1,4 +1,4 @@
-name: REST Server
+name: LLM Prompt Testing
 
 on:
   # push: