From 264f332c1062df9a23e97ecc87bc8011da5af36a Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Wed, 23 Apr 2025 12:32:08 -0600 Subject: [PATCH 1/9] echo response back --- .github/workflows/llmsecops-cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index 81806e940..8f6251a14 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -38,4 +38,4 @@ jobs: - name: Test API call run: | - curl -i localhost:9999 + curl -X POST -i localhost:9999 -d '{ "key":"123456789" }' From 8c6f38db31a5df9df359ac315164925a68ca7b72 Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Wed, 23 Apr 2025 16:09:05 -0600 Subject: [PATCH 2/9] support JSON request payload passed to LLM service layer --- .github/workflows/llmsecops-cicd.yml | 10 ++++--- .../api/{PathDispatcher.py => controller.py} | 28 +++++++++++++------ tests/api/server.py | 7 ++--- tests/llm/phi3_language_model.py | 26 +++++++++++------ 4 files changed, 47 insertions(+), 24 deletions(-) rename tests/api/{PathDispatcher.py => controller.py} (68%) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index 8f6251a14..cc2877a22 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -28,9 +28,11 @@ jobs: huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir . pip install onnxruntime-genai # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - python ${{ github.workspace }}/tests/llm/phi3_language_model.py \ - --prompt 'Describe the principle of existence, from the first principles of philosophy.' \ - -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \ + + # python ./tests/llm/phi3_language_model.py --prompt 'Describe the principle of existence, from the first principles of philosophy.' -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 + # python ${{ github.workspace }}/tests/llm/phi3_language_model.py \ + # --prompt 'Describe the principle of existence, from the first principles of philosophy.' \ + # -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 - name: Run REST API server run: | @@ -38,4 +40,4 @@ jobs: - name: Test API call run: | - curl -X POST -i localhost:9999 -d '{ "key":"123456789" }' + curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' \ No newline at end of file diff --git a/tests/api/PathDispatcher.py b/tests/api/controller.py similarity index 68% rename from tests/api/PathDispatcher.py rename to tests/api/controller.py index 338e72368..20612ed99 100644 --- a/tests/api/PathDispatcher.py +++ b/tests/api/controller.py @@ -1,8 +1,14 @@ -import cgi import json +import os +import sys + +# Add the parent folder (or any relative path) +sys.path.append(os.path.abspath('./../llm')) + +from phi3_language_model import Phi3LanguageModel -class PathDispatcher: +class ApiController: def __init__(self): self.routes = {} @@ -11,6 +17,10 @@ class PathDispatcher: start_response('415 Unsupported Media Type', self.response_headers) return [json.dumps({'error': 'Unsupported Content-Type'}).encode('utf-8')] + def get_service_response(self, prompt): + service = Phi3LanguageModel() + response = service.get_response(prompt_input=prompt) + return response def __http_200_ok(self, env, start_response): try: @@ -19,10 +29,14 @@ class PathDispatcher: request_body_size = 0 request_body = env['wsgi.input'].read(request_body_size) - request_body = request_body.decode('utf-8') + request_json = json.loads(request_body.decode('utf-8')) + prompt = request_json.get('prompt') # for now, just reading request and echoing back in response - data = json.loads(request_body) + # data = json.loads(prompt) + # response_body = json.dumps(data).encode('utf-8') + + data = self.get_service_response(prompt) response_body = json.dumps(data).encode('utf-8') response_headers = [('Content-Type', 'application/json'), ('Content-Length', str(len(response_body)))] @@ -34,6 +48,8 @@ class PathDispatcher: method = env.get('REQUEST_METHOD').upper() path = env.get('PATH_INFO') + # TODO: register route for POST /api/conversations + if not method == 'POST': self.__http_415_notsupported(env, start_response) @@ -44,7 +60,3 @@ class PathDispatcher: start_response('400 Bad Request', self.response_headers) return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')] - - def register(self, method, path, function): - self.routes[method.lower(), path] = function - return function diff --git a/tests/api/server.py b/tests/api/server.py index 7a8d0c620..2c08107c5 100644 --- a/tests/api/server.py +++ b/tests/api/server.py @@ -1,6 +1,6 @@ import json -from PathDispatcher import PathDispatcher +from controller import ApiController from wsgiref.simple_server import make_server @@ -14,9 +14,8 @@ class RestApiServer: def listen(self): port = 9999 - dispatcher = PathDispatcher() - dispatcher.register('POST', '/', self.post_response) - with make_server('', port, dispatcher) as wsgi_srv: + controller = ApiController() + with make_server('', port, controller) as wsgi_srv: print(f'listening on port {port}...') wsgi_srv.serve_forever() diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py index a08357ec4..2e4721376 100644 --- a/tests/llm/phi3_language_model.py +++ b/tests/llm/phi3_language_model.py @@ -4,10 +4,13 @@ import onnxruntime_genai as og import argparse +default_model_path = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4' + class Phi3LanguageModel: - def __init__(self, model_path): + def __init__(self, model_path=None): # configure ONNX runtime + model_path = default_model_path if model_path == None else model_path config = og.Config(model_path) config.clear_providers() self.model = og.Model(config) @@ -17,7 +20,7 @@ class Phi3LanguageModel: def get_response(self, prompt_input): - search_options = { 'max_length': 2048 } + search_options = { 'max_length': 1024 } params = og.GeneratorParams(self.model) params.set_search_options(**search_options) generator = og.Generator(self.model, params) @@ -28,20 +31,22 @@ class Phi3LanguageModel: input_tokens = self.tokenizer.encode(prompt) generator.append_tokens(input_tokens) - print("Output: ", end='', flush=True) - + # generate output + output = '' try: while not generator.is_done(): generator.generate_next_token() new_token = generator.get_next_tokens()[0] - print(self.tokenizer_stream.decode(new_token), end='', flush=True) + decoded = self.tokenizer_stream.decode(new_token) + output = output + decoded except Exception as e: - print(f'{e}') + return f'{e}' + return { 'response': output } if __name__ == "__main__": parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai") - parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)') + parser.add_argument('-m', '--model_path', type=str, required=False, help='Onnx model folder path (must contain genai_config.json and model.onnx)') parser.add_argument('-p', '--prompt', type=str, required=True, help='Prompt input') parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt') parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt') @@ -52,5 +57,10 @@ if __name__ == "__main__": parser.add_argument('--repetition_penalty', type=float, help='Repetition penalty to sample with') args = parser.parse_args() - model = Phi3LanguageModel(args.model_path) + try: + model_path = args.model_path + except: + model_path = None + + model = Phi3LanguageModel(model_path) model.get_response(args.prompt) From c9c5b8b0a7a022ede2ede2dafb599733bd3cb1fe Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:27:03 -0600 Subject: [PATCH 3/9] use Python package approach to project structure --- .github/workflows/llmsecops-cicd.yml | 3 ++- tests/__init__.py | 0 tests/api/__init__.py | 0 tests/api/controller.py | 7 +------ tests/api/server.py | 2 +- tests/llm/__init__.py | 0 tests/llm/phi3_language_model.py | 8 ++++---- 7 files changed, 8 insertions(+), 12 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/api/__init__.py create mode 100644 tests/llm/__init__.py diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index cc2877a22..c9130131d 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -36,7 +36,8 @@ jobs: - name: Run REST API server run: | - python ${{ github.workspace }}/tests/api/server.py & + # python ${{ github.workspace }}/tests/api/server.py & + python -m tests.api.server - name: Test API call run: | diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/api/__init__.py b/tests/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/api/controller.py b/tests/api/controller.py index 20612ed99..2dc1a7676 100644 --- a/tests/api/controller.py +++ b/tests/api/controller.py @@ -1,11 +1,6 @@ import json -import os -import sys -# Add the parent folder (or any relative path) -sys.path.append(os.path.abspath('./../llm')) - -from phi3_language_model import Phi3LanguageModel +from tests.llm.phi3_language_model import Phi3LanguageModel class ApiController: diff --git a/tests/api/server.py b/tests/api/server.py index 2c08107c5..503cb5069 100644 --- a/tests/api/server.py +++ b/tests/api/server.py @@ -1,6 +1,6 @@ import json -from controller import ApiController +from tests.api.controller import ApiController from wsgiref.simple_server import make_server diff --git a/tests/llm/__init__.py b/tests/llm/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/llm/phi3_language_model.py b/tests/llm/phi3_language_model.py index 2e4721376..8c4eed47e 100644 --- a/tests/llm/phi3_language_model.py +++ b/tests/llm/phi3_language_model.py @@ -1,16 +1,16 @@ # TODO: business logic for REST API interaction w/ LLM via prompt input -import onnxruntime_genai as og import argparse +import onnxruntime_genai as og +import os -default_model_path = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4' - class Phi3LanguageModel: def __init__(self, model_path=None): # configure ONNX runtime - model_path = default_model_path if model_path == None else model_path + base_dir = os.path.dirname(os.path.abspath(__file__)) + model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4") config = og.Config(model_path) config.clear_providers() self.model = og.Model(config) From c3cf6a5b54a4493e6ae59fb99418c4cf2c403837 Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:29:08 -0600 Subject: [PATCH 4/9] move to background --- .github/workflows/llmsecops-cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index c9130131d..d6a27c867 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -37,7 +37,7 @@ jobs: - name: Run REST API server run: | # python ${{ github.workspace }}/tests/api/server.py & - python -m tests.api.server + python -m tests.api.server & - name: Test API call run: | From 3563053ac3653a2146557312788b0280c7b0c44c Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:39:04 -0600 Subject: [PATCH 5/9] error handling --- tests/api/controller.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/tests/api/controller.py b/tests/api/controller.py index 2dc1a7676..00d43a766 100644 --- a/tests/api/controller.py +++ b/tests/api/controller.py @@ -26,10 +26,6 @@ class ApiController: request_body = env['wsgi.input'].read(request_body_size) request_json = json.loads(request_body.decode('utf-8')) prompt = request_json.get('prompt') - - # for now, just reading request and echoing back in response - # data = json.loads(prompt) - # response_body = json.dumps(data).encode('utf-8') data = self.get_service_response(prompt) response_body = json.dumps(data).encode('utf-8') @@ -51,7 +47,13 @@ class ApiController: try: handler = self.routes.get((method,path), self.__http_200_ok) return handler(env, start_response) - except json.JSONDecodeError: - start_response('400 Bad Request', self.response_headers) - return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')] - + except json.JSONDecodeError as e: + response_body = e.msg.encode('utf-8') + response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))] + start_response('400 Bad Request', response_headers) + return [response_body] + except Exception as e: + response_body = e.msg.encode('utf-8') + response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))] + start_response('500 Internal Server Error', response_headers) + return [response_body] From 37c84617b1036c6fe56c40ea369dcb58f6ec2921 Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:44:46 -0600 Subject: [PATCH 6/9] error handling - take 2 --- .github/workflows/llmsecops-cicd.yml | 10 +++++----- tests/api/controller.py | 17 +++++++++++++---- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index d6a27c867..93caa8d8e 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -37,8 +37,8 @@ jobs: - name: Run REST API server run: | # python ${{ github.workspace }}/tests/api/server.py & - python -m tests.api.server & - - - name: Test API call - run: | - curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' \ No newline at end of file + nohup python -m tests.api.server > server.log 2>&1 & + sleep 2 + curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true + echo "--- SERVER LOG ---" + cat server.log \ No newline at end of file diff --git a/tests/api/controller.py b/tests/api/controller.py index 00d43a766..60343085f 100644 --- a/tests/api/controller.py +++ b/tests/api/controller.py @@ -1,4 +1,5 @@ import json +import traceback from tests.llm.phi3_language_model import Phi3LanguageModel @@ -53,7 +54,15 @@ class ApiController: start_response('400 Bad Request', response_headers) return [response_body] except Exception as e: - response_body = e.msg.encode('utf-8') - response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))] - start_response('500 Internal Server Error', response_headers) - return [response_body] + # response_body = e.msg.encode('utf-8') + # response_headers = [('Content-Type', 'text/plain'), ('Content-Length', str(len(response_body)))] + # start_response('500 Internal Server Error', response_headers) + # return [response_body] + + # Log to stdout so it shows in GitHub Actions + print("Exception occurred:") + traceback.print_exc() + + # Return more detailed error response + start_response('500 Internal Server Error', [('Content-Type', 'text/plain')]) + return [f"Internal Server Error:\n{e}\n".encode()] From d3d6d96484a6e08cf27af451ee7eb7a1249aa51c Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:48:16 -0600 Subject: [PATCH 7/9] error handling - troubleshooting --- .github/workflows/llmsecops-cicd.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index 93caa8d8e..c45efd005 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -37,6 +37,7 @@ jobs: - name: Run REST API server run: | # python ${{ github.workspace }}/tests/api/server.py & + ls -al nohup python -m tests.api.server > server.log 2>&1 & sleep 2 curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true From 597078b8c58b853361a882c961c2000f623964a6 Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:50:23 -0600 Subject: [PATCH 8/9] error handling - troubleshooting --- .github/workflows/llmsecops-cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index c45efd005..020026e1d 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -25,7 +25,7 @@ jobs: - name: Set up HuggingFace LLM run: | pip install huggingface-hub[cli] - huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir . + huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir ${{ github.workspace }}/tests/llm pip install onnxruntime-genai # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py From ee37c53a8dcf7db496f2f37f1566e5427612004b Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 24 Apr 2025 11:54:30 -0600 Subject: [PATCH 9/9] clean up workflow --- .github/workflows/llmsecops-cicd.yml | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml index 020026e1d..750a73b02 100644 --- a/.github/workflows/llmsecops-cicd.yml +++ b/.github/workflows/llmsecops-cicd.yml @@ -1,4 +1,4 @@ -name: LLM Prompt Testing +name: 'LLM Prompt Testing' on: # push: @@ -14,32 +14,23 @@ jobs: steps: - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 - - name: Set up git LFS + - name: 'set up git LFS' run: git lfs install - - name: Set up Python + - name: 'set up Python' uses: actions/setup-python@v3 with: python-version: '3.12' - - name: Set up HuggingFace LLM + - name: 'set up Microsoft Phi-3 Mini 4k LLM from HuggingFace' run: | pip install huggingface-hub[cli] huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir ${{ github.workspace }}/tests/llm pip install onnxruntime-genai - # curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py - # python ./tests/llm/phi3_language_model.py --prompt 'Describe the principle of existence, from the first principles of philosophy.' -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 - # python ${{ github.workspace }}/tests/llm/phi3_language_model.py \ - # --prompt 'Describe the principle of existence, from the first principles of philosophy.' \ - # -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 - - - name: Run REST API server + - name: 'run HTTP server and call REST API' run: | - # python ${{ github.workspace }}/tests/api/server.py & - ls -al nohup python -m tests.api.server > server.log 2>&1 & sleep 2 curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true - echo "--- SERVER LOG ---" cat server.log \ No newline at end of file