Support JSON request payloads passed to the LLM service layer

This commit is contained in:
Adam Wilson
2025-04-23 16:09:05 -06:00
parent 264f332c10
commit 8c6f38db31
4 changed files with 47 additions and 24 deletions

View File

@@ -28,9 +28,11 @@ jobs:
huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
pip install onnxruntime-genai
# curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
--prompt 'Describe the principle of existence, from the first principles of philosophy.' \
-m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
# python ./tests/llm/phi3_language_model.py --prompt 'Describe the principle of existence, from the first principles of philosophy.' -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
# python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
# --prompt 'Describe the principle of existence, from the first principles of philosophy.' \
# -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4
- name: Run REST API server
run: |
@@ -38,4 +40,4 @@ jobs:
- name: Test API call
run: |
curl -X POST -i localhost:9999 -d '{ "key":"123456789" }'
curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }'

View File

@@ -1,8 +1,14 @@
import cgi
import json
import os
import sys
# Add the parent folder (or any relative path)
sys.path.append(os.path.abspath('./../llm'))
from phi3_language_model import Phi3LanguageModel
class PathDispatcher:
class ApiController:
def __init__(self):
self.routes = {}
@@ -11,6 +17,10 @@ class PathDispatcher:
start_response('415 Unsupported Media Type', self.response_headers)
return [json.dumps({'error': 'Unsupported Content-Type'}).encode('utf-8')]
def get_service_response(self, prompt):
    """Forward *prompt* to the Phi-3 language model and return its reply.

    A fresh Phi3LanguageModel is constructed per call; the model's
    get_response() return value is passed through unchanged.
    NOTE(review): instantiating the model on every request reloads the
    ONNX runtime each time — presumably fine for a test server; confirm
    before production use.
    """
    return Phi3LanguageModel().get_response(prompt_input=prompt)
def __http_200_ok(self, env, start_response):
try:
@@ -19,10 +29,14 @@ class PathDispatcher:
request_body_size = 0
request_body = env['wsgi.input'].read(request_body_size)
request_body = request_body.decode('utf-8')
request_json = json.loads(request_body.decode('utf-8'))
prompt = request_json.get('prompt')
# for now, just reading request and echoing back in response
data = json.loads(request_body)
# data = json.loads(prompt)
# response_body = json.dumps(data).encode('utf-8')
data = self.get_service_response(prompt)
response_body = json.dumps(data).encode('utf-8')
response_headers = [('Content-Type', 'application/json'), ('Content-Length', str(len(response_body)))]
@@ -34,6 +48,8 @@ class PathDispatcher:
method = env.get('REQUEST_METHOD').upper()
path = env.get('PATH_INFO')
# TODO: register route for POST /api/conversations
if not method == 'POST':
self.__http_415_notsupported(env, start_response)
@@ -44,7 +60,3 @@ class PathDispatcher:
start_response('400 Bad Request', self.response_headers)
return [json.dumps({'error': 'Invalid JSON'}).encode('utf-8')]
def register(self, method, path, function):
    """Map an HTTP (method, path) pair to *function* in the route table.

    The method name is lower-cased so registration is case-insensitive;
    the handler is returned unchanged, which lets register() double as
    a decorator.
    """
    key = (method.lower(), path)
    self.routes[key] = function
    return function

View File

@@ -1,6 +1,6 @@
import json
from PathDispatcher import PathDispatcher
from controller import ApiController
from wsgiref.simple_server import make_server
@@ -14,9 +14,8 @@ class RestApiServer:
def listen(self):
port = 9999
dispatcher = PathDispatcher()
dispatcher.register('POST', '/', self.post_response)
with make_server('', port, dispatcher) as wsgi_srv:
controller = ApiController()
with make_server('', port, controller) as wsgi_srv:
print(f'listening on port {port}...')
wsgi_srv.serve_forever()

View File

@@ -4,10 +4,13 @@ import onnxruntime_genai as og
import argparse
default_model_path = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4'
class Phi3LanguageModel:
def __init__(self, model_path):
def __init__(self, model_path=None):
# configure ONNX runtime
model_path = default_model_path if model_path == None else model_path
config = og.Config(model_path)
config.clear_providers()
self.model = og.Model(config)
@@ -17,7 +20,7 @@ class Phi3LanguageModel:
def get_response(self, prompt_input):
search_options = { 'max_length': 2048 }
search_options = { 'max_length': 1024 }
params = og.GeneratorParams(self.model)
params.set_search_options(**search_options)
generator = og.Generator(self.model, params)
@@ -28,20 +31,22 @@ class Phi3LanguageModel:
input_tokens = self.tokenizer.encode(prompt)
generator.append_tokens(input_tokens)
print("Output: ", end='', flush=True)
# generate output
output = ''
try:
while not generator.is_done():
generator.generate_next_token()
new_token = generator.get_next_tokens()[0]
print(self.tokenizer_stream.decode(new_token), end='', flush=True)
decoded = self.tokenizer_stream.decode(new_token)
output = output + decoded
except Exception as e:
print(f'{e}')
return f'{e}'
return { 'response': output }
if __name__ == "__main__":
parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
parser.add_argument('-m', '--model_path', type=str, required=False, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
parser.add_argument('-p', '--prompt', type=str, required=True, help='Prompt input')
parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
@@ -52,5 +57,10 @@ if __name__ == "__main__":
parser.add_argument('--repetition_penalty', type=float, help='Repetition penalty to sample with')
args = parser.parse_args()
model = Phi3LanguageModel(args.model_path)
try:
model_path = args.model_path
except:
model_path = None
model = Phi3LanguageModel(model_path)
model.get_response(args.prompt)