mirror of
https://github.com/lightbroker/llmsecops-research.git
synced 2026-07-01 10:55:32 +02:00
Merge pull request #4 from lightbroker/init
support API POST with LLM service layer response
This commit is contained in:
@@ -1,4 +1,4 @@
|
||||
name: LLM Prompt Testing
|
||||
name: 'LLM Prompt Testing'
|
||||
|
||||
on:
|
||||
# push:
|
||||
@@ -14,28 +14,23 @@ jobs:
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Set up git LFS
|
||||
- name: 'set up git LFS'
|
||||
run: git lfs install
|
||||
|
||||
- name: Set up Python
|
||||
- name: 'set up Python'
|
||||
uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: '3.12'
|
||||
|
||||
- name: Set up HuggingFace LLM
|
||||
- name: 'set up Microsoft Phi-3 Mini 4k LLM from HuggingFace'
|
||||
run: |
|
||||
pip install huggingface-hub[cli]
|
||||
huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
|
||||
huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir ${{ github.workspace }}/tests/llm
|
||||
pip install onnxruntime-genai
|
||||
# curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
|
||||
python ${{ github.workspace }}/tests/llm/phi3_language_model.py \
|
||||
--prompt 'Describe the principle of existence, from the first principles of philosophy.' \
|
||||
-m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 \
|
||||
|
||||
- name: Run REST API server
|
||||
- name: 'run HTTP server and call REST API'
|
||||
run: |
|
||||
python ${{ github.workspace }}/tests/api/server.py &
|
||||
|
||||
- name: Test API call
|
||||
run: |
|
||||
curl -i localhost:9999
|
||||
nohup python -m tests.api.server > server.log 2>&1 &
|
||||
sleep 2
|
||||
curl -X POST -i localhost:9999 -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || true
|
||||
cat server.log
|
||||
@@ -1,50 +0,0 @@
|
||||
import cgi
|
||||
import json
|
||||
|
||||
|
||||
class PathDispatcher:
    """Minimal WSGI application that dispatches POST requests by path.

    Handlers are stored in ``routes`` keyed by ``(METHOD, path)``. A POST to
    a path with no registered handler is answered by echoing the JSON
    request body back to the client.
    """

    def __init__(self):
        # (METHOD, path) -> WSGI-style callable(env, start_response)
        self.routes = {}

    def _send_json(self, start_response, status, payload):
        """Serialize ``payload`` as JSON, start the response, return the body."""
        body = json.dumps(payload).encode('utf-8')
        headers = [
            ('Content-Type', 'application/json'),
            ('Content-Length', str(len(body))),
        ]
        start_response(status, headers)
        return [body]

    def __http_415_notsupported(self, env, start_response):
        """Answer a request this dispatcher refuses to handle (non-POST)."""
        return self._send_json(
            start_response,
            '415 Unsupported Media Type',
            {'error': 'Unsupported Content-Type'},
        )

    def __http_200_ok(self, env, start_response):
        """Default handler: parse the JSON request body and echo it back.

        Raises:
            json.JSONDecodeError: if the body is not valid JSON; the caller
                (``__call__``) turns this into a 400 response.
        """
        try:
            # CONTENT_LENGTH may be missing or an empty string.
            request_body_size = max(int(env.get('CONTENT_LENGTH') or 0), 0)
        except ValueError:
            request_body_size = 0

        request_body = env['wsgi.input'].read(request_body_size)
        request_body = request_body.decode('utf-8')

        # for now, just reading request and echoing back in response
        data = json.loads(request_body)
        return self._send_json(start_response, '200 OK', data)

    def __call__(self, env, start_response):
        """WSGI entry point.

        Args:
            env: the WSGI environ dict.
            start_response: the WSGI ``start_response`` callable.

        Returns:
            A list containing the encoded response body.
        """
        method = (env.get('REQUEST_METHOD') or '').upper()
        path = env.get('PATH_INFO')

        if method != 'POST':
            # Return here: falling through would run a handler as well and
            # call start_response a second time.
            return self.__http_415_notsupported(env, start_response)

        try:
            handler = self.routes.get((method, path), self.__http_200_ok)
            return handler(env, start_response)
        except (json.JSONDecodeError, UnicodeDecodeError):
            return self._send_json(
                start_response, '400 Bad Request', {'error': 'Invalid JSON'}
            )

    def register(self, method, path, function):
        """Register ``function`` as the handler for ``method`` on ``path``.

        The method is normalized to upper case so that it matches the key
        ``__call__`` looks up. Returns ``function`` unchanged.
        """
        self.routes[method.upper(), path] = function
        return function
|
||||
@@ -0,0 +1,68 @@
|
||||
import json
|
||||
import traceback
|
||||
|
||||
from tests.llm.phi3_language_model import Phi3LanguageModel
|
||||
|
||||
|
||||
class ApiController:
    """WSGI application that sends a JSON ``prompt`` to the Phi-3 service.

    Only POST requests are handled. The request body must be a JSON object
    with a non-empty ``prompt`` string; the value returned by the language
    model service is serialized back to the client as JSON.
    """

    def __init__(self):
        # Optional (METHOD, path) -> handler overrides. Nothing populates it
        # in this class, so every POST falls through to the prompt handler.
        self.routes = {}

    def _send(self, start_response, status, body, content_type='application/json'):
        """Start the response with matching headers and return the WSGI body.

        Args:
            start_response: the WSGI ``start_response`` callable.
            status: full status line, e.g. ``'200 OK'``.
            body: the already-encoded response bytes.
            content_type: value for the ``Content-Type`` header.
        """
        headers = [
            ('Content-Type', content_type),
            ('Content-Length', str(len(body))),
        ]
        start_response(status, headers)
        return [body]

    def __http_415_notsupported(self, env, start_response):
        """Answer a request this controller refuses to handle (non-POST)."""
        body = json.dumps({'error': 'Unsupported Content-Type'}).encode('utf-8')
        return self._send(start_response, '415 Unsupported Media Type', body)

    def get_service_response(self, prompt):
        """Return the language model's response to ``prompt``.

        A new ``Phi3LanguageModel`` is constructed on every call.
        """
        service = Phi3LanguageModel()
        return service.get_response(prompt_input=prompt)

    def __http_200_ok(self, env, start_response):
        """Handle a POST: read the JSON body, run the prompt, reply with JSON.

        Raises:
            json.JSONDecodeError: if the body is not valid JSON.
            UnicodeDecodeError: if the body is not valid UTF-8.
            Both are converted to 400 responses by ``__call__``.
        """
        try:
            # CONTENT_LENGTH may be missing or an empty string.
            request_body_size = max(int(env.get('CONTENT_LENGTH') or 0), 0)
        except ValueError:
            request_body_size = 0

        request_body = env['wsgi.input'].read(request_body_size)
        request_json = json.loads(request_body.decode('utf-8'))

        # Valid JSON is not necessarily an object, and the key may be absent;
        # reject those here rather than handing None to the model.
        prompt = request_json.get('prompt') if isinstance(request_json, dict) else None
        if not isinstance(prompt, str) or not prompt.strip():
            message = b'request body must be a JSON object with a non-empty "prompt" string'
            return self._send(start_response, '400 Bad Request', message, 'text/plain')

        data = self.get_service_response(prompt)
        response_body = json.dumps(data).encode('utf-8')
        return self._send(start_response, '200 OK', response_body)

    def __call__(self, env, start_response):
        """WSGI entry point.

        Args:
            env: the WSGI environ dict.
            start_response: the WSGI ``start_response`` callable.

        Returns:
            A list containing the encoded response body.
        """
        method = (env.get('REQUEST_METHOD') or '').upper()
        path = env.get('PATH_INFO')

        # TODO: register route for POST /api/conversations

        if method != 'POST':
            # Return here: falling through would run the POST handler as well
            # and call start_response a second time.
            return self.__http_415_notsupported(env, start_response)

        try:
            handler = self.routes.get((method, path), self.__http_200_ok)
            return handler(env, start_response)
        except json.JSONDecodeError as e:
            return self._send(
                start_response, '400 Bad Request', e.msg.encode('utf-8'), 'text/plain'
            )
        except UnicodeDecodeError:
            return self._send(
                start_response,
                '400 Bad Request',
                b'request body is not valid UTF-8',
                'text/plain',
            )
        except Exception as e:
            # Log to stdout so it shows in GitHub Actions
            print("Exception occurred:")
            traceback.print_exc()

            # Return more detailed error response
            body = f"Internal Server Error:\n{e}\n".encode()
            return self._send(
                start_response, '500 Internal Server Error', body, 'text/plain'
            )
|
||||
+3
-4
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
|
||||
from PathDispatcher import PathDispatcher
|
||||
from tests.api.controller import ApiController
|
||||
from wsgiref.simple_server import make_server
|
||||
|
||||
|
||||
@@ -14,9 +14,8 @@ class RestApiServer:
|
||||
|
||||
def listen(self):
|
||||
port = 9999
|
||||
dispatcher = PathDispatcher()
|
||||
dispatcher.register('POST', '/', self.post_response)
|
||||
with make_server('', port, dispatcher) as wsgi_srv:
|
||||
controller = ApiController()
|
||||
with make_server('', port, controller) as wsgi_srv:
|
||||
print(f'listening on port {port}...')
|
||||
wsgi_srv.serve_forever()
|
||||
|
||||
|
||||
@@ -1,13 +1,16 @@
|
||||
# TODO: business logic for REST API interaction w/ LLM via prompt input
|
||||
|
||||
import onnxruntime_genai as og
|
||||
import argparse
|
||||
import onnxruntime_genai as og
|
||||
import os
|
||||
|
||||
|
||||
class Phi3LanguageModel:
|
||||
|
||||
def __init__(self, model_path):
|
||||
def __init__(self, model_path=None):
|
||||
# configure ONNX runtime
|
||||
base_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
|
||||
config = og.Config(model_path)
|
||||
config.clear_providers()
|
||||
self.model = og.Model(config)
|
||||
@@ -17,7 +20,7 @@ class Phi3LanguageModel:
|
||||
|
||||
def get_response(self, prompt_input):
|
||||
|
||||
search_options = { 'max_length': 2048 }
|
||||
search_options = { 'max_length': 1024 }
|
||||
params = og.GeneratorParams(self.model)
|
||||
params.set_search_options(**search_options)
|
||||
generator = og.Generator(self.model, params)
|
||||
@@ -28,20 +31,22 @@ class Phi3LanguageModel:
|
||||
input_tokens = self.tokenizer.encode(prompt)
|
||||
generator.append_tokens(input_tokens)
|
||||
|
||||
print("Output: ", end='', flush=True)
|
||||
|
||||
# generate output
|
||||
output = ''
|
||||
try:
|
||||
while not generator.is_done():
|
||||
generator.generate_next_token()
|
||||
new_token = generator.get_next_tokens()[0]
|
||||
print(self.tokenizer_stream.decode(new_token), end='', flush=True)
|
||||
decoded = self.tokenizer_stream.decode(new_token)
|
||||
output = output + decoded
|
||||
except Exception as e:
|
||||
print(f'{e}')
|
||||
return f'{e}'
|
||||
return { 'response': output }
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
|
||||
parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
|
||||
parser.add_argument('-m', '--model_path', type=str, required=False, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
|
||||
parser.add_argument('-p', '--prompt', type=str, required=True, help='Prompt input')
|
||||
parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
|
||||
parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
|
||||
@@ -52,5 +57,10 @@ if __name__ == "__main__":
|
||||
parser.add_argument('--repetition_penalty', type=float, help='Repetition penalty to sample with')
|
||||
args = parser.parse_args()
|
||||
|
||||
model = Phi3LanguageModel(args.model_path)
|
||||
try:
|
||||
model_path = args.model_path
|
||||
except:
|
||||
model_path = None
|
||||
|
||||
model = Phi3LanguageModel(model_path)
|
||||
model.get_response(args.prompt)
|
||||
|
||||
Reference in New Issue
Block a user