Flask API

This commit is contained in:
Adam Wilson
2025-05-18 14:48:59 -06:00
parent e5c47ac8fb
commit 8679f9ad8d
19 changed files with 34 additions and 15 deletions
+2 -2
View File
@@ -33,9 +33,9 @@ jobs:
- name: 'run HTTP server and call REST API'
run: |
python -m tests.api.server
python -m src.api.controller
sleep 2
curl -X POST -i localhost:9999/api/conversations -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' || exit 1
curl -X POST -i localhost:9998/api/conversations -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' -H "Content-Type: application/json" || exit 1
echo
garak -v \
+10 -10
View File
@@ -175,13 +175,13 @@ cython_debug/
# HuggingFace / Microsoft LLM supporting files
# (these are downloaded for local development via bash script, or inside GH Action workflow context)
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/configuration_phi3.py
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.model
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/config.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/configuration_phi3.py
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json
src/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.model
@@ -1,8 +1,8 @@
import json
import traceback
from tests.llm.llm import Phi3LanguageModel
from tests.llm.llm_rag import Phi3LanguageModelWithRag
from src.llm.llm import Phi3LanguageModel
from src.llm.llm_rag import Phi3LanguageModelWithRag
class ApiController:
def __init__(self):
+19
View File
@@ -0,0 +1,19 @@
import logging
from flask import Flask, jsonify, request
from src.llm.llm import Phi3LanguageModel
from src.llm.llm_rag import Phi3LanguageModelWithRag
app = Flask(__name__)
@app.route('/api/conversations', methods=['POST'])
def get_llm_response():
    """Handle ``POST /api/conversations`` by passing the prompt to the model.

    Expects a JSON request body of the form ``{"prompt": "<text>"}``.

    Returns:
        A ``(body, status)`` tuple: ``{'response': ...}`` with status 201 on
        success, or ``{'error': ...}`` with status 400 when the JSON body is
        not an object containing a non-empty string ``prompt``.
    """
    payload = request.json

    # Validate the shape explicitly: indexing payload['prompt'] directly would
    # raise KeyError/TypeError and surface to the client as an opaque 500.
    prompt = payload.get('prompt') if isinstance(payload, dict) else None
    if not isinstance(prompt, str) or not prompt.strip():
        error = "request body must be a JSON object with a non-empty string 'prompt'"
        return jsonify({'error': error}), 400

    service = Phi3LanguageModel()
    response = service.invoke(user_input=prompt)
    return jsonify({'response': response}), 201
if __name__ == '__main__':
    # Attach a handler at DEBUG level and obtain the logger through
    # getLogger(): a directly-instantiated logging.Logger sits outside the
    # logger hierarchy with no handlers, so its DEBUG records are never shown.
    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger('Flask API')
    # NOTE(review): looks like leftover debug output - confirm before removing.
    print('test')
    logger.debug('running...')
    # NOTE(review): debug=True turns on Flask's debug mode; acceptable for
    # local/CI runs, but confirm it is never used for a deployed service.
    app.run(debug=True, port=9998)
+1 -1
View File
@@ -32,7 +32,7 @@ class Phi3LanguageModel:
return text
def invoke(self, user_input):
def invoke(self, user_input: str) -> str:
# Set up paths to the local model
base_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
View File
View File