diff --git a/.github/workflows/run_server.sh b/.github/workflows/run_server.sh
deleted file mode 100755
index 1f7bb00f4..000000000
--- a/.github/workflows/run_server.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/bin/bash
-
-# Get the directory of the script
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-# Navigate to the project root (2 levels up from .github/workflows)
-PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
-
-# Move to the project root
-cd "$PROJECT_ROOT"
-
-# Start Flask server in the background
-python -m src.api.controller &
-SERVER_PID=$!
-
-# Function to check if server is up
-wait_for_server() {
-    echo "Waiting for Flask server to start..."
-    local max_attempts=100
-    local attempt=0
-
-    while [ $attempt -lt $max_attempts ]; do
-        if curl -s http://localhost:9998/ > /dev/null 2>&1; then
-            echo "Server is up!"
-            return 0
-        fi
-
-        attempt=$((attempt + 1))
-        echo "Attempt $attempt/$max_attempts - Server not ready yet, waiting..."
-        sleep 1
-    done
-
-    echo "Server failed to start after $max_attempts attempts"
-    kill $SERVER_PID
-    return 1
-}
-
-# Wait for server to be ready
-wait_for_server || exit 1
-
-# Make the actual request once server is ready
-echo "Making API request..."
-curl -X POST -i http://localhost:9998/api/conversations \
-  -d '{ "prompt": "describe a random planet in our solar system in 10 words or less" }' \
-  -H "Content-Type: application/json" || exit 1
-echo
-
-exit 0
\ No newline at end of file
diff --git a/src/api/controller.flask.py b/src/api/controller.flask.py
deleted file mode 100644
index 3ff759964..000000000
--- a/src/api/controller.flask.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import logging
-from flask import Flask, jsonify, request
-from waitress import serve
-from src.llm.llm import Phi3LanguageModel
-from src.llm.llm_rag import Phi3LanguageModelWithRag
-
-app = Flask(__name__)
-
-@app.route('/', methods=['GET'])
-def health_check():
-    return f"Server is running\n", 200
-
-@app.route('/api/conversations', methods=['POST'])
-def get_llm_response():
-    prompt = request.json['prompt']
-    service = Phi3LanguageModel()
-    response = service.invoke(user_input=prompt)
-    return jsonify({'response': response}), 201
-
-if __name__ == '__main__':
-    logger = logging.Logger(name='Flask API', level=logging.DEBUG)
-    print('test')
-    logger.debug('running...')
-
-    # TODO set up port # as env var
-    serve(app, host='0.0.0.0', port=9999)
\ No newline at end of file
diff --git a/src/api/controller.py b/src/api/controller.py
index e0723ebc2..9f17159b6 100644
--- a/src/api/controller.py
+++ b/src/api/controller.py
@@ -1,5 +1,4 @@
 import json
-import time
 import traceback
diff --git a/src/api/http_api.py b/src/api/http_api.py
index ed0366878..5f0225bc3 100644
--- a/src/api/http_api.py
+++ b/src/api/http_api.py
@@ -1,35 +1,35 @@
-"""
-    Usage:
-    $ uvicorn src.api.http_api:app --host 0.0.0.0 --port 9999
-"""
+# """
+#     Usage:
+#     $ uvicorn src.api.http_api:app --host 0.0.0.0 --port 9999
+# """
 
-from fastapi import FastAPI
-from pathlib import Path
-from pydantic import BaseModel
-from src.llm.llm import Phi3LanguageModel
+# from fastapi import FastAPI
+# from pathlib import Path
+# from pydantic import BaseModel
+# from src.llm.llm import Phi3LanguageModel
 
-STATIC_PATH = Path(__file__).parent.absolute() / 'static'
+# STATIC_PATH = Path(__file__).parent.absolute() / 'static'
 
-app = FastAPI(
-    title='Phi-3 Language Model API',
-    description='HTTP API for interacting with Phi-3 Mini 4K language model'
-)
+# app = FastAPI(
+#     title='Phi-3 Language Model API',
+#     description='HTTP API for interacting with Phi-3 Mini 4K language model'
+# )
 
-class LanguageModelPrompt(BaseModel):
-    prompt: str
+# class LanguageModelPrompt(BaseModel):
+#     prompt: str
 
-class LanguageModelResponse(BaseModel):
-    response: str
+# class LanguageModelResponse(BaseModel):
+#     response: str
 
-@app.get('/', response_model=str)
-async def health_check():
-    return 'success'
+# @app.get('/', response_model=str)
+# async def health_check():
+#     return 'success'
 
-@app.post('/api/conversations', response_model=LanguageModelResponse)
-async def get_llm_conversation_response(request: LanguageModelPrompt):
-    service = Phi3LanguageModel()
-    response = service.invoke(user_input=request.prompt)
-    return LanguageModelResponse(response=response)
+# @app.post('/api/conversations', response_model=LanguageModelResponse)
+# async def get_llm_conversation_response(request: LanguageModelPrompt):
+#     service = Phi3LanguageModel()
+#     response = service.invoke(user_input=request.prompt)
+#     return LanguageModelResponse(response=response)
diff --git a/src/llm/llm.py b/src/llm/llm.py
index 8c737cef7..d68b53d73 100644
--- a/src/llm/llm.py
+++ b/src/llm/llm.py
@@ -5,16 +5,10 @@ RAG implementation with local Phi-3-mini-4k-instruct-onnx and embeddings
 import logging
 import os
 import sys
-from typing import List
 
 # LangChain imports
 from langchain_huggingface import HuggingFacePipeline
-from langchain_huggingface import HuggingFaceEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import FAISS
-from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
-from langchain.schema import Document
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough