From 48c0abaae20a12f0f4d78c5514bb483906c58583 Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Mon, 19 May 2025 12:42:43 -0600 Subject: [PATCH] try to fix model path bug --- src/llm/llm.py | 134 ++++++++++++++++++++----------------------------- 1 file changed, 54 insertions(+), 80 deletions(-) diff --git a/src/llm/llm.py b/src/llm/llm.py index 58cc2ffa8..d47f767a5 100644 --- a/src/llm/llm.py +++ b/src/llm/llm.py @@ -56,86 +56,60 @@ class Phi3LanguageModel: except Exception as e: print(f"ERROR: Unexpected error when listing directory: {str(e)}") + # Load the tokenizer and model + tokenizer = AutoTokenizer.from_pretrained( + pretrained_model_name_or_path=model_path, + trust_remote_code=True, + local_files_only=True # Add this line + ) + model = ORTModelForCausalLM.from_pretrained( + model_path, # Change model_id to just model_path + provider="CPUExecutionProvider", + trust_remote_code=True, + local_files_only=True # Add this line + ) + model.name_or_path = model_path + + # Create the text generation pipeline + pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=512, + temperature=0.7, + top_p=0.9, + repetition_penalty=1.1, + do_sample=True + ) + + # Create the LangChain LLM + llm = HuggingFacePipeline(pipeline=pipe) + + # Phi-3 specific prompt template + template = """<|user|> + Act as a conversational agent to respond to the end user's prompt. + Question: {question} + <|assistant|> + """ + + prompt = PromptTemplate.from_template(template) - - # def invoke(self, user_input: str) -> str: - # # Set up paths to the local model - # base_dir = os.path.dirname(os.path.abspath(__file__)) - # model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4") - # print(f"Loading Phi-3 model from: {model_path}") - - # # List and print the contents of the model_path directory - # print(f"Listing contents of model directory: {model_path}") - # try: - # files = os.listdir(model_path) - # for i, file in enumerate(files): - # file_path = os.path.join(model_path, file) - # file_size = os.path.getsize(file_path) - # is_dir = os.path.isdir(file_path) - # file_type = "dir" if is_dir else "file" - # print(f"{i+1:2d}. {file:50s} [{file_type}] {file_size:,} bytes") - # print(f"Total: {len(files)} items found") - # except FileNotFoundError: - # print(f"ERROR: Directory {model_path} not found!") - # except PermissionError: - # print(f"ERROR: Permission denied when accessing {model_path}") - # except Exception as e: - # print(f"ERROR: Unexpected error when listing directory: {str(e)}") - - # # Load the tokenizer and model - # tokenizer = AutoTokenizer.from_pretrained( - # pretrained_model_name_or_path=model_path, - # trust_remote_code=True, - # local_files_only=True # Add this line - # ) - # model = ORTModelForCausalLM.from_pretrained( - # model_path, # Change model_id to just model_path - # provider="CPUExecutionProvider", - # trust_remote_code=True, - # local_files_only=True # Add this line - # ) - # model.name_or_path = model_path - - # # Create the text generation pipeline - # pipe = pipeline( - # "text-generation", - # model=model, - # tokenizer=tokenizer, - # max_new_tokens=512, - # temperature=0.7, - # top_p=0.9, - # repetition_penalty=1.1, - # do_sample=True - # ) - - # # Create the LangChain LLM - # llm = HuggingFacePipeline(pipeline=pipe) - - # # Phi-3 specific prompt template - # template = """<|user|> - # Act as a conversational agent to respond to the end user's prompt. - # Question: {question} - # <|assistant|> - # """ - - # prompt = PromptTemplate.from_template(template) + # Create a chain using LCEL + chain = ( + {"question": RunnablePassthrough()} + | prompt + | llm + | StrOutputParser() + | self.extract_assistant_response + ) - # # Create a chain using LCEL - # chain = ( - # {"question": RunnablePassthrough()} - # | prompt - # | llm - # | StrOutputParser() - # | self.extract_assistant_response - # ) - - # try: - # # Get response from the chain - # response = chain.invoke(user_input) - # # Print the answer - # print(response) - # return response - # except Exception as e: - # print(f"Failed: {e}") - # return e + try: + # Get response from the chain + response = chain.invoke(user_input) + # Print the answer + print(response) + return response + except Exception as e: + print(f"Failed: {e}") + return e