From 48c0abaae20a12f0f4d78c5514bb483906c58583 Mon Sep 17 00:00:00 2001
From: Adam Wilson <lightbrok3r@gmail.com>
Date: Mon, 19 May 2025 12:42:43 -0600
Subject: [PATCH] try to fix model path bug

---
 src/llm/llm.py | 134 ++++++++++++++++++++-----------------------------
 1 file changed, 54 insertions(+), 80 deletions(-)

diff --git a/src/llm/llm.py b/src/llm/llm.py
index 58cc2ffa8..d47f767a5 100644
--- a/src/llm/llm.py
+++ b/src/llm/llm.py
@@ -56,86 +56,60 @@ class Phi3LanguageModel:
         except Exception as e:
             print(f"ERROR: Unexpected error when listing directory: {str(e)}")
 
+        # Load the tokenizer and model
+        tokenizer = AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path=model_path,
+            trust_remote_code=True,
+            local_files_only=True  # load from disk only; never download from the Hub
+        )
+        model = ORTModelForCausalLM.from_pretrained(
+            model_path,  # local model path (not a Hub model ID)
+            provider="CPUExecutionProvider",
+            trust_remote_code=True,
+            local_files_only=True  # load from disk only; never download from the Hub
+        )
+        model.name_or_path = model_path
+
+        # Create the text generation pipeline
+        pipe = pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.9,
+            repetition_penalty=1.1,
+            do_sample=True
+        )
+
+        # Create the LangChain LLM
+        llm = HuggingFacePipeline(pipeline=pipe)
+
+        # Phi-3 specific prompt template
+        template = """<|user|>
+        Act as a conversational agent to respond to the end user's prompt. 
+        Question: {question}
+        <|assistant|>
+        """
+
+        prompt = PromptTemplate.from_template(template)
         
-
-    # def invoke(self, user_input: str) -> str:
-    #     # Set up paths to the local model
-    #     base_dir = os.path.dirname(os.path.abspath(__file__))
-    #     model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
-    #     print(f"Loading Phi-3 model from: {model_path}")
-
-    #     # List and print the contents of the model_path directory
-    #     print(f"Listing contents of model directory: {model_path}")
-    #     try:
-    #         files = os.listdir(model_path)
-    #         for i, file in enumerate(files):
-    #             file_path = os.path.join(model_path, file)
-    #             file_size = os.path.getsize(file_path)
-    #             is_dir = os.path.isdir(file_path)
-    #             file_type = "dir" if is_dir else "file"
-    #             print(f"{i+1:2d}. {file:50s} [{file_type}] {file_size:,} bytes")
-    #         print(f"Total: {len(files)} items found")
-    #     except FileNotFoundError:
-    #         print(f"ERROR: Directory {model_path} not found!")
-    #     except PermissionError:
-    #         print(f"ERROR: Permission denied when accessing {model_path}")
-    #     except Exception as e:
-    #         print(f"ERROR: Unexpected error when listing directory: {str(e)}")
-
-    #     # Load the tokenizer and model
-    #     tokenizer = AutoTokenizer.from_pretrained(
-    #         pretrained_model_name_or_path=model_path,
-    #         trust_remote_code=True,
-    #         local_files_only=True  # Add this line
-    #     )
-    #     model = ORTModelForCausalLM.from_pretrained(
-    #         model_path,  # Change model_id to just model_path
-    #         provider="CPUExecutionProvider",
-    #         trust_remote_code=True,
-    #         local_files_only=True  # Add this line
-    #     )
-    #     model.name_or_path = model_path
-
-    #     # Create the text generation pipeline
-    #     pipe = pipeline(
-    #         "text-generation",
-    #         model=model,
-    #         tokenizer=tokenizer,
-    #         max_new_tokens=512,
-    #         temperature=0.7,
-    #         top_p=0.9,
-    #         repetition_penalty=1.1,
-    #         do_sample=True
-    #     )
-
-    #     # Create the LangChain LLM
-    #     llm = HuggingFacePipeline(pipeline=pipe)
-
-    #     # Phi-3 specific prompt template
-    #     template = """<|user|>
-    #     Act as a conversational agent to respond to the end user's prompt. 
-    #     Question: {question}
-    #     <|assistant|>
-    #     """
-
-    #     prompt = PromptTemplate.from_template(template)
+        # Create a chain using LCEL
+        chain = (
+            {"question": RunnablePassthrough()}
+            | prompt
+            | llm
+            | StrOutputParser()
+            | self.extract_assistant_response
+        )
         
-    #     # Create a chain using LCEL
-    #     chain = (
-    #         {"question": RunnablePassthrough()}
-    #         | prompt
-    #         | llm
-    #         | StrOutputParser()
-    #         | self.extract_assistant_response
-    #     )
-        
-    #     try:
-    #         # Get response from the chain
-    #         response = chain.invoke(user_input)
-    #         # Print the answer
-    #         print(response)
-    #         return response
-    #     except Exception as e:
-    #         print(f"Failed: {e}")
-    #         return e
+        try:
+            # Get response from the chain
+            response = chain.invoke(user_input)
+            # Print the answer
+            print(response)
+            return response
+        except Exception as e:
+            print(f"Failed: {e}")
+            return e