From 0c6bc7c78f5bf88b61495b8585ccc878b3cc7ae4 Mon Sep 17 00:00:00 2001
From: Adam Wilson
Date: Mon, 19 May 2025 12:32:52 -0600
Subject: [PATCH] try to fix model path bug

---
 src/llm/llm.py | 134 +++++++++++++++++++++++++++++--------------------
 1 file changed, 80 insertions(+), 54 deletions(-)

diff --git a/src/llm/llm.py b/src/llm/llm.py
index d47f767a5..58cc2ffa8 100644
--- a/src/llm/llm.py
+++ b/src/llm/llm.py
@@ -56,60 +56,86 @@ class Phi3LanguageModel:
         except Exception as e:
             print(f"ERROR: Unexpected error when listing directory: {str(e)}")
 
-        # Load the tokenizer and model
-        tokenizer = AutoTokenizer.from_pretrained(
-            pretrained_model_name_or_path=model_path,
-            trust_remote_code=True,
-            local_files_only=True  # Add this line
-        )
-        model = ORTModelForCausalLM.from_pretrained(
-            model_path,  # Change model_id to just model_path
-            provider="CPUExecutionProvider",
-            trust_remote_code=True,
-            local_files_only=True  # Add this line
-        )
-        model.name_or_path = model_path
-
-        # Create the text generation pipeline
-        pipe = pipeline(
-            "text-generation",
-            model=model,
-            tokenizer=tokenizer,
-            max_new_tokens=512,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True
-        )
-
-        # Create the LangChain LLM
-        llm = HuggingFacePipeline(pipeline=pipe)
-
-        # Phi-3 specific prompt template
-        template = """<|user|>
-        Act as a conversational agent to respond to the end user's prompt.
-        Question: {question}
-        <|assistant|>
-        """
-
-        prompt = PromptTemplate.from_template(template)
-        # Create a chain using LCEL
-        chain = (
-            {"question": RunnablePassthrough()}
-            | prompt
-            | llm
-            | StrOutputParser()
-            | self.extract_assistant_response
-        )
+
+    # def invoke(self, user_input: str) -> str:
+    #     # Set up paths to the local model
+    #     base_dir = os.path.dirname(os.path.abspath(__file__))
+    #     model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
+    #     print(f"Loading Phi-3 model from: {model_path}")
+
+    #     # List and print the contents of the model_path directory
+    #     print(f"Listing contents of model directory: {model_path}")
+    #     try:
+    #         files = os.listdir(model_path)
+    #         for i, file in enumerate(files):
+    #             file_path = os.path.join(model_path, file)
+    #             file_size = os.path.getsize(file_path)
+    #             is_dir = os.path.isdir(file_path)
+    #             file_type = "dir" if is_dir else "file"
+    #             print(f"{i+1:2d}. {file:50s} [{file_type}] {file_size:,} bytes")
+    #         print(f"Total: {len(files)} items found")
+    #     except FileNotFoundError:
+    #         print(f"ERROR: Directory {model_path} not found!")
+    #     except PermissionError:
+    #         print(f"ERROR: Permission denied when accessing {model_path}")
+    #     except Exception as e:
+    #         print(f"ERROR: Unexpected error when listing directory: {str(e)}")
+
+    #     # Load the tokenizer and model
+    #     tokenizer = AutoTokenizer.from_pretrained(
+    #         pretrained_model_name_or_path=model_path,
+    #         trust_remote_code=True,
+    #         local_files_only=True  # Add this line
+    #     )
+    #     model = ORTModelForCausalLM.from_pretrained(
+    #         model_path,  # Change model_id to just model_path
+    #         provider="CPUExecutionProvider",
+    #         trust_remote_code=True,
+    #         local_files_only=True  # Add this line
+    #     )
+    #     model.name_or_path = model_path
+
+    #     # Create the text generation pipeline
+    #     pipe = pipeline(
+    #         "text-generation",
+    #         model=model,
+    #         tokenizer=tokenizer,
+    #         max_new_tokens=512,
+    #         temperature=0.7,
+    #         top_p=0.9,
+    #         repetition_penalty=1.1,
+    #         do_sample=True
+    #     )
+
+    #     # Create the LangChain LLM
+    #     llm = HuggingFacePipeline(pipeline=pipe)
+
+    #     # Phi-3 specific prompt template
+    #     template = """<|user|>
+    #     Act as a conversational agent to respond to the end user's prompt.
+    #     Question: {question}
+    #     <|assistant|>
+    #     """
+
+    #     prompt = PromptTemplate.from_template(template)
-        try:
-            # Get response from the chain
-            response = chain.invoke(user_input)
-            # Print the answer
-            print(response)
-            return response
-        except Exception as e:
-            print(f"Failed: {e}")
-            return e
+    #     # Create a chain using LCEL
+    #     chain = (
+    #         {"question": RunnablePassthrough()}
+    #         | prompt
+    #         | llm
+    #         | StrOutputParser()
+    #         | self.extract_assistant_response
+    #     )
+
+    #     try:
+    #         # Get response from the chain
+    #         response = chain.invoke(user_input)
+    #         # Print the answer
+    #         print(response)
+    #         return response
+    #     except Exception as e:
+    #         print(f"Failed: {e}")
+    #         return e