From bf145e70b6cd29880a287df73dbdfc513d2f84cc Mon Sep 17 00:00:00 2001 From: Adam Wilson Date: Thu, 15 May 2025 09:30:25 -0600 Subject: [PATCH] LangChain WIP; demo of RAG integration from Claude Sonnet 3.7 --- .github/workflows/llmsecops-cicd.test.yml | 3 +- requirements.txt | 192 ++++++++++++++++ tests/llm/llm.py | 36 ++- tests/llm/llm_rag.py | 263 ++++++++++++++++++++++ 4 files changed, 483 insertions(+), 11 deletions(-) create mode 100644 requirements.txt create mode 100644 tests/llm/llm_rag.py diff --git a/.github/workflows/llmsecops-cicd.test.yml b/.github/workflows/llmsecops-cicd.test.yml index 7c733f159..bc196ffb0 100644 --- a/.github/workflows/llmsecops-cicd.test.yml +++ b/.github/workflows/llmsecops-cicd.test.yml @@ -31,5 +31,6 @@ jobs: pip install langchain_huggingface pip install transformers pip install accelerate - python -m tests.llm.llm --prompt "What is the capital of France?" + pip install optimum[exporters,onnxruntime] + python -m tests.llm.llm_rag --prompt "What is the capital of France?" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..07baf0a56 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,192 @@ +accelerate==1.6.0 +aiohappyeyeballs==2.6.1 +aiohttp==3.11.18 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +avidtools==0.1.2 +backoff==2.2.1 +base2048==0.1.3 +boto3==1.38.2 +botocore==1.38.2 +cachetools==5.5.2 +certifi==2025.1.31 +cffi==1.17.1 +charset-normalizer==3.4.1 +chevron==0.14.0 +click==8.1.8 +cmd2==2.4.3 +cohere==4.57 +colorama==0.4.6 +coloredlogs==15.0.1 +dataclasses-json==0.6.7 +datasets==2.16.1 +DateTime==5.5 +deepl==1.17.0 +dill==0.3.7 +distro==1.9.0 +ecoji==0.1.1 +fastapi==0.115.12 +fastavro==1.10.0 +filelock==3.18.0 +flatbuffers==25.2.10 +frozenlist==1.6.0 +fschat==0.2.36 +fsspec==2023.10.0 +garak==0.10.3.1 +google-api-core==2.24.2 +google-api-python-client==2.168.0 +google-auth==2.39.0 +google-auth-httplib2==0.2.0 +googleapis-common-protos==1.70.0 +greenlet==3.2.1 +h11==0.14.0 +httpcore==1.0.8 +httplib2==0.22.0 +httpx==0.28.1 +httpx-sse==0.4.0 +huggingface-hub==0.31.2 +humanfriendly==10.0 +idna==3.10 +importlib-metadata==6.11.0 +inquirerpy==0.3.4 +Jinja2==3.1.6 +jiter==0.9.0 +jmespath==1.0.1 +joblib==1.4.2 +jsonpatch==1.33 +jsonpath-ng==1.7.0 +jsonpointer==3.0.0 +jsonschema==4.23.0 +jsonschema-specifications==2025.4.1 +langchain==0.3.25 +langchain-community==0.3.24 +langchain-core==0.3.59 +langchain-huggingface==0.2.0 +langchain-text-splitters==0.3.8 +langsmith==0.3.33 +latex2mathml==3.77.0 +litellm==1.67.2 +lorem==0.1.1 +Markdown==3.8 +markdown-it-py==3.0.0 +markdown2==2.5.3 +MarkupSafe==3.0.2 +marshmallow==3.26.1 +mdurl==0.1.2 +mpmath==1.3.0 +multidict==6.4.3 +multiprocess==0.70.15 +mypy_extensions==1.1.0 +nemollm==0.3.5 +networkx==3.4.2 +nh3==0.2.21 +nltk==3.9.1 +numpy==1.26.4 +nvdlib==0.8.0 +nvidia-cublas-cu12==12.6.4.1 +nvidia-cuda-cupti-cu12==12.6.80 +nvidia-cuda-nvrtc-cu12==12.6.77 +nvidia-cuda-runtime-cu12==12.6.77 +nvidia-cudnn-cu12==9.5.1.17 +nvidia-cufft-cu12==11.3.0.4 +nvidia-cufile-cu12==1.11.1.6 +nvidia-curand-cu12==10.3.7.77 +nvidia-cusolver-cu12==11.7.1.2 +nvidia-cusparse-cu12==12.5.4.2 +nvidia-cusparselt-cu12==0.6.3 +nvidia-nccl-cu12==2.26.2 +nvidia-nvjitlink-cu12==12.6.85 +nvidia-nvtx-cu12==12.6.77 +octoai-sdk==0.10.1 +ollama==0.4.8 +onnx==1.18.0 +onnxruntime==1.21.0 +onnxruntime-genai==0.7.0 +openai==1.76.0 +optimum==1.25.0 +orjson==3.10.16 +packaging==24.2 +pandas==2.2.3 +pfzy==0.3.4 +pillow==10.4.0 +ply==3.11 +prompt_toolkit==3.0.50 +propcache==0.3.1 +proto-plus==1.26.1 
+protobuf==6.30.2 +psutil==7.0.0 +pyarrow==19.0.1 +pyarrow-hotfix==0.6 +pyasn1==0.6.1 +pyasn1_modules==0.4.2 +pycparser==2.22 +pydantic==2.11.3 +pydantic-settings==2.9.1 +pydantic_core==2.33.1 +Pygments==2.19.1 +pyparsing==3.2.3 +pyperclip==1.9.0 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.0 +python-magic==0.4.27 +python-multipart==0.0.20 +pytz==2025.2 +PyYAML==6.0.2 +RapidFuzz==3.13.0 +referencing==0.36.2 +regex==2024.11.6 +replicate==1.0.4 +requests==2.32.3 +requests-futures==1.0.2 +requests-toolbelt==1.0.0 +rich==14.0.0 +rpds-py==0.24.0 +rsa==4.9.1 +s3transfer==0.12.0 +safetensors==0.5.3 +scikit-learn==1.6.1 +scipy==1.15.3 +sentence-transformers==4.1.0 +sentencepiece==0.2.0 +setuptools==79.0.1 +shortuuid==1.0.13 +six==1.17.0 +sniffio==1.3.1 +soundfile==0.13.1 +SQLAlchemy==2.0.40 +starlette==0.46.2 +stdlibs==2025.4.4 +svgwrite==1.4.3 +sympy==1.13.3 +tenacity==9.1.2 +threadpoolctl==3.6.0 +tiktoken==0.9.0 +timm==1.0.15 +tokenizers==0.21.1 +tomli==2.2.1 +torch==2.7.0 +torchvision==0.22.0 +tqdm==4.67.1 +transformers==4.51.3 +triton==3.3.0 +types-PyYAML==6.0.12.20250402 +types-requests==2.32.0.20250328 +typing-inspect==0.9.0 +typing-inspection==0.4.0 +typing_extensions==4.13.1 +tzdata==2025.2 +uritemplate==4.1.1 +urllib3==2.3.0 +uvicorn==0.34.2 +wavedrom==2.0.3.post3 +wcwidth==0.2.13 +wn==0.9.5 +xdg-base-dirs==6.0.2 +xxhash==3.5.0 +yarl==1.20.0 +zalgolib==0.2.2 +zipp==3.21.0 +zope.interface==7.2 +zstandard==0.23.0 diff --git a/tests/llm/llm.py b/tests/llm/llm.py index cf60716c6..98a9f4b4a 100644 --- a/tests/llm/llm.py +++ b/tests/llm/llm.py @@ -8,36 +8,52 @@ from langchain_huggingface import HuggingFaceEmbeddings from langchain_huggingface import HuggingFacePipeline from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline +import os +from langchain_community.llms import HuggingFacePipeline +from optimum.onnxruntime import ORTModelForCausalLM +from transformers import AutoTokenizer, pipeline class Llm: def __init__(self, model_path=None): + + # Get path to the model directory using your specified structure base_dir = os.path.dirname(os.path.abspath(__file__)) - model_path = os.path.join(base_dir, "phi3") - tokenizer = AutoTokenizer.from_pretrained(model_path) - model = AutoModelForCausalLM.from_pretrained( - pretrained_model_name_or_path=model_path, - device_map="cpu", # Use available GPU - trust_remote_code=True, # If model requires custom code + model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4") + + # Load the tokenizer from local path + tokenizer = AutoTokenizer.from_pretrained( + model_path, + trust_remote_code=True # Important for some models with custom code ) - # Create a pipeline + # Load the ONNX model with optimum from local path + model = ORTModelForCausalLM.from_pretrained( + model_path, + provider="CPUExecutionProvider", + trust_remote_code=True + ) + + # Create a text generation pipeline pipe = pipeline( "text-generation", model=model, tokenizer=tokenizer, max_new_tokens=512, temperature=0.7, + top_p=0.9, + repetition_penalty=1.1, + do_sample=True ) - # Create LangChain LLM - self.hf_model = HuggingFacePipeline(pipeline=pipe) + # Create the LangChain LLM + self.llm = HuggingFacePipeline(pipeline=pipe) def get_response(self, input): # Use the model print(f'End user prompt: {input}') - response = self.hf_model.invoke(input) + response = self.llm.invoke(input) print(response) if __name__ == "__main__": diff --git a/tests/llm/llm_rag.py b/tests/llm/llm_rag.py new file mode 100644 index 000000000..8bdbe50b9 --- /dev/null +++ 
b/tests/llm/llm_rag.py @@ -0,0 +1,263 @@ +""" +RAG implementation with local Phi-3-mini-4k-instruct-onnx and embeddings +""" + +import os +from typing import List +import numpy as np + +# LangChain imports +from langchain_community.llms import HuggingFacePipeline +from langchain_community.embeddings import HuggingFaceEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_community.vectorstores import FAISS +from langchain.chains import RetrievalQA +from langchain.prompts import PromptTemplate +from langchain.schema import Document + +# HuggingFace and ONNX imports +from optimum.onnxruntime import ORTModelForCausalLM +from transformers import AutoTokenizer, pipeline + +# ------------------------------------------------------ +# 1. LOAD THE LOCAL PHI-3 MODEL +# ------------------------------------------------------ + +# Set up paths to the local model +base_dir = os.path.dirname(os.path.abspath(__file__)) +model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4") +print(f"Loading Phi-3 model from: {model_path}") + +# Load the tokenizer and model +tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) +model = ORTModelForCausalLM.from_pretrained( + model_path, + provider="CPUExecutionProvider", + trust_remote_code=True +) + +# Create the text generation pipeline +pipe = pipeline( + "text-generation", + model=model, + tokenizer=tokenizer, + max_new_tokens=512, + temperature=0.7, + top_p=0.9, + repetition_penalty=1.1, + do_sample=True +) + +# Create the LangChain LLM +llm = HuggingFacePipeline(pipeline=pipe) + +# ------------------------------------------------------ +# 2. LOAD THE EMBEDDING MODEL +# ------------------------------------------------------ + +# Initialize the embedding model - using a small, efficient model +# Options: +# - "BAAI/bge-small-en-v1.5" (385MB, good performance/size ratio) +# - "sentence-transformers/all-MiniLM-L6-v2" (91MB, very small) +# - "intfloat/e5-small-v2" (134MB, good performance) +embeddings = HuggingFaceEmbeddings( + model_name="sentence-transformers/all-MiniLM-L6-v2", + model_kwargs={"device": "cpu"}, + encode_kwargs={"normalize_embeddings": True} +) +print("Embedding model loaded") + +# ------------------------------------------------------ +# 3. CREATE A SAMPLE DOCUMENT COLLECTION +# ------------------------------------------------------ + +# Sample documents about artificial intelligence +docs = [ + Document( + page_content="Artificial intelligence (AI) is intelligence demonstrated by machines, unlike the natural intelligence displayed by humans and animals, which involves consciousness and emotionality.", + metadata={"source": "AI Definition"} + ), + Document( + page_content="Machine learning is a subset of AI focused on building systems that learn from data. Deep learning is a subset of machine learning based on neural networks.", + metadata={"source": "Machine Learning"} + ), + Document( + page_content="Neural networks are computing systems inspired by the biological neural networks that constitute animal brains. 
They form the basis of many modern AI systems.", + metadata={"source": "Neural Networks"} + ), + Document( + page_content="Natural Language Processing (NLP) is a field of AI that focuses on the interaction between computers and humans through natural language.", + metadata={"source": "NLP"} + ), + Document( + page_content="Reinforcement learning is an area of machine learning where an agent learns to make decisions by taking actions in an environment to maximize a reward signal.", + metadata={"source": "Reinforcement Learning"} + ), + Document( + page_content="Computer vision is a field of AI that enables computers to derive meaningful information from digital images, videos and other visual inputs.", + metadata={"source": "Computer Vision"} + ), +] + +# ------------------------------------------------------ +# 4. SPLIT DOCUMENTS AND CREATE VECTOR STORE +# ------------------------------------------------------ + +# Split documents into chunks +text_splitter = RecursiveCharacterTextSplitter( + chunk_size=500, + chunk_overlap=50, + separators=["\n\n", "\n", ".", " ", ""] +) + +# Split the documents +split_docs = text_splitter.split_documents(docs) +print(f"Split {len(docs)} documents into {len(split_docs)} chunks") + +# Create a FAISS vector store from the chunks +vectorstore = FAISS.from_documents(split_docs, embeddings) +print("Vector store created") + +# ------------------------------------------------------ +# 5. CREATE RAG PROMPT TEMPLATE FOR PHI-3 +# ------------------------------------------------------ + +# Phi-3 specific RAG prompt template +rag_prompt_template = """<|user|> +Use the following context to answer the question. If you don't know the answer based on the context, just say you don't know. + +Context: +{context} + +Question: {question} +<|assistant|>""" + +# Create the prompt +prompt = PromptTemplate( + template=rag_prompt_template, + input_variables=["context", "question"] +) + +# ------------------------------------------------------ +# 6. CREATE RAG CHAIN +# ------------------------------------------------------ + +# Create the retrieval QA chain +qa_chain = RetrievalQA.from_chain_type( + llm=llm, + chain_type="stuff", # "stuff" method puts all retrieved docs into one prompt + retriever=vectorstore.as_retriever(search_kwargs={"k": 3}), # Retrieve top 3 results + return_source_documents=True, # Return source docs for transparency + chain_type_kwargs={"prompt": prompt} # Use our custom prompt +) + +# ------------------------------------------------------ +# 7. QUERY FUNCTIONS +# ------------------------------------------------------ + +def ask_rag(question: str): + """Query the RAG system with a question""" + print(f"\nQuestion: {question}") + + # Get response from the chain + response = qa_chain({"query": question}) + + # Print the answer + print("\nAnswer:") + print(response["result"]) + + # Print the source documents + print("\nSources:") + for i, doc in enumerate(response["source_documents"]): + print(f"\nSource {i+1}: {doc.metadata['source']}") + print(f"Content: {doc.page_content}") + + return response + +# ------------------------------------------------------ +# 8. 
FUNCTION TO LOAD CUSTOM DOCUMENTS
+# ------------------------------------------------------
+
+def load_docs_from_files(file_paths: List[str]):
+    """Load documents from files"""
+    from langchain_community.document_loaders import TextLoader, PyPDFLoader
+
+    all_docs = []
+    for file_path in file_paths:
+        try:
+            if file_path.lower().endswith('.pdf'):
+                loader = PyPDFLoader(file_path)
+            else:
+                loader = TextLoader(file_path)
+            docs = loader.load()
+            all_docs.extend(docs)
+            print(f"Loaded {len(docs)} document(s) from {file_path}")
+        except Exception as e:
+            print(f"Error loading {file_path}: {e}")
+
+    return all_docs
+
+def create_rag_from_files(file_paths: List[str]):
+    """Create a new RAG system from the provided files"""
+    # Load documents
+    loaded_docs = load_docs_from_files(file_paths)
+
+    # Split documents
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=50,
+        separators=["\n\n", "\n", ".", " ", ""]
+    )
+    split_docs = text_splitter.split_documents(loaded_docs)
+
+    # Create vector store
+    new_vectorstore = FAISS.from_documents(split_docs, embeddings)
+
+    # Create QA chain
+    new_qa_chain = RetrievalQA.from_chain_type(
+        llm=llm,
+        chain_type="stuff",
+        retriever=new_vectorstore.as_retriever(search_kwargs={"k": 3}),
+        return_source_documents=True,
+        chain_type_kwargs={"prompt": prompt} # Use our custom prompt
+    )
+
+    return new_qa_chain
+
+# ------------------------------------------------------
+# 9. EXAMPLE USAGE
+# ------------------------------------------------------
+
+if __name__ == "__main__":
+    # Test with sample questions
+    print("\n===== RAG System Demo =====")
+
+    # Example 1: Basic retrieval
+    ask_rag("What is the difference between machine learning and deep learning?")
+
+    # Example 2: Testing knowledge boundaries
+    ask_rag("What are the key components of a neural network?")
+
+    # Example 3: Question outside the knowledge base
+    ask_rag("What is the capital of France?")
+
+    print("\n===== Demo Complete =====")
+
+# ------------------------------------------------------
+# 10. SAVE AND LOAD VECTOR STORE FOR FUTURE USE
+# ------------------------------------------------------
+
+def save_vectorstore(vectorstore, directory="faiss_index"):
+    """Save the FAISS vector store to disk"""
+    vectorstore.save_local(directory)
+    print(f"Vector store saved to {directory}")
+
+def load_vectorstore(directory="faiss_index"):
+    """Load a FAISS vector store from disk"""
+    if os.path.exists(directory):
+        # Opting in to pickle deserialization is required by the pinned langchain-community release
+        loaded_vectorstore = FAISS.load_local(directory, embeddings, allow_dangerous_deserialization=True)
+        print(f"Vector store loaded from {directory}")
+        return loaded_vectorstore
+    else:
+        print(f"No vector store found at {directory}")
+        return None
\ No newline at end of file
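
As a quick local check of the new RAG module (beyond the CI invocation in the workflow above), a minimal smoke-test sketch might look like the following. This is only an illustration under a few assumptions: it is run from the repository root with tests/ and tests/llm/ importable as packages, the quantized Phi-3 model files are present under tests/llm/cpu_and_mobile/, and faiss-cpu plus sentence-transformers are installed in the environment (faiss-cpu is not pinned in requirements.txt).

# Hypothetical smoke test; names come from tests/llm/llm_rag.py in this patch.
# Importing the module loads the local Phi-3 ONNX model and builds the
# sample FAISS index, exposing ask_rag(), vectorstore, and embeddings.
from tests.llm import llm_rag

# Query the in-memory sample knowledge base.
llm_rag.ask_rag("How does reinforcement learning differ from supervised learning?")

# Round-trip the FAISS index through disk. Recent langchain-community
# releases require explicitly opting in to pickle deserialization when
# reloading a locally created index, which load_vectorstore handles.
llm_rag.save_vectorstore(llm_rag.vectorstore, "faiss_index")
restored = llm_rag.load_vectorstore("faiss_index")
if restored is not None:
    print(restored.similarity_search("neural networks", k=2))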