mirror of
https://github.com/lightbroker/llmsecops-research.git
synced 2026-02-13 07:02:51 +00:00
LangChain WIP; demo of RAG integration from Claude Sonnet 3.7
.github/workflows/llmsecops-cicd.test.yml (vendored), 3 lines changed
@@ -31,5 +31,6 @@ jobs:
 pip install langchain_huggingface
 pip install transformers
 pip install accelerate
-python -m tests.llm.llm --prompt "What is the capital of France?"
+pip install optimum[exporters,onnxruntime]
+python -m tests.llm.llm_rag --prompt "What is the capital of France?"
requirements.txt (new file), 192 lines
@@ -0,0 +1,192 @@
accelerate==1.6.0
aiohappyeyeballs==2.6.1
aiohttp==3.11.18
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
attrs==25.3.0
avidtools==0.1.2
backoff==2.2.1
base2048==0.1.3
boto3==1.38.2
botocore==1.38.2
cachetools==5.5.2
certifi==2025.1.31
cffi==1.17.1
charset-normalizer==3.4.1
chevron==0.14.0
click==8.1.8
cmd2==2.4.3
cohere==4.57
colorama==0.4.6
coloredlogs==15.0.1
dataclasses-json==0.6.7
datasets==2.16.1
DateTime==5.5
deepl==1.17.0
dill==0.3.7
distro==1.9.0
ecoji==0.1.1
fastapi==0.115.12
fastavro==1.10.0
filelock==3.18.0
flatbuffers==25.2.10
frozenlist==1.6.0
fschat==0.2.36
fsspec==2023.10.0
garak==0.10.3.1
google-api-core==2.24.2
google-api-python-client==2.168.0
google-auth==2.39.0
google-auth-httplib2==0.2.0
googleapis-common-protos==1.70.0
greenlet==3.2.1
h11==0.14.0
httpcore==1.0.8
httplib2==0.22.0
httpx==0.28.1
httpx-sse==0.4.0
huggingface-hub==0.31.2
humanfriendly==10.0
idna==3.10
importlib-metadata==6.11.0
inquirerpy==0.3.4
Jinja2==3.1.6
jiter==0.9.0
jmespath==1.0.1
joblib==1.4.2
jsonpatch==1.33
jsonpath-ng==1.7.0
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2025.4.1
langchain==0.3.25
langchain-community==0.3.24
langchain-core==0.3.59
langchain-huggingface==0.2.0
langchain-text-splitters==0.3.8
langsmith==0.3.33
latex2mathml==3.77.0
litellm==1.67.2
lorem==0.1.1
Markdown==3.8
markdown-it-py==3.0.0
markdown2==2.5.3
MarkupSafe==3.0.2
marshmallow==3.26.1
mdurl==0.1.2
mpmath==1.3.0
multidict==6.4.3
multiprocess==0.70.15
mypy_extensions==1.1.0
nemollm==0.3.5
networkx==3.4.2
nh3==0.2.21
nltk==3.9.1
numpy==1.26.4
nvdlib==0.8.0
nvidia-cublas-cu12==12.6.4.1
nvidia-cuda-cupti-cu12==12.6.80
nvidia-cuda-nvrtc-cu12==12.6.77
nvidia-cuda-runtime-cu12==12.6.77
nvidia-cudnn-cu12==9.5.1.17
nvidia-cufft-cu12==11.3.0.4
nvidia-cufile-cu12==1.11.1.6
nvidia-curand-cu12==10.3.7.77
nvidia-cusolver-cu12==11.7.1.2
nvidia-cusparse-cu12==12.5.4.2
nvidia-cusparselt-cu12==0.6.3
nvidia-nccl-cu12==2.26.2
nvidia-nvjitlink-cu12==12.6.85
nvidia-nvtx-cu12==12.6.77
octoai-sdk==0.10.1
ollama==0.4.8
onnx==1.18.0
onnxruntime==1.21.0
onnxruntime-genai==0.7.0
openai==1.76.0
optimum==1.25.0
orjson==3.10.16
packaging==24.2
pandas==2.2.3
pfzy==0.3.4
pillow==10.4.0
ply==3.11
prompt_toolkit==3.0.50
propcache==0.3.1
proto-plus==1.26.1
protobuf==6.30.2
psutil==7.0.0
pyarrow==19.0.1
pyarrow-hotfix==0.6
pyasn1==0.6.1
pyasn1_modules==0.4.2
pycparser==2.22
pydantic==2.11.3
pydantic-settings==2.9.1
pydantic_core==2.33.1
Pygments==2.19.1
pyparsing==3.2.3
pyperclip==1.9.0
python-dateutil==2.9.0.post0
python-dotenv==1.1.0
python-magic==0.4.27
python-multipart==0.0.20
pytz==2025.2
PyYAML==6.0.2
RapidFuzz==3.13.0
referencing==0.36.2
regex==2024.11.6
replicate==1.0.4
requests==2.32.3
requests-futures==1.0.2
requests-toolbelt==1.0.0
rich==14.0.0
rpds-py==0.24.0
rsa==4.9.1
s3transfer==0.12.0
safetensors==0.5.3
scikit-learn==1.6.1
scipy==1.15.3
sentence-transformers==4.1.0
sentencepiece==0.2.0
setuptools==79.0.1
shortuuid==1.0.13
six==1.17.0
sniffio==1.3.1
soundfile==0.13.1
SQLAlchemy==2.0.40
starlette==0.46.2
stdlibs==2025.4.4
svgwrite==1.4.3
sympy==1.13.3
tenacity==9.1.2
threadpoolctl==3.6.0
tiktoken==0.9.0
timm==1.0.15
tokenizers==0.21.1
tomli==2.2.1
torch==2.7.0
torchvision==0.22.0
tqdm==4.67.1
transformers==4.51.3
triton==3.3.0
types-PyYAML==6.0.12.20250402
types-requests==2.32.0.20250328
typing-inspect==0.9.0
typing-inspection==0.4.0
typing_extensions==4.13.1
tzdata==2025.2
uritemplate==4.1.1
urllib3==2.3.0
uvicorn==0.34.2
wavedrom==2.0.3.post3
wcwidth==0.2.13
wn==0.9.5
xdg-base-dirs==6.0.2
xxhash==3.5.0
yarl==1.20.0
zalgolib==0.2.2
zipp==3.21.0
zope.interface==7.2
zstandard==0.23.0
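
Note on the pinned list above: tests/llm/llm_rag.py (added below) builds its vector store with langchain_community.vectorstores.FAISS, which imports the faiss package at runtime (normally installed as faiss-cpu), and that package appears neither here nor in the workflow's pip installs. A minimal, hypothetical pre-flight check, not part of the commit:

# Hypothetical pre-flight check (not part of the commit): llm_rag.py builds its
# index via langchain_community.vectorstores.FAISS, which needs the `faiss`
# package (distributed as faiss-cpu), not pinned in requirements.txt.
try:
    import faiss
    print(f"faiss available: {faiss.__version__}")
except ImportError:
    print("faiss missing; install faiss-cpu or FAISS.from_documents(...) will fail")
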
@@ -8,36 +8,52 @@ from langchain_huggingface import HuggingFaceEmbeddings
-from langchain_huggingface import HuggingFacePipeline
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 import os
+from langchain_community.llms import HuggingFacePipeline
+from optimum.onnxruntime import ORTModelForCausalLM
+from transformers import AutoTokenizer, pipeline


 class Llm:

     def __init__(self, model_path=None):

         # Get path to the model directory
         base_dir = os.path.dirname(os.path.abspath(__file__))
-        model_path = os.path.join(base_dir, "phi3")
-        tokenizer = AutoTokenizer.from_pretrained(model_path)
-        model = AutoModelForCausalLM.from_pretrained(
-            pretrained_model_name_or_path=model_path,
-            device_map="cpu",
-            trust_remote_code=True,  # If model requires custom code
+        model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
+
+        # Load the tokenizer from the local path
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_path,
+            trust_remote_code=True  # Important for some models with custom code
         )

-        # Create a pipeline
+        # Load the ONNX model with optimum from the local path
+        model = ORTModelForCausalLM.from_pretrained(
+            model_path,
+            provider="CPUExecutionProvider",
+            trust_remote_code=True
+        )
+
+        # Create a text generation pipeline
         pipe = pipeline(
             "text-generation",
             model=model,
             tokenizer=tokenizer,
             max_new_tokens=512,
             temperature=0.7,
             top_p=0.9,
             repetition_penalty=1.1,
             do_sample=True
         )

-        # Create LangChain LLM
-        self.hf_model = HuggingFacePipeline(pipeline=pipe)
+        # Create the LangChain LLM
+        self.llm = HuggingFacePipeline(pipeline=pipe)

     def get_response(self, input):
         # Use the model
         print(f'End user prompt: {input}')
-        response = self.hf_model.invoke(input)
+        response = self.llm.invoke(input)
         print(response)


 if __name__ == "__main__":
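
The hunk above is cut off at the if __name__ == "__main__": line, so the actual CLI wiring is not shown. A rough sketch of what it might look like, assuming an argparse --prompt flag to match the workflow call python -m tests.llm.llm --prompt "..."; this is illustrative only, not the committed code:

# Hypothetical sketch of the truncated entry point; not the committed code.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Query the local Phi-3 ONNX model")
    parser.add_argument("--prompt", required=True, help="Prompt text to send to the model")
    args = parser.parse_args()

    llm = Llm()  # Llm class from the diff above
    llm.get_response(args.prompt)
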
tests/llm/llm_rag.py (new file), 263 lines
@@ -0,0 +1,263 @@
"""
RAG implementation with local Phi-3-mini-4k-instruct-onnx and embeddings
"""

import os
from typing import List
import numpy as np

# LangChain imports
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.schema import Document

# HuggingFace and ONNX imports
from optimum.onnxruntime import ORTModelForCausalLM
from transformers import AutoTokenizer, pipeline

# ------------------------------------------------------
# 1. LOAD THE LOCAL PHI-3 MODEL
# ------------------------------------------------------

# Set up paths to the local model
base_dir = os.path.dirname(os.path.abspath(__file__))
model_path = os.path.join(base_dir, "cpu_and_mobile", "cpu-int4-rtn-block-32-acc-level-4")
print(f"Loading Phi-3 model from: {model_path}")

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = ORTModelForCausalLM.from_pretrained(
    model_path,
    provider="CPUExecutionProvider",
    trust_remote_code=True
)

# Create the text generation pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    do_sample=True
)

# Create the LangChain LLM
llm = HuggingFacePipeline(pipeline=pipe)

# ------------------------------------------------------
# 2. LOAD THE EMBEDDING MODEL
# ------------------------------------------------------

# Initialize the embedding model - using a small, efficient model
# Options:
# - "BAAI/bge-small-en-v1.5" (385MB, good performance/size ratio)
# - "sentence-transformers/all-MiniLM-L6-v2" (91MB, very small)
# - "intfloat/e5-small-v2" (134MB, good performance)
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True}
)
print("Embedding model loaded")

# ------------------------------------------------------
# 3. CREATE A SAMPLE DOCUMENT COLLECTION
# ------------------------------------------------------

# Sample documents about artificial intelligence
docs = [
    Document(
        page_content="Artificial intelligence (AI) is intelligence demonstrated by machines, unlike the natural intelligence displayed by humans and animals, which involves consciousness and emotionality.",
        metadata={"source": "AI Definition"}
    ),
    Document(
        page_content="Machine learning is a subset of AI focused on building systems that learn from data. Deep learning is a subset of machine learning based on neural networks.",
        metadata={"source": "Machine Learning"}
    ),
    Document(
        page_content="Neural networks are computing systems inspired by the biological neural networks that constitute animal brains. They form the basis of many modern AI systems.",
        metadata={"source": "Neural Networks"}
    ),
    Document(
        page_content="Natural Language Processing (NLP) is a field of AI that focuses on the interaction between computers and humans through natural language.",
        metadata={"source": "NLP"}
    ),
    Document(
        page_content="Reinforcement learning is an area of machine learning where an agent learns to make decisions by taking actions in an environment to maximize a reward signal.",
        metadata={"source": "Reinforcement Learning"}
    ),
    Document(
        page_content="Computer vision is a field of AI that enables computers to derive meaningful information from digital images, videos and other visual inputs.",
        metadata={"source": "Computer Vision"}
    ),
]

# ------------------------------------------------------
# 4. SPLIT DOCUMENTS AND CREATE VECTOR STORE
# ------------------------------------------------------

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators=["\n\n", "\n", ".", " ", ""]
)

# Split the documents
split_docs = text_splitter.split_documents(docs)
print(f"Split {len(docs)} documents into {len(split_docs)} chunks")

# Create a FAISS vector store from the chunks
vectorstore = FAISS.from_documents(split_docs, embeddings)
print("Vector store created")

# ------------------------------------------------------
# 5. CREATE RAG PROMPT TEMPLATE FOR PHI-3
# ------------------------------------------------------

# Phi-3 specific RAG prompt template
rag_prompt_template = """<|user|>
Use the following context to answer the question. If you don't know the answer based on the context, just say you don't know.

Context:
{context}

Question: {question}
<|assistant|>"""

# Create the prompt
prompt = PromptTemplate(
    template=rag_prompt_template,
    input_variables=["context", "question"]
)

# ------------------------------------------------------
# 6. CREATE RAG CHAIN
# ------------------------------------------------------

# Create the retrieval QA chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # "stuff" method puts all retrieved docs into one prompt
    retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),  # Retrieve top 3 results
    return_source_documents=True,  # Return source docs for transparency
    chain_type_kwargs={"prompt": prompt}  # Use our custom prompt
)

# ------------------------------------------------------
# 7. QUERY FUNCTIONS
# ------------------------------------------------------

def ask_rag(question: str):
    """Query the RAG system with a question"""
    print(f"\nQuestion: {question}")

    # Get response from the chain
    response = qa_chain({"query": question})

    # Print the answer
    print("\nAnswer:")
    print(response["result"])

    # Print the source documents
    print("\nSources:")
    for i, doc in enumerate(response["source_documents"]):
        print(f"\nSource {i+1}: {doc.metadata['source']}")
        print(f"Content: {doc.page_content}")

    return response

# ------------------------------------------------------
# 8. FUNCTION TO LOAD CUSTOM DOCUMENTS
# ------------------------------------------------------

def load_docs_from_files(file_paths: List[str]):
    """Load documents from files"""
    from langchain_community.document_loaders import TextLoader, PyPDFLoader

    all_docs = []
    for file_path in file_paths:
        try:
            if file_path.lower().endswith('.pdf'):
                loader = PyPDFLoader(file_path)
            else:
                loader = TextLoader(file_path)
            docs = loader.load()
            all_docs.extend(docs)
            print(f"Loaded {len(docs)} document(s) from {file_path}")
        except Exception as e:
            print(f"Error loading {file_path}: {e}")

    return all_docs


def create_rag_from_files(file_paths: List[str]):
    """Create a new RAG system from the provided files"""
    # Load documents
    loaded_docs = load_docs_from_files(file_paths)

    # Split documents
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
        separators=["\n\n", "\n", ".", " ", ""]
    )
    split_docs = text_splitter.split_documents(loaded_docs)

    # Create vector store
    new_vectorstore = FAISS.from_documents(split_docs, embeddings)

    # Create QA chain
    new_qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=new_vectorstore.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt}  # Use our custom prompt
    )

    return new_qa_chain

# ------------------------------------------------------
# 9. EXAMPLE USAGE
# ------------------------------------------------------

if __name__ == "__main__":
    # Test with sample questions
    print("\n===== RAG System Demo =====")

    # Example 1: Basic retrieval
    ask_rag("What is the difference between machine learning and deep learning?")

    # Example 2: Testing knowledge boundaries
    ask_rag("What are the key components of a neural network?")

    # Example 3: Question outside the knowledge base
    ask_rag("What is the capital of France?")

    print("\n===== Demo Complete =====")

# ------------------------------------------------------
# 10. SAVE AND LOAD VECTOR STORE FOR FUTURE USE
# ------------------------------------------------------

def save_vectorstore(vectorstore, directory="faiss_index"):
    """Save the FAISS vector store to disk"""
    vectorstore.save_local(directory)
    print(f"Vector store saved to {directory}")


def load_vectorstore(directory="faiss_index"):
    """Load a FAISS vector store from disk"""
    if os.path.exists(directory):
        loaded_vectorstore = FAISS.load_local(
            directory,
            embeddings,
            # Required by the pinned langchain-community 0.3.x: the FAISS
            # docstore is pickled, so deserialization must be opted into.
            allow_dangerous_deserialization=True
        )
        print(f"Vector store loaded from {directory}")
        return loaded_vectorstore
    else:
        print(f"No vector store found at {directory}")
        return None
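
create_rag_from_files, save_vectorstore, and load_vectorstore are defined above but never exercised by the demo. A minimal usage sketch, assuming the module-level objects defined earlier (vectorstore, llm, prompt, embeddings) and a placeholder file path:

# Hypothetical usage of the helpers above; the file path is a placeholder.

# Persist the demo index, then rebuild a QA chain from the reloaded copy.
save_vectorstore(vectorstore, directory="faiss_index")
reloaded = load_vectorstore(directory="faiss_index")
if reloaded is not None:
    qa_from_disk = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=reloaded.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt}
    )
    print(qa_from_disk({"query": "What is machine learning?"})["result"])

# Build a second RAG chain from custom documents (placeholder path).
custom_chain = create_rag_from_files(["notes/ai_notes.txt"])
print(custom_chain({"query": "Summarize the notes."})["result"])
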