initial port from private repo

This commit is contained in:
Adam Wilson
2025-04-03 20:32:13 -06:00
parent 617b8d6b51
commit a91cfd0a81
9 changed files with 456 additions and 1 deletion

.github/workflows/llmsecops-cicd.yml vendored Normal file

@@ -0,0 +1,39 @@
name: REST Server

on:
  # push:
  #   branches: [ "main" ]
  # pull_request:
  #   branches: [ "main" ]
  workflow_dispatch:

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
      - name: Set up git LFS
        run: git lfs install
      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: '3.12'
      - name: Download Huggingface CLI
        run: |
          pip install huggingface-hub[cli]
          huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
          pip install onnxruntime-genai
          curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
          python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu -v
      - name: Run REST API server
        run: |
          python ${{ github.workspace }}/tests/api/server.py &
      - name: Test API call
        run: |
          curl -i localhost:9999/hello
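
Note: with the push and pull_request triggers commented out, this workflow only runs when dispatched manually. Assuming the repo is hosted on GitHub and the GitHub CLI is available, a manual dispatch looks like:

```sh
$ gh workflow run llmsecops-cicd.yml
```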

.gitignore vendored Normal file

@@ -0,0 +1,187 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc
# HuggingFace / Microsoft LLM supporting files
# (these are downloaded for local development via bash script, or inside GH Action workflow context)
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/configuration_phi3.py
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json
tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.model

LICENSE Normal file

@@ -0,0 +1,53 @@
MIT License
Copyright (c) 2025 Adam Wilson
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
Additional source code is based on work by Microsoft:
https://github.com/microsoft/onnxruntime-genai
That repo is made available under the MIT License, referenced below:
https://github.com/microsoft/onnxruntime-genai/blob/main/LICENSE
MIT License
Copyright (c) Microsoft Corporation.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

@@ -1 +1,11 @@
# llmsecops-research
# LLMSecOps Research
## Overview
This repo supports graduate research conducted by Adam Wilson for the M.Sc. in Information Security Engineering program at the SANS Technology Institute.
## Local setup (Linux Ubuntu)
```sh
$ sudo ./llm_setup.sh
```
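
After setup, a quick local smoke test of the test REST server (the same two calls made by the CI workflow's "Run REST API server" and "Test API call" steps) should look roughly like:

```sh
$ source .env/bin/activate
$ python tests/api/server.py &
$ curl -i localhost:9999/hello
```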

llm_setup.sh Executable file

@@ -0,0 +1,21 @@
#!/usr/bin/bash
# create Python virtual environment
virtualenv --python="/usr/bin/python3.12" .env
source .env/bin/activate
# the ONNX model/data require git Large File Storage (LFS) support
git lfs install
# get the system-under-test LLM dependencies from HuggingFace / Microsoft
pip3.12 install huggingface-hub[cli]
cd ./tests/llm
huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/* --local-dir .
pip3.12 install onnxruntime-genai
if ! [[ -e ./phi3-qa.py ]]
then
    curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o ./phi3-qa.py
fi
python3.12 ./phi3-qa.py -m ./cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu -v

tests/api/PathDispatcher.py Normal file

@@ -0,0 +1,22 @@
import cgi  # note: deprecated since Python 3.11; still available on the pinned 3.12

class PathDispatcher:
    """Minimal WSGI dispatcher that routes (method, path) pairs to handler functions."""
    def __init__(self):
        self.routes = {}

    def notfound_404(self, env, start_response):
        start_response('404 Not Found', [ ('Content-Type', 'text/plain') ])
        return [b'Not Found']

    def __call__(self, env, start_response):
        path = env.get('PATH_INFO')
        # parse query string / form body from the request stream ('wsgi.input', not 'wsgi.output')
        params = cgi.FieldStorage(env.get('wsgi.input'), environ=env)
        method = env.get('REQUEST_METHOD').lower()
        env['params'] = { key: params.getvalue(key) for key in params }
        handler = self.routes.get((method, path), self.notfound_404)
        return handler(env, start_response)

    def register(self, method, path, function):
        self.routes[method.lower(), path] = function
        return function
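
A minimal usage sketch (hypothetical handler and route, not part of this commit) showing how a registered handler can read the query/form parameters the dispatcher parses into `environ['params']`:

```python
from wsgiref.simple_server import make_server
from PathDispatcher import PathDispatcher

def echo(environ, start_response):
    # hypothetical handler: echo back the 'prompt' parameter parsed by the dispatcher
    prompt = environ['params'].get('prompt', '')
    start_response('200 OK', [('Content-Type', 'text/plain')])
    return [f'prompt was: {prompt}\n'.encode('utf-8')]

dispatcher = PathDispatcher()
dispatcher.register('GET', '/echo', echo)
make_server('', 9999, dispatcher).serve_forever()  # e.g. curl 'localhost:9999/echo?prompt=hi'
```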

tests/api/server.py Normal file

@@ -0,0 +1,24 @@
from PathDispatcher import PathDispatcher
from wsgiref.simple_server import make_server

class RestApiServer:
    def __init__(self):
        pass

    def response_function(self, environ, start_response):
        start_response('200 OK', [('Content-Type', 'text/html')])
        yield 'testing...\n'.encode('utf-8')

    def listen(self):
        port = 9999
        dispatcher = PathDispatcher()
        dispatcher.register('GET', '/hello', self.response_function)
        wsgi_srv = make_server('', port, dispatcher)
        print(f'listening on port {port}...')
        wsgi_srv.serve_forever()

if __name__ == '__main__':
    srv = RestApiServer()
    srv.listen()

tests/llm/phi3-qa.py Normal file

@@ -0,0 +1,98 @@
import onnxruntime_genai as og
import argparse
import time

def main(args):
    if args.verbose: print("Loading model...")
    if args.timings:
        started_timestamp = 0
        first_token_timestamp = 0

    config = og.Config(args.model_path)
    config.clear_providers()
    if args.execution_provider != "cpu":
        if args.verbose: print(f"Setting model to {args.execution_provider}")
        config.append_provider(args.execution_provider)
    model = og.Model(config)

    if args.verbose: print("Model loaded")
    tokenizer = og.Tokenizer(model)
    tokenizer_stream = tokenizer.create_stream()
    if args.verbose: print("Tokenizer created")
    if args.verbose: print()

    search_options = {name: getattr(args, name) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature', 'repetition_penalty'] if name in args}

    # Set the max length to something sensible by default, unless it is specified by the user,
    # since otherwise it will be set to the entire context length
    if 'max_length' not in search_options:
        search_options['max_length'] = 2048

    chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'

    params = og.GeneratorParams(model)
    params.set_search_options(**search_options)
    generator = og.Generator(model, params)

    # Keep asking for input prompts in a loop
    while True:
        text = input("Input: ")
        if not text:
            print("Error, input cannot be empty")
            continue

        if args.timings: started_timestamp = time.time()

        # If there is a chat template, use it
        prompt = f'{chat_template.format(input=text)}'

        input_tokens = tokenizer.encode(prompt)
        generator.append_tokens(input_tokens)
        if args.verbose: print("Generator created")

        if args.verbose: print("Running generation loop ...")
        if args.timings:
            first = True
            new_tokens = []

        print()
        print("Output: ", end='', flush=True)

        try:
            while not generator.is_done():
                generator.generate_next_token()
                if args.timings:
                    if first:
                        first_token_timestamp = time.time()
                        first = False

                new_token = generator.get_next_tokens()[0]
                print(tokenizer_stream.decode(new_token), end='', flush=True)
                if args.timings: new_tokens.append(new_token)
        except KeyboardInterrupt:
            print(" --control+c pressed, aborting generation--")

        print()
        print()

        if args.timings:
            prompt_time = first_token_timestamp - started_timestamp
            run_time = time.time() - first_token_timestamp
            print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {(prompt_time):.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
    parser.add_argument('-e', '--execution_provider', type=str, required=True, choices=["cpu", "cuda", "dml"], help="Execution provider to run ONNX model with")
    parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
    parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
    parser.add_argument('-ds', '--do_sample', action='store_true', default=False, help='Do random sampling. When false, greedy or beam search are used to generate the output. Defaults to false')
    parser.add_argument('-p', '--top_p', type=float, help='Top p probability to sample with')
    parser.add_argument('-k', '--top_k', type=int, help='Top k tokens to sample from')
    parser.add_argument('-t', '--temperature', type=float, help='Temperature to sample with')
    parser.add_argument('-r', '--repetition_penalty', type=float, help='Repetition penalty to sample with')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print verbose output and timing information. Defaults to false')
    parser.add_argument('-g', '--timings', action='store_true', default=False, help='Print timing information for each generation step. Defaults to false')
    args = parser.parse_args()
    main(args)
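
As a pointer toward the TODO below (wiring the LLM behind the REST API), a minimal non-interactive sketch using only the onnxruntime_genai calls already present above might look like this; the model path and prompt are placeholders:

```python
import onnxruntime_genai as og

# assumed local model path (the folder llm_setup.sh downloads into)
MODEL_DIR = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4'

config = og.Config(MODEL_DIR)
config.clear_providers()  # CPU execution, as in the setup script
model = og.Model(config)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

params = og.GeneratorParams(model)
params.set_search_options(max_length=256)
generator = og.Generator(model, params)

# same chat template as phi3-qa.py, with a single hard-coded prompt
prompt = '<|user|>\nSay hello. <|end|>\n<|assistant|>'
generator.append_tokens(tokenizer.encode(prompt))

while not generator.is_done():
    generator.generate_next_token()
    print(tokenizer_stream.decode(generator.get_next_tokens()[0]), end='', flush=True)
print()
```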


@@ -0,0 +1 @@
# TODO: business logic for REST API interaction w/ LLM via prompt input