From a91cfd0a818fa68cf9e6ebd1d7fcbf69994f0c74 Mon Sep 17 00:00:00 2001
From: Adam Wilson
Date: Thu, 3 Apr 2025 20:32:13 -0600
Subject: [PATCH] initial port from private repo

---
 .github/workflows/llmsecops-cicd.yml |  39 ++++++
 .gitignore                           | 187 +++++++++++++++++++++++++++
 LICENSE                              |  53 ++++++++
 README.md                            |  12 +-
 llm_setup.sh                         |  21 +++
 tests/api/PathDispatcher.py          |  22 ++++
 tests/api/server.py                  |  24 ++++
 tests/llm/phi3-qa.py                 |  98 ++++++++++++++
 tests/llm/phi3_interface.py          |   1 +
 9 files changed, 456 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/llmsecops-cicd.yml
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100755 llm_setup.sh
 create mode 100644 tests/api/PathDispatcher.py
 create mode 100644 tests/api/server.py
 create mode 100644 tests/llm/phi3-qa.py
 create mode 100644 tests/llm/phi3_interface.py

diff --git a/.github/workflows/llmsecops-cicd.yml b/.github/workflows/llmsecops-cicd.yml
new file mode 100644
index 000000000..b0492a415
--- /dev/null
+++ b/.github/workflows/llmsecops-cicd.yml
+name: REST Server
+
+on:
+  # push:
+  #   branches: [ "main" ]
+  # pull_request:
+  #   branches: [ "main" ]
+  workflow_dispatch:
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
+
+      - name: Set up git LFS
+        run: git lfs install
+
+      - name: Set up Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.12'
+
+      - name: Download Phi-3 model and dependencies
+        run: |
+          pip install "huggingface-hub[cli]"
+          huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/*" --local-dir .
+          pip install onnxruntime-genai
+          curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o phi3-qa.py
+          python phi3-qa.py -m cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu -v
+
+      - name: Run REST API server
+        run: |
+          python ${{ github.workspace }}/tests/api/server.py &
+          sleep 5  # give the background server a moment to bind before the next step probes it
+
+      - name: Test API call
+        run: |
+          curl -i localhost:9999/hello

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..6c65f4fce
--- /dev/null
+++ b/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+#   .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+# Ruff stuff:
+.ruff_cache/
+
+# PyPI configuration file
+.pypirc
+
+# HuggingFace / Microsoft LLM supporting files
+# (these are downloaded for local development via bash script, or inside the GH Actions workflow context)
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/added_tokens.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/config.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/configuration_phi3.py
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/genai_config.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/phi3-mini-4k-instruct-cpu-int4-rtn-block-32-acc-level-4.onnx.data
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/special_tokens_map.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer_config.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.json
+tests/llm/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/tokenizer.model

diff --git a/LICENSE b/LICENSE
new file mode 100644
index 000000000..efff28d58
--- /dev/null
+++ b/LICENSE
+MIT License
+
+Copyright (c) 2025 Adam Wilson
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+
+------------------------------------------------------------------------------
+
+Additional source code is based on work by Microsoft:
+https://github.com/microsoft/onnxruntime-genai
+
+That repo is made available under the MIT License, linked below:
+https://github.com/microsoft/onnxruntime-genai/blob/main/LICENSE
+
+
+    MIT License
+
+    Copyright (c) Microsoft Corporation.
+
+    Permission is hereby granted, free of charge, to any person obtaining a copy
+    of this software and associated documentation files (the "Software"), to deal
+    in the Software without restriction, including without limitation the rights
+    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+    copies of the Software, and to permit persons to whom the Software is
+    furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included in all
+    copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+    SOFTWARE.
\ No newline at end of file

diff --git a/README.md b/README.md
index 5f782932f..e49e81383 100644
--- a/README.md
+++ b/README.md
-# llmsecops-research
\ No newline at end of file
+# LLMSecOps Research
+
+## Overview
+
+This repo supports graduate research conducted by Adam Wilson for the M.Sc. in Information Security Engineering program at the SANS Technology Institute.
+
+## Local setup (Linux Ubuntu)
+
+```sh
+$ sudo ./llm_setup.sh
+```
\ No newline at end of file

diff --git a/llm_setup.sh b/llm_setup.sh
new file mode 100755
index 000000000..0e0ed55e7
--- /dev/null
+++ b/llm_setup.sh
+#!/usr/bin/bash
+
+# create Python virtual environment
+virtualenv --python="/usr/bin/python3.12" .env
+source .env/bin/activate
+
+# the ONNX model/data require Git Large File Storage (LFS) support
+git lfs install
+
+# get the system-under-test LLM dependencies from HuggingFace / Microsoft
+pip3.12 install "huggingface-hub[cli]"
+cd ./tests/llm
+huggingface-cli download microsoft/Phi-3-mini-4k-instruct-onnx --include "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/*" --local-dir .
+pip3.12 install onnxruntime-genai
+
+if ! [[ -e ./phi3-qa.py ]]
+then
+    curl https://raw.githubusercontent.com/microsoft/onnxruntime-genai/main/examples/python/phi3-qa.py -o ./phi3-qa.py
+fi
+
+python3.12 ./phi3-qa.py -m ./cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4 -e cpu -v
\ No newline at end of file

diff --git a/tests/api/PathDispatcher.py b/tests/api/PathDispatcher.py
new file mode 100644
index 000000000..0649b8201
--- /dev/null
+++ b/tests/api/PathDispatcher.py
+import cgi  # NOTE: deprecated per PEP 594 and removed in Python 3.13; this project pins Python 3.12
+
+
+class PathDispatcher:
+    def __init__(self):
+        self.routes = {}
+
+    def notfound_404(self, env, start_response):
+        start_response('404 Not Found', [('Content-Type', 'text/plain')])
+        return [b'Not Found']
+
+    def __call__(self, env, start_response):
+        path = env.get('PATH_INFO')
+        params = cgi.FieldStorage(env.get('wsgi.input'), environ=env)
+        method = env.get('REQUEST_METHOD').lower()
+        env['params'] = {key: params.getvalue(key) for key in params}
+        handler = self.routes.get((method, path), self.notfound_404)
+        return handler(env, start_response)
+
+    def register(self, method, path, function):
+        self.routes[method.lower(), path] = function
+        return function

diff --git a/tests/api/server.py b/tests/api/server.py
new file mode 100644
index 000000000..9abc4565d
--- /dev/null
+++ b/tests/api/server.py
+from PathDispatcher import PathDispatcher
+from wsgiref.simple_server import make_server
+
+
+class RestApiServer:
+    def __init__(self):
+        pass
+
+    def response_function(self, environ, start_response):
+        start_response('200 OK', [('Content-Type', 'text/plain')])
+        yield b'testing...\n'
+
+    def listen(self):
+        port = 9999
+        dispatcher = PathDispatcher()
+        dispatcher.register('GET', '/hello', self.response_function)
+        wsgi_srv = make_server('', port, dispatcher)
+        print(f'listening on port {port}...')
+        wsgi_srv.serve_forever()
+
+
+if __name__ == '__main__':
+    srv = RestApiServer()
+    srv.listen()

diff --git a/tests/llm/phi3-qa.py b/tests/llm/phi3-qa.py
new file mode 100644
index 000000000..56cc8a82d
--- /dev/null
+++ b/tests/llm/phi3-qa.py
+import onnxruntime_genai as og
+import argparse
+import time
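+# NOTE: this file is a copy of Microsoft's onnxruntime-genai example script
+# (examples/python/phi3-qa.py), carried here under the Microsoft MIT license
+# notice included in LICENSE.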
+
+def main(args):
+    if args.verbose: print("Loading model...")
+    if args.timings:
+        started_timestamp = 0
+        first_token_timestamp = 0
+
+    config = og.Config(args.model_path)
+    config.clear_providers()
+    if args.execution_provider != "cpu":
+        if args.verbose: print(f"Setting model to {args.execution_provider}")
+        config.append_provider(args.execution_provider)
+    model = og.Model(config)
+
+    if args.verbose: print("Model loaded")
+
+    tokenizer = og.Tokenizer(model)
+    tokenizer_stream = tokenizer.create_stream()
+    if args.verbose: print("Tokenizer created")
+    if args.verbose: print()
+    search_options = {name: getattr(args, name) for name in ['do_sample', 'max_length', 'min_length', 'top_p', 'top_k', 'temperature', 'repetition_penalty'] if name in args}
+
+    # Set the max length to something sensible by default, unless it is specified by the user,
+    # since otherwise it will be set to the entire context length
+    if 'max_length' not in search_options:
+        search_options['max_length'] = 2048
+
+    chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>'
+
+    params = og.GeneratorParams(model)
+    params.set_search_options(**search_options)
+    generator = og.Generator(model, params)
+
+    # Keep asking for input prompts in a loop
+    while True:
+        text = input("Input: ")
+        if not text:
+            print("Error, input cannot be empty")
+            continue
+
+        if args.timings: started_timestamp = time.time()
+
+        # Apply the chat template to the raw prompt text
+        prompt = chat_template.format(input=text)
+
+        input_tokens = tokenizer.encode(prompt)
+
+        generator.append_tokens(input_tokens)
+        if args.verbose: print("Prompt tokens appended to generator")
+
+        if args.verbose: print("Running generation loop ...")
+        if args.timings:
+            first = True
+            new_tokens = []
+
+        print()
+        print("Output: ", end='', flush=True)
+
+        try:
+            while not generator.is_done():
+                generator.generate_next_token()
+                if args.timings:
+                    if first:
+                        first_token_timestamp = time.time()
+                        first = False
+
+                new_token = generator.get_next_tokens()[0]
+                print(tokenizer_stream.decode(new_token), end='', flush=True)
+                if args.timings: new_tokens.append(new_token)
+        except KeyboardInterrupt:
+            print(" --control+c pressed, aborting generation--")
+        print()
+        print()
+
+        if args.timings:
+            prompt_time = first_token_timestamp - started_timestamp
+            run_time = time.time() - first_token_timestamp
+            print(f"Prompt length: {len(input_tokens)}, New tokens: {len(new_tokens)}, Time to first: {prompt_time:.2f}s, Prompt tokens per second: {len(input_tokens)/prompt_time:.2f} tps, New tokens per second: {len(new_tokens)/run_time:.2f} tps")
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS, description="End-to-end AI Question/Answer example for gen-ai")
+    parser.add_argument('-m', '--model_path', type=str, required=True, help='Onnx model folder path (must contain genai_config.json and model.onnx)')
+    parser.add_argument('-e', '--execution_provider', type=str, required=True, choices=["cpu", "cuda", "dml"], help="Execution provider to run ONNX model with")
+    parser.add_argument('-i', '--min_length', type=int, help='Min number of tokens to generate including the prompt')
+    parser.add_argument('-l', '--max_length', type=int, help='Max number of tokens to generate including the prompt')
+    parser.add_argument('-ds', '--do_sample', action='store_true', default=False, help='Do random sampling. When false, greedy or beam search are used to generate the output. Defaults to false')
+    parser.add_argument('-p', '--top_p', type=float, help='Top p probability to sample with')
+    parser.add_argument('-k', '--top_k', type=int, help='Top k tokens to sample from')
+    parser.add_argument('-t', '--temperature', type=float, help='Temperature to sample with')
+    parser.add_argument('-r', '--repetition_penalty', type=float, help='Repetition penalty to sample with')
+    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Print verbose output and timing information. Defaults to false')
+    parser.add_argument('-g', '--timings', action='store_true', default=False, help='Print timing information for each generation step. Defaults to false')
+    args = parser.parse_args()
+    main(args)

diff --git a/tests/llm/phi3_interface.py b/tests/llm/phi3_interface.py
new file mode 100644
index 000000000..ec8805df0
--- /dev/null
+++ b/tests/llm/phi3_interface.py
+# TODO: business logic for REST API interaction with the LLM via prompt input
\ No newline at end of file
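
One possible starting point for the tests/llm/phi3_interface.py TODO is sketched below. This is a minimal, untested outline only: it reuses the onnxruntime_genai calls already present in phi3-qa.py above, while the generate_response name, the module-level MODEL_PATH constant, and the 2048-token default are illustrative assumptions rather than part of this patch.

```python
# Hypothetical sketch for tests/llm/phi3_interface.py (not part of this patch):
# wraps a single prompt/response cycle so tests/api/server.py could call it.
import onnxruntime_genai as og

# Assumed model location, matching where llm_setup.sh places the download
MODEL_PATH = './cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4'
CHAT_TEMPLATE = '<|user|>\n{input} <|end|>\n<|assistant|>'


def generate_response(prompt: str, max_length: int = 2048) -> str:
    """Run one prompt through the local Phi-3 ONNX model and return the reply."""
    config = og.Config(MODEL_PATH)
    config.clear_providers()  # CPU execution, as in the workflow above
    model = og.Model(config)
    tokenizer = og.Tokenizer(model)

    params = og.GeneratorParams(model)
    params.set_search_options(max_length=max_length)
    generator = og.Generator(model, params)
    generator.append_tokens(tokenizer.encode(CHAT_TEMPLATE.format(input=prompt)))

    # Generate until the model emits its end-of-sequence token or hits max_length
    new_tokens = []
    while not generator.is_done():
        generator.generate_next_token()
        new_tokens.append(generator.get_next_tokens()[0])
    return tokenizer.decode(new_tokens)
```

Loading the model on every call is slow; a real implementation would likely build the Model and Tokenizer once at module load and reuse them across requests.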