fuzzforge_ai/.github/workflows/benchmark.yml
tduhamel42 11b3e6db6a fix: Resolve CI failures for v0.7.0 release
Fix lint errors:
- Remove unused Optional import from gitleaks workflow
- Remove unused logging import from trufflehog activities

Fix documentation broken links:
- Update workspace-isolation links to use /docs/ prefix in resource-management.md
- Update workspace-isolation links to use /docs/ prefix in create-workflow.md

Fix benchmark dependency:
- Add fuzzforge-sdk installation to benchmark workflow
- SDK is required for bench_comparison.py import

All CI checks should now pass.
2025-10-16 12:55:20 +02:00

name: Benchmarks

on:
  # Run on schedule (nightly)
  schedule:
    - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # Run on PR when benchmarks are modified
  pull_request:
    paths:
      - 'backend/benchmarks/**'
      - 'backend/toolbox/modules/**'
      - '.github/workflows/benchmark.yml'
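
# Note: the compare_with input above is not referenced by any step below;
# baseline comparison is driven by base-branch artifacts instead.
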
jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential
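          # build-essential provides a C toolchain in case any Python
          # dependencies have to compile native extensions from source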

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio "pytest-benchmark[histogram]"
          pip install -e ../sdk  # Install SDK for benchmarks
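
      # --benchmark-only collects and runs only benchmark tests; the JSON
      # output feeds the comparison step below, and an SVG histogram is
      # rendered (the exact output filename is assumed by the upload step).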
      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram
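
      # Results are uploaded per run number; PR runs later fetch these from
      # the base branch to use as a comparison baseline.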
      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram.svg
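
      # continue-on-error keeps the job going when no baseline artifact
      # exists yet (e.g. the first run against a new base branch).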
      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
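          # NOTE (assumption): a wildcard here depends on the action's
          # name-matching semantics; dawidd6/action-download-artifact may
          # require its name_is_regexp option for non-literal names.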
          path: ./baseline
          search_artifacts: true
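
      # hashFiles() returns '' when the file is absent, so the comparison is
      # skipped cleanly if the baseline download found nothing.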
      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)
          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []
          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']
              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100
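                  # positive change means the current run is slower than baseline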
                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')
                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "
      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });
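
  # Aggregate gate: runs even if the benchmark job was skipped or cancelled
  # (if: always()) and fails unless it succeeded.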
  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"