mirror of
https://github.com/FuzzingLabs/fuzzforge_ai.git
synced 2026-05-20 22:54:45 +02:00
32b45f24cb
Benchmarks are not ready for CI/CD yet. Disabled automatic triggers:
- Removed schedule (nightly) trigger
- Removed pull_request trigger

Kept workflow_dispatch for manual testing when benchmarks are ready. This prevents benchmark failures from blocking PR merges and releases.
166 lines
5.3 KiB
YAML
166 lines
5.3 KiB
YAML
# Benchmark workflow: runs the pytest-benchmark suite in backend/benchmarks.
name: Benchmarks

on:
  # Disabled automatic runs - benchmarks not ready for CI/CD yet
  # schedule:
  #   - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger for testing
  workflow_dispatch:
    inputs:
      # NOTE(review): no step in this workflow references this input yet;
      # wire it into the comparison step or drop it.
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # pull_request:
  #   paths:
  #     - 'backend/benchmarks/**'
  #     - 'backend/toolbox/modules/**'
  #     - '.github/workflows/benchmark.yml'

jobs:
  # Runs the benchmark suite, uploads the results, and (on pull requests only)
  # compares against a baseline and comments on the PR.
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          # Quote the extra so the shell never treats [histogram] as a glob pattern.
          pip install pytest pytest-asyncio "pytest-benchmark[histogram]"
          pip install -e ../sdk  # Install SDK for benchmarks

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          # --benchmark-histogram takes a filename prefix; grouped benchmarks are
          # written as benchmark-histogram-<group>.svg, so match them with a glob.
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram*.svg

      # The steps below only run for pull_request events, which are currently
      # not among this workflow's enabled triggers.
      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
          path: ./baseline
          search_artifacts: true

      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)

          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []

          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100

                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            // This script is evaluated as JavaScript directly (no shell pass), so
            // line breaks must be written as '\n'; '\\n' would post a literal
            // backslash-n and break the Markdown table.
            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            // Await the request so a failed API call fails the step instead of
            // being dropped as an unhandled rejection.
            await github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body,
            });

  # Always runs after the benchmark job and fails unless that job succeeded.
  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"