Mirror of https://github.com/FuzzingLabs/fuzzforge_ai.git, synced 2026-02-12 20:32:46 +00:00.
Benchmarks are not ready for CI/CD yet. Disabled automatic triggers:
- Removed schedule (nightly) trigger
- Removed pull_request trigger
Kept workflow_dispatch for manual testing when benchmarks are ready. This prevents benchmark failures from blocking PR merges and releases.
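To run the benchmarks manually, a minimal sketch assuming the GitHub CLI is available and that this file is checked in as .github/workflows/benchmark.yml (the path the workflow itself lists under its commented pull_request paths):

gh workflow run benchmark.yml -f compare_with=<baseline-commit>

The compare_with input is optional; leaving it off falls back to the empty-string default.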
166 lines
5.3 KiB
YAML
name: Benchmarks

on:
  # Disabled automatic runs - benchmarks not ready for CI/CD yet
  # schedule:
  #   - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger for testing
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # pull_request:
  #   paths:
  #     - 'backend/benchmarks/**'
  #     - 'backend/toolbox/modules/**'
  #     - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-benchmark pytest-benchmark[histogram]
          pip install -e ../sdk  # Install SDK for benchmarks

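      # --benchmark-only skips regular tests, --benchmark-json writes machine-readable
      # stats consumed by the comparison step below, and --benchmark-histogram renders
      # an SVG of the timing distribution (needs the pytest-benchmark[histogram] extra).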
      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

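      # Artifacts are named per run number so later runs on the base branch can be
      # picked up as the comparison baseline by the download step below.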
      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram.svg

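      # On pull requests, fetch the most recent benchmark artifact produced on the base
      # branch; continue-on-error keeps the job alive when no baseline exists yet.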
      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
          path: ./baseline
          search_artifacts: true

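      # Compare mean times against the baseline: anything more than 10% slower is
      # reported as a regression and the step exits non-zero, failing the job.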
      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)

          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []

          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100

                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "

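      # Post the per-benchmark table as a PR comment; github-script exposes an
      # authenticated Octokit client as `github` and the event payload as `context`.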
      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

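  # Aggregate gate: fails whenever the benchmark job failed (or flagged regressions),
  # so branch protection can require a single status check.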
  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()
    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"