fuzzforge_ai/.github/workflows/benchmark.yml
tduhamel42 32b45f24cb ci: Disable automatic benchmark runs
Benchmarks are not ready for CI/CD yet. Disabled automatic triggers:
- Removed schedule (nightly) trigger
- Removed pull_request trigger

Kept workflow_dispatch for manual testing when benchmarks are ready.

This prevents benchmark failures from blocking PR merges and releases.
2025-10-16 13:50:10 +02:00
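
With the scheduled and pull_request triggers commented out, the workflow now runs only when dispatched by hand. As a rough usage sketch (assuming the GitHub CLI is installed and authenticated; the branch name and baseline commit SHA below are placeholders), a manual run with the optional compare_with input could be started like this:

    gh workflow run benchmark.yml --ref <branch> -f compare_with=<baseline-commit-sha>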

name: Benchmarks

on:
  # Disabled automatic runs - benchmarks not ready for CI/CD yet
  # schedule:
  #   - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger for testing
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # pull_request:
  #   paths:
  #     - 'backend/benchmarks/**'
  #     - 'backend/toolbox/modules/**'
  #     - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio pytest-benchmark pytest-benchmark[histogram]
          pip install -e ../sdk  # Install SDK for benchmarks

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          name: benchmark-results-${{ github.run_number }}
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram.svg
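
      # Note: the two PR-only steps below are kept for when the pull_request
      # trigger is re-enabled. The baseline artifact lives in an earlier run of
      # this workflow on the base branch, which is presumably why a third-party
      # download action is used here instead of the standard
      # actions/download-artifact step (which only pulls from the current run).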
      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results-*
          path: ./baseline
          search_artifacts: true
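
      # The inline script below fails the job (exit 1) when any benchmark's mean
      # time is more than 10% slower than its counterpart in the baseline results.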
      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python -c "
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)
          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\\n## Benchmark Comparison\\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []
          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find matching baseline
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100
                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\\n⚠ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\\n✅ No significant performance regressions detected')
          "

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            // Build a markdown table of per-benchmark timings for the PR comment.
            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()

    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"