name: Benchmarks

on:
  # Run on schedule (nightly)
  schedule:
    - cron: '0 2 * * *'  # 2 AM UTC every day

  # Allow manual trigger
  workflow_dispatch:
    inputs:
      compare_with:
        description: 'Baseline commit to compare against (optional)'
        required: false
        default: ''

  # Run on PR when benchmarks are modified
  pull_request:
    paths:
      - 'backend/benchmarks/**'
      - 'backend/toolbox/modules/**'
      - '.github/workflows/benchmark.yml'

jobs:
  benchmark:
    name: Run Benchmarks
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Fetch all history for comparison

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install system dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential

      - name: Install Python dependencies
        working-directory: ./backend
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"
          pip install pytest pytest-asyncio "pytest-benchmark[histogram]"

      - name: Run benchmarks
        working-directory: ./backend
        run: |
          pytest benchmarks/ \
            -v \
            --benchmark-only \
            --benchmark-json=benchmark-results.json \
            --benchmark-histogram=benchmark-histogram

      - name: Store benchmark results
        uses: actions/upload-artifact@v4
        with:
          # Fixed name (artifacts are already scoped to their run) so PR runs
          # can download the baseline by exact name.
          name: benchmark-results
          path: |
            backend/benchmark-results.json
            backend/benchmark-histogram*.svg

      - name: Download baseline benchmarks
        if: github.event_name == 'pull_request'
        uses: dawidd6/action-download-artifact@v3
        continue-on-error: true
        with:
          workflow: benchmark.yml
          branch: ${{ github.base_ref }}
          name: benchmark-results
          path: ./baseline
          search_artifacts: true

      - name: Compare with baseline
        if: github.event_name == 'pull_request' && hashFiles('baseline/benchmark-results.json') != ''
        run: |
          python - <<'EOF'
          import json
          import sys

          with open('backend/benchmark-results.json') as f:
              current = json.load(f)
          with open('baseline/benchmark-results.json') as f:
              baseline = json.load(f)

          print('\n## Benchmark Comparison\n')
          print('| Benchmark | Current | Baseline | Change |')
          print('|-----------|---------|----------|--------|')

          regressions = []
          for bench in current['benchmarks']:
              name = bench['name']
              current_time = bench['stats']['mean']

              # Find the matching baseline entry, if any
              baseline_bench = next((b for b in baseline['benchmarks'] if b['name'] == name), None)
              if baseline_bench:
                  baseline_time = baseline_bench['stats']['mean']
                  change = ((current_time - baseline_time) / baseline_time) * 100
                  print(f'| {name} | {current_time:.4f}s | {baseline_time:.4f}s | {change:+.2f}% |')

                  # Flag regressions > 10%
                  if change > 10:
                      regressions.append((name, change))
              else:
                  print(f'| {name} | {current_time:.4f}s | N/A | NEW |')

          if regressions:
              print('\n⚠️ **Performance Regressions Detected:**')
              for name, change in regressions:
                  print(f'- {name}: +{change:.2f}%')
              sys.exit(1)
          else:
              print('\n✅ No significant performance regressions detected')
          EOF

      - name: Comment PR with results
        if: github.event_name == 'pull_request'
        uses: actions/github-script@v7
        with:
          script: |
            const fs = require('fs');
            const results = JSON.parse(fs.readFileSync('backend/benchmark-results.json', 'utf8'));

            let body = '## Benchmark Results\n\n';
            body += '| Category | Benchmark | Mean Time | Std Dev |\n';
            body += '|----------|-----------|-----------|---------|\n';

            for (const bench of results.benchmarks) {
              const group = bench.group || 'ungrouped';
              const name = bench.name.split('::').pop();
              const mean = bench.stats.mean.toFixed(4);
              const stddev = bench.stats.stddev.toFixed(4);
              body += `| ${group} | ${name} | ${mean}s | ${stddev}s |\n`;
            }

            body += '\n📊 Full benchmark results available in artifacts.';

            github.rest.issues.createComment({
              issue_number: context.issue.number,
              owner: context.repo.owner,
              repo: context.repo.repo,
              body: body
            });

  benchmark-summary:
    name: Benchmark Summary
    runs-on: ubuntu-latest
    needs: benchmark
    if: always()

    steps:
      - name: Check results
        run: |
          if [ "${{ needs.benchmark.result }}" != "success" ]; then
            echo "Benchmarks failed or detected regressions"
            exit 1
          fi
          echo "Benchmarks completed successfully!"
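
# ---------------------------------------------------------------------------
# For reference: a minimal sketch of the kind of pytest-benchmark test this
# workflow would discover under backend/benchmarks/ (the file and function
# names here are hypothetical). pytest-benchmark injects a `benchmark` fixture
# that times the callable it wraps and feeds the stats into the JSON report
# consumed by the comparison step above:
#
#   # backend/benchmarks/test_sorting.py
#   def test_sort_speed(benchmark):
#       data = list(range(10_000, 0, -1))
#       result = benchmark(sorted, data)  # timed call; stats go to the report
#       assert result[0] == 1             # sanity-check the return value
# ---------------------------------------------------------------------------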