fuzzforge_ai/fuzzforge-modules/harness-tester/src/module/feedback.py

"""Feedback types and schemas for harness testing."""
from enum import Enum
from typing import Any
from pydantic import BaseModel, Field


class FeedbackSeverity(str, Enum):
    """Severity levels for feedback issues."""

    CRITICAL = "critical"  # Blocks execution (compilation errors, crashes)
    WARNING = "warning"  # Should fix (low coverage, slow execution)
    INFO = "info"  # Nice to have (optimization suggestions)


class FeedbackCategory(str, Enum):
    """Categories of feedback."""

    COMPILATION = "compilation"
    EXECUTION = "execution"
    PERFORMANCE = "performance"
    COVERAGE = "coverage"
    STABILITY = "stability"
    CODE_QUALITY = "code_quality"


class FeedbackIssue(BaseModel):
    """A single feedback issue with an actionable suggestion."""

    category: FeedbackCategory
    severity: FeedbackSeverity
    type: str = Field(description="Specific issue type (e.g., 'low_coverage', 'compilation_error')")
    message: str = Field(description="Human-readable description of the issue")
    suggestion: str = Field(description="Actionable suggestion for the AI agent to fix the issue")
    details: dict[str, Any] = Field(default_factory=dict, description="Additional technical details")
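

# Illustrative example, not part of the original module: a FeedbackIssue
# built from the example issue type named in the field description above.
# The message, suggestion, and details values are invented for illustration.
_EXAMPLE_ISSUE = FeedbackIssue(
    category=FeedbackCategory.COVERAGE,
    severity=FeedbackSeverity.WARNING,
    type="low_coverage",
    message="Coverage stopped growing early in the trial",
    suggestion="Seed the corpus with structurally valid inputs",
    details={"final_edges": 143},
)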


class CompilationResult(BaseModel):
    """Results from a compilation attempt."""

    success: bool
    time_ms: int | None = None
    errors: list[str] = Field(default_factory=list)
    warnings: list[str] = Field(default_factory=list)
    stderr: str | None = None


class ExecutionResult(BaseModel):
    """Results from an execution test."""

    success: bool
    runs_completed: int | None = None
    immediate_crash: bool = False
    timeout: bool = False
    crash_details: str | None = None


class CoverageMetrics(BaseModel):
    """Coverage metrics from a fuzzing trial."""

    initial_edges: int = 0
    final_edges: int = 0
    new_edges_found: int = 0
    growth_rate: str = Field(
        description="Qualitative assessment: 'excellent', 'good', 'poor', 'none'"
    )
    percentage_estimate: float | None = Field(
        None, description="Estimated percentage of target code covered"
    )
    stagnation_time_sec: float | None = Field(
        None, description="Time until coverage stopped growing"
    )


class PerformanceMetrics(BaseModel):
    """Performance metrics from a fuzzing trial."""

    total_execs: int
    execs_per_sec: float
    average_exec_time_us: float | None = None
    performance_rating: str = Field(
        description="'excellent' (>1000/s), 'good' (100-1000/s), 'poor' (<100/s)"
    )
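

# Illustrative helper, not part of the original module: maps a raw
# execs-per-second figure onto the performance_rating labels documented
# above. The thresholds come straight from the field description; treating
# exactly 1000/s as "good" is an assumption, since the description leaves
# that boundary ambiguous.
def rate_performance(execs_per_sec: float) -> str:
    """Return 'excellent', 'good', or 'poor' per the documented thresholds."""
    if execs_per_sec > 1000:
        return "excellent"
    if execs_per_sec >= 100:
        return "good"
    return "poor"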


class StabilityMetrics(BaseModel):
    """Stability metrics from a fuzzing trial."""

    status: str = Field(
        description="'stable', 'unstable', 'crashes_frequently', 'hangs'"
    )
    crashes_found: int = 0
    hangs_found: int = 0
    unique_crashes: int = 0
    crash_rate: float = Field(0.0, description="Crashes per 1000 executions")
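

# Illustrative helper, not part of the original module: computes crash_rate
# exactly as the field description defines it ("crashes per 1000 executions"),
# guarding against the zero-execution case.
def compute_crash_rate(crashes_found: int, total_execs: int) -> float:
    """Crashes per 1000 executions; 0.0 when nothing has run yet."""
    if total_execs <= 0:
        return 0.0
    return crashes_found / total_execs * 1000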


class FuzzingTrial(BaseModel):
    """Results from a short fuzzing trial."""

    duration_seconds: int
    coverage: CoverageMetrics
    performance: PerformanceMetrics
    stability: StabilityMetrics
    trial_successful: bool


class QualityAssessment(BaseModel):
    """Overall quality assessment of the harness."""

    score: int = Field(ge=0, le=100, description="Quality score 0-100")
    verdict: str = Field(
        description="'production-ready', 'needs-improvement', 'broken'"
    )
    issues: list[FeedbackIssue] = Field(default_factory=list)
    strengths: list[str] = Field(default_factory=list)
    recommended_actions: list[str] = Field(default_factory=list)


class HarnessEvaluation(BaseModel):
    """Complete evaluation of a single harness."""

    name: str
    path: str | None = None
    compilation: CompilationResult
    execution: ExecutionResult | None = None
    fuzzing_trial: FuzzingTrial | None = None
    quality: QualityAssessment


class EvaluationSummary(BaseModel):
    """Summary of all harness evaluations."""

    total_harnesses: int
    production_ready: int
    needs_improvement: int
    broken: int
    average_score: float
    recommended_action: str
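

# Illustrative helper, not part of the original module: aggregates a list of
# HarnessEvaluation results into an EvaluationSummary. The verdict strings
# match those documented on QualityAssessment; the recommended_action text
# is a placeholder assumption, not module-defined behavior.
def summarize(evaluations: list[HarnessEvaluation]) -> EvaluationSummary:
    verdicts = [e.quality.verdict for e in evaluations]
    scores = [e.quality.score for e in evaluations]
    broken = verdicts.count("broken")
    return EvaluationSummary(
        total_harnesses=len(evaluations),
        production_ready=verdicts.count("production-ready"),
        needs_improvement=verdicts.count("needs-improvement"),
        broken=broken,
        average_score=sum(scores) / len(scores) if scores else 0.0,
        recommended_action="fix broken harnesses first" if broken else "proceed",
    )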


class HarnessTestReport(BaseModel):
    """Complete harness testing report."""

    harnesses: list[HarnessEvaluation]
    summary: EvaluationSummary
    test_configuration: dict[str, Any] = Field(default_factory=dict)
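

# Minimal usage sketch (assumption: not part of the original module). Shows
# how the schemas compose into a HarnessTestReport; the harness name and
# concrete values are invented for illustration, and summarize() is the
# hypothetical helper defined above. model_dump_json assumes Pydantic v2.
if __name__ == "__main__":
    evaluation = HarnessEvaluation(
        name="fuzz_parser",
        compilation=CompilationResult(success=True, time_ms=1200),
        execution=ExecutionResult(success=True, runs_completed=10),
        quality=QualityAssessment(score=85, verdict="production-ready"),
    )
    report = HarnessTestReport(
        harnesses=[evaluation],
        summary=summarize([evaluation]),
        test_configuration={"trial_duration_sec": 60},
    )
    print(report.model_dump_json(indent=2))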