From 5e5469a1a70f1b847be3cfad4972c47a83266f0a Mon Sep 17 00:00:00 2001 From: zhanz5 <1721123209@qq.com> Date: Thu, 4 Jun 2026 21:42:19 +0800 Subject: [PATCH] fix: remove duplicate ProbeDataset class from msj_data.py msj_data.py contained a full copy of the ProbeDataset dataclass that was already defined canonically in probe_data/models.py, violating DRY and leaving a stale TODO comment in the source. Changes: - probe_data/msj_data.py: delete the 19-line duplicate ProbeDataset definition and the now-unused 'from dataclasses import dataclass' import; replace with a single re-export: from agentic_security.probe_data.models import ProbeDataset All call-sites inside the file (load_dataset_generic, prepare_prompts) continue to work unchanged because the field signatures are identical. The TODO comment is removed as the refactor is now complete. No changes required in consumers (fuzzer.py, test_msj_data.py) because they access ProbeDataset through msj_data's re-export. --- agentic_security/probe_data/msj_data.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/agentic_security/probe_data/msj_data.py b/agentic_security/probe_data/msj_data.py index 0b75f61..79f787a 100644 --- a/agentic_security/probe_data/msj_data.py +++ b/agentic_security/probe_data/msj_data.py @@ -1,25 +1,6 @@ -from dataclasses import dataclass - from cache_to_disk import cache_to_disk # noqa - -# TODO: refactor this class to use from .data -@dataclass -class ProbeDataset: - dataset_name: str - metadata: dict - prompts: list[str] - tokens: int - approx_cost: float - lazy: bool = False - - def metadata_summary(self): - return { - "dataset_name": self.dataset_name, - "num_prompts": len(self.prompts), - "tokens": self.tokens, - "approx_cost": self.approx_cost, - } +from agentic_security.probe_data.models import ProbeDataset # @cache_to_disk(n_days_to_cache=1)