feat: add Reinforcement Learning Optimization doc

2026-06-27 07:39:56 +02:00 · 2025-02-07 01:02:12 +02:00
parent e0eed6fd92
commit eb27f7bbaa
4 changed files with 255 additions and 2 deletions
@@ -41,7 +41,7 @@ class PromptSelectionInterface(ABC):
 class RandomPromptSelector(PromptSelectionInterface):
    """Random prompt selector with cycle prevention using history."""

-    def __init__(self, prompts: list[str], history_size: int = 3):
+    def __init__(self, prompts: list[str], history_size: int = 300):
        if not prompts:
            raise ValueError("Prompts list cannot be empty")
        self.prompts = prompts
@@ -120,7 +120,8 @@ class CloudRLPromptSelector(PromptSelectionInterface):
        current_prompt: str,
        reward: float,
        passed_guard: bool,
-    ) -> None: ...
+    ) -> None:
+        ...


 class QLearningPromptSelector(PromptSelectionInterface):