mirror of https://github.com/elder-plinius/AutoTemp.git, synced 2026-02-12 17:22:46 +00:00

Commit: Update autotemp.py

Changed file: autotemp.py (146 changed lines)
@@ -2,85 +2,109 @@ import openai

The single hunk rewrites essentially the whole file: the 85 lines that followed "import openai" become 109. Below, the extracted diff is reassembled into the code this commit removes and the file contents after the change.

Removed or replaced by this commit (the previous sequential, CLI-feedback methods of AutoTemp):

    def __init__(self, default_temp=0.0, alt_temps=None, auto_select=True):
        # (the unchanged default_temp and auto_select assignments are omitted here)
        self.alt_temps = alt_temps if alt_temps else [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4]

    # Interactive yes/no check on the default-temperature output
    def ask_user_feedback(self, text):
        print("Generated text:")
        print(text)
        feedback = input("Are you satisfied with this output? (yes/no): ")
        return feedback.lower() == 'yes'

    # Manual pick from the alternative outputs
    def present_options_to_user(self, outputs):
        print("Alternative outputs:")
        for temp, output in outputs.items():
            print(f"Temperature {temp}: {output}")
        chosen_temp = float(input("Choose the temperature of the output you like: "))
        return outputs.get(chosen_temp, "Invalid temperature chosen."), chosen_temp

    # Single-attempt generation with a hard-coded model and no retries
    def generate_with_openai(self, prompt, temperature):
        try:
            response = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "system", "content": "You are a helpful assistant."},
                          {"role": "user", "content": prompt}],
                temperature=temperature
            )
            message = response['choices'][0]['message']['content']
            return message.strip()
        except Exception as e:
            print(f"Error generating text at temperature {temperature}: {e}")
            return None

    # Sequential run(): generate per temperature, score 1-25 inline, return (output, temperature)
    def run(self, prompt):
        initial_output_text = self.generate_with_openai(prompt, self.default_temp)

        if self.ask_user_feedback(initial_output_text):
            return initial_output_text, self.default_temp

        outputs = {}
        scores = {}
        for temp in self.alt_temps:
            output_text = self.generate_with_openai(prompt, temp)
            if output_text:
                outputs[temp] = output_text
                eval_prompt = f"You are now an expert task evaluator. Rate the quality of the following output on a scale from 1 to 25. Be thorough, thoughtful, and precise and output only a number. The output is: {output_text}"
                score_text = self.generate_with_openai(eval_prompt, 0.123)
                score_match = re.search(r'\d+', score_text)
                if score_match:
                    scores[temp] = int(score_match.group())
                    print(f"Score for temperature {temp}: {scores[temp]}")
                else:
                    print(f"Unable to parse score for temperature {temp}. Received: {score_text}")
                    scores[temp] = 0

        if not scores:  # No scores could be generated
            return "No valid outputs generated.", None

        if self.auto_select:
            best_temp = max(scores, key=scores.get, default=self.default_temp)
            chosen_output = outputs.get(best_temp, "No valid outputs generated.")
            return chosen_output, best_temp
        else:
            chosen_output, chosen_temp = self.present_options_to_user(outputs)
            return chosen_output, chosen_temp

autotemp.py after this commit (imports and the updated AutoTemp class):

import openai
from dotenv import load_dotenv
import os
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
import gradio as gr

# Load environment variables from .env file
load_dotenv()


class AutoTemp:
    def __init__(self, default_temp=0.0, alt_temps=None, auto_select=True, max_workers=6, model_version="gpt-3.5-turbo"):
        self.api_key = os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY is not set in the environment variables.")
        openai.api_key = self.api_key

        self.default_temp = default_temp
        self.alt_temps = alt_temps if alt_temps else [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]
        self.auto_select = auto_select
        self.max_workers = max_workers
        self.model_version = model_version

    # Generation now honors the configurable model and retries transient failures
    def generate_with_openai(self, prompt, temperature, retries=3):
        while retries > 0:
            try:
                response = openai.ChatCompletion.create(
                    model=self.model_version,
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=temperature
                )
                message = response['choices'][0]['message']['content']
                return message.strip()
            except Exception as e:
                retries -= 1
                if retries <= 0:
                    return f"Error generating text at temperature {temperature}: {e}"

    # Scoring moves into its own method with a 0-100 rubric
    def evaluate_output(self, output, temperature):
        eval_prompt = f"""
        Evaluate the following output which was generated at a temperature setting of {temperature}. With the utmost precision, grade its quality on a scale from 0 to 100, considering:

        - Relevance to the prompt or task.
        - Clarity and ease of understanding.
        - Utility and usefulness for the intended purpose.
        - Engagement and ability to maintain interest.
        - Correctness and accuracy of the information provided.

        Output to evaluate:
        ---
        {output}
        ---
        """
        score_text = self.generate_with_openai(eval_prompt, 0.222)  # Use a neutral temperature for evaluation to get consistent results
        score_match = re.search(r'\b\d+(\.\d+)?\b', score_text)
        if score_match:
            return float(score_match.group())
        else:
            return 0  # Unable to parse score, default to 0

    # run() now fans out across temperatures with a thread pool and ranks the scored outputs
    def run(self, prompt):
        outputs = {}
        scores = {}
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_temp = {
                executor.submit(self.generate_with_openai, prompt, temp): temp for temp in self.alt_temps
            }
            for future in as_completed(future_to_temp):
                temp = future_to_temp[future]
                output_text = future.result()
                if output_text and not output_text.startswith("Error"):
                    outputs[temp] = output_text
                    scores[temp] = self.evaluate_output(output_text, temp)

        if not scores:
            return "No valid outputs generated.", None

        # Sort the scores by value in descending order and return the sorted outputs
        sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        sorted_outputs = [(temp, outputs[temp], score) for temp, score in sorted_scores]

        # If auto_select is enabled, return only the best result
        if self.auto_select:
            best_temp, best_output, best_score = sorted_outputs[0]
            return f"Best AutoTemp Output (Temp {best_temp} | Score: {best_score}):\n{best_output}"
        else:
            return "\n".join(f"Temp {temp} | Score: {score}:\n{text}" for temp, text, score in sorted_outputs)
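For orientation, a minimal sketch of calling the updated class directly, outside the Gradio UI. It assumes OPENAI_API_KEY is available via the environment or a .env file and that the pre-1.0 openai package (which still provides openai.ChatCompletion) is installed; the prompt and the custom temperature list are illustrative, not taken from the commit.

# Usage sketch, not part of the commit. Assumes OPENAI_API_KEY is set and
# that the legacy openai<1.0 SDK (openai.ChatCompletion) is installed.
from autotemp import AutoTemp

agent = AutoTemp(
    alt_temps=[0.3, 0.7, 1.1],  # illustrative temperatures to compare
    auto_select=True,           # keep only the best-scoring output
    max_workers=3,              # one worker per temperature in this sketch
)

# With auto_select=True the updated run() returns one formatted string,
# e.g. "Best AutoTemp Output (Temp 0.7 | Score: 87.0):\n..."
print(agent.run("Explain what a context window is, in one paragraph."))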
Removed by this commit (the previous direct entry point that ran one hard-coded prompt):

# Set up the AutoTemp agent
if __name__ == "__main__":
    agent = AutoTemp()
    prompt = "Write a short story about a day in the life of AGI"
    final_output, used_temp = agent.run(prompt)
    if used_temp is not None:
        print(f"Final selected output (Temperature {used_temp}):")
        print(final_output)
    else:
        print("No valid output was generated.")

autotemp.py after this commit (Gradio app wiring and the new entry point):

# Gradio app logic
def run_autotemp(prompt, auto_select):
    agent = AutoTemp(auto_select=auto_select)
    output = agent.run(prompt)
    return output


# Gradio interface setup
def main():
    iface = gr.Interface(
        fn=run_autotemp,
        inputs=["text", "checkbox"],
        outputs="text",
        title="AutoTemp Quality Scoring",
        description="This app generates responses at different temperatures, evaluates them individually, and ranks them based on their scores. Toggle 'Auto Select' to either get the best output or see all evaluated outputs.",
    )
    iface.launch()


if __name__ == "__main__":
    main()
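Running python autotemp.py now launches the Gradio interface instead of the old CLI flow. As a quick check without the UI, the callback can be exercised directly; this is a sketch, not part of the commit, and the prompt is an illustrative assumption.

# Sketch of exercising the Gradio callback directly.
from autotemp import run_autotemp, main

# "Auto Select" checked: only the best-scoring output, with its temperature and score.
print(run_autotemp("Summarize what AutoTemp does in two sentences.", True))

# "Auto Select" unchecked: every generated output, ranked by score.
print(run_autotemp("Summarize what AutoTemp does in two sentences.", False))

# main() would start the web UI, exactly as `python autotemp.py` does.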