Update autotemp.py

This commit is contained in:
pliny
2023-11-07 13:36:57 -08:00
committed by GitHub
parent 866571679d
commit 76209d94b1

View File

import openai
from dotenv import load_dotenv
import os
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
import gradio as gr
# Load environment variables from .env file
load_dotenv()
class AutoTemp:
    """Generate completions for one prompt at several temperatures, score each
    candidate with a second LLM call, and return the best (or all, ranked).

    Generation across temperatures is fanned out over a thread pool since each
    call is network-bound I/O.
    """

    def __init__(self, default_temp=0.0, alt_temps=None, auto_select=True, max_workers=6, model_version="gpt-3.5-turbo"):
        """Configure the agent.

        Args:
            default_temp: Baseline temperature (kept for API compatibility).
            alt_temps: Temperatures to sample at; defaults to six spread values.
            auto_select: If True, run() returns only the top-scored output;
                otherwise it returns every scored output, ranked.
            max_workers: Thread-pool size for concurrent API calls.
            model_version: OpenAI chat model name passed to the API.

        Raises:
            ValueError: If OPENAI_API_KEY is not present in the environment.
        """
        self.api_key = os.getenv('OPENAI_API_KEY')
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY is not set in the environment variables.")
        openai.api_key = self.api_key
        self.default_temp = default_temp
        self.alt_temps = alt_temps if alt_temps else [0.2, 0.4, 0.6, 0.8, 1.0, 1.2]
        self.auto_select = auto_select
        self.max_workers = max_workers
        self.model_version = model_version

    def ask_user_feedback(self, text):
        """Show *text* and ask interactively whether the user accepts it.

        Legacy interactive helper; not used by the thread-pooled run() path.
        """
        print("Generated text:")
        print(text)
        feedback = input("Are you satisfied with this output? (yes/no): ")
        return feedback.lower() == 'yes'

    def present_options_to_user(self, outputs):
        """Print all candidate outputs and let the user pick one by temperature.

        Legacy interactive helper; not used by the thread-pooled run() path.
        """
        print("Alternative outputs:")
        for temp, output in outputs.items():
            print(f"Temperature {temp}: {output}")
        chosen_temp = float(input("Choose the temperature of the output you like: "))
        return outputs.get(chosen_temp, "Invalid temperature chosen."), chosen_temp

    def generate_with_openai(self, prompt, temperature, retries=3):
        """Request one chat completion at *temperature*, retrying on failure.

        Returns the stripped completion text, or an "Error generating text..."
        string once all retries are exhausted.  NOTE(review): with retries <= 0
        the loop body never runs and the method falls through to None; callers
        in run() tolerate that via their truthiness check.
        """
        while retries > 0:
            try:
                response = openai.ChatCompletion.create(
                    model=self.model_version,
                    messages=[
                        {"role": "system", "content": "You are a helpful assistant."},
                        {"role": "user", "content": prompt}
                    ],
                    temperature=temperature
                )
                message = response['choices'][0]['message']['content']
                return message.strip()
            except Exception as e:
                retries -= 1
                if retries <= 0:
                    return f"Error generating text at temperature {temperature}: {e}"

    def evaluate_output(self, output, temperature):
        """Score *output* from 0 to 100 via a second LLM call; 0 on failure.

        A near-zero evaluation temperature is used so grading is as
        deterministic as the API allows.
        """
        eval_prompt = f"""
        Evaluate the following output which was generated at a temperature setting of {temperature}. With the utmost precision, grade its quality on a scale from 0 to 100, considering:

        - Relevance to the prompt or task.
        - Clarity and ease of understanding.
        - Utility and usefulness for the intended purpose.
        - Engagement and ability to maintain interest.
        - Correctness and accuracy of the information provided.

        Output to evaluate:
        ---
        {output}
        ---
        """
        score_text = self.generate_with_openai(eval_prompt, 0.222)  # Use a neutral temperature for evaluation to get consistent results
        # Bug fix: the error string itself contains digits ("...temperature 0.222...")
        # and would otherwise be regex-parsed as a bogus score.
        if not score_text or score_text.startswith("Error"):
            return 0
        score_match = re.search(r'\b\d+(\.\d+)?\b', score_text)
        if score_match:
            return float(score_match.group())
        else:
            return 0  # Unable to parse score, default to 0

    def run(self, prompt):
        """Generate at every alt temperature concurrently, score, and format.

        Returns a single formatted string: the best output when auto_select is
        on, otherwise all outputs ranked by score (highest first).
        """
        outputs = {}
        scores = {}
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_temp = {
                executor.submit(self.generate_with_openai, prompt, temp): temp for temp in self.alt_temps
            }
            for future in as_completed(future_to_temp):
                temp = future_to_temp[future]
                output_text = future.result()
                # Skip failed generations so they are never scored or shown.
                if output_text and not output_text.startswith("Error"):
                    outputs[temp] = output_text
                    scores[temp] = self.evaluate_output(output_text, temp)
        if not scores:
            return "No valid outputs generated."
        # Sort the scores by value in descending order and return the sorted outputs
        sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
        sorted_outputs = [(temp, outputs[temp], score) for temp, score in sorted_scores]
        # If auto_select is enabled, return only the best result
        if self.auto_select:
            best_temp, best_output, best_score = sorted_outputs[0]
            return f"Best AutoTemp Output (Temp {best_temp} | Score: {best_score}):\n{best_output}"
        else:
            return "\n".join(f"Temp {temp} | Score: {score}:\n{text}" for temp, text, score in sorted_outputs)
# Gradio app logic
def run_autotemp(prompt, auto_select):
    """Gradio callback: run a fresh AutoTemp agent on *prompt*.

    *auto_select* toggles between returning only the best output and
    returning every scored output.
    """
    return AutoTemp(auto_select=auto_select).run(prompt)
# Gradio interface setup
def main():
    """Build the Gradio interface around run_autotemp and launch it."""
    interface = gr.Interface(
        title="AutoTemp Quality Scoring",
        description="This app generates responses at different temperatures, evaluates them individually, and ranks them based on their scores. Toggle 'Auto Select' to either get the best output or see all evaluated outputs.",
        fn=run_autotemp,
        inputs=["text", "checkbox"],
        outputs="text",
    )
    interface.launch()
# Script entry point: launch the Gradio app.
# (The pre-Gradio direct-run block was removed: it unpacked
# `final_output, used_temp = agent.run(prompt)`, which no longer matches
# run()'s single formatted-string return value.)
if __name__ == "__main__":
    main()