diff --git a/autotemp.py b/autotemp.py
index fe56ff8..a6ed9d9 100644
--- a/autotemp.py
+++ b/autotemp.py
@@ -21,7 +21,7 @@ class AutoTemp:
         self.max_workers = max_workers
         self.model_version = model_version
 
-    def generate_with_openai(self, prompt, temperature, retries=3):
+    def generate_with_openai(self, prompt, temperature, top_p, retries=3):
         while retries > 0:
             try:
                 response = openai.ChatCompletion.create(
@@ -30,14 +30,15 @@ class AutoTemp:
                         {"role": "system", "content": "You are a helpful assistant."},
                         {"role": "user", "content": prompt}
                     ],
-                    temperature=temperature
+                    temperature=temperature,
+                    top_p=top_p
                 )
                 message = response['choices'][0]['message']['content']
                 return message.strip()
             except Exception as e:
                 retries -= 1
                 if retries <= 0:
-                    return f"Error generating text at temperature {temperature}: {e}"
+                    return f"Error generating text at temperature {temperature} and top-p {top_p}: {e}"
 
     def evaluate_output(self, output, temperature):
         eval_prompt = f"""
@@ -56,21 +57,20 @@ class AutoTemp:
         {output}
         ---
         """
-        score_text = self.generate_with_openai(eval_prompt, 0.5)  # Use a neutral temperature for evaluation to get consistent results
+        score_text = self.generate_with_openai(eval_prompt, 0.5, 1.0)  # Use a neutral temperature and top-p for evaluation to get consistent results
         score_match = re.search(r'\b\d+(\.\d)?\b', score_text)
         if score_match:
             return round(float(score_match.group()), 1)  # Round the score to one decimal place
         else:
             return 0.0  # Unable to parse score, default to 0.0
 
-    def run(self, prompt, temperature_list=None):
-        if temperature_list is not None:
-            self.alt_temps = temperature_list
+    def run(self, prompt, temperature_string, top_p):
+        temperature_list = [float(temp.strip()) for temp in temperature_string.split(',')]
         outputs = {}
         scores = {}
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
             future_to_temp = {
-                executor.submit(self.generate_with_openai, prompt, temp): temp for temp in self.alt_temps
+                executor.submit(self.generate_with_openai, prompt, temp, top_p): temp for temp in temperature_list
             }
             for future in as_completed(future_to_temp):
                 temp = future_to_temp[future]
@@ -89,33 +89,43 @@ class AutoTemp:
         # If auto_select is enabled, return only the best result
         if self.auto_select:
             best_temp, best_output, best_score = sorted_outputs[0]
-            return f"Best AutoTemp Output (Temp {best_temp} | Score: {best_score}):\n{best_output}"
+            return f"Best AutoTemp Output (Temp {best_temp} | Top-p {top_p} | Score: {best_score}):\n{best_output}"
         else:
-            return "\n".join(f"Temp {temp} | Score: {score}:\n{text}" for temp, text, score in sorted_outputs)
+            return "\n".join(f"Temp {temp} | Top-p {top_p} | Score: {score}:\n{text}" for temp, text, score in sorted_outputs)
 
 # Gradio app logic
-def run_autotemp(prompt, temperature_string, auto_select):
-    temperature_list = [float(temp.strip()) for temp in temperature_string.split(',')]
+def run_autotemp(prompt, temperature_string, top_p, auto_select):
     agent = AutoTemp(auto_select=auto_select)
-    output = agent.run(prompt, temperature_list=temperature_list)
+    output = agent.run(prompt, temperature_string, top_p=float(top_p))
     return output
 
 # Gradio interface setup
 def main():
     iface = gr.Interface(
         fn=run_autotemp,
-        inputs=["text", "text", "checkbox"],
+        inputs=[
+            "text",
+            "text",
+            gr.Slider(minimum=0.0, maximum=1.0, step=0.1, default=1.0, label="Top-p value"),
+            "checkbox"
+        ],
         outputs="text",
-        title="AutoTemp: Improved LLM Completions through Temperature Tuning",
-        description="Enter different temperatures separated by commas (e.g., 0.4, 0.6, 0.8, 1.0, 1.2). Toggle 'Auto Select' to either see just the best output or all evaluated outputs.",
+        title="AutoTemp: Enhanced LLM Responses with Temperature and Top-p Tuning",
+        description="""AutoTemp generates responses at different temperatures, evaluates them, and ranks them based on quality.
+        Enter temperatures separated by commas for evaluation.
+        Adjust 'Top-p' to control output diversity: lower for precision, higher for creativity.
+        Toggle 'Auto Select' to either see the top-rated output or all evaluated outputs.""",
+        article="""**What's Top-p?** 'Top-p' controls the diversity of AI responses: a low 'top-p' makes output more focused and predictable,
+        while a high 'top-p' encourages variety and surprise. Pair with temperature to fine-tune AI creativity:
+        higher temperatures with high 'top-p' for bold ideas, or lower temperatures with low 'top-p' for precise answers.""",
         examples=[
-            ["Write a short story about AGI learning to love", "0.5, 0.7, 0.9, 1.1", False],
-            ["Create a dialogue between a chef and an alien creating an innovative new recipe", "0.3, 0.6, 0.9, 1.2", True],
-            ["Explain quantum computing to a 5-year-old", "0.4, 0.8, 1.2, 1.5", False],
-            ["Draft an email to a hotel asking for a special arrangement for a marriage proposal", "0.4, 0.7, 1.0, 1.3", True],
-            ["Describe a futuristic city powered by renewable energy", "0.5, 0.75, 1.0, 1.25", False],
-            ["Generate a poem about the ocean's depths in the style of Edgar Allan Poe", "0.6, 0.8, 1.0, 1.2", True],
-            ["What are some innovative startup ideas for improving urban transportation?", "0.45, 0.65, 0.85, 1.05", False]
+            ["Write a short story about AGI learning to love", "0.5, 0.7, 0.9, 1.1", 1.0, False],
+            ["Create a dialogue between a chef and an alien creating an innovative new recipe", "0.3, 0.6, 0.9, 1.2", 0.9, True],
+            ["Explain quantum computing to a 5-year-old", "0.4, 0.8, 1.2, 1.5", 0.8, False],
+            ["Draft an email to a hotel asking for a special arrangement for a marriage proposal", "0.4, 0.7, 1.0, 1.3", 0.7, True],
+            ["Describe a futuristic city powered by renewable energy", "0.5, 0.75, 1.0, 1.25", 0.6, False],
+            ["Generate a poem about the ocean's depths in the style of Edgar Allan Poe", "0.6, 0.8, 1.0, 1.2", 0.5, True],
+            ["What are some innovative startup ideas for improving urban transportation?", "0.45, 0.65, 0.85, 1.05", 0.4, False]
         ]
     )
     iface.launch()
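
For reference, a minimal usage sketch of the changed interface (not part of the patch): it assumes `autotemp.py` is importable as `autotemp` and that an OpenAI API key is already configured for the legacy `openai.ChatCompletion` client the module uses, and it calls `AutoTemp.run` with the new comma-separated temperature string and `top_p` arguments introduced above.

```python
# Sketch only: exercises the post-patch AutoTemp.run(prompt, temperature_string, top_p)
# signature directly, outside the Gradio UI.
from autotemp import AutoTemp

agent = AutoTemp(auto_select=True)  # __init__ also stores max_workers and model_version
result = agent.run(
    "Explain quantum computing to a 5-year-old",  # prompt
    "0.4, 0.8, 1.2",                              # temperatures as a comma-separated string
    top_p=0.9,                                    # nucleus-sampling cutoff forwarded to each generation
)
print(result)  # with auto_select=True, prints the single best-scored output
```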