Initial commit

2026-02-12 17:52:47 +00:00 · 2025-07-28 18:43:47 +08:00
parent 0a6e4ab682
commit 06408ffa6a
702 changed files with 153932 additions and 0 deletions
--- a/html_generator.py
+++ b/html_generator.py
@@ -0,0 +1,391 @@
+from utils import encode_image, Doubao, Qwen_2_5_VL
+from PIL import Image
+import bs4
+from threading import Thread
+import time
+
+# user instruction for each component
+user_instruction = {
+    "sidebar": "",
+    "header": "",
+    "navigation": "",
+    "main content": ""
+}
+
+# Prompt for each component
+PROMPT_DICT = {
+    "sidebar": f"""这是一个container的截图。这是用户给的额外要求：{user_instruction["sidebar"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+    <div>
+    your code here
+    </div>
+
+    只需返回<div>和</div>标签内的代码""",
+
+    "header": f"""这是一个container的截图。这是用户给的额外要求：{user_instruction["header"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+    <div>
+    your code here
+    </div>
+
+    只需返回<div>和</div>标签内的代码""",
+
+    "navigation": f"""这是一个container的截图。这是用户给的额外要求：{user_instruction["navigation"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码：
+
+    <div>
+    your code here
+    </div>
+
+    只需返回<div>和</div>标签内的代码""",
+
+    "main content": f"""这是一个container的截图。这是用户给的额外要求：{user_instruction["main content"]}请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请使用相同大小的纯灰色图像块替换原始截图中的图像，不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+    <div>
+    your code here
+    </div>
+
+    只需返回<div>和</div>标签内的代码"""
+}
+
+# PROMPT_refinement = """Here is a prototype image of a webpage. I have an draft HTML file that contains most of the elements and their correct positions, but it has *inaccurate background*, and some missing or wrong elements. Please compare the draft and the prototype image, then revise the draft implementation. Return a single piece of accurate HTML+tail-wind CSS code to reproduce the website. Use "placeholder.png" to replace the images. Respond with the content of the HTML+tail-wind CSS code. The current implementation I have is: \n\n [CODE]"""
+
+# PROMPT_sidebar = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+#     <div>
+#     your code here
+#     </div>
+
+#     只需返回<div>和</div>标签内的代码"""
+
+# PROMPT_header = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+#     <div>
+#     your code here
+#     </div>
+
+#     只需返回<div>和</div>标签内的代码"""
+
+# PROMPT_navigation = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码：
+
+#     <div>
+#     your code here
+#     </div>
+
+#     只需返回<div>和</div>标签内的代码"""
+
+# PROMPT_main_content = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必全部用与原始截图中对应图像同样大小的纯灰色图像块替换，不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：
+
+#     <div>
+#     your code here
+#     </div>
+
+#     只需返回<div>和</div>标签内的代码"""
+
+# Generate code for each component
+def generate_code(bbox_tree, img_path, bot):
+    """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
+    img = Image.open(img_path)
+    code_dict = {}
+    
+    def _generate_code(node):
+        if node["children"] == []:
+            bbox = node["bbox"]
+            # bbox is already in pixel coordinates [x1, y1, x2, y2]
+            cropped_img = img.crop(bbox)
+            
+            # Select prompt based on node type
+            if "type" in node:
+                if node["type"] == "sidebar":
+                    prompt = PROMPT_DICT["sidebar"]
+                elif node["type"] == "header":
+                    prompt = PROMPT_DICT["header"]
+                elif node["type"] == "navigation":
+                    prompt = PROMPT_DICT["navigation"]
+                elif node["type"] == "main content":
+                    prompt = PROMPT_DICT["main content"]
+                else:
+                    print(f"Unknown component type: {node['type']}")
+                    return
+            else:
+                print("Node type not found")
+                return
+                
+            try:
+                code = bot.ask(prompt, encode_image(cropped_img))
+                code_dict[node["id"]] = code
+            except Exception as e:
+                print(f"Error generating code for {node.get('type', 'unknown')}: {str(e)}")
+                code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
+        else:
+            for child in node["children"]:
+                _generate_code(child)
+
+    _generate_code(bbox_tree)
+    return code_dict
+
+# Generate code for each component in parallel
+def generate_code_parallel(bbox_tree, img_path, bot):
+    """generate code for all the leaf nodes in the bounding box tree, return a dictionary: {'id': 'code'}"""
+    code_dict = {}
+    t_list = []
+    
+    def _generate_code_with_retry(node, max_retries=3, retry_delay=2):
+        """Generate code with retry mechanism for rate limit errors"""
+        try:
+            # Create a new image instance for each thread
+            with Image.open(img_path) as img:
+                bbox = node["bbox"]
+                cropped_img = img.crop(bbox)
+
+                # Select prompt based on node type
+                if "type" in node:
+                    if node["type"] in PROMPT_DICT:
+                        prompt = PROMPT_DICT[node["type"]]
+                    else:
+                        print(f"Unknown component type: {node['type']}")
+                        code_dict[node["id"]] = f"<!-- Unknown component type: {node['type']} -->"
+                        return
+                else:
+                    print("Node type not found")
+                    code_dict[node["id"]] = f"<!-- Node type not found -->"
+                    return
+                
+                for attempt in range(max_retries):
+                    try:
+                        code = bot.ask(prompt, encode_image(cropped_img))
+                        code_dict[node["id"]] = code
+                        return
+                    except Exception as e:
+                        if "rate_limit" in str(e).lower() and attempt < max_retries - 1:
+                            print(f"Rate limit hit, retrying in {retry_delay} seconds... (Attempt {attempt + 1}/{max_retries})")
+                            time.sleep(retry_delay)
+                            retry_delay *= 2  # Exponential backoff
+                        else:
+                            print(f"Error generating code for node {node['id']}: {str(e)}")
+                            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
+                            return
+        except Exception as e:
+            print(f"Error processing image for node {node['id']}: {str(e)}")
+            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
+
+    def _generate_code(node):
+        if not node.get("children"):
+            t = Thread(target=_generate_code_with_retry, args=(node,))
+            t.start()
+            t_list.append(t)
+        else:
+            for child in node["children"]:
+                _generate_code(child)
+
+    _generate_code(bbox_tree)
+    
+    # Wait for all threads to complete
+    for t in t_list:
+        t.join()
+        
+    return code_dict
+
+# Generate HTML from the bounding box tree
+def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png"):
+    """
+    Generates an HTML file with nested containers based on the bounding box tree.
+
+    :param bbox_tree: Dictionary representing the bounding box tree.
+    :param output_file: The name of the output HTML file.
+    """
+    # HTML and CSS templates
+    # the container class is used to create grid and position the boxes
+    # include the tailwind css in the head tag
+    html_template_start = """
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <title>Bounding Boxes Layout</title>
+        <style>
+            body, html {
+                margin: 0;
+                padding: 0;
+                width: 100%;
+                height: 100%;
+            }
+            .container { 
+                position: relative;
+                width: 100%;
+                height: 100%;
+                box-sizing: border-box;
+            }
+            .box {
+                position: absolute;
+                box-sizing: border-box;
+                overflow: hidden;
+            }
+            .box > .container {
+                display: grid;
+                width: 100%;
+                height: 100%;
+            }
+        </style>
+        <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
+    </head>
+    <body>
+        <div class="container">
+    """
+
+    html_template_end = """
+        </div>
+    </body>
+    </html>
+    """
+
+    # Function to recursively generate HTML
+    def process_bbox(node, parent_width, parent_height, parent_left, parent_top, img):
+        bbox = node['bbox']
+        children = node.get('children', [])
+        id = node['id']
+
+        # Calculate relative positions and sizes
+        left = (bbox[0] - parent_left) / parent_width * 100
+        top = (bbox[1] - parent_top) / parent_height * 100
+        width = (bbox[2] - bbox[0]) / parent_width * 100
+        height = (bbox[3] - bbox[1]) / parent_height * 100
+
+        # Start the box div
+        html = f'''
+            <div id="{id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">
+        '''
+
+        if children:
+            # If there are children, add a nested container
+            html += '''
+                <div class="container">
+            '''
+            # Get the current box's width and height in pixels for child calculations
+            current_width = bbox[2] - bbox[0]
+            current_height = bbox[3] - bbox[1]
+            for child in children:
+                html += process_bbox(child, current_width, current_height, bbox[0], bbox[1], img)
+            html += '''
+                </div>
+            '''
+        
+        # Close the box div
+        html += '''
+            </div>
+        '''
+        return html
+
+    root_bbox = bbox_tree['bbox']
+    root_children = bbox_tree.get('children', [])
+    root_width = root_bbox[2]
+    root_height = root_bbox[3]
+    root_x = root_bbox[0]
+    root_y = root_bbox[1]
+
+    html_content = html_template_start
+    for child in root_children:
+        html_content += process_bbox(child, root_width, root_height, root_x, root_y, img)
+    html_content += html_template_end
+
+    soup = bs4.BeautifulSoup(html_content, 'html.parser')
+    html_content = soup.prettify()
+
+    with open(output_file, 'w') as f:
+        f.write(html_content)
+
+# Substitute the code in the html file
+def code_substitution(html_file, code_dict):
+    """substitute the code in the html file"""
+    with open(html_file, "r") as f:
+        html = f.read()
+    soup = bs4.BeautifulSoup(html, 'html.parser')
+    for id, code in code_dict.items():
+        code = code.replace("```html", "").replace("```", "")
+        div = soup.find(id=id)
+        # replace the inner html of the div
+        if div:
+            div.append(bs4.BeautifulSoup(code, 'html.parser'))
+    with open(html_file, "w") as f:
+        f.write(soup.prettify())
+
+# def html_refinement(html_file, output_file, img_path, bot):
+#     """refine the html file"""
+#     try:
+#         with open(html_file, "r") as f:
+#             html_content = f.read()
+
+#         img = Image.open(img_path)
+
+#         prompt = PROMPT_refinement.replace("[CODE]", html_content)
+
+#         refined_html = bot.ask(prompt, encode_image(img))
+#         refined_html = refined_html.replace("```html", "").replace("```", "").strip()
+
+#         with open(output_file, "w") as f:
+#             f.write(refined_html)
+#     except Exception as e:
+#         print(f"An error occurred during HTML refinement: {e}")
+
+# Main
+if __name__ == "__main__":
+    import json
+    import time
+    from PIL import Image
+    
+    # Load bboxes from block_parsing.py output
+    boxes_data = json.load(open("data/tmp/test2_bboxes.json"))
+
+
+    img_path = "data/input/test2.png"
+    with Image.open(img_path) as img:
+        width, height = img.size
+    
+    # Create root node with actual image dimensions
+    root = {
+        "bbox": [0, 0, width, height],  # Use actual image dimensions
+        "children": []
+    }
+    
+    # Add each region as a child with its type
+    for component_name, norm_bbox in boxes_data.items():
+        # The coordinates from block_parsor are normalized to 1000x1000
+        # Convert normalized coordinates to pixel coordinates
+        x1 = int(norm_bbox[0] * width / 1000)
+        y1 = int(norm_bbox[1] * height / 1000)
+        x2 = int(norm_bbox[2] * width / 1000)
+        y2 = int(norm_bbox[3] * height / 1000)
+        
+        child = {
+            "bbox": [x1, y1, x2, y2],
+            "children": [],
+            "type": component_name
+        }
+        root["children"].append(child)
+    
+    # Assign IDs to all nodes
+    def assign_id(node, id):
+        node["id"] = id
+        for child in node.get("children", []):
+            id = assign_id(child, id+1)
+        return id
+    
+    assign_id(root, 0)
+
+    # print(root)
+    # Generate initial HTML layout
+    generate_html(root, 'data/tmp/test2_layout.html')
+
+    # Initialize the bot
+    bot = Doubao("doubao_api.txt", model = "doubao-1.5-thinking-vision-pro-250428")
+    # bot = Qwen_2_5_VL("qwen_api.txt", model="qwen2.5-vl-72b-instruct")
+
+    # Generate code for each component
+    # code_dict = generate_code(root, img_path, bot)
+
+    code_dict = generate_code_parallel(root, img_path, bot)
+    
+    # Substitute the generated code into the HTML
+    code_substitution('data/tmp/test2_layout.html', code_dict)
+
+    # Refine the html file
+    # html_refinement('data/tmp/test1_layout.html', 'data/tmp/test1_layout_refined.html', img_path, bot)