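"""Replace gray placeholder boxes in a generated HTML layout with cropped screenshot regions.

Phase 1 crops each mapped UIED component out of the original screenshot (rescaling the
UIED boxes to the original image size) and saves the crops next to the output HTML.
Phase 2 parses the gray-box HTML with BeautifulSoup and swaps each `bg-gray-400`
placeholder, in document order, for an <img> tag pointing at the matching crop.
"""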
import argparse
import json
import re
from pathlib import Path

import cv2
from bs4 import BeautifulSoup


def main(args):
    # --- Phase 1: Crop and Save All Images First ---

    # 1. Load data
    mapping_data = json.loads(args.mapping.read_text())
    uied_data = json.loads(args.uied.read_text())
    original_image = cv2.imread(str(args.original_image))

    if original_image is None:
        raise ValueError(f"Could not load the original image from {args.original_image}")

    # Get image shapes to calculate a simple, global scaling factor
    H_proc, W_proc, _ = uied_data['img_shape']
    H_orig, W_orig, _ = original_image.shape
    scale_x = W_orig / W_proc
    scale_y = H_orig / H_proc
    print(f"Using global scaling for cropping: scale_x={scale_x:.3f}, scale_y={scale_y:.3f}")

    uied_boxes = {
        comp['id']: (comp['column_min'], comp['row_min'], comp['width'], comp['height'])
        for comp in uied_data['compos']
    }

    # 2. Create a directory for cropped images
    crop_dir = args.output_html.parent / "cropped_images"
    crop_dir.mkdir(exist_ok=True)
    print(f"Saving cropped images to: {crop_dir.resolve()}")

    # 3. Iterate through mappings and save cropped images to files
    for region_id, region_data in mapping_data.items():
        for placeholder_id, uied_id in region_data['mapping'].items():
            if uied_id not in uied_boxes:
                print(f"Warning: UIED ID {uied_id} from mapping not found. Skipping placeholder {placeholder_id}.")
                continue

            uied_bbox = uied_boxes[uied_id]

            x_proc, y_proc, w_proc, h_proc = uied_bbox
            x_tf = x_proc * scale_x
            y_tf = y_proc * scale_y
            w_tf = w_proc * scale_x
            h_tf = h_proc * scale_y

            x1, y1 = int(x_tf), int(y_tf)
            x2, y2 = int(x_tf + w_tf), int(y_tf + h_tf)

            h_img, w_img, _ = original_image.shape
            x1, y1 = max(0, x1), max(0, y1)
            x2, y2 = min(w_img, x2), min(h_img, y2)

            cropped_img = original_image[y1:y2, x1:x2]

            if cropped_img.size == 0:
                print(f"Warning: Cropped image for {placeholder_id} is empty. Skipping.")
                continue

            output_path = crop_dir / f"{placeholder_id}.png"
            cv2.imwrite(str(output_path), cropped_img)

    # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---

    print("\nStarting offline HTML processing with BeautifulSoup...")
    html_content = args.gray_html.read_text()
    soup = BeautifulSoup(html_content, 'html.parser')

    # 1. Find all placeholder elements by their class, in document order.
    placeholder_elements = soup.find_all(class_="bg-gray-400")

    # 2. Get the placeholder IDs from the mapping file in the correct, sorted order.
    def natural_sort_key(s):
        return [int(text) if text.isdigit() else text.lower() for text in re.split('([0-9]+)', s)]
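    # e.g. sorted(["ph10", "ph2", "ph1"], key=natural_sort_key) -> ["ph1", "ph2", "ph10"],
    # since numeric runs are compared as integers rather than character by character.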

    ordered_placeholder_ids = []
    # Sort region IDs numerically to process them in order
    for region_id in sorted(mapping_data.keys(), key=int):
        region_mapping = mapping_data[region_id]['mapping']
        # Sort the placeholder IDs within each region naturally (e.g., ph1, ph2, ph10)
        sorted_ph_ids = sorted(region_mapping.keys(), key=natural_sort_key)
        ordered_placeholder_ids.extend(sorted_ph_ids)

    # 3. Check for count mismatches
    if len(placeholder_elements) != len(ordered_placeholder_ids):
        print(f"Warning: Mismatch in counts! Found {len(placeholder_elements)} gray boxes in HTML, but {len(ordered_placeholder_ids)} mappings.")
    else:
        print(f"Found {len(placeholder_elements)} gray boxes to replace.")

    # 4. Iterate through both lists, create a proper <img> tag, and replace the placeholder.
    for i, ph_element in enumerate(placeholder_elements):
        if i >= len(ordered_placeholder_ids):
            print(f"Warning: More gray boxes in HTML than mappings. Stopping at box {i+1}.")
            break

        ph_id = ordered_placeholder_ids[i]
        relative_img_path = f"{crop_dir.name}/{ph_id}.png"

        # --- Create a new <img> tag and replace the placeholder ---

        # a. Get all classes from the original placeholder to preserve styling.
        original_classes = ph_element.get('class', [])
        if 'bg-gray-400' in original_classes:
            original_classes.remove('bg-gray-400')  # Remove the placeholder background

        # b. Create the new <img> tag
        img_tag = soup.new_tag("img", src=relative_img_path)
        img_tag['class'] = original_classes

        # c. Replace the placeholder with the new image tag.
        ph_element.replace_with(img_tag)

    # Save the modified HTML
    args.output_html.write_text(str(soup))

    print(f"\nSuccessfully replaced {min(len(placeholder_elements), len(ordered_placeholder_ids))} placeholders.")
    print(f"Final HTML generated at {args.output_html.resolve()}")

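
# Example invocation (illustrative; the script's filename is assumed here):
#   python replace_placeholders.py \
#       --mapping data/tmp/mapping_full_test1.json \
#       --uied data/tmp/ip/test1.json \
#       --original-image data/input/test1.png \
#       --gray-html data/output/test1_layout.html \
#       --output-html data/output/test1_layout_final.html
# Running it with no arguments uses the same paths via parser.set_defaults below.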
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings.")
    parser.add_argument("--mapping", type=Path, required=False, help="Path to the mapping JSON file from mapping.py.")
    parser.add_argument("--uied", type=Path, required=False, help="Path to the UIED JSON file.")
    parser.add_argument("--original-image", type=Path, required=False, help="Path to the original screenshot image.")
    parser.add_argument("--gray-html", type=Path, required=False, help="Path to the input HTML file with gray placeholders.")
    parser.add_argument("--output-html", type=Path, required=False, help="Path to save the final, modified HTML file.")

    parser.set_defaults(
        mapping=Path('data/tmp/mapping_full_test1.json'),
        uied=Path('data/tmp/ip/test1.json'),
        original_image=Path('data/input/test1.png'),
        gray_html=Path('data/output/test1_layout.html'),
        output_html=Path('data/output/test1_layout_final.html')
    )

    args = parser.parse_args()
    main(args)