Initial commit

This commit is contained in:
Yilei JIANG
2025-07-28 18:43:47 +08:00
parent 0a6e4ab682
commit 06408ffa6a
702 changed files with 153932 additions and 0 deletions

View File

@@ -0,0 +1,56 @@
import cv2
from os.path import join as pjoin
import time
import numpy as np
from detect_compo.lib_ip.Component import Component
from config.CONFIG_UIED import Config
C = Config()
class Block(Component):
    '''
    A layout block: a Component whose category is 'Block', with extra
    bookkeeping for hierarchy and for top/bottom bar detection.
    '''

    def __init__(self, region, image_shape):
        super().__init__(region, image_shape)
        self.category = 'Block'
        # hierarchy bookkeeping; filled in by code outside this class
        self.parent = None
        self.children = []
        # set to True once the block is recognised as a top/bottom bar
        self.uicompo_ = None
        self.top_or_botm = None
        # flag read and written by callers; stays False here
        self.redundant = False

    def block_is_uicompo(self, image_shape, max_compo_scale):
        '''
        Decide from its relative size whether the block can be a UI component.
        :param image_shape: shape of the image, (rows, columns, ...)
        :param max_compo_scale: (max height ratio, max width ratio)
        :return: False if the block is taller or wider than allowed, else True
        '''
        img_rows, img_cols = image_shape[:2]
        # too tall relative to the image
        if self.bbox.height / img_rows > max_compo_scale[0]:
            return False
        # too wide relative to the image
        return not (self.bbox.width / img_cols > max_compo_scale[1])

    def block_is_top_or_bottom_bar(self, image_shape, top_bottom_height):
        '''
        Check whether the block is a top bar or a bottom bar.
        A bar spans the image width (within 5 px on each side) and is glued
        to the top or the bottom edge (within 5 px).
        :param image_shape: shape of the image, (height, width, ...)
        :param top_bottom_height: (max bottom of a top bar, min top of a bottom bar),
                                  both as ratios of the image height
        :return: True (and uicompo_ is set) if the block is such a bar
        '''
        height, width = image_shape[:2]
        col_lo, row_lo, col_hi, row_hi = self.bbox.put_bbox()

        # both kinds of bar start at the left edge and reach the right edge
        if not (col_lo < 5 and width - col_hi < 5):
            return False

        is_bar = (row_lo < 5 and row_hi < height * top_bottom_height[0]) or \
                 (row_lo > height * top_bottom_height[1] and height - row_hi < 5)
        if is_bar:
            self.uicompo_ = True
            return True
        return False

    def block_erase_from_bin(self, binary, pad):
        '''
        Paint the (padded) bounding box of the block black on the binary map.
        :param binary: binary map, modified in place
        :param pad: number of pixels the box is expanded by on every side
        '''
        col_lo, row_lo, col_hi, row_hi = self.put_bbox()
        # expand by pad but stay inside the map
        top_left = (max(col_lo - pad, 0), max(row_lo - pad, 0))
        bottom_right = (min(col_hi + pad, binary.shape[1]),
                        min(row_hi + pad, binary.shape[0]))
        cv2.rectangle(binary, top_left, bottom_right, 0, -1)

View File

@@ -0,0 +1,108 @@
import cv2
import numpy as np
from random import randint as rint
import time
import detect_compo.lib_ip.ip_preprocessing as pre
import detect_compo.lib_ip.ip_detection as det
import detect_compo.lib_ip.ip_draw as draw
import detect_compo.lib_ip.ip_segment as seg
from detect_compo.lib_ip.Block import Block
from config.CONFIG_UIED import Config
C = Config()
def block_hierarchy(blocks):
    '''
    Record the containment hierarchy between every pair of blocks.
    For each pair (i, j) with i < j, compo_relation is asked once; the index
    of the inner block is appended to the children list of the outer one.
    :param blocks: list of blocks, their children lists are modified in place
    '''
    count = len(blocks)
    for i in range(count - 1):
        first = blocks[i]
        for j in range(i + 1, count):
            second = blocks[j]
            relation = first.compo_relation(second)
            if relation == -1:
                # -1: block i lies inside block j
                second.children.append(i)
            elif relation == 1:
                # 1: block j lies inside block i
                first.children.append(j)
def block_bin_erase_all_blk(binary, blocks, pad=0, show=False):
    '''
    Erase the block parts from the binary map.
    :param binary: binary map of original image, modified in place
    :param blocks: detected layout blocks; each one erases itself through
                   its block_erase_from_bin(binary, pad) method
    :param pad: expand the bounding boxes of blocks
    :param show: show the map before and after erasing, or not
    :return: binary map without block parts (the same object as `binary`)
    '''
    # the untouched snapshot is only needed for the before/after preview
    bin_org = binary.copy() if show else None

    for block in blocks:
        block.block_erase_from_bin(binary, pad)

    if show:
        cv2.imshow('before', bin_org)
        cv2.imshow('after', binary)
        cv2.waitKey()
    return binary
def block_division(grey, org, grad_thresh,
                   show=False, write_path=None,
                   step_h=10, step_v=10,
                   line_thickness=C.THRESHOLD_LINE_THICKNESS,
                   min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS,
                   max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO,
                   min_block_height_ratio=C.THRESHOLD_BLOCK_MIN_HEIGHT):
    '''
    Detect rectangular layout blocks by flood-filling the grey image from a
    regular grid of seed points.
    :param grey: grey-scale of original image
    :param org: original image (not used by this function)
    :param grad_thresh: lower/upper difference passed to cv2.floodFill
    :param show: show the drawing boards, or not
    :param write_path: if not None, the block drawing board is written there
    :param step_h: distance between seed rows
    :param step_v: distance between seed columns
    :param line_thickness: threshold handed to Block.compo_is_line
    :param min_rec_evenness: threshold handed to Block.compo_is_rectangle
    :param max_dent_ratio: threshold handed to Block.compo_is_rectangle
    :param min_block_height_ratio: currently unused (the height-ratio filter is disabled)
    :return: list of Block objects
    '''
    blocks = []
    row, column = grey.shape[0], grey.shape[1]
    image_area = row * column
    # the flood-fill mask is one pixel larger than the image on every side,
    # so image pixel (r, c) lives at mask[r + 1, c + 1]
    mask = np.zeros((row + 2, column + 2), dtype=np.uint8)
    # drawing boards; nothing is drawn on them at the moment because the
    # draw_region calls are disabled
    broad = np.zeros((row, column, 3), dtype=np.uint8)
    broad_all = broad.copy()

    for x in range(0, row, step_h):
        for y in range(0, column, step_v):
            # skip seeds that an earlier fill has already covered; the +1
            # compensates for the mask border
            if mask[x + 1, y + 1] != 0:
                continue

            # flood fill algorithm to get background (layout block)
            mask_before = mask.copy()
            ff = cv2.floodFill(grey, mask, (y, x), None, grad_thresh, grad_thresh, cv2.FLOODFILL_MASK_ONLY)
            # ignore small regions
            if ff[0] < 500:
                continue

            # pixels filled by this call only, without the mask border
            newly_filled = mask - mask_before
            region = np.reshape(cv2.findNonZero(newly_filled[1:-1, 1:-1]), (-1, 2))
            # findNonZero yields (column, row); Block gets (row, column)
            region = [(p[1], p[0]) for p in region]

            block = Block(region, grey.shape)
            if block.height < 30:
                continue

            area_ratio = block.area / image_area
            if area_ratio > 0.9:
                continue
            elif area_ratio > 0.7:
                block.redundant = True

            # ignore lines
            if block.compo_is_line(line_thickness):
                continue
            # ignore non-rectangle as blocks must be rectangular
            if not block.compo_is_rectangle(min_rec_evenness, max_dent_ratio):
                continue
            blocks.append(block)

    if show:
        cv2.imshow('flood-fill all', broad_all)
        cv2.imshow('block', broad)
        cv2.waitKey()
    if write_path is not None:
        cv2.imwrite(write_path, broad)
    return blocks

View File

@@ -0,0 +1,461 @@
import numpy as np
import cv2
from collections import Counter
import lib_ip.ip_draw as draw
from config.CONFIG_UIED import Config
C = Config()
# detect object(connected region)
# def boundary_bfs_connected_area(img, x, y, mark):
# def neighbor(img, x, y, mark, stack):
# for i in range(x - 1, x + 2):
# if i < 0 or i >= img.shape[0]: continue
# for j in range(y - 1, y + 2):
# if j < 0 or j >= img.shape[1]: continue
# if img[i, j] == 255 and mark[i, j] == 0:
# stack.append([i, j])
# mark[i, j] = 255
#
# stack = [[x, y]] # points waiting for inspection
# area = [[x, y]] # points of this area
# mark[x, y] = 255 # drawing broad
#
# while len(stack) > 0:
# point = stack.pop()
# area.append(point)
# neighbor(img, point[0], point[1], mark, stack)
# return area
# def line_check_perpendicular(lines_h, lines_v, max_thickness):
# """
# lines: [line_h, line_v]
# -> line_h: horizontal {'head':(column_min, row), 'end':(column_max, row), 'thickness':int)
# -> line_v: vertical {'head':(column, row_min), 'end':(column, row_max), 'thickness':int}
# """
# is_per_h = np.full(len(lines_h), False)
# is_per_v = np.full(len(lines_v), False)
# for i in range(len(lines_h)):
# # save the intersection point of h
# lines_h[i]['inter_point'] = set()
# h = lines_h[i]
#
# for j in range(len(lines_v)):
# # save the intersection point of v
# if 'inter_point' not in lines_v[j]: lines_v[j]['inter_point'] = set()
# v = lines_v[j]
#
# # if h is perpendicular to v in head of v
# if abs(h['head'][1]-v['head'][1]) <= max_thickness:
# if abs(h['head'][0] - v['head'][0]) <= max_thickness:
# lines_h[i]['inter_point'].add('head')
# lines_v[j]['inter_point'].add('head')
# is_per_h[i] = True
# is_per_v[j] = True
# elif abs(h['end'][0] - v['head'][0]) <= max_thickness:
# lines_h[i]['inter_point'].add('end')
# lines_v[j]['inter_point'].add('head')
# is_per_h[i] = True
# is_per_v[j] = True
#
# # if h is perpendicular to v in end of v
# elif abs(h['head'][1]-v['end'][1]) <= max_thickness:
# if abs(h['head'][0] - v['head'][0]) <= max_thickness:
# lines_h[i]['inter_point'].add('head')
# lines_v[j]['inter_point'].add('end')
# is_per_h[i] = True
# is_per_v[j] = True
# elif abs(h['end'][0] - v['head'][0]) <= max_thickness:
# lines_h[i]['inter_point'].add('end')
# lines_v[j]['inter_point'].add('end')
# is_per_h[i] = True
# is_per_v[j] = True
# per_h = []
# per_v = []
# for i in range(len(is_per_h)):
# if is_per_h[i]:
# lines_h[i]['inter_point'] = list(lines_h[i]['inter_point'])
# per_h.append(lines_h[i])
# for i in range(len(is_per_v)):
# if is_per_v[i]:
# lines_v[i]['inter_point'] = list(lines_v[i]['inter_point'])
# per_v.append(lines_v[i])
# return per_h, per_v
# def line_shrink_corners(corner, lines_h, lines_v):
# """
# shrink the corner according to lines:
# col_min_shrink: shrink right (increase)
# col_max_shrink: shrink left (decrease)
# row_min_shrink: shrink down (increase)
# row_max_shrink: shrink up (decrease)
# :param lines_h: horizontal {'head':(column_min, row), 'end':(column_max, row), 'thickness':int)
# :param lines_v: vertical {'head':(column, row_min), 'end':(column, row_max), 'thickness':int}
# :return: shrunken corner: (top_left, bottom_right)
# """
# (col_min, row_min), (col_max, row_max) = corner
# col_min_shrink, row_min_shrink = col_min, row_min
# col_max_shrink, row_max_shrink = col_max, row_max
# valid_frame = False
#
# for h in lines_h:
# # ignore outer border
# if len(h['inter_point']) == 2:
# valid_frame = True
# continue
# # shrink right -> col_min move to end
# if h['inter_point'][0] == 'head':
# col_min_shrink = max(h['end'][0], col_min_shrink)
# # shrink left -> col_max move to head
# elif h['inter_point'][0] == 'end':
# col_max_shrink = min(h['head'][0], col_max_shrink)
#
# for v in lines_v:
# # ignore outer border
# if len(v['inter_point']) == 2:
# valid_frame = True
# continue
# # shrink down -> row_min move to end
# if v['inter_point'][0] == 'head':
# row_min_shrink = max(v['end'][1], row_min_shrink)
# # shrink up -> row_max move to head
# elif v['inter_point'][0] == 'end':
# row_max_shrink = min(v['head'][1], row_max_shrink)
#
# # return the shrunken corner if only there is line intersecting with two other lines
# if valid_frame:
# return (col_min_shrink, row_min_shrink), (col_max_shrink, row_max_shrink)
# return corner
# def line_cvt_relative_position(col_min, row_min, lines_h, lines_v):
# """
# convert the relative position of lines in the entire image
# :param col_min: based column the img lines belong to
# :param row_min: based row the img lines belong to
# :param lines_h: horizontal {'head':(column_min, row), 'end':(column_max, row), 'thickness':int)
# :param lines_v: vertical {'head':(column, row_min), 'end':(column, row_max), 'thickness':int}
# :return: lines_h_cvt, lines_v_cvt
# """
# for h in lines_h:
# h['head'][0] += col_min
# h['head'][1] += row_min
# h['end'][0] += col_min
# h['end'][1] += row_min
# for v in lines_v:
# v['head'][0] += col_min
# v['head'][1] += row_min
# v['end'][0] += col_min
# v['end'][1] += row_min
#
# return lines_h, lines_v
# check if an object is so slim
# @boundary: [border_up, border_bottom, border_left, border_right]
# -> up, bottom: (column_index, min/max row border)
# -> left, right: (row_index, min/max column border) detect range of each row
def clipping_by_line(boundary, boundary_rec, lines):
    """
    Split a boundary into sub-areas along the lines stored under lines['h'].

    :param boundary: [border_up, border_bottom, border_left, border_right]
        -> up, bottom: points (column_index, min/max row border)
        -> left, right: points (row_index, min/max column border)
        The boundary and its points are left unmodified.
    :param boundary_rec: output list; one [b_top, b_bottom, b_left, b_right]
        entry is appended per sub-area
    :param lines: mapping of orientation to lines; only the 'h' entry is used.
        Each line is read as line[0] / line[1] -- presumably the first and
        last column covered by the line; TODO confirm with the caller.
    """
    if 'h' not in lines:
        return

    def clamp_points(points, low=None, high=None):
        # Work on copies of the points: the source points are shared by every
        # sub-area and by the caller, so clamping them in place would leak one
        # sub-area's limits into all the others.
        clamped = []
        for point in points:
            point = list(point)
            if low is not None and point[1] < low:
                point[1] = low
            if high is not None and point[1] > high:
                point[1] = high
            clamped.append(point)
        return clamped

    border_up, border_bottom = boundary[0], boundary[1]
    border_left, border_right = boundary[2], boundary[3]

    # column range [r1, r2) of the current sub area
    r1 = 0
    for line in lines['h']:
        # a line starting at column 0 only moves the start of the first sub area
        if line[0] == 0:
            r1 = line[1]
            continue
        r2 = line[0]

        b_top = [p for p in border_up if r2 > p[0] >= r1]
        b_bottom = [p for p in border_bottom if r2 > p[0] >= r1]
        b_left = clamp_points(border_left, low=r1)      # (row_index, min column border)
        b_right = clamp_points(border_right, high=r2)   # (row_index, max column border)

        boundary_rec.append([b_top, b_bottom, b_left, b_right])
        # the next sub area starts behind this line
        r1 = line[1]
# remove imgs that contain text
# def rm_text(org, corners, compo_class,
# max_text_height=C.THRESHOLD_TEXT_MAX_HEIGHT, max_text_width=C.THRESHOLD_TEXT_MAX_WIDTH,
# ocr_padding=C.OCR_PADDING, ocr_min_word_area=C.OCR_MIN_WORD_AREA, show=False):
# """
# Remove area that full of text
# :param org: original image
# :param corners: [(top_left, bottom_right)]
# -> top_left: (column_min, row_min)
# -> bottom_right: (column_max, row_max)
# :param compo_class: classes of corners
# :param max_text_height: Too large to be text
# :param max_text_width: Too large to be text
# :param ocr_padding: Padding for clipping
# :param ocr_min_word_area: If too text area ratio is too large
# :param show: Show or not
# :return: corners without text objects
# """
# new_corners = []
# new_class = []
# for i in range(len(corners)):
# corner = corners[i]
# (top_left, bottom_right) = corner
# (col_min, row_min) = top_left
# (col_max, row_max) = bottom_right
# height = row_max - row_min
# width = col_max - col_min
# # highly likely to be block or img if too large
# if height > max_text_height and width > max_text_width:
# new_corners.append(corner)
# new_class.append(compo_class[i])
# else:
# row_min = row_min - ocr_padding if row_min - ocr_padding >= 0 else 0
# row_max = row_max + ocr_padding if row_max + ocr_padding < org.shape[0] else org.shape[0]
# col_min = col_min - ocr_padding if col_min - ocr_padding >= 0 else 0
# col_max = col_max + ocr_padding if col_max + ocr_padding < org.shape[1] else org.shape[1]
# # check if this area is text
# clip = org[row_min: row_max, col_min: col_max]
# if not ocr.is_text(clip, ocr_min_word_area, show=show):
# new_corners.append(corner)
# new_class.append(compo_class[i])
# return new_corners, new_class
# def rm_img_in_compo(corners_img, corners_compo):
# """
# Remove imgs in component
# """
# corners_img_new = []
# for img in corners_img:
# is_nested = False
# for compo in corners_compo:
# if util.corner_relation(img, compo) == -1:
# is_nested = True
# break
# if not is_nested:
# corners_img_new.append(img)
# return corners_img_new
# def block_or_compo(org, binary, corners,
# max_thickness=C.THRESHOLD_BLOCK_MAX_BORDER_THICKNESS, max_block_cross_points=C.THRESHOLD_BLOCK_MAX_CROSS_POINT,
# min_compo_w_h_ratio=C.THRESHOLD_UICOMPO_MIN_W_H_RATIO, max_compo_w_h_ratio=C.THRESHOLD_UICOMPO_MAX_W_H_RATIO,
# min_block_edge=C.THRESHOLD_BLOCK_MIN_EDGE_LENGTH):
# """
# Check if the objects are img components or just block
# :param org: Original image
# :param binary: Binary image from pre-processing
# :param corners: [(top_left, bottom_right)]
# -> top_left: (column_min, row_min)
# -> bottom_right: (column_max, row_max)
# :param max_thickness: The max thickness of border of blocks
# :param max_block_cross_points: Ratio of point of interaction
# :return: corners of blocks and imgs
# """
# blocks = []
# imgs = []
# compos = []
# for corner in corners:
# (top_left, bottom_right) = corner
# (col_min, row_min) = top_left
# (col_max, row_max) = bottom_right
# height = row_max - row_min
# width = col_max - col_min
#
# block = False
# vacancy = [0, 0, 0, 0]
# for i in range(1, max_thickness):
# try:
# # top to bottom
# if vacancy[0] == 0 and (col_max - col_min - 2 * i) is not 0 and (
# np.sum(binary[row_min + i, col_min + i: col_max - i]) / 255) / (col_max - col_min - 2 * i) <= max_block_cross_points:
# vacancy[0] = 1
# # bottom to top
# if vacancy[1] == 0 and (col_max - col_min - 2 * i) is not 0 and (
# np.sum(binary[row_max - i, col_min + i: col_max - i]) / 255) / (col_max - col_min - 2 * i) <= max_block_cross_points:
# vacancy[1] = 1
# # left to right
# if vacancy[2] == 0 and (row_max - row_min - 2 * i) is not 0 and (
# np.sum(binary[row_min + i: row_max - i, col_min + i]) / 255) / (row_max - row_min - 2 * i) <= max_block_cross_points:
# vacancy[2] = 1
# # right to left
# if vacancy[3] == 0 and (row_max - row_min - 2 * i) is not 0 and (
# np.sum(binary[row_min + i: row_max - i, col_max - i]) / 255) / (row_max - row_min - 2 * i) <= max_block_cross_points:
# vacancy[3] = 1
# if np.sum(vacancy) == 4:
# block = True
# except:
# pass
#
# # too big to be UI components
# if block:
# if height > min_block_edge and width > min_block_edge:
# blocks.append(corner)
# else:
# if min_compo_w_h_ratio < width / height < max_compo_w_h_ratio:
# compos.append(corner)
# # filter out small objects
# else:
# if height > min_block_edge:
# imgs.append(corner)
# else:
# if min_compo_w_h_ratio < width / height < max_compo_w_h_ratio:
# compos.append(corner)
# return blocks, imgs, compos
# def compo_on_img(processing, org, binary, clf,
# compos_corner, compos_class):
# """
# Detect potential UI components inner img;
# Only leave non-img
# """
# pad = 2
# for i in range(len(compos_corner)):
# if compos_class[i] != 'img':
# continue
# ((col_min, row_min), (col_max, row_max)) = compos_corner[i]
# col_min = max(col_min - pad, 0)
# col_max = min(col_max + pad, org.shape[1])
# row_min = max(row_min - pad, 0)
# row_max = min(row_max + pad, org.shape[0])
# area = (col_max - col_min) * (row_max - row_min)
# if area < 600:
# continue
#
# clip_org = org[row_min:row_max, col_min:col_max]
# clip_bin_inv = pre.reverse_binary(binary[row_min:row_max, col_min:col_max])
#
# compos_boundary_new, compos_corner_new, compos_class_new = processing(clip_org, clip_bin_inv, clf)
# compos_corner_new = util.corner_cvt_relative_position(compos_corner_new, col_min, row_min)
#
# assert len(compos_corner_new) == len(compos_class_new)
#
# # only leave non-img elements
# for i in range(len(compos_corner_new)):
# ((col_min_new, row_min_new), (col_max_new, row_max_new)) = compos_corner_new[i]
# area_new = (col_max_new - col_min_new) * (row_max_new - row_min_new)
# if compos_class_new[i] != 'img' and area_new / area < 0.8:
# compos_corner.append(compos_corner_new[i])
# compos_class.append(compos_class_new[i])
#
# return compos_corner, compos_class
# def strip_img(corners_compo, compos_class, corners_img):
# """
# Separate img from other compos
# :return: compos without img
# """
# corners_compo_withuot_img = []
# compo_class_withuot_img = []
# for i in range(len(compos_class)):
# if compos_class[i] == 'img':
# corners_img.append(corners_compo[i])
# else:
# corners_compo_withuot_img.append(corners_compo[i])
# compo_class_withuot_img.append(compos_class[i])
# return corners_compo_withuot_img, compo_class_withuot_img
# def merge_corner(corners, compos_class, min_selected_IoU=C.THRESHOLD_MIN_IOU, is_merge_nested_same=True):
# """
# Calculate the Intersection over Overlap (IoU) and merge corners according to the value of IoU
# :param is_merge_nested_same: if true, merge the nested corners with same class whatever the IoU is
# :param corners: corners: [(top_left, bottom_right)]
# -> top_left: (column_min, row_min)
# -> bottom_right: (column_max, row_max)
# :return: new corners
# """
# new_corners = []
# new_class = []
# for i in range(len(corners)):
# is_intersected = False
# for j in range(len(new_corners)):
# r = util.corner_relation_nms(corners[i], new_corners[j], min_selected_IoU)
# # r = util.corner_relation(corners[i], new_corners[j])
# if is_merge_nested_same:
# if compos_class[i] == new_class[j]:
# # if corners[i] is in new_corners[j], ignore corners[i]
# if r == -1:
# is_intersected = True
# break
# # if new_corners[j] is in corners[i], replace new_corners[j] with corners[i]
# elif r == 1:
# is_intersected = True
# new_corners[j] = corners[i]
#
# # if above IoU threshold, and corners[i] is in new_corners[j], ignore corners[i]
# if r == -2:
# is_intersected = True
# break
# # if above IoU threshold, and new_corners[j] is in corners[i], replace new_corners[j] with corners[i]
# elif r == 2:
# is_intersected = True
# new_corners[j] = corners[i]
# new_class[j] = compos_class[i]
#
# # containing and too small
# elif r == -3:
# is_intersected = True
# break
# elif r == 3:
# is_intersected = True
# new_corners[j] = corners[i]
#
# # if [i] and [j] are overlapped but no containing relation, merge corners when same class
# elif r == 4:
# is_intersected = True
# if compos_class[i] == new_class[j]:
# new_corners[j] = util.corner_merge_two_corners(corners[i], new_corners[j])
#
# if not is_intersected:
# new_corners.append(corners[i])
# new_class.append(compos_class[i])
# return new_corners, new_class
# def select_corner(corners, compos_class, class_name):
# """
# Select corners in given compo type
# """
# corners_wanted = []
# for i in range(len(compos_class)):
# if compos_class[i] == class_name:
# corners_wanted.append(corners[i])
# return corners_wanted
# def flood_fill_bfs(img, x_start, y_start, mark, grad_thresh):
# def neighbor(x, y):
# for i in range(x - 1, x + 2):
# if i < 0 or i >= img.shape[0]: continue
# for j in range(y - 1, y + 2):
# if j < 0 or j >= img.shape[1]: continue
# if mark[i, j] == 0 and abs(img[i, j] - img[x, y]) < grad_thresh:
# stack.append([i, j])
# mark[i, j] = 255
#
# stack = [[x_start, y_start]] # points waiting for inspection
# region = [[x_start, y_start]] # points of this connected region
# mark[x_start, y_start] = 255 # drawing broad
# while len(stack) > 0:
# point = stack.pop()
# region.append(point)
# neighbor(point[0], point[1])
# return region

View File

@@ -0,0 +1,123 @@
import cv2
import numpy as np
import shutil
import os
from os.path import join as pjoin
def segment_img(org, segment_size, output_path, overlap=100):
    """
    Cut an image into horizontal stripes and write them as 0.png, 1.png, ...
    :param org: image to cut; its first axis is the one that is segmented
    :param segment_size: height of one segment
    :param output_path: directory the segments are written to (created if missing)
    :param overlap: number of rows consecutive segments share
    :raises ValueError: if overlap is not smaller than segment_size, because
                        the window could never advance
    """
    step = segment_size - overlap
    if step <= 0:
        raise ValueError('segment_size must be larger than overlap')

    os.makedirs(output_path, exist_ok=True)

    height = np.shape(org)[0]
    top = 0
    segment_no = 0
    while top < height:
        # the last segment is cut short at the image border
        bottom = min(top + segment_size, height)
        cv2.imwrite(os.path.join(output_path, str(segment_no) + '.png'), org[top:bottom])
        segment_no += 1
        # stop once the bottom of the image has been written, so that the
        # tail is covered exactly once
        if bottom >= height:
            break
        top += step
def clipping(img, components, pad=0, show=False):
    """
    Clip every component out of the image.
    :param img: original image
    :param components: objects offering compo_clipping(img, pad=...)
    :param pad: forwarded to compo_clipping of every component
    :param show: show every clip and wait for a key press, or not
    :return: list of clipping images, in the order of the components
    """
    clips = []
    for component in components:
        clips.append(component.compo_clipping(img, pad=pad))
        if not show:
            continue
        cv2.imshow('clipping', clips[-1])
        cv2.waitKey()
    return clips
def dissemble_clip_img_hollow(clip_root, org, compos):
    """
    Write every component clip into a per-category folder and save the
    background with the component areas hollowed out.
    :param clip_root: output directory; removed first if it already exists
    :param org: original image
    :param compos: components offering category, id, compo_clipping and put_bbox
    """
    # always start from an empty output directory
    if os.path.exists(clip_root):
        shutil.rmtree(clip_root)
    os.mkdir(clip_root)

    known_categories = []
    background = org.copy()
    # extra channel: 255 everywhere, 0 where a component has been cut out
    hollow_out = np.full(background.shape[:2], 255, dtype=np.uint8)

    for compo in compos:
        category = compo.category
        category_dir = pjoin(clip_root, category)
        # one folder per category, created on first use
        if category not in known_categories:
            os.mkdir(category_dir)
            known_categories.append(category)

        cv2.imwrite(pjoin(category_dir, str(compo.id) + '.jpg'), compo.compo_clipping(org))

        col_min, row_min, col_max, row_max = compo.put_bbox()
        hollow_out[row_min: row_max, col_min: col_max] = 0

    # append the hollow map to the channels of the image
    background = cv2.merge((background, hollow_out))
    cv2.imwrite(os.path.join(clip_root, 'bkg.png'), background)
def _surrounding_strips(org, bbox, pad, offset):
    """Return the top, bottom, left and right image strips that frame bbox."""
    col_min, row_min, col_max, row_max = bbox
    # outer frame: the box grown by pad, kept inside the image
    up = max(row_min - pad, 0)
    left = max(col_min - pad, 0)
    bottom = min(row_max + pad, org.shape[0] - 1)
    right = min(col_max + pad, org.shape[1] - 1)
    # inner limits: clamped at 0, otherwise a negative stop would be read as
    # an index counted from the far edge of the image
    inner_up = max(row_min - offset, 0)
    inner_left = max(col_min - offset, 0)
    return [org[up:inner_up, left:right],
            org[row_max + offset:bottom, left:right],
            org[up:bottom, left:inner_left],
            org[up:bottom, col_max + offset:right]]


def _average_pix_around(org, bbox, pad=6, offset=3):
    """Return, per channel, the mean of the mean values of the non-empty strips around bbox."""
    strips = [s for s in _surrounding_strips(org, bbox, pad, offset) if s.size]
    if not strips:
        # nothing to sample from, fall back to black
        return [0, 0, 0]
    average = []
    for i in range(3):
        means = [np.average(strip[:, :, i]) for strip in strips]
        average.append(int(sum(means) / len(means)))
    return average


def _most_pix_around(org, bbox, pad=6, offset=2):
    """Return, per channel, the most frequent value in the strips around bbox."""
    strips = [s for s in _surrounding_strips(org, bbox, pad, offset) if s.size]
    if not strips:
        # nothing to sample from, fall back to black
        return [0, 0, 0]
    most = []
    for i in range(3):
        val = np.concatenate([strip[:, :, i].ravel() for strip in strips])
        most.append(int(np.argmax(np.bincount(val))))
    return most


def dissemble_clip_img_fill(clip_root, org, compos, flag='most'):
    """
    Write every component clip into a per-category folder and save the
    background with the component areas painted in a colour taken from the
    pixels around each component.
    :param clip_root: output directory; removed first if it already exists
    :param org: original image with three channels
    :param compos: components offering category, id, compo_clipping and put_bbox
    :param flag: 'average' fills with the mean surrounding colour,
                 'most' fills with the most frequent surrounding colour
    :raises ValueError: if flag is neither 'average' nor 'most'
    A component without any surrounding pixel inside the image is filled black.
    """
    color_pickers = {'average': _average_pix_around, 'most': _most_pix_around}
    # validate before anything on disk is touched
    if flag not in color_pickers:
        raise ValueError("flag must be 'average' or 'most', got %r" % (flag,))
    pick_color = color_pickers[flag]

    if os.path.exists(clip_root):
        shutil.rmtree(clip_root)
    os.mkdir(clip_root)

    cls_dirs = []
    bkg = org.copy()
    for compo in compos:
        cls = compo.category
        c_root = pjoin(clip_root, cls)
        c_path = pjoin(c_root, str(compo.id) + '.jpg')
        # one folder per category, created on first use
        if cls not in cls_dirs:
            os.mkdir(c_root)
            cls_dirs.append(cls)
        clip = compo.compo_clipping(org)
        cv2.imwrite(c_path, clip)

        col_min, row_min, col_max, row_max = compo.put_bbox()
        # the colour is sampled from the untouched original, not from bkg,
        # so earlier fills never influence later ones
        color = pick_color(org, (col_min, row_min, col_max, row_max))
        cv2.rectangle(bkg, (col_min, row_min), (col_max, row_max), color, -1)

    cv2.imwrite(os.path.join(clip_root, 'bkg.png'), bkg)

View File

@@ -0,0 +1,113 @@
import pytesseract as pyt
import cv2
import lib_ip.ip_draw as draw
from config.CONFIG_UIED import Config
C = Config()
def is_text(img, min_word_area, show=False):
    """
    Check whether a clip is mostly text, according to OCR.
    :param img: clip to inspect
    :param min_word_area: minimum ratio of word area to clip area
    :param show: print the words and show their boxes, or not
    :return: True if words were found and cover at least min_word_area of
             the clip, False otherwise; -1 if the OCR call failed.
             NOTE(review): -1 is truthy, callers testing the result with
             `if` treat an OCR failure like text.
    """
    # a row describing a word carries at least the columns
    # ..., left, top, width, height, conf, text
    # (12 columns in tesseract's TSV output -- TODO confirm for the installed version)
    min_fields = 12

    broad = img.copy()
    area_word = 0
    area_total = img.shape[0] * img.shape[1]

    try:
        # ocr text detection
        data = pyt.image_to_data(img).split('\n')
    except Exception:
        print(img.shape)
        return -1

    word = []
    # data[0] is the header row
    for row in data[1:]:
        d = row.split()
        # blank rows and rows without text have fewer fields; reading them
        # by position would fail or pick up the wrong columns
        if len(d) < min_fields:
            continue
        if d[-1] == '-1' or d[-1] == '-':
            continue
        # d[-3]: height, d[-4]: width -- larger boxes are not counted as words
        if int(d[-3]) < 50 and int(d[-4]) < 100:
            word.append(d)
            t_l = (int(d[-6]), int(d[-5]))
            b_r = (int(d[-6]) + int(d[-4]), int(d[-5]) + int(d[-3]))
            area_word += int(d[-4]) * int(d[-3])
            cv2.rectangle(broad, t_l, b_r, (0,0,255), 1)

    if show:
        for d in word: print(d)
        print(area_word/area_total)
        cv2.imshow('a', broad)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

    # no text in this clip or relatively small text area
    if len(word) == 0 or area_word/area_total < min_word_area:
        return False
    return True
def text_detection(org, img_clean):
    """
    Detect the bounding boxes of words by OCR.
    :param org: original image (not used by this function)
    :param img_clean: image handed to the OCR engine
    :return: list of (top_left, bottom_right)
             -> top_left: (column_min, row_min)
             -> bottom_right: (column_max, row_max)
             The list is empty if no word is found or if the OCR call fails,
             so the result can always be iterated.
    """
    # a row describing a word carries at least the columns
    # ..., left, top, width, height, conf, text
    # (12 columns in tesseract's TSV output -- TODO confirm for the installed version)
    min_fields = 12

    try:
        data = pyt.image_to_data(img_clean).split('\n')
    except Exception:
        return []

    corners_word = []
    # data[0] is the header row
    for row in data[1:]:
        d = row.split()
        # blank rows and rows without text have fewer fields; reading them
        # by position would fail or pick up the wrong columns
        if len(d) < min_fields:
            continue
        if d[-1] == '-1' or d[-1] == '-':
            continue
        # d[-3]: height, d[-4]: width -- keep only boxes of plausible word size
        if 5 < int(d[-3]) < 40 and 5 < int(d[-4]) < 100:
            t_l = (int(d[-6]), int(d[-5]))
            b_r = (int(d[-6]) + int(d[-4]), int(d[-5]) + int(d[-3]))
            corners_word.append((t_l, b_r))
    return corners_word
# def text_merge_word_into_line(org, corners_word, max_words_gap=C.THRESHOLD_TEXT_MAX_WORD_GAP):
#
# def is_in_line(word):
# for i in range(len(lines)):
# line = lines[i]
# # at the same row
# if abs(line['center'][1] - word['center'][1]) < max_words_gap:
# # small gap between words
# if (abs(line['center'][0] - word['center'][0]) - abs(line['width']/2 + word['width']/2)) < max_words_gap:
# return i
# return -1
#
# def merge_line(word, index):
# line = lines[index]
# # on the left
# if word['center'][0] < line['center'][0]:
# line['col_min'] = word['col_min']
# # on the right
# else:
# line['col_max'] = word['col_max']
# line['row_min'] = min(line['row_min'], word['row_min'])
# line['row_max'] = max(line['row_max'], word['row_max'])
# line['width'] = line['col_max'] - line['col_min']
# line['height'] = line['row_max'] - line['row_min']
# line['center'] = ((line['col_max'] + line['col_min'])/2, (line['row_max'] + line['row_min'])/2)
#
# words = []
# for corner in corners_word:
# word = {}
# (top_left, bottom_right) = corner
# (col_min, row_min) = top_left
# (col_max, row_max) = bottom_right
# word['col_min'], word['col_max'], word['row_min'], word['row_max'] = col_min, col_max, row_min, row_max
# word['height'] = row_max - row_min
# word['width'] = col_max - col_min
# word['center'] = ((col_max + col_min)/2, (row_max + row_min)/2)
# words.append(word)
#
# lines = []
# for word in words:
# line_index = is_in_line(word)
# # word is in current line
# if line_index != -1:
# merge_line(word, line_index)
# # word is not in current line
# else:
# # this single word as a new line
# lines.append(word)
#
# corners_line = []
# for l in lines:
# corners_line.append(((l['col_min'], l['row_min']), (l['col_max'], l['row_max'])))
# return corners_line

View File

@@ -0,0 +1,127 @@
import cv2
from os.path import join as pjoin
import time
import json
import numpy as np
import detect_compo.lib_ip.ip_preprocessing as pre
import detect_compo.lib_ip.ip_draw as draw
import detect_compo.lib_ip.ip_detection as det
import detect_compo.lib_ip.file_utils as file
import detect_compo.lib_ip.Component as Compo
from config.CONFIG_UIED import Config
C = Config()
def resolve_uicompo_containment(uicompos):
    """
    Resolves containment issues among UI components.
    If a component's bounding box is fully contained within another's, it is removed.
    Components whose bounding boxes are identical contain each other; in that
    case only the first of them is kept instead of removing all of them.

    :param uicompos: components offering bbox.col_min / row_min / col_max / row_max
    :return: new list with the remaining components, in their original order
    """
    def contains(bbox_a, bbox_b):
        """Checks if bbox_a completely contains bbox_b."""
        return bbox_a.col_min <= bbox_b.col_min and \
               bbox_a.row_min <= bbox_b.row_min and \
               bbox_a.col_max >= bbox_b.col_max and \
               bbox_a.row_max >= bbox_b.row_max

    compos_to_remove = set()
    for i, compo1 in enumerate(uicompos):
        for j, compo2 in enumerate(uicompos):
            if i == j:
                continue
            # Check if compo1 contains compo2
            if not contains(compo1.bbox, compo2.bbox):
                continue
            # Mutual containment means identical boxes: the earlier component
            # survives, so it must not be removed on behalf of the later one.
            if j < i and contains(compo2.bbox, compo1.bbox):
                continue
            compos_to_remove.add(j)

    # Filter out the contained components
    final_compos = [compo for i, compo in enumerate(uicompos) if i not in compos_to_remove]

    if len(final_compos) < len(uicompos):
        print(f"Containment resolved: Removed {len(uicompos) - len(final_compos)} contained components.")

    return final_compos
def nesting_inspection(org, grey, compos, ffl_block):
    '''
    Look for nested components inside every compo taller than 50 px.
    If one of the components found inside a compo is flagged redundant, it
    replaces that compo in `compos`; otherwise everything found inside is
    collected.
    :param org: original image, forwarded to the nested detection
    :param grey: grey-scale image the compos are clipped from
    :param compos: list of compos, may be modified in place
    :param ffl_block: gradient threshold for flood-fill
    :return: nesting compos
    '''
    nesting_compos = []
    for idx, compo in enumerate(compos):
        # only big compos are inspected
        if not compo.height > 50:
            continue

        inner_compos = det.nested_components_detection(compo.compo_clipping(grey), org,
                                                       grad_thresh=ffl_block, show=False)
        # shift the inner compos by the top-left corner of the inspected compo
        Compo.cvt_compos_relative_pos(inner_compos, compo.bbox.col_min, compo.bbox.row_min)

        for inner in inner_compos:
            if inner.redundant:
                compos[idx] = inner
                break
        else:
            # no redundant compo inside: keep all of them as nested compos
            nesting_compos += inner_compos
    return nesting_compos
def compo_detection(input_img_path, output_root, uied_params,
                    resize_by_height=800, classifier=None, show=False, wai_key=0):
    '''
    Run the component detection pipeline on one image and save the result
    as <output_root>/ip/<name>.json next to a drawing <name>.jpg.
    :param input_img_path: path of the input image
    :param output_root: root directory of the outputs
    :param uied_params: parameters; the keys 'min-grad', 'min-ele-area',
                        'merge-contained-ele' and 'ffl-block' are read
    :param resize_by_height: forwarded to the image reader
    :param classifier: not used at the moment (classification steps are disabled)
    :param show: show intermediate results, or not
    :param wai_key: wait-key value forwarded to the drawing functions
    :return: list of detected components
    '''
    start = time.perf_counter()

    # file name without its directory and without its last four characters
    separator = '/' if '/' in input_img_path else '\\'
    name = input_img_path.split(separator)[-1][:-4]
    ip_root = file.build_directory(pjoin(output_root, "ip"))
    result_path = pjoin(ip_root, name + '.json')

    # *** Step 1 *** pre-processing: read img -> get binary map
    org, grey = pre.read_img(input_img_path, resize_by_height)
    binary = pre.binarization(org, grad_min=int(uied_params['min-grad']))

    # *** Step 2 *** element detection
    det.rm_line(binary, show=show, wait_key=wai_key)
    min_ele_area = int(uied_params['min-ele-area'])
    uicompos = det.component_detection(binary, min_obj_area=min_ele_area)

    # *** Step 3 *** results refinement
    uicompos = det.compo_filter(uicompos, min_area=min_ele_area, img_shape=binary.shape)
    uicompos = det.merge_intersected_compos(uicompos)
    det.compo_block_recognition(binary, uicompos)
    if uied_params['merge-contained-ele']:
        uicompos = det.rm_contained_compos_not_in_block(uicompos)
    Compo.compos_update(uicompos, org.shape)
    Compo.compos_containment(uicompos)

    # *** Step 4 ** nesting inspection: check if big compos have nesting element
    uicompos += nesting_inspection(org, grey, uicompos, ffl_block=uied_params['ffl-block'])
    Compo.compos_update(uicompos, org.shape)
    draw.draw_bounding_box(org, uicompos, show=show, name='merged compo',
                           write_path=pjoin(ip_root, name + '.jpg'), wait_key=wai_key)

    # *** Step 5 *** image inspection: recognize image -> remove noise in image -> binarize with larger threshold and reverse -> rectangular compo detection
    # if classifier is not None:
    #     classifier['Image'].predict(seg.clipping(org, uicompos), uicompos)
    #     draw.draw_bounding_box_class(org, uicompos, show=show)
    #     uicompos = det.rm_noise_in_large_img(uicompos, org)
    #     draw.draw_bounding_box_class(org, uicompos, show=show)
    #     det.detect_compos_in_img(uicompos, binary_org, org)
    #     draw.draw_bounding_box(org, uicompos, show=show)
    # if classifier is not None:
    #     classifier['Noise'].predict(seg.clipping(org, uicompos), uicompos)
    #     draw.draw_bounding_box_class(org, uicompos, show=show)
    #     uicompos = det.rm_noise_compos(uicompos)

    # *** Step 6 *** element classification: all category classification
    # if classifier is not None:
    #     classifier['Elements'].predict([compo.compo_clipping(org) for compo in uicompos], uicompos)
    #     draw.draw_bounding_box_class(org, uicompos, show=show, name='cls', write_path=pjoin(ip_root, 'result.jpg'))
    #     draw.draw_bounding_box_class(org, uicompos, write_path=pjoin(output_root, 'result.jpg'))

    # *** Step 7 *** save detection result
    Compo.compos_update(uicompos, org.shape)
    # *** Step 8 *** resolve containment issues among UI components
    uicompos = resolve_uicompo_containment(uicompos)
    file.save_corners_json(result_path, uicompos)

    elapsed = time.perf_counter() - start
    print("[Compo Detection Completed in %.3f s] Input: %s Output: %s" % (elapsed, input_img_path, result_path))
    return uicompos

View File

@@ -0,0 +1,122 @@
import numpy as np
import detect_compo.lib_ip.ip_draw as draw
class Bbox:
    """Axis-aligned bounding box described by its column/row extremes.

    ``width``, ``height`` and ``box_area`` are derived from the corners when
    the box is created and are refreshed whenever the box is resized.
    """

    def __init__(self, col_min, row_min, col_max, row_max):
        self.col_min = col_min
        self.row_min = row_min
        self.col_max = col_max
        self.row_max = row_max

        self.width = col_max - col_min
        self.height = row_max - row_min
        self.box_area = self.width * self.height

    def put_bbox(self):
        """Return the box as ``(col_min, row_min, col_max, row_max)``."""
        return self.col_min, self.row_min, self.col_max, self.row_max

    def bbox_cal_area(self):
        """Recompute ``box_area`` from the stored width/height and return it."""
        self.box_area = self.width * self.height
        return self.box_area

    def bbox_relation(self, bbox_b):
        """
        Classify the geometric relation between this box (a) and ``bbox_b`` (b).

        All containment tests are strict, so identical boxes and boxes that
        merely share an edge are reported as intersected (2).

        :return: -1 : a in b
                 0  : a, b are not intersected
                 1  : b in a
                 2  : a, b are identical or intersected
        """
        col_min_a, row_min_a, col_max_a, row_max_a = self.put_bbox()
        col_min_b, row_min_b, col_max_b, row_max_b = bbox_b.put_bbox()

        # if a is in b
        if col_min_a > col_min_b and row_min_a > row_min_b and col_max_a < col_max_b and row_max_a < row_max_b:
            return -1
        # if b is in a
        elif col_min_a < col_min_b and row_min_a < row_min_b and col_max_a > col_max_b and row_max_a > row_max_b:
            return 1
        # a and b are non-intersect
        elif (col_min_a > col_max_b or row_min_a > row_max_b) or (col_min_b > col_max_a or row_min_b > row_max_a):
            return 0
        # intersection
        else:
            return 2

    def bbox_relation_nms(self, bbox_b, bias=(0, 0)):
        '''
        Calculate the relation between two rectangles from their overlap ratios.

        Both boxes are expanded by ``bias`` (columns, rows) before the overlap
        is measured, so nearby boxes can count as overlapping. The overlap is
        compared with each box's unexpanded area.

        :param bbox_b: the other box (b); this box is a
        :param bias: (bias_col, bias_row) expansion applied to both boxes
        :return: -1 : a in b (overlap covers all of a; this includes identical boxes)
                 0  : a, b are not intersected (or overlap is below the thresholds)
                 1  : b in a (overlap covers all of b)
                 2  : a, b are intersected
        '''
        col_min_a, row_min_a, col_max_a, row_max_a = self.put_bbox()
        col_min_b, row_min_b, col_max_b, row_max_b = bbox_b.put_bbox()

        bias_col, bias_row = bias
        # get the intersected area
        col_min_s = max(col_min_a - bias_col, col_min_b - bias_col)
        row_min_s = max(row_min_a - bias_row, row_min_b - bias_row)
        col_max_s = min(col_max_a + bias_col, col_max_b + bias_col)
        row_max_s = min(row_max_a + bias_row, row_max_b + bias_row)
        w = np.maximum(0, col_max_s - col_min_s)
        h = np.maximum(0, row_max_s - row_min_s)
        inter = w * h
        area_a = (col_max_a - col_min_a) * (row_max_a - row_min_a)
        area_b = (col_max_b - col_min_b) * (row_max_b - row_min_b)
        iou = inter / (area_a + area_b - inter)
        ioa = inter / self.box_area
        iob = inter / bbox_b.box_area

        if iou == 0 and ioa == 0 and iob == 0:
            return 0

        # contained by b
        if ioa >= 1:
            return -1
        # contains b
        if iob >= 1:
            return 1
        # intersected strongly enough to matter
        if iou >= 0.02 or iob > 0.2 or ioa > 0.2:
            return 2
        # overlap exists but is too small to count
        return 0

    def bbox_cvt_relative_position(self, col_min_base, row_min_base):
        '''
        Translate the box by adding ``col_min_base`` to its columns and
        ``row_min_base`` to its rows. The size of the box is unchanged.
        '''
        self.col_min += col_min_base
        self.col_max += col_min_base
        self.row_min += row_min_base
        self.row_max += row_min_base

    def bbox_merge(self, bbox_b):
        '''
        Return a new Bbox that is the smallest box covering this box and ``bbox_b``.
        Neither input box is modified.
        '''
        col_min_a, row_min_a, col_max_a, row_max_a = self.put_bbox()
        col_min_b, row_min_b, col_max_b, row_max_b = bbox_b.put_bbox()
        col_min = min(col_min_a, col_min_b)
        col_max = max(col_max_a, col_max_b)
        row_min = min(row_min_a, row_min_b)
        row_max = max(row_max_a, row_max_b)
        new_bbox = Bbox(col_min, row_min, col_max, row_max)
        return new_bbox

    def bbox_padding(self, image_shape, pad):
        '''
        Grow the box by ``pad`` pixels on every side, clamped to the image.

        :param image_shape: shape of the image; only (rows, columns) are read
        :param pad: number of pixels added on each side
        '''
        row, col = image_shape[:2]
        self.col_min = max(self.col_min - pad, 0)
        self.col_max = min(self.col_max + pad, col)
        self.row_min = max(self.row_min - pad, 0)
        self.row_max = min(self.row_max + pad, row)
        # keep the derived attributes in sync with the resized corners
        self.width = self.col_max - self.col_min
        self.height = self.row_max - self.row_min
        self.box_area = self.width * self.height

View File

@@ -0,0 +1,238 @@
from detect_compo.lib_ip.Bbox import Bbox
import detect_compo.lib_ip.ip_draw as draw
import cv2
def cvt_compos_relative_pos(compos, col_min_base, row_min_base):
    """Shift every component in ``compos`` by the given column/row offsets.

    Each component is updated in place through its own
    ``compo_relative_position`` method; nothing is returned.
    """
    for component in compos:
        component.compo_relative_position(col_min_base, row_min_base)
def compos_containment(compos):
    """Record, for every pair of components, which one contains the other.

    Each unordered pair is compared once. When one component of the pair is
    reported as containing the other, the index of the contained component is
    appended to the container's ``contain`` list.
    """
    total = len(compos)
    for first in range(total - 1):
        compo_a = compos[first]
        for second in range(first + 1, total):
            compo_b = compos[second]
            relation = compo_a.compo_relation(compo_b)
            if relation == -1:
                # a lies inside b
                compo_b.contain.append(first)
            elif relation == 1:
                # b lies inside a
                compo_a.contain.append(second)
def compos_update(compos, org_shape):
    """Renumber the components and refresh their cached geometry.

    Ids are assigned in list order starting from 1 (id 0 is the background).
    """
    for compo_id, compo in enumerate(compos, start=1):
        compo.compo_update(compo_id, org_shape)
class Component:
    """A connected pixel region together with its boundary and bounding box.

    ``region`` is a list of ``(row_index, column_index)`` points. From it the
    constructor derives ``boundary`` (the per-column top/bottom and per-row
    left/right border positions) and ``bbox``.
    """

    def __init__(self, region, image_shape):
        self.id = None
        self.region = region
        self.boundary = self.compo_get_boundary()
        self.bbox = self.compo_get_bbox()
        self.bbox_area = self.bbox.box_area
        self.region_area = len(region)
        # until compo_update() runs, width/height are the number of occupied
        # columns/rows of the region
        self.width = len(self.boundary[0])
        self.height = len(self.boundary[2])
        self.image_shape = image_shape
        self.area = self.width * self.height

        self.category = 'Compo'
        self.contain = []

        self.rect_ = None
        self.line_ = None
        self.redundant = False

    def compo_update(self, id, org_shape):
        """Set the id and image shape, and re-read width/height/area from the bbox."""
        self.id = id
        self.image_shape = org_shape
        self.width = self.bbox.width
        self.height = self.bbox.height
        self.bbox_area = self.bbox.box_area
        self.area = self.width * self.height

    def put_bbox(self):
        """Return the bounding box as (col_min, row_min, col_max, row_max)."""
        return self.bbox.put_bbox()

    def compo_update_bbox_area(self):
        """Recompute the bounding-box area and cache it in ``bbox_area``."""
        self.bbox_area = self.bbox.bbox_cal_area()

    def compo_get_boundary(self):
        '''
        Get the bounding boundary of the region.

        :return: boundary: [top, bottom, left, right], each a list of
                 [index, border] pairs sorted by ascending index
            -> top, bottom: [column_index, min/max row of that column]
            -> left, right: [row_index, min/max column of that row]
        '''
        border_up, border_bottom, border_left, border_right = {}, {}, {}, {}
        for point in self.region:
            # point: (row_index, column_index)
            # up, bottom: (column_index, min/max row border) detect range of each column
            if point[1] not in border_up or border_up[point[1]] > point[0]:
                border_up[point[1]] = point[0]
            if point[1] not in border_bottom or border_bottom[point[1]] < point[0]:
                border_bottom[point[1]] = point[0]
            # left, right: (row_index, min/max column border) detect range of each row
            if point[0] not in border_left or border_left[point[0]] > point[1]:
                border_left[point[0]] = point[1]
            if point[0] not in border_right or border_right[point[0]] < point[1]:
                border_right[point[0]] = point[1]

        boundary = [border_up, border_bottom, border_left, border_right]
        # turn each dict into a list of [index, border] pairs in ascending index order
        for i in range(len(boundary)):
            boundary[i] = [[k, boundary[i][k]] for k in boundary[i].keys()]
            boundary[i] = sorted(boundary[i], key=lambda x: x[0])
        return boundary

    def compo_get_bbox(self):
        """
        Build the bounding box from the first/last indices of the boundary lists.

        :return: Bbox(col_min, row_min, col_max, row_max)
        """
        col_min, row_min = (int(min(self.boundary[0][0][0], self.boundary[1][-1][0])),
                            int(min(self.boundary[2][0][0], self.boundary[3][-1][0])))
        col_max, row_max = (int(max(self.boundary[0][0][0], self.boundary[1][-1][0])),
                            int(max(self.boundary[2][0][0], self.boundary[3][-1][0])))
        bbox = Bbox(col_min, row_min, col_max, row_max)
        return bbox

    def compo_is_rectangle(self, min_rec_evenness, max_dent_ratio, test=False):
        '''
        Detect if an object is rectangle by evenness and dent of each border.

        The verdict is also stored in ``self.rect_``.

        :param min_rec_evenness: minimum share of flat border points; replaced
               by 0.85 when the component is taller than 30% of the image
        :param max_dent_ratio: maximum share of one border that may be sunken
        :param test: print and draw intermediate results
        :return: Boolean
        '''
        dent_direction = [1, -1, 1, -1]  # direction for convex

        flat = 0
        parameter = 0
        for n, border in enumerate(self.boundary):
            parameter += len(border)
            # dent detection
            pit = 0  # length of pit
            depth = 0  # the degree of surface changing
            if n <= 1:
                adj_side = max(len(self.boundary[2]), len(self.boundary[3]))  # get maximum length of adjacent side
            else:
                adj_side = max(len(self.boundary[0]), len(self.boundary[1]))

            # -> up, bottom: (column_index, min/max row border)
            # -> left, right: (row_index, min/max column border) detect range of each row
            abnm = 0
            # the first few points of each border are skipped
            for i in range(int(3 + len(border) * 0.02), len(border) - 1):
                # calculate gradient
                difference = border[i][1] - border[i + 1][1]
                # the degree of surface changing
                depth += difference
                # ignore noise at the start of each direction
                if i / len(border) < 0.08 and (dent_direction[n] * difference) / adj_side > 0.5:
                    depth = 0  # reset

                # if the change of the surface is too large, count it as part of abnormal change
                if abs(depth) / adj_side > 0.3:
                    abnm += 1  # count the size of the abnm
                    # if the abnm is too big, the shape should not be a rectangle
                    if abnm / len(border) > 0.1:
                        if test:
                            print('abnms', abnm, abnm / len(border))
                            draw.draw_boundary([self], self.image_shape, show=True)
                        self.rect_ = False
                        return False
                    continue
                else:
                    # reset the abnm if the depth back to normal
                    abnm = 0

                # if sunken and the surface changing is large, then counted as pit
                if dent_direction[n] * depth < 0 and abs(depth) / adj_side > 0.15:
                    pit += 1
                    continue

                # if the surface is not changing to a pit and the gradient is zero, then count it as flat
                if abs(depth) < 1 + adj_side * 0.015:
                    flat += 1
                if test:
                    print(depth, adj_side, flat)
            # if the pit is too big, the shape should not be a rectangle
            if pit / len(border) > max_dent_ratio:
                if test:
                    print('pit', pit, pit / len(border))
                    draw.draw_boundary([self], self.image_shape, show=True)
                self.rect_ = False
                return False
        if test:
            print(flat / parameter, '\n')
            draw.draw_boundary([self], self.image_shape, show=True)
        # ignore text and irregular shape
        if self.height / self.image_shape[0] > 0.3:
            min_rec_evenness = 0.85
        if (flat / parameter) < min_rec_evenness:
            self.rect_ = False
            return False
        self.rect_ = True
        return True

    def compo_is_line(self, min_line_thickness):
        """
        Check whether this object is a line by measuring its thickness along
        the boundary. The verdict is also stored in ``self.line_``.

        The boundary lists themselves are iterated (rather than
        ``range(self.width)`` / ``range(self.height)``) because width/height
        are re-derived from the bbox by compo_update()/compo_merge() and then
        no longer match the number of boundary entries.

        :param min_line_thickness: largest thickness still counted as slim
        :return: Boolean
        """
        top, bottom, left, right = self.boundary

        # horizontally: thickness of each column
        slim = 0
        for upper, lower in zip(top, bottom):
            if abs(lower[1] - upper[1]) <= min_line_thickness:
                slim += 1
        if slim / len(top) > 0.93:
            self.line_ = True
            return True
        # vertically: thickness of each row
        slim = 0
        for start, end in zip(left, right):
            if abs(start[1] - end[1]) <= min_line_thickness:
                slim += 1
        if slim / len(left) > 0.93:
            self.line_ = True
            return True
        self.line_ = False
        return False

    def compo_relation(self, compo_b, bias=(0, 0)):
        """
        Relation between this component (a) and ``compo_b`` (b), delegated to
        ``Bbox.bbox_relation_nms``.

        :return: -1 : a in b
                 0  : a, b are not intersected
                 1  : b in a
                 2  : a, b are intersected
        """
        return self.bbox.bbox_relation_nms(compo_b.bbox, bias)

    def compo_relative_position(self, col_min_base, row_min_base):
        '''
        Translate the bounding box by the given column/row offsets.
        Only the bbox is moved; ``region`` and ``boundary`` are left as they are.
        '''
        self.bbox.bbox_cvt_relative_position(col_min_base, row_min_base)

    def compo_merge(self, compo_b):
        """Replace the bbox with the union of both bboxes and refresh the cached sizes."""
        self.bbox = self.bbox.bbox_merge(compo_b.bbox)
        self.compo_update(self.id, self.image_shape)

    def compo_clipping(self, img, pad=0, show=False):
        """
        Cut the (optionally padded) bounding box out of ``img``.

        :param img: image indexed as img[row, column]
        :param pad: pixels added on every side, clamped to the image
        :param show: display the clip in a window and wait for a key
        :return: the clipped sub-image
        """
        (column_min, row_min, column_max, row_max) = self.put_bbox()
        column_min = max(column_min - pad, 0)
        column_max = min(column_max + pad, img.shape[1])
        row_min = max(row_min - pad, 0)
        row_max = min(row_max + pad, img.shape[0])
        clip = img[row_min:row_max, column_min:column_max]
        if show:
            cv2.imshow('clipping', clip)
            cv2.waitKey()
        return clip

View File

@@ -0,0 +1,72 @@
import os
import pandas as pd
import json
from os.path import join as pjoin
import time
import cv2
def save_corners(file_path, corners, compo_name, clear=True):
    """Write component corners to a CSV file, one row per corner pair.

    :param file_path: CSV file to create or extend
    :param corners: iterable of (up_left, bottom_right) point pairs
    :param compo_name: value stored in the 'component' column of every new row
    :param clear: if True, rows already present in the file are discarded

    NOTE(review): the first element of each point is stored under 'y_*' and
    'width' is the y extent, exactly as before -- confirm this naming against
    the callers.
    """
    try:
        df = pd.read_csv(file_path, index_col=0)
    except (OSError, ValueError):
        # missing, unreadable, empty or malformed file: start from scratch
        df = pd.DataFrame(columns=['component', 'x_max', 'x_min', 'y_max', 'y_min', 'height', 'width'])

    if clear:
        df = df.drop(df.index)

    rows = []
    for corner in corners:
        (up_left, bottom_right) = corner
        c = {'component': compo_name}
        (c['y_min'], c['x_min']) = up_left
        (c['y_max'], c['x_max']) = bottom_right
        c['width'] = c['y_max'] - c['y_min']
        c['height'] = c['x_max'] - c['x_min']
        rows.append(c)

    if rows:
        # DataFrame.append no longer exists in pandas 2; add all rows at once
        new_rows = pd.DataFrame(rows)
        if df.empty:
            # keep the existing column order without concatenating an empty frame
            ordered = list(df.columns) + [col for col in new_rows.columns if col not in df.columns]
            df = new_rows.reindex(columns=ordered)
        else:
            df = pd.concat([df, new_rows], ignore_index=True)
    df.to_csv(file_path)
def save_corners_json(file_path, compos):
    """Dump the detected components to a JSON file.

    The output holds 'img_shape' (taken from the first component, or None
    when ``compos`` is empty) and 'compos', a list with the id, class,
    bounding box, width and height of every component.

    :param file_path: destination JSON file (overwritten)
    :param compos: components exposing id, category, image_shape, width,
                   height and put_bbox()
    """
    img_shape = compos[0].image_shape if compos else None
    output = {'img_shape': img_shape, 'compos': []}

    for compo in compos:
        c = {'id': compo.id, 'class': compo.category}
        (c['column_min'], c['row_min'], c['column_max'], c['row_max']) = compo.put_bbox()
        c['width'] = compo.width
        c['height'] = compo.height
        output['compos'].append(c)

    # open the file only once the payload is complete, and always close it
    with open(file_path, 'w') as f_out:
        json.dump(output, f_out, indent=4)
def save_clipping(org, output_root, corners, compo_classes, compo_index):
    """Save a padded clip of every component into a folder named after its class.

    Clips are written to ``<output_root>/<class>/<n>.png`` where ``n`` is the
    running counter kept per class in ``compo_index`` (updated in place).

    :param org: image indexed as org[row, column]
    :param output_root: root output directory, created if missing
    :param corners: list of ((col_min, row_min), (col_max, row_max)) pairs
    :param compo_classes: class name of each entry in ``corners``
    :param compo_index: dict mapping class name -> last index used
    """
    # makedirs also handles nested roots and directories that already exist
    os.makedirs(output_root, exist_ok=True)

    pad = 2
    for i, (up_left, bottom_right) in enumerate(corners):
        compo = compo_classes[i]
        (col_min, row_min) = up_left
        (col_max, row_max) = bottom_right
        col_min = max(col_min - pad, 0)
        col_max = min(col_max + pad, org.shape[1])
        row_min = max(row_min - pad, 0)
        row_max = min(row_max + pad, org.shape[0])

        # if component type already exists, index increase by 1, otherwise add this type
        compo_path = pjoin(output_root, compo)
        if compo not in compo_index:
            compo_index[compo] = 0
        else:
            compo_index[compo] += 1
        # the folder may be missing even for a class that is already counted
        os.makedirs(compo_path, exist_ok=True)

        clip = org[row_min:row_max, col_min:col_max]
        cv2.imwrite(pjoin(compo_path, str(compo_index[compo]) + '.png'), clip)
def build_directory(directory):
    """Make sure ``directory`` exists (creating missing parents) and return it."""
    # exist_ok avoids the check-then-create race and supports nested paths
    os.makedirs(directory, exist_ok=True)
    return directory

View File

@@ -0,0 +1,535 @@
import cv2
import numpy as np
import detect_compo.lib_ip.ip_draw as draw
import detect_compo.lib_ip.ip_preprocessing as pre
from detect_compo.lib_ip.Component import Component
import detect_compo.lib_ip.Component as Compo
from config.CONFIG_UIED import Config
C = Config()
def merge_intersected_corner(compos, org, is_merge_contained_ele, max_gap=(0, 0), max_ele_height=25):
    '''
    Repeatedly merge components whose boxes overlap until nothing changes.

    :param compos: components to merge; merging mutates the surviving components
    :param org: original image, only its shape is used
    :param is_merge_contained_ele: if true, also merge when compo_relation reports -1
    :param max_gap: (horizontal_distance, vertical_distance) passed to compo_relation as bias
    :param max_ele_height: only forwarded to the recursive call
    :return: the merged list of components
    '''
    Compo.compos_update(compos, org.shape)

    any_merge = False
    kept = []
    for compo in compos:
        current = compo
        absorbed = False
        for target in kept:
            relation = current.compo_relation(target, max_gap)
            # merge on relation 1 or 2 always, on -1 only when requested
            wanted = relation == 1 or relation == 2 or (is_merge_contained_ele and relation == -1)
            if not wanted:
                continue
            target.compo_merge(current)
            # keep comparing with the grown component against the remaining ones
            current = target
            absorbed = True
            any_merge = True
        if not absorbed:
            kept.append(compo)

    if any_merge:
        # a merge may have created new overlaps, so run another pass
        return merge_intersected_corner(kept, org, is_merge_contained_ele, max_gap, max_ele_height)
    return compos
def merge_intersected_compos(compos):
    """Merge components that intersect (relation 2) until a pass makes no merge.

    A component is merged into the first already-kept component it intersects.
    Returns a new list; the surviving components are mutated by the merges.
    """
    while True:
        merged_any = False
        survivors = []
        for candidate in compos:
            for kept in survivors:
                if candidate.compo_relation(kept) == 2:
                    kept.compo_merge(candidate)
                    merged_any = True
                    break
            else:
                # intersects nothing kept so far
                survivors.append(candidate)
        compos = survivors.copy()
        if not merged_any:
            return compos
def rm_contained_compos_not_in_block(compos):
    '''
    Drop every component that is contained in another component, unless the
    containing component is a 'Block'.

    :return: new list with the remaining components in their original order
    '''
    count = len(compos)
    dropped = [False] * count
    for a in range(count - 1):
        for b in range(a + 1, count):
            relation = compos[a].compo_relation(compos[b])
            if relation == -1:
                # a is inside b
                if compos[b].category != 'Block':
                    dropped[a] = True
            elif relation == 1:
                # b is inside a
                if compos[a].category != 'Block':
                    dropped[b] = True
    return [compo for compo, gone in zip(compos, dropped) if not gone]
def merge_text(compos, org_shape, max_word_gad=4, max_word_height=20):
    """Merge short components lying on the same text line, until stable.

    A component no taller than ``max_word_height`` is merged into the first
    kept component that shares rows with it and either overlaps it
    horizontally or is less than ``max_word_gad`` pixels away from it.

    :param compos: components exposing height, put_bbox() and compo_merge()
    :param org_shape: shape of the original image (kept for interface compatibility)
    :param max_word_gad: maximum horizontal gap between two words of a line
    :param max_word_height: components taller than this are never merged into others
    :return: list of components after merging
    """
    def is_text_line(compo_a, compo_b):
        (col_min_a, row_min_a, col_max_a, row_max_a) = compo_a.put_bbox()
        (col_min_b, row_min_b, col_max_b, row_max_b) = compo_b.put_bbox()

        col_min_s = max(col_min_a, col_min_b)
        col_max_s = min(col_max_a, col_max_b)
        row_min_s = max(row_min_a, row_min_b)
        row_max_s = min(row_max_a, row_max_b)

        # on the same line: the row ranges overlap
        if row_min_s < row_max_s:
            # close distance: columns overlap or the gap is small enough
            if col_min_s < col_max_s or \
                    (0 < col_min_b - col_max_a < max_word_gad) or (0 < col_min_a - col_max_b < max_word_gad):
                return True
        return False

    changed = False
    new_compos = []
    for compo in compos:
        # ignore non-text
        if compo.height > max_word_height:
            new_compos.append(compo)
            continue
        for target in new_compos:
            if is_text_line(compo, target):
                target.compo_merge(compo)
                changed = True
                break
        else:
            new_compos.append(compo)

    if not changed:
        return compos
    # forward the thresholds so later passes use the same limits as the first
    return merge_text(new_compos, org_shape, max_word_gad, max_word_height)
def rm_top_or_bottom_corners(components, org_shape, top_bottom_height=C.THRESHOLD_TOP_BOTTOM_BAR):
    """Drop components lying entirely in the top or bottom band of the image.

    :param components: components exposing put_bbox()
    :param org_shape: image shape; (height, width) are read from it
    :param top_bottom_height: (top, bottom) band limits as fractions of the height
    :return: new list without the components that end above the top limit
             or start below the bottom limit
    """
    height, _width = org_shape[:2]

    def outside_bars(compo):
        _col_min, row_min, _col_max, row_max = compo.put_bbox()
        return row_max >= height * top_bottom_height[0] and row_min <= height * top_bottom_height[1]

    return [compo for compo in components if outside_bars(compo)]
def rm_line_v_h(binary, show=False, max_line_thickness=C.THRESHOLD_LINE_THICKNESS):
    """Erase thin straight lines from ``binary`` in place.

    Rows are scanned for a run of non-zero pixels longer than 60% of the image
    width, columns for a run longer than 60% of the height. Consecutive
    rows/columns with such a run form a band; a band thinner than
    ``max_line_thickness`` is copied into a line map, which is finally
    subtracted from ``binary``. A band is only evaluated once a row/column
    without a long run follows it.

    :param binary: binary image, modified in place
    :param show: display the input, the cleaned image and the line map
    :param max_line_thickness: bands at least this thick are kept
    """
    def check_continuous_line(line, edge):
        # return [start, end) of the first run longer than 60% of ``edge``, else None
        continuous_length = 0
        line_start = -1
        for j, p in enumerate(line):
            if p > 0:
                if line_start == -1:
                    line_start = j
                continuous_length += 1
            elif continuous_length > 0:
                if continuous_length / edge > 0.6:
                    return [line_start, j]
                continuous_length = 0
                line_start = -1

        if continuous_length / edge > 0.6:
            return [line_start, len(line)]
        else:
            return None

    def extract_line_area(line, start_idx, flag='v'):
        # copy the run of every row of the band into the line map
        for e, l in enumerate(line):
            if flag == 'v':
                map_line[start_idx + e, l[0]:l[1]] = binary[start_idx + e, l[0]:l[1]]

    map_line = np.zeros(binary.shape[:2], dtype=np.uint8)
    # only open a window when the caller asked for visual output
    if show:
        cv2.imshow('binary', binary)

    width = binary.shape[1]
    start_row = -1
    line_area = []
    for i, row in enumerate(binary):
        line_v = check_continuous_line(row, width)
        if line_v is not None:
            # new line
            if start_row == -1:
                start_row = i
                line_area = []
            line_area.append(line_v)
        else:
            # checking line
            if start_row != -1:
                if i - start_row < max_line_thickness:
                    # debug trace kept from the original implementation
                    print(line_area, start_row, i)
                    extract_line_area(line_area, start_row)
                start_row = -1

    height = binary.shape[0]
    start_col = -1
    for i in range(width):
        col = binary[:, i]
        line_h = check_continuous_line(col, height)
        if line_h is not None:
            # new line
            if start_col == -1:
                start_col = i
        else:
            # checking line
            if start_col != -1:
                if i - start_col < max_line_thickness:
                    # whole columns of the band are copied, not only the run
                    map_line[:, start_col: i] = binary[:, start_col: i]
                start_col = -1

    # the map holds copies of the line pixels, so subtracting zeroes them
    binary -= map_line

    if show:
        cv2.imshow('no-line', binary)
        cv2.imshow('lines', map_line)
        cv2.waitKey()
def rm_line(binary,
            max_line_thickness=C.THRESHOLD_LINE_THICKNESS,
            min_line_length_ratio=C.THRESHOLD_LINE_MIN_LENGTH,
            show=False, wait_key=0):
    """Erase thin, nearly full-width horizontal lines from ``binary`` in place.

    Rows are scanned top to bottom. A band of consecutive "line" rows that is
    thinner than ``max_line_thickness`` is zeroed once more than
    ``max_line_thickness`` rows have passed since the band ended, or at the
    end of the scan.

    :param binary: binary image, modified in place; nothing is returned
    :param max_line_thickness: bands at least this thick are not treated as lines
    :param min_line_length_ratio: not read by the body; the length test uses
           the literal 0.95 -- NOTE(review): confirm whether it should be used
    :param show: display the result in a window
    :param wait_key: delay passed to cv2.waitKey; None skips the wait and 0
           additionally closes the window afterwards
    """
    def is_valid_line(line):
        # A row is a line when its non-zero pixels cover more than 95% of the
        # width and no gap longer than 5 pixels is followed by more pixels.
        line_length = 0
        line_gap = 0
        for j in line:
            if j > 0:
                if line_gap > 5:
                    return False
                line_length += 1
                line_gap = 0
            elif line_length > 0:
                line_gap += 1
        if line_length / width > 0.95:
            return True
        return False

    height, width = binary.shape[:2]
    # NOTE(review): 'board' is allocated but never read in this function
    board = np.zeros(binary.shape[:2], dtype=np.uint8)

    start_row, end_row = -1, -1
    check_line = False  # currently inside a band of line rows
    check_gap = False   # a thin band ended; waiting before erasing it
    for i, row in enumerate(binary):
        if is_valid_line(row):
            # new start: if it is checking a new line, mark this row as start
            if not check_line:
                start_row = i
                check_line = True
        else:
            # end the line
            if check_line:
                # thin enough to be a line, then start checking gap
                if i - start_row < max_line_thickness:
                    end_row = i
                    check_gap = True
                else:
                    # too thick: forget this band
                    start_row, end_row = -1, -1
                check_line = False
        # check gap: erase the pending band once enough rows have followed it
        if check_gap and i - end_row > max_line_thickness:
            binary[start_row: end_row] = 0
            start_row, end_row = -1, -1
            check_line = False
            check_gap = False

    # flush a band that was still open or pending when the image ended
    # NOTE(review): if the band runs to the last row, end_row may still be -1,
    # so the slice stops one row short -- confirm intent
    if (check_line and (height - start_row) < max_line_thickness) or check_gap:
        binary[start_row: end_row] = 0

    if show:
        cv2.imshow('no-line binary', binary)
        if wait_key is not None:
            cv2.waitKey(wait_key)
        if wait_key == 0:
            cv2.destroyWindow('no-line binary')
def rm_noise_compos(compos):
    """Return a new list without the components whose category is 'Noise'."""
    return [compo for compo in compos if compo.category != 'Noise']
def rm_noise_in_large_img(compos, org,
                          max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE):
    """Drop the components listed as contained in an 'Image' component.

    :param compos: components; the ``contain`` list of each 'Image' component
                   holds indices into this list
    :param org: original image (only its shape is read)
    :param max_compo_scale: not used by the body
    :return: new list with the remaining components in their original order
    """
    _rows, _cols = org.shape[:2]
    keep = [True] * len(compos)
    for compo in compos:
        if compo.category != 'Image':
            continue
        for idx in compo.contain:
            keep[idx] = False
    return [compo for compo, kept in zip(compos, keep) if kept]
def detect_compos_in_img(compos, binary, org, max_compo_scale=C.THRESHOLD_COMPO_MAX_SCALE, show=False,
                         min_obj_area=0):
    """Detect rectangular components nested inside 'Image' components.

    For each component of category 'Image', the matching clip of ``binary`` is
    reversed and searched for rectangular components. A rectangle is kept when
    its bbox covers less than 80% of the image component's bbox and it is more
    than 20 pixels wide and high. The kept rectangles are appended to
    ``compos`` in place; nothing is returned.

    :param compos: list of components, extended in place
    :param binary: binary map of the whole image
    :param org: original image (not read by the body)
    :param max_compo_scale: not read by the body
    :param show: display the reversed clips
    :param min_obj_area: minimum flood-filled area forwarded to
           component_detection, which requires it; the default 0 disables
           that filter
    """
    compos_new = []
    for compo in compos:
        if compo.category == 'Image':
            compo.compo_update_bbox_area()
            bin_clip = compo.compo_clipping(binary)
            bin_clip = pre.reverse_binary(bin_clip, show=show)

            compos_rec, compos_nonrec = component_detection(bin_clip, min_obj_area, test=False,
                                                            step_h=10, step_v=10, rec_detect=True)
            for compo_rec in compos_rec:
                # move the detected box from clip coordinates back into the whole image
                compo_rec.compo_relative_position(compo.bbox.col_min, compo.bbox.row_min)
                if compo_rec.bbox_area / compo.bbox_area < 0.8 and compo_rec.bbox.height > 20 and compo_rec.bbox.width > 20:
                    compos_new.append(compo_rec)
    compos += compos_new
def compo_filter(compos, min_area, img_shape):
    """Filter out components that are too small, too tall or too elongated.

    A component is dropped when
      * its area is below ``min_area``,
      * it is taller than 80% of the image height,
      * it has no width or no height (its aspect ratio is undefined),
      * its width/height ratio exceeds 50 or its height/width ratio exceeds 40,
      * its shorter side is below 8 pixels and its aspect ratio exceeds 10.

    :param compos: components exposing area, width and height
    :param min_area: minimum accepted area
    :param img_shape: image shape; only the height (first entry) is read
    :return: new list with the accepted components
    """
    max_height = img_shape[0] * 0.8
    compos_new = []
    for compo in compos:
        if compo.area < min_area:
            continue
        if compo.height > max_height:
            continue
        # degenerate boxes would divide by zero below
        if compo.width <= 0 or compo.height <= 0:
            continue
        ratio_h = compo.width / compo.height
        ratio_w = compo.height / compo.width
        if ratio_h > 50 or ratio_w > 40 or \
                (min(compo.height, compo.width) < 8 and max(ratio_h, ratio_w) > 10):
            continue
        compos_new.append(compo)
    return compos_new
def is_block(clip, thread=0.15):
    '''
    Block is a rectangle border enclosing a group of compos (consider it as a wireframe)
    Check if a compo is block by checking if the inner side of its border is blank

    Four lines just inside each border (indices 5..8 from the top/left and
    -5..-8 from the bottom/right) are inspected. A line counts as non-blank
    when its pixel sum / 255 exceeds ``thread`` times the line length; a side
    with more than two non-blank lines means the clip is not a block.

    :param clip: 2-D binary (0/255) numpy array of the component
    :param thread: maximum share of foreground pixels for a blank line
    :return: Boolean; False for clips too small to hold the scanned lines
    '''
    rows, cols = clip.shape[:2]
    # the scan touches index 8 on both axes
    if rows < 9 or cols < 9:
        return False

    def too_busy(lines, limit):
        # np.sum widens the accumulator; summing uint8 scalars with the builtin
        # sum can wrap around under NumPy 2 promotion rules
        busy = sum(1 for line in lines if int(np.sum(line)) / 255 > limit)
        return busy > 2

    row_limit = thread * cols
    col_limit = thread * rows

    # top border - scan top down
    if too_busy((clip[r] for r in range(5, 9)), row_limit):
        return False
    # left border - scan left to right
    if too_busy((clip[:, c] for c in range(5, 9)), col_limit):
        return False
    # bottom border - scan bottom up
    if too_busy((clip[r] for r in range(-5, -9, -1)), row_limit):
        return False
    # right border - scan right to left
    if too_busy((clip[:, c] for c in range(-5, -9, -1)), col_limit):
        return False
    return True
def compo_block_recognition(binary, compos, block_side_length=0.15):
    """Mark large components whose clip passes ``is_block`` as 'Block'.

    Only components whose height and width both exceed ``block_side_length``
    times the image height/width are examined. Matching components get their
    ``category`` set to 'Block' in place.
    """
    height, width = binary.shape
    for compo in compos:
        if compo.height / height <= block_side_length:
            continue
        if compo.width / width <= block_side_length:
            continue
        if is_block(compo.compo_clipping(binary)):
            compo.category = 'Block'
# Take the binary image as input,
# flood-fill its connected regions -> build a Component (boundary + bbox) for each -> optionally check if it is a rectangle.
# Returns either all components or the (rectangular, non-rectangular) split.
def component_detection(binary, min_obj_area,
                        line_thickness=C.THRESHOLD_LINE_THICKNESS,
                        min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS,
                        max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO,
                        step_h = 5, step_v = 2,
                        rec_detect=False, show=False, test=False):
    """
    Detect connected components in a binary image by flood filling from a grid of seed points.

    :param binary: Binary image from pre-processing; pixels equal to 255 are used as seeds
    :param min_obj_area: Regions whose flood-filled area is smaller than this are ignored
    :param line_thickness: Not read by the active code (the line check is commented out)
    :param min_rec_evenness: If not pass then this object cannot be rectangular
    :param max_dent_ratio: If not pass then this object cannot be rectangular
    :param step_h: Row stride of the seed grid
    :param step_v: Column stride of the seed grid
    :param rec_detect: If True, classify every component as rectangle / non-rectangle
    :param show: Print the area and draw all components found so far after each detection
    :param test: Print the area and draw each component as it is found
    :return: (compos_rec, compos_nonrec) if rec_detect is True, otherwise the list of all components
    """
    # floodFill requires a mask that is 2 pixels larger than the image in each dimension
    mask = np.zeros((binary.shape[0] + 2, binary.shape[1] + 2), dtype=np.uint8)
    compos_all = []
    compos_rec = []
    compos_nonrec = []
    row, column = binary.shape[0], binary.shape[1]
    for i in range(0, row, step_h):
        # the starting column alternates between 0 and 1 depending on the row
        for j in range(i % 2, column, step_v):
            # NOTE(review): the mask has a 1-pixel border, so mask[i, j] is not the
            # cell of pixel (i, j) (that would be mask[i + 1, j + 1]) -- confirm intent
            if binary[i, j] == 255 and mask[i, j] == 0:
                # get connected area
                mask_copy = mask.copy()
                ff = cv2.floodFill(binary, mask, (j, i), None, 0, 0, cv2.FLOODFILL_MASK_ONLY)
                if ff[0] < min_obj_area: continue
                # what this fill added to the mask is the new region
                mask_copy = mask - mask_copy
                region = np.reshape(cv2.findNonZero(mask_copy[1:-1, 1:-1]), (-1, 2))
                # findNonZero yields (x, y); Component expects (row, column)
                region = [(p[1], p[0]) for p in region]

                # the Component constructor computes the boundary of the connected area
                component = Component(region, binary.shape)
                # ignore small area
                if component.width <= 3 or component.height <= 3:
                    continue

                if test:
                    print('Area:%d' % (len(region)))
                    draw.draw_boundary([component], binary.shape, show=True)

                compos_all.append(component)

                if rec_detect:
                    # rectangle check
                    if component.compo_is_rectangle(min_rec_evenness, max_dent_ratio):
                        component.rect_ = True
                        compos_rec.append(component)
                    else:
                        component.rect_ = False
                        compos_nonrec.append(component)

                if show:
                    print('Area:%d' % (len(region)))
                    draw.draw_boundary(compos_all, binary.shape, show=True)

    if rec_detect:
        return compos_rec, compos_nonrec
    else:
        return compos_all
def nested_components_detection(grey, org, grad_thresh,
                                show=False, write_path=None,
                                step_h=10, step_v=10,
                                line_thickness=C.THRESHOLD_LINE_THICKNESS,
                                min_rec_evenness=C.THRESHOLD_REC_MIN_EVENNESS,
                                max_dent_ratio=C.THRESHOLD_REC_MAX_DENT_RATIO):
    '''
    Detect rectangular regions of similar grey level by flood filling the grey image.

    :param grey: grey-scale of original image
    :param org: original image (not read by the body)
    :param grad_thresh: lower/upper difference tolerance passed to cv2.floodFill
    :param show: display the two drawing boards
    :param write_path: if given, the 'block' board is written to this path
    :param step_h: row stride of the seed grid
    :param step_v: column stride of the seed grid
    :param line_thickness: maximum thickness for a region to be rejected as a line
    :param min_rec_evenness: passed to compo_is_rectangle
    :param max_dent_ratio: passed to compo_is_rectangle
    :return: list of Component objects that passed all filters
    '''
    compos = []
    # floodFill requires a mask that is 2 pixels larger than the image in each dimension
    mask = np.zeros((grey.shape[0]+2, grey.shape[1]+2), dtype=np.uint8)
    # NOTE(review): the draw_region calls are commented out, so both boards stay
    # black when shown or written
    broad = np.zeros((grey.shape[0], grey.shape[1], 3), dtype=np.uint8)
    broad_all = broad.copy()

    row, column = grey.shape[0], grey.shape[1]
    for x in range(0, row, step_h):
        for y in range(0, column, step_v):
            # NOTE(review): the mask has a 1-pixel border, so mask[x, y] is not the
            # cell of pixel (x, y) (that would be mask[x + 1, y + 1]) -- confirm intent
            if mask[x, y] == 0:
                # flood fill algorithm to get background (layout block)
                mask_copy = mask.copy()
                ff = cv2.floodFill(grey, mask, (y, x), None, grad_thresh, grad_thresh, cv2.FLOODFILL_MASK_ONLY)
                # ignore small regions
                if ff[0] < 500: continue
                # what this fill added to the mask is the new region
                mask_copy = mask - mask_copy
                region = np.reshape(cv2.findNonZero(mask_copy[1:-1, 1:-1]), (-1, 2))
                # findNonZero yields (x, y); Component expects (row, column)
                region = [(p[1], p[0]) for p in region]

                compo = Component(region, grey.shape)
                # ignore regions spanning fewer than 30 rows
                if compo.height < 30:
                    continue

                # skip regions covering nearly the whole image; flag very large ones as redundant
                if compo.area / (row * column) > 0.9:
                    continue
                elif compo.area / (row * column) > 0.7:
                    compo.redundant = True

                # ignore lines
                if compo.compo_is_line(line_thickness):
                    continue
                # ignore non-rectangle as blocks must be rectangular
                if not compo.compo_is_rectangle(min_rec_evenness, max_dent_ratio):
                    continue
                compos.append(compo)
    if show:
        cv2.imshow('flood-fill all', broad_all)
        cv2.imshow('block', broad)
        cv2.waitKey()
    if write_path is not None:
        cv2.imwrite(write_path, broad)
    return compos

View File

@@ -0,0 +1,132 @@
import cv2
import numpy as np
from random import randint as rint
from config.CONFIG_UIED import Config
C = Config()
def draw_bounding_box_class(org, components, color_map=C.COLOR, line=2, show=False, write_path=None, name='board'):
    """
    Draw the bounding box of every component, coloured by its category, on a
    copy of the original image.

    :param org: original image (left untouched)
    :param components: components exposing put_bbox() and category
    :param color_map: mapping from category to drawing colour
    :param line: line thickness
    :param show: display the result and wait for a key press
    :param write_path: if given, the result is written to this path
    :param name: window title used when showing
    :return: labeled image
    """
    board = org.copy()
    for compo in components:
        bbox = compo.put_bbox()
        top_left = (bbox[0], bbox[1])
        bottom_right = (bbox[2], bbox[3])
        board = cv2.rectangle(board, top_left, bottom_right, color_map[compo.category], line)
    if show:
        cv2.imshow(name, board)
        cv2.waitKey(0)
    if write_path is not None:
        cv2.imwrite(write_path, board)
    return board
def draw_bounding_box(org, components, color=(0, 255, 0), line=2,
                      show=False, write_path=None, name='board', is_return=False, wait_key=0):
    """
    Draw the bounding box of every component on a copy of the original image.

    Nothing is drawn (and None is returned) unless the result is shown,
    written to disk or explicitly requested through ``is_return``.

    :param org: original image (left untouched)
    :param components: components exposing put_bbox()
    :param color: line color
    :param line: line thickness
    :param show: display the result
    :param write_path: if given, the result is written to this path
    :param name: window title used when showing
    :param is_return: force drawing even when neither showing nor writing
    :param wait_key: delay passed to cv2.waitKey; None skips the wait and 0
           additionally closes the window afterwards
    :return: labeled image, or None when nothing was requested
    """
    wanted = show or write_path is not None or is_return
    if not wanted:
        return None

    board = org.copy()
    for compo in components:
        bbox = compo.put_bbox()
        top_left = (bbox[0], bbox[1])
        bottom_right = (bbox[2], bbox[3])
        board = cv2.rectangle(board, top_left, bottom_right, color, line)

    if show:
        cv2.imshow(name, board)
        if wait_key is not None:
            cv2.waitKey(wait_key)
        if wait_key == 0:
            cv2.destroyWindow(name)
    if write_path is not None:
        cv2.imwrite(write_path, board)
    return board
def draw_line(org, lines, color=(0, 255, 0), show=False):
    """
    Draw detected lines on a copy of the original image.

    :param org: original image (left untouched)
    :param lines: [line_h, line_v]; every line is a dict with 'head' and 'end'
                  points and an int 'thickness'
    :param color: drawn color
    :param show: display the result and wait for a key press
    :return: image with lines drawn
    """
    board = org.copy()
    line_h, line_v = lines
    for group in (line_h, line_v):
        for segment in group:
            head = tuple(segment['head'])
            end = tuple(segment['end'])
            cv2.line(board, head, end, color, segment['thickness'])
    if show:
        cv2.imshow('img', board)
        cv2.waitKey(0)
    return board
def draw_boundary(components, shape, show=False):
    """
    Draw the boundary points of the components in white on a black board.

    :param components: objects whose ``boundary`` is [top, bottom, left, right]
        -> top, bottom: entries of (column_index, row border)
        -> left, right: entries of (row_index, column border)
    :param shape: shape of the original image; only its first two entries are used
    :param show: display the board and wait for a key press
    :return: drawn board (uint8, single channel)
    """
    board = np.zeros(shape[:2], dtype=np.uint8)  # binary board
    for component in components:
        top, bottom = component.boundary[0], component.boundary[1]
        left, right = component.boundary[2], component.boundary[3]
        # top and bottom entries are (column, row)
        for side in (top, bottom):
            for entry in side:
                board[entry[1], entry[0]] = 255
        # left and right entries are (row, column)
        for side in (left, right):
            for entry in side:
                board[entry[0], entry[1]] = 255
    if show:
        cv2.imshow('rec', board)
        cv2.waitKey(0)
    return board
def draw_region(region, broad, show=False):
    """
    Paint every point of a region with one randomly chosen colour, in place.
    :param region: iterable of points; point[0] and point[1] index the first and second axis of broad
    :param broad: board to paint on; it is modified in place
    :param show: when True, display the board in the window 'region' and wait for a key press
    :return: the same broad object that was passed in
    """
    # One colour per call: three independent draws in [0, 255].
    paint = tuple(rint(0, 255) for _ in range(3))
    for pt in region:
        first, second = pt[0], pt[1]
        broad[first, second] = paint
    if show:
        cv2.imshow('region', broad)
        cv2.waitKey()
    return broad
def draw_region_bin(region, broad, show=False):
    """
    Mark every point of a region with the value 255 on the given board, in place.
    :param region: iterable of points; point[0] and point[1] index the first and second axis of broad
    :param broad: board to mark; it is modified in place
    :param show: when True, display the board in the window 'region' and wait for a key press
    :return: the same broad object that was passed in
    """
    for pt in region:
        first, second = pt[0], pt[1]
        broad[first, second] = 255
    if show:
        cv2.imshow('region', broad)
        cv2.waitKey()
    return broad

View File

@@ -0,0 +1,69 @@
import cv2
import numpy as np
from config.CONFIG_UIED import Config
C = Config()
def read_img(path, resize_height=None, kernel_size=None):
    """
    Load an image from disk and return it together with its grayscale version.
    :param path: path of the image file
    :param resize_height: if given, resize the image to this height, keeping the width/height ratio
    :param kernel_size: if given, aperture size of the median blur applied before resizing
    :return: (img, gray); (None, None) when the file cannot be read or any processing step fails
    """
    try:
        img = cv2.imread(path)
        # cv2.imread reports a missing/unreadable file by returning None, so the
        # check has to come before any other OpenCV call touches the image;
        # otherwise the blur raises and the wrong failure message is printed.
        if img is None:
            print("*** Image does not exist ***")
            return None, None
        if kernel_size is not None:
            img = cv2.medianBlur(img, kernel_size)
        if resize_height is not None:
            # scale the width by the same factor as the height
            w_h_ratio = img.shape[1] / img.shape[0]
            resize_w = resize_height * w_h_ratio
            img = cv2.resize(img, (int(resize_w), int(resize_height)))
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        return img, gray
    except Exception as e:
        # Deliberate best-effort boundary: callers expect (None, None), not an exception.
        print(e)
        print("*** Img Reading Failed ***\n")
        return None, None
def gray_to_gradient(img):
    """
    Compute a simple gradient-magnitude map of an image.
    The result is |difference to the right neighbour| + |difference to the lower
    neighbour| for every pixel, saturated to the uint8 range.
    :param img: grayscale image, or a 3-channel image (converted with COLOR_BGR2GRAY first)
    :return: uint8 gradient image with the same height and width as the input
    """
    if len(img.shape) == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # astype already returns a new float array, so the input is never modified
    img_f = img.astype("float")
    # forward-difference kernels: centre pixel subtracted from its right / lower neighbour
    kernel_h = np.array([[0, 0, 0], [0, -1., 1.], [0, 0, 0]])
    kernel_v = np.array([[0, 0, 0], [0, -1., 0], [0, 1., 0]])
    dst1 = np.abs(cv2.filter2D(img_f, -1, kernel_h))
    dst2 = np.abs(cv2.filter2D(img_f, -1, kernel_v))
    # For 8-bit input the sum can reach 510; casting such a float straight to
    # uint8 is out of range (wraps on some platforms), so saturate first.
    gradient = np.clip(dst1 + dst2, 0, 255).astype('uint8')
    return gradient
def reverse_binary(bin, show=False):
    """
    Invert a binary image with an inverse threshold at 1:
    pixels greater than 1 become 0, all other pixels become 255.
    :param bin: input binary image
    :param show: when True, display the result in the window 'binary_rev' and wait for a key press
    :return: the inverted image
    """
    # cv2.threshold returns (threshold_used, image); only the image is needed
    inverted = cv2.threshold(bin, 1, 255, cv2.THRESH_BINARY_INV)[1]
    if show:
        cv2.imshow('binary_rev', inverted)
        cv2.waitKey()
    return inverted
def binarization(org, grad_min, show=False, write_path=None, wait_key=0):
    """
    Build a binary map of the high-gradient areas of a BGR image.
    :param org: original BGR image
    :param grad_min: gradient threshold; pixels whose gradient exceeds it become 255
    :param show: when True, display the result in the window 'binary'
    :param write_path: if given, the result is also written to this path
    :param wait_key: delay passed to cv2.waitKey when showing; None skips the wait
    :return: binary image after thresholding and morphological closing
    """
    gray = cv2.cvtColor(org, cv2.COLOR_BGR2GRAY)
    gradient = gray_to_gradient(gray)  # regions of interest have a high gradient
    _, mask = cv2.threshold(gradient, grad_min, 255, cv2.THRESH_BINARY)  # keep only the RoI
    # NOTE(review): the kernel argument is the tuple (3, 3), not a 3x3 array such as
    # np.ones((3, 3)); OpenCV presumably treats it as a 2-element kernel rather than a
    # 3x3 one - confirm the intent before changing, since it alters every output.
    closed = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, (3, 3))
    if write_path is not None:
        cv2.imwrite(write_path, closed)
    if show:
        cv2.imshow('binary', closed)
        if wait_key is not None:
            cv2.waitKey(wait_key)
    return closed