mirror of
https://github.com/leigest519/ScreenCoder.git
synced 2026-02-13 02:02:48 +00:00
Initial commit
This commit is contained in:
937
UIED/result_processing/Untitled.ipynb
Normal file
937
UIED/result_processing/Untitled.ipynb
Normal file
File diff suppressed because one or more lines are too long
215
UIED/result_processing/eval_classes.py
Normal file
215
UIED/result_processing/eval_classes.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import cv2
|
||||
from glob import glob
|
||||
from os.path import join as pjoin
|
||||
from tqdm import tqdm
|
||||
|
||||
# Maps ground-truth category ids (as strings) to the class names used by the detector output.
class_map = {
    str(class_id): class_name
    for class_id, class_name in enumerate([
        'Button', 'CheckBox', 'Chronometer', 'EditText', 'ImageButton', 'ImageView',
        'ProgressBar', 'RadioButton', 'RatingBar', 'SeekBar', 'Spinner', 'Switch',
        'ToggleButton', 'VideoView', 'TextView',
    ])
}
|
||||
|
||||
|
||||
def resize_label(bboxes, d_height, gt_height, bias=0):
    """Rescale bounding boxes from one image height to another.

    Every coordinate is multiplied by ``gt_height / d_height``, shifted by
    ``bias`` and truncated with ``int``.

    :param bboxes: iterable of boxes, each an iterable of numeric coordinates
    :param d_height: image height the boxes currently refer to
    :param gt_height: image height the boxes are mapped to
    :param bias: constant added to every coordinate after scaling
    :return: new list of rescaled boxes; the input is left untouched
    """
    ratio = gt_height / d_height
    return [[int(coord * ratio + bias) for coord in box] for box in bboxes]
|
||||
|
||||
|
||||
def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
    """Draw rectangles on a copy of an image.

    :param org: source image; it is copied, never drawn on directly
    :param corners: boxes as [col_min, row_min, col_max, row_max]
    :param color: rectangle colour passed to ``cv2.rectangle``
    :param line: rectangle line thickness
    :param show: when True, display a 500x1000 preview and wait for a key press
    :return: the annotated copy
    """
    board = org.copy()
    for box in corners:
        top_left = (box[0], box[1])
        bottom_right = (box[2], box[3])
        board = cv2.rectangle(board, top_left, bottom_right, color, line)
    if show:
        preview = cv2.resize(board, (500, 1000))
        cv2.imshow('a', preview)
        cv2.waitKey(0)
    return board
|
||||
|
||||
|
||||
def load_detect_result_json(reslut_file_root, shrink=4):
    """Load every ``*.json`` detection result in a directory, grouped by image name.

    Components narrower or shorter than 10 pixels are dropped, as are
    components lying entirely above row 36 or entirely below row 725.
    Each remaining box is shrunk by ``shrink`` pixels on every side.

    :param reslut_file_root: directory containing one JSON result file per image
    :param shrink: number of pixels removed from each side of a box
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]], 'categories': [category]}}
    """
    def is_bottom_or_top(corner):
        # NOTE(review): 36 / 725 presumably delimit the status and navigation
        # bars of an 800-pixel-high screenshot -- confirm against the detector settings.
        _, row_min, _, row_max = corner
        return row_max < 36 or row_min > 725

    result_files = glob(pjoin(reslut_file_root, '*.json'))
    compos_reform = {}
    print('Loading %d detection results' % len(result_files))
    for reslut_file in tqdm(result_files):
        # Normalise separators so the file name is isolated on any OS, not only
        # where glob returns backslash-separated paths.
        img_name = reslut_file.replace('\\', '/').split('/')[-1].split('.')[0]
        # Context manager closes the file handle deterministically.
        with open(reslut_file, 'r') as result_fp:
            compos = json.load(result_fp)['compos']
        for compo in compos:
            col_min, row_min = compo['column_min'], compo['row_min']
            col_max, row_max = compo['column_max'], compo['row_max']
            if col_max - col_min < 10 or row_max - row_min < 10:
                continue
            if is_bottom_or_top((col_min, row_min, col_max, row_max)):
                continue
            entry = compos_reform.setdefault(img_name, {'bboxes': [], 'categories': []})
            entry['bboxes'].append([col_min + shrink, row_min + shrink, col_max - shrink, row_max - shrink])
            entry['categories'].append(compo['category'])
    return compos_reform
|
||||
|
||||
|
||||
def load_ground_truth_json(gt_file):
    """Load ground-truth annotations and group them by image name.

    The file is expected to hold an ``images`` list (with ``id``,
    ``file_name``, ``height``, ``width``) and an ``annotations`` list (with
    ``image_id``, ``bbox``, ``category_id``). Category ids are translated to
    class names through ``class_map``.

    :param gt_file: path of the ground-truth JSON file
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]],
             'categories': [class name], 'size': (height, width)}}
    :raises KeyError: if an annotation refers to an image id missing from ``images``
    """
    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    with open(gt_file, 'r') as gt_fp:
        data = json.load(gt_fp)
    annots = data['annotations']

    # Index the images once instead of scanning the whole list for every
    # annotation. setdefault keeps the first entry when an id is duplicated.
    images_by_id = {}
    for image in data['images']:
        images_by_id.setdefault(image['id'], image)

    def get_img_by_id(img_id):
        image = images_by_id[img_id]
        # The image name is the file name without its last four characters (e.g. '.jpg').
        return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    compos = {}
    print('Loading %d ground truth' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        entry = compos.setdefault(img_name, {'bboxes': [], 'categories': [], 'size': size})
        entry['bboxes'].append(cvt_bbox(annot['bbox']))
        entry['categories'].append(class_map[str(annot['category_id'])])
    return compos
|
||||
|
||||
|
||||
def eval(detection, ground_truth, img_root, show=True, no_text=False, only_text=False):
    """Evaluate detections against ground truth, requiring matching classes.

    A detection is a true positive when it overlaps a not-yet-matched ground
    truth box with IoU > 0.9, or lies completely inside it, and both carry
    the same category. Detection boxes are rescaled from a height of 800 to
    the ground-truth image height before matching.

    Changes over the previous version: the final report computes its own F1
    instead of reusing a stale checkpoint value, all ratios are guarded
    against empty denominators, and the caller's ``detection`` dict is no
    longer overwritten with rescaled boxes.

    :param detection: {img_name: {'bboxes': [...], 'categories': [...]}}
    :param ground_truth: {img_name: {'bboxes': [...], 'categories': [...], 'size': (height, width)}}
    :param img_root: directory of the ``<img_name>.jpg`` images (read only when ``show`` is True)
    :param show: print per-image results and display the boxes
    :param no_text: ignore 'TextView' components on both sides
    :param only_text: keep only 'TextView' components (takes precedence over ``no_text``)
    :return: (precisions, recalls, f1s) -- per-image lists for images with a defined F1
    """
    def safe_div(num, den):
        # A ratio with an empty denominator is reported as 0 instead of crashing.
        return num / den if den else 0.0

    def compo_filter(compos, flag):
        if not no_text and not only_text:
            return compos
        compos_new = {'bboxes': [], 'categories': []}
        for k, category in enumerate(compos['categories']):
            is_text = category == 'TextView'
            if flag in ('det', 'gt'):
                if only_text and not is_text:
                    continue
                if not only_text and is_text:
                    continue
            compos_new['bboxes'].append(compos['bboxes'][k])
            compos_new['categories'].append(category)
        return compos_new

    def match(org, d_bbox, d_category, gt_compos, matched):
        '''
        :param org: image, unused by the matching itself
        :param matched: mark if the ground truth component is matched (1: free, 0: already matched)
        :param d_bbox: [col_min, row_min, col_max, row_max]
        :param d_category: class name of the detected component
        :param gt_compos: ground truth dict with 'bboxes' and 'categories'
        :return: Boolean: if IOU large enough or detected box is contained by ground truth, with equal class
        '''
        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
        gt_bboxes = gt_compos['bboxes']
        gt_categories = gt_compos['categories']
        for k, gt_bbox in enumerate(gt_bboxes):
            if matched[k] == 0:
                continue
            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
            col_min = max(d_bbox[0], gt_bbox[0])
            row_min = max(d_bbox[1], gt_bbox[1])
            col_max = min(d_bbox[2], gt_bbox[2])
            row_max = min(d_bbox[3], gt_bbox[3])
            # if not intersected, area intersection should be 0
            area_inter = max(0, col_max - col_min) * max(0, row_max - row_min)
            if area_inter == 0:
                continue
            iod = area_inter / area_d
            iou = area_inter / (area_d + area_gt - area_inter)
            if (iou > 0.9 or iod == 1) and d_category == gt_categories[k]:
                matched[k] = 0
                return True
        return False

    amount = len(detection)
    TP, FP, FN = 0, 0, 0
    pres, recalls, f1s = [], [], []
    i = 0  # keeps the final report well-defined when `detection` is empty
    for i, image_id in enumerate(detection):
        if image_id not in ground_truth:
            continue
        # The image is only needed for visualisation; skip the disk read otherwise.
        img = cv2.imread(pjoin(img_root, image_id + '.jpg')) if show else None
        gt_compos = ground_truth[image_id]
        org_height = gt_compos['size'][0]

        d_compos = compo_filter(detection[image_id], 'det')
        gt_compos = compo_filter(gt_compos, 'gt')

        # Keep the rescaled boxes local: writing them back into d_compos would
        # alter the caller's data whenever no filter is active.
        d_bboxes = resize_label(d_compos['bboxes'], 800, org_height)
        matched = np.ones(len(gt_compos['bboxes']), dtype=int)
        TP_this, FP_this = 0, 0
        for j, d_bbox in enumerate(d_bboxes):
            if match(img, d_bbox, d_compos['categories'][j], gt_compos, matched):
                TP_this += 1
            else:
                FP_this += 1
        # Plain int so a zero denominator raises instead of silently producing nan.
        FN_this = int(matched.sum())
        TP += TP_this
        FP += FP_this
        FN += FN_this

        try:
            pre_this = TP_this / (TP_this + FP_this)
            recall_this = TP_this / (TP_this + FN_this)
            f1_this = 2 * (pre_this * recall_this) / (pre_this + recall_this)
        except ZeroDivisionError:
            print('empty')
            continue

        pres.append(pre_this)
        recalls.append(recall_this)
        f1s.append(f1_this)
        if show:
            print(image_id + '.jpg')
            print('[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f' % (
                i, amount, TP_this, FP_this, FN_this, pre_this, recall_this))
            broad = draw_bounding_box(img, d_bboxes, color=(255, 0, 0), line=3)
            draw_bounding_box(broad, gt_compos['bboxes'], color=(0, 0, 255), show=True, line=2)

        if i % 200 == 0:
            precision = safe_div(TP, TP + FP)
            recall = safe_div(TP, TP + FN)
            f1 = safe_div(2 * (precision * recall), precision + recall)
            print(
                '[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f, F1:%.3f' % (i, amount, TP, FP, FN, precision, recall, f1))

    precision = safe_div(TP, TP + FP)
    recall = safe_div(TP, TP + FN)
    f1 = safe_div(2 * (precision * recall), precision + recall)
    print('[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f, F1:%.3f' % (i, amount, TP, FP, FN, precision, recall, f1))
    # print("Average precision:%.4f; Average recall:%.3f" % (sum(pres)/len(pres), sum(recalls)/len(recalls)))

    return pres, recalls, f1s
|
||||
|
||||
|
||||
# Evaluation mode: drop text components from both sides, or keep only text.
no_text = True
only_text = False

# Alternative detection result folders kept for quick switching.
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\ip')
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\merge')
detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\merge')
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\ocr')
gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')
eval(
    detect,
    gt,
    'E:\\Mulong\\Datasets\\rico\\combined',
    show=False,
    no_text=no_text,
    only_text=only_text,
)
|
||||
219
UIED/result_processing/eval_size.py
Normal file
219
UIED/result_processing/eval_size.py
Normal file
@@ -0,0 +1,219 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import cv2
|
||||
from glob import glob
|
||||
from os.path import join as pjoin
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def resize_label(bboxes, d_height, gt_height, bias=0):
    """Map bounding boxes from height ``d_height`` to height ``gt_height``.

    :param bboxes: iterable of boxes, each an iterable of numeric coordinates
    :param d_height: height the coordinates are currently expressed in
    :param gt_height: height the coordinates are converted to
    :param bias: offset added to each scaled coordinate before truncation
    :return: list of new boxes with integer coordinates
    """
    factor = gt_height / d_height

    def rescale(box):
        return [int(value * factor + bias) for value in box]

    return [rescale(box) for box in bboxes]
|
||||
|
||||
|
||||
def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
    """Draw rectangles on a resized version of an image.

    The image is first resized with ``cv2.resize(org, (608, 1024))``; the
    boxes are drawn on that resized canvas without further scaling.

    :param org: source image
    :param corners: boxes as [col_min, row_min, col_max, row_max]
    :param color: rectangle colour passed to ``cv2.rectangle``
    :param line: rectangle line thickness
    :param show: when True, display the canvas and wait for a key press
    :return: the annotated, resized canvas
    """
    board = cv2.resize(org, (608, 1024))
    for box in corners:
        top_left = (box[0], box[1])
        bottom_right = (box[2], box[3])
        board = cv2.rectangle(board, top_left, bottom_right, color, line)
    if show:
        cv2.imshow('a', board)
        cv2.waitKey(0)
    return board
|
||||
|
||||
|
||||
def load_detect_result_json(reslut_file_root, shrink=3):
    """Load every ``*.json`` detection result in a directory, grouped by image name.

    Components narrower or shorter than 10 pixels are dropped, as are
    components lying entirely above row 36 or entirely below row 725.
    Each remaining box is shrunk by ``shrink`` pixels on every side.

    :param reslut_file_root: directory containing one JSON result file per image
    :param shrink: number of pixels removed from each side of a box
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]], 'categories': [category]}}
    """
    def is_bottom_or_top(corner):
        # NOTE(review): 36 / 725 presumably delimit the status and navigation
        # bars of an 800-pixel-high screenshot -- confirm against the detector settings.
        _, row_min, _, row_max = corner
        return row_max < 36 or row_min > 725

    result_files = glob(pjoin(reslut_file_root, '*.json'))
    compos_reform = {}
    print('Loading %d detection results' % len(result_files))
    for reslut_file in tqdm(result_files):
        # Normalise separators so the file name is isolated on any OS, not only
        # where glob returns backslash-separated paths.
        img_name = reslut_file.replace('\\', '/').split('/')[-1].split('.')[0]
        # Context manager closes the file handle deterministically.
        with open(reslut_file, 'r') as result_fp:
            compos = json.load(result_fp)['compos']
        for compo in compos:
            col_min, row_min = compo['column_min'], compo['row_min']
            col_max, row_max = compo['column_max'], compo['row_max']
            if col_max - col_min < 10 or row_max - row_min < 10:
                continue
            if is_bottom_or_top((col_min, row_min, col_max, row_max)):
                continue
            entry = compos_reform.setdefault(img_name, {'bboxes': [], 'categories': []})
            entry['bboxes'].append([col_min + shrink, row_min + shrink, col_max - shrink, row_max - shrink])
            entry['categories'].append(compo['category'])
    return compos_reform
|
||||
|
||||
|
||||
def load_ground_truth_json(gt_file):
    """Load ground-truth annotations and group them by image name.

    The file is expected to hold an ``images`` list (with ``id``,
    ``file_name``, ``height``, ``width``) and an ``annotations`` list (with
    ``image_id``, ``bbox``, ``category_id``). Category ids are stored as found.

    :param gt_file: path of the ground-truth JSON file
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]],
             'categories': [category_id], 'size': (height, width)}}
    :raises KeyError: if an annotation refers to an image id missing from ``images``
    """
    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    with open(gt_file, 'r') as gt_fp:
        data = json.load(gt_fp)
    annots = data['annotations']

    # Index the images once instead of scanning the whole list for every
    # annotation. setdefault keeps the first entry when an id is duplicated.
    images_by_id = {}
    for image in data['images']:
        images_by_id.setdefault(image['id'], image)

    def get_img_by_id(img_id):
        image = images_by_id[img_id]
        # The image name is the file name without its last four characters (e.g. '.jpg').
        return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    compos = {}
    print('Loading %d ground truth' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        entry = compos.setdefault(img_name, {'bboxes': [], 'categories': [], 'size': size})
        entry['bboxes'].append(cvt_bbox(annot['bbox']))
        entry['categories'].append(annot['category_id'])
    return compos
|
||||
|
||||
|
||||
def eval(detection, ground_truth, img_root, show=True, no_text=False, only_text=False):
    """Evaluate detections against ground truth, split into three width buckets.

    Detection boxes (height 800) and ground-truth boxes (original image
    height) are both rescaled to a height of 1024. A detection is a true
    positive when it overlaps a not-yet-matched ground truth box with
    IoU > 0.9 or lies completely inside it. TP/FP/FN are counted per bucket
    of box width: [< 64, 64..127, >= 128].

    Changes over the previous version: widths of exactly 64 or 128 now fall
    into a bucket (they used to reuse a stale or -1 index), the outer image
    index is no longer overwritten by the false-negative loop, empty buckets
    report 0 instead of raising ZeroDivisionError, and the caller's
    dictionaries are no longer overwritten with rescaled boxes.

    :param detection: {img_name: {'bboxes': [...], 'categories': [...]}}
    :param ground_truth: {img_name: {'bboxes': [...], 'categories': [...], 'size': (height, width)}}
    :param img_root: directory of the ``<img_name>.jpg`` images (read only when ``show`` is True)
    :param show: print each image name and display the boxes
    :param no_text: ignore text components ('TextView' detections, category 14 ground truth)
    :param only_text: keep only text components (takes precedence over ``no_text``)
    :return: None; results are printed
    """
    def safe_div(num, den):
        # A ratio with an empty denominator is reported as 0 instead of crashing.
        return num / den if den else 0.0

    def size_bucket(width):
        # 0: narrower than 64, 1: 64 up to 127, 2: 128 and wider.
        if width < 64:
            return 0
        if width < 128:
            return 1
        return 2

    def compo_filter(compos, flag):
        if not no_text and not only_text:
            return compos
        compos_new = {'bboxes': [], 'categories': []}
        for k, category in enumerate(compos['categories']):
            # Detections carry class names, ground truth carries numeric ids.
            if flag == 'det':
                is_text = category == 'TextView'
            elif flag == 'gt':
                is_text = int(category) == 14
            else:
                is_text = None  # unknown flag: nothing is filtered
            if is_text is not None:
                if only_text and not is_text:
                    continue
                if not only_text and is_text:
                    continue
            compos_new['bboxes'].append(compos['bboxes'][k])
            compos_new['categories'].append(category)
        return compos_new

    def match(org, d_bbox, gt_bboxes, matched):
        '''
        :param org: image, unused by the matching itself
        :param matched: mark if the ground truth component is matched (1: free, 0: already matched)
        :param d_bbox: [col_min, row_min, col_max, row_max]
        :param gt_bboxes: list of ground truth [[col_min, row_min, col_max, row_max]]
        :return: (Boolean, size): whether IOU is large enough or the detected box is
                 contained by ground truth, and the width bucket of the matched
                 ground truth box (-1 without a match)
        '''
        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
        for k, gt_bbox in enumerate(gt_bboxes):
            if matched[k] == 0:
                continue
            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
            col_min = max(d_bbox[0], gt_bbox[0])
            row_min = max(d_bbox[1], gt_bbox[1])
            col_max = min(d_bbox[2], gt_bbox[2])
            row_max = min(d_bbox[3], gt_bbox[3])
            # if not intersected, area intersection should be 0
            area_inter = max(0, col_max - col_min) * max(0, row_max - row_min)
            if area_inter == 0:
                continue
            iod = area_inter / area_d
            iou = area_inter / (area_d + area_gt - area_inter)
            if iou > 0.9 or iod == 1:
                matched[k] = 0
                return True, size_bucket(gt_bbox[2] - gt_bbox[0])
        return False, -1

    def report(index):
        precision = [round(safe_div(TP[k], TP[k] + FP[k]), 3) for k in range(len(TP))]
        recall = [round(safe_div(TP[k], TP[k] + FN[k]), 3) for k in range(len(TP))]
        f1 = [round(safe_div(2 * (precision[k] * recall[k]), precision[k] + recall[k]), 3) for k in range(3)]
        print(
            '[%d/%d] TP:%s, FP:%s, FN:%s, Precesion:%s, Recall:%s, F1:%s' % (
                index, amount, str(TP), str(FP), str(FN), str(precision), str(recall), str(f1)))

    amount = len(detection)
    TP, FP, FN = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
    i = 0  # keeps the final report well-defined when `detection` is empty
    for i, image_id in enumerate(detection):
        if image_id not in ground_truth:
            continue
        # The image is only needed for visualisation; skip the disk read otherwise.
        img = cv2.imread(pjoin(img_root, image_id + '.jpg')) if show else None
        gt_compos = ground_truth[image_id]
        org_height = gt_compos['size'][0]

        d_compos = compo_filter(detection[image_id], 'det')
        gt_compos = compo_filter(gt_compos, 'gt')

        # Keep the rescaled boxes local: writing them back would alter the
        # caller's data whenever no filter is active.
        d_bboxes = resize_label(d_compos['bboxes'], 800, 1024)
        gt_bboxes = resize_label(gt_compos['bboxes'], org_height, 1024)
        matched = np.ones(len(gt_bboxes), dtype=int)
        for d_bbox in d_bboxes:
            m, size = match(img, d_bbox, gt_bboxes, matched)
            if m:
                TP[size] += 1
            else:
                FP[size_bucket(d_bbox[2] - d_bbox[0])] += 1

        # Every ground truth box left unmatched is a false negative.
        for k, free in enumerate(matched):
            if free == 1:
                FN[size_bucket(gt_bboxes[k][2] - gt_bboxes[k][0])] += 1

        if show:
            print(image_id + '.jpg')
            broad = draw_bounding_box(img, d_bboxes, color=(255, 0, 0), line=3)
            draw_bounding_box(broad, gt_bboxes, color=(0, 0, 255), show=True, line=2)

        if i % 200 == 0:
            report(i)

    report(i)
    # print("Average precision:%.4f; Average recall:%.3f" % (sum(pres)/len(pres), sum(recalls)/len(recalls)))
|
||||
# print("Average precision:%.4f; Average recall:%.3f" % (sum(pres)/len(pres), sum(recalls)/len(recalls)))
|
||||
|
||||
|
||||
# Evaluation mode: both flags off means every component is evaluated.
no_text = False
only_text = False
detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\merge')
gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')
eval(
    detect,
    gt,
    'E:\\Mulong\\Datasets\\rico\\combined',
    show=False,
    no_text=no_text,
    only_text=only_text,
)
|
||||
208
UIED/result_processing/evaluation.py
Normal file
208
UIED/result_processing/evaluation.py
Normal file
@@ -0,0 +1,208 @@
|
||||
import json
|
||||
import numpy as np
|
||||
import cv2
|
||||
from glob import glob
|
||||
from os.path import join as pjoin
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
def resize_label(bboxes, d_height, gt_height, bias=0):
    """Convert bounding boxes between two image heights.

    :param bboxes: iterable of boxes, each an iterable of numeric coordinates
    :param d_height: height the boxes were produced at
    :param gt_height: height the boxes should refer to afterwards
    :param bias: value added to each coordinate after scaling, before ``int`` truncation
    :return: freshly built list of rescaled boxes
    """
    scale = gt_height / d_height
    resized = []
    for box in bboxes:
        resized.append(list(map(lambda c: int(c * scale + bias), box)))
    return resized
|
||||
|
||||
|
||||
def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
    """Return a copy of ``org`` with one rectangle drawn per box.

    :param org: source image; it is copied, never drawn on directly
    :param corners: boxes as [col_min, row_min, col_max, row_max]
    :param color: rectangle colour passed to ``cv2.rectangle``
    :param line: rectangle line thickness
    :param show: when True, display a 500x1000 preview and wait for a key press
    :return: the annotated copy
    """
    board = org.copy()
    for corner in corners:
        board = cv2.rectangle(board, (corner[0], corner[1]), (corner[2], corner[3]), color, line)
    if not show:
        return board
    cv2.imshow('a', cv2.resize(board, (500, 1000)))
    cv2.waitKey(0)
    return board
|
||||
|
||||
|
||||
def load_detect_result_json(reslut_file_root, shrink=4):
    """Load every ``*.json`` detection result in a directory, grouped by image name.

    Components narrower or shorter than 10 pixels are dropped, as are
    components lying entirely above row 36 or entirely below row 725.
    Each remaining box is shrunk by ``shrink`` pixels on every side.

    :param reslut_file_root: directory containing one JSON result file per image
    :param shrink: number of pixels removed from each side of a box
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]], 'categories': [category]}}
    """
    def is_bottom_or_top(corner):
        # NOTE(review): 36 / 725 presumably delimit the status and navigation
        # bars of an 800-pixel-high screenshot -- confirm against the detector settings.
        _, row_min, _, row_max = corner
        return row_max < 36 or row_min > 725

    result_files = glob(pjoin(reslut_file_root, '*.json'))
    compos_reform = {}
    print('Loading %d detection results' % len(result_files))
    for reslut_file in tqdm(result_files):
        # Normalise separators so the file name is isolated on any OS, not only
        # where glob returns backslash-separated paths.
        img_name = reslut_file.replace('\\', '/').split('/')[-1].split('.')[0]
        # Context manager closes the file handle deterministically.
        with open(reslut_file, 'r') as result_fp:
            compos = json.load(result_fp)['compos']
        for compo in compos:
            col_min, row_min = compo['column_min'], compo['row_min']
            col_max, row_max = compo['column_max'], compo['row_max']
            if col_max - col_min < 10 or row_max - row_min < 10:
                continue
            if is_bottom_or_top((col_min, row_min, col_max, row_max)):
                continue
            entry = compos_reform.setdefault(img_name, {'bboxes': [], 'categories': []})
            entry['bboxes'].append([col_min + shrink, row_min + shrink, col_max - shrink, row_max - shrink])
            entry['categories'].append(compo['category'])
    return compos_reform
|
||||
|
||||
|
||||
def load_ground_truth_json(gt_file):
    """Load ground-truth annotations and group them by image name.

    The file is expected to hold an ``images`` list (with ``id``,
    ``file_name``, ``height``, ``width``) and an ``annotations`` list (with
    ``image_id``, ``bbox``, ``category_id``). Category ids are stored as found.

    :param gt_file: path of the ground-truth JSON file
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]],
             'categories': [category_id], 'size': (height, width)}}
    :raises KeyError: if an annotation refers to an image id missing from ``images``
    """
    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    with open(gt_file, 'r') as gt_fp:
        data = json.load(gt_fp)
    annots = data['annotations']

    # Index the images once instead of scanning the whole list for every
    # annotation. setdefault keeps the first entry when an id is duplicated.
    images_by_id = {}
    for image in data['images']:
        images_by_id.setdefault(image['id'], image)

    def get_img_by_id(img_id):
        image = images_by_id[img_id]
        # The image name is the file name without its last four characters (e.g. '.jpg').
        return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    compos = {}
    print('Loading %d ground truth' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        entry = compos.setdefault(img_name, {'bboxes': [], 'categories': [], 'size': size})
        entry['bboxes'].append(cvt_bbox(annot['bbox']))
        entry['categories'].append(annot['category_id'])
    return compos
|
||||
|
||||
|
||||
def eval(detection, ground_truth, img_root, show=True, no_text=False, only_text=False):
    """Evaluate detections against ground truth, ignoring component classes.

    A detection is a true positive when it overlaps a not-yet-matched ground
    truth box with IoU > 0.9 or lies completely inside it. Detection boxes
    are rescaled from a height of 800 to the ground-truth image height
    before matching.

    Changes over the previous version: the final report computes its own F1
    instead of reusing a stale checkpoint value, all ratios are guarded
    against empty denominators, and the caller's ``detection`` dict is no
    longer overwritten with rescaled boxes.

    :param detection: {img_name: {'bboxes': [...], 'categories': [...]}}
    :param ground_truth: {img_name: {'bboxes': [...], 'categories': [...], 'size': (height, width)}}
    :param img_root: directory of the ``<img_name>.jpg`` images (read only when ``show`` is True)
    :param show: print per-image results and display the boxes
    :param no_text: ignore text components ('TextView' detections, category 14 ground truth)
    :param only_text: keep only text components (takes precedence over ``no_text``)
    :return: (precisions, recalls, f1s) -- per-image lists for images with a defined F1
    """
    def safe_div(num, den):
        # A ratio with an empty denominator is reported as 0 instead of crashing.
        return num / den if den else 0.0

    def compo_filter(compos, flag):
        if not no_text and not only_text:
            return compos
        compos_new = {'bboxes': [], 'categories': []}
        for k, category in enumerate(compos['categories']):
            # Detections carry class names, ground truth carries numeric ids.
            if flag == 'det':
                is_text = category == 'TextView'
            elif flag == 'gt':
                is_text = int(category) == 14
            else:
                is_text = None  # unknown flag: nothing is filtered
            if is_text is not None:
                if only_text and not is_text:
                    continue
                if not only_text and is_text:
                    continue
            compos_new['bboxes'].append(compos['bboxes'][k])
            compos_new['categories'].append(category)
        return compos_new

    def match(org, d_bbox, gt_bboxes, matched):
        '''
        :param org: image, unused by the matching itself
        :param matched: mark if the ground truth component is matched (1: free, 0: already matched)
        :param d_bbox: [col_min, row_min, col_max, row_max]
        :param gt_bboxes: list of ground truth [[col_min, row_min, col_max, row_max]]
        :return: Boolean: if IOU large enough or detected box is contained by ground truth
        '''
        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
        for k, gt_bbox in enumerate(gt_bboxes):
            if matched[k] == 0:
                continue
            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
            col_min = max(d_bbox[0], gt_bbox[0])
            row_min = max(d_bbox[1], gt_bbox[1])
            col_max = min(d_bbox[2], gt_bbox[2])
            row_max = min(d_bbox[3], gt_bbox[3])
            # if not intersected, area intersection should be 0
            area_inter = max(0, col_max - col_min) * max(0, row_max - row_min)
            if area_inter == 0:
                continue
            iod = area_inter / area_d
            iou = area_inter / (area_d + area_gt - area_inter)
            if iou > 0.9 or iod == 1:
                matched[k] = 0
                return True
        return False

    amount = len(detection)
    TP, FP, FN = 0, 0, 0
    pres, recalls, f1s = [], [], []
    i = 0  # keeps the final report well-defined when `detection` is empty
    for i, image_id in enumerate(detection):
        if image_id not in ground_truth:
            continue
        # The image is only needed for visualisation; skip the disk read otherwise.
        img = cv2.imread(pjoin(img_root, image_id + '.jpg')) if show else None
        gt_compos = ground_truth[image_id]
        org_height = gt_compos['size'][0]

        d_compos = compo_filter(detection[image_id], 'det')
        gt_compos = compo_filter(gt_compos, 'gt')

        # Keep the rescaled boxes local: writing them back into d_compos would
        # alter the caller's data whenever no filter is active.
        d_bboxes = resize_label(d_compos['bboxes'], 800, org_height)
        matched = np.ones(len(gt_compos['bboxes']), dtype=int)
        TP_this, FP_this = 0, 0
        for d_bbox in d_bboxes:
            if match(img, d_bbox, gt_compos['bboxes'], matched):
                TP_this += 1
            else:
                FP_this += 1
        # Plain int so a zero denominator raises instead of silently producing nan.
        FN_this = int(matched.sum())
        TP += TP_this
        FP += FP_this
        FN += FN_this

        try:
            pre_this = TP_this / (TP_this + FP_this)
            recall_this = TP_this / (TP_this + FN_this)
            f1_this = 2 * (pre_this * recall_this) / (pre_this + recall_this)
        except ZeroDivisionError:
            print('empty')
            continue

        pres.append(pre_this)
        recalls.append(recall_this)
        f1s.append(f1_this)
        if show:
            print(image_id + '.jpg')
            print('[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f' % (
                i, amount, TP_this, FP_this, FN_this, pre_this, recall_this))
            broad = draw_bounding_box(img, d_bboxes, color=(255, 0, 0), line=3)
            draw_bounding_box(broad, gt_compos['bboxes'], color=(0, 0, 255), show=True, line=2)

        if i % 200 == 0:
            precision = safe_div(TP, TP + FP)
            recall = safe_div(TP, TP + FN)
            f1 = safe_div(2 * (precision * recall), precision + recall)
            print(
                '[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f, F1:%.3f' % (i, amount, TP, FP, FN, precision, recall, f1))

    precision = safe_div(TP, TP + FP)
    recall = safe_div(TP, TP + FN)
    f1 = safe_div(2 * (precision * recall), precision + recall)
    print('[%d/%d] TP:%d, FP:%d, FN:%d, Precesion:%.3f, Recall:%.3f, F1:%.3f' % (i, amount, TP, FP, FN, precision, recall, f1))
    # print("Average precision:%.4f; Average recall:%.3f" % (sum(pres)/len(pres), sum(recalls)/len(recalls)))

    return pres, recalls, f1s
|
||||
|
||||
|
||||
# Evaluation mode: drop text components from both sides, or keep only text.
no_text = True
only_text = False

# Alternative detection result folders kept for quick switching.
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\ip')
detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\merge')
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\merge')
# detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\ocr')
gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')
eval(
    detect,
    gt,
    'E:\\Mulong\\Datasets\\rico\\combined',
    show=False,
    no_text=no_text,
    only_text=only_text,
)
|
||||
72
UIED/result_processing/experiment.py
Normal file
72
UIED/result_processing/experiment.py
Normal file
@@ -0,0 +1,72 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
|
||||
import lib_ip.block_division as blk
|
||||
import lib_ip.ip_preprocessing as pre
|
||||
import lib_ip.ip_detection as det
|
||||
|
||||
|
||||
def nothing(x):
    """Do nothing; used as the required callback for ``cv2.createTrackbar``."""
    return None
|
||||
|
||||
|
||||
def get_contour(org, binary):
    """Draw the simplified external contours of a binary map on a copy of an image.

    Contours with an area below 200 are ignored; the others are approximated
    with ``cv2.approxPolyDP`` (tolerance: 0.1% of the contour perimeter) and
    drawn in red with a 1-pixel line.

    :param org: image the contours are drawn on (a copy is made)
    :param binary: binary map handed to ``cv2.findContours``
    :return: the annotated copy of ``org``
    """
    board = org.copy()
    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are the second-to-last item in both.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    res_contour = []
    for contour in contours:
        if cv2.contourArea(contour) < 200:
            continue
        cnt = cv2.approxPolyDP(contour, 0.001 * cv2.arcLength(contour, True), True)
        res_contour.append(cnt)
    cv2.drawContours(board, res_contour, -1, (0, 0, 255), 1)
    return board
|
||||
|
||||
|
||||
# Interactive playground: tune the binarization threshold with trackbars and
# watch the resulting binary maps update live. Press ESC to leave the loop.
img_file = 'E:\\Mulong\\Datasets\\rico\\combined\\1014.jpg'
resize_height = 800

cv2.namedWindow('control')
cv2.createTrackbar('resize_height', 'control', 800, 1600, nothing)
cv2.createTrackbar('grad_min', 'control', 4, 255, nothing)
cv2.createTrackbar('grad_min_blk', 'control', 5, 255, nothing)
cv2.createTrackbar('c1', 'control', 1, 1000, nothing)
cv2.createTrackbar('c2', 'control', 1, 1000, nothing)

while 1:
    resize_height = cv2.getTrackbarPos('resize_height', 'control')
    grad_min = cv2.getTrackbarPos('grad_min', 'control')
    grad_min_blk = cv2.getTrackbarPos('grad_min_blk', 'control')
    c1 = cv2.getTrackbarPos('c1', 'control')
    c2 = cv2.getTrackbarPos('c2', 'control')

    org, grey = pre.read_img(img_file, resize_height)
    # org = cv2.medianBlur(org, 3)
    # org = cv2.GaussianBlur(org, (3,3), 0)

    binary = pre.binarization(org, grad_min)
    binary_r = pre.reverse_binary(binary)
    # blk.block_division(grey, grad_thresh=grad_min_blk, step_v=10, step_h=10, show=True)
    cv2.imshow('bijn', binary)
    cv2.imshow('r', binary_r)
    # waitKey both refreshes the windows and reports the pressed key; ESC (27)
    # gives the otherwise endless loop a clean way out.
    if cv2.waitKey(10) & 0xFF == 27:
        break

    # canny = cv2.Canny(grey, c1, c2)
    # hie, contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # b_contour = get_contour(org, binary)
    # c_contour = get_contour(org, canny)

    # b_contour = cv2.hconcat([b_contour, c_contour])
    # binary = cv2.hconcat([binary, binary_r, canny])

    # cv2.imshow('org', org)
    # cv2.imshow('b_cnt', b_contour)
    # cv2.imshow('bin', binary)
    # cv2.imshow('canny', canny)

cv2.destroyAllWindows()
|
||||
31
UIED/result_processing/merge_east.py
Normal file
31
UIED/result_processing/merge_east.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import multiprocessing
|
||||
from glob import glob
|
||||
import time
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
from os.path import join as pjoin, exists
|
||||
|
||||
import merge
|
||||
|
||||
|
||||
# Merge the component detection results with the EAST text results for every
# image listed in the test annotation file.
input_root = 'E:\\Mulong\\Datasets\\rico\\combined'
output_root = 'E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\merge'
compo_root = 'E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_cls\\ip'
text_root = 'E:\\Mulong\\Result\\east'

# Context manager closes the annotation file once it is parsed.
with open('E:\\Mulong\\Datasets\\rico\\instances_test.json', 'r') as annotation_fp:
    data = json.load(annotation_fp)


def _image_index(path):
    """Return the numeric index encoded in the file name of ``path``."""
    # pjoin emits '/' on POSIX and '\\' on Windows, so normalise the separators
    # before isolating the file name; the last four characters are the extension.
    return int(path.replace('\\', '/').split('/')[-1][:-4])


input_paths_img = [pjoin(input_root, img['file_name'].split('/')[-1]) for img in data['images']]
input_paths_img = sorted(input_paths_img, key=_image_index)  # sorted by index

# set the range of target inputs' indices
num = 0
start_index = 0
end_index = 100000
for input_path_img in input_paths_img:
    index = _image_index(input_path_img)
    if index < start_index:
        continue
    if index > end_index:
        # paths are sorted by index, so nothing after this one can be in range
        break

    merge.incorporate(input_path_img, compo_root, text_root, output_root, resize_by_height=800, show=False)
|
||||
89
UIED/result_processing/view_gt.py
Normal file
89
UIED/result_processing/view_gt.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from tqdm import tqdm
|
||||
import json
|
||||
import cv2
|
||||
from os.path import join as pjoin
|
||||
|
||||
from config.CONFIG_UIED import Config
|
||||
C = Config()
|
||||
|
||||
|
||||
def draw_bounding_box_class(org, components, color=C.COLOR, line=2, show=False, write_path=None):
    """
    Draw the bounding box and class label of each component on a copy of the image
    :param org: original image
    :param components: dict with 'bboxes' [(column_min, row_min, column_max, row_max)]
                       and 'categories', one category per box
        -> top_left: (column_min, row_min)
        -> bottom_right: (column_max, row_max)
    :param color: mapping from class name to drawing colour
    :param line: line thickness
    :param show: show or not
    :param write_path: when given, the labeled image is written to this path
    :return: labeled image
    """
    board = org.copy()
    categories = components['categories']
    for idx, bbox in enumerate(components['bboxes']):
        # Resolve the class name once; it selects both the label text and the colour.
        label = C.CLASS_MAP[str(categories[idx])]
        label_color = color[label]
        board = cv2.rectangle(board, (bbox[0], bbox[1]), (bbox[2], bbox[3]), label_color, line)
        board = cv2.putText(board, label, (bbox[0]+5, bbox[1]+20), cv2.FONT_HERSHEY_SIMPLEX, 0.8, label_color, 2)
    if show:
        preview = cv2.resize(board, (500, 1000))
        cv2.imshow('a', preview)
        cv2.waitKey(0)
    if write_path is not None:
        cv2.imwrite(write_path, board)
    return board
|
||||
|
||||
|
||||
def load_ground_truth_json(gt_file, no_text=True):
    """Load ground-truth annotations and group them by image name.

    The file is expected to hold an ``images`` list (with ``id``,
    ``file_name``, ``height``, ``width``) and an ``annotations`` list (with
    ``image_id``, ``bbox``, ``category_id``).

    With ``no_text`` enabled, annotations of category 14 are skipped. The
    previous version reset the image's whole entry on every such annotation,
    discarding the components collected before it; the entry is now only
    created when missing and never cleared.

    :param gt_file: path of the ground-truth JSON file
    :param no_text: skip annotations whose category id is 14
    :return: {img_name: {'bboxes': [[col_min, row_min, col_max, row_max]],
             'categories': [category_id], 'size': (height, width)}}
    :raises KeyError: if an annotation refers to an image id missing from ``images``
    """
    def cvt_bbox(bbox):
        '''
        :param bbox: [x,y,width,height]
        :return: [col_min, row_min, col_max, row_max]
        '''
        bbox = [int(b) for b in bbox]
        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]

    with open(gt_file, 'r') as gt_fp:
        data = json.load(gt_fp)
    annots = data['annotations']

    # Index the images once instead of scanning the whole list for every
    # annotation. setdefault keeps the first entry when an id is duplicated.
    images_by_id = {}
    for image in data['images']:
        images_by_id.setdefault(image['id'], image)

    def get_img_by_id(img_id):
        image = images_by_id[img_id]
        # The image name is the file name without its last four characters (e.g. '.jpg').
        return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])

    compos = {}
    print('Loading %d ground truth' % len(annots))
    for annot in tqdm(annots):
        img_name, size = get_img_by_id(annot['image_id'])
        # Images holding only text still get an (empty) entry.
        entry = compos.setdefault(img_name, {'bboxes': [], 'categories': [], 'size': size})
        if no_text and int(annot['category_id']) == 14:
            continue
        entry['bboxes'].append(cvt_bbox(annot['bbox']))
        entry['categories'].append(annot['category_id'])
    return compos
|
||||
|
||||
|
||||
def view_gt_all(gt, img_root):
    """Display the labeled ground truth of every image, one after another.

    :param gt: {img_name: components} as returned by ``load_ground_truth_json``
    :param img_root: directory containing the ``<img_name>.jpg`` images
    """
    for img_id, compos in gt.items():
        img = cv2.imread(pjoin(img_root, img_id + '.jpg'))
        print(pjoin(img_root, img_id + '.jpg'))
        draw_bounding_box_class(img, compos, show=True)
|
||||
|
||||
|
||||
def view_gt_single(gt, img_root, img_id):
    """Display the labeled ground truth of a single image.

    :param gt: {img_name: components} as returned by ``load_ground_truth_json``
    :param img_root: directory containing the ``<img_name>.jpg`` images
    :param img_id: image name; converted to ``str`` before the lookup
    """
    key = str(img_id)
    compos = gt[key]
    img = cv2.imread(pjoin(img_root, key + '.jpg'))
    print(pjoin(img_root, key + '.jpg'))
    draw_bounding_box_class(img, compos, show=True)
|
||||
|
||||
|
||||
# Load the full ground truth (text included) and inspect one image.
gt = load_ground_truth_json(
    'E:\\Mulong\\Datasets\\rico\\instances_test.json',
    no_text=False,
)
# view_gt_all(gt, 'E:\\Mulong\\Datasets\\rico\\combined')
view_gt_single(
    gt,
    'E:\\Mulong\\Datasets\\rico\\combined',
    670,
)
|
||||
Reference in New Issue
Block a user