Initial commit

2026-02-13 10:12:46 +00:00 · 2025-07-28 18:43:47 +08:00
parent 0a6e4ab682
commit 06408ffa6a
702 changed files with 153932 additions and 0 deletions
--- a/UIED/result_processing/eval_size.py
+++ b/UIED/result_processing/eval_size.py
@@ -0,0 +1,219 @@
+import json
+import numpy as np
+import cv2
+from glob import glob
+from os.path import join as pjoin
+from tqdm import tqdm
+
+
+def resize_label(bboxes, d_height, gt_height, bias=0):
+    bboxes_new = []
+    scale = gt_height / d_height
+    for bbox in bboxes:
+        bbox = [int(b * scale + bias) for b in bbox]
+        bboxes_new.append(bbox)
+    return bboxes_new
+
+
+def draw_bounding_box(org, corners, color=(0, 255, 0), line=2, show=False):
+    board = cv2.resize(org, (608, 1024))
+    for i in range(len(corners)):
+        board = cv2.rectangle(board, (corners[i][0], corners[i][1]), (corners[i][2], corners[i][3]), color, line)
+    if show:
+        cv2.imshow('a', board)
+        cv2.waitKey(0)
+    return board
+
+
+def load_detect_result_json(reslut_file_root, shrink=3):
+    def is_bottom_or_top(corner):
+        column_min, row_min, column_max, row_max = corner
+        if row_max < 36 or row_min > 725:
+            return True
+        return False
+
+    result_files = glob(pjoin(reslut_file_root, '*.json'))
+    compos_reform = {}
+    print('Loading %d detection results' % len(result_files))
+    for reslut_file in tqdm(result_files):
+        img_name = reslut_file.split('\\')[-1].split('.')[0]
+        compos = json.load(open(reslut_file, 'r'))['compos']
+        for compo in compos:
+            if compo['column_max'] - compo['column_min'] < 10 or compo['row_max'] - compo['row_min'] < 10:
+                continue
+            if is_bottom_or_top((compo['column_min'], compo['row_min'], compo['column_max'], compo['row_max'])):
+                continue
+            if img_name not in compos_reform:
+                compos_reform[img_name] = {'bboxes': [
+                    [compo['column_min'] + shrink, compo['row_min'] + shrink, compo['column_max'] - shrink,
+                     compo['row_max'] - shrink]],
+                                           'categories': [compo['category']]}
+            else:
+                compos_reform[img_name]['bboxes'].append(
+                    [compo['column_min'] + shrink, compo['row_min'] + shrink, compo['column_max'] - shrink,
+                     compo['row_max'] - shrink])
+                compos_reform[img_name]['categories'].append(compo['category'])
+    return compos_reform
+
+
+def load_ground_truth_json(gt_file):
+    def get_img_by_id(img_id):
+        for image in images:
+            if image['id'] == img_id:
+                return image['file_name'].split('/')[-1][:-4], (image['height'], image['width'])
+
+    def cvt_bbox(bbox):
+        '''
+        :param bbox: [x,y,width,height]
+        :return: [col_min, row_min, col_max, row_max]
+        '''
+        bbox = [int(b) for b in bbox]
+        return [bbox[0], bbox[1], bbox[0] + bbox[2], bbox[1] + bbox[3]]
+
+    data = json.load(open(gt_file, 'r'))
+    images = data['images']
+    annots = data['annotations']
+    compos = {}
+    print('Loading %d ground truth' % len(annots))
+    for annot in tqdm(annots):
+        img_name, size = get_img_by_id(annot['image_id'])
+        if img_name not in compos:
+            compos[img_name] = {'bboxes': [cvt_bbox(annot['bbox'])], 'categories': [annot['category_id']], 'size': size}
+        else:
+            compos[img_name]['bboxes'].append(cvt_bbox(annot['bbox']))
+            compos[img_name]['categories'].append(annot['category_id'])
+    return compos
+
+
+def eval(detection, ground_truth, img_root, show=True, no_text=False, only_text=False):
+    def compo_filter(compos, flag):
+        if not no_text and not only_text:
+            return compos
+        compos_new = {'bboxes': [], 'categories': []}
+        for k, category in enumerate(compos['categories']):
+            if only_text:
+                if flag == 'det' and category != 'TextView':
+                    continue
+                if flag == 'gt' and int(category) != 14:
+                    continue
+            elif no_text:
+                if flag == 'det' and category == 'TextView':
+                    continue
+                if flag == 'gt' and int(category) == 14:
+                    continue
+
+            compos_new['bboxes'].append(compos['bboxes'][k])
+            compos_new['categories'].append(category)
+        return compos_new
+
+    def match(org, d_bbox, gt_bboxes, matched):
+        '''
+        :param matched: mark if the ground truth component is matched
+        :param d_bbox: [col_min, row_min, col_max, row_max]
+        :param gt_bboxes: list of ground truth [[col_min, row_min, col_max, row_max]]
+        :return: Boolean: if IOU large enough or detected box is contained by ground truth
+        '''
+        area_d = (d_bbox[2] - d_bbox[0]) * (d_bbox[3] - d_bbox[1])
+        size = -1
+        for i, gt_bbox in enumerate(gt_bboxes):
+            if matched[i] == 0:
+                continue
+            area_gt = (gt_bbox[2] - gt_bbox[0]) * (gt_bbox[3] - gt_bbox[1])
+            col_min = max(d_bbox[0], gt_bbox[0])
+            row_min = max(d_bbox[1], gt_bbox[1])
+            col_max = min(d_bbox[2], gt_bbox[2])
+            row_max = min(d_bbox[3], gt_bbox[3])
+            # if not intersected, area intersection should be 0
+            w = max(0, col_max - col_min)
+            h = max(0, row_max - row_min)
+            area_inter = w * h
+            if area_inter == 0:
+                continue
+            iod = area_inter / area_d
+            iou = area_inter / (area_d + area_gt - area_inter)
+            # if show:
+            #     cv2.putText(org, (str(round(iou, 2)) + ',' + str(round(iod, 2))), (d_bbox[0], d_bbox[1]),
+            #                 cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
+
+            if iou > 0.9 or iod == 1:
+                if (gt_bbox[2] - gt_bbox[0]) < 64:
+                    size = 0
+                elif 64 < (gt_bbox[2] - gt_bbox[0]) < 128:
+                    size = 1
+                elif (gt_bbox[2] - gt_bbox[0]) > 128:
+                    size = 2
+                matched[i] = 0
+                return True, size
+        return False, size
+
+    amount = len(detection)
+    TP, FP, FN = [[0, 0, 0], [0, 0, 0], [0, 0, 0]]
+    for i, image_id in enumerate(detection):
+        img = cv2.imread(pjoin(img_root, image_id + '.jpg'))
+        d_compos = detection[image_id]
+        if image_id not in ground_truth:
+            continue
+        gt_compos = ground_truth[image_id]
+
+        org_height = gt_compos['size'][0]
+
+        d_compos = compo_filter(d_compos, 'det')
+        gt_compos = compo_filter(gt_compos, 'gt')
+
+        d_compos['bboxes'] = resize_label(d_compos['bboxes'], 800, 1024)
+        gt_compos['bboxes'] = resize_label(gt_compos['bboxes'], org_height, 1024)
+        matched = np.ones(len(gt_compos['bboxes']), dtype=int)
+        for d_bbox in d_compos['bboxes']:
+            m, size = match(img, d_bbox, gt_compos['bboxes'], matched)
+            if m:
+                TP[size] += 1
+            else:
+                h = d_bbox[2] - d_bbox[0]
+                if h < 64:
+                    size = 0
+                elif 64 < h < 128:
+                    size = 1
+                elif h > 128:
+                    size = 2
+                FP[size] += 1
+
+        for i in range(len(matched)):
+            if matched[i] == 1:
+                gt_bboxes = gt_compos['bboxes']
+                h = gt_bboxes[i][2] - gt_bboxes[i][0]
+                if h < 64:
+                    size = 0
+                elif 64 < h < 128:
+                    size = 1
+                elif h > 128:
+                    size = 2
+                FN[size] += 1
+
+        if show:
+            print(image_id + '.jpg')
+            # cv2.imshow('org', cv2.resize(img, (500, 1000)))
+            broad = draw_bounding_box(img, d_compos['bboxes'], color=(255, 0, 0), line=3)
+            draw_bounding_box(broad, gt_compos['bboxes'], color=(0, 0, 255), show=True, line=2)
+
+        if i % 200 == 0:
+            precision = [round(TP[i] / (TP[i] + FP[i]),3) for i in range(len(TP))]
+            recall = [round(TP[i] / (TP[i] + FN[i]),3) for i in range(len(TP))]
+            f1 = [round(2 * (precision[i] * recall[i]) / (precision[i] + recall[i]), 3) for i in range(3)]
+            print(
+                '[%d/%d] TP:%s, FP:%s, FN:%s, Precesion:%s, Recall:%s, F1:%s' % (
+                i, amount, str(TP), str(FP), str(FN), str(precision), str(recall), str(f1)))
+
+    precision = [round(TP[i] / (TP[i] + FP[i]),3) for i in range(len(TP))]
+    recall = [round(TP[i] / (TP[i] + FN[i]),3) for i in range(len(TP))]
+    f1 = [round(2 * (precision[i] * recall[i]) / (precision[i] + recall[i]), 3) for i in range(3)]
+    print(
+        '[%d/%d] TP:%s, FP:%s, FN:%s, Precesion:%s, Recall:%s, F1:%s' % (
+            i, amount, str(TP), str(FP), str(FN), str(precision), str(recall), str(f1)))
+    # print("Average precision:%.4f; Average recall:%.3f" % (sum(pres)/len(pres), sum(recalls)/len(recalls)))
+
+
+no_text = False
+only_text = False
+detect = load_detect_result_json('E:\\Mulong\\Result\\rico\\rico_uied\\rico_new_uied_v3\\merge')
+gt = load_ground_truth_json('E:\\Mulong\\Datasets\\rico\\instances_test.json')
+eval(detect, gt, 'E:\\Mulong\\Datasets\\rico\\combined', show=False, no_text=no_text, only_text=only_text)