import copy
import math
import os
import sys
import time

import cv2
import numpy as np
import tensorflow.compat.v1 as tf
from shapely.geometry import Polygon

import nms_locality
from icdar import restore_rectangle, ground_truth_to_word

font = cv2.FONT_HERSHEY_SIMPLEX
test_folder = 'cs/'
# pb_file_path = '/data2/hejinlong/model_saved/FOTS-master/savedmodel/0902/ocr_model-0902-2201.pb'
pb_file_path = '/data2/liudan/ocr/savemode/0801/ocr_model-0801-1920.pb'


def get_images():
    """Collect all image files under test_folder."""
    files = []
    exts = ['jpg', 'png', 'jpeg', 'JPG']
    for parent, dirnames, filenames in os.walk(test_folder):
        for filename in filenames:
            for ext in exts:
                if filename.endswith(ext):
                    files.append(os.path.join(parent, filename))
                    break
    print('Found {} images'.format(len(files)))
    return files


def detect(score_map, geo_map, timer, score_map_thresh=0.8, box_thresh=0.1, nms_thres=0.2):
    """Restore text quadrangles from the score/geometry maps and run locality-aware NMS."""
    if len(score_map.shape) == 4:
        score_map = score_map[0, :, :, 0]
        geo_map = geo_map[0, :, :, :]
    # keep pixels above the score threshold, sorted by row
    xy_text = np.argwhere(score_map > score_map_thresh)
    xy_text = xy_text[np.argsort(xy_text[:, 0])]
    # restore quadrangles (N*4*2); feature maps are at 1/4 of the input resolution
    start = time.time()
    text_box_restored = restore_rectangle(xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
    print('{} text boxes before nms'.format(text_box_restored.shape[0]))
    boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
    boxes[:, :8] = text_box_restored.reshape((-1, 8))
    boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
    timer['restore'] = time.time() - start

    start = time.time()
    boxes = nms_locality.nms_locality(boxes.astype('float32'), nms_thres)
    timer['nms'] = time.time() - start

    if boxes.shape[0] == 0:
        return None, timer

    # refine each box score with the mean score inside the box, then threshold
    for i, box in enumerate(boxes):
        mask = np.zeros_like(score_map, dtype=np.uint8)
        cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
        boxes[i, 8] = cv2.mean(score_map, mask)[0]
    boxes = boxes[boxes[:, 8] > box_thresh]
    return boxes, timer


def get_project_matrix_and_width(text_polyses, target_height=8.0):
    """Build the affine matrices that map each detected quadrangle to a fixed-height RoI strip."""
    project_matrixes = []
    box_widths = []
    for i in range(text_polyses.shape[0]):
        # boxes are in input-image coordinates; the shared feature map is at 1/4 resolution
        x1, y1, x2, y2, x3, y3, x4, y4 = text_polyses[i] / 4
        rotated_rect = cv2.minAreaRect(np.array([[x1, y1], [x2, y2], [x3, y3], [x4, y4]]))
        box_w, box_h = rotated_rect[1][0], rotated_rect[1][1]
        if box_w <= box_h:
            box_w, box_h = box_h, box_w

        # map the quadrangle onto a strip of height target_height, width capped at 128
        mapped_x1, mapped_y1 = (0, 0)
        mapped_x4, mapped_y4 = (0, int(target_height))
        width_box = math.ceil(target_height * box_w / box_h)
        width_box = int(min(width_box, 128))
        mapped_x2, mapped_y2 = (width_box, 0)

        # the transform maps RoI (output) coordinates back onto the shared feature map (dst -> src)
        src_pts = np.float32([(x1, y1), (x2, y2), (x4, y4)])
        dst_pts = np.float32([(mapped_x1, mapped_y1), (mapped_x2, mapped_y2), (mapped_x4, mapped_y4)])
        affine_matrix = cv2.getAffineTransform(dst_pts.astype(np.float32), src_pts.astype(np.float32))
        affine_matrix = affine_matrix.flatten()

        project_matrixes.append(affine_matrix)
        box_widths.append(width_box)

    project_matrixes = np.array(project_matrixes)
    box_widths = np.array(box_widths)
    return project_matrixes, box_widths


def sort_poly(p):
    """Order the four vertices so the top-left point comes first and the box reads clockwise."""
    min_axis = np.argmin(np.sum(p, axis=1))
    p = p[[min_axis, (min_axis + 1) % 4, (min_axis + 2) % 4, (min_axis + 3) % 4]]
    if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
        return p
    else:
        return p[[0, 3, 2, 1]]


def sort_by_area(boxes):
    """Sort boxes by polygon area, largest first; return the sort indices and the sorted boxes."""
    A_lst = []
    for box in boxes:
        g = Polygon(box[:8].reshape((4, 2)))
        A_lst.append(g.area)
    B_lst = copy.deepcopy(A_lst)
    A_lst.sort(reverse=True)
    box_lst = []
    idx_lst = []
    for i in A_lst:
        index = B_lst.index(i)
        idx_lst.append(index)
        box_lst.append(boxes[index])
    return idx_lst, np.array(box_lst)


def sort_box_by_dist(boxes):
    """Sort boxes by L1 distance from each box centre to the centre of all boxes, nearest first."""
    A_lst = []
    boxes_c = boxes.copy()
    boxes_c = boxes_c.reshape((-1, 4, 2))
    boxes_c_x = np.mean(boxes_c[..., 0])
    boxes_c_y = np.mean(boxes_c[..., 1])
    for i, j in zip(np.mean(boxes_c[..., 0], axis=1), np.mean(boxes_c[..., 1], axis=1)):
        g = abs(i - boxes_c_x) + abs(j - boxes_c_y)
        A_lst.append(g)
    B_lst = copy.deepcopy(A_lst)
    A_lst.sort(reverse=False)
    box_lst = []
    idx_lst = []
    for i in A_lst:
        index = B_lst.index(i)
        idx_lst.append(index)
        box_lst.append(boxes[index])
    return idx_lst, np.array(box_lst)


def test():
    try:
        os.makedirs('outputs/')
    except OSError as e:
        if e.errno != 17:  # ignore "directory already exists"
            raise

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        input_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_images')
        input_transform_matrix = tf.placeholder(tf.float32, shape=[None, 6], name='input_transform_matrix')
        input_box_mask = []
        input_box_mask.append(tf.placeholder(tf.int32, shape=[None], name='input_box_masks_0'))
        input_box_widths = tf.placeholder(tf.int32, shape=[None], name='input_box_widths')

        # load the frozen graph and wire it to the placeholders above
        with open(pb_file_path, 'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            # detection branch: score map and geometry map
            output1 = tf.import_graph_def(graph_def,
                                          input_map={'input_images:0': input_images},
                                          return_elements=['feature_fusion/Conv_7/Sigmoid:0',
                                                           'feature_fusion/concat_3:0'])
            # recognition branch: decoded character indices
            output2 = tf.import_graph_def(graph_def,
                                          input_map={'input_images:0': input_images,
                                                     'input_transform_matrix:0': input_transform_matrix,
                                                     'input_box_masks_0:0': input_box_mask[0],
                                                     'input_box_widths:0': input_box_widths},
                                          return_elements=['SparseToDense:0'])

        input_size = 512
        im_fn_list = get_images()
        for im_fn in im_fn_list:
            im = cv2.imread(im_fn)[:, :, ::-1]  # BGR -> RGB
            new_h, new_w, _ = im.shape
            h_ratio_hegd, w_ratio_hegd = 1., 1.

            # pad the image to a square, then resize it to input_size x input_size
            max_h_w_i = np.max([new_h, new_w, input_size])
            # im_padded = np.ones((max_h_w_i, max_h_w_i, 3), dtype=np.uint8) * 127
            im_padded = np.zeros((max_h_w_i, max_h_w_i, 3), dtype=np.uint8)
            im_padded[:new_h, :new_w, :] = im.copy()
            if max_h_w_i == input_size:
                im = im_padded.copy()
                # if new_h > new_w:
                #     im = cv2.resize(im, (round(new_w * 512 / new_h), 512))
                #     new_h_hegd, new_w_hegd, _ = im.shape
                #     im_padded = np.zeros((512, 512, 3), dtype=np.uint8)
                #     im_padded[:new_h_hegd, :new_w_hegd, :] = im.copy()
                #     im = im_padded
                #     h_ratio_hegd, w_ratio_hegd = 512 / new_h, 512 / new_h
                # else:
                #     im = cv2.resize(im, (512, round(new_h * 512 / new_w)))
                #     new_h_hegd, new_w_hegd, _ = im.shape
                #     im_padded = np.zeros((512, 512, 3), dtype=np.uint8)
                #     im_padded[:new_h_hegd, :new_w_hegd, :] = im.copy()
                #     im = im_padded
                #     h_ratio_hegd, w_ratio_hegd = input_size / new_w, input_size / new_w
            else:
                im = cv2.resize(im_padded, dsize=(input_size, input_size))
                h_ratio_hegd, w_ratio_hegd = input_size / max_h_w_i, input_size / max_h_w_i

            start_time = time.time()
            timer = {'net': 0, 'restore': 0, 'nms': 0}
            start = time.time()

            # detection pass
            score, geometry = sess.run(output1, feed_dict={input_images: [im]})
            boxes, timer = detect(score_map=score, geo_map=geometry, timer=timer)

            # NOTE: every image writes to the same result file, so it is overwritten on each iteration
            res_file = 'outputs/001.txt'

            if boxes is not None and boxes.shape[0] != 0:
                input_roi_boxes = boxes[:, :8].reshape(-1, 8)
                boxes_masks = [int(0)] * input_roi_boxes.shape[0]
                transform_matrixes, box_widths = get_project_matrix_and_width(input_roi_boxes)

                # recognition pass over the detected RoIs
                try:
                    recog_decode = sess.run(output2, feed_dict={input_images: [im],
                                                                input_transform_matrix: transform_matrixes,
                                                                input_box_mask[0]: boxes_masks,
                                                                input_box_widths: box_widths})[0]
                except Exception:
                    with open(res_file, 'w') as f:
                        f.write('')
                    continue
                timer['net'] = time.time() - start

                boxes = boxes[:, :8].reshape((-1, 4, 2))
                if recog_decode.shape[0] != boxes.shape[0]:
                    print("detection and recognition results are not equal!")
                    sys.exit(-1)

                # idx_lst, boxes = sort_box_by_dist(boxes)
                # recog_decode_lst = []
                # for i in idx_lst:
                #     recog_decode_lst.append(recog_decode[i])
                # recog_decode = recog_decode_lst

                # keep only the first boxes for specific dataset folders
                # (the strings below are the Chinese folder names used on disk)
                # if im_fn.split('/')[-2] == '刷新表':
                #     boxes = boxes[:2, ...]
                #     recog_decode = recog_decode[:2]
                # if im_fn.split('/')[-2] in ['绿色表', '黑色表']:
                #     boxes = boxes[:4, ...]
                #     recog_decode = recog_decode[:4]
                #     if len(boxes) < 4:
                #         boxes = []

                # write one line per box: the four vertices followed by the recognized text
                with open(res_file, 'w') as f:
                    text_tags = []
                    boxes_hegd = []
                    for i, box in enumerate(boxes):
                        box = sort_poly(box.astype(np.int32))
                        # map the box back to the original image resolution
                        box[..., 0], box[..., 1] = box[..., 0] / h_ratio_hegd, box[..., 1] / w_ratio_hegd
                        # drop degenerate boxes and boxes that fall clearly outside the image
                        if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5:
                            continue
                        if np.any(box[..., 0] > new_w * 1.1) or np.any(box[..., 1] > new_h * 1.1) \
                                or np.any(box[..., 0] < -new_w * 0.1) or np.any(box[..., 1] < -new_h * 0.1):
                            continue
                        recognition_result = ground_truth_to_word(recog_decode[i])
                        for ii in range(box.shape[0]):
                            for jj in range(box.shape[1]):
                                box[ii, jj] = round(box[ii, jj])
                        box = box.astype(np.int32)
                        text_tags.append(recognition_result)
                        boxes_hegd.append(box)
                        f.write('{},{},{},{},{},{},{},{},{}\r\n'.format(
                            box[0, 0], box[0, 1], box[1, 0], box[1, 1],
                            box[2, 0], box[2, 1], box[3, 0], box[3, 1],
                            recognition_result))

                # draw the boxes and recognized text on the image and save it under outputs/
                from box_a_pic import box_pic
                boxes_hegd = np.array(boxes_hegd)
                if len(boxes_hegd) == 0:
                    im_txt1 = cv2.imread(im_fn)
                else:
                    im_txt1, _, _ = box_pic(boxes_hegd, text_tags, im_fn)
                im_fn = im_fn.split(test_folder)[-1]  # [1:]
                img_path = os.path.join('outputs', im_fn)
                print(img_path)
                dir_path = img_path.split(os.path.basename(img_path))[0]
                if not os.path.exists(dir_path):
                    os.makedirs(dir_path)
                cv2.imwrite(img_path, im_txt1)
            else:
                timer['net'] = time.time() - start
                # write an empty result file when nothing is detected
                open(res_file, 'w').close()

            print('{} : net {:.0f}ms, restore {:.0f}ms, nms {:.0f}ms'.format(
                im_fn, timer['net'] * 1000, timer['restore'] * 1000, timer['nms'] * 1000))

            duration = time.time() - start_time
            print('[timing] {}'.format(duration))


if __name__ == '__main__':
    test()
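

# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original script: one way the result
# lines written by test() -- "x1,y1,x2,y2,x3,y3,x4,y4,text" -- could be read
# back for downstream use. The helper name parse_result_file and its return
# format are assumptions introduced here for illustration only.
def parse_result_file(path):
    """Parse a result file into a list of (list of four (x, y) vertices, recognized text)."""
    results = []
    with open(path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            # split into 8 coordinates plus the remaining text, which may itself contain commas
            parts = line.split(',', 8)
            coords = list(map(int, parts[:8]))
            text = parts[8] if len(parts) > 8 else ''
            quad = [(coords[k], coords[k + 1]) for k in range(0, 8, 2)]
            results.append((quad, text))
    return results

# Example usage (hypothetical path): parse_result_file('outputs/001.txt')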