'''
Author: hejinlong
LastEditTime: 2021-03-08 09:15:27
Description: hegd@mail.ustc.edu.cn
'''
# -*- coding: utf-8 -*-
import os

import numpy as np


def simple_load_np_dataset(root_path='/data2/liudan/ocr/data/ocr_txt/'):
    """Load the validation image paths, boxes and text tags from ``root_path``.

    Three text files are read from the directory:

    * ``val_img_path_rec.txt`` -- one entry per line; each line is stripped
      of surrounding whitespace.
    * ``val_boxes_rec.txt`` -- tab-separated integers for the whole data
      set. They are reshaped to ``(num_images, -1, 9)``, i.e. every box is
      described by 9 integers and every image has the same number of
      boxes. (The meaning of the 9 values is not defined in this module.)
    * ``val_text_tag_rec.txt`` -- one line per entry; the tags on a line
      are delimited by the marker ``'\\t**hegd**\\t'``.

    Args:
        root_path: Directory containing the three files. A trailing path
            separator is optional.

    Returns:
        A tuple ``(all_img_path, boxes, all_text_tags)`` where
        ``all_img_path`` is a list of strings, ``boxes`` is an integer
        ``numpy.ndarray`` of shape ``(len(all_img_path), boxes_per_image, 9)``
        and ``all_text_tags`` is a list with one list of tag strings per
        line of the tag file. When there are no images and no box values,
        ``boxes`` has shape ``(0, 0, 9)``.

    Raises:
        OSError: If one of the files cannot be opened.
        ValueError: If the boxes file contains a non-integer token or the
            number of values does not fit the ``(num_images, -1, 9)`` layout.
    """
    tag_separator = '\t**hegd**\t'
    values_per_box = 9

    # os.path.join makes the trailing separator on root_path optional while
    # producing the same paths as plain concatenation when it is present.
    img_list_rec_file = os.path.join(root_path, 'val_img_path_rec.txt')
    boxes_rec_file = os.path.join(root_path, 'val_boxes_rec.txt')
    text_tag_rec_file = os.path.join(root_path, 'val_text_tag_rec.txt')

    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(img_list_rec_file, 'r', encoding='utf-8') as f:
        all_img_path = [line.strip() for line in f]

    with open(boxes_rec_file, 'r', encoding='utf-8') as f:
        file_content = f.read().strip()
    # ''.split('\t') yields [''], which int() rejects; treat an empty file
    # as "no values" instead of crashing.
    tokens = file_content.split('\t') if file_content else []
    # dtype is fixed so that an empty file still yields an integer array.
    boxes_flatten = np.array([int(token) for token in tokens], dtype=int)

    if all_img_path:
        boxes = boxes_flatten.reshape((len(all_img_path), -1, values_per_box))
    else:
        # numpy cannot infer the -1 dimension when the leading dimension is
        # 0, so the empty data set is spelled out explicitly. Leftover
        # values without any image still raise ValueError here.
        boxes = boxes_flatten.reshape((0, 0, values_per_box))

    with open(text_tag_rec_file, 'r', encoding='utf-8') as f:
        # The piece after the final separator (normally just the line's
        # trailing newline) is discarded. Consequently a line that does not
        # end with the separator loses its last tag.
        all_text_tags = [line.split(tag_separator)[:-1] for line in f]

    return all_img_path, boxes, all_text_tags