12345678910111213141516171819202122232425262728293031 |
- '''
- Author: hejinlong
- LastEditTime: 2021-03-08 09:15:27
- Description: hegd@mail.ustc.edu.cn
- '''
- # -*- coding: utf-8 -*-
- import numpy as np
-
def simple_load_np_dataset(root_path='/data2/liudan/ocr/data/ocr_txt/'):
    """Load validation image paths, box data and text tags from three text files.

    The following files are read from ``root_path``:

    * ``val_img_path_rec.txt`` -- one entry per line; each line is stripped
      of surrounding whitespace.
    * ``val_boxes_rec.txt`` -- integers separated by tab characters, loaded
      into a flat array and reshaped to ``(number_of_path_lines, -1, 9)``.
    * ``val_text_tag_rec.txt`` -- one record per line; fields are separated
      by a tab, the literal text ``**hegd**`` and another tab. The piece
      following the last separator (normally just the newline) is dropped.

    Args:
        root_path: Directory that contains the three files. A trailing path
            separator is optional.

    Returns:
        A tuple ``(all_img_path, boxes, all_text_tags)`` where
        ``all_img_path`` is a list of str, ``boxes`` is a 3-D numpy integer
        array whose last axis has length 9, and ``all_text_tags`` is a list
        of lists of str (one inner list per line of the tag file).

    Raises:
        OSError: If any of the three files cannot be opened.
        ValueError: If the boxes file contains a non-integer token, or the
            number of integers cannot be reshaped to
            ``(number_of_path_lines, -1, 9)``.
    """
    # Imported locally so this function does not depend on edits to the
    # module-level import block.
    import os

    # os.path.join tolerates a root_path with or without a trailing separator.
    img_list_rec_file = os.path.join(root_path, 'val_img_path_rec.txt')
    boxes_rec_file = os.path.join(root_path, 'val_boxes_rec.txt')
    text_tag_rec_file = os.path.join(root_path, 'val_text_tag_rec.txt')

    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(img_list_rec_file, 'r', encoding='utf-8') as f:
        all_img_path = [line.strip() for line in f]

    with open(boxes_rec_file, 'r', encoding='utf-8') as f:
        file_content = f.read().strip()
    if file_content:
        boxes_flatten = np.array([int(token) for token in file_content.split('\t')])
        boxes = boxes_flatten.reshape((len(all_img_path), -1, 9))
    else:
        # An empty boxes file means "no boxes for any image" rather than a
        # crash on int('').
        boxes = np.zeros((len(all_img_path), 0, 9), dtype=int)

    with open(text_tag_rec_file, 'r', encoding='utf-8') as f:
        # Every tag is followed by the separator, so the final piece of the
        # split is the line remainder (newline) and is discarded.
        all_text_tags = [line.split('\t**hegd**\t')[:-1] for line in f]

    return all_img_path, boxes, all_text_tags
|