x2coco.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. # Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. import argparse
  17. import glob
  18. import json
  19. import os
  20. import os.path as osp
  21. import shutil
  22. import xml.etree.ElementTree as ET
  23. import numpy as np
  24. import PIL.ImageDraw
  25. from tqdm import tqdm
  26. import cv2
  27. label_to_num = {}
  28. categories_list = []
  29. labels_list = []
  30. class MyEncoder(json.JSONEncoder):
  31. def default(self, obj):
  32. if isinstance(obj, np.integer):
  33. return int(obj)
  34. elif isinstance(obj, np.floating):
  35. return float(obj)
  36. elif isinstance(obj, np.ndarray):
  37. return obj.tolist()
  38. else:
  39. return super(MyEncoder, self).default(obj)
  40. def images_labelme(data, num):
  41. image = {}
  42. image['height'] = data['imageHeight']
  43. image['width'] = data['imageWidth']
  44. image['id'] = num + 1
  45. if '\\' in data['imagePath']:
  46. image['file_name'] = data['imagePath'].split('\\')[-1]
  47. else:
  48. image['file_name'] = data['imagePath'].split('/')[-1]
  49. return image
  50. def images_cityscape(data, num, img_file):
  51. image = {}
  52. image['height'] = data['imgHeight']
  53. image['width'] = data['imgWidth']
  54. image['id'] = num + 1
  55. image['file_name'] = img_file
  56. return image
  57. def categories(label, labels_list):
  58. category = {}
  59. category['supercategory'] = 'component'
  60. category['id'] = len(labels_list) + 1
  61. category['name'] = label
  62. return category
  63. def annotations_rectangle(points, label, image_num, object_num, label_to_num):
  64. annotation = {}
  65. seg_points = np.asarray(points).copy()
  66. seg_points[1, :] = np.asarray(points)[2, :]
  67. seg_points[2, :] = np.asarray(points)[1, :]
  68. annotation['segmentation'] = [list(seg_points.flatten())]
  69. annotation['iscrowd'] = 0
  70. annotation['image_id'] = image_num + 1
  71. annotation['bbox'] = list(
  72. map(float, [
  73. points[0][0], points[0][1], points[1][0] - points[0][0], points[1][
  74. 1] - points[0][1]
  75. ]))
  76. annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
  77. annotation['category_id'] = label_to_num[label]
  78. annotation['id'] = object_num + 1
  79. return annotation
  80. def annotations_polygon(height, width, points, label, image_num, object_num,
  81. label_to_num):
  82. annotation = {}
  83. annotation['segmentation'] = [list(np.asarray(points).flatten())]
  84. annotation['iscrowd'] = 0
  85. annotation['image_id'] = image_num + 1
  86. annotation['bbox'] = list(map(float, get_bbox(height, width, points)))
  87. annotation['area'] = annotation['bbox'][2] * annotation['bbox'][3]
  88. annotation['category_id'] = label_to_num[label]
  89. annotation['id'] = object_num + 1
  90. return annotation
  91. def get_bbox(height, width, points):
  92. polygons = points
  93. mask = np.zeros([height, width], dtype=np.uint8)
  94. mask = PIL.Image.fromarray(mask)
  95. xy = list(map(tuple, polygons))
  96. PIL.ImageDraw.Draw(mask).polygon(xy=xy, outline=1, fill=1)
  97. mask = np.array(mask, dtype=bool)
  98. index = np.argwhere(mask == 1)
  99. rows = index[:, 0]
  100. clos = index[:, 1]
  101. left_top_r = np.min(rows)
  102. left_top_c = np.min(clos)
  103. right_bottom_r = np.max(rows)
  104. right_bottom_c = np.max(clos)
  105. return [
  106. left_top_c, left_top_r, right_bottom_c - left_top_c,
  107. right_bottom_r - left_top_r
  108. ]
  109. def deal_json(ds_type, img_path, json_path):
  110. data_coco = {}
  111. images_list = []
  112. annotations_list = []
  113. image_num = -1
  114. object_num = -1
  115. for img_file in os.listdir(img_path):
  116. img_label = os.path.splitext(img_file)[0]
  117. if img_file.split('.')[
  118. -1] not in ['bmp', 'jpg', 'jpeg', 'png', 'JPEG', 'JPG', 'PNG']:
  119. continue
  120. label_file = osp.join(json_path, img_label + '.json')
  121. print('Generating dataset from:', label_file)
  122. image_num = image_num + 1
  123. with open(label_file) as f:
  124. data = json.load(f)
  125. if ds_type == 'labelme':
  126. images_list.append(images_labelme(data, image_num))
  127. elif ds_type == 'cityscape':
  128. images_list.append(images_cityscape(data, image_num, img_file))
  129. if ds_type == 'labelme':
  130. for shapes in data['shapes']:
  131. object_num = object_num + 1
  132. label = shapes['label']
  133. if label not in labels_list:
  134. categories_list.append(categories(label, labels_list))
  135. labels_list.append(label)
  136. label_to_num[label] = len(labels_list)
  137. p_type = shapes['shape_type']
  138. if p_type == 'polygon':
  139. points = shapes['points']
  140. annotations_list.append(
  141. annotations_polygon(data['imageHeight'], data[
  142. 'imageWidth'], points, label, image_num,
  143. object_num, label_to_num))
  144. if p_type == 'rectangle':
  145. (x1, y1), (x2, y2) = shapes['points']
  146. x1, x2 = sorted([x1, x2])
  147. y1, y2 = sorted([y1, y2])
  148. points = [[x1, y1], [x2, y2], [x1, y2], [x2, y1]]
  149. annotations_list.append(
  150. annotations_rectangle(points, label, image_num,
  151. object_num, label_to_num))
  152. elif ds_type == 'cityscape':
  153. for shapes in data['objects']:
  154. object_num = object_num + 1
  155. label = shapes['label']
  156. if label not in labels_list:
  157. categories_list.append(categories(label, labels_list))
  158. labels_list.append(label)
  159. label_to_num[label] = len(labels_list)
  160. points = shapes['polygon']
  161. annotations_list.append(
  162. annotations_polygon(data['imgHeight'], data[
  163. 'imgWidth'], points, label, image_num, object_num,
  164. label_to_num))
  165. data_coco['images'] = images_list
  166. data_coco['categories'] = categories_list
  167. data_coco['annotations'] = annotations_list
  168. return data_coco
  169. def voc_get_label_anno(ann_dir_path, ann_ids_path, labels_path):
  170. with open(labels_path, 'r') as f:
  171. labels_str = f.read().split()
  172. labels_ids = list(range(1, len(labels_str) + 1))
  173. with open(ann_ids_path, 'r') as f:
  174. ann_ids = [lin.strip().split(' ')[-1] for lin in f.readlines()]
  175. ann_paths = []
  176. for aid in ann_ids:
  177. if aid.endswith('xml'):
  178. ann_path = os.path.join(ann_dir_path, aid)
  179. else:
  180. ann_path = os.path.join(ann_dir_path, aid + '.xml')
  181. ann_paths.append(ann_path)
  182. return dict(zip(labels_str, labels_ids)), ann_paths
  183. def voc_get_image_info(annotation_root, im_id):
  184. filename = annotation_root.findtext('filename')
  185. assert filename is not None
  186. img_name = os.path.basename(filename)
  187. size = annotation_root.find('size')
  188. width = float(size.findtext('width'))
  189. height = float(size.findtext('height'))
  190. image_info = {
  191. 'file_name': filename,
  192. 'height': height,
  193. 'width': width,
  194. 'id': im_id
  195. }
  196. return image_info
  197. def voc_get_coco_annotation(obj, label2id):
  198. label = obj.findtext('name')
  199. assert label in label2id, "label is not in label2id."
  200. category_id = label2id[label]
  201. bndbox = obj.find('bndbox')
  202. xmin = float(bndbox.findtext('xmin'))
  203. ymin = float(bndbox.findtext('ymin'))
  204. xmax = float(bndbox.findtext('xmax'))
  205. ymax = float(bndbox.findtext('ymax'))
  206. assert xmax > xmin and ymax > ymin, "Box size error."
  207. o_width = xmax - xmin
  208. o_height = ymax - ymin
  209. anno = {
  210. 'area': o_width * o_height,
  211. 'iscrowd': 0,
  212. 'bbox': [xmin, ymin, o_width, o_height],
  213. 'category_id': category_id,
  214. 'ignore': 0,
  215. }
  216. return anno
  217. def voc_xmls_to_cocojson(annotation_paths, label2id, output_dir, output_file):
  218. output_json_dict = {
  219. "images": [],
  220. "type": "instances",
  221. "annotations": [],
  222. "categories": []
  223. }
  224. bnd_id = 1 # bounding box start id
  225. im_id = 0
  226. print('Start converting !')
  227. for a_path in tqdm(annotation_paths):
  228. # Read annotation xml
  229. ann_tree = ET.parse(a_path)
  230. ann_root = ann_tree.getroot()
  231. img_info = voc_get_image_info(ann_root, im_id)
  232. output_json_dict['images'].append(img_info)
  233. for obj in ann_root.findall('object'):
  234. ann = voc_get_coco_annotation(obj=obj, label2id=label2id)
  235. ann.update({'image_id': im_id, 'id': bnd_id})
  236. output_json_dict['annotations'].append(ann)
  237. bnd_id = bnd_id + 1
  238. im_id += 1
  239. for label, label_id in label2id.items():
  240. category_info = {'supercategory': 'none', 'id': label_id, 'name': label}
  241. output_json_dict['categories'].append(category_info)
  242. output_file = os.path.join(output_dir, output_file)
  243. with open(output_file, 'w') as f:
  244. output_json = json.dumps(output_json_dict)
  245. f.write(output_json)
  246. def widerface_to_cocojson(root_path):
  247. train_gt_txt = os.path.join(root_path, "wider_face_split", "wider_face_train_bbx_gt.txt")
  248. val_gt_txt = os.path.join(root_path, "wider_face_split", "wider_face_val_bbx_gt.txt")
  249. train_img_dir = os.path.join(root_path, "WIDER_train", "images")
  250. val_img_dir = os.path.join(root_path, "WIDER_val", "images")
  251. assert train_gt_txt
  252. assert val_gt_txt
  253. assert train_img_dir
  254. assert val_img_dir
  255. save_path = os.path.join(root_path, "widerface_train.json")
  256. widerface_convert(train_gt_txt, train_img_dir, save_path)
  257. print("Wider Face train dataset converts sucess, the json path: {}".format(save_path))
  258. save_path = os.path.join(root_path, "widerface_val.json")
  259. widerface_convert(val_gt_txt, val_img_dir, save_path)
  260. print("Wider Face val dataset converts sucess, the json path: {}".format(save_path))
  261. def widerface_convert(gt_txt, img_dir, save_path):
  262. output_json_dict = {
  263. "images": [],
  264. "type": "instances",
  265. "annotations": [],
  266. "categories": [{'supercategory': 'none', 'id': 0, 'name': "human_face"}]
  267. }
  268. bnd_id = 1 # bounding box start id
  269. im_id = 0
  270. print('Start converting !')
  271. with open(gt_txt) as fd:
  272. lines = fd.readlines()
  273. i = 0
  274. while i < len(lines):
  275. image_name = lines[i].strip()
  276. bbox_num = int(lines[i + 1].strip())
  277. i += 2
  278. img_info = get_widerface_image_info(img_dir, image_name, im_id)
  279. if img_info:
  280. output_json_dict["images"].append(img_info)
  281. for j in range(i, i + bbox_num):
  282. anno = get_widerface_ann_info(lines[j])
  283. anno.update({'image_id': im_id, 'id': bnd_id})
  284. output_json_dict['annotations'].append(anno)
  285. bnd_id += 1
  286. else:
  287. print("The image dose not exist: {}".format(os.path.join(img_dir, image_name)))
  288. bbox_num = 1 if bbox_num == 0 else bbox_num
  289. i += bbox_num
  290. im_id += 1
  291. with open(save_path, 'w') as f:
  292. output_json = json.dumps(output_json_dict)
  293. f.write(output_json)
  294. def get_widerface_image_info(img_root, img_relative_path, img_id):
  295. image_info = {}
  296. save_path = os.path.join(img_root, img_relative_path)
  297. if os.path.exists(save_path):
  298. img = cv2.imread(save_path)
  299. image_info["file_name"] = os.path.join(os.path.basename(
  300. os.path.dirname(img_root)), os.path.basename(img_root),
  301. img_relative_path)
  302. image_info["height"] = img.shape[0]
  303. image_info["width"] = img.shape[1]
  304. image_info["id"] = img_id
  305. return image_info
  306. def get_widerface_ann_info(info):
  307. info = [int(x) for x in info.strip().split()]
  308. anno = {
  309. 'area': info[2] * info[3],
  310. 'iscrowd': 0,
  311. 'bbox': [info[0], info[1], info[2], info[3]],
  312. 'category_id': 0,
  313. 'ignore': 0,
  314. 'blur': info[4],
  315. 'expression': info[5],
  316. 'illumination': info[6],
  317. 'invalid': info[7],
  318. 'occlusion': info[8],
  319. 'pose': info[9]
  320. }
  321. return anno
  322. def main():
  323. parser = argparse.ArgumentParser(
  324. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  325. parser.add_argument(
  326. '--dataset_type',
  327. help='the type of dataset, can be `voc`, `widerface`, `labelme` or `cityscape`')
  328. parser.add_argument('--json_input_dir', help='input annotated directory')
  329. parser.add_argument('--image_input_dir', help='image directory')
  330. parser.add_argument(
  331. '--output_dir', help='output dataset directory', default='./')
  332. parser.add_argument(
  333. '--train_proportion',
  334. help='the proportion of train dataset',
  335. type=float,
  336. default=1.0)
  337. parser.add_argument(
  338. '--val_proportion',
  339. help='the proportion of validation dataset',
  340. type=float,
  341. default=0.0)
  342. parser.add_argument(
  343. '--test_proportion',
  344. help='the proportion of test dataset',
  345. type=float,
  346. default=0.0)
  347. parser.add_argument(
  348. '--voc_anno_dir',
  349. help='In Voc format dataset, path to annotation files directory.',
  350. type=str,
  351. default=None)
  352. parser.add_argument(
  353. '--voc_anno_list',
  354. help='In Voc format dataset, path to annotation files ids list.',
  355. type=str,
  356. default=None)
  357. parser.add_argument(
  358. '--voc_label_list',
  359. help='In Voc format dataset, path to label list. The content of each line is a category.',
  360. type=str,
  361. default=None)
  362. parser.add_argument(
  363. '--voc_out_name',
  364. type=str,
  365. default='voc.json',
  366. help='In Voc format dataset, path to output json file')
  367. parser.add_argument(
  368. '--widerface_root_dir',
  369. help='The root_path for wider face dataset, which contains `wider_face_split`, `WIDER_train` and `WIDER_val`.And the json file will save in this path',
  370. type=str,
  371. default=None)
  372. args = parser.parse_args()
  373. try:
  374. assert args.dataset_type in ['voc', 'labelme', 'cityscape', 'widerface']
  375. except AssertionError as e:
  376. print(
  377. 'Now only support the voc, cityscape dataset and labelme dataset!!')
  378. os._exit(0)
  379. if args.dataset_type == 'voc':
  380. assert args.voc_anno_dir and args.voc_anno_list and args.voc_label_list
  381. label2id, ann_paths = voc_get_label_anno(
  382. args.voc_anno_dir, args.voc_anno_list, args.voc_label_list)
  383. voc_xmls_to_cocojson(
  384. annotation_paths=ann_paths,
  385. label2id=label2id,
  386. output_dir=args.output_dir,
  387. output_file=args.voc_out_name)
  388. elif args.dataset_type == "widerface":
  389. assert args.widerface_root_dir
  390. widerface_to_cocojson(args.widerface_root_dir)
  391. else:
  392. try:
  393. assert os.path.exists(args.json_input_dir)
  394. except AssertionError as e:
  395. print('The json folder does not exist!')
  396. os._exit(0)
  397. try:
  398. assert os.path.exists(args.image_input_dir)
  399. except AssertionError as e:
  400. print('The image folder does not exist!')
  401. os._exit(0)
  402. try:
  403. assert abs(args.train_proportion + args.val_proportion \
  404. + args.test_proportion - 1.0) < 1e-5
  405. except AssertionError as e:
  406. print(
  407. 'The sum of pqoportion of training, validation and test datase must be 1!'
  408. )
  409. os._exit(0)
  410. # Allocate the dataset.
  411. total_num = len(glob.glob(osp.join(args.json_input_dir, '*.json')))
  412. if args.train_proportion != 0:
  413. train_num = int(total_num * args.train_proportion)
  414. out_dir = args.output_dir + '/train'
  415. if not os.path.exists(out_dir):
  416. os.makedirs(out_dir)
  417. else:
  418. train_num = 0
  419. if args.val_proportion == 0.0:
  420. val_num = 0
  421. test_num = total_num - train_num
  422. out_dir = args.output_dir + '/test'
  423. if args.test_proportion != 0.0 and not os.path.exists(out_dir):
  424. os.makedirs(out_dir)
  425. else:
  426. val_num = int(total_num * args.val_proportion)
  427. test_num = total_num - train_num - val_num
  428. val_out_dir = args.output_dir + '/val'
  429. if not os.path.exists(val_out_dir):
  430. os.makedirs(val_out_dir)
  431. test_out_dir = args.output_dir + '/test'
  432. if args.test_proportion != 0.0 and not os.path.exists(test_out_dir):
  433. os.makedirs(test_out_dir)
  434. count = 1
  435. for img_name in os.listdir(args.image_input_dir):
  436. if count <= train_num:
  437. if osp.exists(args.output_dir + '/train/'):
  438. shutil.copyfile(
  439. osp.join(args.image_input_dir, img_name),
  440. osp.join(args.output_dir + '/train/', img_name))
  441. else:
  442. if count <= train_num + val_num:
  443. if osp.exists(args.output_dir + '/val/'):
  444. shutil.copyfile(
  445. osp.join(args.image_input_dir, img_name),
  446. osp.join(args.output_dir + '/val/', img_name))
  447. else:
  448. if osp.exists(args.output_dir + '/test/'):
  449. shutil.copyfile(
  450. osp.join(args.image_input_dir, img_name),
  451. osp.join(args.output_dir + '/test/', img_name))
  452. count = count + 1
  453. # Deal with the json files.
  454. if not os.path.exists(args.output_dir + '/annotations'):
  455. os.makedirs(args.output_dir + '/annotations')
  456. if args.train_proportion != 0:
  457. train_data_coco = deal_json(args.dataset_type,
  458. args.output_dir + '/train',
  459. args.json_input_dir)
  460. train_json_path = osp.join(args.output_dir + '/annotations',
  461. 'instance_train.json')
  462. json.dump(
  463. train_data_coco,
  464. open(train_json_path, 'w'),
  465. indent=4,
  466. cls=MyEncoder)
  467. if args.val_proportion != 0:
  468. val_data_coco = deal_json(args.dataset_type,
  469. args.output_dir + '/val',
  470. args.json_input_dir)
  471. val_json_path = osp.join(args.output_dir + '/annotations',
  472. 'instance_val.json')
  473. json.dump(
  474. val_data_coco,
  475. open(val_json_path, 'w'),
  476. indent=4,
  477. cls=MyEncoder)
  478. if args.test_proportion != 0:
  479. test_data_coco = deal_json(args.dataset_type,
  480. args.output_dir + '/test',
  481. args.json_input_dir)
  482. test_json_path = osp.join(args.output_dir + '/annotations',
  483. 'instance_test.json')
  484. json.dump(
  485. test_data_coco,
  486. open(test_json_path, 'w'),
  487. indent=4,
  488. cls=MyEncoder)
  489. if __name__ == '__main__':
  490. main()