widerface_utils.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import os
  18. import cv2
  19. import numpy as np
  20. from collections import OrderedDict
  21. import paddle
  22. from ppdet.utils.logger import setup_logger
  23. logger = setup_logger(__name__)
  24. __all__ = ['face_eval_run', 'lmk2out']
  25. def face_eval_run(model,
  26. image_dir,
  27. gt_file,
  28. pred_dir='output/pred',
  29. eval_mode='widerface',
  30. multi_scale=False):
  31. # load ground truth files
  32. with open(gt_file, 'r') as f:
  33. gt_lines = f.readlines()
  34. imid2path = []
  35. pos_gt = 0
  36. while pos_gt < len(gt_lines):
  37. name_gt = gt_lines[pos_gt].strip('\n\t').split()[0]
  38. imid2path.append(name_gt)
  39. pos_gt += 1
  40. n_gt = int(gt_lines[pos_gt].strip('\n\t').split()[0])
  41. pos_gt += 1 + n_gt
  42. logger.info('The ground truth file load {} images'.format(len(imid2path)))
  43. dets_dist = OrderedDict()
  44. for iter_id, im_path in enumerate(imid2path):
  45. image_path = os.path.join(image_dir, im_path)
  46. if eval_mode == 'fddb':
  47. image_path += '.jpg'
  48. assert os.path.exists(image_path)
  49. image = cv2.imread(image_path)
  50. image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  51. if multi_scale:
  52. shrink, max_shrink = get_shrink(image.shape[0], image.shape[1])
  53. det0 = detect_face(model, image, shrink)
  54. det1 = flip_test(model, image, shrink)
  55. [det2, det3] = multi_scale_test(model, image, max_shrink)
  56. det4 = multi_scale_test_pyramid(model, image, max_shrink)
  57. det = np.row_stack((det0, det1, det2, det3, det4))
  58. dets = bbox_vote(det)
  59. else:
  60. dets = detect_face(model, image, 1)
  61. if eval_mode == 'widerface':
  62. save_widerface_bboxes(image_path, dets, pred_dir)
  63. else:
  64. dets_dist[im_path] = dets
  65. if iter_id % 100 == 0:
  66. logger.info('Test iter {}'.format(iter_id))
  67. if eval_mode == 'fddb':
  68. save_fddb_bboxes(dets_dist, pred_dir)
  69. logger.info("Finish evaluation.")
  70. def detect_face(model, image, shrink):
  71. image_shape = [image.shape[0], image.shape[1]]
  72. if shrink != 1:
  73. h, w = int(image_shape[0] * shrink), int(image_shape[1] * shrink)
  74. image = cv2.resize(image, (w, h))
  75. image_shape = [h, w]
  76. img = face_img_process(image)
  77. image_shape = np.asarray([image_shape])
  78. scale_factor = np.asarray([[shrink, shrink]])
  79. data = {
  80. "image": paddle.to_tensor(
  81. img, dtype='float32'),
  82. "im_shape": paddle.to_tensor(
  83. image_shape, dtype='float32'),
  84. "scale_factor": paddle.to_tensor(
  85. scale_factor, dtype='float32')
  86. }
  87. model.eval()
  88. detection = model(data)
  89. detection = detection['bbox'].numpy()
  90. # layout: xmin, ymin, xmax. ymax, score
  91. if np.prod(detection.shape) == 1:
  92. logger.info("No face detected")
  93. return np.array([[0, 0, 0, 0, 0]])
  94. det_conf = detection[:, 1]
  95. det_xmin = detection[:, 2]
  96. det_ymin = detection[:, 3]
  97. det_xmax = detection[:, 4]
  98. det_ymax = detection[:, 5]
  99. det = np.column_stack((det_xmin, det_ymin, det_xmax, det_ymax, det_conf))
  100. return det
  101. def flip_test(model, image, shrink):
  102. img = cv2.flip(image, 1)
  103. det_f = detect_face(model, img, shrink)
  104. det_t = np.zeros(det_f.shape)
  105. img_width = image.shape[1]
  106. det_t[:, 0] = img_width - det_f[:, 2]
  107. det_t[:, 1] = det_f[:, 1]
  108. det_t[:, 2] = img_width - det_f[:, 0]
  109. det_t[:, 3] = det_f[:, 3]
  110. det_t[:, 4] = det_f[:, 4]
  111. return det_t
  112. def multi_scale_test(model, image, max_shrink):
  113. # Shrink detecting is only used to detect big faces
  114. st = 0.5 if max_shrink >= 0.75 else 0.5 * max_shrink
  115. det_s = detect_face(model, image, st)
  116. index = np.where(
  117. np.maximum(det_s[:, 2] - det_s[:, 0] + 1, det_s[:, 3] - det_s[:, 1] + 1)
  118. > 30)[0]
  119. det_s = det_s[index, :]
  120. # Enlarge one times
  121. bt = min(2, max_shrink) if max_shrink > 1 else (st + max_shrink) / 2
  122. det_b = detect_face(model, image, bt)
  123. # Enlarge small image x times for small faces
  124. if max_shrink > 2:
  125. bt *= 2
  126. while bt < max_shrink:
  127. det_b = np.row_stack((det_b, detect_face(model, image, bt)))
  128. bt *= 2
  129. det_b = np.row_stack((det_b, detect_face(model, image, max_shrink)))
  130. # Enlarged images are only used to detect small faces.
  131. if bt > 1:
  132. index = np.where(
  133. np.minimum(det_b[:, 2] - det_b[:, 0] + 1,
  134. det_b[:, 3] - det_b[:, 1] + 1) < 100)[0]
  135. det_b = det_b[index, :]
  136. # Shrinked images are only used to detect big faces.
  137. else:
  138. index = np.where(
  139. np.maximum(det_b[:, 2] - det_b[:, 0] + 1,
  140. det_b[:, 3] - det_b[:, 1] + 1) > 30)[0]
  141. det_b = det_b[index, :]
  142. return det_s, det_b
  143. def multi_scale_test_pyramid(model, image, max_shrink):
  144. # Use image pyramids to detect faces
  145. det_b = detect_face(model, image, 0.25)
  146. index = np.where(
  147. np.maximum(det_b[:, 2] - det_b[:, 0] + 1, det_b[:, 3] - det_b[:, 1] + 1)
  148. > 30)[0]
  149. det_b = det_b[index, :]
  150. st = [0.75, 1.25, 1.5, 1.75]
  151. for i in range(len(st)):
  152. if st[i] <= max_shrink:
  153. det_temp = detect_face(model, image, st[i])
  154. # Enlarged images are only used to detect small faces.
  155. if st[i] > 1:
  156. index = np.where(
  157. np.minimum(det_temp[:, 2] - det_temp[:, 0] + 1,
  158. det_temp[:, 3] - det_temp[:, 1] + 1) < 100)[0]
  159. det_temp = det_temp[index, :]
  160. # Shrinked images are only used to detect big faces.
  161. else:
  162. index = np.where(
  163. np.maximum(det_temp[:, 2] - det_temp[:, 0] + 1,
  164. det_temp[:, 3] - det_temp[:, 1] + 1) > 30)[0]
  165. det_temp = det_temp[index, :]
  166. det_b = np.row_stack((det_b, det_temp))
  167. return det_b
  168. def to_chw(image):
  169. """
  170. Transpose image from HWC to CHW.
  171. Args:
  172. image (np.array): an image with HWC layout.
  173. """
  174. # HWC to CHW
  175. if len(image.shape) == 3:
  176. image = np.swapaxes(image, 1, 2)
  177. image = np.swapaxes(image, 1, 0)
  178. return image
  179. def face_img_process(image,
  180. mean=[104., 117., 123.],
  181. std=[127.502231, 127.502231, 127.502231]):
  182. img = np.array(image)
  183. img = to_chw(img)
  184. img = img.astype('float32')
  185. img -= np.array(mean)[:, np.newaxis, np.newaxis].astype('float32')
  186. img /= np.array(std)[:, np.newaxis, np.newaxis].astype('float32')
  187. img = [img]
  188. img = np.array(img)
  189. return img
  190. def get_shrink(height, width):
  191. """
  192. Args:
  193. height (int): image height.
  194. width (int): image width.
  195. """
  196. # avoid out of memory
  197. max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
  198. max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
  199. def get_round(x, loc):
  200. str_x = str(x)
  201. if '.' in str_x:
  202. str_before, str_after = str_x.split('.')
  203. len_after = len(str_after)
  204. if len_after >= 3:
  205. str_final = str_before + '.' + str_after[0:loc]
  206. return float(str_final)
  207. else:
  208. return x
  209. max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
  210. if max_shrink >= 1.5 and max_shrink < 2:
  211. max_shrink = max_shrink - 0.1
  212. elif max_shrink >= 2 and max_shrink < 3:
  213. max_shrink = max_shrink - 0.2
  214. elif max_shrink >= 3 and max_shrink < 4:
  215. max_shrink = max_shrink - 0.3
  216. elif max_shrink >= 4 and max_shrink < 5:
  217. max_shrink = max_shrink - 0.4
  218. elif max_shrink >= 5:
  219. max_shrink = max_shrink - 0.5
  220. elif max_shrink <= 0.1:
  221. max_shrink = 0.1
  222. shrink = max_shrink if max_shrink < 1 else 1
  223. return shrink, max_shrink
  224. def bbox_vote(det):
  225. order = det[:, 4].ravel().argsort()[::-1]
  226. det = det[order, :]
  227. if det.shape[0] == 0:
  228. dets = np.array([[10, 10, 20, 20, 0.002]])
  229. det = np.empty(shape=[0, 5])
  230. while det.shape[0] > 0:
  231. # IOU
  232. area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
  233. xx1 = np.maximum(det[0, 0], det[:, 0])
  234. yy1 = np.maximum(det[0, 1], det[:, 1])
  235. xx2 = np.minimum(det[0, 2], det[:, 2])
  236. yy2 = np.minimum(det[0, 3], det[:, 3])
  237. w = np.maximum(0.0, xx2 - xx1 + 1)
  238. h = np.maximum(0.0, yy2 - yy1 + 1)
  239. inter = w * h
  240. o = inter / (area[0] + area[:] - inter)
  241. # nms
  242. merge_index = np.where(o >= 0.3)[0]
  243. det_accu = det[merge_index, :]
  244. det = np.delete(det, merge_index, 0)
  245. if merge_index.shape[0] <= 1:
  246. if det.shape[0] == 0:
  247. try:
  248. dets = np.row_stack((dets, det_accu))
  249. except:
  250. dets = det_accu
  251. continue
  252. det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
  253. max_score = np.max(det_accu[:, 4])
  254. det_accu_sum = np.zeros((1, 5))
  255. det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
  256. axis=0) / np.sum(det_accu[:, -1:])
  257. det_accu_sum[:, 4] = max_score
  258. try:
  259. dets = np.row_stack((dets, det_accu_sum))
  260. except:
  261. dets = det_accu_sum
  262. dets = dets[0:750, :]
  263. keep_index = np.where(dets[:, 4] >= 0.01)[0]
  264. dets = dets[keep_index, :]
  265. return dets
  266. def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
  267. image_name = image_path.split('/')[-1]
  268. image_class = image_path.split('/')[-2]
  269. odir = os.path.join(output_dir, image_class)
  270. if not os.path.exists(odir):
  271. os.makedirs(odir)
  272. ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
  273. f = open(ofname, 'w')
  274. f.write('{:s}\n'.format(image_class + '/' + image_name))
  275. f.write('{:d}\n'.format(bboxes_scores.shape[0]))
  276. for box_score in bboxes_scores:
  277. xmin, ymin, xmax, ymax, score = box_score
  278. f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
  279. xmax - xmin + 1), (ymax - ymin + 1), score))
  280. f.close()
  281. logger.info("The predicted result is saved as {}".format(ofname))
  282. def save_fddb_bboxes(bboxes_scores,
  283. output_dir,
  284. output_fname='pred_fddb_res.txt'):
  285. if not os.path.exists(output_dir):
  286. os.makedirs(output_dir)
  287. predict_file = os.path.join(output_dir, output_fname)
  288. f = open(predict_file, 'w')
  289. for image_path, dets in bboxes_scores.iteritems():
  290. f.write('{:s}\n'.format(image_path))
  291. f.write('{:d}\n'.format(dets.shape[0]))
  292. for box_score in dets:
  293. xmin, ymin, xmax, ymax, score = box_score
  294. width, height = xmax - xmin, ymax - ymin
  295. f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
  296. .format(xmin, ymin, width, height, score))
  297. logger.info("The predicted result is saved as {}".format(predict_file))
  298. return predict_file
  299. def lmk2out(results, is_bbox_normalized=False):
  300. """
  301. Args:
  302. results: request a dict, should include: `landmark`, `im_id`,
  303. if is_bbox_normalized=True, also need `im_shape`.
  304. is_bbox_normalized: whether or not landmark is normalized.
  305. """
  306. xywh_res = []
  307. for t in results:
  308. bboxes = t['bbox'][0]
  309. lengths = t['bbox'][1][0]
  310. im_ids = np.array(t['im_id'][0]).flatten()
  311. if bboxes.shape == (1, 1) or bboxes is None:
  312. continue
  313. face_index = t['face_index'][0]
  314. prior_box = t['prior_boxes'][0]
  315. predict_lmk = t['landmark'][0]
  316. prior = np.reshape(prior_box, (-1, 4))
  317. predictlmk = np.reshape(predict_lmk, (-1, 10))
  318. k = 0
  319. for a in range(len(lengths)):
  320. num = lengths[a]
  321. im_id = int(im_ids[a])
  322. for i in range(num):
  323. score = bboxes[k][1]
  324. theindex = face_index[i][0]
  325. me_prior = prior[theindex, :]
  326. lmk_pred = predictlmk[theindex, :]
  327. prior_w = me_prior[2] - me_prior[0]
  328. prior_h = me_prior[3] - me_prior[1]
  329. prior_w_center = (me_prior[2] + me_prior[0]) / 2
  330. prior_h_center = (me_prior[3] + me_prior[1]) / 2
  331. lmk_decode = np.zeros((10))
  332. for j in [0, 2, 4, 6, 8]:
  333. lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_w_center
  334. for j in [1, 3, 5, 7, 9]:
  335. lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_h_center
  336. im_shape = t['im_shape'][0][a].tolist()
  337. image_h, image_w = int(im_shape[0]), int(im_shape[1])
  338. if is_bbox_normalized:
  339. lmk_decode = lmk_decode * np.array([
  340. image_w, image_h, image_w, image_h, image_w, image_h,
  341. image_w, image_h, image_w, image_h
  342. ])
  343. lmk_res = {
  344. 'image_id': im_id,
  345. 'landmark': lmk_decode,
  346. 'score': score,
  347. }
  348. xywh_res.append(lmk_res)
  349. k += 1
  350. return xywh_res