# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is based on https://github.com/open-mmlab/mmpose
"""
import cv2
import numpy as np


def get_affine_mat_kernel(h, w, s, inv=False):
    """Build the affine transform between an (h, w) image and the resized
    plane whose shorter side becomes `s` and whose longer side is padded up
    to a multiple of 64."""
    if w < h:
        w_ = s
        h_ = int(np.ceil((s / w * h) / 64.) * 64)
        scale_w = w
        scale_h = h_ / w_ * w
    else:
        h_ = s
        w_ = int(np.ceil((s / h * w) / 64.) * 64)
        scale_h = h
        scale_w = w_ / h_ * h

    center = np.array([np.round(w / 2.), np.round(h / 2.)])
    size_resized = (w_, h_)
    trans = get_affine_transform(
        center, np.array([scale_w, scale_h]), 0, size_resized, inv=inv)

    return trans, size_resized


def get_affine_transform(center,
                         input_size,
                         rot,
                         output_size,
                         shift=(0., 0.),
                         inv=False):
    """Get the affine transform matrix, given the center/scale/rot/output_size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (np.ndarray[2, ]): Size of input feature (width, height).
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (np.ndarray[2, ]): Shift translation ratio w.r.t. the
            width/height, each in [0, 1]. Default (0., 0.).
        inv (bool): Option to invert the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: The transform matrix.
    """
    assert len(center) == 2
    assert len(output_size) == 2
    assert len(shift) == 2

    if not isinstance(input_size, (np.ndarray, list)):
        input_size = np.array([input_size, input_size], dtype=np.float32)
    scale_tmp = input_size

    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
    dst_dir = np.array([0., dst_w * -0.5])

    # Three point pairs (the center, a rotated direction point, and a third
    # perpendicular point) fully determine the affine matrix.
    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))

    return trans


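# A minimal usage sketch (illustrative values, not part of the original file):
# map a 192x256 person box centered at (100., 120.) onto a 48x64 heatmap, then
# warp an assumed input image `img` with the resulting 2x3 matrix.
#
#   trans = get_affine_transform(
#       center=np.array([100., 120.]),
#       input_size=np.array([192., 256.]),
#       rot=0,
#       output_size=np.array([48, 64]))
#   patch = cv2.warpAffine(img, trans, (48, 64), flags=cv2.INTER_LINEAR)

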
def get_warp_matrix(theta, size_input, size_dst, size_target):
    """This code is based on
    https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py

    Calculate the transformation matrix under the constraint of unbiased
    data processing. Paper ref: Huang et al. The Devil is in the Details:
    Delving into Unbiased Data Processing for Human Pose Estimation
    (CVPR 2020).

    Args:
        theta (float): Rotation angle in degrees.
        size_input (np.ndarray): Size of input image [w, h].
        size_dst (np.ndarray): Size of output image [w, h].
        size_target (np.ndarray): Size of ROI in input plane [w, h].

    Returns:
        matrix (np.ndarray): A matrix for transformation.
    """
    theta = np.deg2rad(theta)
    matrix = np.zeros((2, 3), dtype=np.float32)
    scale_x = size_dst[0] / size_target[0]
    scale_y = size_dst[1] / size_target[1]
    matrix[0, 0] = np.cos(theta) * scale_x
    matrix[0, 1] = -np.sin(theta) * scale_x
    matrix[0, 2] = scale_x * (
        -0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] *
        np.sin(theta) + 0.5 * size_target[0])
    matrix[1, 0] = np.sin(theta) * scale_y
    matrix[1, 1] = np.cos(theta) * scale_y
    matrix[1, 2] = scale_y * (
        -0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] *
        np.cos(theta) + 0.5 * size_target[1])

    return matrix


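# A minimal usage sketch (illustrative values, not part of the original file):
# an identity-rotation warp where the ROI covers the whole 192x256 input and
# the destination heatmap is 48x64; `mat` plugs into cv2.warpAffine like any
# 2x3 affine matrix.
#
#   mat = get_warp_matrix(
#       theta=0.,
#       size_input=np.array([192., 256.]),
#       size_dst=np.array([48., 64.]),
#       size_target=np.array([192., 256.]))

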
def _get_3rd_point(a, b):
    """To calculate the affine matrix, three pairs of points are required. This
    function is used to get the 3rd point, given 2D points a & b.

    The 3rd point is defined by rotating the vector `a - b` by 90 degrees
    anticlockwise, using b as the rotation center.

    Args:
        a (np.ndarray): point (x, y)
        b (np.ndarray): point (x, y)

    Returns:
        np.ndarray: The 3rd point.
    """
    assert len(
        a) == 2, 'input of _get_3rd_point should be point with length of 2'
    assert len(
        b) == 2, 'input of _get_3rd_point should be point with length of 2'
    direction = a - b
    third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)

    return third_pt


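# A short worked example (not part of the original file): with a = (1, 0) and
# b = (0, 0), the direction a - b = (1, 0) is rotated 90 degrees anticlockwise
# around b, giving the third point (0, 1).
#
#   _get_3rd_point(np.array([1., 0.]), np.array([0., 0.]))  # -> array([0., 1.])

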
def rotate_point(pt, angle_rad):
    """Rotate a point by an angle.

    Args:
        pt (list[float]): 2 dimensional point to be rotated
        angle_rad (float): rotation angle in radians

    Returns:
        list[float]: Rotated point.
    """
    assert len(pt) == 2
    sn, cs = np.sin(angle_rad), np.cos(angle_rad)
    new_x = pt[0] * cs - pt[1] * sn
    new_y = pt[0] * sn + pt[1] * cs
    rotated_pt = [new_x, new_y]

    return rotated_pt


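# A short worked example (not part of the original file): rotating (1, 0) by
# pi / 2 radians yields (0, 1), up to floating-point error.
#
#   rotate_point([1., 0.], np.pi / 2)  # -> [~0., 1.0]

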
def transpred(kpts, h, w, s):
    """Map keypoint coordinates from the resized plane back to the original
    (h, w) image via the inverse transform from get_affine_mat_kernel."""
    trans, _ = get_affine_mat_kernel(h, w, s, inv=True)

    return warp_affine_joints(kpts[..., :2].copy(), trans)


def warp_affine_joints(joints, mat):
    """Apply an affine transformation, defined by the transform matrix, to the
    joints.

    Args:
        joints (np.ndarray[..., 2]): Origin coordinates of joints.
        mat (np.ndarray[2, 3]): The affine matrix.

    Returns:
        np.ndarray[..., 2]: Result coordinates of joints.
    """
    joints = np.array(joints)
    shape = joints.shape
    joints = joints.reshape(-1, 2)
    # Append a homogeneous coordinate of 1 to every joint, then project.
    return np.dot(np.concatenate(
        (joints, joints[:, 0:1] * 0 + 1), axis=1),
                  mat.T).reshape(shape)


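# A minimal usage sketch (illustrative values, not part of the original file):
# joints with any leading shape, e.g. (num_person, num_joints, 2), are
# flattened, extended with a homogeneous 1, projected and reshaped back.
#
#   trans, _ = get_affine_mat_kernel(h=256, w=192, s=512, inv=True)
#   joints = np.zeros((1, 17, 2), dtype=np.float32)
#   warped = warp_affine_joints(joints, trans)  # shape stays (1, 17, 2)

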
def affine_transform(pt, t):
    """Apply the 2x3 affine matrix `t` to a single (x, y) point."""
    new_pt = np.array([pt[0], pt[1], 1.])
    new_pt = np.dot(t, new_pt)

    return new_pt[:2]


def transform_preds(coords, center, scale, output_size):
    """Map heatmap coordinates back to the input image. Following the common
    COCO convention, `scale` is expected in units of 200 pixels."""
    target_coords = np.zeros(coords.shape)
    trans = get_affine_transform(
        center, scale * 200, 0, output_size, inv=True)
    for p in range(coords.shape[0]):
        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)

    return target_coords


def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
    """Compute object keypoint similarity (OKS) between one pose `g` and each
    pose in `d`, given their areas `a_g` and `a_d`. The default sigmas are the
    COCO per-keypoint falloff constants."""
    if not isinstance(sigmas, np.ndarray):
        sigmas = np.array([
            .26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07,
            .87, .87, .89, .89
        ]) / 10.0
    vars = (sigmas * 2)**2
    xg = g[0::3]
    yg = g[1::3]
    vg = g[2::3]
    ious = np.zeros((d.shape[0]))
    for n_d in range(0, d.shape[0]):
        xd = d[n_d, 0::3]
        yd = d[n_d, 1::3]
        vd = d[n_d, 2::3]
        dx = xd - xg
        dy = yd - yg
        e = (dx**2 + dy**2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
        if in_vis_thre is not None:
            # Keep only keypoints visible in both poses. The original
            # `list(...) and list(...)` evaluated to the second list alone;
            # an elementwise logical and is what is intended here.
            ind = np.logical_and(vg > in_vis_thre, vd > in_vis_thre)
            e = e[ind]
        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0

    return ious


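# A minimal usage sketch (illustrative values, not part of the original file):
# OKS of a query pose `g` (flattened [x, y, v] * 17) against two stacked
# candidate poses; identical poses score an OKS of 1.0.
#
#   g = np.zeros(17 * 3)
#   d = np.zeros((2, 17 * 3))
#   oks_iou(g, d, a_g=100., a_d=np.array([100., 90.]))  # -> array([1., 1.])

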
def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """Greedily select poses in descending score order, suppressing any
    candidate whose OKS overlap with an already kept pose exceeds `thresh`.

    Args:
        kpts_db (list): The predicted keypoints within the image.
        thresh (float): The OKS threshold used to suppress candidates.
        sigmas (np.ndarray): The variance used to calculate the OKS IoU.
            Default: None.
        in_vis_thre (float): The threshold to select high-confidence keypoints.
            Default: None.

    Returns:
        keep (list): Indexes to keep.
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)

        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        inds = np.where(oks_ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


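# A minimal usage sketch (illustrative values, not part of the original file):
# each entry carries COCO-style keypoints of shape (17, 3), a score and an
# area; the lower-scored duplicate of an identical pose is suppressed.
#
#   kpts_db = [
#       {'keypoints': np.zeros((17, 3)), 'score': 0.9, 'area': 100.},
#       {'keypoints': np.zeros((17, 3)), 'score': 0.8, 'area': 100.},
#   ]
#   oks_nms(kpts_db, thresh=0.9)  # -> [0]

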
def rescore(overlap, scores, thresh, type='gaussian'):
    """Decay `scores` according to `overlap`: linearly above `thresh` when
    type == 'linear', otherwise with a Gaussian penalty (soft-NMS)."""
    assert overlap.shape[0] == scores.shape[0]
    if type == 'linear':
        inds = np.where(overlap >= thresh)[0]
        scores[inds] = scores[inds] * (1 - overlap[inds])
    else:
        scores = scores * np.exp(-overlap**2 / thresh)

    return scores


def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
    """Greedily select poses in descending score order; instead of dropping
    overlapping candidates outright, decay their scores with rescore().

    Args:
        kpts_db (list): The predicted keypoints within the image.
        thresh (float): The OKS threshold used to rescore candidates.
        sigmas (np.ndarray): The variance used to calculate the OKS IoU.
            Default: None.
        in_vis_thre (float): The threshold to select high-confidence keypoints.
            Default: None.

    Returns:
        keep (np.ndarray): Indexes to keep.
    """
    if len(kpts_db) == 0:
        return []

    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
    kpts = np.array(
        [kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])

    order = scores.argsort()[::-1]
    scores = scores[order]

    # max_dets = order.size
    max_dets = 20  # keep at most 20 detections
    keep = np.zeros(max_dets, dtype=np.intp)
    keep_cnt = 0
    while order.size > 0 and keep_cnt < max_dets:
        i = order[0]

        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]],
                          sigmas, in_vis_thre)

        # Decay the remaining scores by their overlap with the kept pose,
        # then re-sort the remaining candidates by the decayed scores.
        order = order[1:]
        scores = rescore(oks_ovr, scores[1:], thresh)

        tmp = scores.argsort()[::-1]
        order = order[tmp]
        scores = scores[tmp]

        keep[keep_cnt] = i
        keep_cnt += 1
    keep = keep[:keep_cnt]

    return keep


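# A minimal usage sketch (illustrative values, not part of the original file):
# on the same two identical poses used for oks_nms above, the duplicate is
# down-weighted by rescore() rather than dropped, so both indexes survive.
#
#   soft_oks_nms(kpts_db, thresh=0.9)  # -> array([0, 1])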