preprocess.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import cv2
  15. import numpy as np
  16. from keypoint_preprocess import get_affine_transform
  17. def decode_image(im_file, im_info):
  18. """read rgb image
  19. Args:
  20. im_file (str|np.ndarray): input can be image path or np.ndarray
  21. im_info (dict): info of image
  22. Returns:
  23. im (np.ndarray): processed image (np.ndarray)
  24. im_info (dict): info of processed image
  25. """
  26. if isinstance(im_file, str):
  27. with open(im_file, 'rb') as f:
  28. im_read = f.read()
  29. data = np.frombuffer(im_read, dtype='uint8')
  30. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  31. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  32. else:
  33. im = im_file
  34. im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  35. im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  36. return im, im_info
  37. class Resize(object):
  38. """resize image by target_size and max_size
  39. Args:
  40. target_size (int): the target size of image
  41. keep_ratio (bool): whether keep_ratio or not, default true
  42. interp (int): method of resize
  43. """
  44. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  45. if isinstance(target_size, int):
  46. target_size = [target_size, target_size]
  47. self.target_size = target_size
  48. self.keep_ratio = keep_ratio
  49. self.interp = interp
  50. def __call__(self, im, im_info):
  51. """
  52. Args:
  53. im (np.ndarray): image (np.ndarray)
  54. im_info (dict): info of image
  55. Returns:
  56. im (np.ndarray): processed image (np.ndarray)
  57. im_info (dict): info of processed image
  58. """
  59. assert len(self.target_size) == 2
  60. assert self.target_size[0] > 0 and self.target_size[1] > 0
  61. im_channel = im.shape[2]
  62. im_scale_y, im_scale_x = self.generate_scale(im)
  63. im = cv2.resize(
  64. im,
  65. None,
  66. None,
  67. fx=im_scale_x,
  68. fy=im_scale_y,
  69. interpolation=self.interp)
  70. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  71. im_info['scale_factor'] = np.array(
  72. [im_scale_y, im_scale_x]).astype('float32')
  73. return im, im_info
  74. def generate_scale(self, im):
  75. """
  76. Args:
  77. im (np.ndarray): image (np.ndarray)
  78. Returns:
  79. im_scale_x: the resize ratio of X
  80. im_scale_y: the resize ratio of Y
  81. """
  82. origin_shape = im.shape[:2]
  83. im_c = im.shape[2]
  84. if self.keep_ratio:
  85. im_size_min = np.min(origin_shape)
  86. im_size_max = np.max(origin_shape)
  87. target_size_min = np.min(self.target_size)
  88. target_size_max = np.max(self.target_size)
  89. im_scale = float(target_size_min) / float(im_size_min)
  90. if np.round(im_scale * im_size_max) > target_size_max:
  91. im_scale = float(target_size_max) / float(im_size_max)
  92. im_scale_x = im_scale
  93. im_scale_y = im_scale
  94. else:
  95. resize_h, resize_w = self.target_size
  96. im_scale_y = resize_h / float(origin_shape[0])
  97. im_scale_x = resize_w / float(origin_shape[1])
  98. return im_scale_y, im_scale_x
  99. class NormalizeImage(object):
  100. """normalize image
  101. Args:
  102. mean (list): im - mean
  103. std (list): im / std
  104. is_scale (bool): whether need im / 255
  105. is_channel_first (bool): if True: image shape is CHW, else: HWC
  106. """
  107. def __init__(self, mean, std, is_scale=True):
  108. self.mean = mean
  109. self.std = std
  110. self.is_scale = is_scale
  111. def __call__(self, im, im_info):
  112. """
  113. Args:
  114. im (np.ndarray): image (np.ndarray)
  115. im_info (dict): info of image
  116. Returns:
  117. im (np.ndarray): processed image (np.ndarray)
  118. im_info (dict): info of processed image
  119. """
  120. im = im.astype(np.float32, copy=False)
  121. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  122. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  123. if self.is_scale:
  124. im = im / 255.0
  125. im -= mean
  126. im /= std
  127. return im, im_info
  128. class Permute(object):
  129. """permute image
  130. Args:
  131. to_bgr (bool): whether convert RGB to BGR
  132. channel_first (bool): whether convert HWC to CHW
  133. """
  134. def __init__(self, ):
  135. super(Permute, self).__init__()
  136. def __call__(self, im, im_info):
  137. """
  138. Args:
  139. im (np.ndarray): image (np.ndarray)
  140. im_info (dict): info of image
  141. Returns:
  142. im (np.ndarray): processed image (np.ndarray)
  143. im_info (dict): info of processed image
  144. """
  145. im = im.transpose((2, 0, 1)).copy()
  146. return im, im_info
  147. class PadStride(object):
  148. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  149. Args:
  150. stride (bool): model with FPN need image shape % stride == 0
  151. """
  152. def __init__(self, stride=0):
  153. self.coarsest_stride = stride
  154. def __call__(self, im, im_info):
  155. """
  156. Args:
  157. im (np.ndarray): image (np.ndarray)
  158. im_info (dict): info of image
  159. Returns:
  160. im (np.ndarray): processed image (np.ndarray)
  161. im_info (dict): info of processed image
  162. """
  163. coarsest_stride = self.coarsest_stride
  164. if coarsest_stride <= 0:
  165. return im, im_info
  166. im_c, im_h, im_w = im.shape
  167. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  168. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  169. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  170. padding_im[:, :im_h, :im_w] = im
  171. return padding_im, im_info
  172. class LetterBoxResize(object):
  173. def __init__(self, target_size):
  174. """
  175. Resize image to target size, convert normalized xywh to pixel xyxy
  176. format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
  177. Args:
  178. target_size (int|list): image target size.
  179. """
  180. super(LetterBoxResize, self).__init__()
  181. if isinstance(target_size, int):
  182. target_size = [target_size, target_size]
  183. self.target_size = target_size
  184. def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
  185. # letterbox: resize a rectangular image to a padded rectangular
  186. shape = img.shape[:2] # [height, width]
  187. ratio_h = float(height) / shape[0]
  188. ratio_w = float(width) / shape[1]
  189. ratio = min(ratio_h, ratio_w)
  190. new_shape = (round(shape[1] * ratio),
  191. round(shape[0] * ratio)) # [width, height]
  192. padw = (width - new_shape[0]) / 2
  193. padh = (height - new_shape[1]) / 2
  194. top, bottom = round(padh - 0.1), round(padh + 0.1)
  195. left, right = round(padw - 0.1), round(padw + 0.1)
  196. img = cv2.resize(
  197. img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
  198. img = cv2.copyMakeBorder(
  199. img, top, bottom, left, right, cv2.BORDER_CONSTANT,
  200. value=color) # padded rectangular
  201. return img, ratio, padw, padh
  202. def __call__(self, im, im_info):
  203. """
  204. Args:
  205. im (np.ndarray): image (np.ndarray)
  206. im_info (dict): info of image
  207. Returns:
  208. im (np.ndarray): processed image (np.ndarray)
  209. im_info (dict): info of processed image
  210. """
  211. assert len(self.target_size) == 2
  212. assert self.target_size[0] > 0 and self.target_size[1] > 0
  213. height, width = self.target_size
  214. h, w = im.shape[:2]
  215. im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
  216. new_shape = [round(h * ratio), round(w * ratio)]
  217. im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
  218. im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
  219. return im, im_info
  220. class Pad(object):
  221. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  222. """
  223. Pad image to a specified size.
  224. Args:
  225. size (list[int]): image target size
  226. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  227. """
  228. super(Pad, self).__init__()
  229. if isinstance(size, int):
  230. size = [size, size]
  231. self.size = size
  232. self.fill_value = fill_value
  233. def __call__(self, im, im_info):
  234. im_h, im_w = im.shape[:2]
  235. h, w = self.size
  236. if h == im_h and w == im_w:
  237. im = im.astype(np.float32)
  238. return im, im_info
  239. canvas = np.ones((h, w, 3), dtype=np.float32)
  240. canvas *= np.array(self.fill_value, dtype=np.float32)
  241. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  242. im = canvas
  243. return im, im_info
  244. class WarpAffine(object):
  245. """Warp affine the image
  246. """
  247. def __init__(self,
  248. keep_res=False,
  249. pad=31,
  250. input_h=512,
  251. input_w=512,
  252. scale=0.4,
  253. shift=0.1):
  254. self.keep_res = keep_res
  255. self.pad = pad
  256. self.input_h = input_h
  257. self.input_w = input_w
  258. self.scale = scale
  259. self.shift = shift
  260. def __call__(self, im, im_info):
  261. """
  262. Args:
  263. im (np.ndarray): image (np.ndarray)
  264. im_info (dict): info of image
  265. Returns:
  266. im (np.ndarray): processed image (np.ndarray)
  267. im_info (dict): info of processed image
  268. """
  269. img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
  270. h, w = img.shape[:2]
  271. if self.keep_res:
  272. input_h = (h | self.pad) + 1
  273. input_w = (w | self.pad) + 1
  274. s = np.array([input_w, input_h], dtype=np.float32)
  275. c = np.array([w // 2, h // 2], dtype=np.float32)
  276. else:
  277. s = max(h, w) * 1.0
  278. input_h, input_w = self.input_h, self.input_w
  279. c = np.array([w / 2., h / 2.], dtype=np.float32)
  280. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  281. img = cv2.resize(img, (w, h))
  282. inp = cv2.warpAffine(
  283. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
  284. return inp, im_info
  285. def preprocess(im, preprocess_ops):
  286. # process image by preprocess_ops
  287. im_info = {
  288. 'scale_factor': np.array(
  289. [1., 1.], dtype=np.float32),
  290. 'im_shape': None,
  291. }
  292. im, im_info = decode_image(im, im_info)
  293. for operator in preprocess_ops:
  294. im, im_info = operator(im, im_info)
  295. return im, im_info